From 57aaa5bec033acb53b45989b599c4666123be085 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Wed, 2 Dec 2020 18:06:28 +0100 Subject: [PATCH] Allow madvise()ing image block data This will be on by default and can be disabled with the `no_image_madvise` option. --- doc/dwarfs.md | 10 +++++++ include/dwarfs/block_cache.h | 11 ++++---- include/dwarfs/logger.h | 2 ++ include/dwarfs/mmap.h | 13 +++++++-- include/dwarfs/mmif.h | 27 +++++++----------- include/dwarfs/options.h | 1 + src/dwarfs.cpp | 4 +++ src/dwarfs/block_cache.cpp | 55 ++++++++++++++++++++++++++++-------- src/dwarfs/filesystem_v2.cpp | 9 ++---- src/dwarfs/mmap.cpp | 46 ++++++++++++++++++++++++------ test/dwarfs.cpp | 13 +++++++-- 11 files changed, 139 insertions(+), 52 deletions(-) diff --git a/doc/dwarfs.md b/doc/dwarfs.md index c2063060..2e5f9900 100644 --- a/doc/dwarfs.md +++ b/doc/dwarfs.md @@ -64,6 +64,16 @@ options: will also consume more memory to hold the hardlink count table. This will be 4 bytes for every regular file inode. + * `-o no_image_madvise` + By default, `dwarfs` will issue `madvise` calls after reading + the compressed block data from the file system image. This + will reduce the memory consumption of the FUSE driver to + slightly more than the `cachesize`. This usually isn't a + problem, especially when the image is stored on an SSD, but if + you want to maximize performance it can be beneficial to use + this option to keep the compressed image data in the kernel + cache. + * `-o debuglevel=`*name*: Use this for different levels of verbosity along with either the `-f` or `-d` FUSE options. This can give you some insight diff --git a/include/dwarfs/block_cache.h b/include/dwarfs/block_cache.h index 627941d0..106ac04d 100644 --- a/include/dwarfs/block_cache.h +++ b/include/dwarfs/block_cache.h @@ -35,15 +35,17 @@ struct block_cache_options; class block_range; class logger; +class mmif; class block_cache { public: - block_cache(logger& lgr, const block_cache_options& options); + block_cache(logger& lgr, std::shared_ptr mm, + const block_cache_options& options); size_t block_count() const { return impl_->block_count(); } - void insert(compression_type comp, const uint8_t* data, size_t size) { - impl_->insert(comp, data, size); + void insert(compression_type comp, off_t offset, size_t size) { + impl_->insert(comp, offset, size); } void set_block_size(size_t size) { impl_->set_block_size(size); } @@ -58,8 +60,7 @@ class block_cache { virtual ~impl() = default; virtual size_t block_count() const = 0; - virtual void - insert(compression_type comp, const uint8_t* data, size_t size) = 0; + virtual void insert(compression_type comp, off_t offset, size_t size) = 0; virtual void set_block_size(size_t size) = 0; virtual std::future get(size_t block_no, size_t offset, size_t length) const = 0; diff --git a/include/dwarfs/logger.h b/include/dwarfs/logger.h index c6eec044..6d06875a 100644 --- a/include/dwarfs/logger.h +++ b/include/dwarfs/logger.h @@ -240,6 +240,8 @@ class log_proxy { lgr_, logger::TRACE); } + logger& get_logger() const { return lgr_; } + private: logger& lgr_; }; diff --git a/include/dwarfs/mmap.h b/include/dwarfs/mmap.h index c56d9f12..9077aed9 100644 --- a/include/dwarfs/mmap.h +++ b/include/dwarfs/mmap.h @@ -30,12 +30,21 @@ namespace dwarfs { class mmap : public mmif { public: - mmap(const std::string& path); + explicit mmap(const std::string& path); mmap(const std::string& path, size_t size); - virtual ~mmap() noexcept; + ~mmap() noexcept override; + + void const* addr() const override; + size_t size() const override; + + boost::system::error_code lock(off_t offset, size_t size) override; + boost::system::error_code release(off_t offset, size_t size) override; private: int fd_; + size_t size_; + void* addr_; + off_t const page_size_; }; } // namespace dwarfs diff --git a/include/dwarfs/mmif.h b/include/dwarfs/mmif.h index 609cc45d..b226021d 100644 --- a/include/dwarfs/mmif.h +++ b/include/dwarfs/mmif.h @@ -24,6 +24,7 @@ #include #include +#include #include @@ -33,28 +34,20 @@ class mmif : public boost::noncopyable { public: virtual ~mmif() = default; - const void* get() const { return addr_; } - template - const T* as(size_t offset = 0) const { - return reinterpret_cast( - reinterpret_cast(const_cast(addr_)) + offset); + T const* as(off_t offset = 0) const { + return reinterpret_cast( + reinterpret_cast(this->addr()) + offset); } - size_t size() const { return size_; } - - folly::ByteRange range(size_t start, size_t size) const { - return folly::ByteRange(as(start), size); + folly::ByteRange range(off_t offset, size_t length) const { + return folly::ByteRange(this->as(offset), length); } - protected: - void assign(const void* addr, size_t size) { - addr_ = addr; - size_ = size; - } + virtual void const* addr() const = 0; + virtual size_t size() const = 0; - private: - const void* addr_; - size_t size_; + virtual boost::system::error_code lock(off_t offset, size_t size) = 0; + virtual boost::system::error_code release(off_t offset, size_t size) = 0; }; } // namespace dwarfs diff --git a/include/dwarfs/options.h b/include/dwarfs/options.h index 89ba796b..dd9ea502 100644 --- a/include/dwarfs/options.h +++ b/include/dwarfs/options.h @@ -33,6 +33,7 @@ struct block_cache_options { size_t max_bytes{0}; size_t num_workers{0}; double decompress_ratio{1.0}; + bool mm_release{true}; }; struct metadata_options { diff --git a/src/dwarfs.cpp b/src/dwarfs.cpp index 94d3b098..4062aa55 100644 --- a/src/dwarfs.cpp +++ b/src/dwarfs.cpp @@ -53,6 +53,7 @@ struct options { const char* mlock_str; // TODO: const?? -> use string? const char* decompress_ratio_str; // TODO: const?? -> use string? int enable_nlink; + int no_image_madvise; size_t cachesize; size_t workers; mlock_mode lock_mode; @@ -77,6 +78,7 @@ const struct fuse_opt dwarfs_opts[] = { DWARFS_OPT("mlock=%s", mlock_str, 0), DWARFS_OPT("decratio=%s", decompress_ratio_str, 0), DWARFS_OPT("enable_nlink", enable_nlink, 1), + DWARFS_OPT("no_image_madvise", no_image_madvise, 1), FUSE_OPT_END}; options opts; @@ -97,6 +99,7 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) { fsopts.block_cache.max_bytes = opts.cachesize; fsopts.block_cache.num_workers = opts.workers; fsopts.block_cache.decompress_ratio = opts.decompress_ratio; + fsopts.block_cache.mm_release = !opts.no_image_madvise; fsopts.metadata.enable_nlink = bool(opts.enable_nlink); s_fs = std::make_shared( s_lgr, std::make_shared(opts.fsimage), fsopts, @@ -392,6 +395,7 @@ void usage(const char* progname) { << " -o mlock=NAME mlock mode: (none), try, must\n" << " -o decratio=NUM ratio for full decompression (0.8)\n" << " -o enable_nlink show correct hardlink numbers\n" + << " -o no_image_madvise keep image in kernel cache\n" << " -o debuglevel=NAME error, warn, (info), debug, trace\n" << std::endl; diff --git a/src/dwarfs/block_cache.cpp b/src/dwarfs/block_cache.cpp index 06ff179a..6840e691 100644 --- a/src/dwarfs/block_cache.cpp +++ b/src/dwarfs/block_cache.cpp @@ -40,27 +40,39 @@ #include "dwarfs/block_cache.h" #include "dwarfs/fstypes.h" #include "dwarfs/logger.h" +#include "dwarfs/mmif.h" #include "dwarfs/options.h" #include "dwarfs/worker_group.h" namespace dwarfs { struct block { - block(compression_type compression, const uint8_t* data, size_t size) + block(compression_type compression, off_t offset, size_t size) : compression(compression) - , data(data) + , offset(offset) , size(size) {} const compression_type compression; - const uint8_t* const data; + const off_t offset; const size_t size; }; class cached_block { public: - explicit cached_block(const block& b) + cached_block(logger& lgr, block const& b, std::shared_ptr mm, + bool release) : decompressor_(std::make_unique( - b.compression, b.data, b.size, data_)) {} + b.compression, mm->as(b.offset), b.size, data_)) + , mm_(std::move(mm)) + , spec_(b) + , log_(lgr) + , release_(release) {} + + ~cached_block() { + if (decompressor_) { + try_release(); + } + } // once the block is fully decompressed, we can reset the decompressor_ @@ -76,6 +88,9 @@ class cached_block { if (decompressor_->decompress_frame()) { // We're done, free the memory decompressor_.reset(); + + // And release the memory from the mapping + try_release(); } range_end_ = data_.size(); @@ -88,9 +103,21 @@ class cached_block { } private: + void try_release() { + if (release_) { + if (auto ec = mm_->release(spec_.offset, spec_.size)) { + log_.info() << "madvise() failed: " << ec.message(); + } + } + } + std::atomic range_end_{0}; std::vector data_; std::unique_ptr decompressor_; + std::shared_ptr mm_; + block const& spec_; + log_proxy log_; + bool const release_; }; class block_request { @@ -178,12 +205,14 @@ class block_request_set { template class block_cache_ : public block_cache::impl { public: - block_cache_(logger& lgr, const block_cache_options& options) + block_cache_(logger& lgr, std::shared_ptr mm, + block_cache_options const& options) : cache_(0) , wg_("blkcache", std::max(options.num_workers > 0 ? options.num_workers : std::thread::hardware_concurrency(), static_cast(1))) + , mm_(std::move(mm)) , log_(lgr) , options_(options) {} @@ -234,9 +263,8 @@ class block_cache_ : public block_cache::impl { size_t block_count() const override { return block_.size(); } - void - insert(compression_type comp, const uint8_t* data, size_t size) override { - block_.emplace_back(comp, data, size); + void insert(compression_type comp, off_t offset, size_t size) override { + block_.emplace_back(comp, offset, size); } void set_block_size(size_t size) override { @@ -377,7 +405,8 @@ class block_cache_ : public block_cache::impl { assert(block_no < block_.size()); - auto block = std::make_shared(block_[block_no]); + auto block = std::make_shared( + log_.get_logger(), block_[block_no], mm_, options_.mm_release); ++blocks_created_; // Make a new set for the block @@ -517,13 +546,15 @@ class block_cache_ : public block_cache::impl { mutable worker_group wg_; std::vector block_; + std::shared_ptr mm_; log_proxy log_; const block_cache_options options_; }; -block_cache::block_cache(logger& lgr, const block_cache_options& options) +block_cache::block_cache(logger& lgr, std::shared_ptr mm, + const block_cache_options& options) : impl_(make_unique_logging_object( - lgr, options)) {} + lgr, std::move(mm), options)) {} // TODO: clean up: this is defined in fstypes.h... block_range::block_range(std::shared_ptr block, diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp index 681bf000..827f3428 100644 --- a/src/dwarfs/filesystem_v2.cpp +++ b/src/dwarfs/filesystem_v2.cpp @@ -19,7 +19,6 @@ * along with dwarfs. If not, see . */ -#include #include #include #include @@ -164,9 +163,7 @@ make_metadata(logger& lgr, std::shared_ptr mm, get_section_data(mm, meta_it->second, meta_buffer, force_buffers); if (lock_mode != mlock_mode::NONE) { - int rv = ::mlock(meta_section.data(), meta_section.size()); - if (rv != 0) { - boost::system::error_code ec(errno, boost::system::generic_category()); + if (auto ec = mm->lock(meta_it->second.start, meta_section.size())) { if (lock_mode == mlock_mode::MUST) { throw boost::system::system_error(ec, "mlock"); } else { @@ -225,13 +222,13 @@ filesystem_::filesystem_(logger& lgr, std::shared_ptr mm, : log_(lgr) , mm_(mm) { filesystem_parser parser(mm_); - block_cache cache(lgr, options.block_cache); + block_cache cache(lgr, mm, options.block_cache); section_map sections; while (auto s = parser.next_section(log_)) { if (s->header.type == section_type::BLOCK) { - cache.insert(s->header.compression, mm_->as(s->start), + cache.insert(s->header.compression, s->start, static_cast(s->header.length)); } else { if (!sections.emplace(s->header.type, *s).second) { diff --git a/src/dwarfs/mmap.cpp b/src/dwarfs/mmap.cpp index 890cf4c1..36f75e69 100644 --- a/src/dwarfs/mmap.cpp +++ b/src/dwarfs/mmap.cpp @@ -20,6 +20,7 @@ */ #include +#include // TODO #include #include @@ -67,19 +68,48 @@ void* safe_mmap(int fd, size_t size) { } } // namespace -mmap::mmap(const std::string& path) - : fd_(safe_open(path)) { - size_t size = safe_size(fd_); - assign(safe_mmap(fd_, size), size); +boost::system::error_code mmap::lock(off_t offset, size_t size) { + boost::system::error_code ec; + auto addr = reinterpret_cast(addr_) + offset; + if (::mlock(addr, size) != 0) { + ec.assign(errno, boost::system::generic_category()); + } + return ec; } +boost::system::error_code mmap::release(off_t offset, size_t size) { + boost::system::error_code ec; + auto misalign = offset % page_size_; + + offset -= misalign; + size += misalign; + size -= size % page_size_; + + auto addr = reinterpret_cast(addr_) + offset; + if (::madvise(addr, size, MADV_DONTNEED) != 0) { + ec.assign(errno, boost::system::generic_category()); + } + return ec; +} + +void const* mmap::addr() const { return addr_; } + +size_t mmap::size() const { return size_; } + +mmap::mmap(const std::string& path) + : fd_(safe_open(path)) + , size_(safe_size(fd_)) + , addr_(safe_mmap(fd_, size_)) + , page_size_(::sysconf(_SC_PAGESIZE)) {} + mmap::mmap(const std::string& path, size_t size) - : fd_(safe_open(path)) { - assign(safe_mmap(fd_, size), size); -} + : fd_(safe_open(path)) + , size_(size) + , addr_(safe_mmap(fd_, size_)) + , page_size_(::sysconf(_SC_PAGESIZE)) {} mmap::~mmap() noexcept { - ::munmap(const_cast(get()), size()); + ::munmap(addr_, size_); ::close(fd_); } } // namespace dwarfs diff --git a/test/dwarfs.cpp b/test/dwarfs.cpp index 06613b90..385c1f23 100644 --- a/test/dwarfs.cpp +++ b/test/dwarfs.cpp @@ -88,8 +88,17 @@ std::map statmap{ class mmap_mock : public mmif { public: mmap_mock(const std::string& data) - : m_data(data) { - assign(m_data.data(), m_data.size()); + : m_data(data) {} + + void const* addr() const override { return m_data.data(); } + + size_t size() const override { return m_data.size(); } + + boost::system::error_code lock(off_t, size_t) override { + return boost::system::error_code(); + } + boost::system::error_code release(off_t, size_t) override { + return boost::system::error_code(); } private: