Allow madvise()ing image block data

This will be on by default and can be disabled with the `no_image_madvise` option.
2025-09-14 23:05:33 -04:00 · 2020-12-02 18:06:28 +01:00 · 2020-12-02 18:06:28 +01:00 · 57aaa5bec0
commit 57aaa5bec0
parent 1326868143
11 changed files with 139 additions and 52 deletions
--- a/doc/dwarfs.md
+++ b/doc/dwarfs.md
@ -64,6 +64,16 @@ options:
    will also consume more memory to hold the hardlink count table.
    This will be 4 bytes for every regular file inode.
  * `-o no_image_madvise`
    By default, `dwarfs` will issue `madvise` calls after reading
    the compressed block data from the file system image. This
    will reduce the memory consumption of the FUSE driver to
    slightly more than the `cachesize`. This usually isn't a
    problem, especially when the image is stored on an SSD, but if
    you want to maximize performance it can be beneficial to use
    this option to keep the compressed image data in the kernel
    cache.
  * `-o debuglevel=`*name*:
    Use this for different levels of verbosity along with either
    the `-f` or `-d` FUSE options. This can give you some insight
--- a/include/dwarfs/block_cache.h
+++ b/include/dwarfs/block_cache.h
@ -35,15 +35,17 @@ struct block_cache_options;
 class block_range;
 class logger;
 class mmif;
 class block_cache {
 public:
-  block_cache(logger& lgr, const block_cache_options& options);
+  block_cache(logger& lgr, std::shared_ptr<mmif> mm,
              const block_cache_options& options);
  size_t block_count() const { return impl_->block_count(); }
-  void insert(compression_type comp, const uint8_t* data, size_t size) {
+  void insert(compression_type comp, off_t offset, size_t size) {
-    impl_->insert(comp, data, size);
+    impl_->insert(comp, offset, size);
  }
  void set_block_size(size_t size) { impl_->set_block_size(size); }
@ -58,8 +60,7 @@ class block_cache {
    virtual ~impl() = default;
    virtual size_t block_count() const = 0;
-    virtual void
+    virtual void insert(compression_type comp, off_t offset, size_t size) = 0;
    insert(compression_type comp, const uint8_t* data, size_t size) = 0;
    virtual void set_block_size(size_t size) = 0;
    virtual std::future<block_range>
    get(size_t block_no, size_t offset, size_t length) const = 0;
--- a/include/dwarfs/logger.h
+++ b/include/dwarfs/logger.h
@ -240,6 +240,8 @@ class log_proxy {
        lgr_, logger::TRACE);
  }
  logger& get_logger() const { return lgr_; }
 private:
  logger& lgr_;
 };
--- a/include/dwarfs/mmap.h
+++ b/include/dwarfs/mmap.h
@ -30,12 +30,21 @@ namespace dwarfs {
 class mmap : public mmif {
 public:
-  mmap(const std::string& path);
+  explicit mmap(const std::string& path);
  mmap(const std::string& path, size_t size);
-  virtual ~mmap() noexcept;
+  ~mmap() noexcept override;
  void const* addr() const override;
  size_t size() const override;
  boost::system::error_code lock(off_t offset, size_t size) override;
  boost::system::error_code release(off_t offset, size_t size) override;
 private:
  int fd_;
  size_t size_;
  void* addr_;
  off_t const page_size_;
 };
 } // namespace dwarfs
--- a/include/dwarfs/mmif.h
+++ b/include/dwarfs/mmif.h
@ -24,6 +24,7 @@
 #include <string>
 #include <boost/noncopyable.hpp>
 #include <boost/system/system_error.hpp>
 #include <folly/Range.h>
@ -33,28 +34,20 @@ class mmif : public boost::noncopyable {
 public:
  virtual ~mmif() = default;
  const void* get() const { return addr_; }
  template <typename T>
-  const T* as(size_t offset = 0) const {
+  T const* as(off_t offset = 0) const {
-    return reinterpret_cast<const T*>(
+    return reinterpret_cast<T const*>(
-        reinterpret_cast<const char*>(const_cast<const void*>(addr_)) + offset);
+        reinterpret_cast<char const*>(this->addr()) + offset);
  }
-  size_t size() const { return size_; }
+  folly::ByteRange range(off_t offset, size_t length) const {
-
+    return folly::ByteRange(this->as<uint8_t>(offset), length);
  folly::ByteRange range(size_t start, size_t size) const {
    return folly::ByteRange(as<uint8_t>(start), size);
  }
- protected:
+  virtual void const* addr() const = 0;
-  void assign(const void* addr, size_t size) {
+  virtual size_t size() const = 0;
    addr_ = addr;
    size_ = size;
  }
- private:
+  virtual boost::system::error_code lock(off_t offset, size_t size) = 0;
-  const void* addr_;
+  virtual boost::system::error_code release(off_t offset, size_t size) = 0;
  size_t size_;
 };
 } // namespace dwarfs
--- a/include/dwarfs/options.h
+++ b/include/dwarfs/options.h
@ -33,6 +33,7 @@ struct block_cache_options {
  size_t max_bytes{0};
  size_t num_workers{0};
  double decompress_ratio{1.0};
  bool mm_release{true};
 };
 struct metadata_options {
--- a/src/dwarfs.cpp
+++ b/src/dwarfs.cpp
@ -53,6 +53,7 @@ struct options {
  const char* mlock_str;            // TODO: const?? -> use string?
  const char* decompress_ratio_str; // TODO: const?? -> use string?
  int enable_nlink;
  int no_image_madvise;
  size_t cachesize;
  size_t workers;
  mlock_mode lock_mode;
@ -77,6 +78,7 @@ const struct fuse_opt dwarfs_opts[] = {
    DWARFS_OPT("mlock=%s", mlock_str, 0),
    DWARFS_OPT("decratio=%s", decompress_ratio_str, 0),
    DWARFS_OPT("enable_nlink", enable_nlink, 1),
    DWARFS_OPT("no_image_madvise", no_image_madvise, 1),
    FUSE_OPT_END};
 options opts;
@ -97,6 +99,7 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
    fsopts.block_cache.max_bytes = opts.cachesize;
    fsopts.block_cache.num_workers = opts.workers;
    fsopts.block_cache.decompress_ratio = opts.decompress_ratio;
    fsopts.block_cache.mm_release = !opts.no_image_madvise;
    fsopts.metadata.enable_nlink = bool(opts.enable_nlink);
    s_fs = std::make_shared<filesystem_v2>(
        s_lgr, std::make_shared<mmap>(opts.fsimage), fsopts,
@ -392,6 +395,7 @@ void usage(const char* progname) {
            << "    -o mlock=NAME          mlock mode: (none), try, must\n"
            << "    -o decratio=NUM        ratio for full decompression (0.8)\n"
            << "    -o enable_nlink        show correct hardlink numbers\n"
            << "    -o no_image_madvise    keep image in kernel cache\n"
            << "    -o debuglevel=NAME     error, warn, (info), debug, trace\n"
            << std::endl;
--- a/src/dwarfs/block_cache.cpp
+++ b/src/dwarfs/block_cache.cpp
@ -40,27 +40,39 @@
 #include "dwarfs/block_cache.h"
 #include "dwarfs/fstypes.h"
 #include "dwarfs/logger.h"
 #include "dwarfs/mmif.h"
 #include "dwarfs/options.h"
 #include "dwarfs/worker_group.h"
 namespace dwarfs {
 struct block {
-  block(compression_type compression, const uint8_t* data, size_t size)
+  block(compression_type compression, off_t offset, size_t size)
      : compression(compression)
-      , data(data)
+      , offset(offset)
      , size(size) {}
  const compression_type compression;
-  const uint8_t* const data;
+  const off_t offset;
  const size_t size;
 };
 class cached_block {
 public:
-  explicit cached_block(const block& b)
+  cached_block(logger& lgr, block const& b, std::shared_ptr<mmif> mm,
               bool release)
      : decompressor_(std::make_unique<block_decompressor>(
-            b.compression, b.data, b.size, data_)) {}
+            b.compression, mm->as<uint8_t>(b.offset), b.size, data_))
      , mm_(std::move(mm))
      , spec_(b)
      , log_(lgr)
      , release_(release) {}
  ~cached_block() {
    if (decompressor_) {
      try_release();
    }
  }
  // once the block is fully decompressed, we can reset the decompressor_
@ -76,6 +88,9 @@ class cached_block {
      if (decompressor_->decompress_frame()) {
        // We're done, free the memory
        decompressor_.reset();
        // And release the memory from the mapping
        try_release();
      }
      range_end_ = data_.size();
@ -88,9 +103,21 @@ class cached_block {
  }
 private:
  void try_release() {
    if (release_) {
      if (auto ec = mm_->release(spec_.offset, spec_.size)) {
        log_.info() << "madvise() failed: " << ec.message();
      }
    }
  }
  std::atomic<size_t> range_end_{0};
  std::vector<uint8_t> data_;
  std::unique_ptr<block_decompressor> decompressor_;
  std::shared_ptr<mmif> mm_;
  block const& spec_;
  log_proxy<debug_logger_policy> log_;
  bool const release_;
 };
 class block_request {
@ -178,12 +205,14 @@ class block_request_set {
 template <typename LoggerPolicy>
 class block_cache_ : public block_cache::impl {
 public:
-  block_cache_(logger& lgr, const block_cache_options& options)
+  block_cache_(logger& lgr, std::shared_ptr<mmif> mm,
               block_cache_options const& options)
      : cache_(0)
      , wg_("blkcache", std::max(options.num_workers > 0
                                     ? options.num_workers
                                     : std::thread::hardware_concurrency(),
                                 static_cast<size_t>(1)))
      , mm_(std::move(mm))
      , log_(lgr)
      , options_(options) {}
@ -234,9 +263,8 @@ class block_cache_ : public block_cache::impl {
  size_t block_count() const override { return block_.size(); }
-  void
+  void insert(compression_type comp, off_t offset, size_t size) override {
-  insert(compression_type comp, const uint8_t* data, size_t size) override {
+    block_.emplace_back(comp, offset, size);
    block_.emplace_back(comp, data, size);
  }
  void set_block_size(size_t size) override {
@ -377,7 +405,8 @@ class block_cache_ : public block_cache::impl {
    assert(block_no < block_.size());
-    auto block = std::make_shared<cached_block>(block_[block_no]);
+    auto block = std::make_shared<cached_block>(
        log_.get_logger(), block_[block_no], mm_, options_.mm_release);
    ++blocks_created_;
    // Make a new set for the block
@ -517,13 +546,15 @@ class block_cache_ : public block_cache::impl {
  mutable worker_group wg_;
  std::vector<block> block_;
  std::shared_ptr<mmif> mm_;
  log_proxy<LoggerPolicy> log_;
  const block_cache_options options_;
 };
-block_cache::block_cache(logger& lgr, const block_cache_options& options)
+block_cache::block_cache(logger& lgr, std::shared_ptr<mmif> mm,
                         const block_cache_options& options)
    : impl_(make_unique_logging_object<impl, block_cache_, logger_policies>(
-          lgr, options)) {}
+          lgr, std::move(mm), options)) {}
 // TODO: clean up: this is defined in fstypes.h...
 block_range::block_range(std::shared_ptr<cached_block const> block,
--- a/src/dwarfs/filesystem_v2.cpp
+++ b/src/dwarfs/filesystem_v2.cpp
@ -19,7 +19,6 @@
 * along with dwarfs.  If not, see <https://www.gnu.org/licenses/>.
 */
 #include <cerrno>
 #include <cstddef>
 #include <cstring>
 #include <optional>
@ -164,9 +163,7 @@ make_metadata(logger& lgr, std::shared_ptr<mmif> mm,
      get_section_data(mm, meta_it->second, meta_buffer, force_buffers);
  if (lock_mode != mlock_mode::NONE) {
-    int rv = ::mlock(meta_section.data(), meta_section.size());
+    if (auto ec = mm->lock(meta_it->second.start, meta_section.size())) {
    if (rv != 0) {
      boost::system::error_code ec(errno, boost::system::generic_category());
      if (lock_mode == mlock_mode::MUST) {
        throw boost::system::system_error(ec, "mlock");
      } else {
@ -225,13 +222,13 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
    : log_(lgr)
    , mm_(mm) {
  filesystem_parser parser(mm_);
-  block_cache cache(lgr, options.block_cache);
+  block_cache cache(lgr, mm, options.block_cache);
  section_map sections;
  while (auto s = parser.next_section(log_)) {
    if (s->header.type == section_type::BLOCK) {
-      cache.insert(s->header.compression, mm_->as<uint8_t>(s->start),
+      cache.insert(s->header.compression, s->start,
                   static_cast<size_t>(s->header.length));
    } else {
      if (!sections.emplace(s->header.type, *s).second) {
--- a/src/dwarfs/mmap.cpp
+++ b/src/dwarfs/mmap.cpp
@ -20,6 +20,7 @@
 */
 #include <cerrno>
 #include <cstdio> // TODO
 #include <fcntl.h>
 #include <sys/mman.h>
@ -67,19 +68,48 @@ void* safe_mmap(int fd, size_t size) {
 }
 } // namespace
-mmap::mmap(const std::string& path)
+boost::system::error_code mmap::lock(off_t offset, size_t size) {
-    : fd_(safe_open(path)) {
+  boost::system::error_code ec;
-  size_t size = safe_size(fd_);
+  auto addr = reinterpret_cast<uint8_t*>(addr_) + offset;
-  assign(safe_mmap(fd_, size), size);
+  if (::mlock(addr, size) != 0) {
    ec.assign(errno, boost::system::generic_category());
  }
  return ec;
 }
 boost::system::error_code mmap::release(off_t offset, size_t size) {
  boost::system::error_code ec;
  auto misalign = offset % page_size_;
  offset -= misalign;
  size += misalign;
  size -= size % page_size_;
  auto addr = reinterpret_cast<uint8_t*>(addr_) + offset;
  if (::madvise(addr, size, MADV_DONTNEED) != 0) {
    ec.assign(errno, boost::system::generic_category());
  }
  return ec;
 }
 void const* mmap::addr() const { return addr_; }
 size_t mmap::size() const { return size_; }
 mmap::mmap(const std::string& path)
    : fd_(safe_open(path))
    , size_(safe_size(fd_))
    , addr_(safe_mmap(fd_, size_))
    , page_size_(::sysconf(_SC_PAGESIZE)) {}
 mmap::mmap(const std::string& path, size_t size)
-    : fd_(safe_open(path)) {
+    : fd_(safe_open(path))
-  assign(safe_mmap(fd_, size), size);
+    , size_(size)
-}
+    , addr_(safe_mmap(fd_, size_))
    , page_size_(::sysconf(_SC_PAGESIZE)) {}
 mmap::~mmap() noexcept {
-  ::munmap(const_cast<void*>(get()), size());
+  ::munmap(addr_, size_);
  ::close(fd_);
 }
 } // namespace dwarfs
--- a/test/dwarfs.cpp
+++ b/test/dwarfs.cpp
@ -88,8 +88,17 @@ std::map<std::string, simplestat> statmap{
 class mmap_mock : public mmif {
 public:
  mmap_mock(const std::string& data)
-      : m_data(data) {
+      : m_data(data) {}
-    assign(m_data.data(), m_data.size());
+
  void const* addr() const override { return m_data.data(); }
  size_t size() const override { return m_data.size(); }
  boost::system::error_code lock(off_t, size_t) override {
    return boost::system::error_code();
  }
  boost::system::error_code release(off_t, size_t) override {
    return boost::system::error_code();
  }
 private: