Allow madvise()ing image block data

This will be on by default and can be disabled with the
`no_image_madvise` option.
This commit is contained in:
Marcus Holland-Moritz 2020-12-02 18:06:28 +01:00
parent 1326868143
commit 57aaa5bec0
11 changed files with 139 additions and 52 deletions

View File

@ -64,6 +64,16 @@ options:
will also consume more memory to hold the hardlink count table. will also consume more memory to hold the hardlink count table.
This will be 4 bytes for every regular file inode. This will be 4 bytes for every regular file inode.
* `-o no_image_madvise`:
By default, `dwarfs` will issue `madvise` calls after reading
the compressed block data from the file system image. This
will reduce the memory consumption of the FUSE driver to
slightly more than the `cachesize`. Dropping the compressed
data this way usually isn't a problem, especially when the
image is stored on an SSD, but if you want to maximize
performance it can be beneficial to use this option to keep
the compressed image data in the kernel cache.
* `-o debuglevel=`*name*: * `-o debuglevel=`*name*:
Use this for different levels of verbosity along with either Use this for different levels of verbosity along with either
the `-f` or `-d` FUSE options. This can give you some insight the `-f` or `-d` FUSE options. This can give you some insight

View File

@ -35,15 +35,17 @@ struct block_cache_options;
class block_range; class block_range;
class logger; class logger;
class mmif;
class block_cache { class block_cache {
public: public:
block_cache(logger& lgr, const block_cache_options& options); block_cache(logger& lgr, std::shared_ptr<mmif> mm,
const block_cache_options& options);
size_t block_count() const { return impl_->block_count(); } size_t block_count() const { return impl_->block_count(); }
void insert(compression_type comp, const uint8_t* data, size_t size) { void insert(compression_type comp, off_t offset, size_t size) {
impl_->insert(comp, data, size); impl_->insert(comp, offset, size);
} }
void set_block_size(size_t size) { impl_->set_block_size(size); } void set_block_size(size_t size) { impl_->set_block_size(size); }
@ -58,8 +60,7 @@ class block_cache {
virtual ~impl() = default; virtual ~impl() = default;
virtual size_t block_count() const = 0; virtual size_t block_count() const = 0;
virtual void virtual void insert(compression_type comp, off_t offset, size_t size) = 0;
insert(compression_type comp, const uint8_t* data, size_t size) = 0;
virtual void set_block_size(size_t size) = 0; virtual void set_block_size(size_t size) = 0;
virtual std::future<block_range> virtual std::future<block_range>
get(size_t block_no, size_t offset, size_t length) const = 0; get(size_t block_no, size_t offset, size_t length) const = 0;

View File

@ -240,6 +240,8 @@ class log_proxy {
lgr_, logger::TRACE); lgr_, logger::TRACE);
} }
// Returns the underlying logger reference held by this proxy (lgr_).
logger& get_logger() const { return lgr_; }
private: private:
logger& lgr_; logger& lgr_;
}; };

View File

@ -30,12 +30,21 @@ namespace dwarfs {
class mmap : public mmif { class mmap : public mmif {
public: public:
mmap(const std::string& path); explicit mmap(const std::string& path);
mmap(const std::string& path, size_t size); mmap(const std::string& path, size_t size);
virtual ~mmap() noexcept; ~mmap() noexcept override;
void const* addr() const override;
size_t size() const override;
boost::system::error_code lock(off_t offset, size_t size) override;
boost::system::error_code release(off_t offset, size_t size) override;
private: private:
int fd_; int fd_;
size_t size_;
void* addr_;
off_t const page_size_;
}; };
} // namespace dwarfs } // namespace dwarfs

View File

@ -24,6 +24,7 @@
#include <string> #include <string>
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
#include <boost/system/system_error.hpp>
#include <folly/Range.h> #include <folly/Range.h>
@ -33,28 +34,20 @@ class mmif : public boost::noncopyable {
public: public:
virtual ~mmif() = default; virtual ~mmif() = default;
const void* get() const { return addr_; }
template <typename T> template <typename T>
const T* as(size_t offset = 0) const { T const* as(off_t offset = 0) const {
return reinterpret_cast<const T*>( return reinterpret_cast<T const*>(
reinterpret_cast<const char*>(const_cast<const void*>(addr_)) + offset); reinterpret_cast<char const*>(this->addr()) + offset);
} }
size_t size() const { return size_; } folly::ByteRange range(off_t offset, size_t length) const {
return folly::ByteRange(this->as<uint8_t>(offset), length);
folly::ByteRange range(size_t start, size_t size) const {
return folly::ByteRange(as<uint8_t>(start), size);
} }
protected: virtual void const* addr() const = 0;
void assign(const void* addr, size_t size) { virtual size_t size() const = 0;
addr_ = addr;
size_ = size;
}
private: virtual boost::system::error_code lock(off_t offset, size_t size) = 0;
const void* addr_; virtual boost::system::error_code release(off_t offset, size_t size) = 0;
size_t size_;
}; };
} // namespace dwarfs } // namespace dwarfs

View File

@ -33,6 +33,7 @@ struct block_cache_options {
size_t max_bytes{0}; size_t max_bytes{0};
size_t num_workers{0}; size_t num_workers{0};
double decompress_ratio{1.0}; double decompress_ratio{1.0};
bool mm_release{true};
}; };
struct metadata_options { struct metadata_options {

View File

@ -53,6 +53,7 @@ struct options {
const char* mlock_str; // TODO: const?? -> use string? const char* mlock_str; // TODO: const?? -> use string?
const char* decompress_ratio_str; // TODO: const?? -> use string? const char* decompress_ratio_str; // TODO: const?? -> use string?
int enable_nlink; int enable_nlink;
int no_image_madvise;
size_t cachesize; size_t cachesize;
size_t workers; size_t workers;
mlock_mode lock_mode; mlock_mode lock_mode;
@ -77,6 +78,7 @@ const struct fuse_opt dwarfs_opts[] = {
DWARFS_OPT("mlock=%s", mlock_str, 0), DWARFS_OPT("mlock=%s", mlock_str, 0),
DWARFS_OPT("decratio=%s", decompress_ratio_str, 0), DWARFS_OPT("decratio=%s", decompress_ratio_str, 0),
DWARFS_OPT("enable_nlink", enable_nlink, 1), DWARFS_OPT("enable_nlink", enable_nlink, 1),
DWARFS_OPT("no_image_madvise", no_image_madvise, 1),
FUSE_OPT_END}; FUSE_OPT_END};
options opts; options opts;
@ -97,6 +99,7 @@ void op_init(void* /*userdata*/, struct fuse_conn_info* /*conn*/) {
fsopts.block_cache.max_bytes = opts.cachesize; fsopts.block_cache.max_bytes = opts.cachesize;
fsopts.block_cache.num_workers = opts.workers; fsopts.block_cache.num_workers = opts.workers;
fsopts.block_cache.decompress_ratio = opts.decompress_ratio; fsopts.block_cache.decompress_ratio = opts.decompress_ratio;
fsopts.block_cache.mm_release = !opts.no_image_madvise;
fsopts.metadata.enable_nlink = bool(opts.enable_nlink); fsopts.metadata.enable_nlink = bool(opts.enable_nlink);
s_fs = std::make_shared<filesystem_v2>( s_fs = std::make_shared<filesystem_v2>(
s_lgr, std::make_shared<mmap>(opts.fsimage), fsopts, s_lgr, std::make_shared<mmap>(opts.fsimage), fsopts,
@ -392,6 +395,7 @@ void usage(const char* progname) {
<< " -o mlock=NAME mlock mode: (none), try, must\n" << " -o mlock=NAME mlock mode: (none), try, must\n"
<< " -o decratio=NUM ratio for full decompression (0.8)\n" << " -o decratio=NUM ratio for full decompression (0.8)\n"
<< " -o enable_nlink show correct hardlink numbers\n" << " -o enable_nlink show correct hardlink numbers\n"
<< " -o no_image_madvise keep image in kernel cache\n"
<< " -o debuglevel=NAME error, warn, (info), debug, trace\n" << " -o debuglevel=NAME error, warn, (info), debug, trace\n"
<< std::endl; << std::endl;

View File

@ -40,27 +40,39 @@
#include "dwarfs/block_cache.h" #include "dwarfs/block_cache.h"
#include "dwarfs/fstypes.h" #include "dwarfs/fstypes.h"
#include "dwarfs/logger.h" #include "dwarfs/logger.h"
#include "dwarfs/mmif.h"
#include "dwarfs/options.h" #include "dwarfs/options.h"
#include "dwarfs/worker_group.h" #include "dwarfs/worker_group.h"
namespace dwarfs { namespace dwarfs {
struct block { struct block {
block(compression_type compression, const uint8_t* data, size_t size) block(compression_type compression, off_t offset, size_t size)
: compression(compression) : compression(compression)
, data(data) , offset(offset)
, size(size) {} , size(size) {}
const compression_type compression; const compression_type compression;
const uint8_t* const data; const off_t offset;
const size_t size; const size_t size;
}; };
class cached_block { class cached_block {
public: public:
explicit cached_block(const block& b) cached_block(logger& lgr, block const& b, std::shared_ptr<mmif> mm,
bool release)
: decompressor_(std::make_unique<block_decompressor>( : decompressor_(std::make_unique<block_decompressor>(
b.compression, b.data, b.size, data_)) {} b.compression, mm->as<uint8_t>(b.offset), b.size, data_))
, mm_(std::move(mm))
, spec_(b)
, log_(lgr)
, release_(release) {}
~cached_block() {
  // The decompressor is reset (and the mapping released) as soon as the
  // block has been fully decompressed. If it is still present here, that
  // release has not happened yet, so it is attempted now.
  if (!decompressor_) {
    return;
  }
  try_release();
}
// once the block is fully decompressed, we can reset the decompressor_ // once the block is fully decompressed, we can reset the decompressor_
@ -76,6 +88,9 @@ class cached_block {
if (decompressor_->decompress_frame()) { if (decompressor_->decompress_frame()) {
// We're done, free the memory // We're done, free the memory
decompressor_.reset(); decompressor_.reset();
// And release the memory from the mapping
try_release();
} }
range_end_ = data_.size(); range_end_ = data_.size();
@ -88,9 +103,21 @@ class cached_block {
} }
private: private:
// Asks the memory mapping to release the byte range that backs this block's
// compressed data. Does nothing when releasing is disabled (release_ is
// false). A failing release is only logged.
// NOTE(review): spec_ is held by reference; confirm the referenced block
// descriptor outlives this object.
void try_release() {
  if (!release_) {
    return;
  }

  auto const ec = mm_->release(spec_.offset, spec_.size);
  if (ec) {
    log_.info() << "madvise() failed: " << ec.message();
  }
}
std::atomic<size_t> range_end_{0}; std::atomic<size_t> range_end_{0};
std::vector<uint8_t> data_; std::vector<uint8_t> data_;
std::unique_ptr<block_decompressor> decompressor_; std::unique_ptr<block_decompressor> decompressor_;
std::shared_ptr<mmif> mm_;
block const& spec_;
log_proxy<debug_logger_policy> log_;
bool const release_;
}; };
class block_request { class block_request {
@ -178,12 +205,14 @@ class block_request_set {
template <typename LoggerPolicy> template <typename LoggerPolicy>
class block_cache_ : public block_cache::impl { class block_cache_ : public block_cache::impl {
public: public:
block_cache_(logger& lgr, const block_cache_options& options) block_cache_(logger& lgr, std::shared_ptr<mmif> mm,
block_cache_options const& options)
: cache_(0) : cache_(0)
, wg_("blkcache", std::max(options.num_workers > 0 , wg_("blkcache", std::max(options.num_workers > 0
? options.num_workers ? options.num_workers
: std::thread::hardware_concurrency(), : std::thread::hardware_concurrency(),
static_cast<size_t>(1))) static_cast<size_t>(1)))
, mm_(std::move(mm))
, log_(lgr) , log_(lgr)
, options_(options) {} , options_(options) {}
@ -234,9 +263,8 @@ class block_cache_ : public block_cache::impl {
size_t block_count() const override { return block_.size(); } size_t block_count() const override { return block_.size(); }
void void insert(compression_type comp, off_t offset, size_t size) override {
insert(compression_type comp, const uint8_t* data, size_t size) override { block_.emplace_back(comp, offset, size);
block_.emplace_back(comp, data, size);
} }
void set_block_size(size_t size) override { void set_block_size(size_t size) override {
@ -377,7 +405,8 @@ class block_cache_ : public block_cache::impl {
assert(block_no < block_.size()); assert(block_no < block_.size());
auto block = std::make_shared<cached_block>(block_[block_no]); auto block = std::make_shared<cached_block>(
log_.get_logger(), block_[block_no], mm_, options_.mm_release);
++blocks_created_; ++blocks_created_;
// Make a new set for the block // Make a new set for the block
@ -517,13 +546,15 @@ class block_cache_ : public block_cache::impl {
mutable worker_group wg_; mutable worker_group wg_;
std::vector<block> block_; std::vector<block> block_;
std::shared_ptr<mmif> mm_;
log_proxy<LoggerPolicy> log_; log_proxy<LoggerPolicy> log_;
const block_cache_options options_; const block_cache_options options_;
}; };
block_cache::block_cache(logger& lgr, const block_cache_options& options) block_cache::block_cache(logger& lgr, std::shared_ptr<mmif> mm,
const block_cache_options& options)
: impl_(make_unique_logging_object<impl, block_cache_, logger_policies>( : impl_(make_unique_logging_object<impl, block_cache_, logger_policies>(
lgr, options)) {} lgr, std::move(mm), options)) {}
// TODO: clean up: this is defined in fstypes.h... // TODO: clean up: this is defined in fstypes.h...
block_range::block_range(std::shared_ptr<cached_block const> block, block_range::block_range(std::shared_ptr<cached_block const> block,

View File

@ -19,7 +19,6 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>. * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/ */
#include <cerrno>
#include <cstddef> #include <cstddef>
#include <cstring> #include <cstring>
#include <optional> #include <optional>
@ -164,9 +163,7 @@ make_metadata(logger& lgr, std::shared_ptr<mmif> mm,
get_section_data(mm, meta_it->second, meta_buffer, force_buffers); get_section_data(mm, meta_it->second, meta_buffer, force_buffers);
if (lock_mode != mlock_mode::NONE) { if (lock_mode != mlock_mode::NONE) {
int rv = ::mlock(meta_section.data(), meta_section.size()); if (auto ec = mm->lock(meta_it->second.start, meta_section.size())) {
if (rv != 0) {
boost::system::error_code ec(errno, boost::system::generic_category());
if (lock_mode == mlock_mode::MUST) { if (lock_mode == mlock_mode::MUST) {
throw boost::system::system_error(ec, "mlock"); throw boost::system::system_error(ec, "mlock");
} else { } else {
@ -225,13 +222,13 @@ filesystem_<LoggerPolicy>::filesystem_(logger& lgr, std::shared_ptr<mmif> mm,
: log_(lgr) : log_(lgr)
, mm_(mm) { , mm_(mm) {
filesystem_parser parser(mm_); filesystem_parser parser(mm_);
block_cache cache(lgr, options.block_cache); block_cache cache(lgr, mm, options.block_cache);
section_map sections; section_map sections;
while (auto s = parser.next_section(log_)) { while (auto s = parser.next_section(log_)) {
if (s->header.type == section_type::BLOCK) { if (s->header.type == section_type::BLOCK) {
cache.insert(s->header.compression, mm_->as<uint8_t>(s->start), cache.insert(s->header.compression, s->start,
static_cast<size_t>(s->header.length)); static_cast<size_t>(s->header.length));
} else { } else {
if (!sections.emplace(s->header.type, *s).second) { if (!sections.emplace(s->header.type, *s).second) {

View File

@ -20,6 +20,7 @@
*/ */
#include <cerrno> #include <cerrno>
#include <cstdio> // TODO
#include <fcntl.h> #include <fcntl.h>
#include <sys/mman.h> #include <sys/mman.h>
@ -67,19 +68,48 @@ void* safe_mmap(int fd, size_t size) {
} }
} // namespace } // namespace
mmap::mmap(const std::string& path) boost::system::error_code mmap::lock(off_t offset, size_t size) {
: fd_(safe_open(path)) { boost::system::error_code ec;
size_t size = safe_size(fd_); auto addr = reinterpret_cast<uint8_t*>(addr_) + offset;
assign(safe_mmap(fd_, size), size); if (::mlock(addr, size) != 0) {
ec.assign(errno, boost::system::generic_category());
}
return ec;
} }
// Advises the kernel (MADV_DONTNEED) that a part of the mapping is no longer
// needed.
//
// The requested range [offset, offset + size) is adjusted to page
// granularity before the call: the start is moved back to the page boundary
// at or before `offset`, and the resulting length is then truncated to a
// whole number of pages.
//
// Returns a default-constructed error_code on success, otherwise the errno
// value reported for madvise() in the generic category.
boost::system::error_code mmap::release(off_t offset, size_t size) {
  // Number of bytes between the preceding page boundary and `offset`.
  auto const head = offset % page_size_;
  auto const start = offset - head;

  // Account for the bytes added in front, then drop the trailing partial page.
  size_t length = size + head;
  length -= length % page_size_;

  auto const target = reinterpret_cast<uint8_t*>(addr_) + start;
  if (::madvise(target, length, MADV_DONTNEED) == 0) {
    return boost::system::error_code();
  }

  return boost::system::error_code(errno, boost::system::generic_category());
}
// Returns the start address of the mapping (the value obtained from
// safe_mmap() at construction).
void const* mmap::addr() const { return addr_; }
// Returns the length that was used to create the mapping (size_).
size_t mmap::size() const { return size_; }
// Sets up the mapping for `path` using the safe_open/safe_size/safe_mmap
// helpers; the mapping length is whatever safe_size(fd_) returns
// (presumably the file size; confirm against the helper).
//
// The initializers depend on each other: size_ needs fd_, and addr_ needs
// both fd_ and size_. This relies on the members being declared in the
// order fd_, size_, addr_, page_size_.
//
// page_size_ caches the system page size for use by release().
mmap::mmap(const std::string& path)
: fd_(safe_open(path))
, size_(safe_size(fd_))
, addr_(safe_mmap(fd_, size_))
, page_size_(::sysconf(_SC_PAGESIZE)) {}
mmap::mmap(const std::string& path, size_t size) mmap::mmap(const std::string& path, size_t size)
: fd_(safe_open(path)) { : fd_(safe_open(path))
assign(safe_mmap(fd_, size), size); , size_(size)
} , addr_(safe_mmap(fd_, size_))
, page_size_(::sysconf(_SC_PAGESIZE)) {}
mmap::~mmap() noexcept { mmap::~mmap() noexcept {
::munmap(const_cast<void*>(get()), size()); ::munmap(addr_, size_);
::close(fd_); ::close(fd_);
} }
} // namespace dwarfs } // namespace dwarfs

View File

@ -88,8 +88,17 @@ std::map<std::string, simplestat> statmap{
class mmap_mock : public mmif { class mmap_mock : public mmif {
public: public:
mmap_mock(const std::string& data) mmap_mock(const std::string& data)
: m_data(data) { : m_data(data) {}
assign(m_data.data(), m_data.size());
// The mock's "mapping" is simply the in-memory buffer m_data.
void const* addr() const override { return m_data.data(); }
// Size of the mock's backing buffer m_data.
size_t size() const override { return m_data.size(); }
// Locking is a no-op for the mock: both arguments are ignored and a
// default-constructed (success) error_code is returned.
boost::system::error_code lock(off_t, size_t) override {
  return {};
}
boost::system::error_code release(off_t, size_t) override {
return boost::system::error_code();
} }
private: private: