From f1f55dd48c16df0d4a86a5ed4272857b6fe5cbf7 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Mon, 12 Feb 2024 13:02:05 +0100 Subject: [PATCH] feat(dwarfs): add readahead option The implementation is extremely simple for now and readahead is off by default. When sequentially accessing large files, the speedup can be significant, though. --- doc/dwarfs.md | 6 +++ include/dwarfs/inode_reader_v2.h | 2 + include/dwarfs/options.h | 5 ++ src/dwarfs/filesystem_v2.cpp | 2 +- src/dwarfs/inode_reader_v2.cpp | 79 +++++++++++++++++++++++++++++--- src/dwarfs_main.cpp | 7 +++ test/dwarfs_test.cpp | 26 +++++++---- 7 files changed, 110 insertions(+), 17 deletions(-) diff --git a/doc/dwarfs.md b/doc/dwarfs.md index 398817a0..9387c35b 100644 --- a/doc/dwarfs.md +++ b/doc/dwarfs.md @@ -39,6 +39,12 @@ options: request. Use this along with macFUSE's `iosize` option to tune throughput. +- `-o readahead=`*value*: + How much data to read ahead when receiving a read request. + This is experimental and disabled by default. If you perform + a lot of large, sequential reads, throughput may benefit from + enabling readahead. + - `-o workers=`*value*: Number of worker threads to use for decompressing blocks. If you have a lot of CPUs, increasing this number can help diff --git a/include/dwarfs/inode_reader_v2.h b/include/dwarfs/inode_reader_v2.h index 5ac3125d..a538152c 100644 --- a/include/dwarfs/inode_reader_v2.h +++ b/include/dwarfs/inode_reader_v2.h @@ -37,6 +37,7 @@ namespace dwarfs { struct cache_tidy_config; class block_cache; class logger; +struct inode_reader_options; struct iovec_read_buf; class performance_monitor; @@ -45,6 +46,7 @@ class inode_reader_v2 { inode_reader_v2() = default; inode_reader_v2(logger& lgr, block_cache&& bc, + inode_reader_options const& opts, std::shared_ptr perfmon); inode_reader_v2& operator=(inode_reader_v2&&) = default; diff --git a/include/dwarfs/options.h b/include/dwarfs/options.h index 04ebdf27..07bf80cc 100644 --- a/include/dwarfs/options.h +++ b/include/dwarfs/options.h @@ -71,6 +71,10 @@ struct metadata_options { size_t block_size{512}; }; +struct inode_reader_options { + size_t readahead{0}; +}; + struct filesystem_options { static constexpr file_off_t IMAGE_OFFSET_AUTO{-1}; @@ -78,6 +82,7 @@ struct filesystem_options { file_off_t image_offset{0}; block_cache_options block_cache{}; metadata_options metadata{}; + inode_reader_options inode_reader{}; int inode_offset{0}; }; diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp index 6690df36..c3cda493 100644 --- a/src/dwarfs/filesystem_v2.cpp +++ b/src/dwarfs/filesystem_v2.cpp @@ -605,7 +605,7 @@ filesystem_::filesystem_( cache.set_block_size(meta_.block_size()); - ir_ = inode_reader_v2(lgr, std::move(cache), perfmon); + ir_ = inode_reader_v2(lgr, std::move(cache), options.inode_reader, perfmon); if (auto it = sections.find(section_type::HISTORY); it != sections.end()) { for (auto& section : it->second) { diff --git a/src/dwarfs/inode_reader_v2.cpp b/src/dwarfs/inode_reader_v2.cpp index f2dbc969..28e68eec 100644 --- a/src/dwarfs/inode_reader_v2.cpp +++ b/src/dwarfs/inode_reader_v2.cpp @@ -39,6 +39,7 @@ #include "dwarfs/iovec_read_buf.h" #include "dwarfs/logger.h" #include "dwarfs/offset_cache.h" +#include "dwarfs/options.h" #include "dwarfs/performance_monitor.h" namespace dwarfs { @@ -85,14 +86,16 @@ namespace { constexpr size_t const offset_cache_chunk_index_interval = 256; constexpr size_t const offset_cache_updater_max_inline_offsets = 4; constexpr size_t const offset_cache_size = 64; +constexpr size_t const readahead_cache_size = 64; template class inode_reader_ final : public inode_reader_v2::impl { public: - inode_reader_(logger& lgr, block_cache&& bc, + inode_reader_(logger& lgr, block_cache&& bc, inode_reader_options const& opts, std::shared_ptr perfmon [[maybe_unused]]) : cache_(std::move(bc)) + , opts_{opts} , LOG_PROXY_INIT(lgr) // clang-format off PERFMON_CLS_PROXY_INIT(perfmon, "inode_reader_v2") @@ -100,6 +103,7 @@ class inode_reader_ final : public inode_reader_v2::impl { PERFMON_CLS_TIMER_INIT(readv_iovec) PERFMON_CLS_TIMER_INIT(readv_future) // clang-format on , offset_cache_{offset_cache_size} + , readahead_cache_{readahead_cache_size} , iovec_sizes_(1, 0, 256) {} ~inode_reader_() override { @@ -135,23 +139,32 @@ class inode_reader_ final : public inode_reader_v2::impl { offset_cache_chunk_index_interval, offset_cache_updater_max_inline_offsets>; + using readahead_cache_type = folly::EvictingCacheMap; + folly::Expected>, int> read_internal(uint32_t inode, size_t size, file_off_t offset, chunk_range chunks) const; template - ssize_t read_internal(uint32_t inode, size_t size, file_off_t offset, + ssize_t read_internal(uint32_t inode, size_t size, file_off_t read_offset, chunk_range chunks, const StoreFunc& store) const; + void do_readahead(uint32_t inode, chunk_range::iterator it, + chunk_range::iterator end, file_off_t read_offset, + size_t size, file_off_t it_offset) const; + block_cache cache_; + inode_reader_options const opts_; LOG_PROXY_DECL(LoggerPolicy); PERFMON_CLS_PROXY_DECL PERFMON_CLS_TIMER_DECL(read) PERFMON_CLS_TIMER_DECL(readv_iovec) PERFMON_CLS_TIMER_DECL(readv_future) mutable offset_cache_type offset_cache_; - mutable folly::Histogram iovec_sizes_; + mutable std::mutex readahead_cache_mutex_; + mutable readahead_cache_type readahead_cache_; mutable std::mutex iovec_sizes_mutex_; + mutable folly::Histogram iovec_sizes_; }; template @@ -164,11 +177,59 @@ void inode_reader_::dump(std::ostream& os, } } +template +void inode_reader_::do_readahead(uint32_t inode, + chunk_range::iterator it, + chunk_range::iterator end, + file_off_t const read_offset, + size_t const size, + file_off_t it_offset) const { + LOG_TRACE << "readahead (" << inode << "): " << read_offset << "/" << size + << "/" << it_offset; + + file_off_t readahead_pos{0}; + file_off_t const current_offset = read_offset + size; + file_off_t const readahead_until = current_offset + opts_.readahead; + + { + std::lock_guard lock(readahead_cache_mutex_); + + if (read_offset > 0) { + if (auto it = readahead_cache_.find(inode); + it != readahead_cache_.end()) { + readahead_pos = it->second; + } + + if (readahead_until <= readahead_pos) { + return; + } + } + + readahead_cache_.set(inode, readahead_until); + } + + while (it != end) { + if (it_offset + it->size() >= readahead_pos) { + cache_.get(it->block(), it->offset(), it->size()); + } + + it_offset += it->size(); + + if (it_offset >= readahead_until) { + break; + } + + ++it; + } +} + template folly::Expected>, int> inode_reader_::read_internal(uint32_t inode, size_t const size, - file_off_t offset, + file_off_t const read_offset, chunk_range chunks) const { + auto offset = read_offset; + if (offset < 0) { return folly::makeUnexpected(-EINVAL); } @@ -245,6 +306,10 @@ inode_reader_::read_internal(uint32_t inode, size_t const size, offset_cache_.set(inode, std::move(oc_ent)); } + if (opts_.readahead > 0) { + do_readahead(inode, it, end, read_offset, size, it_offset); + } + break; } @@ -332,10 +397,10 @@ ssize_t inode_reader_::readv(iovec_read_buf& buf, uint32_t inode, } // namespace inode_reader_v2::inode_reader_v2( - logger& lgr, block_cache&& bc, + logger& lgr, block_cache&& bc, inode_reader_options const& opts, std::shared_ptr perfmon) : impl_(make_unique_logging_object(lgr, std::move(bc), - std::move(perfmon))) {} + logger_policies>( + lgr, std::move(bc), opts, std::move(perfmon))) {} } // namespace dwarfs diff --git a/src/dwarfs_main.cpp b/src/dwarfs_main.cpp index dcf8c879..f7ac250b 100644 --- a/src/dwarfs_main.cpp +++ b/src/dwarfs_main.cpp @@ -151,6 +151,7 @@ struct options { int seen_mountpoint{0}; char const* cachesize_str{nullptr}; // TODO: const?? -> use string? char const* blocksize_str{nullptr}; // TODO: const?? -> use string? + char const* readahead_str{nullptr}; // TODO: const?? -> use string? char const* debuglevel_str{nullptr}; // TODO: const?? -> use string? char const* workers_str{nullptr}; // TODO: const?? -> use string? char const* mlock_str{nullptr}; // TODO: const?? -> use string? @@ -168,6 +169,7 @@ struct options { int cache_files{0}; size_t cachesize{0}; size_t blocksize{0}; + size_t readahead{0}; size_t workers{0}; mlock_mode lock_mode{mlock_mode::NONE}; double decompress_ratio{0.0}; @@ -220,6 +222,7 @@ constexpr struct ::fuse_opt dwarfs_opts[] = { // TODO: user, group, atime, mtime, ctime for those fs who don't have it? DWARFS_OPT("cachesize=%s", cachesize_str, 0), DWARFS_OPT("blocksize=%s", blocksize_str, 0), + DWARFS_OPT("readahead=%s", readahead_str, 0), DWARFS_OPT("debuglevel=%s", debuglevel_str, 0), DWARFS_OPT("workers=%s", workers_str, 0), DWARFS_OPT("mlock=%s", mlock_str, 0), @@ -1019,6 +1022,7 @@ void usage(std::ostream& os, std::filesystem::path const& progname) { << "DWARFS options:\n" << " -o cachesize=SIZE set size of block cache (512M)\n" << " -o blocksize=SIZE set file block size\n" + << " -o readahead=SIZE set readahead size (0)\n" << " -o workers=NUM number of worker threads (2)\n" << " -o mlock=NAME mlock mode: (none), try, must\n" << " -o decratio=NUM ratio for full decompression (0.8)\n" @@ -1255,6 +1259,7 @@ void load_filesystem(dwarfs_userdata& userdata) { fsopts.block_cache.decompress_ratio = opts.decompress_ratio; fsopts.block_cache.mm_release = !opts.cache_image; fsopts.block_cache.init_workers = false; + fsopts.inode_reader.readahead = opts.readahead; fsopts.metadata.enable_nlink = bool(opts.enable_nlink); fsopts.metadata.readonly = bool(opts.readonly); fsopts.metadata.block_size = opts.blocksize; @@ -1391,6 +1396,8 @@ int dwarfs_main(int argc, sys_char** argv, iolayer const& iol) { opts.blocksize = opts.blocksize_str ? parse_size_with_unit(opts.blocksize_str) : kDefaultBlockSize; + opts.readahead = + opts.readahead_str ? parse_size_with_unit(opts.readahead_str) : 0; opts.workers = opts.workers_str ? folly::to(opts.workers_str) : 2; opts.lock_mode = opts.mlock_str ? parse_mlock_mode(opts.mlock_str) : mlock_mode::NONE; diff --git a/test/dwarfs_test.cpp b/test/dwarfs_test.cpp index b475f1ad..090079a5 100644 --- a/test/dwarfs_test.cpp +++ b/test/dwarfs_test.cpp @@ -114,7 +114,7 @@ void basic_end_to_end_test(std::string const& compressor, bool pack_names, bool pack_names_index, bool pack_symlinks, bool pack_symlinks_index, bool plain_names_table, bool plain_symlinks_table, - bool access_fail, + bool access_fail, size_t readahead, std::optional file_hash_algo) { segmenter::config cfg; scanner_options options; @@ -218,6 +218,7 @@ void basic_end_to_end_test(std::string const& compressor, opts.block_cache.max_bytes = 1 << 20; opts.metadata.enable_nlink = enable_nlink; opts.metadata.check_consistency = true; + opts.inode_reader.readahead = readahead; filesystem_v2 fs(lgr, *input, mm, opts); @@ -552,9 +553,15 @@ TEST_P(compression_test, end_to_end) { return; } + size_t readahead = 0; + + if (block_size_bits < 20) { + readahead = static_cast(4) << block_size_bits; + } + basic_end_to_end_test(compressor, block_size_bits, file_order, true, true, false, false, false, false, false, true, true, true, - true, true, true, true, false, false, false, + true, true, true, true, false, false, false, readahead, file_hash_algo); } @@ -562,16 +569,17 @@ TEST_P(scanner_test, end_to_end) { auto [with_devices, with_specials, set_uid, set_gid, set_time, keep_all_times, enable_nlink, access_fail, file_hash_algo] = GetParam(); - basic_end_to_end_test( - compressions[0], 15, file_order_mode::NONE, with_devices, with_specials, - set_uid, set_gid, set_time, keep_all_times, enable_nlink, true, true, - true, true, true, true, true, false, false, access_fail, file_hash_algo); + basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, + with_devices, with_specials, set_uid, set_gid, set_time, + keep_all_times, enable_nlink, true, true, true, true, + true, true, true, false, false, access_fail, 0, + file_hash_algo); } TEST_P(hashing_test, end_to_end) { basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, false, false, false, GetParam()); + true, true, true, false, false, false, 0, GetParam()); } TEST_P(packing_test, end_to_end) { @@ -582,7 +590,7 @@ TEST_P(packing_test, end_to_end) { false, false, false, false, false, pack_chunk_table, pack_directories, pack_shared_files_table, pack_names, pack_names_index, pack_symlinks, pack_symlinks_index, - false, false, false, default_file_hash_algo); + false, false, false, 0, default_file_hash_algo); } TEST_P(plain_tables_test, end_to_end) { @@ -591,7 +599,7 @@ TEST_P(plain_tables_test, end_to_end) { basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true, false, false, false, false, false, false, false, false, false, false, false, false, plain_names_table, - plain_symlinks_table, false, default_file_hash_algo); + plain_symlinks_table, false, 0, default_file_hash_algo); } TEST_P(packing_test, regression_empty_fs) {