feat(dwarfs): add readahead option

The implementation is extremely simple for now and readahead is off
by default. When sequentially accessing large files, the speedup can
be significant, though.
This commit is contained in:
Marcus Holland-Moritz 2024-02-12 13:02:05 +01:00
parent 5460900c97
commit f1f55dd48c
7 changed files with 110 additions and 17 deletions

View File

@ -39,6 +39,12 @@ options:
request. Use this along with macFUSE's `iosize` option to
tune throughput.
- `-o readahead=`*value*:
How much data to read ahead when receiving a read request.
This is experimental and disabled by default. If you perform
a lot of large, sequential reads, throughput may benefit from
enabling readahead.
- `-o workers=`*value*:
Number of worker threads to use for decompressing blocks.
If you have a lot of CPUs, increasing this number can help

View File

@ -37,6 +37,7 @@ namespace dwarfs {
struct cache_tidy_config; struct cache_tidy_config;
class block_cache; class block_cache;
class logger; class logger;
struct inode_reader_options;
struct iovec_read_buf; struct iovec_read_buf;
class performance_monitor; class performance_monitor;
@ -45,6 +46,7 @@ class inode_reader_v2 {
inode_reader_v2() = default; inode_reader_v2() = default;
inode_reader_v2(logger& lgr, block_cache&& bc, inode_reader_v2(logger& lgr, block_cache&& bc,
inode_reader_options const& opts,
std::shared_ptr<performance_monitor const> perfmon); std::shared_ptr<performance_monitor const> perfmon);
inode_reader_v2& operator=(inode_reader_v2&&) = default; inode_reader_v2& operator=(inode_reader_v2&&) = default;

View File

@ -71,6 +71,10 @@ struct metadata_options {
size_t block_size{512}; size_t block_size{512};
}; };
// Options controlling the inode reader.
struct inode_reader_options {
// Number of bytes to read ahead past the end of each read request;
// 0 (the default) disables readahead entirely.
size_t readahead{0};
};
struct filesystem_options { struct filesystem_options {
static constexpr file_off_t IMAGE_OFFSET_AUTO{-1}; static constexpr file_off_t IMAGE_OFFSET_AUTO{-1};
@ -78,6 +82,7 @@ struct filesystem_options {
file_off_t image_offset{0}; file_off_t image_offset{0};
block_cache_options block_cache{}; block_cache_options block_cache{};
metadata_options metadata{}; metadata_options metadata{};
inode_reader_options inode_reader{};
int inode_offset{0}; int inode_offset{0};
}; };

View File

@ -605,7 +605,7 @@ filesystem_<LoggerPolicy>::filesystem_(
cache.set_block_size(meta_.block_size()); cache.set_block_size(meta_.block_size());
ir_ = inode_reader_v2(lgr, std::move(cache), perfmon); ir_ = inode_reader_v2(lgr, std::move(cache), options.inode_reader, perfmon);
if (auto it = sections.find(section_type::HISTORY); it != sections.end()) { if (auto it = sections.find(section_type::HISTORY); it != sections.end()) {
for (auto& section : it->second) { for (auto& section : it->second) {

View File

@ -39,6 +39,7 @@
#include "dwarfs/iovec_read_buf.h" #include "dwarfs/iovec_read_buf.h"
#include "dwarfs/logger.h" #include "dwarfs/logger.h"
#include "dwarfs/offset_cache.h" #include "dwarfs/offset_cache.h"
#include "dwarfs/options.h"
#include "dwarfs/performance_monitor.h" #include "dwarfs/performance_monitor.h"
namespace dwarfs { namespace dwarfs {
@ -85,14 +86,16 @@ namespace {
constexpr size_t const offset_cache_chunk_index_interval = 256; constexpr size_t const offset_cache_chunk_index_interval = 256;
constexpr size_t const offset_cache_updater_max_inline_offsets = 4; constexpr size_t const offset_cache_updater_max_inline_offsets = 4;
constexpr size_t const offset_cache_size = 64; constexpr size_t const offset_cache_size = 64;
constexpr size_t const readahead_cache_size = 64;
template <typename LoggerPolicy> template <typename LoggerPolicy>
class inode_reader_ final : public inode_reader_v2::impl { class inode_reader_ final : public inode_reader_v2::impl {
public: public:
inode_reader_(logger& lgr, block_cache&& bc, inode_reader_(logger& lgr, block_cache&& bc, inode_reader_options const& opts,
std::shared_ptr<performance_monitor const> perfmon std::shared_ptr<performance_monitor const> perfmon
[[maybe_unused]]) [[maybe_unused]])
: cache_(std::move(bc)) : cache_(std::move(bc))
, opts_{opts}
, LOG_PROXY_INIT(lgr) , LOG_PROXY_INIT(lgr)
// clang-format off // clang-format off
PERFMON_CLS_PROXY_INIT(perfmon, "inode_reader_v2") PERFMON_CLS_PROXY_INIT(perfmon, "inode_reader_v2")
@ -100,6 +103,7 @@ class inode_reader_ final : public inode_reader_v2::impl {
PERFMON_CLS_TIMER_INIT(readv_iovec) PERFMON_CLS_TIMER_INIT(readv_iovec)
PERFMON_CLS_TIMER_INIT(readv_future) // clang-format on PERFMON_CLS_TIMER_INIT(readv_future) // clang-format on
, offset_cache_{offset_cache_size} , offset_cache_{offset_cache_size}
, readahead_cache_{readahead_cache_size}
, iovec_sizes_(1, 0, 256) {} , iovec_sizes_(1, 0, 256) {}
~inode_reader_() override { ~inode_reader_() override {
@ -135,23 +139,32 @@ class inode_reader_ final : public inode_reader_v2::impl {
offset_cache_chunk_index_interval, offset_cache_chunk_index_interval,
offset_cache_updater_max_inline_offsets>; offset_cache_updater_max_inline_offsets>;
using readahead_cache_type = folly::EvictingCacheMap<uint32_t, file_off_t>;
folly::Expected<std::vector<std::future<block_range>>, int> folly::Expected<std::vector<std::future<block_range>>, int>
read_internal(uint32_t inode, size_t size, file_off_t offset, read_internal(uint32_t inode, size_t size, file_off_t offset,
chunk_range chunks) const; chunk_range chunks) const;
template <typename StoreFunc> template <typename StoreFunc>
ssize_t read_internal(uint32_t inode, size_t size, file_off_t offset, ssize_t read_internal(uint32_t inode, size_t size, file_off_t read_offset,
chunk_range chunks, const StoreFunc& store) const; chunk_range chunks, const StoreFunc& store) const;
void do_readahead(uint32_t inode, chunk_range::iterator it,
chunk_range::iterator end, file_off_t read_offset,
size_t size, file_off_t it_offset) const;
block_cache cache_; block_cache cache_;
inode_reader_options const opts_;
LOG_PROXY_DECL(LoggerPolicy); LOG_PROXY_DECL(LoggerPolicy);
PERFMON_CLS_PROXY_DECL PERFMON_CLS_PROXY_DECL
PERFMON_CLS_TIMER_DECL(read) PERFMON_CLS_TIMER_DECL(read)
PERFMON_CLS_TIMER_DECL(readv_iovec) PERFMON_CLS_TIMER_DECL(readv_iovec)
PERFMON_CLS_TIMER_DECL(readv_future) PERFMON_CLS_TIMER_DECL(readv_future)
mutable offset_cache_type offset_cache_; mutable offset_cache_type offset_cache_;
mutable folly::Histogram<size_t> iovec_sizes_; mutable std::mutex readahead_cache_mutex_;
mutable readahead_cache_type readahead_cache_;
mutable std::mutex iovec_sizes_mutex_; mutable std::mutex iovec_sizes_mutex_;
mutable folly::Histogram<size_t> iovec_sizes_;
}; };
template <typename LoggerPolicy> template <typename LoggerPolicy>
@ -164,11 +177,59 @@ void inode_reader_<LoggerPolicy>::dump(std::ostream& os,
} }
} }
template <typename LoggerPolicy>
// Issue readahead for `inode` after a read of `size` bytes at `read_offset`.
// `it`/`end` iterate the inode's chunks starting at the chunk containing the
// read, with `it_offset` being that chunk's file offset. Only called when
// opts_.readahead > 0. Thread-safe: the per-inode readahead position cache
// is guarded by readahead_cache_mutex_.
void inode_reader_<LoggerPolicy>::do_readahead(uint32_t inode,
                                               chunk_range::iterator it,
                                               chunk_range::iterator end,
                                               file_off_t const read_offset,
                                               size_t const size,
                                               file_off_t it_offset) const {
  LOG_TRACE << "readahead (" << inode << "): " << read_offset << "/" << size
            << "/" << it_offset;

  // Position up to which readahead has already been issued for this inode;
  // chunks ending before this point are skipped below.
  file_off_t readahead_pos{0};
  // End of the current read, and the target position up to which data
  // should be prefetched.
  file_off_t const current_offset = read_offset + size;
  file_off_t const readahead_until = current_offset + opts_.readahead;

  {
    std::lock_guard lock(readahead_cache_mutex_);

    if (read_offset > 0) {
      // Non-initial read: check how far we've already read ahead. If the
      // target is already covered, there is nothing to do.
      // NOTE: iterator renamed from `it` — the original shadowed the
      // chunk_range::iterator parameter of the same name.
      if (auto cached = readahead_cache_.find(inode);
          cached != readahead_cache_.end()) {
        readahead_pos = cached->second;
      }

      if (readahead_until <= readahead_pos) {
        return;
      }
    }

    readahead_cache_.set(inode, readahead_until);
  }

  // Touch the block cache for every chunk overlapping the not-yet-prefetched
  // range; the returned future is intentionally discarded — the get() call
  // itself presumably triggers asynchronous block decompression (TODO:
  // confirm against block_cache semantics).
  while (it != end) {
    if (it_offset + it->size() >= readahead_pos) {
      cache_.get(it->block(), it->offset(), it->size());
    }

    it_offset += it->size();

    if (it_offset >= readahead_until) {
      break;
    }

    ++it;
  }
}
template <typename LoggerPolicy> template <typename LoggerPolicy>
folly::Expected<std::vector<std::future<block_range>>, int> folly::Expected<std::vector<std::future<block_range>>, int>
inode_reader_<LoggerPolicy>::read_internal(uint32_t inode, size_t const size, inode_reader_<LoggerPolicy>::read_internal(uint32_t inode, size_t const size,
file_off_t offset, file_off_t const read_offset,
chunk_range chunks) const { chunk_range chunks) const {
auto offset = read_offset;
if (offset < 0) { if (offset < 0) {
return folly::makeUnexpected(-EINVAL); return folly::makeUnexpected(-EINVAL);
} }
@ -245,6 +306,10 @@ inode_reader_<LoggerPolicy>::read_internal(uint32_t inode, size_t const size,
offset_cache_.set(inode, std::move(oc_ent)); offset_cache_.set(inode, std::move(oc_ent));
} }
if (opts_.readahead > 0) {
do_readahead(inode, it, end, read_offset, size, it_offset);
}
break; break;
} }
@ -332,10 +397,10 @@ ssize_t inode_reader_<LoggerPolicy>::readv(iovec_read_buf& buf, uint32_t inode,
} // namespace } // namespace
inode_reader_v2::inode_reader_v2( inode_reader_v2::inode_reader_v2(
logger& lgr, block_cache&& bc, logger& lgr, block_cache&& bc, inode_reader_options const& opts,
std::shared_ptr<performance_monitor const> perfmon) std::shared_ptr<performance_monitor const> perfmon)
: impl_(make_unique_logging_object<inode_reader_v2::impl, inode_reader_, : impl_(make_unique_logging_object<inode_reader_v2::impl, inode_reader_,
logger_policies>(lgr, std::move(bc), logger_policies>(
std::move(perfmon))) {} lgr, std::move(bc), opts, std::move(perfmon))) {}
} // namespace dwarfs } // namespace dwarfs

View File

@ -151,6 +151,7 @@ struct options {
int seen_mountpoint{0}; int seen_mountpoint{0};
char const* cachesize_str{nullptr}; // TODO: const?? -> use string? char const* cachesize_str{nullptr}; // TODO: const?? -> use string?
char const* blocksize_str{nullptr}; // TODO: const?? -> use string? char const* blocksize_str{nullptr}; // TODO: const?? -> use string?
char const* readahead_str{nullptr}; // TODO: const?? -> use string?
char const* debuglevel_str{nullptr}; // TODO: const?? -> use string? char const* debuglevel_str{nullptr}; // TODO: const?? -> use string?
char const* workers_str{nullptr}; // TODO: const?? -> use string? char const* workers_str{nullptr}; // TODO: const?? -> use string?
char const* mlock_str{nullptr}; // TODO: const?? -> use string? char const* mlock_str{nullptr}; // TODO: const?? -> use string?
@ -168,6 +169,7 @@ struct options {
int cache_files{0}; int cache_files{0};
size_t cachesize{0}; size_t cachesize{0};
size_t blocksize{0}; size_t blocksize{0};
size_t readahead{0};
size_t workers{0}; size_t workers{0};
mlock_mode lock_mode{mlock_mode::NONE}; mlock_mode lock_mode{mlock_mode::NONE};
double decompress_ratio{0.0}; double decompress_ratio{0.0};
@ -220,6 +222,7 @@ constexpr struct ::fuse_opt dwarfs_opts[] = {
// TODO: user, group, atime, mtime, ctime for those fs who don't have it? // TODO: user, group, atime, mtime, ctime for those fs who don't have it?
DWARFS_OPT("cachesize=%s", cachesize_str, 0), DWARFS_OPT("cachesize=%s", cachesize_str, 0),
DWARFS_OPT("blocksize=%s", blocksize_str, 0), DWARFS_OPT("blocksize=%s", blocksize_str, 0),
DWARFS_OPT("readahead=%s", readahead_str, 0),
DWARFS_OPT("debuglevel=%s", debuglevel_str, 0), DWARFS_OPT("debuglevel=%s", debuglevel_str, 0),
DWARFS_OPT("workers=%s", workers_str, 0), DWARFS_OPT("workers=%s", workers_str, 0),
DWARFS_OPT("mlock=%s", mlock_str, 0), DWARFS_OPT("mlock=%s", mlock_str, 0),
@ -1019,6 +1022,7 @@ void usage(std::ostream& os, std::filesystem::path const& progname) {
<< "DWARFS options:\n" << "DWARFS options:\n"
<< " -o cachesize=SIZE set size of block cache (512M)\n" << " -o cachesize=SIZE set size of block cache (512M)\n"
<< " -o blocksize=SIZE set file block size\n" << " -o blocksize=SIZE set file block size\n"
<< " -o readahead=SIZE set readahead size (0)\n"
<< " -o workers=NUM number of worker threads (2)\n" << " -o workers=NUM number of worker threads (2)\n"
<< " -o mlock=NAME mlock mode: (none), try, must\n" << " -o mlock=NAME mlock mode: (none), try, must\n"
<< " -o decratio=NUM ratio for full decompression (0.8)\n" << " -o decratio=NUM ratio for full decompression (0.8)\n"
@ -1255,6 +1259,7 @@ void load_filesystem(dwarfs_userdata& userdata) {
fsopts.block_cache.decompress_ratio = opts.decompress_ratio; fsopts.block_cache.decompress_ratio = opts.decompress_ratio;
fsopts.block_cache.mm_release = !opts.cache_image; fsopts.block_cache.mm_release = !opts.cache_image;
fsopts.block_cache.init_workers = false; fsopts.block_cache.init_workers = false;
fsopts.inode_reader.readahead = opts.readahead;
fsopts.metadata.enable_nlink = bool(opts.enable_nlink); fsopts.metadata.enable_nlink = bool(opts.enable_nlink);
fsopts.metadata.readonly = bool(opts.readonly); fsopts.metadata.readonly = bool(opts.readonly);
fsopts.metadata.block_size = opts.blocksize; fsopts.metadata.block_size = opts.blocksize;
@ -1391,6 +1396,8 @@ int dwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
opts.blocksize = opts.blocksize_str opts.blocksize = opts.blocksize_str
? parse_size_with_unit(opts.blocksize_str) ? parse_size_with_unit(opts.blocksize_str)
: kDefaultBlockSize; : kDefaultBlockSize;
opts.readahead =
opts.readahead_str ? parse_size_with_unit(opts.readahead_str) : 0;
opts.workers = opts.workers_str ? folly::to<size_t>(opts.workers_str) : 2; opts.workers = opts.workers_str ? folly::to<size_t>(opts.workers_str) : 2;
opts.lock_mode = opts.lock_mode =
opts.mlock_str ? parse_mlock_mode(opts.mlock_str) : mlock_mode::NONE; opts.mlock_str ? parse_mlock_mode(opts.mlock_str) : mlock_mode::NONE;

View File

@ -114,7 +114,7 @@ void basic_end_to_end_test(std::string const& compressor,
bool pack_names, bool pack_names_index, bool pack_names, bool pack_names_index,
bool pack_symlinks, bool pack_symlinks_index, bool pack_symlinks, bool pack_symlinks_index,
bool plain_names_table, bool plain_symlinks_table, bool plain_names_table, bool plain_symlinks_table,
bool access_fail, bool access_fail, size_t readahead,
std::optional<std::string> file_hash_algo) { std::optional<std::string> file_hash_algo) {
segmenter::config cfg; segmenter::config cfg;
scanner_options options; scanner_options options;
@ -218,6 +218,7 @@ void basic_end_to_end_test(std::string const& compressor,
opts.block_cache.max_bytes = 1 << 20; opts.block_cache.max_bytes = 1 << 20;
opts.metadata.enable_nlink = enable_nlink; opts.metadata.enable_nlink = enable_nlink;
opts.metadata.check_consistency = true; opts.metadata.check_consistency = true;
opts.inode_reader.readahead = readahead;
filesystem_v2 fs(lgr, *input, mm, opts); filesystem_v2 fs(lgr, *input, mm, opts);
@ -552,9 +553,15 @@ TEST_P(compression_test, end_to_end) {
return; return;
} }
size_t readahead = 0;
if (block_size_bits < 20) {
readahead = static_cast<size_t>(4) << block_size_bits;
}
basic_end_to_end_test(compressor, block_size_bits, file_order, true, true, basic_end_to_end_test(compressor, block_size_bits, file_order, true, true,
false, false, false, false, false, true, true, true, false, false, false, false, false, true, true, true,
true, true, true, true, false, false, false, true, true, true, true, false, false, false, readahead,
file_hash_algo); file_hash_algo);
} }
@ -562,16 +569,17 @@ TEST_P(scanner_test, end_to_end) {
auto [with_devices, with_specials, set_uid, set_gid, set_time, keep_all_times, auto [with_devices, with_specials, set_uid, set_gid, set_time, keep_all_times,
enable_nlink, access_fail, file_hash_algo] = GetParam(); enable_nlink, access_fail, file_hash_algo] = GetParam();
basic_end_to_end_test( basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE,
compressions[0], 15, file_order_mode::NONE, with_devices, with_specials, with_devices, with_specials, set_uid, set_gid, set_time,
set_uid, set_gid, set_time, keep_all_times, enable_nlink, true, true, keep_all_times, enable_nlink, true, true, true, true,
true, true, true, true, true, false, false, access_fail, file_hash_algo); true, true, true, false, false, access_fail, 0,
file_hash_algo);
} }
TEST_P(hashing_test, end_to_end) { TEST_P(hashing_test, end_to_end) {
basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true, basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true,
true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true,
true, true, true, false, false, false, GetParam()); true, true, true, false, false, false, 0, GetParam());
} }
TEST_P(packing_test, end_to_end) { TEST_P(packing_test, end_to_end) {
@ -582,7 +590,7 @@ TEST_P(packing_test, end_to_end) {
false, false, false, false, false, pack_chunk_table, false, false, false, false, false, pack_chunk_table,
pack_directories, pack_shared_files_table, pack_names, pack_directories, pack_shared_files_table, pack_names,
pack_names_index, pack_symlinks, pack_symlinks_index, pack_names_index, pack_symlinks, pack_symlinks_index,
false, false, false, default_file_hash_algo); false, false, false, 0, default_file_hash_algo);
} }
TEST_P(plain_tables_test, end_to_end) { TEST_P(plain_tables_test, end_to_end) {
@ -591,7 +599,7 @@ TEST_P(plain_tables_test, end_to_end) {
basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true, basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true,
false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
false, false, false, false, plain_names_table, false, false, false, false, plain_names_table,
plain_symlinks_table, false, default_file_hash_algo); plain_symlinks_table, false, 0, default_file_hash_algo);
} }
TEST_P(packing_test, regression_empty_fs) { TEST_P(packing_test, regression_empty_fs) {