feat(dwarfs): add readahead option

The implementation is extremely simple for now, and readahead is disabled by
default. When large files are read sequentially, however, the speedup can be
significant.
Author: Marcus Holland-Moritz
Date:   2024-02-12 13:02:05 +01:00
Parent: 5460900c97
Commit: f1f55dd48c

7 changed files with 110 additions and 17 deletions
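
For library users, the change boils down to a new `readahead` field in
`inode_reader_options`, reachable via `filesystem_options::inode_reader` (see
the `options.h` hunk below). A minimal sketch of wiring it up, using only the
structs added in this commit; the helper name and the example value are
illustrative, not part of the commit:

```cpp
#include "dwarfs/options.h"

// Hypothetical helper: build filesystem options with readahead enabled.
// The default of 0 keeps readahead disabled.
dwarfs::filesystem_options make_fs_options(size_t readahead_bytes) {
  dwarfs::filesystem_options fsopts;
  fsopts.inode_reader.readahead = readahead_bytes; // e.g. size_t{16} << 20 for 16 MiB
  return fsopts;
}
```

The FUSE driver does the equivalent in `load_filesystem()` further down,
copying `opts.readahead` into `fsopts.inode_reader.readahead`.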


@ -39,6 +39,12 @@ options:
  request. Use this along with macFUSE's `iosize` option to
  tune throughput.
- `-o readahead=`*value*:
  How much data to read ahead when receiving a read request.
  This is experimental and disabled by default. If you perform
  a lot of large, sequential reads, throughput may benefit from
  enabling readahead.
- `-o workers=`*value*:
  Number of worker threads to use for decompressing blocks.
  If you have a lot of CPUs, increasing this number can help
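
The option value goes through the same size parser as `cachesize` and
`blocksize`, so suffixed sizes such as `-o readahead=16M` are expected to work
(inferred from the `parse_size_with_unit` call added in `dwarfs_main()` below;
the suffix syntax itself isn't spelled out in this commit). A rough,
self-contained sketch of that parsing step; the helper function and the
forward declaration are illustrative only:

```cpp
#include <cstddef>
#include <string>

// dwarfs' existing size parser, forward-declared for this sketch; the real
// declaration lives elsewhere in the tree and its exact signature is assumed.
namespace dwarfs {
size_t parse_size_with_unit(std::string const& str);
}

// Mirrors the dwarfs_main() change below: a missing readahead= option maps
// to 0, which keeps readahead disabled.
size_t parse_readahead_option(char const* readahead_str) {
  return readahead_str ? dwarfs::parse_size_with_unit(readahead_str) : 0;
}
```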


@ -37,6 +37,7 @@ namespace dwarfs {
struct cache_tidy_config;
class block_cache;
class logger;
struct inode_reader_options;
struct iovec_read_buf;
class performance_monitor;
@ -45,6 +46,7 @@ class inode_reader_v2 {
inode_reader_v2() = default;
inode_reader_v2(logger& lgr, block_cache&& bc,
inode_reader_options const& opts,
std::shared_ptr<performance_monitor const> perfmon);
inode_reader_v2& operator=(inode_reader_v2&&) = default;


@ -71,6 +71,10 @@ struct metadata_options {
size_t block_size{512};
};
struct inode_reader_options {
size_t readahead{0};
};
struct filesystem_options {
static constexpr file_off_t IMAGE_OFFSET_AUTO{-1};
@ -78,6 +82,7 @@ struct filesystem_options {
file_off_t image_offset{0};
block_cache_options block_cache{};
metadata_options metadata{};
inode_reader_options inode_reader{};
int inode_offset{0};
};


@ -605,7 +605,7 @@ filesystem_<LoggerPolicy>::filesystem_(
cache.set_block_size(meta_.block_size());
ir_ = inode_reader_v2(lgr, std::move(cache), perfmon);
ir_ = inode_reader_v2(lgr, std::move(cache), options.inode_reader, perfmon);
if (auto it = sections.find(section_type::HISTORY); it != sections.end()) {
for (auto& section : it->second) {


@ -39,6 +39,7 @@
#include "dwarfs/iovec_read_buf.h"
#include "dwarfs/logger.h"
#include "dwarfs/offset_cache.h"
#include "dwarfs/options.h"
#include "dwarfs/performance_monitor.h"
namespace dwarfs {
@ -85,14 +86,16 @@ namespace {
constexpr size_t const offset_cache_chunk_index_interval = 256;
constexpr size_t const offset_cache_updater_max_inline_offsets = 4;
constexpr size_t const offset_cache_size = 64;
constexpr size_t const readahead_cache_size = 64;
template <typename LoggerPolicy>
class inode_reader_ final : public inode_reader_v2::impl {
public:
inode_reader_(logger& lgr, block_cache&& bc,
inode_reader_(logger& lgr, block_cache&& bc, inode_reader_options const& opts,
std::shared_ptr<performance_monitor const> perfmon
[[maybe_unused]])
: cache_(std::move(bc))
, opts_{opts}
, LOG_PROXY_INIT(lgr)
// clang-format off
PERFMON_CLS_PROXY_INIT(perfmon, "inode_reader_v2")
@ -100,6 +103,7 @@ class inode_reader_ final : public inode_reader_v2::impl {
PERFMON_CLS_TIMER_INIT(readv_iovec)
PERFMON_CLS_TIMER_INIT(readv_future) // clang-format on
, offset_cache_{offset_cache_size}
, readahead_cache_{readahead_cache_size}
, iovec_sizes_(1, 0, 256) {}
~inode_reader_() override {
@ -135,23 +139,32 @@ class inode_reader_ final : public inode_reader_v2::impl {
offset_cache_chunk_index_interval,
offset_cache_updater_max_inline_offsets>;
using readahead_cache_type = folly::EvictingCacheMap<uint32_t, file_off_t>;
folly::Expected<std::vector<std::future<block_range>>, int>
read_internal(uint32_t inode, size_t size, file_off_t offset,
chunk_range chunks) const;
template <typename StoreFunc>
ssize_t read_internal(uint32_t inode, size_t size, file_off_t offset,
ssize_t read_internal(uint32_t inode, size_t size, file_off_t read_offset,
chunk_range chunks, const StoreFunc& store) const;
void do_readahead(uint32_t inode, chunk_range::iterator it,
chunk_range::iterator end, file_off_t read_offset,
size_t size, file_off_t it_offset) const;
block_cache cache_;
inode_reader_options const opts_;
LOG_PROXY_DECL(LoggerPolicy);
PERFMON_CLS_PROXY_DECL
PERFMON_CLS_TIMER_DECL(read)
PERFMON_CLS_TIMER_DECL(readv_iovec)
PERFMON_CLS_TIMER_DECL(readv_future)
mutable offset_cache_type offset_cache_;
mutable folly::Histogram<size_t> iovec_sizes_;
mutable std::mutex readahead_cache_mutex_;
mutable readahead_cache_type readahead_cache_;
mutable std::mutex iovec_sizes_mutex_;
mutable folly::Histogram<size_t> iovec_sizes_;
};
template <typename LoggerPolicy>
@ -164,11 +177,59 @@ void inode_reader_<LoggerPolicy>::dump(std::ostream& os,
}
}
template <typename LoggerPolicy>
void inode_reader_<LoggerPolicy>::do_readahead(uint32_t inode,
chunk_range::iterator it,
chunk_range::iterator end,
file_off_t const read_offset,
size_t const size,
file_off_t it_offset) const {
LOG_TRACE << "readahead (" << inode << "): " << read_offset << "/" << size
<< "/" << it_offset;
file_off_t readahead_pos{0};
file_off_t const current_offset = read_offset + size;
file_off_t const readahead_until = current_offset + opts_.readahead;
{
std::lock_guard lock(readahead_cache_mutex_);
if (read_offset > 0) {
if (auto it = readahead_cache_.find(inode);
it != readahead_cache_.end()) {
readahead_pos = it->second;
}
if (readahead_until <= readahead_pos) {
return;
}
}
readahead_cache_.set(inode, readahead_until);
}
while (it != end) {
if (it_offset + it->size() >= readahead_pos) {
cache_.get(it->block(), it->offset(), it->size());
}
it_offset += it->size();
if (it_offset >= readahead_until) {
break;
}
++it;
}
}
template <typename LoggerPolicy>
folly::Expected<std::vector<std::future<block_range>>, int>
inode_reader_<LoggerPolicy>::read_internal(uint32_t inode, size_t const size,
file_off_t offset,
file_off_t const read_offset,
chunk_range chunks) const {
auto offset = read_offset;
if (offset < 0) {
return folly::makeUnexpected(-EINVAL);
}
@ -245,6 +306,10 @@ inode_reader_<LoggerPolicy>::read_internal(uint32_t inode, size_t const size,
offset_cache_.set(inode, std::move(oc_ent));
}
if (opts_.readahead > 0) {
do_readahead(inode, it, end, read_offset, size, it_offset);
}
break;
}
@ -332,10 +397,10 @@ ssize_t inode_reader_<LoggerPolicy>::readv(iovec_read_buf& buf, uint32_t inode,
} // namespace
inode_reader_v2::inode_reader_v2(
logger& lgr, block_cache&& bc,
logger& lgr, block_cache&& bc, inode_reader_options const& opts,
std::shared_ptr<performance_monitor const> perfmon)
: impl_(make_unique_logging_object<inode_reader_v2::impl, inode_reader_,
logger_policies>(lgr, std::move(bc),
std::move(perfmon))) {}
logger_policies>(
lgr, std::move(bc), opts, std::move(perfmon))) {}
} // namespace dwarfs


@ -151,6 +151,7 @@ struct options {
int seen_mountpoint{0};
char const* cachesize_str{nullptr}; // TODO: const?? -> use string?
char const* blocksize_str{nullptr}; // TODO: const?? -> use string?
char const* readahead_str{nullptr}; // TODO: const?? -> use string?
char const* debuglevel_str{nullptr}; // TODO: const?? -> use string?
char const* workers_str{nullptr}; // TODO: const?? -> use string?
char const* mlock_str{nullptr}; // TODO: const?? -> use string?
@ -168,6 +169,7 @@ struct options {
int cache_files{0};
size_t cachesize{0};
size_t blocksize{0};
size_t readahead{0};
size_t workers{0};
mlock_mode lock_mode{mlock_mode::NONE};
double decompress_ratio{0.0};
@ -220,6 +222,7 @@ constexpr struct ::fuse_opt dwarfs_opts[] = {
// TODO: user, group, atime, mtime, ctime for those fs who don't have it?
DWARFS_OPT("cachesize=%s", cachesize_str, 0),
DWARFS_OPT("blocksize=%s", blocksize_str, 0),
DWARFS_OPT("readahead=%s", readahead_str, 0),
DWARFS_OPT("debuglevel=%s", debuglevel_str, 0),
DWARFS_OPT("workers=%s", workers_str, 0),
DWARFS_OPT("mlock=%s", mlock_str, 0),
@ -1019,6 +1022,7 @@ void usage(std::ostream& os, std::filesystem::path const& progname) {
<< "DWARFS options:\n"
<< " -o cachesize=SIZE set size of block cache (512M)\n"
<< " -o blocksize=SIZE set file block size\n"
<< " -o readahead=SIZE set readahead size (0)\n"
<< " -o workers=NUM number of worker threads (2)\n"
<< " -o mlock=NAME mlock mode: (none), try, must\n"
<< " -o decratio=NUM ratio for full decompression (0.8)\n"
@ -1255,6 +1259,7 @@ void load_filesystem(dwarfs_userdata& userdata) {
fsopts.block_cache.decompress_ratio = opts.decompress_ratio;
fsopts.block_cache.mm_release = !opts.cache_image;
fsopts.block_cache.init_workers = false;
fsopts.inode_reader.readahead = opts.readahead;
fsopts.metadata.enable_nlink = bool(opts.enable_nlink);
fsopts.metadata.readonly = bool(opts.readonly);
fsopts.metadata.block_size = opts.blocksize;
@ -1391,6 +1396,8 @@ int dwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
opts.blocksize = opts.blocksize_str
? parse_size_with_unit(opts.blocksize_str)
: kDefaultBlockSize;
opts.readahead =
opts.readahead_str ? parse_size_with_unit(opts.readahead_str) : 0;
opts.workers = opts.workers_str ? folly::to<size_t>(opts.workers_str) : 2;
opts.lock_mode =
opts.mlock_str ? parse_mlock_mode(opts.mlock_str) : mlock_mode::NONE;


@ -114,7 +114,7 @@ void basic_end_to_end_test(std::string const& compressor,
bool pack_names, bool pack_names_index,
bool pack_symlinks, bool pack_symlinks_index,
bool plain_names_table, bool plain_symlinks_table,
bool access_fail,
bool access_fail, size_t readahead,
std::optional<std::string> file_hash_algo) {
segmenter::config cfg;
scanner_options options;
@ -218,6 +218,7 @@ void basic_end_to_end_test(std::string const& compressor,
opts.block_cache.max_bytes = 1 << 20;
opts.metadata.enable_nlink = enable_nlink;
opts.metadata.check_consistency = true;
opts.inode_reader.readahead = readahead;
filesystem_v2 fs(lgr, *input, mm, opts);
@ -552,9 +553,15 @@ TEST_P(compression_test, end_to_end) {
return;
}
size_t readahead = 0;
if (block_size_bits < 20) {
readahead = static_cast<size_t>(4) << block_size_bits;
}
basic_end_to_end_test(compressor, block_size_bits, file_order, true, true,
false, false, false, false, false, true, true, true,
true, true, true, true, false, false, false,
true, true, true, true, false, false, false, readahead,
file_hash_algo);
}
@ -562,16 +569,17 @@ TEST_P(scanner_test, end_to_end) {
auto [with_devices, with_specials, set_uid, set_gid, set_time, keep_all_times,
enable_nlink, access_fail, file_hash_algo] = GetParam();
basic_end_to_end_test(
compressions[0], 15, file_order_mode::NONE, with_devices, with_specials,
set_uid, set_gid, set_time, keep_all_times, enable_nlink, true, true,
true, true, true, true, true, false, false, access_fail, file_hash_algo);
basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE,
with_devices, with_specials, set_uid, set_gid, set_time,
keep_all_times, enable_nlink, true, true, true, true,
true, true, true, false, false, access_fail, 0,
file_hash_algo);
}
TEST_P(hashing_test, end_to_end) {
basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true,
true, true, true, true, true, true, true, true, true,
true, true, true, false, false, false, GetParam());
true, true, true, false, false, false, 0, GetParam());
}
TEST_P(packing_test, end_to_end) {
@ -582,7 +590,7 @@ TEST_P(packing_test, end_to_end) {
false, false, false, false, false, pack_chunk_table,
pack_directories, pack_shared_files_table, pack_names,
pack_names_index, pack_symlinks, pack_symlinks_index,
false, false, false, default_file_hash_algo);
false, false, false, 0, default_file_hash_algo);
}
TEST_P(plain_tables_test, end_to_end) {
@ -591,7 +599,7 @@ TEST_P(plain_tables_test, end_to_end) {
basic_end_to_end_test(compressions[0], 15, file_order_mode::NONE, true, true,
false, false, false, false, false, false, false, false,
false, false, false, false, plain_names_table,
plain_symlinks_table, false, default_file_hash_algo);
plain_symlinks_table, false, 0, default_file_hash_algo);
}
TEST_P(packing_test, regression_empty_fs) {