feat: add inode size cache

This commit is contained in:
Marcus Holland-Moritz 2024-08-19 17:01:30 +02:00
parent 5c9cfd75bb
commit e803efebe6
4 changed files with 88 additions and 2 deletions

View File

@ -64,6 +64,7 @@ struct scanner_options {
bool enable_history{true};
std::optional<std::vector<std::string>> command_line_arguments;
history_config history;
size_t inode_size_cache_min_chunk_count{128};
};
} // namespace dwarfs::writer

View File

@ -292,6 +292,12 @@ void analyze_frozen(std::ostream& os,
#undef META_OPT_LIST_SIZE
#undef META_OPT_STRING_TABLE_SIZE
if (auto cache = meta.reg_file_size_cache()) {
add_list_size(
"inode_size_cache", cache->lookup(),
l->reg_file_size_cacheField.layout.valueField.layout.lookupField);
}
std::sort(usage.begin(), usage.end(), [](auto const& a, auto const& b) {
return a.first > b.first || (a.first == b.first && a.second < b.second);
});
@ -538,6 +544,8 @@ class metadata_ final : public metadata_v2::impl {
thrift::metadata::metadata unpack_metadata() const;
void check_inode_size_cache() const;
file_stat getattr_impl(inode_view iv, getattr_options const& opts) const;
inode_view make_inode_view(uint32_t inode) const {
@ -671,17 +679,41 @@ class metadata_ final : public metadata_v2::impl {
return {};
}
size_t reg_file_size(inode_view iv) const {
// Computes the size of the regular file inode `iv`.
//
// When `use_cache` is true and the metadata carries a size cache, inodes
// with at least `min_chunk_count` chunks are first looked up in that cache;
// on a hit the cached size is returned. In every other case the size is the
// sum of the sizes of all chunks belonging to the inode.
//
// A failing chunk range lookup is reported through DWARFS_CHECK.
size_t reg_file_size_impl(inode_view iv, bool use_cache) const {
  PERFMON_CLS_SCOPED_SECTION(reg_file_size)

  // The chunk range is needed on both paths (threshold test and summation),
  // so fetch it up front.
  std::error_code ec;
  auto cr = get_chunk_range(iv.inode_num(), ec);

  DWARFS_CHECK(!ec, fmt::format("get_chunk_range({}): {}", iv.inode_num(),
                                ec.message()));

  if (use_cache) {
    if (auto size_cache = meta_.reg_file_size_cache();
        size_cache && cr.size() >= size_cache->min_chunk_count()) {
      LOG_TRACE << "using size cache lookup for inode " << iv.inode_num();

      // Cache keys are relative to the first regular file inode.
      auto const cache_key = iv.inode_num() - file_inode_offset_;

      if (auto cached = size_cache->lookup().getOptional(cache_key)) {
        return *cached;
      }
    }
  }

  // Fallback: add up every chunk; this is the costly path for inodes that
  // consist of a large number of chunks.
  size_t total = 0;
  for (chunk_view cv : cr) {
    total += cv.size();
  }
  return total;
}
// Returns the size of regular file inode `iv` without consulting the size
// cache (forwards to reg_file_size_impl with use_cache == false).
size_t reg_file_size_nocache(inode_view iv) const {
return reg_file_size_impl(iv, false);
}
// Returns the size of regular file inode `iv`, allowing the size cache to be
// used (forwards to reg_file_size_impl with use_cache == true).
size_t reg_file_size(inode_view iv) const {
return reg_file_size_impl(iv, true);
}
size_t file_size(inode_view iv, uint16_t mode) const {
switch (posix_file_type::from_mode(mode)) {
case posix_file_type::regular:
@ -915,9 +947,31 @@ void metadata_<LoggerPolicy>::analyze_chunks(std::ostream& os) const {
<< "\n";
}
// Verifies every entry of the inode size cache, if the metadata has one.
//
// For each (inode, size) entry the size is recomputed from the inode's
// chunks with the cache bypassed; the first entry whose cached size differs
// from the recomputed one raises a runtime_error via DWARFS_THROW.
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::check_inode_size_cache() const {
  auto size_cache = meta_.reg_file_size_cache();

  if (!size_cache) {
    return; // nothing to verify
  }

  LOG_DEBUG << "checking inode size cache";

  for (auto entry : size_cache->lookup()) {
    auto const inode = entry.first();
    auto const cached_size = entry.second();

    // Cache keys are relative to the first regular file inode.
    auto iv = make_inode_view(file_inode_offset_ + inode);

    LOG_TRACE << "checking inode " << inode << " size " << cached_size;

    auto const computed_size = reg_file_size_nocache(iv);

    if (cached_size == computed_size) {
      continue;
    }

    DWARFS_THROW(
        runtime_error,
        fmt::format("inode size cache mismatch: inode {} expected {} got {}",
                    inode, computed_size, cached_size));
  }
}
// Runs the metadata consistency checks: first the checks implemented by
// `global_`, then the verification of the inode size cache.
template <typename LoggerPolicy>
void metadata_<LoggerPolicy>::check_consistency() const {
global_.check_consistency(LOG_GET_LOGGER);
check_inode_size_cache();
}
template <typename LoggerPolicy>

View File

@ -892,11 +892,15 @@ void scanner_<LoggerPolicy>::scan(
LOG_INFO << "saving chunks...";
mv2.chunk_table()->resize(im.count() + 1);
auto& size_cache = mv2.reg_file_size_cache().emplace();
size_cache.min_chunk_count() = options_.inode_size_cache_min_chunk_count;
// TODO: we should be able to start this once all blocks have been
// submitted for compression
mv2.chunks().value().reserve(prog.chunk_count);
im.for_each_inode_in_order([&](std::shared_ptr<inode> const& ino) {
DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = mv2.chunks()->size();
auto const total_chunks = mv2.chunks()->size();
DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = total_chunks;
if (!ino->append_chunks_to(mv2.chunks().value())) {
std::ostringstream oss;
for (auto fp : ino->all()) {
@ -905,6 +909,12 @@ void scanner_<LoggerPolicy>::scan(
LOG_ERROR << "inconsistent fragments in inode " << ino->num()
<< ", the following files will be empty:" << oss.str();
}
auto num_inode_chunks = mv2.chunks()->size() - total_chunks;
if (num_inode_chunks >= options_.inode_size_cache_min_chunk_count) {
LOG_DEBUG << "caching size " << ino->size() << " for inode " << ino->num()
<< " with " << num_inode_chunks << " chunks";
size_cache.lookup()->emplace(ino->num(), ino->size());
}
});
blockmgr->map_logical_blocks(mv2.chunks().value());

View File

@ -169,6 +169,20 @@ struct string_table {
4: bool packed_index
}
/*
* For highly fragmented inodes, computing the size from the
* individual chunks can be extremely slow. This cache can be
* used to bypass the chunk lookup and size computation.
*
* The writer only adds an entry for an inode whose chunk count is
* at least `min_chunk_count`; the reader only consults the cache
* for inodes that meet the same threshold.
*/
struct inode_size_cache {
// lookup from inode number to size; the reader queries this map with
// the inode number minus the file inode offset, i.e. keys are relative
// to the first regular file inode
1: map<UInt32, UInt64> lookup
// minimum number of chunks for a file to be found in the cache,
// corresponds to scanner_options.inode_size_cache_min_chunk_count
2: UInt64 min_chunk_count
}
/**
* File System Metadata
*
@ -388,4 +402,11 @@ struct metadata {
// index into this vector is the block number and the value
// is an index into `category_names`.
29: optional list<UInt32> block_categories
//==========================================================//
// fields added with dwarfs-0.11.0, file system version 2.5 //
//==========================================================//
// Size cache for highly fragmented file inodes
30: optional inode_size_cache reg_file_size_cache
}