mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-11 05:23:29 -04:00
feat: add inode size cache
This commit is contained in:
parent
5c9cfd75bb
commit
e803efebe6
@ -64,6 +64,7 @@ struct scanner_options {
|
||||
bool enable_history{true};
|
||||
std::optional<std::vector<std::string>> command_line_arguments;
|
||||
history_config history;
|
||||
size_t inode_size_cache_min_chunk_count{128};
|
||||
};
|
||||
|
||||
} // namespace dwarfs::writer
|
||||
|
@ -292,6 +292,12 @@ void analyze_frozen(std::ostream& os,
|
||||
#undef META_OPT_LIST_SIZE
|
||||
#undef META_OPT_STRING_TABLE_SIZE
|
||||
|
||||
if (auto cache = meta.reg_file_size_cache()) {
|
||||
add_list_size(
|
||||
"inode_size_cache", cache->lookup(),
|
||||
l->reg_file_size_cacheField.layout.valueField.layout.lookupField);
|
||||
}
|
||||
|
||||
std::sort(usage.begin(), usage.end(), [](auto const& a, auto const& b) {
|
||||
return a.first > b.first || (a.first == b.first && a.second < b.second);
|
||||
});
|
||||
@ -538,6 +544,8 @@ class metadata_ final : public metadata_v2::impl {
|
||||
|
||||
thrift::metadata::metadata unpack_metadata() const;
|
||||
|
||||
void check_inode_size_cache() const;
|
||||
|
||||
file_stat getattr_impl(inode_view iv, getattr_options const& opts) const;
|
||||
|
||||
inode_view make_inode_view(uint32_t inode) const {
|
||||
@ -671,17 +679,41 @@ class metadata_ final : public metadata_v2::impl {
|
||||
return {};
|
||||
}
|
||||
|
||||
size_t reg_file_size(inode_view iv) const {
|
||||
size_t reg_file_size_impl(inode_view iv, bool use_cache) const {
|
||||
PERFMON_CLS_SCOPED_SECTION(reg_file_size)
|
||||
|
||||
// Looking up the chunk range is cheap, and we likely have to do it anyway
|
||||
std::error_code ec;
|
||||
auto cr = get_chunk_range(iv.inode_num(), ec);
|
||||
DWARFS_CHECK(!ec, fmt::format("get_chunk_range({}): {}", iv.inode_num(),
|
||||
ec.message()));
|
||||
|
||||
if (use_cache) {
|
||||
if (auto cache = meta_.reg_file_size_cache()) {
|
||||
if (cr.size() >= cache->min_chunk_count()) {
|
||||
LOG_TRACE << "using size cache lookup for inode " << iv.inode_num();
|
||||
if (auto size = cache->lookup().getOptional(iv.inode_num() -
|
||||
file_inode_offset_)) {
|
||||
return *size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This is the expensive part for highly fragmented inodes
|
||||
return std::accumulate(
|
||||
cr.begin(), cr.end(), static_cast<size_t>(0),
|
||||
[](size_t s, chunk_view cv) { return s + cv.size(); });
|
||||
}
|
||||
|
||||
size_t reg_file_size_nocache(inode_view iv) const {
|
||||
return reg_file_size_impl(iv, false);
|
||||
}
|
||||
|
||||
size_t reg_file_size(inode_view iv) const {
|
||||
return reg_file_size_impl(iv, true);
|
||||
}
|
||||
|
||||
size_t file_size(inode_view iv, uint16_t mode) const {
|
||||
switch (posix_file_type::from_mode(mode)) {
|
||||
case posix_file_type::regular:
|
||||
@ -915,9 +947,31 @@ void metadata_<LoggerPolicy>::analyze_chunks(std::ostream& os) const {
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void metadata_<LoggerPolicy>::check_inode_size_cache() const {
|
||||
if (auto cache = meta_.reg_file_size_cache()) {
|
||||
LOG_DEBUG << "checking inode size cache";
|
||||
for (auto entry : cache->lookup()) {
|
||||
auto inode = entry.first();
|
||||
auto size = entry.second();
|
||||
auto iv = make_inode_view(file_inode_offset_ + inode);
|
||||
LOG_TRACE << "checking inode " << inode << " size " << size;
|
||||
auto expected = reg_file_size_nocache(iv);
|
||||
if (size != expected) {
|
||||
DWARFS_THROW(
|
||||
runtime_error,
|
||||
fmt::format(
|
||||
"inode size cache mismatch: inode {} expected {} got {}", inode,
|
||||
expected, size));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void metadata_<LoggerPolicy>::check_consistency() const {
|
||||
global_.check_consistency(LOG_GET_LOGGER);
|
||||
check_inode_size_cache();
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
|
@ -892,11 +892,15 @@ void scanner_<LoggerPolicy>::scan(
|
||||
LOG_INFO << "saving chunks...";
|
||||
mv2.chunk_table()->resize(im.count() + 1);
|
||||
|
||||
auto& size_cache = mv2.reg_file_size_cache().emplace();
|
||||
size_cache.min_chunk_count() = options_.inode_size_cache_min_chunk_count;
|
||||
|
||||
// TODO: we should be able to start this once all blocks have been
|
||||
// submitted for compression
|
||||
mv2.chunks().value().reserve(prog.chunk_count);
|
||||
im.for_each_inode_in_order([&](std::shared_ptr<inode> const& ino) {
|
||||
DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = mv2.chunks()->size();
|
||||
auto const total_chunks = mv2.chunks()->size();
|
||||
DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = total_chunks;
|
||||
if (!ino->append_chunks_to(mv2.chunks().value())) {
|
||||
std::ostringstream oss;
|
||||
for (auto fp : ino->all()) {
|
||||
@ -905,6 +909,12 @@ void scanner_<LoggerPolicy>::scan(
|
||||
LOG_ERROR << "inconsistent fragments in inode " << ino->num()
|
||||
<< ", the following files will be empty:" << oss.str();
|
||||
}
|
||||
auto num_inode_chunks = mv2.chunks()->size() - total_chunks;
|
||||
if (num_inode_chunks >= options_.inode_size_cache_min_chunk_count) {
|
||||
LOG_DEBUG << "caching size " << ino->size() << " for inode " << ino->num()
|
||||
<< " with " << num_inode_chunks << " chunks";
|
||||
size_cache.lookup()->emplace(ino->num(), ino->size());
|
||||
}
|
||||
});
|
||||
|
||||
blockmgr->map_logical_blocks(mv2.chunks().value());
|
||||
|
@ -169,6 +169,20 @@ struct string_table {
|
||||
4: bool packed_index
|
||||
}
|
||||
|
||||
/*
|
||||
* For highly fragmented inodes, computing the size from the
|
||||
* individual chunks can be extremely slow. This cache can be
|
||||
* used to bypass the chunk lookup and size computation.
|
||||
*/
|
||||
struct inode_size_cache {
|
||||
// lookup from inode number to size
|
||||
1: map<UInt32, UInt64> lookup
|
||||
|
||||
// minimum number of chunks for a file to be found in the cache,
|
||||
// corresponds to scanner_options.inode_size_cache_min_chunk_count
|
||||
2: UInt64 min_chunk_count
|
||||
}
|
||||
|
||||
/**
|
||||
* File System Metadata
|
||||
*
|
||||
@ -388,4 +402,11 @@ struct metadata {
|
||||
// index into this vector is the block number and the value
|
||||
// is an index into `category_names`.
|
||||
29: optional list<UInt32> block_categories
|
||||
|
||||
//==========================================================//
|
||||
// fields added with dwarfs-0.11.0, file system version 2.5 //
|
||||
//==========================================================//
|
||||
|
||||
// Size cache for highly fragmented file inodes
|
||||
30: optional inode_size_cache reg_file_size_cache
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user