mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-11 13:30:47 -04:00
feat: add inode size cache
This commit is contained in:
parent
5c9cfd75bb
commit
e803efebe6
@ -64,6 +64,7 @@ struct scanner_options {
|
|||||||
bool enable_history{true};
|
bool enable_history{true};
|
||||||
std::optional<std::vector<std::string>> command_line_arguments;
|
std::optional<std::vector<std::string>> command_line_arguments;
|
||||||
history_config history;
|
history_config history;
|
||||||
|
size_t inode_size_cache_min_chunk_count{128};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace dwarfs::writer
|
} // namespace dwarfs::writer
|
||||||
|
@ -292,6 +292,12 @@ void analyze_frozen(std::ostream& os,
|
|||||||
#undef META_OPT_LIST_SIZE
|
#undef META_OPT_LIST_SIZE
|
||||||
#undef META_OPT_STRING_TABLE_SIZE
|
#undef META_OPT_STRING_TABLE_SIZE
|
||||||
|
|
||||||
|
if (auto cache = meta.reg_file_size_cache()) {
|
||||||
|
add_list_size(
|
||||||
|
"inode_size_cache", cache->lookup(),
|
||||||
|
l->reg_file_size_cacheField.layout.valueField.layout.lookupField);
|
||||||
|
}
|
||||||
|
|
||||||
std::sort(usage.begin(), usage.end(), [](auto const& a, auto const& b) {
|
std::sort(usage.begin(), usage.end(), [](auto const& a, auto const& b) {
|
||||||
return a.first > b.first || (a.first == b.first && a.second < b.second);
|
return a.first > b.first || (a.first == b.first && a.second < b.second);
|
||||||
});
|
});
|
||||||
@ -538,6 +544,8 @@ class metadata_ final : public metadata_v2::impl {
|
|||||||
|
|
||||||
thrift::metadata::metadata unpack_metadata() const;
|
thrift::metadata::metadata unpack_metadata() const;
|
||||||
|
|
||||||
|
void check_inode_size_cache() const;
|
||||||
|
|
||||||
file_stat getattr_impl(inode_view iv, getattr_options const& opts) const;
|
file_stat getattr_impl(inode_view iv, getattr_options const& opts) const;
|
||||||
|
|
||||||
inode_view make_inode_view(uint32_t inode) const {
|
inode_view make_inode_view(uint32_t inode) const {
|
||||||
@ -671,17 +679,41 @@ class metadata_ final : public metadata_v2::impl {
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t reg_file_size(inode_view iv) const {
|
size_t reg_file_size_impl(inode_view iv, bool use_cache) const {
|
||||||
PERFMON_CLS_SCOPED_SECTION(reg_file_size)
|
PERFMON_CLS_SCOPED_SECTION(reg_file_size)
|
||||||
|
|
||||||
|
// Looking up the chunk range is cheap, and we likely have to do it anyway
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
auto cr = get_chunk_range(iv.inode_num(), ec);
|
auto cr = get_chunk_range(iv.inode_num(), ec);
|
||||||
DWARFS_CHECK(!ec, fmt::format("get_chunk_range({}): {}", iv.inode_num(),
|
DWARFS_CHECK(!ec, fmt::format("get_chunk_range({}): {}", iv.inode_num(),
|
||||||
ec.message()));
|
ec.message()));
|
||||||
|
|
||||||
|
if (use_cache) {
|
||||||
|
if (auto cache = meta_.reg_file_size_cache()) {
|
||||||
|
if (cr.size() >= cache->min_chunk_count()) {
|
||||||
|
LOG_TRACE << "using size cache lookup for inode " << iv.inode_num();
|
||||||
|
if (auto size = cache->lookup().getOptional(iv.inode_num() -
|
||||||
|
file_inode_offset_)) {
|
||||||
|
return *size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is the expensive part for highly fragmented inodes
|
||||||
return std::accumulate(
|
return std::accumulate(
|
||||||
cr.begin(), cr.end(), static_cast<size_t>(0),
|
cr.begin(), cr.end(), static_cast<size_t>(0),
|
||||||
[](size_t s, chunk_view cv) { return s + cv.size(); });
|
[](size_t s, chunk_view cv) { return s + cv.size(); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t reg_file_size_nocache(inode_view iv) const {
|
||||||
|
return reg_file_size_impl(iv, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t reg_file_size(inode_view iv) const {
|
||||||
|
return reg_file_size_impl(iv, true);
|
||||||
|
}
|
||||||
|
|
||||||
size_t file_size(inode_view iv, uint16_t mode) const {
|
size_t file_size(inode_view iv, uint16_t mode) const {
|
||||||
switch (posix_file_type::from_mode(mode)) {
|
switch (posix_file_type::from_mode(mode)) {
|
||||||
case posix_file_type::regular:
|
case posix_file_type::regular:
|
||||||
@ -915,9 +947,31 @@ void metadata_<LoggerPolicy>::analyze_chunks(std::ostream& os) const {
|
|||||||
<< "\n";
|
<< "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
void metadata_<LoggerPolicy>::check_inode_size_cache() const {
|
||||||
|
if (auto cache = meta_.reg_file_size_cache()) {
|
||||||
|
LOG_DEBUG << "checking inode size cache";
|
||||||
|
for (auto entry : cache->lookup()) {
|
||||||
|
auto inode = entry.first();
|
||||||
|
auto size = entry.second();
|
||||||
|
auto iv = make_inode_view(file_inode_offset_ + inode);
|
||||||
|
LOG_TRACE << "checking inode " << inode << " size " << size;
|
||||||
|
auto expected = reg_file_size_nocache(iv);
|
||||||
|
if (size != expected) {
|
||||||
|
DWARFS_THROW(
|
||||||
|
runtime_error,
|
||||||
|
fmt::format(
|
||||||
|
"inode size cache mismatch: inode {} expected {} got {}", inode,
|
||||||
|
expected, size));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
void metadata_<LoggerPolicy>::check_consistency() const {
|
void metadata_<LoggerPolicy>::check_consistency() const {
|
||||||
global_.check_consistency(LOG_GET_LOGGER);
|
global_.check_consistency(LOG_GET_LOGGER);
|
||||||
|
check_inode_size_cache();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
|
@ -892,11 +892,15 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
LOG_INFO << "saving chunks...";
|
LOG_INFO << "saving chunks...";
|
||||||
mv2.chunk_table()->resize(im.count() + 1);
|
mv2.chunk_table()->resize(im.count() + 1);
|
||||||
|
|
||||||
|
auto& size_cache = mv2.reg_file_size_cache().emplace();
|
||||||
|
size_cache.min_chunk_count() = options_.inode_size_cache_min_chunk_count;
|
||||||
|
|
||||||
// TODO: we should be able to start this once all blocks have been
|
// TODO: we should be able to start this once all blocks have been
|
||||||
// submitted for compression
|
// submitted for compression
|
||||||
mv2.chunks().value().reserve(prog.chunk_count);
|
mv2.chunks().value().reserve(prog.chunk_count);
|
||||||
im.for_each_inode_in_order([&](std::shared_ptr<inode> const& ino) {
|
im.for_each_inode_in_order([&](std::shared_ptr<inode> const& ino) {
|
||||||
DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = mv2.chunks()->size();
|
auto const total_chunks = mv2.chunks()->size();
|
||||||
|
DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = total_chunks;
|
||||||
if (!ino->append_chunks_to(mv2.chunks().value())) {
|
if (!ino->append_chunks_to(mv2.chunks().value())) {
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
for (auto fp : ino->all()) {
|
for (auto fp : ino->all()) {
|
||||||
@ -905,6 +909,12 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
LOG_ERROR << "inconsistent fragments in inode " << ino->num()
|
LOG_ERROR << "inconsistent fragments in inode " << ino->num()
|
||||||
<< ", the following files will be empty:" << oss.str();
|
<< ", the following files will be empty:" << oss.str();
|
||||||
}
|
}
|
||||||
|
auto num_inode_chunks = mv2.chunks()->size() - total_chunks;
|
||||||
|
if (num_inode_chunks >= options_.inode_size_cache_min_chunk_count) {
|
||||||
|
LOG_DEBUG << "caching size " << ino->size() << " for inode " << ino->num()
|
||||||
|
<< " with " << num_inode_chunks << " chunks";
|
||||||
|
size_cache.lookup()->emplace(ino->num(), ino->size());
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
blockmgr->map_logical_blocks(mv2.chunks().value());
|
blockmgr->map_logical_blocks(mv2.chunks().value());
|
||||||
|
@ -169,6 +169,20 @@ struct string_table {
|
|||||||
4: bool packed_index
|
4: bool packed_index
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For highly fragmented inodes, computing the size from the
|
||||||
|
* individual chunks can be extremely slow. This cache can be
|
||||||
|
* used to bypass the chunk lookup and size computation.
|
||||||
|
*/
|
||||||
|
struct inode_size_cache {
|
||||||
|
// lookup from inode number to size
|
||||||
|
1: map<UInt32, UInt64> lookup
|
||||||
|
|
||||||
|
// minimum number of chunks for a file to be found in the cache,
|
||||||
|
// corresponds to scanner_options.inode_size_cache_min_chunk_count
|
||||||
|
2: UInt64 min_chunk_count
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* File System Metadata
|
* File System Metadata
|
||||||
*
|
*
|
||||||
@ -388,4 +402,11 @@ struct metadata {
|
|||||||
// index into this vector is the block number and the value
|
// index into this vector is the block number and the value
|
||||||
// is an index into `category_names`.
|
// is an index into `category_names`.
|
||||||
29: optional list<UInt32> block_categories
|
29: optional list<UInt32> block_categories
|
||||||
|
|
||||||
|
//==========================================================//
|
||||||
|
// fields added with dwarfs-0.11.0, file system version 2.5 //
|
||||||
|
//==========================================================//
|
||||||
|
|
||||||
|
// Size cache for highly fragmented file inodes
|
||||||
|
30: optional inode_size_cache reg_file_size_cache
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user