From bd43f473dc93d79d29ad41939a2800a92f197ecb Mon Sep 17 00:00:00 2001
From: Marcus Holland-Moritz
Date: Sun, 11 May 2025 12:11:53 +0200
Subject: [PATCH] feat: update inode info when rebuilding metadata

---
Review notes (ignored by `git am`, not part of the commit message):

* update_inodes(): time offsets are now rescaled through the absolute
  time using 64-bit arithmetic. The previous `val * orig_resolution` was
  evaluated in the common type of `val` and `uint32_t` and could wrap for
  narrow offsets, and flooring base and offset independently could move a
  timestamp back by one extra step of the new resolution (e.g. base=59,
  offset=1 at 1s -> 0+0 at 60s instead of 0+1).
* build() default-constructs `fsopts`, fills `time_resolution_sec` and
  `mtime_only` from `options_` only, and assigns it to `md_.options()`.
  Unless code outside the hunks shown here compensates, this discards the
  resolution recorded by update_inodes() (e.g. when no resolution, or a
  finer one, is requested for a rebuild). The corresponding TODO is
  therefore kept instead of being removed.
* Hunk headers and the diffstat were adjusted to the changed line counts;
  the post-image blob id in the metadata_builder.cpp `index` line was not
  recomputed.
* Text in angle brackets that is missing from context lines (include
  targets, some template arguments) is left as found wherever it cannot
  be determined from the patch itself.

 include/dwarfs/writer/metadata_options.h  |   2 +-
 src/writer/internal/global_entry_data.cpp |   4 +-
 src/writer/internal/metadata_builder.cpp  | 131 ++++++++++++++++++++--
 src/writer/metadata_options.cpp           |   4 +-
 4 files changed, 131 insertions(+), 10 deletions(-)

diff --git a/include/dwarfs/writer/metadata_options.h b/include/dwarfs/writer/metadata_options.h
index 9c35d48e..29c657b2 100644
--- a/include/dwarfs/writer/metadata_options.h
+++ b/include/dwarfs/writer/metadata_options.h
@@ -42,7 +42,7 @@ struct metadata_options {
   std::optional gid{};
   std::optional timestamp{};
   bool keep_all_times{false};
-  uint32_t time_resolution_sec{1};
+  std::optional<uint32_t> time_resolution_sec{};
   bool pack_chunk_table{false};
   bool pack_directories{false};
   bool pack_shared_files_table{false};
diff --git a/src/writer/internal/global_entry_data.cpp b/src/writer/internal/global_entry_data.cpp
index 470517f8..8301bb75 100644
--- a/src/writer/internal/global_entry_data.cpp
+++ b/src/writer/internal/global_entry_data.cpp
@@ -82,7 +82,7 @@ void global_entry_data::index() {
 }
 
 uint64_t global_entry_data::get_time_offset(uint64_t time) const {
-  return (time - timestamp_base_) / options_.time_resolution_sec;
+  return (time - timestamp_base_) / options_.time_resolution_sec.value_or(1);
 }
 
 uint64_t global_entry_data::get_mtime_offset(uint64_t time) const {
@@ -101,7 +101,7 @@ uint64_t global_entry_data::get_ctime_offset(uint64_t time) const {
 
 uint64_t global_entry_data::get_timestamp_base() const {
   return (options_.timestamp ? *options_.timestamp : timestamp_base_) /
-         options_.time_resolution_sec;
+         options_.time_resolution_sec.value_or(1);
 }
 
 size_t global_entry_data::get_uid_index(uid_type uid) const {
diff --git a/src/writer/internal/metadata_builder.cpp b/src/writer/internal/metadata_builder.cpp
index e4e099a0..6949eb2a 100644
--- a/src/writer/internal/metadata_builder.cpp
+++ b/src/writer/internal/metadata_builder.cpp
@@ -25,6 +25,7 @@
 
 #include
 
+#include
 #include
 #include
 #include
@@ -52,6 +53,9 @@ using namespace dwarfs::internal;
 template <typename LoggerPolicy>
 class metadata_builder_ final : public metadata_builder::impl {
  public:
+  using uid_type = file_stat::uid_type;
+  using gid_type = file_stat::gid_type;
+
   metadata_builder_(logger& lgr, metadata_options const& options)
       : LOG_PROXY_INIT(lgr)
       , options_{options} {}
@@ -64,6 +68,7 @@ class metadata_builder_ final : public metadata_builder::impl {
       , md_{md}
       , options_{options} {
     upgrade_metadata(orig_fs_options, orig_fs_version);
+    update_inodes();
   }
 
   metadata_builder_(logger& lgr, thrift::metadata::metadata&& md,
@@ -74,6 +79,7 @@ class metadata_builder_ final : public metadata_builder::impl {
       , md_{std::move(md)}
       , options_{options} {
     upgrade_metadata(orig_fs_options, orig_fs_version);
+    update_inodes();
   }
 
   void set_devices(std::vector devices) override {
@@ -138,6 +144,20 @@ class metadata_builder_ final : public metadata_builder::impl {
                         filesystem_version const& orig_fs_version);
   void upgrade_from_pre_v2_2();
 
+  // Time resolution (in seconds) recorded in the existing metadata options;
+  // 1 if the metadata has no options or no explicit resolution.
+  uint32_t get_time_resolution() const {
+    uint32_t resolution = 1;
+    if (md_.options()) {
+      if (auto res = md_.options()->time_resolution_sec()) {
+        resolution = *res;
+      }
+    }
+    return resolution;
+  }
+
+  void update_inodes();
+
   LOG_PROXY_DECL(LoggerPolicy);
   thrift::metadata::metadata md_;
   feature_set features_;
@@ -251,14 +271,117 @@ void metadata_builder_<LoggerPolicy>::gather_global_entry_data(
   md_.timestamp_base() = ge_data.get_timestamp_base();
 }
 
+// Apply the rebuild options (uid, gid, timestamp, atime/ctime removal and
+// coarser time resolution) to the inodes of already existing metadata.
+//
+// Absolute times are taken to be `(timestamp_base + offset) * resolution`,
+// matching how global_entry_data derives base and offsets. The resolution
+// can only be made coarser; a request for a finer one only logs a warning.
+template <typename LoggerPolicy>
+void metadata_builder_<LoggerPolicy>::update_inodes() {
+  bool const update_uid{options_.uid.has_value()};
+  bool const update_gid{options_.gid.has_value()};
+  bool const set_timestamp{options_.timestamp.has_value()};
+  // There is nothing to remove if the metadata is already mtime-only.
+  bool const remove_atime_ctime{
+      !options_.keep_all_times &&
+      !(md_.options().has_value() && md_.options()->mtime_only().value())};
+  bool update_resolution{false};
+  auto const orig_resolution = get_time_resolution();
+  auto new_resolution = orig_resolution;
+
+  if (options_.time_resolution_sec.has_value()) {
+    auto const res = *options_.time_resolution_sec;
+    if (res > orig_resolution) {
+      new_resolution = res;
+      update_resolution = true;
+    } else if (res < orig_resolution) {
+      LOG_WARN << "cannot increase time resolution from " << orig_resolution
+               << "s to " << res << "s";
+    }
+  }
+
+  if (!update_uid && !update_gid && !set_timestamp && !remove_atime_ctime &&
+      !update_resolution) {
+    // nothing to do
+    return;
+  }
+
+  // The base is computed up front because rescaling an offset needs both
+  // the original and the new base.
+  uint64_t orig_base{0};
+  uint64_t new_base{0};
+
+  if (set_timestamp) {
+    new_base = *options_.timestamp / new_resolution;
+  } else if (update_resolution) {
+    orig_base = md_.timestamp_base().value();
+    new_base = (orig_base * orig_resolution) / new_resolution;
+  }
+
+  // Rescale via the absolute time in 64 bits: multiplying a narrow offset
+  // by the resolution could wrap, and flooring base and offset separately
+  // can move a timestamp back by one extra step of the new resolution.
+  auto transform_timeval = [&](auto val) {
+    uint64_t const seconds = (orig_base + val) * orig_resolution;
+    return static_cast<decltype(val)>(seconds / new_resolution - new_base);
+  };
+
+  for (auto& inode : md_.inodes().value()) {
+    if (update_uid) {
+      inode.owner_index() = 0;
+    }
+
+    if (update_gid) {
+      inode.group_index() = 0;
+    }
+
+    if (set_timestamp) {
+      inode.mtime_offset() = 0;
+    } else if (update_resolution) {
+      inode.mtime_offset() = transform_timeval(inode.mtime_offset().value());
+    }
+
+    if (set_timestamp || remove_atime_ctime) {
+      inode.atime_offset() = 0;
+      inode.ctime_offset() = 0;
+    } else if (update_resolution) {
+      inode.atime_offset() = transform_timeval(inode.atime_offset().value());
+      inode.ctime_offset() = transform_timeval(inode.ctime_offset().value());
+    }
+  }
+
+  // All inodes now refer to index 0, so a single entry per table suffices.
+  if (update_uid) {
+    md_.uids() = std::vector<uid_type>{*options_.uid};
+  }
+
+  if (update_gid) {
+    md_.gids() = std::vector<gid_type>{*options_.gid};
+  }
+
+  if (set_timestamp || update_resolution) {
+    md_.timestamp_base() = new_base;
+  }
+
+  if (new_resolution > 1) {
+    md_.options().ensure();
+    md_.options()->time_resolution_sec() = new_resolution;
+  }
+
+  // TODO: also allow chmod? -> that's quite a lot more involved,
+  // but we can probably get rid of scanner transformers
+}
+
 template <typename LoggerPolicy>
 thrift::metadata::metadata const& metadata_builder_<LoggerPolicy>::build() {
   LOG_VERBOSE << "building metadata";
 
   thrift::metadata::fs_options fsopts;
   fsopts.mtime_only() = !options_.keep_all_times;
-  if (options_.time_resolution_sec > 1) {
-    fsopts.time_resolution_sec() = options_.time_resolution_sec;
+  if (options_.time_resolution_sec.has_value() &&
+      options_.time_resolution_sec.value() > 1) {
+    fsopts.time_resolution_sec() = options_.time_resolution_sec.value();
   }
   fsopts.packed_chunk_table() = options_.pack_chunk_table;
   fsopts.packed_directories() = options_.pack_directories;
@@ -350,11 +473,10 @@ thrift::metadata::metadata const& metadata_builder_<LoggerPolicy>::build() {
     md_.block_category_metadata().reset();
   }
 
   // TODO: don't overwrite all options when upgrading!
   md_.options() = fsopts;
   md_.features() = features_.get();
 
-  // TODO: try and keep metadata upgrade history
   md_.dwarfs_version() = std::string("libdwarfs ") + DWARFS_GIT_ID;
   if (!options_.no_create_timestamp) {
     md_.create_timestamp() = std::time(nullptr);
@@ -575,9 +697,6 @@ void metadata_builder_<LoggerPolicy>::upgrade_metadata(
     upgrade_from_pre_v2_2();
   }
 
-  // TODO: update uid, gid, timestamp, mtime_only, time_resolution_sec
-  // TODO: do we need to do this here???
-
   tv << "upgrading metadata...";
 
   md_.metadata_version_history().ensure();
diff --git a/src/writer/metadata_options.cpp b/src/writer/metadata_options.cpp
index 9ead56e7..e347d21a 100644
--- a/src/writer/metadata_options.cpp
+++ b/src/writer/metadata_options.cpp
@@ -39,7 +39,9 @@ std::ostream& operator<<(std::ostream& os, metadata_options const& opts) {
   if (opts.keep_all_times) {
     os << "keep_all_times, ";
   }
-  os << "time_resolution_sec: " << opts.time_resolution_sec << ", ";
+  if (opts.time_resolution_sec) {
+    os << "time_resolution_sec: " << *opts.time_resolution_sec << ", ";
+  }
   if (opts.pack_chunk_table) {
     os << "pack_chunk_table, ";
   }