feat: update inode info when rebuilding metadata

This commit is contained in:
Marcus Holland-Moritz 2025-05-11 12:11:53 +02:00
parent 77fc3ff679
commit bd43f473dc
4 changed files with 107 additions and 11 deletions

View File

@ -42,7 +42,7 @@ struct metadata_options {
std::optional<file_stat::gid_type> gid{}; std::optional<file_stat::gid_type> gid{};
std::optional<uint64_t> timestamp{}; std::optional<uint64_t> timestamp{};
bool keep_all_times{false}; bool keep_all_times{false};
uint32_t time_resolution_sec{1}; std::optional<uint32_t> time_resolution_sec{};
bool pack_chunk_table{false}; bool pack_chunk_table{false};
bool pack_directories{false}; bool pack_directories{false};
bool pack_shared_files_table{false}; bool pack_shared_files_table{false};

View File

@ -82,7 +82,7 @@ void global_entry_data::index() {
} }
uint64_t global_entry_data::get_time_offset(uint64_t time) const { uint64_t global_entry_data::get_time_offset(uint64_t time) const {
return (time - timestamp_base_) / options_.time_resolution_sec; return (time - timestamp_base_) / options_.time_resolution_sec.value_or(1);
} }
uint64_t global_entry_data::get_mtime_offset(uint64_t time) const { uint64_t global_entry_data::get_mtime_offset(uint64_t time) const {
@ -101,7 +101,7 @@ uint64_t global_entry_data::get_ctime_offset(uint64_t time) const {
uint64_t global_entry_data::get_timestamp_base() const { uint64_t global_entry_data::get_timestamp_base() const {
return (options_.timestamp ? *options_.timestamp : timestamp_base_) / return (options_.timestamp ? *options_.timestamp : timestamp_base_) /
options_.time_resolution_sec; options_.time_resolution_sec.value_or(1);
} }
size_t global_entry_data::get_uid_index(uid_type uid) const { size_t global_entry_data::get_uid_index(uid_type uid) const {

View File

@ -25,6 +25,7 @@
#include <thrift/lib/cpp2/protocol/DebugProtocol.h> #include <thrift/lib/cpp2/protocol/DebugProtocol.h>
#include <dwarfs/file_stat.h>
#include <dwarfs/fstypes.h> #include <dwarfs/fstypes.h>
#include <dwarfs/logger.h> #include <dwarfs/logger.h>
#include <dwarfs/version.h> #include <dwarfs/version.h>
@ -52,6 +53,9 @@ using namespace dwarfs::internal;
template <typename LoggerPolicy> template <typename LoggerPolicy>
class metadata_builder_ final : public metadata_builder::impl { class metadata_builder_ final : public metadata_builder::impl {
public: public:
using uid_type = file_stat::uid_type;
using gid_type = file_stat::gid_type;
metadata_builder_(logger& lgr, metadata_options const& options) metadata_builder_(logger& lgr, metadata_options const& options)
: LOG_PROXY_INIT(lgr) : LOG_PROXY_INIT(lgr)
, options_{options} {} , options_{options} {}
@ -64,6 +68,7 @@ class metadata_builder_ final : public metadata_builder::impl {
, md_{md} , md_{md}
, options_{options} { , options_{options} {
upgrade_metadata(orig_fs_options, orig_fs_version); upgrade_metadata(orig_fs_options, orig_fs_version);
update_inodes();
} }
metadata_builder_(logger& lgr, thrift::metadata::metadata&& md, metadata_builder_(logger& lgr, thrift::metadata::metadata&& md,
@ -74,6 +79,7 @@ class metadata_builder_ final : public metadata_builder::impl {
, md_{std::move(md)} , md_{std::move(md)}
, options_{options} { , options_{options} {
upgrade_metadata(orig_fs_options, orig_fs_version); upgrade_metadata(orig_fs_options, orig_fs_version);
update_inodes();
} }
void set_devices(std::vector<uint64_t> devices) override { void set_devices(std::vector<uint64_t> devices) override {
@ -138,6 +144,18 @@ class metadata_builder_ final : public metadata_builder::impl {
filesystem_version const& orig_fs_version); filesystem_version const& orig_fs_version);
void upgrade_from_pre_v2_2(); void upgrade_from_pre_v2_2();
// Returns the time resolution (in seconds) recorded in the metadata's
// filesystem options. Falls back to 1 when the metadata carries no options
// at all, or when the options do not specify a resolution.
uint32_t get_time_resolution() const {
  if (md_.options()) {
    if (auto const stored = md_.options()->time_resolution_sec()) {
      return *stored;
    }
  }
  return 1;
}
void update_inodes();
LOG_PROXY_DECL(LoggerPolicy); LOG_PROXY_DECL(LoggerPolicy);
thrift::metadata::metadata md_; thrift::metadata::metadata md_;
feature_set features_; feature_set features_;
@ -251,14 +269,95 @@ void metadata_builder_<LoggerPolicy>::gather_global_entry_data(
md_.timestamp_base() = ge_data.get_timestamp_base(); md_.timestamp_base() = ge_data.get_timestamp_base();
} }
// Applies the overrides requested in `options_` to the metadata held in
// `md_`. Depending on which options are set, this:
//
//   - points every inode at owner/group index 0 and replaces the uid/gid
//     tables with the single requested id,
//   - zeroes all time offsets and resets the timestamp base when a fixed
//     timestamp is requested,
//   - zeroes atime/ctime offsets when not all times are to be kept and the
//     metadata is not already flagged `mtime_only`,
//   - rescales time offsets and the timestamp base when a coarser time
//     resolution than the stored one is requested.
//
// A request for a finer resolution than the stored one is not applied; it
// only produces a warning. If none of the above applies, the metadata is
// left untouched.
template <typename LoggerPolicy>
void metadata_builder_<LoggerPolicy>::update_inodes() {
  bool const override_owner = options_.uid.has_value();
  bool const override_group = options_.gid.has_value();
  bool const fixed_timestamp = options_.timestamp.has_value();

  // The metadata options are only consulted when `keep_all_times` is off.
  bool const drop_atime_ctime =
      !(options_.keep_all_times ||
        (md_.options().has_value() && md_.options()->mtime_only().value()));

  auto const stored_resolution = get_time_resolution();
  auto target_resolution = stored_resolution;
  bool rescale_times = false;

  if (options_.time_resolution_sec.has_value()) {
    auto const requested = *options_.time_resolution_sec;

    if (requested < stored_resolution) {
      // A smaller value would need precision that was already discarded.
      LOG_WARN << "cannot increase time resolution from " << stored_resolution
               << "s to " << requested << "s";
    } else if (requested > stored_resolution) {
      target_resolution = requested;
      rescale_times = true;
    }
  }

  bool const anything_to_do = override_owner || override_group ||
                              fixed_timestamp || drop_atime_ctime ||
                              rescale_times;

  if (!anything_to_do) {
    return;
  }

  // Converts a value counted in units of the stored resolution into units of
  // the target resolution (integer division, so the result is truncated).
  // NOTE(review): the multiplication happens in the promoted type of `value`
  // and the 32-bit resolution; confirm the offset fields are wide enough
  // that this cannot wrap.
  auto const rescale = [stored_resolution, target_resolution](auto value) {
    return (value * stored_resolution) / target_resolution;
  };

  bool const zero_atime_ctime = fixed_timestamp || drop_atime_ctime;

  for (auto& ino : md_.inodes().value()) {
    if (override_owner) {
      ino.owner_index() = 0;
    }

    if (override_group) {
      ino.group_index() = 0;
    }

    if (fixed_timestamp) {
      ino.mtime_offset() = 0;
    } else if (rescale_times) {
      ino.mtime_offset() = rescale(ino.mtime_offset().value());
    }

    if (zero_atime_ctime) {
      ino.atime_offset() = 0;
      ino.ctime_offset() = 0;
    } else if (rescale_times) {
      ino.atime_offset() = rescale(ino.atime_offset().value());
      ino.ctime_offset() = rescale(ino.ctime_offset().value());
    }
  }

  // With every inode now referring to index 0, the id tables shrink to the
  // single requested id.
  if (override_owner) {
    md_.uids() = std::vector<uid_type>{*options_.uid};
  }

  if (override_group) {
    md_.gids() = std::vector<gid_type>{*options_.gid};
  }

  if (fixed_timestamp) {
    md_.timestamp_base() = *options_.timestamp / target_resolution;
  } else if (rescale_times) {
    md_.timestamp_base() = rescale(md_.timestamp_base().value());
  }

  // Record the resolution in the metadata options whenever it is not the
  // implicit default of one second.
  if (target_resolution > 1) {
    md_.options().ensure();
    md_.options()->time_resolution_sec() = target_resolution;
  }

  // TODO: also allow chmod? -> that's quite a lot more involved,
  //       but we can probably get rid of scanner transformers
}
template <typename LoggerPolicy> template <typename LoggerPolicy>
thrift::metadata::metadata const& metadata_builder_<LoggerPolicy>::build() { thrift::metadata::metadata const& metadata_builder_<LoggerPolicy>::build() {
LOG_VERBOSE << "building metadata"; LOG_VERBOSE << "building metadata";
thrift::metadata::fs_options fsopts; thrift::metadata::fs_options fsopts;
fsopts.mtime_only() = !options_.keep_all_times; fsopts.mtime_only() = !options_.keep_all_times;
if (options_.time_resolution_sec > 1) { if (options_.time_resolution_sec.has_value() &&
fsopts.time_resolution_sec() = options_.time_resolution_sec; options_.time_resolution_sec.value() > 1) {
fsopts.time_resolution_sec() = options_.time_resolution_sec.value();
} }
fsopts.packed_chunk_table() = options_.pack_chunk_table; fsopts.packed_chunk_table() = options_.pack_chunk_table;
fsopts.packed_directories() = options_.pack_directories; fsopts.packed_directories() = options_.pack_directories;
@ -350,11 +449,9 @@ thrift::metadata::metadata const& metadata_builder_<LoggerPolicy>::build() {
md_.block_category_metadata().reset(); md_.block_category_metadata().reset();
} }
// TODO: don't overwrite all options when upgrading!
md_.options() = fsopts; md_.options() = fsopts;
md_.features() = features_.get(); md_.features() = features_.get();
// TODO: try and keep metadata upgrade history
md_.dwarfs_version() = std::string("libdwarfs ") + DWARFS_GIT_ID; md_.dwarfs_version() = std::string("libdwarfs ") + DWARFS_GIT_ID;
if (!options_.no_create_timestamp) { if (!options_.no_create_timestamp) {
md_.create_timestamp() = std::time(nullptr); md_.create_timestamp() = std::time(nullptr);
@ -575,9 +672,6 @@ void metadata_builder_<LoggerPolicy>::upgrade_metadata(
upgrade_from_pre_v2_2(); upgrade_from_pre_v2_2();
} }
// TODO: update uid, gid, timestamp, mtime_only, time_resolution_sec
// TODO: do we need to do this here???
tv << "upgrading metadata..."; tv << "upgrading metadata...";
md_.metadata_version_history().ensure(); md_.metadata_version_history().ensure();

View File

@ -39,7 +39,9 @@ std::ostream& operator<<(std::ostream& os, metadata_options const& opts) {
if (opts.keep_all_times) { if (opts.keep_all_times) {
os << "keep_all_times, "; os << "keep_all_times, ";
} }
os << "time_resolution_sec: " << opts.time_resolution_sec << ", "; if (opts.time_resolution_sec) {
os << "time_resolution_sec: " << *opts.time_resolution_sec << ", ";
}
if (opts.pack_chunk_table) { if (opts.pack_chunk_table) {
os << "pack_chunk_table, "; os << "pack_chunk_table, ";
} }