From 8aa0e4158b8678a0db1135d7c165564ce7b489ce Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Mon, 24 Feb 2025 21:33:42 +0100 Subject: [PATCH] feat: add metadata version history --- include/dwarfs/fstypes.h | 5 + include/dwarfs/reader/filesystem_v2.h | 11 +- .../reader/internal/filesystem_parser.h | 13 ++- include/dwarfs/reader/internal/metadata_v2.h | 5 +- .../dwarfs/writer/internal/metadata_builder.h | 11 ++ src/reader/filesystem_v2.cpp | 22 ++++ src/reader/internal/filesystem_parser.cpp | 22 ++-- src/reader/internal/metadata_v2.cpp | 106 +++++++++++++++++- src/utility/rewrite_filesystem.cpp | 6 +- src/writer/internal/metadata_builder.cpp | 47 ++++++-- test/compat_test.cpp | 97 ++++++++++++++-- test/metadata_test.cpp | 26 ++++- thrift/metadata.thrift | 19 ++++ 13 files changed, 341 insertions(+), 49 deletions(-) diff --git a/include/dwarfs/fstypes.h b/include/dwarfs/fstypes.h index 54a92c25..31bb593e 100644 --- a/include/dwarfs/fstypes.h +++ b/include/dwarfs/fstypes.h @@ -108,6 +108,11 @@ struct filesystem_info { std::vector> uncompressed_block_sizes; }; +struct filesystem_version { + uint8_t major{0}; + uint8_t minor{0}; +}; + bool is_known_compression_type(compression_type type); bool is_known_section_type(section_type type); diff --git a/include/dwarfs/reader/filesystem_v2.h b/include/dwarfs/reader/filesystem_v2.h index 0ee96da9..deeac448 100644 --- a/include/dwarfs/reader/filesystem_v2.h +++ b/include/dwarfs/reader/filesystem_v2.h @@ -45,6 +45,7 @@ #include #include +#include #include #include #include @@ -61,8 +62,9 @@ class os_access; class performance_monitor; namespace thrift::metadata { +class fs_options; class metadata; -} +} // namespace thrift::metadata namespace reader { @@ -100,6 +102,8 @@ class filesystem_v2_lite { filesystem_options const& options, std::shared_ptr const& perfmon = nullptr); + filesystem_version version() const { return lite_->version(); } + void walk(std::function const& func) const { lite_->walk(func); } @@ -334,6 
+338,7 @@ class filesystem_v2_lite { public: virtual ~impl_lite() = default; + virtual filesystem_version version() const = 0; virtual void walk(std::function const& func) const = 0; virtual void @@ -480,6 +485,8 @@ class filesystem_v2 final : public filesystem_v2_lite { std::unique_ptr thawed_metadata() const; std::unique_ptr unpacked_metadata() const; + std::unique_ptr thawed_fs_options() const; + class impl : public impl_lite { public: virtual int @@ -495,6 +502,8 @@ class filesystem_v2 final : public filesystem_v2_lite { thawed_metadata() const = 0; virtual std::unique_ptr unpacked_metadata() const = 0; + virtual std::unique_ptr + thawed_fs_options() const = 0; }; private: diff --git a/include/dwarfs/reader/internal/filesystem_parser.h b/include/dwarfs/reader/internal/filesystem_parser.h index 2f1b631c..4415070b 100644 --- a/include/dwarfs/reader/internal/filesystem_parser.h +++ b/include/dwarfs/reader/internal/filesystem_parser.h @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -64,9 +65,10 @@ class filesystem_parser { std::string version() const; - int major_version() const { return major_; } - int minor_version() const { return minor_; } - int header_version() const { return version_; } + int major_version() const { return fs_version_.major; } + int minor_version() const { return fs_version_.minor; } + int header_version() const { return header_version_; } + filesystem_version const& fs_version() const { return fs_version_; } file_off_t image_offset() const { return image_offset_; } @@ -84,9 +86,8 @@ class filesystem_parser { file_off_t const image_offset_{0}; file_off_t const image_size_{std::numeric_limits::max()}; file_off_t offset_{0}; - int version_{0}; - uint8_t major_{0}; - uint8_t minor_{0}; + int header_version_{0}; + filesystem_version fs_version_{}; std::vector index_; }; diff --git a/include/dwarfs/reader/internal/metadata_v2.h b/include/dwarfs/reader/internal/metadata_v2.h index 41979086..5d656983 100644 --- 
a/include/dwarfs/reader/internal/metadata_v2.h +++ b/include/dwarfs/reader/internal/metadata_v2.h @@ -55,8 +55,9 @@ struct vfs_stat; class performance_monitor; namespace thrift::metadata { +class fs_options; class metadata; -} +} // namespace thrift::metadata namespace reader { @@ -266,6 +267,8 @@ class metadata_v2_utils { std::unique_ptr unpack() const; + std::unique_ptr thaw_fs_options() const; + private: metadata_v2_data const& data_; }; diff --git a/include/dwarfs/writer/internal/metadata_builder.h b/include/dwarfs/writer/internal/metadata_builder.h index 09d089a8..800508f7 100644 --- a/include/dwarfs/writer/internal/metadata_builder.h +++ b/include/dwarfs/writer/internal/metadata_builder.h @@ -29,6 +29,8 @@ namespace dwarfs { +struct filesystem_version; + class logger; namespace writer { @@ -36,6 +38,7 @@ struct metadata_options; } namespace thrift::metadata { +class fs_options; class metadata; } // namespace thrift::metadata @@ -48,11 +51,19 @@ class dir; class metadata_builder { public: + // Start with empty metadata metadata_builder(logger& lgr, metadata_options const& options); + + // Start with existing metadata, upgrade if necessary metadata_builder(logger& lgr, thrift::metadata::metadata const& md, + thrift::metadata::fs_options const* orig_fs_options, + filesystem_version const& orig_fs_version, metadata_options const& options); metadata_builder(logger& lgr, thrift::metadata::metadata&& md, + thrift::metadata::fs_options const* orig_fs_options, + filesystem_version const& orig_fs_version, metadata_options const& options); + ~metadata_builder(); void set_devices(std::vector devices) { diff --git a/src/reader/filesystem_v2.cpp b/src/reader/filesystem_v2.cpp index 89f813ad..e0ac50c8 100644 --- a/src/reader/filesystem_v2.cpp +++ b/src/reader/filesystem_v2.cpp @@ -219,6 +219,7 @@ class filesystem_ final { info_as_json(fsinfo_options const& opts, history const& hist) const; nlohmann::json metadata_as_json() const; std::string serialize_metadata_as_json(bool 
simple) const; + filesystem_version version() const; void walk(std::function const& func) const; void walk_data_order(std::function const& func) const; dir_entry_view root() const; @@ -340,6 +341,10 @@ class filesystem_ final { return metadata_v2_utils(meta_).unpack(); } + std::unique_ptr thawed_fs_options() const { + return metadata_v2_utils(meta_).thaw_fs_options(); + } + private: filesystem_parser make_fs_parser() const { return filesystem_parser(mm_, image_offset_, options_.image_size); @@ -375,6 +380,7 @@ class filesystem_ final { std::vector history_sections_; file_off_t const image_offset_; filesystem_options const options_; + filesystem_version version_; PERFMON_CLS_PROXY_DECL PERFMON_CLS_TIMER_DECL(find_path) PERFMON_CLS_TIMER_DECL(find_inode) @@ -506,6 +512,7 @@ filesystem_::filesystem_( } header_ = parser.header(); + version_ = parser.fs_version(); section_map sections; @@ -788,6 +795,11 @@ filesystem_::serialize_metadata_as_json(bool simple) const { return metadata_v2_utils(meta_).serialize_as_json(simple); } +template +filesystem_version filesystem_::version() const { + return version_; +} + template void filesystem_::walk( std::function const& func) const { @@ -1148,6 +1160,7 @@ class filesystem_common_ : public Base { std::shared_ptr const& perfmon) : fs_{lgr, os, std::move(mm), options, perfmon} {} + filesystem_version version() const override { return fs_.version(); } void walk(std::function const& func) const override { fs_.walk(func); } @@ -1369,6 +1382,10 @@ class filesystem_full_ unpacked_metadata() const override { return fs().unpacked_metadata(); } + std::unique_ptr + thawed_fs_options() const override { + return fs().thawed_fs_options(); + } private: history history_; @@ -1496,6 +1513,11 @@ filesystem_v2::unpacked_metadata() const { return full_().unpacked_metadata(); } +std::unique_ptr +filesystem_v2::thawed_fs_options() const { + return full_().thawed_fs_options(); +} + auto filesystem_v2::full_() const -> impl const& { return this->as_(); 
} } // namespace dwarfs::reader diff --git a/src/reader/internal/filesystem_parser.cpp b/src/reader/internal/filesystem_parser.cpp index 6d54a3f8..ab2e4e86 100644 --- a/src/reader/internal/filesystem_parser.cpp +++ b/src/reader/internal/filesystem_parser.cpp @@ -175,11 +175,11 @@ filesystem_parser::filesystem_parser(std::shared_ptr mm, DWARFS_THROW(runtime_error, "newer minor version"); } - version_ = fh->minor >= 2 ? 2 : 1; - major_ = fh->major; - minor_ = fh->minor; + header_version_ = fh->minor >= 2 ? 2 : 1; + fs_version_.major = fh->major; + fs_version_.minor = fh->minor; - if (minor_ >= 4) { + if (fs_version_.minor >= 4) { find_index(); } @@ -189,7 +189,7 @@ filesystem_parser::filesystem_parser(std::shared_ptr mm, std::optional filesystem_parser::next_section() { if (index_.empty()) { if (std::cmp_less(offset_, image_offset_ + image_size_)) { - auto section = fs_section(*mm_, offset_, version_); + auto section = fs_section(*mm_, offset_, header_version_); offset_ = section.end(); return section; } @@ -201,7 +201,8 @@ std::optional filesystem_parser::next_section() { ? 
index_[offset_] & section_offset_mask : image_size_; return fs_section(mm_, static_cast(id >> 48), - image_offset_ + offset, next_offset - offset, version_); + image_offset_ + offset, next_offset - offset, + header_version_); } } @@ -218,7 +219,7 @@ std::optional> filesystem_parser::header() const { void filesystem_parser::rewind() { if (index_.empty()) { offset_ = image_offset_; - if (version_ == 1) { + if (header_version_ == 1) { offset_ += sizeof(file_header); } } else { @@ -227,10 +228,11 @@ void filesystem_parser::rewind() { } std::string filesystem_parser::version() const { - return fmt::format("{0}.{1} [{2}]", major_, minor_, version_); + return fmt::format("{0}.{1} [{2}]", fs_version_.major, fs_version_.minor, + header_version_); } -bool filesystem_parser::has_checksums() const { return version_ >= 2; } +bool filesystem_parser::has_checksums() const { return header_version_ >= 2; } bool filesystem_parser::has_index() const { return !index_.empty(); } @@ -261,7 +263,7 @@ void filesystem_parser::find_index() { return; } - auto section = fs_section(*mm_, index_pos, version_); + auto section = fs_section(*mm_, index_pos, header_version_); if (section.type() != section_type::SECTION_INDEX) { return; diff --git a/src/reader/internal/metadata_v2.cpp b/src/reader/internal/metadata_v2.cpp index 6835f195..a9ace1f4 100644 --- a/src/reader/internal/metadata_v2.cpp +++ b/src/reader/internal/metadata_v2.cpp @@ -90,6 +90,7 @@ namespace fs = std::filesystem; namespace { using ::apache::thrift::frozen::MappedFrozen; +using ::apache::thrift::frozen::View; ::apache::thrift::frozen::schema::Schema deserialize_schema(std::span data) { @@ -426,6 +427,17 @@ void analyze_frozen(std::ostream& os, l.reg_file_size_cacheField.layout.valueField.layout.lookupField); } + if (auto list = meta.metadata_version_history()) { + size_t history_size = + list_size(*list, l.metadata_version_historyField.layout.valueField); + for (auto const& entry : *list) { + if (entry.dwarfs_version()) { + 
history_size += entry.dwarfs_version()->size(); + } + } + add_size("metadata_version_history", list->size(), history_size); + } + if (auto version = meta.dwarfs_version()) { add_size_unique("dwarfs_version", version->size()); } @@ -449,15 +461,32 @@ void analyze_frozen(std::ostream& os, } } +template +void parse_fs_options(View opt, + Function const& func) { + func("mtime_only", opt.mtime_only()); + func("packed_chunk_table", opt.packed_chunk_table()); + func("packed_directories", opt.packed_directories()); + func("packed_shared_files_table", opt.packed_shared_files_table()); +} + +std::vector +get_fs_options(View opt) { + std::vector rv; + parse_fs_options(opt, [&](std::string_view name, bool value) { + if (value) { + rv.emplace_back(name); + } + }); + return rv; +} + template void parse_metadata_options( MappedFrozen const& meta, Function const& func) { if (auto opt = meta.options()) { - func("mtime_only", opt->mtime_only()); - func("packed_chunk_table", opt->packed_chunk_table()); - func("packed_directories", opt->packed_directories()); - func("packed_shared_files_table", opt->packed_shared_files_table()); + parse_fs_options(*opt, func); } if (auto names = meta.compact_names()) { func("packed_names", static_cast(names->symtab())); @@ -646,6 +675,14 @@ class metadata_v2_data { return std::make_unique(meta_.thaw()); } + std::unique_ptr thaw_fs_options() const { + if (meta_.options().has_value()) { + return std::make_unique( + meta_.options()->thaw()); + } + return nullptr; + } + private: template using set_type = phmap::flat_hash_set; @@ -1552,6 +1589,44 @@ metadata_v2_data::info_as_json(fsinfo_options const& opts, meta["unique_files"] = unique_files_; } + if (auto history = meta_.metadata_version_history(); history.has_value()) { + nlohmann::json jhistory = nlohmann::json::array(); + + for (auto const& ent : *history) { + nlohmann::json jent; + + jent["major"] = ent.major(); + jent["minor"] = ent.minor(); + + if (ent.dwarfs_version().has_value()) { + 
jent["dwarfs_version"] = ent.dwarfs_version().value(); + } + + jent["block_size"] = ent.block_size(); + + if (auto entopts = ent.options(); entopts.has_value()) { + nlohmann::json options; + + options["mtime_only"] = entopts->mtime_only(); + + if (auto res = entopts->time_resolution_sec(); res.has_value()) { + options["time_resolution"] = res.value(); + } + + options["packed_chunk_table"] = entopts->packed_chunk_table(); + options["packed_directories"] = entopts->packed_directories(); + options["packed_shared_files_table"] = + entopts->packed_shared_files_table(); + + jent["options"] = std::move(options); + } + + jhistory.push_back(std::move(jent)); + } + + meta["metadata_version_history"] = std::move(jhistory); + } + info["meta"] = std::move(meta); } @@ -1674,6 +1749,24 @@ void metadata_v2_data::dump( } } + if (auto history = meta_.metadata_version_history(); history.has_value()) { + os << "previous metadata versions:\n"; + for (auto const& ent : *history) { + os << " [" << static_cast(ent.major()) << "." 
+ << static_cast(ent.minor()) << "] " + << size_with_unit(ent.block_size()) << " blocks, " + << ent.dwarfs_version().value_or("") << "\n"; + if (auto he_opts = ent.options()) { + if (auto str_opts = get_fs_options(*he_opts); !str_opts.empty()) { + os << " options: " << boost::join(str_opts, ", ") << "\n"; + } + if (auto res = he_opts->time_resolution_sec()) { + os << " time resolution: " << *res << " seconds\n"; + } + } + } + } + if (opts.features.has(fsinfo_feature::frozen_analysis)) { analyze_frozen(os, meta_, data_.size()); } @@ -2446,6 +2539,11 @@ std::unique_ptr metadata_v2_utils::unpack() const { return data_.unpack(); } +std::unique_ptr +metadata_v2_utils::thaw_fs_options() const { + return data_.thaw_fs_options(); +} + metadata_v2::metadata_v2( logger& lgr, std::span schema, std::span data, metadata_options const& options, int inode_offset, diff --git a/src/utility/rewrite_filesystem.cpp b/src/utility/rewrite_filesystem.cpp index 7224bd71..5cae0c30 100644 --- a/src/utility/rewrite_filesystem.cpp +++ b/src/utility/rewrite_filesystem.cpp @@ -174,8 +174,10 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs, using namespace dwarfs::writer::internal; auto md = fs.unpacked_metadata(); - auto builder = metadata_builder(lgr, std::move(*md), - opts.rebuild_metadata.value()); + auto fsopts = fs.thawed_fs_options(); + auto builder = + metadata_builder(lgr, std::move(*md), fsopts.get(), fs.version(), + opts.rebuild_metadata.value()); auto [schema, data] = metadata_freezer(LOG_GET_LOGGER).freeze(builder.build()); diff --git a/src/writer/internal/metadata_builder.cpp b/src/writer/internal/metadata_builder.cpp index fc352569..e4e099a0 100644 --- a/src/writer/internal/metadata_builder.cpp +++ b/src/writer/internal/metadata_builder.cpp @@ -25,6 +25,7 @@ #include +#include #include #include #include @@ -56,19 +57,23 @@ class metadata_builder_ final : public metadata_builder::impl { , options_{options} {} metadata_builder_(logger& lgr, 
thrift::metadata::metadata const& md, + thrift::metadata::fs_options const* orig_fs_options, + filesystem_version const& orig_fs_version, metadata_options const& options) : LOG_PROXY_INIT(lgr) , md_{md} , options_{options} { - upgrade_metadata(); + upgrade_metadata(orig_fs_options, orig_fs_version); } metadata_builder_(logger& lgr, thrift::metadata::metadata&& md, + thrift::metadata::fs_options const* orig_fs_options, + filesystem_version const& orig_fs_version, metadata_options const& options) : LOG_PROXY_INIT(lgr) , md_{std::move(md)} , options_{options} { - upgrade_metadata(); + upgrade_metadata(orig_fs_options, orig_fs_version); } void set_devices(std::vector devices) override { @@ -129,7 +134,8 @@ class metadata_builder_ final : public metadata_builder::impl { private: thrift::metadata::inode_size_cache build_inode_size_cache() const; - void upgrade_metadata(); + void upgrade_metadata(thrift::metadata::fs_options const* orig_fs_options, + filesystem_version const& orig_fs_version); void upgrade_from_pre_v2_2(); LOG_PROXY_DECL(LoggerPolicy); @@ -544,11 +550,23 @@ void metadata_builder_::upgrade_from_pre_v2_2() { } template -void metadata_builder_::upgrade_metadata() { +void metadata_builder_::upgrade_metadata( + thrift::metadata::fs_options const* orig_fs_options, + filesystem_version const& orig_fs_version) { auto tv = LOG_TIMED_VERBOSE; // std::cout << apache::thrift::debugString(md_); + thrift::metadata::history_entry histent; + histent.major() = orig_fs_version.major; + histent.minor() = orig_fs_version.minor; + histent.dwarfs_version().copy_from(md_.dwarfs_version()); + histent.block_size() = md_.block_size().value(); + if (orig_fs_options) { + histent.options().ensure(); + histent.options() = *orig_fs_options; + } + if (apache::thrift::is_non_optional_field_set_manually_or_by_serializer( md_.entry_table_v2_2())) { DWARFS_CHECK(!md_.dir_entries().has_value(), @@ -558,8 +576,12 @@ void metadata_builder_::upgrade_metadata() { } // TODO: update uid, gid, 
timestamp, mtime_only, time_resolution_sec + // TODO: do we need to do this here??? tv << "upgrading metadata..."; + + md_.metadata_version_history().ensure(); + md_.metadata_version_history()->push_back(std::move(histent)); } } // namespace @@ -569,18 +591,21 @@ metadata_builder::metadata_builder(logger& lgr, metadata_options const& options) make_unique_logging_object( lgr, options)} {} -metadata_builder::metadata_builder(logger& lgr, - thrift::metadata::metadata const& md, - metadata_options const& options) +metadata_builder::metadata_builder( + logger& lgr, thrift::metadata::metadata const& md, + thrift::metadata::fs_options const* orig_fs_options, + filesystem_version const& orig_fs_version, metadata_options const& options) : impl_{ make_unique_logging_object( - lgr, md, options)} {} + lgr, md, orig_fs_options, orig_fs_version, options)} {} -metadata_builder::metadata_builder(logger& lgr, thrift::metadata::metadata&& md, - metadata_options const& options) +metadata_builder::metadata_builder( + logger& lgr, thrift::metadata::metadata&& md, + thrift::metadata::fs_options const* orig_fs_options, + filesystem_version const& orig_fs_version, metadata_options const& options) : impl_{ make_unique_logging_object( - lgr, std::move(md), options)} {} + lgr, std::move(md), orig_fs_options, orig_fs_version, options)} {} metadata_builder::~metadata_builder() = default; diff --git a/test/compat_test.cpp b/test/compat_test.cpp index 0199da53..751d600f 100644 --- a/test/compat_test.cpp +++ b/test/compat_test.cpp @@ -1229,8 +1229,66 @@ auto get_image_path(std::string const& version) { class compat_metadata : public testing::TestWithParam {}; +void check_history(nlohmann::json info, reader::filesystem_v2 const& origfs, + nlohmann::json originfo) { + auto meta = info["meta"]; + auto origmeta = originfo["meta"]; + + auto history = meta["metadata_version_history"]; + + ASSERT_GE(history.size(), 1); + + if (origmeta.contains("metadata_version_history")) { + auto orighistory = 
origmeta["metadata_version_history"]; + ASSERT_EQ(history.size(), orighistory.size() + 1); + for (size_t i = 0; i < orighistory.size(); ++i) { + EXPECT_EQ(history[i], orighistory[i]); + } + } else { + EXPECT_EQ(history.size(), 1); + } + + auto hent = history.back(); + + // std::cerr << origmeta.dump(2) << std::endl; + // std::cerr << hent.dump(2) << std::endl; + + EXPECT_EQ(hent["major"], origfs.version().major); + EXPECT_EQ(hent["minor"], origfs.version().minor); + + if (originfo.contains("created_by")) { + EXPECT_EQ(hent["dwarfs_version"], originfo["created_by"]); + } else { + EXPECT_FALSE(hent.contains("dwarfs_version")); + } + + EXPECT_EQ(hent["block_size"], originfo["block_size"]); + + if (originfo.contains("options")) { + nlohmann::json expected{ + {"mtime_only", false}, + {"packed_chunk_table", false}, + {"packed_directories", false}, + {"packed_shared_files_table", false}, + }; + + if (originfo.contains("time_resolution")) { + expected["time_resolution"] = originfo["time_resolution"]; + } + + for (auto const& opt : originfo["options"]) { + expected[opt.template get()] = true; + } + + EXPECT_EQ(expected, hent["options"]); + } else { + EXPECT_FALSE(hent.contains("options")); + } +} + void check_dynamic(std::string const& version, reader::filesystem_v2 const& fs, - bool rebuild_metadata [[maybe_unused]] = false) { + std::shared_ptr origmm = nullptr, + bool rebuild_metadata = false) { auto meta = fs.metadata_as_json(); nlohmann::json ref; if (version.starts_with("0.2.")) { @@ -1246,6 +1304,18 @@ void check_dynamic(std::string const& version, reader::filesystem_v2 const& fs, } } + if (rebuild_metadata) { + test::test_logger lgr; + test::os_access_mock os; + reader::filesystem_options fsopts; + fsopts.image_offset = reader::filesystem_options::IMAGE_OFFSET_AUTO; + reader::filesystem_v2 orig(lgr, os, origmm, fsopts); + reader::fsinfo_options io{ + .features = {reader::fsinfo_feature::metadata_details, + reader::fsinfo_feature::metadata_summary}}; + 
check_history(fs.info_as_json(io), orig, orig.info_as_json(io)); + } + remove_inode_numbers(ref); remove_inode_numbers(meta); @@ -1353,13 +1423,14 @@ TEST_P(rewrite, filesystem_rewrite) { utility::rewrite_filesystem(lgr, fs, fsw, resolver, opts); }; + std::shared_ptr origmm = std::make_shared(filename); + { writer::filesystem_writer fsw(rewritten, lgr, pool, prog); fsw.add_default_compressor(bc); - auto mm = std::make_shared(filename); - EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, mm, idss)); - EXPECT_FALSE(reader::filesystem_v2::header(mm)); - rewrite_fs(fsw, mm); + EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, origmm, idss)); + EXPECT_FALSE(reader::filesystem_v2::header(origmm)); + rewrite_fs(fsw, origmm); } { @@ -1367,7 +1438,7 @@ TEST_P(rewrite, filesystem_rewrite) { EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, mm, idss)); EXPECT_FALSE(reader::filesystem_v2::header(mm)); reader::filesystem_v2 fs(lgr, os, mm); - check_dynamic(version, fs, rebuild_metadata.has_value()); + check_dynamic(version, fs, origmm, rebuild_metadata.has_value()); check_checksums(fs); } @@ -1380,7 +1451,7 @@ TEST_P(rewrite, filesystem_rewrite) { writer::filesystem_writer fsw(rewritten, lgr, pool, prog, fsw_opts, &hdr_iss); fsw.add_default_compressor(bc); - rewrite_fs(fsw, std::make_shared(filename)); + rewrite_fs(fsw, origmm); } { @@ -1396,7 +1467,7 @@ TEST_P(rewrite, filesystem_rewrite) { reader::filesystem_options fsopts; fsopts.image_offset = reader::filesystem_options::IMAGE_OFFSET_AUTO; reader::filesystem_v2 fs(lgr, os, mm, fsopts); - check_dynamic(version, fs, rebuild_metadata.has_value()); + check_dynamic(version, fs, origmm, rebuild_metadata.has_value()); check_checksums(fs); } @@ -1438,13 +1509,14 @@ TEST_P(rewrite, filesystem_rewrite) { } std::ostringstream rewritten4; + origmm = std::make_shared(rewritten3.str()); { writer::filesystem_writer_options fsw_opts; fsw_opts.remove_header = true; writer::filesystem_writer fsw(rewritten4, lgr, pool, 
prog, fsw_opts); fsw.add_default_compressor(bc); - rewrite_fs(fsw, std::make_shared(rewritten3.str())); + rewrite_fs(fsw, origmm); } { @@ -1453,18 +1525,19 @@ TEST_P(rewrite, filesystem_rewrite) { EXPECT_FALSE(reader::filesystem_v2::header(mm)) << folly::hexDump(rewritten4.str().data(), rewritten4.str().size()); reader::filesystem_v2 fs(lgr, os, mm); - check_dynamic(version, fs, rebuild_metadata.has_value()); + check_dynamic(version, fs, origmm, rebuild_metadata.has_value()); check_checksums(fs); } std::ostringstream rewritten5; + origmm = std::make_shared(rewritten4.str()); { writer::filesystem_writer_options fsw_opts; fsw_opts.no_section_index = true; writer::filesystem_writer fsw(rewritten5, lgr, pool, prog, fsw_opts); fsw.add_default_compressor(bc); - rewrite_fs(fsw, std::make_shared(rewritten4.str())); + rewrite_fs(fsw, origmm); } { @@ -1473,7 +1546,7 @@ TEST_P(rewrite, filesystem_rewrite) { EXPECT_FALSE(reader::filesystem_v2::header(mm)) << folly::hexDump(rewritten5.str().data(), rewritten5.str().size()); reader::filesystem_v2 fs(lgr, os, mm); - check_dynamic(version, fs, rebuild_metadata.has_value()); + check_dynamic(version, fs, origmm, rebuild_metadata.has_value()); check_checksums(fs); } } diff --git a/test/metadata_test.cpp b/test/metadata_test.cpp index da86fa01..8b9c86e4 100644 --- a/test/metadata_test.cpp +++ b/test/metadata_test.cpp @@ -79,10 +79,12 @@ std::string make_fragmented_file(size_t fragment_size, size_t fragment_count) { } auto rebuild_metadata(logger& lgr, thrift::metadata::metadata const& md, + thrift::metadata::fs_options const* fs_options, + filesystem_version const& fs_version, writer::metadata_options const& options) { using namespace writer::internal; return metadata_freezer(lgr).freeze( - metadata_builder(lgr, md, options).build()); + metadata_builder(lgr, md, fs_options, fs_version, options).build()); } template @@ -153,8 +155,9 @@ TEST_F(metadata_test, basic) { // std::cout << ::apache::thrift::debugString(unpacked1) << std::endl; 
{ + auto fsopts = fs.thawed_fs_options(); auto [schema, data] = rebuild_metadata( - lgr, unpacked1, + lgr, unpacked1, fsopts.get(), fs.version(), {.plain_names_table = true, .no_create_timestamp = true}); reader::internal::metadata_v2 mv2(lgr, schema.span(), data.span(), {}); using utils = reader::internal::metadata_v2_utils; @@ -164,6 +167,25 @@ TEST_F(metadata_test, basic) { // std::cout << ::apache::thrift::debugString(unpacked2) << std::endl; + auto history = unpacked2.metadata_version_history(); + + ASSERT_TRUE(history.has_value()); + EXPECT_EQ(history->size(), 1); + auto hent = history->at(0); + EXPECT_EQ(hent.major().value(), fs.version().major); + EXPECT_EQ(hent.minor().value(), fs.version().minor); + ASSERT_TRUE(hent.dwarfs_version().has_value()); + ASSERT_TRUE(unpacked1.dwarfs_version().has_value()); + EXPECT_EQ(hent.dwarfs_version().value(), + unpacked1.dwarfs_version().value()); + EXPECT_EQ(hent.block_size().value(), unpacked1.block_size().value()); + ASSERT_TRUE(hent.options().has_value()); + ASSERT_TRUE(unpacked1.options().has_value()); + EXPECT_EQ(hent.options().value(), unpacked1.options().value()) + << thrift_diff(hent.options().value(), unpacked1.options().value()); + + unpacked2.metadata_version_history().reset(); + EXPECT_EQ(unpacked1, unpacked2) << thrift_diff(unpacked1, unpacked2); EXPECT_NE(thawed1, thawed2) << thrift_diff(thawed1, thawed2); diff --git a/thrift/metadata.thrift b/thrift/metadata.thrift index e03e40a4..c15d2f00 100644 --- a/thrift/metadata.thrift +++ b/thrift/metadata.thrift @@ -196,6 +196,22 @@ struct inode_size_cache { 2: UInt64 min_chunk_count } +/* + * This structure contains the version of the metadata format used + * for tracking metadata rewrite history. 
+ */ +struct history_entry { + // major and minor version numbers corresponding to the block header + 1: UInt8 major + 2: UInt8 minor + + // version string of dwarfs library used to create the metadata + 3: optional string dwarfs_version + + 4: UInt32 block_size + 5: optional fs_options options +} + /** * File System Metadata * @@ -436,4 +452,7 @@ struct metadata { // The metadata associated with each block. Maps from block // number to index into `categorization_metadata_json`. 32: optional map block_category_metadata + + // history of previous metadata versions (one entry appended per metadata rebuild) + 33: optional list metadata_version_history }