feat: add metadata version history

This commit is contained in:
Marcus Holland-Moritz 2025-02-24 21:33:42 +01:00
parent faea9c3c4d
commit 8aa0e4158b
13 changed files with 341 additions and 49 deletions

View File

@ -108,6 +108,11 @@ struct filesystem_info {
std::vector<std::optional<size_t>> uncompressed_block_sizes;
};
// Major/minor version number pair identifying a filesystem version.
// A default-constructed value is version 0.0.
struct filesystem_version {
  uint8_t major = 0;
  uint8_t minor = 0;
};
bool is_known_compression_type(compression_type type);
bool is_known_section_type(section_type type);

View File

@ -45,6 +45,7 @@
#include <nlohmann/json.hpp>
#include <dwarfs/file_stat.h>
#include <dwarfs/fstypes.h>
#include <dwarfs/reader/block_range.h>
#include <dwarfs/reader/fsinfo_features.h>
#include <dwarfs/reader/metadata_types.h>
@ -61,8 +62,9 @@ class os_access;
class performance_monitor;
namespace thrift::metadata {
class fs_options;
class metadata;
}
} // namespace thrift::metadata
namespace reader {
@ -100,6 +102,8 @@ class filesystem_v2_lite {
filesystem_options const& options,
std::shared_ptr<performance_monitor const> const& perfmon = nullptr);
// Returns the filesystem version reported by the implementation object.
auto version() const -> filesystem_version { return lite_->version(); }
// Hands the caller's callback to the implementation object's walk().
void walk(std::function<void(dir_entry_view)> const& func) const {
  return lite_->walk(func);
}
@ -334,6 +338,7 @@ class filesystem_v2_lite {
public:
virtual ~impl_lite() = default;
virtual filesystem_version version() const = 0;
virtual void
walk(std::function<void(dir_entry_view)> const& func) const = 0;
virtual void
@ -480,6 +485,8 @@ class filesystem_v2 final : public filesystem_v2_lite {
std::unique_ptr<thrift::metadata::metadata> thawed_metadata() const;
std::unique_ptr<thrift::metadata::metadata> unpacked_metadata() const;
std::unique_ptr<thrift::metadata::fs_options> thawed_fs_options() const;
class impl : public impl_lite {
public:
virtual int
@ -495,6 +502,8 @@ class filesystem_v2 final : public filesystem_v2_lite {
thawed_metadata() const = 0;
virtual std::unique_ptr<thrift::metadata::metadata>
unpacked_metadata() const = 0;
virtual std::unique_ptr<thrift::metadata::fs_options>
thawed_fs_options() const = 0;
};
private:

View File

@ -35,6 +35,7 @@
#include <string>
#include <vector>
#include <dwarfs/fstypes.h>
#include <dwarfs/types.h>
#include <dwarfs/internal/fs_section.h>
@ -64,9 +65,10 @@ class filesystem_parser {
std::string version() const;
int major_version() const { return major_; }
int minor_version() const { return minor_; }
int header_version() const { return version_; }
int major_version() const { return fs_version_.major; }
int minor_version() const { return fs_version_.minor; }
int header_version() const { return header_version_; }
filesystem_version const& fs_version() const { return fs_version_; }
file_off_t image_offset() const { return image_offset_; }
@ -84,9 +86,8 @@ class filesystem_parser {
file_off_t const image_offset_{0};
file_off_t const image_size_{std::numeric_limits<file_off_t>::max()};
file_off_t offset_{0};
int version_{0};
uint8_t major_{0};
uint8_t minor_{0};
int header_version_{0};
filesystem_version fs_version_{};
std::vector<uint64_t> index_;
};

View File

@ -55,8 +55,9 @@ struct vfs_stat;
class performance_monitor;
namespace thrift::metadata {
class fs_options;
class metadata;
}
} // namespace thrift::metadata
namespace reader {
@ -266,6 +267,8 @@ class metadata_v2_utils {
std::unique_ptr<thrift::metadata::metadata> unpack() const;
std::unique_ptr<thrift::metadata::fs_options> thaw_fs_options() const;
private:
metadata_v2_data const& data_;
};

View File

@ -29,6 +29,8 @@
namespace dwarfs {
struct filesystem_version;
class logger;
namespace writer {
@ -36,6 +38,7 @@ struct metadata_options;
}
namespace thrift::metadata {
class fs_options;
class metadata;
} // namespace thrift::metadata
@ -48,11 +51,19 @@ class dir;
class metadata_builder {
public:
// Start with empty metadata
metadata_builder(logger& lgr, metadata_options const& options);
// Start with existing metadata, upgrade if necessary
metadata_builder(logger& lgr, thrift::metadata::metadata const& md,
thrift::metadata::fs_options const* orig_fs_options,
filesystem_version const& orig_fs_version,
metadata_options const& options);
metadata_builder(logger& lgr, thrift::metadata::metadata&& md,
thrift::metadata::fs_options const* orig_fs_options,
filesystem_version const& orig_fs_version,
metadata_options const& options);
~metadata_builder();
void set_devices(std::vector<uint64_t> devices) {

View File

@ -219,6 +219,7 @@ class filesystem_ final {
info_as_json(fsinfo_options const& opts, history const& hist) const;
nlohmann::json metadata_as_json() const;
std::string serialize_metadata_as_json(bool simple) const;
filesystem_version version() const;
void walk(std::function<void(dir_entry_view)> const& func) const;
void walk_data_order(std::function<void(dir_entry_view)> const& func) const;
dir_entry_view root() const;
@ -340,6 +341,10 @@ class filesystem_ final {
return metadata_v2_utils(meta_).unpack();
}
std::unique_ptr<thrift::metadata::fs_options> thawed_fs_options() const {
return metadata_v2_utils(meta_).thaw_fs_options();
}
private:
filesystem_parser make_fs_parser() const {
return filesystem_parser(mm_, image_offset_, options_.image_size);
@ -375,6 +380,7 @@ class filesystem_ final {
std::vector<fs_section> history_sections_;
file_off_t const image_offset_;
filesystem_options const options_;
filesystem_version version_;
PERFMON_CLS_PROXY_DECL
PERFMON_CLS_TIMER_DECL(find_path)
PERFMON_CLS_TIMER_DECL(find_inode)
@ -506,6 +512,7 @@ filesystem_<LoggerPolicy>::filesystem_(
}
header_ = parser.header();
version_ = parser.fs_version();
section_map sections;
@ -788,6 +795,11 @@ filesystem_<LoggerPolicy>::serialize_metadata_as_json(bool simple) const {
return metadata_v2_utils(meta_).serialize_as_json(simple);
}
template <typename LoggerPolicy>
filesystem_version filesystem_<LoggerPolicy>::version() const {
return version_;
}
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::walk(
std::function<void(dir_entry_view)> const& func) const {
@ -1148,6 +1160,7 @@ class filesystem_common_ : public Base {
std::shared_ptr<performance_monitor const> const& perfmon)
: fs_{lgr, os, std::move(mm), options, perfmon} {}
// Exposes the version of the wrapped filesystem object.
auto version() const -> filesystem_version override {
  return fs_.version();
}
// Passes the caller's callback straight through to the wrapped filesystem.
void walk(std::function<void(dir_entry_view)> const& func) const override {
  return fs_.walk(func);
}
@ -1369,6 +1382,10 @@ class filesystem_full_
unpacked_metadata() const override {
return fs().unpacked_metadata();
}
std::unique_ptr<thrift::metadata::fs_options>
thawed_fs_options() const override {
return fs().thawed_fs_options();
}
private:
history history_;
@ -1496,6 +1513,11 @@ filesystem_v2::unpacked_metadata() const {
return full_().unpacked_metadata();
}
std::unique_ptr<thrift::metadata::fs_options>
filesystem_v2::thawed_fs_options() const {
return full_().thawed_fs_options();
}
auto filesystem_v2::full_() const -> impl const& { return this->as_<impl>(); }
} // namespace dwarfs::reader

View File

@ -175,11 +175,11 @@ filesystem_parser::filesystem_parser(std::shared_ptr<mmif> mm,
DWARFS_THROW(runtime_error, "newer minor version");
}
version_ = fh->minor >= 2 ? 2 : 1;
major_ = fh->major;
minor_ = fh->minor;
header_version_ = fh->minor >= 2 ? 2 : 1;
fs_version_.major = fh->major;
fs_version_.minor = fh->minor;
if (minor_ >= 4) {
if (fs_version_.minor >= 4) {
find_index();
}
@ -189,7 +189,7 @@ filesystem_parser::filesystem_parser(std::shared_ptr<mmif> mm,
std::optional<fs_section> filesystem_parser::next_section() {
if (index_.empty()) {
if (std::cmp_less(offset_, image_offset_ + image_size_)) {
auto section = fs_section(*mm_, offset_, version_);
auto section = fs_section(*mm_, offset_, header_version_);
offset_ = section.end();
return section;
}
@ -201,7 +201,8 @@ std::optional<fs_section> filesystem_parser::next_section() {
? index_[offset_] & section_offset_mask
: image_size_;
return fs_section(mm_, static_cast<section_type>(id >> 48),
image_offset_ + offset, next_offset - offset, version_);
image_offset_ + offset, next_offset - offset,
header_version_);
}
}
@ -218,7 +219,7 @@ std::optional<std::span<uint8_t const>> filesystem_parser::header() const {
void filesystem_parser::rewind() {
if (index_.empty()) {
offset_ = image_offset_;
if (version_ == 1) {
if (header_version_ == 1) {
offset_ += sizeof(file_header);
}
} else {
@ -227,10 +228,11 @@ void filesystem_parser::rewind() {
}
std::string filesystem_parser::version() const {
return fmt::format("{0}.{1} [{2}]", major_, minor_, version_);
return fmt::format("{0}.{1} [{2}]", fs_version_.major, fs_version_.minor,
header_version_);
}
bool filesystem_parser::has_checksums() const { return version_ >= 2; }
bool filesystem_parser::has_checksums() const { return header_version_ >= 2; }
bool filesystem_parser::has_index() const { return !index_.empty(); }
@ -261,7 +263,7 @@ void filesystem_parser::find_index() {
return;
}
auto section = fs_section(*mm_, index_pos, version_);
auto section = fs_section(*mm_, index_pos, header_version_);
if (section.type() != section_type::SECTION_INDEX) {
return;

View File

@ -90,6 +90,7 @@ namespace fs = std::filesystem;
namespace {
using ::apache::thrift::frozen::MappedFrozen;
using ::apache::thrift::frozen::View;
::apache::thrift::frozen::schema::Schema
deserialize_schema(std::span<uint8_t const> data) {
@ -426,6 +427,17 @@ void analyze_frozen(std::ostream& os,
l.reg_file_size_cacheField.layout.valueField.layout.lookupField);
}
if (auto list = meta.metadata_version_history()) {
size_t history_size =
list_size(*list, l.metadata_version_historyField.layout.valueField);
for (auto const& entry : *list) {
if (entry.dwarfs_version()) {
history_size += entry.dwarfs_version()->size();
}
}
add_size("metadata_version_history", list->size(), history_size);
}
if (auto version = meta.dwarfs_version()) {
add_size_unique("dwarfs_version", version->size());
}
@ -449,15 +461,32 @@ void analyze_frozen(std::ostream& os,
}
}
// Reports each boolean filesystem option in `opt` to `func`.
//
// `func` is called once per option as `func(name, value)`, where `name`
// is a string literal and `value` is the result of the corresponding
// accessor on `opt`. The calls are made in the fixed order listed below,
// regardless of whether the option is set.
template <typename Function>
void parse_fs_options(View<thrift::metadata::fs_options> opt,
Function const& func) {
func("mtime_only", opt.mtime_only());
func("packed_chunk_table", opt.packed_chunk_table());
func("packed_directories", opt.packed_directories());
func("packed_shared_files_table", opt.packed_shared_files_table());
}
// Collects the names of all options that parse_fs_options() reports with
// a true value, in the order in which they are reported.
std::vector<std::string>
get_fs_options(View<thrift::metadata::fs_options> opt) {
  std::vector<std::string> enabled;

  auto const collect = [&enabled](std::string_view name, bool value) {
    if (!value) {
      return; // option not set, nothing to record
    }
    enabled.push_back(std::string(name));
  };

  parse_fs_options(opt, collect);
  return enabled;
}
template <typename Function>
void parse_metadata_options(
MappedFrozen<thrift::metadata::metadata> const& meta,
Function const& func) {
if (auto opt = meta.options()) {
func("mtime_only", opt->mtime_only());
func("packed_chunk_table", opt->packed_chunk_table());
func("packed_directories", opt->packed_directories());
func("packed_shared_files_table", opt->packed_shared_files_table());
parse_fs_options(*opt, func);
}
if (auto names = meta.compact_names()) {
func("packed_names", static_cast<bool>(names->symtab()));
@ -646,6 +675,14 @@ class metadata_v2_data {
return std::make_unique<thrift::metadata::metadata>(meta_.thaw());
}
// Returns a newly allocated, thawed copy of the options held by the
// metadata, or nullptr when the metadata carries no options.
std::unique_ptr<thrift::metadata::fs_options> thaw_fs_options() const {
  auto frozen_opts = meta_.options();

  if (!frozen_opts.has_value()) {
    return nullptr;
  }

  return std::make_unique<thrift::metadata::fs_options>(frozen_opts->thaw());
}
private:
template <typename K>
using set_type = phmap::flat_hash_set<K>;
@ -1552,6 +1589,44 @@ metadata_v2_data::info_as_json(fsinfo_options const& opts,
meta["unique_files"] = unique_files_;
}
if (auto history = meta_.metadata_version_history(); history.has_value()) {
nlohmann::json jhistory = nlohmann::json::array();
for (auto const& ent : *history) {
nlohmann::json jent;
jent["major"] = ent.major();
jent["minor"] = ent.minor();
if (ent.dwarfs_version().has_value()) {
jent["dwarfs_version"] = ent.dwarfs_version().value();
}
jent["block_size"] = ent.block_size();
if (auto entopts = ent.options(); entopts.has_value()) {
nlohmann::json options;
options["mtime_only"] = entopts->mtime_only();
if (auto res = entopts->time_resolution_sec(); res.has_value()) {
options["time_resolution"] = res.value();
}
options["packed_chunk_table"] = entopts->packed_chunk_table();
options["packed_directories"] = entopts->packed_directories();
options["packed_shared_files_table"] =
entopts->packed_shared_files_table();
jent["options"] = std::move(options);
}
jhistory.push_back(std::move(jent));
}
meta["metadata_version_history"] = std::move(jhistory);
}
info["meta"] = std::move(meta);
}
@ -1674,6 +1749,24 @@ void metadata_v2_data::dump(
}
}
if (auto history = meta_.metadata_version_history(); history.has_value()) {
os << "previous metadata versions:\n";
for (auto const& ent : *history) {
os << " [" << static_cast<int>(ent.major()) << "."
<< static_cast<int>(ent.minor()) << "] "
<< size_with_unit(ent.block_size()) << " blocks, "
<< ent.dwarfs_version().value_or("<unknown library version>") << "\n";
if (auto he_opts = ent.options()) {
if (auto str_opts = get_fs_options(*he_opts); !str_opts.empty()) {
os << " options: " << boost::join(str_opts, ", ") << "\n";
}
if (auto res = he_opts->time_resolution_sec()) {
os << " time resolution: " << *res << " seconds\n";
}
}
}
}
if (opts.features.has(fsinfo_feature::frozen_analysis)) {
analyze_frozen(os, meta_, data_.size());
}
@ -2446,6 +2539,11 @@ std::unique_ptr<thrift::metadata::metadata> metadata_v2_utils::unpack() const {
return data_.unpack();
}
std::unique_ptr<thrift::metadata::fs_options>
metadata_v2_utils::thaw_fs_options() const {
return data_.thaw_fs_options();
}
metadata_v2::metadata_v2(
logger& lgr, std::span<uint8_t const> schema, std::span<uint8_t const> data,
metadata_options const& options, int inode_offset,

View File

@ -174,8 +174,10 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
using namespace dwarfs::writer::internal;
auto md = fs.unpacked_metadata();
auto builder = metadata_builder(lgr, std::move(*md),
opts.rebuild_metadata.value());
auto fsopts = fs.thawed_fs_options();
auto builder =
metadata_builder(lgr, std::move(*md), fsopts.get(), fs.version(),
opts.rebuild_metadata.value());
auto [schema, data] =
metadata_freezer(LOG_GET_LOGGER).freeze(builder.build());

View File

@ -25,6 +25,7 @@
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
#include <dwarfs/fstypes.h>
#include <dwarfs/logger.h>
#include <dwarfs/version.h>
#include <dwarfs/writer/metadata_options.h>
@ -56,19 +57,23 @@ class metadata_builder_ final : public metadata_builder::impl {
, options_{options} {}
metadata_builder_(logger& lgr, thrift::metadata::metadata const& md,
thrift::metadata::fs_options const* orig_fs_options,
filesystem_version const& orig_fs_version,
metadata_options const& options)
: LOG_PROXY_INIT(lgr)
, md_{md}
, options_{options} {
upgrade_metadata();
upgrade_metadata(orig_fs_options, orig_fs_version);
}
metadata_builder_(logger& lgr, thrift::metadata::metadata&& md,
thrift::metadata::fs_options const* orig_fs_options,
filesystem_version const& orig_fs_version,
metadata_options const& options)
: LOG_PROXY_INIT(lgr)
, md_{std::move(md)}
, options_{options} {
upgrade_metadata();
upgrade_metadata(orig_fs_options, orig_fs_version);
}
void set_devices(std::vector<uint64_t> devices) override {
@ -129,7 +134,8 @@ class metadata_builder_ final : public metadata_builder::impl {
private:
thrift::metadata::inode_size_cache build_inode_size_cache() const;
void upgrade_metadata();
void upgrade_metadata(thrift::metadata::fs_options const* orig_fs_options,
filesystem_version const& orig_fs_version);
void upgrade_from_pre_v2_2();
LOG_PROXY_DECL(LoggerPolicy);
@ -544,11 +550,23 @@ void metadata_builder_<LoggerPolicy>::upgrade_from_pre_v2_2() {
}
template <typename LoggerPolicy>
void metadata_builder_<LoggerPolicy>::upgrade_metadata() {
void metadata_builder_<LoggerPolicy>::upgrade_metadata(
thrift::metadata::fs_options const* orig_fs_options,
filesystem_version const& orig_fs_version) {
auto tv = LOG_TIMED_VERBOSE;
// std::cout << apache::thrift::debugString(md_);
thrift::metadata::history_entry histent;
histent.major() = orig_fs_version.major;
histent.minor() = orig_fs_version.minor;
histent.dwarfs_version().copy_from(md_.dwarfs_version());
histent.block_size() = md_.block_size().value();
if (orig_fs_options) {
histent.options().ensure();
histent.options() = *orig_fs_options;
}
if (apache::thrift::is_non_optional_field_set_manually_or_by_serializer(
md_.entry_table_v2_2())) {
DWARFS_CHECK(!md_.dir_entries().has_value(),
@ -558,8 +576,12 @@ void metadata_builder_<LoggerPolicy>::upgrade_metadata() {
}
// TODO: update uid, gid, timestamp, mtime_only, time_resolution_sec
// TODO: do we need to do this here???
tv << "upgrading metadata...";
md_.metadata_version_history().ensure();
md_.metadata_version_history()->push_back(std::move(histent));
}
} // namespace
@ -569,18 +591,21 @@ metadata_builder::metadata_builder(logger& lgr, metadata_options const& options)
make_unique_logging_object<impl, metadata_builder_, logger_policies>(
lgr, options)} {}
metadata_builder::metadata_builder(logger& lgr,
thrift::metadata::metadata const& md,
metadata_options const& options)
metadata_builder::metadata_builder(
logger& lgr, thrift::metadata::metadata const& md,
thrift::metadata::fs_options const* orig_fs_options,
filesystem_version const& orig_fs_version, metadata_options const& options)
: impl_{
make_unique_logging_object<impl, metadata_builder_, logger_policies>(
lgr, md, options)} {}
lgr, md, orig_fs_options, orig_fs_version, options)} {}
metadata_builder::metadata_builder(logger& lgr, thrift::metadata::metadata&& md,
metadata_options const& options)
metadata_builder::metadata_builder(
logger& lgr, thrift::metadata::metadata&& md,
thrift::metadata::fs_options const* orig_fs_options,
filesystem_version const& orig_fs_version, metadata_options const& options)
: impl_{
make_unique_logging_object<impl, metadata_builder_, logger_policies>(
lgr, std::move(md), options)} {}
lgr, std::move(md), orig_fs_options, orig_fs_version, options)} {}
metadata_builder::~metadata_builder() = default;

View File

@ -1229,8 +1229,66 @@ auto get_image_path(std::string const& version) {
class compat_metadata : public testing::TestWithParam<std::string> {};
// Checks the metadata version history found in `info` (the JSON info of a
// rewritten filesystem) against the filesystem it was rewritten from.
//
//   info     - JSON info of the rewritten filesystem
//   origfs   - the original filesystem; supplies the expected major/minor
//   originfo - JSON info of the original filesystem
//
// The history must hold all entries of the original's history (if any),
// unchanged, followed by exactly one new entry. That last entry is
// compared against the original's version, "created_by", "block_size",
// "options" and "time_resolution".
void check_history(nlohmann::json info, reader::filesystem_v2 const& origfs,
                   nlohmann::json originfo) {
  auto new_meta = info["meta"];
  auto old_meta = originfo["meta"];
  auto history = new_meta["metadata_version_history"];

  ASSERT_GE(history.size(), 1);

  if (!old_meta.contains("metadata_version_history")) {
    // The original had no history, so the new entry is the only one.
    EXPECT_EQ(history.size(), 1);
  } else {
    auto old_history = old_meta["metadata_version_history"];

    ASSERT_EQ(history.size(), old_history.size() + 1);

    // Entries carried over from the original must be untouched.
    for (size_t idx = 0; idx != old_history.size(); ++idx) {
      EXPECT_EQ(history[idx], old_history[idx]);
    }
  }

  auto latest = history.back();

  // Debugging aids:
  // std::cerr << old_meta.dump(2) << std::endl;
  // std::cerr << latest.dump(2) << std::endl;

  EXPECT_EQ(latest["major"], origfs.version().major);
  EXPECT_EQ(latest["minor"], origfs.version().minor);

  if (originfo.contains("created_by")) {
    EXPECT_EQ(latest["dwarfs_version"], originfo["created_by"]);
  } else {
    EXPECT_FALSE(latest.contains("dwarfs_version"));
  }

  EXPECT_EQ(latest["block_size"], originfo["block_size"]);

  if (!originfo.contains("options")) {
    EXPECT_FALSE(latest.contains("options"));
    return;
  }

  // Start with every flag cleared, then switch on the ones the original
  // filesystem lists as active.
  nlohmann::json expected{
      {"mtime_only", false},
      {"packed_chunk_table", false},
      {"packed_directories", false},
      {"packed_shared_files_table", false},
  };

  if (originfo.contains("time_resolution")) {
    expected["time_resolution"] = originfo["time_resolution"];
  }

  for (auto const& active : originfo["options"]) {
    expected[active.template get<std::string>()] = true;
  }

  EXPECT_EQ(expected, latest["options"]);
}
void check_dynamic(std::string const& version, reader::filesystem_v2 const& fs,
bool rebuild_metadata [[maybe_unused]] = false) {
std::shared_ptr<mmif> origmm = nullptr,
bool rebuild_metadata = false) {
auto meta = fs.metadata_as_json();
nlohmann::json ref;
if (version.starts_with("0.2.")) {
@ -1246,6 +1304,18 @@ void check_dynamic(std::string const& version, reader::filesystem_v2 const& fs,
}
}
if (rebuild_metadata) {
test::test_logger lgr;
test::os_access_mock os;
reader::filesystem_options fsopts;
fsopts.image_offset = reader::filesystem_options::IMAGE_OFFSET_AUTO;
reader::filesystem_v2 orig(lgr, os, origmm, fsopts);
reader::fsinfo_options io{
.features = {reader::fsinfo_feature::metadata_details,
reader::fsinfo_feature::metadata_summary}};
check_history(fs.info_as_json(io), orig, orig.info_as_json(io));
}
remove_inode_numbers(ref);
remove_inode_numbers(meta);
@ -1353,13 +1423,14 @@ TEST_P(rewrite, filesystem_rewrite) {
utility::rewrite_filesystem(lgr, fs, fsw, resolver, opts);
};
std::shared_ptr<mmif> origmm = std::make_shared<mmap>(filename);
{
writer::filesystem_writer fsw(rewritten, lgr, pool, prog);
fsw.add_default_compressor(bc);
auto mm = std::make_shared<mmap>(filename);
EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, mm, idss));
EXPECT_FALSE(reader::filesystem_v2::header(mm));
rewrite_fs(fsw, mm);
EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, origmm, idss));
EXPECT_FALSE(reader::filesystem_v2::header(origmm));
rewrite_fs(fsw, origmm);
}
{
@ -1367,7 +1438,7 @@ TEST_P(rewrite, filesystem_rewrite) {
EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, mm, idss));
EXPECT_FALSE(reader::filesystem_v2::header(mm));
reader::filesystem_v2 fs(lgr, os, mm);
check_dynamic(version, fs, rebuild_metadata.has_value());
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
check_checksums(fs);
}
@ -1380,7 +1451,7 @@ TEST_P(rewrite, filesystem_rewrite) {
writer::filesystem_writer fsw(rewritten, lgr, pool, prog, fsw_opts,
&hdr_iss);
fsw.add_default_compressor(bc);
rewrite_fs(fsw, std::make_shared<mmap>(filename));
rewrite_fs(fsw, origmm);
}
{
@ -1396,7 +1467,7 @@ TEST_P(rewrite, filesystem_rewrite) {
reader::filesystem_options fsopts;
fsopts.image_offset = reader::filesystem_options::IMAGE_OFFSET_AUTO;
reader::filesystem_v2 fs(lgr, os, mm, fsopts);
check_dynamic(version, fs, rebuild_metadata.has_value());
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
check_checksums(fs);
}
@ -1438,13 +1509,14 @@ TEST_P(rewrite, filesystem_rewrite) {
}
std::ostringstream rewritten4;
origmm = std::make_shared<test::mmap_mock>(rewritten3.str());
{
writer::filesystem_writer_options fsw_opts;
fsw_opts.remove_header = true;
writer::filesystem_writer fsw(rewritten4, lgr, pool, prog, fsw_opts);
fsw.add_default_compressor(bc);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten3.str()));
rewrite_fs(fsw, origmm);
}
{
@ -1453,18 +1525,19 @@ TEST_P(rewrite, filesystem_rewrite) {
EXPECT_FALSE(reader::filesystem_v2::header(mm))
<< folly::hexDump(rewritten4.str().data(), rewritten4.str().size());
reader::filesystem_v2 fs(lgr, os, mm);
check_dynamic(version, fs, rebuild_metadata.has_value());
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
check_checksums(fs);
}
std::ostringstream rewritten5;
origmm = std::make_shared<test::mmap_mock>(rewritten4.str());
{
writer::filesystem_writer_options fsw_opts;
fsw_opts.no_section_index = true;
writer::filesystem_writer fsw(rewritten5, lgr, pool, prog, fsw_opts);
fsw.add_default_compressor(bc);
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten4.str()));
rewrite_fs(fsw, origmm);
}
{
@ -1473,7 +1546,7 @@ TEST_P(rewrite, filesystem_rewrite) {
EXPECT_FALSE(reader::filesystem_v2::header(mm))
<< folly::hexDump(rewritten5.str().data(), rewritten5.str().size());
reader::filesystem_v2 fs(lgr, os, mm);
check_dynamic(version, fs, rebuild_metadata.has_value());
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
check_checksums(fs);
}
}

View File

@ -79,10 +79,12 @@ std::string make_fragmented_file(size_t fragment_size, size_t fragment_count) {
}
auto rebuild_metadata(logger& lgr, thrift::metadata::metadata const& md,
thrift::metadata::fs_options const* fs_options,
filesystem_version const& fs_version,
writer::metadata_options const& options) {
using namespace writer::internal;
return metadata_freezer(lgr).freeze(
metadata_builder(lgr, md, options).build());
metadata_builder(lgr, md, fs_options, fs_version, options).build());
}
template <typename T>
@ -153,8 +155,9 @@ TEST_F(metadata_test, basic) {
// std::cout << ::apache::thrift::debugString(unpacked1) << std::endl;
{
auto fsopts = fs.thawed_fs_options();
auto [schema, data] = rebuild_metadata(
lgr, unpacked1,
lgr, unpacked1, fsopts.get(), fs.version(),
{.plain_names_table = true, .no_create_timestamp = true});
reader::internal::metadata_v2 mv2(lgr, schema.span(), data.span(), {});
using utils = reader::internal::metadata_v2_utils;
@ -164,6 +167,25 @@ TEST_F(metadata_test, basic) {
// std::cout << ::apache::thrift::debugString(unpacked2) << std::endl;
auto history = unpacked2.metadata_version_history();
ASSERT_TRUE(history.has_value());
EXPECT_EQ(history->size(), 1);
auto hent = history->at(0);
EXPECT_EQ(hent.major().value(), fs.version().major);
EXPECT_EQ(hent.minor().value(), fs.version().minor);
ASSERT_TRUE(hent.dwarfs_version().has_value());
ASSERT_TRUE(unpacked1.dwarfs_version().has_value());
EXPECT_EQ(hent.dwarfs_version().value(),
unpacked1.dwarfs_version().value());
EXPECT_EQ(hent.block_size().value(), unpacked1.block_size().value());
ASSERT_TRUE(hent.options().has_value());
ASSERT_TRUE(unpacked1.options().has_value());
EXPECT_EQ(hent.options().value(), unpacked1.options().value())
<< thrift_diff(hent.options().value(), unpacked1.options().value());
unpacked2.metadata_version_history().reset();
EXPECT_EQ(unpacked1, unpacked2) << thrift_diff(unpacked1, unpacked2);
EXPECT_NE(thawed1, thawed2) << thrift_diff(thawed1, thawed2);

View File

@ -196,6 +196,22 @@ struct inode_size_cache {
2: UInt64 min_chunk_count
}
/*
 * One entry of the metadata rewrite history.
 *
 * Each entry describes a previous version of the metadata: the
 * filesystem version it belonged to, the library version that created
 * it, its block size and, if available, its filesystem options.
 */
struct history_entry {
// major and minor version numbers corresponding to the block header
1: UInt8 major
2: UInt8 minor
// version string of dwarfs library used to create the metadata
3: optional string dwarfs_version
// block size of the metadata described by this entry
4: UInt32 block_size
// filesystem options of the metadata described by this entry, if known
5: optional fs_options options
}
/**
* File System Metadata
*
@ -436,4 +452,7 @@ struct metadata {
// The metadata associated with each block. Maps from block
// number to index into `categorization_metadata_json`.
32: optional map<UInt32, UInt32> block_category_metadata
// history of previous metadata versions, one `history_entry` per version
33: optional list<history_entry> metadata_version_history
}