mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-08 03:49:44 -04:00
feat: add metadata version history
This commit is contained in:
parent
faea9c3c4d
commit
8aa0e4158b
@ -108,6 +108,11 @@ struct filesystem_info {
|
||||
std::vector<std::optional<size_t>> uncompressed_block_sizes;
|
||||
};
|
||||
|
||||
// Version of a filesystem image, split into a major and a minor component.
// A default-constructed value has both components set to zero.
struct filesystem_version {
  // Major version number.
  uint8_t major = 0;
  // Minor version number.
  uint8_t minor = 0;
};
|
||||
|
||||
bool is_known_compression_type(compression_type type);
|
||||
|
||||
bool is_known_section_type(section_type type);
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <dwarfs/file_stat.h>
|
||||
#include <dwarfs/fstypes.h>
|
||||
#include <dwarfs/reader/block_range.h>
|
||||
#include <dwarfs/reader/fsinfo_features.h>
|
||||
#include <dwarfs/reader/metadata_types.h>
|
||||
@ -61,8 +62,9 @@ class os_access;
|
||||
class performance_monitor;
|
||||
|
||||
namespace thrift::metadata {
|
||||
class fs_options;
|
||||
class metadata;
|
||||
}
|
||||
} // namespace thrift::metadata
|
||||
|
||||
namespace reader {
|
||||
|
||||
@ -100,6 +102,8 @@ class filesystem_v2_lite {
|
||||
filesystem_options const& options,
|
||||
std::shared_ptr<performance_monitor const> const& perfmon = nullptr);
|
||||
|
||||
filesystem_version version() const { return lite_->version(); }
|
||||
|
||||
void walk(std::function<void(dir_entry_view)> const& func) const {
|
||||
lite_->walk(func);
|
||||
}
|
||||
@ -334,6 +338,7 @@ class filesystem_v2_lite {
|
||||
public:
|
||||
virtual ~impl_lite() = default;
|
||||
|
||||
virtual filesystem_version version() const = 0;
|
||||
virtual void
|
||||
walk(std::function<void(dir_entry_view)> const& func) const = 0;
|
||||
virtual void
|
||||
@ -480,6 +485,8 @@ class filesystem_v2 final : public filesystem_v2_lite {
|
||||
std::unique_ptr<thrift::metadata::metadata> thawed_metadata() const;
|
||||
std::unique_ptr<thrift::metadata::metadata> unpacked_metadata() const;
|
||||
|
||||
std::unique_ptr<thrift::metadata::fs_options> thawed_fs_options() const;
|
||||
|
||||
class impl : public impl_lite {
|
||||
public:
|
||||
virtual int
|
||||
@ -495,6 +502,8 @@ class filesystem_v2 final : public filesystem_v2_lite {
|
||||
thawed_metadata() const = 0;
|
||||
virtual std::unique_ptr<thrift::metadata::metadata>
|
||||
unpacked_metadata() const = 0;
|
||||
virtual std::unique_ptr<thrift::metadata::fs_options>
|
||||
thawed_fs_options() const = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <dwarfs/fstypes.h>
|
||||
#include <dwarfs/types.h>
|
||||
|
||||
#include <dwarfs/internal/fs_section.h>
|
||||
@ -64,9 +65,10 @@ class filesystem_parser {
|
||||
|
||||
std::string version() const;
|
||||
|
||||
int major_version() const { return major_; }
|
||||
int minor_version() const { return minor_; }
|
||||
int header_version() const { return version_; }
|
||||
int major_version() const { return fs_version_.major; }
|
||||
int minor_version() const { return fs_version_.minor; }
|
||||
int header_version() const { return header_version_; }
|
||||
filesystem_version const& fs_version() const { return fs_version_; }
|
||||
|
||||
file_off_t image_offset() const { return image_offset_; }
|
||||
|
||||
@ -84,9 +86,8 @@ class filesystem_parser {
|
||||
file_off_t const image_offset_{0};
|
||||
file_off_t const image_size_{std::numeric_limits<file_off_t>::max()};
|
||||
file_off_t offset_{0};
|
||||
int version_{0};
|
||||
uint8_t major_{0};
|
||||
uint8_t minor_{0};
|
||||
int header_version_{0};
|
||||
filesystem_version fs_version_{};
|
||||
std::vector<uint64_t> index_;
|
||||
};
|
||||
|
||||
|
@ -55,8 +55,9 @@ struct vfs_stat;
|
||||
class performance_monitor;
|
||||
|
||||
namespace thrift::metadata {
|
||||
class fs_options;
|
||||
class metadata;
|
||||
}
|
||||
} // namespace thrift::metadata
|
||||
|
||||
namespace reader {
|
||||
|
||||
@ -266,6 +267,8 @@ class metadata_v2_utils {
|
||||
|
||||
std::unique_ptr<thrift::metadata::metadata> unpack() const;
|
||||
|
||||
std::unique_ptr<thrift::metadata::fs_options> thaw_fs_options() const;
|
||||
|
||||
private:
|
||||
metadata_v2_data const& data_;
|
||||
};
|
||||
|
@ -29,6 +29,8 @@
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
struct filesystem_version;
|
||||
|
||||
class logger;
|
||||
|
||||
namespace writer {
|
||||
@ -36,6 +38,7 @@ struct metadata_options;
|
||||
}
|
||||
|
||||
namespace thrift::metadata {
|
||||
class fs_options;
|
||||
class metadata;
|
||||
} // namespace thrift::metadata
|
||||
|
||||
@ -48,11 +51,19 @@ class dir;
|
||||
|
||||
class metadata_builder {
|
||||
public:
|
||||
// Start with empty metadata
|
||||
metadata_builder(logger& lgr, metadata_options const& options);
|
||||
|
||||
// Start with existing metadata, upgrade if necessary
|
||||
metadata_builder(logger& lgr, thrift::metadata::metadata const& md,
|
||||
thrift::metadata::fs_options const* orig_fs_options,
|
||||
filesystem_version const& orig_fs_version,
|
||||
metadata_options const& options);
|
||||
metadata_builder(logger& lgr, thrift::metadata::metadata&& md,
|
||||
thrift::metadata::fs_options const* orig_fs_options,
|
||||
filesystem_version const& orig_fs_version,
|
||||
metadata_options const& options);
|
||||
|
||||
~metadata_builder();
|
||||
|
||||
void set_devices(std::vector<uint64_t> devices) {
|
||||
|
@ -219,6 +219,7 @@ class filesystem_ final {
|
||||
info_as_json(fsinfo_options const& opts, history const& hist) const;
|
||||
nlohmann::json metadata_as_json() const;
|
||||
std::string serialize_metadata_as_json(bool simple) const;
|
||||
filesystem_version version() const;
|
||||
void walk(std::function<void(dir_entry_view)> const& func) const;
|
||||
void walk_data_order(std::function<void(dir_entry_view)> const& func) const;
|
||||
dir_entry_view root() const;
|
||||
@ -340,6 +341,10 @@ class filesystem_ final {
|
||||
return metadata_v2_utils(meta_).unpack();
|
||||
}
|
||||
|
||||
std::unique_ptr<thrift::metadata::fs_options> thawed_fs_options() const {
|
||||
return metadata_v2_utils(meta_).thaw_fs_options();
|
||||
}
|
||||
|
||||
private:
|
||||
filesystem_parser make_fs_parser() const {
|
||||
return filesystem_parser(mm_, image_offset_, options_.image_size);
|
||||
@ -375,6 +380,7 @@ class filesystem_ final {
|
||||
std::vector<fs_section> history_sections_;
|
||||
file_off_t const image_offset_;
|
||||
filesystem_options const options_;
|
||||
filesystem_version version_;
|
||||
PERFMON_CLS_PROXY_DECL
|
||||
PERFMON_CLS_TIMER_DECL(find_path)
|
||||
PERFMON_CLS_TIMER_DECL(find_inode)
|
||||
@ -506,6 +512,7 @@ filesystem_<LoggerPolicy>::filesystem_(
|
||||
}
|
||||
|
||||
header_ = parser.header();
|
||||
version_ = parser.fs_version();
|
||||
|
||||
section_map sections;
|
||||
|
||||
@ -788,6 +795,11 @@ filesystem_<LoggerPolicy>::serialize_metadata_as_json(bool simple) const {
|
||||
return metadata_v2_utils(meta_).serialize_as_json(simple);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
filesystem_version filesystem_<LoggerPolicy>::version() const {
|
||||
return version_;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_<LoggerPolicy>::walk(
|
||||
std::function<void(dir_entry_view)> const& func) const {
|
||||
@ -1148,6 +1160,7 @@ class filesystem_common_ : public Base {
|
||||
std::shared_ptr<performance_monitor const> const& perfmon)
|
||||
: fs_{lgr, os, std::move(mm), options, perfmon} {}
|
||||
|
||||
filesystem_version version() const override { return fs_.version(); }
|
||||
void walk(std::function<void(dir_entry_view)> const& func) const override {
|
||||
fs_.walk(func);
|
||||
}
|
||||
@ -1369,6 +1382,10 @@ class filesystem_full_
|
||||
unpacked_metadata() const override {
|
||||
return fs().unpacked_metadata();
|
||||
}
|
||||
std::unique_ptr<thrift::metadata::fs_options>
|
||||
thawed_fs_options() const override {
|
||||
return fs().thawed_fs_options();
|
||||
}
|
||||
|
||||
private:
|
||||
history history_;
|
||||
@ -1496,6 +1513,11 @@ filesystem_v2::unpacked_metadata() const {
|
||||
return full_().unpacked_metadata();
|
||||
}
|
||||
|
||||
std::unique_ptr<thrift::metadata::fs_options>
|
||||
filesystem_v2::thawed_fs_options() const {
|
||||
return full_().thawed_fs_options();
|
||||
}
|
||||
|
||||
auto filesystem_v2::full_() const -> impl const& { return this->as_<impl>(); }
|
||||
|
||||
} // namespace dwarfs::reader
|
||||
|
@ -175,11 +175,11 @@ filesystem_parser::filesystem_parser(std::shared_ptr<mmif> mm,
|
||||
DWARFS_THROW(runtime_error, "newer minor version");
|
||||
}
|
||||
|
||||
version_ = fh->minor >= 2 ? 2 : 1;
|
||||
major_ = fh->major;
|
||||
minor_ = fh->minor;
|
||||
header_version_ = fh->minor >= 2 ? 2 : 1;
|
||||
fs_version_.major = fh->major;
|
||||
fs_version_.minor = fh->minor;
|
||||
|
||||
if (minor_ >= 4) {
|
||||
if (fs_version_.minor >= 4) {
|
||||
find_index();
|
||||
}
|
||||
|
||||
@ -189,7 +189,7 @@ filesystem_parser::filesystem_parser(std::shared_ptr<mmif> mm,
|
||||
std::optional<fs_section> filesystem_parser::next_section() {
|
||||
if (index_.empty()) {
|
||||
if (std::cmp_less(offset_, image_offset_ + image_size_)) {
|
||||
auto section = fs_section(*mm_, offset_, version_);
|
||||
auto section = fs_section(*mm_, offset_, header_version_);
|
||||
offset_ = section.end();
|
||||
return section;
|
||||
}
|
||||
@ -201,7 +201,8 @@ std::optional<fs_section> filesystem_parser::next_section() {
|
||||
? index_[offset_] & section_offset_mask
|
||||
: image_size_;
|
||||
return fs_section(mm_, static_cast<section_type>(id >> 48),
|
||||
image_offset_ + offset, next_offset - offset, version_);
|
||||
image_offset_ + offset, next_offset - offset,
|
||||
header_version_);
|
||||
}
|
||||
}
|
||||
|
||||
@ -218,7 +219,7 @@ std::optional<std::span<uint8_t const>> filesystem_parser::header() const {
|
||||
void filesystem_parser::rewind() {
|
||||
if (index_.empty()) {
|
||||
offset_ = image_offset_;
|
||||
if (version_ == 1) {
|
||||
if (header_version_ == 1) {
|
||||
offset_ += sizeof(file_header);
|
||||
}
|
||||
} else {
|
||||
@ -227,10 +228,11 @@ void filesystem_parser::rewind() {
|
||||
}
|
||||
|
||||
std::string filesystem_parser::version() const {
|
||||
return fmt::format("{0}.{1} [{2}]", major_, minor_, version_);
|
||||
return fmt::format("{0}.{1} [{2}]", fs_version_.major, fs_version_.minor,
|
||||
header_version_);
|
||||
}
|
||||
|
||||
bool filesystem_parser::has_checksums() const { return version_ >= 2; }
|
||||
bool filesystem_parser::has_checksums() const { return header_version_ >= 2; }
|
||||
|
||||
bool filesystem_parser::has_index() const { return !index_.empty(); }
|
||||
|
||||
@ -261,7 +263,7 @@ void filesystem_parser::find_index() {
|
||||
return;
|
||||
}
|
||||
|
||||
auto section = fs_section(*mm_, index_pos, version_);
|
||||
auto section = fs_section(*mm_, index_pos, header_version_);
|
||||
|
||||
if (section.type() != section_type::SECTION_INDEX) {
|
||||
return;
|
||||
|
@ -90,6 +90,7 @@ namespace fs = std::filesystem;
|
||||
namespace {
|
||||
|
||||
using ::apache::thrift::frozen::MappedFrozen;
|
||||
using ::apache::thrift::frozen::View;
|
||||
|
||||
::apache::thrift::frozen::schema::Schema
|
||||
deserialize_schema(std::span<uint8_t const> data) {
|
||||
@ -426,6 +427,17 @@ void analyze_frozen(std::ostream& os,
|
||||
l.reg_file_size_cacheField.layout.valueField.layout.lookupField);
|
||||
}
|
||||
|
||||
if (auto list = meta.metadata_version_history()) {
|
||||
size_t history_size =
|
||||
list_size(*list, l.metadata_version_historyField.layout.valueField);
|
||||
for (auto const& entry : *list) {
|
||||
if (entry.dwarfs_version()) {
|
||||
history_size += entry.dwarfs_version()->size();
|
||||
}
|
||||
}
|
||||
add_size("metadata_version_history", list->size(), history_size);
|
||||
}
|
||||
|
||||
if (auto version = meta.dwarfs_version()) {
|
||||
add_size_unique("dwarfs_version", version->size());
|
||||
}
|
||||
@ -449,15 +461,32 @@ void analyze_frozen(std::ostream& os,
|
||||
}
|
||||
}
|
||||
|
||||
// Reports every boolean flag of a frozen fs_options view to `func`.
//
// `func` is called once per flag as func(name, value), always in the order
// mtime_only, packed_chunk_table, packed_directories,
// packed_shared_files_table. Callers such as get_fs_options() rely on that
// order for their output.
template <typename Function>
|
||||
void parse_fs_options(View<thrift::metadata::fs_options> opt,
|
||||
Function const& func) {
|
||||
func("mtime_only", opt.mtime_only());
|
||||
func("packed_chunk_table", opt.packed_chunk_table());
|
||||
func("packed_directories", opt.packed_directories());
|
||||
func("packed_shared_files_table", opt.packed_shared_files_table());
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
get_fs_options(View<thrift::metadata::fs_options> opt) {
|
||||
std::vector<std::string> rv;
|
||||
parse_fs_options(opt, [&](std::string_view name, bool value) {
|
||||
if (value) {
|
||||
rv.emplace_back(name);
|
||||
}
|
||||
});
|
||||
return rv;
|
||||
}
|
||||
|
||||
template <typename Function>
|
||||
void parse_metadata_options(
|
||||
MappedFrozen<thrift::metadata::metadata> const& meta,
|
||||
Function const& func) {
|
||||
if (auto opt = meta.options()) {
|
||||
func("mtime_only", opt->mtime_only());
|
||||
func("packed_chunk_table", opt->packed_chunk_table());
|
||||
func("packed_directories", opt->packed_directories());
|
||||
func("packed_shared_files_table", opt->packed_shared_files_table());
|
||||
parse_fs_options(*opt, func);
|
||||
}
|
||||
if (auto names = meta.compact_names()) {
|
||||
func("packed_names", static_cast<bool>(names->symtab()));
|
||||
@ -646,6 +675,14 @@ class metadata_v2_data {
|
||||
return std::make_unique<thrift::metadata::metadata>(meta_.thaw());
|
||||
}
|
||||
|
||||
std::unique_ptr<thrift::metadata::fs_options> thaw_fs_options() const {
|
||||
if (meta_.options().has_value()) {
|
||||
return std::make_unique<thrift::metadata::fs_options>(
|
||||
meta_.options()->thaw());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename K>
|
||||
using set_type = phmap::flat_hash_set<K>;
|
||||
@ -1552,6 +1589,44 @@ metadata_v2_data::info_as_json(fsinfo_options const& opts,
|
||||
meta["unique_files"] = unique_files_;
|
||||
}
|
||||
|
||||
if (auto history = meta_.metadata_version_history(); history.has_value()) {
|
||||
nlohmann::json jhistory = nlohmann::json::array();
|
||||
|
||||
for (auto const& ent : *history) {
|
||||
nlohmann::json jent;
|
||||
|
||||
jent["major"] = ent.major();
|
||||
jent["minor"] = ent.minor();
|
||||
|
||||
if (ent.dwarfs_version().has_value()) {
|
||||
jent["dwarfs_version"] = ent.dwarfs_version().value();
|
||||
}
|
||||
|
||||
jent["block_size"] = ent.block_size();
|
||||
|
||||
if (auto entopts = ent.options(); entopts.has_value()) {
|
||||
nlohmann::json options;
|
||||
|
||||
options["mtime_only"] = entopts->mtime_only();
|
||||
|
||||
if (auto res = entopts->time_resolution_sec(); res.has_value()) {
|
||||
options["time_resolution"] = res.value();
|
||||
}
|
||||
|
||||
options["packed_chunk_table"] = entopts->packed_chunk_table();
|
||||
options["packed_directories"] = entopts->packed_directories();
|
||||
options["packed_shared_files_table"] =
|
||||
entopts->packed_shared_files_table();
|
||||
|
||||
jent["options"] = std::move(options);
|
||||
}
|
||||
|
||||
jhistory.push_back(std::move(jent));
|
||||
}
|
||||
|
||||
meta["metadata_version_history"] = std::move(jhistory);
|
||||
}
|
||||
|
||||
info["meta"] = std::move(meta);
|
||||
}
|
||||
|
||||
@ -1674,6 +1749,24 @@ void metadata_v2_data::dump(
|
||||
}
|
||||
}
|
||||
|
||||
if (auto history = meta_.metadata_version_history(); history.has_value()) {
|
||||
os << "previous metadata versions:\n";
|
||||
for (auto const& ent : *history) {
|
||||
os << " [" << static_cast<int>(ent.major()) << "."
|
||||
<< static_cast<int>(ent.minor()) << "] "
|
||||
<< size_with_unit(ent.block_size()) << " blocks, "
|
||||
<< ent.dwarfs_version().value_or("<unknown library version>") << "\n";
|
||||
if (auto he_opts = ent.options()) {
|
||||
if (auto str_opts = get_fs_options(*he_opts); !str_opts.empty()) {
|
||||
os << " options: " << boost::join(str_opts, ", ") << "\n";
|
||||
}
|
||||
if (auto res = he_opts->time_resolution_sec()) {
|
||||
os << " time resolution: " << *res << " seconds\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (opts.features.has(fsinfo_feature::frozen_analysis)) {
|
||||
analyze_frozen(os, meta_, data_.size());
|
||||
}
|
||||
@ -2446,6 +2539,11 @@ std::unique_ptr<thrift::metadata::metadata> metadata_v2_utils::unpack() const {
|
||||
return data_.unpack();
|
||||
}
|
||||
|
||||
std::unique_ptr<thrift::metadata::fs_options>
|
||||
metadata_v2_utils::thaw_fs_options() const {
|
||||
return data_.thaw_fs_options();
|
||||
}
|
||||
|
||||
metadata_v2::metadata_v2(
|
||||
logger& lgr, std::span<uint8_t const> schema, std::span<uint8_t const> data,
|
||||
metadata_options const& options, int inode_offset,
|
||||
|
@ -174,8 +174,10 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
||||
using namespace dwarfs::writer::internal;
|
||||
|
||||
auto md = fs.unpacked_metadata();
|
||||
auto builder = metadata_builder(lgr, std::move(*md),
|
||||
opts.rebuild_metadata.value());
|
||||
auto fsopts = fs.thawed_fs_options();
|
||||
auto builder =
|
||||
metadata_builder(lgr, std::move(*md), fsopts.get(), fs.version(),
|
||||
opts.rebuild_metadata.value());
|
||||
auto [schema, data] =
|
||||
metadata_freezer(LOG_GET_LOGGER).freeze(builder.build());
|
||||
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
#include <thrift/lib/cpp2/protocol/DebugProtocol.h>
|
||||
|
||||
#include <dwarfs/fstypes.h>
|
||||
#include <dwarfs/logger.h>
|
||||
#include <dwarfs/version.h>
|
||||
#include <dwarfs/writer/metadata_options.h>
|
||||
@ -56,19 +57,23 @@ class metadata_builder_ final : public metadata_builder::impl {
|
||||
, options_{options} {}
|
||||
|
||||
metadata_builder_(logger& lgr, thrift::metadata::metadata const& md,
|
||||
thrift::metadata::fs_options const* orig_fs_options,
|
||||
filesystem_version const& orig_fs_version,
|
||||
metadata_options const& options)
|
||||
: LOG_PROXY_INIT(lgr)
|
||||
, md_{md}
|
||||
, options_{options} {
|
||||
upgrade_metadata();
|
||||
upgrade_metadata(orig_fs_options, orig_fs_version);
|
||||
}
|
||||
|
||||
metadata_builder_(logger& lgr, thrift::metadata::metadata&& md,
|
||||
thrift::metadata::fs_options const* orig_fs_options,
|
||||
filesystem_version const& orig_fs_version,
|
||||
metadata_options const& options)
|
||||
: LOG_PROXY_INIT(lgr)
|
||||
, md_{std::move(md)}
|
||||
, options_{options} {
|
||||
upgrade_metadata();
|
||||
upgrade_metadata(orig_fs_options, orig_fs_version);
|
||||
}
|
||||
|
||||
void set_devices(std::vector<uint64_t> devices) override {
|
||||
@ -129,7 +134,8 @@ class metadata_builder_ final : public metadata_builder::impl {
|
||||
|
||||
private:
|
||||
thrift::metadata::inode_size_cache build_inode_size_cache() const;
|
||||
void upgrade_metadata();
|
||||
void upgrade_metadata(thrift::metadata::fs_options const* orig_fs_options,
|
||||
filesystem_version const& orig_fs_version);
|
||||
void upgrade_from_pre_v2_2();
|
||||
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
@ -544,11 +550,23 @@ void metadata_builder_<LoggerPolicy>::upgrade_from_pre_v2_2() {
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void metadata_builder_<LoggerPolicy>::upgrade_metadata() {
|
||||
void metadata_builder_<LoggerPolicy>::upgrade_metadata(
|
||||
thrift::metadata::fs_options const* orig_fs_options,
|
||||
filesystem_version const& orig_fs_version) {
|
||||
auto tv = LOG_TIMED_VERBOSE;
|
||||
|
||||
// std::cout << apache::thrift::debugString(md_);
|
||||
|
||||
thrift::metadata::history_entry histent;
|
||||
histent.major() = orig_fs_version.major;
|
||||
histent.minor() = orig_fs_version.minor;
|
||||
histent.dwarfs_version().copy_from(md_.dwarfs_version());
|
||||
histent.block_size() = md_.block_size().value();
|
||||
if (orig_fs_options) {
|
||||
histent.options().ensure();
|
||||
histent.options() = *orig_fs_options;
|
||||
}
|
||||
|
||||
if (apache::thrift::is_non_optional_field_set_manually_or_by_serializer(
|
||||
md_.entry_table_v2_2())) {
|
||||
DWARFS_CHECK(!md_.dir_entries().has_value(),
|
||||
@ -558,8 +576,12 @@ void metadata_builder_<LoggerPolicy>::upgrade_metadata() {
|
||||
}
|
||||
|
||||
// TODO: update uid, gid, timestamp, mtime_only, time_resolution_sec
|
||||
// TODO: do we need to do this here???
|
||||
|
||||
tv << "upgrading metadata...";
|
||||
|
||||
md_.metadata_version_history().ensure();
|
||||
md_.metadata_version_history()->push_back(std::move(histent));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -569,18 +591,21 @@ metadata_builder::metadata_builder(logger& lgr, metadata_options const& options)
|
||||
make_unique_logging_object<impl, metadata_builder_, logger_policies>(
|
||||
lgr, options)} {}
|
||||
|
||||
metadata_builder::metadata_builder(logger& lgr,
|
||||
thrift::metadata::metadata const& md,
|
||||
metadata_options const& options)
|
||||
metadata_builder::metadata_builder(
|
||||
logger& lgr, thrift::metadata::metadata const& md,
|
||||
thrift::metadata::fs_options const* orig_fs_options,
|
||||
filesystem_version const& orig_fs_version, metadata_options const& options)
|
||||
: impl_{
|
||||
make_unique_logging_object<impl, metadata_builder_, logger_policies>(
|
||||
lgr, md, options)} {}
|
||||
lgr, md, orig_fs_options, orig_fs_version, options)} {}
|
||||
|
||||
metadata_builder::metadata_builder(logger& lgr, thrift::metadata::metadata&& md,
|
||||
metadata_options const& options)
|
||||
metadata_builder::metadata_builder(
|
||||
logger& lgr, thrift::metadata::metadata&& md,
|
||||
thrift::metadata::fs_options const* orig_fs_options,
|
||||
filesystem_version const& orig_fs_version, metadata_options const& options)
|
||||
: impl_{
|
||||
make_unique_logging_object<impl, metadata_builder_, logger_policies>(
|
||||
lgr, std::move(md), options)} {}
|
||||
lgr, std::move(md), orig_fs_options, orig_fs_version, options)} {}
|
||||
|
||||
metadata_builder::~metadata_builder() = default;
|
||||
|
||||
|
@ -1229,8 +1229,66 @@ auto get_image_path(std::string const& version) {
|
||||
|
||||
class compat_metadata : public testing::TestWithParam<std::string> {};
|
||||
|
||||
// Checks the "metadata_version_history" reported for a rewritten filesystem.
//
//   info     - info JSON of the rewritten filesystem
//   origfs   - the filesystem the rewrite was based on
//   originfo - info JSON of `origfs`
//
// The rewritten history must consist of the original history (if there was
// one) followed by exactly one new entry. That entry must describe `origfs`:
// its version, creator string, block size and options.
void check_history(nlohmann::json info, reader::filesystem_v2 const& origfs,
                   nlohmann::json originfo) {
  auto new_meta = info["meta"];
  auto old_meta = originfo["meta"];
  auto new_history = new_meta["metadata_version_history"];

  ASSERT_GE(new_history.size(), 1);

  if (!old_meta.contains("metadata_version_history")) {
    // No previous history: the rewrite must have added the only entry.
    EXPECT_EQ(new_history.size(), 1);
  } else {
    // Previous entries must be carried over unchanged, plus one new entry.
    auto old_history = old_meta["metadata_version_history"];
    ASSERT_EQ(new_history.size(), old_history.size() + 1);
    for (size_t idx = 0; idx < old_history.size(); ++idx) {
      EXPECT_EQ(new_history[idx], old_history[idx]);
    }
  }

  // The most recent entry describes the filesystem we rewrote from.
  auto latest = new_history.back();
  auto const orig_version = origfs.version();

  EXPECT_EQ(latest["major"], orig_version.major);
  EXPECT_EQ(latest["minor"], orig_version.minor);

  if (!originfo.contains("created_by")) {
    EXPECT_FALSE(latest.contains("dwarfs_version"));
  } else {
    EXPECT_EQ(latest["dwarfs_version"], originfo["created_by"]);
  }

  EXPECT_EQ(latest["block_size"], originfo["block_size"]);

  if (!originfo.contains("options")) {
    EXPECT_FALSE(latest.contains("options"));
    return;
  }

  // Every known flag defaults to false; flags listed in the original
  // filesystem's "options" array are switched on below.
  nlohmann::json expected{
      {"mtime_only", false},
      {"packed_chunk_table", false},
      {"packed_directories", false},
      {"packed_shared_files_table", false},
  };

  if (originfo.contains("time_resolution")) {
    expected["time_resolution"] = originfo["time_resolution"];
  }

  for (auto const& flag : originfo["options"]) {
    expected[flag.get<std::string>()] = true;
  }

  EXPECT_EQ(expected, latest["options"]);
}
|
||||
|
||||
void check_dynamic(std::string const& version, reader::filesystem_v2 const& fs,
|
||||
bool rebuild_metadata [[maybe_unused]] = false) {
|
||||
std::shared_ptr<mmif> origmm = nullptr,
|
||||
bool rebuild_metadata = false) {
|
||||
auto meta = fs.metadata_as_json();
|
||||
nlohmann::json ref;
|
||||
if (version.starts_with("0.2.")) {
|
||||
@ -1246,6 +1304,18 @@ void check_dynamic(std::string const& version, reader::filesystem_v2 const& fs,
|
||||
}
|
||||
}
|
||||
|
||||
if (rebuild_metadata) {
|
||||
test::test_logger lgr;
|
||||
test::os_access_mock os;
|
||||
reader::filesystem_options fsopts;
|
||||
fsopts.image_offset = reader::filesystem_options::IMAGE_OFFSET_AUTO;
|
||||
reader::filesystem_v2 orig(lgr, os, origmm, fsopts);
|
||||
reader::fsinfo_options io{
|
||||
.features = {reader::fsinfo_feature::metadata_details,
|
||||
reader::fsinfo_feature::metadata_summary}};
|
||||
check_history(fs.info_as_json(io), orig, orig.info_as_json(io));
|
||||
}
|
||||
|
||||
remove_inode_numbers(ref);
|
||||
remove_inode_numbers(meta);
|
||||
|
||||
@ -1353,13 +1423,14 @@ TEST_P(rewrite, filesystem_rewrite) {
|
||||
utility::rewrite_filesystem(lgr, fs, fsw, resolver, opts);
|
||||
};
|
||||
|
||||
std::shared_ptr<mmif> origmm = std::make_shared<mmap>(filename);
|
||||
|
||||
{
|
||||
writer::filesystem_writer fsw(rewritten, lgr, pool, prog);
|
||||
fsw.add_default_compressor(bc);
|
||||
auto mm = std::make_shared<mmap>(filename);
|
||||
EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, mm, idss));
|
||||
EXPECT_FALSE(reader::filesystem_v2::header(mm));
|
||||
rewrite_fs(fsw, mm);
|
||||
EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, origmm, idss));
|
||||
EXPECT_FALSE(reader::filesystem_v2::header(origmm));
|
||||
rewrite_fs(fsw, origmm);
|
||||
}
|
||||
|
||||
{
|
||||
@ -1367,7 +1438,7 @@ TEST_P(rewrite, filesystem_rewrite) {
|
||||
EXPECT_NO_THROW(reader::filesystem_v2::identify(lgr, os, mm, idss));
|
||||
EXPECT_FALSE(reader::filesystem_v2::header(mm));
|
||||
reader::filesystem_v2 fs(lgr, os, mm);
|
||||
check_dynamic(version, fs, rebuild_metadata.has_value());
|
||||
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
|
||||
check_checksums(fs);
|
||||
}
|
||||
|
||||
@ -1380,7 +1451,7 @@ TEST_P(rewrite, filesystem_rewrite) {
|
||||
writer::filesystem_writer fsw(rewritten, lgr, pool, prog, fsw_opts,
|
||||
&hdr_iss);
|
||||
fsw.add_default_compressor(bc);
|
||||
rewrite_fs(fsw, std::make_shared<mmap>(filename));
|
||||
rewrite_fs(fsw, origmm);
|
||||
}
|
||||
|
||||
{
|
||||
@ -1396,7 +1467,7 @@ TEST_P(rewrite, filesystem_rewrite) {
|
||||
reader::filesystem_options fsopts;
|
||||
fsopts.image_offset = reader::filesystem_options::IMAGE_OFFSET_AUTO;
|
||||
reader::filesystem_v2 fs(lgr, os, mm, fsopts);
|
||||
check_dynamic(version, fs, rebuild_metadata.has_value());
|
||||
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
|
||||
check_checksums(fs);
|
||||
}
|
||||
|
||||
@ -1438,13 +1509,14 @@ TEST_P(rewrite, filesystem_rewrite) {
|
||||
}
|
||||
|
||||
std::ostringstream rewritten4;
|
||||
origmm = std::make_shared<test::mmap_mock>(rewritten3.str());
|
||||
|
||||
{
|
||||
writer::filesystem_writer_options fsw_opts;
|
||||
fsw_opts.remove_header = true;
|
||||
writer::filesystem_writer fsw(rewritten4, lgr, pool, prog, fsw_opts);
|
||||
fsw.add_default_compressor(bc);
|
||||
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten3.str()));
|
||||
rewrite_fs(fsw, origmm);
|
||||
}
|
||||
|
||||
{
|
||||
@ -1453,18 +1525,19 @@ TEST_P(rewrite, filesystem_rewrite) {
|
||||
EXPECT_FALSE(reader::filesystem_v2::header(mm))
|
||||
<< folly::hexDump(rewritten4.str().data(), rewritten4.str().size());
|
||||
reader::filesystem_v2 fs(lgr, os, mm);
|
||||
check_dynamic(version, fs, rebuild_metadata.has_value());
|
||||
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
|
||||
check_checksums(fs);
|
||||
}
|
||||
|
||||
std::ostringstream rewritten5;
|
||||
origmm = std::make_shared<test::mmap_mock>(rewritten4.str());
|
||||
|
||||
{
|
||||
writer::filesystem_writer_options fsw_opts;
|
||||
fsw_opts.no_section_index = true;
|
||||
writer::filesystem_writer fsw(rewritten5, lgr, pool, prog, fsw_opts);
|
||||
fsw.add_default_compressor(bc);
|
||||
rewrite_fs(fsw, std::make_shared<test::mmap_mock>(rewritten4.str()));
|
||||
rewrite_fs(fsw, origmm);
|
||||
}
|
||||
|
||||
{
|
||||
@ -1473,7 +1546,7 @@ TEST_P(rewrite, filesystem_rewrite) {
|
||||
EXPECT_FALSE(reader::filesystem_v2::header(mm))
|
||||
<< folly::hexDump(rewritten5.str().data(), rewritten5.str().size());
|
||||
reader::filesystem_v2 fs(lgr, os, mm);
|
||||
check_dynamic(version, fs, rebuild_metadata.has_value());
|
||||
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
|
||||
check_checksums(fs);
|
||||
}
|
||||
}
|
||||
|
@ -79,10 +79,12 @@ std::string make_fragmented_file(size_t fragment_size, size_t fragment_count) {
|
||||
}
|
||||
|
||||
auto rebuild_metadata(logger& lgr, thrift::metadata::metadata const& md,
|
||||
thrift::metadata::fs_options const* fs_options,
|
||||
filesystem_version const& fs_version,
|
||||
writer::metadata_options const& options) {
|
||||
using namespace writer::internal;
|
||||
return metadata_freezer(lgr).freeze(
|
||||
metadata_builder(lgr, md, options).build());
|
||||
metadata_builder(lgr, md, fs_options, fs_version, options).build());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -153,8 +155,9 @@ TEST_F(metadata_test, basic) {
|
||||
// std::cout << ::apache::thrift::debugString(unpacked1) << std::endl;
|
||||
|
||||
{
|
||||
auto fsopts = fs.thawed_fs_options();
|
||||
auto [schema, data] = rebuild_metadata(
|
||||
lgr, unpacked1,
|
||||
lgr, unpacked1, fsopts.get(), fs.version(),
|
||||
{.plain_names_table = true, .no_create_timestamp = true});
|
||||
reader::internal::metadata_v2 mv2(lgr, schema.span(), data.span(), {});
|
||||
using utils = reader::internal::metadata_v2_utils;
|
||||
@ -164,6 +167,25 @@ TEST_F(metadata_test, basic) {
|
||||
|
||||
// std::cout << ::apache::thrift::debugString(unpacked2) << std::endl;
|
||||
|
||||
auto history = unpacked2.metadata_version_history();
|
||||
|
||||
ASSERT_TRUE(history.has_value());
|
||||
EXPECT_EQ(history->size(), 1);
|
||||
auto hent = history->at(0);
|
||||
EXPECT_EQ(hent.major().value(), fs.version().major);
|
||||
EXPECT_EQ(hent.minor().value(), fs.version().minor);
|
||||
ASSERT_TRUE(hent.dwarfs_version().has_value());
|
||||
ASSERT_TRUE(unpacked1.dwarfs_version().has_value());
|
||||
EXPECT_EQ(hent.dwarfs_version().value(),
|
||||
unpacked1.dwarfs_version().value());
|
||||
EXPECT_EQ(hent.block_size().value(), unpacked1.block_size().value());
|
||||
ASSERT_TRUE(hent.options().has_value());
|
||||
ASSERT_TRUE(unpacked1.options().has_value());
|
||||
EXPECT_EQ(hent.options().value(), unpacked1.options().value())
|
||||
<< thrift_diff(hent.options().value(), unpacked1.options().value());
|
||||
|
||||
unpacked2.metadata_version_history().reset();
|
||||
|
||||
EXPECT_EQ(unpacked1, unpacked2) << thrift_diff(unpacked1, unpacked2);
|
||||
EXPECT_NE(thawed1, thawed2) << thrift_diff(thawed1, thawed2);
|
||||
|
||||
|
@ -196,6 +196,22 @@ struct inode_size_cache {
|
||||
2: UInt64 min_chunk_count
|
||||
}
|
||||
|
||||
/*
|
||||
* One entry of the metadata rewrite history. It records the filesystem
|
||||
* version, library version, block size and options of earlier metadata.
|
||||
*/
|
||||
struct history_entry {
|
||||
// major and minor version numbers corresponding to the block header
|
||||
1: UInt8 major
|
||||
2: UInt8 minor
|
||||
|
||||
// version string of dwarfs library used to create the metadata
|
||||
3: optional string dwarfs_version
|
||||
|
||||
// block size of the metadata described by this entry
|
||||
4: UInt32 block_size
|
||||
// file system options of the metadata described by this entry; only
|
||||
// present if that metadata had options set
|
||||
5: optional fs_options options
|
||||
}
|
||||
|
||||
/**
|
||||
* File System Metadata
|
||||
*
|
||||
@ -436,4 +452,7 @@ struct metadata {
|
||||
// The metadata associated with each block. Maps from block
|
||||
// number to index into `categorization_metadata_json`.
|
||||
32: optional map<UInt32, UInt32> block_category_metadata
|
||||
|
||||
// history of earlier metadata versions; an entry describing the previous
// metadata is appended when existing metadata is rebuilt
|
||||
33: optional list<history_entry> metadata_version_history
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user