From 5345f48595bafcdb1fc737f2762cd78bc67225e5 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Wed, 14 May 2025 16:41:12 +0200 Subject: [PATCH] feat: use sane per-category defaults for rewriting --- include/dwarfs/internal/fs_section.h | 3 + include/dwarfs/writer/category_parser.h | 7 +- include/dwarfs/writer/contextual_option.h | 13 +-- src/reader/filesystem_v2.cpp | 1 + src/writer/category_parser.cpp | 14 ++-- test/tool_main_test.cpp | 98 +++++++++++++++++++++-- tools/src/mkdwarfs_main.cpp | 19 ++++- 7 files changed, 128 insertions(+), 27 deletions(-) diff --git a/include/dwarfs/internal/fs_section.h b/include/dwarfs/internal/fs_section.h index c64a0553..1d6789bc 100644 --- a/include/dwarfs/internal/fs_section.h +++ b/include/dwarfs/internal/fs_section.h @@ -52,6 +52,9 @@ class fs_section { bool is_known_compression() const { return impl_->is_known_compression(); } bool is_known_type() const { return impl_->is_known_type(); } compression_type compression() const { return impl_->compression(); } + std::string compression_name() const { + return get_compression_name(compression()); + } section_type type() const { return impl_->type(); } std::string name() const { return impl_->name(); } std::string description() const { return impl_->description(); } diff --git a/include/dwarfs/writer/category_parser.h b/include/dwarfs/writer/category_parser.h index d1e77a9d..cf8eee63 100644 --- a/include/dwarfs/writer/category_parser.h +++ b/include/dwarfs/writer/category_parser.h @@ -24,6 +24,7 @@ #pragma once #include +#include #include #include @@ -34,13 +35,17 @@ class category_resolver; class category_parser { public: - category_parser(std::shared_ptr resolver); + category_parser( + std::shared_ptr resolver, + std::unordered_set const& accepted_categories); std::vector parse(std::string_view arg) const; std::string to_string(fragment_category::value_type const& val) const; + bool has_resolver() const; private: std::shared_ptr resolver_; + std::unordered_set accepted_categories_; }; } // namespace dwarfs::writer diff --git a/include/dwarfs/writer/contextual_option.h b/include/dwarfs/writer/contextual_option.h index 4563de2e..215fe223 100644 --- a/include/dwarfs/writer/contextual_option.h +++ b/include/dwarfs/writer/contextual_option.h @@ -151,15 +151,8 @@ class contextual_option_parser { } else { auto ctx = arg.substr(0, pos); auto val = op_.parse(arg.substr(pos + 2)); - if constexpr (std::is_same_v< - std::invoke_result_t, - typename option_type::context_type>) { - add_contextual(cp_.parse(ctx), val, policy); - } else { - for (auto c : cp_.parse(ctx)) { - add_contextual(c, val, policy); - } + for (auto c : cp_.parse(ctx)) { + add_contextual(c, val, policy); } } } catch (std::exception const& e) { @@ -206,6 +199,8 @@ class contextual_option_parser { std::string const& name() const { return name_; } + bool has_category_resolver() const { return cp_.has_resolver(); } + private: void add_contextual(typename option_type::context_type const& ctx, typename option_type::value_type const& val, diff --git a/src/reader/filesystem_v2.cpp b/src/reader/filesystem_v2.cpp index e0ac50c8..978a1349 100644 --- a/src/reader/filesystem_v2.cpp +++ b/src/reader/filesystem_v2.cpp @@ -752,6 +752,7 @@ filesystem_::info_as_json(fsinfo_options const& opts, nlohmann::json section_info{ {"type", s.name()}, + {"compression", s.compression_name()}, {"compressed_size", s.length()}, {"checksum_ok", checksum_ok}, }; diff --git a/src/writer/category_parser.cpp b/src/writer/category_parser.cpp index e1547ffe..f3e09a9e 100644 --- a/src/writer/category_parser.cpp +++ b/src/writer/category_parser.cpp @@ -30,8 +30,10 @@ namespace dwarfs::writer { category_parser::category_parser( - std::shared_ptr resolver) - : resolver_{std::move(resolver)} {} + std::shared_ptr resolver, + std::unordered_set const& accepted_categories) + : resolver_{std::move(resolver)} + , accepted_categories_{accepted_categories} {} std::vector category_parser::parse(std::string_view arg) const { @@ -43,12 +45,10 @@ category_parser::parse(std::string_view arg) const { std::vector rv; auto categories = split_to>(arg, ','); - rv.reserve(categories.size()); - for (auto const& name : categories) { if (auto val = resolver_->category_value(name)) { rv.emplace_back(*val); - } else { + } else if (!accepted_categories_.contains(name)) { throw std::range_error(fmt::format("unknown category: '{}'", name)); } } @@ -61,4 +61,8 @@ category_parser::to_string(fragment_category::value_type const& val) const { return std::string(resolver_->category_name(val)); } +bool category_parser::has_resolver() const { + return static_cast(resolver_); +} + } // namespace dwarfs::writer diff --git a/test/tool_main_test.cpp b/test/tool_main_test.cpp index b82efe4a..ca8a3611 100644 --- a/test/tool_main_test.cpp +++ b/test/tool_main_test.cpp @@ -1579,8 +1579,9 @@ TEST_P(mkdwarfs_recompress_test, recompress) { std::string compression_type = compression; std::string const image_file = "test.dwarfs"; std::string image; - reader::fsinfo_options const history_opts{ - .features = {reader::fsinfo_feature::history}}; + reader::fsinfo_options const info_opts{ + .features = {reader::fsinfo_feature::history, + reader::fsinfo_feature::section_details}}; if (auto pos = compression_type.find(':'); pos != std::string::npos) { compression_type.erase(pos); @@ -1590,11 +1591,47 @@ TEST_P(mkdwarfs_recompress_test, recompress) { compression_type = "NONE"; } + auto get_block_compression = [](nlohmann::json const& info) { + std::map> ccmap; + for (auto const& sec : info["sections"]) { + if (sec["type"] == "BLOCK") { + ccmap[sec["category"].get()].insert( + sec["compression"].get()); + } + } + return ccmap; + }; + + std::set const waveform_compressions{ +#ifdef DWARFS_HAVE_FLAC + "FLAC", +#else + "ZSTD", + "NONE", +#endif + }; + + std::string const fits_compression{ +#ifdef DWARFS_HAVE_RICEPP + "RICEPP", +#else + "ZSTD", +#endif + }; + + std::string const l1_compression{ +#ifdef DWARFS_HAVE_LIBLZ4 + "LZ4", +#else + "ZSTD", +#endif + }; + { mkdwarfs_tester t; t.os->add_local_files(audio_data_dir); t.os->add_local_files(fits_data_dir); - t.os->add_file("random", 4096, true); + t.os->add_file("random", test::create_random_string(4096)); ASSERT_EQ(0, t.run({"-i", "/", "-o", image_file, "--categorize", "-C", compression})) << t.err(); @@ -1602,8 +1639,20 @@ TEST_P(mkdwarfs_recompress_test, recompress) { EXPECT_TRUE(img); image = std::move(img.value()); auto fs = t.fs_from_file(image_file); - auto history = fs.info_as_json(history_opts)["history"]; + auto info = fs.info_as_json(info_opts); + auto history = info["history"]; EXPECT_EQ(1, history.size()); + + auto ccmap = get_block_compression(info); + std::map> const expected_ccmap{ + {"", {compression_type}}, + {"incompressible", {"NONE"}}, + {"pcmaudio/waveform", waveform_compressions}, + {"pcmaudio/metadata", {compression_type}}, + {"fits/image", {fits_compression}}, + {"fits/metadata", {compression_type}}, + }; + EXPECT_EQ(expected_ccmap, ccmap); } auto tester = [&image_file](std::string const& image_data) { @@ -1619,8 +1668,17 @@ TEST_P(mkdwarfs_recompress_test, recompress) { << t.err(); auto fs = t.fs_from_stdout(); EXPECT_TRUE(fs.find("/random")); - auto history = fs.info_as_json(history_opts)["history"]; + auto info = fs.info_as_json(info_opts); + auto history = info["history"]; EXPECT_EQ(2, history.size()); + + auto ccmap = get_block_compression(info); + std::map> const expected_ccmap{ + {"", {"NONE"}}, {"incompressible", {"NONE"}}, + {"pcmaudio/waveform", {"NONE"}}, {"pcmaudio/metadata", {"NONE"}}, + {"fits/image", {"NONE"}}, {"fits/metadata", {"NONE"}}, + }; + EXPECT_EQ(expected_ccmap, ccmap); } { @@ -1629,8 +1687,20 @@ TEST_P(mkdwarfs_recompress_test, recompress) { << t.err(); auto fs = t.fs_from_stdout(); EXPECT_TRUE(fs.find("/random")); - auto history = fs.info_as_json(history_opts)["history"]; + auto info = fs.info_as_json(info_opts); + auto history = info["history"]; EXPECT_EQ(2, history.size()); + + auto ccmap = get_block_compression(info); + std::map> const expected_ccmap{ + {"", {l1_compression}}, + {"incompressible", {"NONE"}}, + {"pcmaudio/waveform", waveform_compressions}, + {"pcmaudio/metadata", {l1_compression}}, + {"fits/image", {fits_compression}}, + {"fits/metadata", {l1_compression}}, + }; + EXPECT_EQ(expected_ccmap, ccmap); } { @@ -1651,10 +1721,22 @@ TEST_P(mkdwarfs_recompress_test, recompress) { auto t = tester(image); ASSERT_EQ(0, t.run({"-i", image_file, "-o", "-", "--recompress=block", "--recompress-categories=!pcmaudio/waveform", "-C", - "pcmaudio/metadata::null"})) + "pcmaudio/metadata::null", "-l1"})) << t.err(); auto fs = t.fs_from_stdout(); EXPECT_TRUE(fs.find("/random")); + + auto info = fs.info_as_json(info_opts); + auto ccmap = get_block_compression(info); + std::map> const expected_ccmap{ + {"", {l1_compression}}, + {"incompressible", {"NONE"}}, + {"pcmaudio/waveform", waveform_compressions}, + {"pcmaudio/metadata", {"NONE"}}, + {"fits/image", {fits_compression}}, + {"fits/metadata", {l1_compression}}, + }; + EXPECT_EQ(expected_ccmap, ccmap); } #ifdef DWARFS_HAVE_FLAC @@ -1715,7 +1797,7 @@ TEST_P(mkdwarfs_recompress_test, recompress) { auto fs = t.fs_from_stdout(); EXPECT_TRUE(fs.find("/random")); EXPECT_EQ(0, fs.get_history().size()); - EXPECT_EQ(1, fs.info_as_json(history_opts).count("history")); + EXPECT_EQ(1, fs.info_as_json(info_opts).count("history")); EXPECT_THAT(t.err(), ::testing::HasSubstr("removing HISTORY")); auto t2 = tester(t.out()); diff --git a/tools/src/mkdwarfs_main.cpp b/tools/src/mkdwarfs_main.cpp index 6de2e3db..5fcc1b47 100644 --- a/tools/src/mkdwarfs_main.cpp +++ b/tools/src/mkdwarfs_main.cpp @@ -259,6 +259,8 @@ categorize_defaults_type const& categorize_defaults_common() { } categorize_defaults_type const& categorize_defaults_level(unsigned level) { + static categorize_defaults_type const defaults_off; + static categorize_defaults_type const defaults_fast{ // clang-format off {"--order", {"pcmaudio/waveform::revpath", "fits/image::revpath"}}, @@ -319,7 +321,7 @@ categorize_defaults_type const& categorize_defaults_level(unsigned level) { static constexpr std::array defaults_level{{ // clang-format off - /* 0 */ &defaults_fast, + /* 0 */ &defaults_off, /* 1 */ &defaults_fast, /* 2 */ &defaults_fast, /* 3 */ &defaults_fast, @@ -347,12 +349,11 @@ class categorize_optval { bool empty() const { return value_.empty(); } std::string const& value() const { return value_; } - bool is_implicit_default() const { return !empty() && !is_explicit_; } bool is_explicit() const { return is_explicit_; } template void add_implicit_defaults(T& cop) const { - if (is_implicit_default()) { + if (cop.has_category_resolver()) { if (auto it = defaults_.find(cop.name()); it != defaults_.end()) { for (auto const& v : it->second) { cop.parse_fallback(v); @@ -1262,7 +1263,17 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { cat_resolver = options.inode.categorizer_mgr; } - writer::category_parser cp(cat_resolver); + std::unordered_set accepted_categories; + + for (auto const& name : catreg.categorizer_names()) { + stream_logger lgr(iol.term, iol.err); + auto categorizer = catreg.create(lgr, name, vm, iol.file); + for (auto cat : categorizer->categories()) { + accepted_categories.insert(cat); + } + } + + writer::category_parser cp(cat_resolver, accepted_categories); try { {