feat: use sane per-category defaults for rewriting

This commit is contained in:
Marcus Holland-Moritz 2025-05-14 16:41:12 +02:00
parent 53384f6faf
commit 5345f48595
7 changed files with 128 additions and 27 deletions

View File

@ -52,6 +52,9 @@ class fs_section {
bool is_known_compression() const { return impl_->is_known_compression(); }
bool is_known_type() const { return impl_->is_known_type(); }
compression_type compression() const { return impl_->compression(); }
std::string compression_name() const {
return get_compression_name(compression());
}
section_type type() const { return impl_->type(); }
std::string name() const { return impl_->name(); }
std::string description() const { return impl_->description(); }

View File

@ -24,6 +24,7 @@
#pragma once
#include <memory>
#include <unordered_set>
#include <vector>
#include <dwarfs/writer/fragment_category.h>
@ -34,13 +35,17 @@ class category_resolver;
class category_parser {
public:
category_parser(std::shared_ptr<category_resolver const> resolver);
category_parser(
std::shared_ptr<category_resolver const> resolver,
std::unordered_set<std::string_view> const& accepted_categories);
std::vector<fragment_category::value_type> parse(std::string_view arg) const;
std::string to_string(fragment_category::value_type const& val) const;
bool has_resolver() const;
private:
std::shared_ptr<category_resolver const> resolver_;
std::unordered_set<std::string_view> accepted_categories_;
};
} // namespace dwarfs::writer

View File

@ -151,15 +151,8 @@ class contextual_option_parser {
} else {
auto ctx = arg.substr(0, pos);
auto val = op_.parse(arg.substr(pos + 2));
if constexpr (std::is_same_v<
std::invoke_result_t<decltype(&ContextParser::parse),
ContextParser, decltype(ctx)>,
typename option_type::context_type>) {
add_contextual(cp_.parse(ctx), val, policy);
} else {
for (auto c : cp_.parse(ctx)) {
add_contextual(c, val, policy);
}
for (auto c : cp_.parse(ctx)) {
add_contextual(c, val, policy);
}
}
} catch (std::exception const& e) {
@ -206,6 +199,8 @@ class contextual_option_parser {
std::string const& name() const { return name_; }
bool has_category_resolver() const { return cp_.has_resolver(); }
private:
void add_contextual(typename option_type::context_type const& ctx,
typename option_type::value_type const& val,

View File

@ -752,6 +752,7 @@ filesystem_<LoggerPolicy>::info_as_json(fsinfo_options const& opts,
nlohmann::json section_info{
{"type", s.name()},
{"compression", s.compression_name()},
{"compressed_size", s.length()},
{"checksum_ok", checksum_ok},
};

View File

@ -30,8 +30,10 @@
namespace dwarfs::writer {
category_parser::category_parser(
std::shared_ptr<category_resolver const> resolver)
: resolver_{std::move(resolver)} {}
std::shared_ptr<category_resolver const> resolver,
std::unordered_set<std::string_view> const& accepted_categories)
: resolver_{std::move(resolver)}
, accepted_categories_{accepted_categories} {}
std::vector<fragment_category::value_type>
category_parser::parse(std::string_view arg) const {
@ -43,12 +45,10 @@ category_parser::parse(std::string_view arg) const {
std::vector<fragment_category::value_type> rv;
auto categories = split_to<std::vector<std::string_view>>(arg, ',');
rv.reserve(categories.size());
for (auto const& name : categories) {
if (auto val = resolver_->category_value(name)) {
rv.emplace_back(*val);
} else {
} else if (!accepted_categories_.contains(name)) {
throw std::range_error(fmt::format("unknown category: '{}'", name));
}
}
@ -61,4 +61,8 @@ category_parser::to_string(fragment_category::value_type const& val) const {
return std::string(resolver_->category_name(val));
}
bool category_parser::has_resolver() const {
return static_cast<bool>(resolver_);
}
} // namespace dwarfs::writer

View File

@ -1579,8 +1579,9 @@ TEST_P(mkdwarfs_recompress_test, recompress) {
std::string compression_type = compression;
std::string const image_file = "test.dwarfs";
std::string image;
reader::fsinfo_options const history_opts{
.features = {reader::fsinfo_feature::history}};
reader::fsinfo_options const info_opts{
.features = {reader::fsinfo_feature::history,
reader::fsinfo_feature::section_details}};
if (auto pos = compression_type.find(':'); pos != std::string::npos) {
compression_type.erase(pos);
@ -1590,11 +1591,47 @@ TEST_P(mkdwarfs_recompress_test, recompress) {
compression_type = "NONE";
}
auto get_block_compression = [](nlohmann::json const& info) {
std::map<std::string, std::set<std::string>> ccmap;
for (auto const& sec : info["sections"]) {
if (sec["type"] == "BLOCK") {
ccmap[sec["category"].get<std::string>()].insert(
sec["compression"].get<std::string>());
}
}
return ccmap;
};
std::set<std::string> const waveform_compressions{
#ifdef DWARFS_HAVE_FLAC
"FLAC",
#else
"ZSTD",
"NONE",
#endif
};
std::string const fits_compression{
#ifdef DWARFS_HAVE_RICEPP
"RICEPP",
#else
"ZSTD",
#endif
};
std::string const l1_compression{
#ifdef DWARFS_HAVE_LIBLZ4
"LZ4",
#else
"ZSTD",
#endif
};
{
mkdwarfs_tester t;
t.os->add_local_files(audio_data_dir);
t.os->add_local_files(fits_data_dir);
t.os->add_file("random", 4096, true);
t.os->add_file("random", test::create_random_string(4096));
ASSERT_EQ(0, t.run({"-i", "/", "-o", image_file, "--categorize", "-C",
compression}))
<< t.err();
@ -1602,8 +1639,20 @@ TEST_P(mkdwarfs_recompress_test, recompress) {
EXPECT_TRUE(img);
image = std::move(img.value());
auto fs = t.fs_from_file(image_file);
auto history = fs.info_as_json(history_opts)["history"];
auto info = fs.info_as_json(info_opts);
auto history = info["history"];
EXPECT_EQ(1, history.size());
auto ccmap = get_block_compression(info);
std::map<std::string, std::set<std::string>> const expected_ccmap{
{"<default>", {compression_type}},
{"incompressible", {"NONE"}},
{"pcmaudio/waveform", waveform_compressions},
{"pcmaudio/metadata", {compression_type}},
{"fits/image", {fits_compression}},
{"fits/metadata", {compression_type}},
};
EXPECT_EQ(expected_ccmap, ccmap);
}
auto tester = [&image_file](std::string const& image_data) {
@ -1619,8 +1668,17 @@ TEST_P(mkdwarfs_recompress_test, recompress) {
<< t.err();
auto fs = t.fs_from_stdout();
EXPECT_TRUE(fs.find("/random"));
auto history = fs.info_as_json(history_opts)["history"];
auto info = fs.info_as_json(info_opts);
auto history = info["history"];
EXPECT_EQ(2, history.size());
auto ccmap = get_block_compression(info);
std::map<std::string, std::set<std::string>> const expected_ccmap{
{"<default>", {"NONE"}}, {"incompressible", {"NONE"}},
{"pcmaudio/waveform", {"NONE"}}, {"pcmaudio/metadata", {"NONE"}},
{"fits/image", {"NONE"}}, {"fits/metadata", {"NONE"}},
};
EXPECT_EQ(expected_ccmap, ccmap);
}
{
@ -1629,8 +1687,20 @@ TEST_P(mkdwarfs_recompress_test, recompress) {
<< t.err();
auto fs = t.fs_from_stdout();
EXPECT_TRUE(fs.find("/random"));
auto history = fs.info_as_json(history_opts)["history"];
auto info = fs.info_as_json(info_opts);
auto history = info["history"];
EXPECT_EQ(2, history.size());
auto ccmap = get_block_compression(info);
std::map<std::string, std::set<std::string>> const expected_ccmap{
{"<default>", {l1_compression}},
{"incompressible", {"NONE"}},
{"pcmaudio/waveform", waveform_compressions},
{"pcmaudio/metadata", {l1_compression}},
{"fits/image", {fits_compression}},
{"fits/metadata", {l1_compression}},
};
EXPECT_EQ(expected_ccmap, ccmap);
}
{
@ -1651,10 +1721,22 @@ TEST_P(mkdwarfs_recompress_test, recompress) {
auto t = tester(image);
ASSERT_EQ(0, t.run({"-i", image_file, "-o", "-", "--recompress=block",
"--recompress-categories=!pcmaudio/waveform", "-C",
"pcmaudio/metadata::null"}))
"pcmaudio/metadata::null", "-l1"}))
<< t.err();
auto fs = t.fs_from_stdout();
EXPECT_TRUE(fs.find("/random"));
auto info = fs.info_as_json(info_opts);
auto ccmap = get_block_compression(info);
std::map<std::string, std::set<std::string>> const expected_ccmap{
{"<default>", {l1_compression}},
{"incompressible", {"NONE"}},
{"pcmaudio/waveform", waveform_compressions},
{"pcmaudio/metadata", {"NONE"}},
{"fits/image", {fits_compression}},
{"fits/metadata", {l1_compression}},
};
EXPECT_EQ(expected_ccmap, ccmap);
}
#ifdef DWARFS_HAVE_FLAC
@ -1715,7 +1797,7 @@ TEST_P(mkdwarfs_recompress_test, recompress) {
auto fs = t.fs_from_stdout();
EXPECT_TRUE(fs.find("/random"));
EXPECT_EQ(0, fs.get_history().size());
EXPECT_EQ(1, fs.info_as_json(history_opts).count("history"));
EXPECT_EQ(1, fs.info_as_json(info_opts).count("history"));
EXPECT_THAT(t.err(), ::testing::HasSubstr("removing HISTORY"));
auto t2 = tester(t.out());

View File

@ -259,6 +259,8 @@ categorize_defaults_type const& categorize_defaults_common() {
}
categorize_defaults_type const& categorize_defaults_level(unsigned level) {
static categorize_defaults_type const defaults_off;
static categorize_defaults_type const defaults_fast{
// clang-format off
{"--order", {"pcmaudio/waveform::revpath", "fits/image::revpath"}},
@ -319,7 +321,7 @@ categorize_defaults_type const& categorize_defaults_level(unsigned level) {
static constexpr std::array<categorize_defaults_type const*, 10>
defaults_level{{
// clang-format off
/* 0 */ &defaults_fast,
/* 0 */ &defaults_off,
/* 1 */ &defaults_fast,
/* 2 */ &defaults_fast,
/* 3 */ &defaults_fast,
@ -347,12 +349,11 @@ class categorize_optval {
bool empty() const { return value_.empty(); }
std::string const& value() const { return value_; }
bool is_implicit_default() const { return !empty() && !is_explicit_; }
bool is_explicit() const { return is_explicit_; }
template <typename T>
void add_implicit_defaults(T& cop) const {
if (is_implicit_default()) {
if (cop.has_category_resolver()) {
if (auto it = defaults_.find(cop.name()); it != defaults_.end()) {
for (auto const& v : it->second) {
cop.parse_fallback(v);
@ -1262,7 +1263,17 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
cat_resolver = options.inode.categorizer_mgr;
}
writer::category_parser cp(cat_resolver);
std::unordered_set<std::string_view> accepted_categories;
for (auto const& name : catreg.categorizer_names()) {
stream_logger lgr(iol.term, iol.err);
auto categorizer = catreg.create(lgr, name, vm, iol.file);
for (auto cat : categorizer->categories()) {
accepted_categories.insert(cat);
}
}
writer::category_parser cp(cat_resolver, accepted_categories);
try {
{