refactor(categorizer): allow access to root and relative paths

This commit is contained in:
Marcus Holland-Moritz 2025-04-04 13:17:11 +02:00
parent db2d20dfce
commit d84d7535c7
9 changed files with 62 additions and 31 deletions

View File

@ -65,10 +65,26 @@ class categorizer {
subcategory_less(fragment_category a, fragment_category b) const = 0;
};
/**
 * Path information handed to categorizers.
 *
 * Bundles a root path together with the full path of an entry so that a
 * categorizer can look at either one, or ask for the full path expressed
 * relative to the root via `relative_path()` (defined out of line).
 *
 * Both paths are stored by value. Holding them as reference members would
 * leave the object dangling whenever it is built from temporaries (e.g.
 * string literals converted to `std::filesystem::path`) or outlives the
 * paths it was constructed from; owning copies removes that lifetime
 * requirement while keeping the constructor and accessors unchanged.
 */
class file_path_info {
 public:
  /**
   * @param root_path  root path that `full_path` is considered relative to
   * @param full_path  full path of the entry being categorized
   */
  file_path_info(std::filesystem::path const& root_path,
                 std::filesystem::path const& full_path)
      : root_path_{root_path}
      , full_path_{full_path} {}

  /// Root path as passed to the constructor.
  std::filesystem::path const& root_path() const { return root_path_; }

  /// Full path as passed to the constructor.
  std::filesystem::path const& full_path() const { return full_path_; }

  /// Full path expressed relative to the root path.
  std::filesystem::path relative_path() const;

 private:
  std::filesystem::path root_path_;
  std::filesystem::path full_path_;
};
// Categorizer interface whose `categorize()` is handed the data to inspect as
// a single contiguous span of bytes.
class random_access_categorizer : public categorizer {
public:
// Diff view: the two `categorize(` lines below are the removed and the added
// form of the same declaration. This commit changes `path` from a plain
// `std::filesystem::path` to `file_path_info`.
//
// The caller shown in this commit tests the returned `inode_fragments` in a
// boolean context and only records a result that evaluates to true.
virtual inode_fragments
categorize(std::filesystem::path const& path, std::span<uint8_t const> data,
categorize(file_path_info const& path, std::span<uint8_t const> data,
category_mapper const& mapper) const = 0;
};
@ -85,7 +101,7 @@ class sequential_categorizer_job {
// Categorizer interface that does not categorize directly but hands out a
// `sequential_categorizer_job` object via `job()`.
class sequential_categorizer : public categorizer {
public:
// Diff view: the two `job(` lines below are the removed and the added form of
// the same declaration. This commit changes `path` from a plain
// `std::filesystem::path` to `file_path_info`.
//
// The returned pointer may be null: the caller shown in this commit skips
// null jobs, and the incompressible categorizer returns nullptr when
// `total_size` is below its configured minimum input size.
virtual std::unique_ptr<sequential_categorizer_job>
job(std::filesystem::path const& path, size_t total_size,
job(file_path_info const& path, size_t total_size,
category_mapper const& mapper) const = 0;
};
@ -129,7 +145,7 @@ class categorizer_job {
class categorizer_manager : public category_resolver {
public:
categorizer_manager(logger& lgr);
categorizer_manager(logger& lgr, std::filesystem::path root);
static fragment_category default_category();

View File

@ -45,6 +45,8 @@
namespace dwarfs::writer {
namespace fs = std::filesystem;
namespace internal {
using namespace std::placeholders;
@ -61,9 +63,10 @@ template <typename LoggerPolicy>
class categorizer_job_ final : public categorizer_job::impl {
public:
categorizer_job_(logger& lgr, categorizer_manager_private const& mgr,
std::filesystem::path const& path)
fs::path const& root_path, fs::path const& path)
: LOG_PROXY_INIT(lgr)
, mgr_{mgr}
, root_path_{root_path}
, path_{path}
, cat_mapper_{// NOLINTNEXTLINE(modernize-avoid-bind)
std::bind(&categorizer_manager_private::category,
@ -85,7 +88,8 @@ class categorizer_job_ final : public categorizer_job::impl {
size_t total_size_{0};
std::vector<std::pair<int, std::unique_ptr<sequential_categorizer_job>>>
seq_jobs_;
std::filesystem::path const path_;
fs::path const& root_path_;
fs::path const path_;
category_mapper cat_mapper_;
};
@ -104,9 +108,11 @@ void categorizer_job_<LoggerPolicy>::categorize_random_access(
bool global_best = true;
file_path_info path_info{root_path_, path_};
for (auto&& [index, cat] : ranges::views::enumerate(mgr_.categorizers())) {
if (auto p = dynamic_cast<random_access_categorizer*>(cat.get())) {
if (auto c = p->categorize(path_, data, cat_mapper_)) {
if (auto c = p->categorize(path_info, data, cat_mapper_)) {
best_ = c;
index_ = index;
is_global_best_ = global_best;
@ -126,13 +132,15 @@ void categorizer_job_<LoggerPolicy>::categorize_sequential(
}
if (seq_jobs_.empty()) [[unlikely]] {
file_path_info path_info{root_path_, path_};
for (auto&& [index, cat] : ranges::views::enumerate(mgr_.categorizers())) {
if (index_ >= 0 && std::cmp_greater_equal(index, index_)) {
break;
}
if (auto p = dynamic_cast<sequential_categorizer*>(cat.get())) {
if (auto job = p->job(path_, total_size_, cat_mapper_)) {
if (auto job = p->job(path_info, total_size_, cat_mapper_)) {
seq_jobs_.emplace_back(index, std::move(job));
}
}
@ -174,15 +182,16 @@ bool categorizer_job_<LoggerPolicy>::best_result_found() const {
template <typename LoggerPolicy>
class categorizer_manager_ final : public categorizer_manager_private {
public:
explicit categorizer_manager_(logger& lgr)
explicit categorizer_manager_(logger& lgr, fs::path root)
: lgr_{lgr}
, LOG_PROXY_INIT(lgr) {
, LOG_PROXY_INIT(lgr)
, root_path_{std::move(root)} {
add_category(categorizer::DEFAULT_CATEGORY,
std::numeric_limits<size_t>::max());
}
void add(std::shared_ptr<categorizer> c) override;
categorizer_job job(std::filesystem::path const& path) const override;
categorizer_job job(fs::path const& path) const override;
std::string_view
category_name(fragment_category::value_type c) const override;
@ -229,6 +238,7 @@ class categorizer_manager_ final : public categorizer_manager_private {
// TODO: category descriptions?
std::vector<std::pair<std::string_view, size_t>> categories_;
std::unordered_map<std::string_view, fragment_category::value_type> catmap_;
fs::path root_path_;
};
template <typename LoggerPolicy>
@ -241,11 +251,12 @@ void categorizer_manager_<LoggerPolicy>::add(std::shared_ptr<categorizer> c) {
}
// Creates a `categorizer_job` for `path`, backed by a `categorizer_job_`
// implementation object.
//
// Diff view: removed and added lines of this definition are interleaved
// below. The added form additionally passes this manager's `root_path_` to
// the job implementation.
//
// NOTE: `categorizer_job_` keeps `root_path_` as a `fs::path const&` member,
// so jobs created here must not outlive this manager.
template <typename LoggerPolicy>
categorizer_job categorizer_manager_<LoggerPolicy>::job(
std::filesystem::path const& path) const {
categorizer_job
categorizer_manager_<LoggerPolicy>::job(fs::path const& path) const {
return categorizer_job(
make_unique_logging_object<categorizer_job::impl, categorizer_job_,
logger_policies>(lgr_, *this, path));
logger_policies>(lgr_, *this, root_path_,
path));
}
template <typename LoggerPolicy>
@ -307,6 +318,10 @@ bool categorizer_manager_<LoggerPolicy>::deterministic_less(
namespace po = boost::program_options;
// Returns the full path expressed relative to the root path.
//
// The result comes from `lexically_relative()`, i.e. it is computed from the
// two stored paths themselves.
fs::path file_path_info::relative_path() const {
  auto const& base = root_path_;
  fs::path rel = full_path_.lexically_relative(base);
  return rel;
}
std::string category_prefix(std::shared_ptr<categorizer_manager> const& mgr,
fragment_category cat) {
return category_prefix(mgr.get(), cat);
@ -361,9 +376,10 @@ categorizer_job::categorizer_job() = default;
// Takes ownership of the given implementation object; all that happens here
// is that the pointer is moved into `impl_`.
categorizer_job::categorizer_job(std::unique_ptr<impl> impl)
    : impl_(std::move(impl)) {
}
// Constructs the manager by creating its `internal::categorizer_manager_`
// implementation object.
//
// Diff view: removed and added lines of this definition are interleaved
// below. The added form takes `root` by value and moves it into the
// implementation, which stores it as its `root_path_` member.
categorizer_manager::categorizer_manager(logger& lgr)
categorizer_manager::categorizer_manager(logger& lgr, fs::path root)
: impl_(make_unique_logging_object<impl, internal::categorizer_manager_,
logger_policies>(lgr)) {}
logger_policies>(lgr, std::move(root))) {
}
fragment_category categorizer_manager::default_category() {
return fragment_category(0);

View File

@ -336,7 +336,7 @@ class fits_categorizer_ final : public fits_categorizer_base {
}
inode_fragments
categorize(fs::path const& path, std::span<uint8_t const> data,
categorize(file_path_info const& path, std::span<uint8_t const> data,
category_mapper const& mapper) const override;
std::string category_metadata(std::string_view category_name,
@ -392,7 +392,7 @@ bool fits_categorizer_<LoggerPolicy>::check_metadata(
template <typename LoggerPolicy>
inode_fragments fits_categorizer_<LoggerPolicy>::categorize(
fs::path const& path, std::span<uint8_t const> data,
file_path_info const& path, std::span<uint8_t const> data,
category_mapper const& mapper) const {
inode_fragments fragments;
@ -406,7 +406,7 @@ inode_fragments fits_categorizer_<LoggerPolicy>::categorize(
meta.unused_lsb_count = fi->unused_lsb_count;
meta.component_count = fi->component_count;
if (check_metadata(meta, path)) {
if (check_metadata(meta, path.full_path())) {
auto subcategory = meta_.wlock()->add(meta);
fragments.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
fi->header.size());

View File

@ -208,7 +208,7 @@ class incompressible_categorizer_ final : public sequential_categorizer {
std::span<std::string_view const> categories() const override;
std::unique_ptr<sequential_categorizer_job>
job(std::filesystem::path const& path, size_t total_size,
job(file_path_info const& path, size_t total_size,
category_mapper const& mapper) const override;
bool
@ -235,8 +235,7 @@ incompressible_categorizer_::categories() const {
}
std::unique_ptr<sequential_categorizer_job>
incompressible_categorizer_::job(std::filesystem::path const& path,
size_t total_size,
incompressible_categorizer_::job(file_path_info const& path, size_t total_size,
category_mapper const& mapper) const {
if (total_size < config_.min_input_size) {
return nullptr;
@ -244,8 +243,8 @@ incompressible_categorizer_::job(std::filesystem::path const& path,
return make_unique_logging_object<sequential_categorizer_job,
incompressible_categorizer_job_,
logger_policies>(lgr_, config_, ctxmgr_,
path, total_size, mapper);
logger_policies>(
lgr_, config_, ctxmgr_, path.full_path(), total_size, mapper);
}
bool incompressible_categorizer_::subcategory_less(fragment_category,

View File

@ -528,7 +528,7 @@ class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base {
}
inode_fragments
categorize(fs::path const& path, std::span<uint8_t const> data,
categorize(file_path_info const& path, std::span<uint8_t const> data,
category_mapper const& mapper) const override;
std::string category_metadata(std::string_view category_name,
@ -1114,7 +1114,7 @@ void pcmaudio_categorizer_<LoggerPolicy>::add_fragments(
template <typename LoggerPolicy>
inode_fragments pcmaudio_categorizer_<LoggerPolicy>::categorize(
fs::path const& path, std::span<uint8_t const> data,
file_path_info const& path, std::span<uint8_t const> data,
category_mapper const& mapper) const {
inode_fragments fragments;
@ -1127,7 +1127,7 @@ inode_fragments pcmaudio_categorizer_<LoggerPolicy>::categorize(
&pcmaudio_categorizer_::check_wav64,
// clang-format on
}) {
if ((this->*f)(fragments, path, data, mapper)) {
if ((this->*f)(fragments, path.full_path(), data, mapper)) {
break;
}

View File

@ -68,7 +68,7 @@ class fits_categorizer_fixture : public Base {
po::store(parsed, vm);
po::notify(vm);
catmgr = std::make_shared<writer::categorizer_manager>(lgr);
catmgr = std::make_shared<writer::categorizer_manager>(lgr, "/");
catmgr->add(catreg.create(lgr, "fits", vm));
}

View File

@ -92,7 +92,7 @@ class incompressible_categorizer_fixture : public Base {
po::store(parsed, vm);
po::notify(vm);
catmgr = std::make_shared<writer::categorizer_manager>(lgr);
catmgr = std::make_shared<writer::categorizer_manager>(lgr, "/");
catmgr->add(catreg.create(lgr, "incompressible", vm));
}

View File

@ -215,7 +215,7 @@ TEST(pcmaudio_categorizer, requirements) {
test::test_logger logger(logger::INFO);
boost::program_options::variables_map vm;
auto& catreg = writer::categorizer_registry::instance();
auto catmgr = writer::categorizer_manager(logger);
auto catmgr = writer::categorizer_manager(logger, "/");
catmgr.add(catreg.create(logger, "pcmaudio", vm));
@ -294,7 +294,7 @@ TEST(pcmaudio_categorizer, requirements) {
class pcmaudio_error_test : public testing::Test {
public:
test::test_logger logger{logger::VERBOSE};
writer::categorizer_manager catmgr{logger};
writer::categorizer_manager catmgr{logger, "/"};
auto categorize(pcmfile_builder const& builder) {
// std::cout << folly::hexDump(builder.data.data(), builder.data.size());

View File

@ -1184,7 +1184,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
split_to<std::vector<std::string>>(categorizer_list.value(), ',');
options.inode.categorizer_mgr =
std::make_shared<writer::categorizer_manager>(lgr);
std::make_shared<writer::categorizer_manager>(lgr, path);
for (auto const& name : categorizers) {
options.inode.categorizer_mgr->add(catreg.create(lgr, name, vm));