diff --git a/include/dwarfs/writer/categorizer.h b/include/dwarfs/writer/categorizer.h index 1356bd62..c86512a7 100644 --- a/include/dwarfs/writer/categorizer.h +++ b/include/dwarfs/writer/categorizer.h @@ -65,10 +65,26 @@ class categorizer { subcategory_less(fragment_category a, fragment_category b) const = 0; }; +class file_path_info { /* Pairs a file's full path with a root path. Holds references only (see the private members), so both path objects must outlive this object. */ + public: + file_path_info(std::filesystem::path const& root_path, + std::filesystem::path const& full_path) + : root_path_{root_path} + , full_path_{full_path} {} + + std::filesystem::path const& root_path() const { return root_path_; } + std::filesystem::path const& full_path() const { return full_path_; } + std::filesystem::path relative_path() const; /* full_path() relative to root_path(); defined in categorizer.cpp using lexically_relative() */ + + private: + std::filesystem::path const& root_path_; + std::filesystem::path const& full_path_; +}; + class random_access_categorizer : public categorizer { public: virtual inode_fragments - categorize(std::filesystem::path const& path, std::span data, + categorize(file_path_info const& path, std::span data, category_mapper const& mapper) const = 0; }; @@ -85,7 +101,7 @@ class sequential_categorizer_job { class sequential_categorizer : public categorizer { public: virtual std::unique_ptr - job(std::filesystem::path const& path, size_t total_size, + job(file_path_info const& path, size_t total_size, category_mapper const& mapper) const = 0; }; @@ -129,7 +145,7 @@ class categorizer_job { class categorizer_manager : public category_resolver { public: - categorizer_manager(logger& lgr); + categorizer_manager(logger& lgr, std::filesystem::path root); /* root is stored by the manager implementation and becomes root_path() of the file_path_info handed to each categorizer */ static fragment_category default_category(); diff --git a/src/writer/categorizer.cpp b/src/writer/categorizer.cpp index 8c05410f..2dc65f6b 100644 --- a/src/writer/categorizer.cpp +++ b/src/writer/categorizer.cpp @@ -45,6 +45,8 @@ namespace dwarfs::writer { +namespace fs = std::filesystem; + namespace internal { using namespace std::placeholders; @@ -61,9 +63,10 @@ template class categorizer_job_ final : public categorizer_job::impl { 
public: categorizer_job_(logger& lgr, categorizer_manager_private const& mgr, - std::filesystem::path const& path) + fs::path const& root_path, fs::path const& path) : LOG_PROXY_INIT(lgr) , mgr_{mgr} + , root_path_{root_path} , path_{path} , cat_mapper_{// NOLINTNEXTLINE(modernize-avoid-bind) std::bind(&categorizer_manager_private::category, @@ -85,7 +88,8 @@ class categorizer_job_ final : public categorizer_job::impl { size_t total_size_{0}; std::vector>> seq_jobs_; - std::filesystem::path const path_; + fs::path const& root_path_; /* reference to the root path given to the constructor (the manager's root_path_ when created through categorizer_manager_::job); path_ below is an owned copy */ + fs::path const path_; category_mapper cat_mapper_; }; @@ -104,9 +108,11 @@ void categorizer_job_::categorize_random_access( bool global_best = true; + file_path_info path_info{root_path_, path_}; + for (auto&& [index, cat] : ranges::views::enumerate(mgr_.categorizers())) { if (auto p = dynamic_cast(cat.get())) { - if (auto c = p->categorize(path_, data, cat_mapper_)) { + if (auto c = p->categorize(path_info, data, cat_mapper_)) { best_ = c; index_ = index; is_global_best_ = global_best; @@ -126,13 +132,15 @@ void categorizer_job_::categorize_sequential( } if (seq_jobs_.empty()) [[unlikely]] { + file_path_info path_info{root_path_, path_}; + for (auto&& [index, cat] : ranges::views::enumerate(mgr_.categorizers())) { if (index_ >= 0 && std::cmp_greater_equal(index, index_)) { break; } if (auto p = dynamic_cast(cat.get())) { - if (auto job = p->job(path_, total_size_, cat_mapper_)) { + if (auto job = p->job(path_info, total_size_, cat_mapper_)) { seq_jobs_.emplace_back(index, std::move(job)); } } @@ -174,15 +182,16 @@ bool categorizer_job_::best_result_found() const { template class categorizer_manager_ final : public categorizer_manager_private { public: - explicit categorizer_manager_(logger& lgr) + explicit categorizer_manager_(logger& lgr, fs::path root) : lgr_{lgr} - , LOG_PROXY_INIT(lgr) { + , LOG_PROXY_INIT(lgr) + , root_path_{std::move(root)} { add_category(categorizer::DEFAULT_CATEGORY, std::numeric_limits::max()); } void 
add(std::shared_ptr c) override; - categorizer_job job(std::filesystem::path const& path) const override; + categorizer_job job(fs::path const& path) const override; std::string_view category_name(fragment_category::value_type c) const override; @@ -229,6 +238,7 @@ class categorizer_manager_ final : public categorizer_manager_private { // TODO: category descriptions? std::vector> categories_; std::unordered_map catmap_; + fs::path root_path_; /* owned copy of the root passed to the constructor */ }; template @@ -241,11 +251,12 @@ void categorizer_manager_::add(std::shared_ptr c) { } template -categorizer_job categorizer_manager_::job( - std::filesystem::path const& path) const { +categorizer_job +categorizer_manager_::job(fs::path const& path) const { return categorizer_job( make_unique_logging_object(lgr_, *this, path)); + logger_policies>(lgr_, *this, root_path_, + path)); } template @@ -307,6 +318,10 @@ bool categorizer_manager_::deterministic_less( namespace po = boost::program_options; +fs::path file_path_info::relative_path() const { /* derived from the two stored paths with lexically_relative(), i.e. from the path text alone */ + return full_path_.lexically_relative(root_path_); +} + std::string category_prefix(std::shared_ptr const& mgr, fragment_category cat) { return category_prefix(mgr.get(), cat); @@ -361,9 +376,10 @@ categorizer_job::categorizer_job() = default; categorizer_job::categorizer_job(std::unique_ptr impl) : impl_{std::move(impl)} {} -categorizer_manager::categorizer_manager(logger& lgr) +categorizer_manager::categorizer_manager(logger& lgr, fs::path root) : impl_(make_unique_logging_object(lgr)) {} + logger_policies>(lgr, std::move(root))) { +} fragment_category categorizer_manager::default_category() { return fragment_category(0); diff --git a/src/writer/categorizer/fits_categorizer.cpp b/src/writer/categorizer/fits_categorizer.cpp index 6121691e..db6a5ffa 100644 --- a/src/writer/categorizer/fits_categorizer.cpp +++ b/src/writer/categorizer/fits_categorizer.cpp @@ -336,7 +336,7 @@ class fits_categorizer_ final : public fits_categorizer_base { } inode_fragments - categorize(fs::path const& 
path, std::span data, + categorize(file_path_info const& path, std::span data, category_mapper const& mapper) const override; std::string category_metadata(std::string_view category_name, @@ -392,7 +392,7 @@ bool fits_categorizer_::check_metadata( template inode_fragments fits_categorizer_::categorize( - fs::path const& path, std::span data, + file_path_info const& path, std::span data, category_mapper const& mapper) const { inode_fragments fragments; @@ -406,7 +406,7 @@ inode_fragments fits_categorizer_::categorize( meta.unused_lsb_count = fi->unused_lsb_count; meta.component_count = fi->component_count; - if (check_metadata(meta, path)) { + if (check_metadata(meta, path.full_path())) { auto subcategory = meta_.wlock()->add(meta); fragments.emplace_back(fragment_category(mapper(METADATA_CATEGORY)), fi->header.size()); diff --git a/src/writer/categorizer/incompressible_categorizer.cpp b/src/writer/categorizer/incompressible_categorizer.cpp index 9b326bf4..2ff7a410 100644 --- a/src/writer/categorizer/incompressible_categorizer.cpp +++ b/src/writer/categorizer/incompressible_categorizer.cpp @@ -208,7 +208,7 @@ class incompressible_categorizer_ final : public sequential_categorizer { std::span categories() const override; std::unique_ptr - job(std::filesystem::path const& path, size_t total_size, + job(file_path_info const& path, size_t total_size, category_mapper const& mapper) const override; bool @@ -235,8 +235,7 @@ incompressible_categorizer_::categories() const { } std::unique_ptr -incompressible_categorizer_::job(std::filesystem::path const& path, - size_t total_size, +incompressible_categorizer_::job(file_path_info const& path, size_t total_size, category_mapper const& mapper) const { if (total_size < config_.min_input_size) { return nullptr; @@ -244,8 +243,8 @@ incompressible_categorizer_::job(std::filesystem::path const& path, return make_unique_logging_object(lgr_, config_, ctxmgr_, - path, total_size, mapper); + logger_policies>( + lgr_, config_, ctxmgr_, 
path.full_path(), total_size, mapper); } bool incompressible_categorizer_::subcategory_less(fragment_category, diff --git a/src/writer/categorizer/pcmaudio_categorizer.cpp b/src/writer/categorizer/pcmaudio_categorizer.cpp index ac622ad0..687814dc 100644 --- a/src/writer/categorizer/pcmaudio_categorizer.cpp +++ b/src/writer/categorizer/pcmaudio_categorizer.cpp @@ -528,7 +528,7 @@ class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base { } inode_fragments - categorize(fs::path const& path, std::span data, + categorize(file_path_info const& path, std::span data, category_mapper const& mapper) const override; std::string category_metadata(std::string_view category_name, @@ -1114,7 +1114,7 @@ void pcmaudio_categorizer_::add_fragments( template inode_fragments pcmaudio_categorizer_::categorize( - fs::path const& path, std::span data, + file_path_info const& path, std::span data, category_mapper const& mapper) const { inode_fragments fragments; @@ -1127,7 +1127,7 @@ inode_fragments pcmaudio_categorizer_::categorize( &pcmaudio_categorizer_::check_wav64, // clang-format on }) { - if ((this->*f)(fragments, path, data, mapper)) { + if ((this->*f)(fragments, path.full_path(), data, mapper)) { break; } diff --git a/test/fits_categorizer_test.cpp b/test/fits_categorizer_test.cpp index 9a6ef151..52aace7f 100644 --- a/test/fits_categorizer_test.cpp +++ b/test/fits_categorizer_test.cpp @@ -68,7 +68,7 @@ class fits_categorizer_fixture : public Base { po::store(parsed, vm); po::notify(vm); - catmgr = std::make_shared(lgr); + catmgr = std::make_shared(lgr, "/"); catmgr->add(catreg.create(lgr, "fits", vm)); } diff --git a/test/incompressible_categorizer_test.cpp b/test/incompressible_categorizer_test.cpp index f0680f12..42fdbb32 100644 --- a/test/incompressible_categorizer_test.cpp +++ b/test/incompressible_categorizer_test.cpp @@ -92,7 +92,7 @@ class incompressible_categorizer_fixture : public Base { po::store(parsed, vm); po::notify(vm); - catmgr = std::make_shared(lgr); 
+ catmgr = std::make_shared(lgr, "/"); catmgr->add(catreg.create(lgr, "incompressible", vm)); } diff --git a/test/pcmaudio_categorizer_test.cpp b/test/pcmaudio_categorizer_test.cpp index d2cf7bfe..9f078888 100644 --- a/test/pcmaudio_categorizer_test.cpp +++ b/test/pcmaudio_categorizer_test.cpp @@ -215,7 +215,7 @@ TEST(pcmaudio_categorizer, requirements) { test::test_logger logger(logger::INFO); boost::program_options::variables_map vm; auto& catreg = writer::categorizer_registry::instance(); - auto catmgr = writer::categorizer_manager(logger); + auto catmgr = writer::categorizer_manager(logger, "/"); catmgr.add(catreg.create(logger, "pcmaudio", vm)); @@ -294,7 +294,7 @@ TEST(pcmaudio_categorizer, requirements) { class pcmaudio_error_test : public testing::Test { public: test::test_logger logger{logger::VERBOSE}; - writer::categorizer_manager catmgr{logger}; + writer::categorizer_manager catmgr{logger, "/"}; auto categorize(pcmfile_builder const& builder) { // std::cout << folly::hexDump(builder.data.data(), builder.data.size()); diff --git a/tools/src/mkdwarfs_main.cpp b/tools/src/mkdwarfs_main.cpp index d5e91c4e..725b5caf 100644 --- a/tools/src/mkdwarfs_main.cpp +++ b/tools/src/mkdwarfs_main.cpp @@ -1184,7 +1184,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { split_to>(categorizer_list.value(), ','); options.inode.categorizer_mgr = - std::make_shared(lgr); + std::make_shared(lgr, path); for (auto const& name : categorizers) { options.inode.categorizer_mgr->add(catreg.create(lgr, name, vm));