mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-18 08:49:29 -04:00
refactor(categorizer): allow access to root and relative paths
This commit is contained in:
parent
db2d20dfce
commit
d84d7535c7
@ -65,10 +65,26 @@ class categorizer {
|
||||
subcategory_less(fragment_category a, fragment_category b) const = 0;
|
||||
};
|
||||
|
||||
// Pairs the root path with the full path of a single file so that code
// receiving it can read either path or derive the path relative to the root.
//
// Both paths are copied into the object. Holding them as reference members
// would let a caller bind them to temporaries (the constructor takes
// `const&`, which accepts rvalues) and leave the accessors returning
// dangling references as soon as the constructing expression ends.
class file_path_info {
 public:
  // Copies `root_path` and `full_path`; the arguments need not outlive the
  // constructed object.
  file_path_info(std::filesystem::path const& root_path,
                 std::filesystem::path const& full_path)
      : root_path_{root_path}
      , full_path_{full_path} {}

  // Returns the stored root path; the reference is valid for the lifetime of
  // this object.
  std::filesystem::path const& root_path() const { return root_path_; }

  // Returns the stored full path; the reference is valid for the lifetime of
  // this object.
  std::filesystem::path const& full_path() const { return full_path_; }

  // Declared here, defined out of line.
  std::filesystem::path relative_path() const;

 private:
  std::filesystem::path root_path_;
  std::filesystem::path full_path_;
};
|
||||
|
||||
class random_access_categorizer : public categorizer {
|
||||
public:
|
||||
virtual inode_fragments
|
||||
categorize(std::filesystem::path const& path, std::span<uint8_t const> data,
|
||||
categorize(file_path_info const& path, std::span<uint8_t const> data,
|
||||
category_mapper const& mapper) const = 0;
|
||||
};
|
||||
|
||||
@ -85,7 +101,7 @@ class sequential_categorizer_job {
|
||||
class sequential_categorizer : public categorizer {
|
||||
public:
|
||||
virtual std::unique_ptr<sequential_categorizer_job>
|
||||
job(std::filesystem::path const& path, size_t total_size,
|
||||
job(file_path_info const& path, size_t total_size,
|
||||
category_mapper const& mapper) const = 0;
|
||||
};
|
||||
|
||||
@ -129,7 +145,7 @@ class categorizer_job {
|
||||
|
||||
class categorizer_manager : public category_resolver {
|
||||
public:
|
||||
categorizer_manager(logger& lgr);
|
||||
categorizer_manager(logger& lgr, std::filesystem::path root);
|
||||
|
||||
static fragment_category default_category();
|
||||
|
||||
|
@ -45,6 +45,8 @@
|
||||
|
||||
namespace dwarfs::writer {
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace internal {
|
||||
|
||||
using namespace std::placeholders;
|
||||
@ -61,9 +63,10 @@ template <typename LoggerPolicy>
|
||||
class categorizer_job_ final : public categorizer_job::impl {
|
||||
public:
|
||||
categorizer_job_(logger& lgr, categorizer_manager_private const& mgr,
|
||||
std::filesystem::path const& path)
|
||||
fs::path const& root_path, fs::path const& path)
|
||||
: LOG_PROXY_INIT(lgr)
|
||||
, mgr_{mgr}
|
||||
, root_path_{root_path}
|
||||
, path_{path}
|
||||
, cat_mapper_{// NOLINTNEXTLINE(modernize-avoid-bind)
|
||||
std::bind(&categorizer_manager_private::category,
|
||||
@ -85,7 +88,8 @@ class categorizer_job_ final : public categorizer_job::impl {
|
||||
size_t total_size_{0};
|
||||
std::vector<std::pair<int, std::unique_ptr<sequential_categorizer_job>>>
|
||||
seq_jobs_;
|
||||
std::filesystem::path const path_;
|
||||
fs::path const& root_path_;
|
||||
fs::path const path_;
|
||||
category_mapper cat_mapper_;
|
||||
};
|
||||
|
||||
@ -104,9 +108,11 @@ void categorizer_job_<LoggerPolicy>::categorize_random_access(
|
||||
|
||||
bool global_best = true;
|
||||
|
||||
file_path_info path_info{root_path_, path_};
|
||||
|
||||
for (auto&& [index, cat] : ranges::views::enumerate(mgr_.categorizers())) {
|
||||
if (auto p = dynamic_cast<random_access_categorizer*>(cat.get())) {
|
||||
if (auto c = p->categorize(path_, data, cat_mapper_)) {
|
||||
if (auto c = p->categorize(path_info, data, cat_mapper_)) {
|
||||
best_ = c;
|
||||
index_ = index;
|
||||
is_global_best_ = global_best;
|
||||
@ -126,13 +132,15 @@ void categorizer_job_<LoggerPolicy>::categorize_sequential(
|
||||
}
|
||||
|
||||
if (seq_jobs_.empty()) [[unlikely]] {
|
||||
file_path_info path_info{root_path_, path_};
|
||||
|
||||
for (auto&& [index, cat] : ranges::views::enumerate(mgr_.categorizers())) {
|
||||
if (index_ >= 0 && std::cmp_greater_equal(index, index_)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (auto p = dynamic_cast<sequential_categorizer*>(cat.get())) {
|
||||
if (auto job = p->job(path_, total_size_, cat_mapper_)) {
|
||||
if (auto job = p->job(path_info, total_size_, cat_mapper_)) {
|
||||
seq_jobs_.emplace_back(index, std::move(job));
|
||||
}
|
||||
}
|
||||
@ -174,15 +182,16 @@ bool categorizer_job_<LoggerPolicy>::best_result_found() const {
|
||||
template <typename LoggerPolicy>
|
||||
class categorizer_manager_ final : public categorizer_manager_private {
|
||||
public:
|
||||
explicit categorizer_manager_(logger& lgr)
|
||||
explicit categorizer_manager_(logger& lgr, fs::path root)
|
||||
: lgr_{lgr}
|
||||
, LOG_PROXY_INIT(lgr) {
|
||||
, LOG_PROXY_INIT(lgr)
|
||||
, root_path_{std::move(root)} {
|
||||
add_category(categorizer::DEFAULT_CATEGORY,
|
||||
std::numeric_limits<size_t>::max());
|
||||
}
|
||||
|
||||
void add(std::shared_ptr<categorizer> c) override;
|
||||
categorizer_job job(std::filesystem::path const& path) const override;
|
||||
categorizer_job job(fs::path const& path) const override;
|
||||
std::string_view
|
||||
category_name(fragment_category::value_type c) const override;
|
||||
|
||||
@ -229,6 +238,7 @@ class categorizer_manager_ final : public categorizer_manager_private {
|
||||
// TODO: category descriptions?
|
||||
std::vector<std::pair<std::string_view, size_t>> categories_;
|
||||
std::unordered_map<std::string_view, fragment_category::value_type> catmap_;
|
||||
fs::path root_path_;
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
@ -241,11 +251,12 @@ void categorizer_manager_<LoggerPolicy>::add(std::shared_ptr<categorizer> c) {
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
categorizer_job categorizer_manager_<LoggerPolicy>::job(
|
||||
std::filesystem::path const& path) const {
|
||||
categorizer_job
|
||||
categorizer_manager_<LoggerPolicy>::job(fs::path const& path) const {
|
||||
return categorizer_job(
|
||||
make_unique_logging_object<categorizer_job::impl, categorizer_job_,
|
||||
logger_policies>(lgr_, *this, path));
|
||||
logger_policies>(lgr_, *this, root_path_,
|
||||
path));
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
@ -307,6 +318,10 @@ bool categorizer_manager_<LoggerPolicy>::deterministic_less(
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
// Returns the full path expressed relative to the root path. The result comes
// from std::filesystem::path::lexically_relative, i.e. it is derived from the
// two path values alone.
fs::path file_path_info::relative_path() const {
  auto const& base = root_path_;
  return full_path_.lexically_relative(base);
}
|
||||
|
||||
std::string category_prefix(std::shared_ptr<categorizer_manager> const& mgr,
|
||||
fragment_category cat) {
|
||||
return category_prefix(mgr.get(), cat);
|
||||
@ -361,9 +376,10 @@ categorizer_job::categorizer_job() = default;
|
||||
categorizer_job::categorizer_job(std::unique_ptr<impl> impl)
|
||||
: impl_{std::move(impl)} {}
|
||||
|
||||
categorizer_manager::categorizer_manager(logger& lgr)
|
||||
categorizer_manager::categorizer_manager(logger& lgr, fs::path root)
|
||||
: impl_(make_unique_logging_object<impl, internal::categorizer_manager_,
|
||||
logger_policies>(lgr)) {}
|
||||
logger_policies>(lgr, std::move(root))) {
|
||||
}
|
||||
|
||||
fragment_category categorizer_manager::default_category() {
|
||||
return fragment_category(0);
|
||||
|
@ -336,7 +336,7 @@ class fits_categorizer_ final : public fits_categorizer_base {
|
||||
}
|
||||
|
||||
inode_fragments
|
||||
categorize(fs::path const& path, std::span<uint8_t const> data,
|
||||
categorize(file_path_info const& path, std::span<uint8_t const> data,
|
||||
category_mapper const& mapper) const override;
|
||||
|
||||
std::string category_metadata(std::string_view category_name,
|
||||
@ -392,7 +392,7 @@ bool fits_categorizer_<LoggerPolicy>::check_metadata(
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
inode_fragments fits_categorizer_<LoggerPolicy>::categorize(
|
||||
fs::path const& path, std::span<uint8_t const> data,
|
||||
file_path_info const& path, std::span<uint8_t const> data,
|
||||
category_mapper const& mapper) const {
|
||||
inode_fragments fragments;
|
||||
|
||||
@ -406,7 +406,7 @@ inode_fragments fits_categorizer_<LoggerPolicy>::categorize(
|
||||
meta.unused_lsb_count = fi->unused_lsb_count;
|
||||
meta.component_count = fi->component_count;
|
||||
|
||||
if (check_metadata(meta, path)) {
|
||||
if (check_metadata(meta, path.full_path())) {
|
||||
auto subcategory = meta_.wlock()->add(meta);
|
||||
fragments.emplace_back(fragment_category(mapper(METADATA_CATEGORY)),
|
||||
fi->header.size());
|
||||
|
@ -208,7 +208,7 @@ class incompressible_categorizer_ final : public sequential_categorizer {
|
||||
|
||||
std::span<std::string_view const> categories() const override;
|
||||
std::unique_ptr<sequential_categorizer_job>
|
||||
job(std::filesystem::path const& path, size_t total_size,
|
||||
job(file_path_info const& path, size_t total_size,
|
||||
category_mapper const& mapper) const override;
|
||||
|
||||
bool
|
||||
@ -235,8 +235,7 @@ incompressible_categorizer_::categories() const {
|
||||
}
|
||||
|
||||
std::unique_ptr<sequential_categorizer_job>
|
||||
incompressible_categorizer_::job(std::filesystem::path const& path,
|
||||
size_t total_size,
|
||||
incompressible_categorizer_::job(file_path_info const& path, size_t total_size,
|
||||
category_mapper const& mapper) const {
|
||||
if (total_size < config_.min_input_size) {
|
||||
return nullptr;
|
||||
@ -244,8 +243,8 @@ incompressible_categorizer_::job(std::filesystem::path const& path,
|
||||
|
||||
return make_unique_logging_object<sequential_categorizer_job,
|
||||
incompressible_categorizer_job_,
|
||||
logger_policies>(lgr_, config_, ctxmgr_,
|
||||
path, total_size, mapper);
|
||||
logger_policies>(
|
||||
lgr_, config_, ctxmgr_, path.full_path(), total_size, mapper);
|
||||
}
|
||||
|
||||
bool incompressible_categorizer_::subcategory_less(fragment_category,
|
||||
|
@ -528,7 +528,7 @@ class pcmaudio_categorizer_ final : public pcmaudio_categorizer_base {
|
||||
}
|
||||
|
||||
inode_fragments
|
||||
categorize(fs::path const& path, std::span<uint8_t const> data,
|
||||
categorize(file_path_info const& path, std::span<uint8_t const> data,
|
||||
category_mapper const& mapper) const override;
|
||||
|
||||
std::string category_metadata(std::string_view category_name,
|
||||
@ -1114,7 +1114,7 @@ void pcmaudio_categorizer_<LoggerPolicy>::add_fragments(
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
inode_fragments pcmaudio_categorizer_<LoggerPolicy>::categorize(
|
||||
fs::path const& path, std::span<uint8_t const> data,
|
||||
file_path_info const& path, std::span<uint8_t const> data,
|
||||
category_mapper const& mapper) const {
|
||||
inode_fragments fragments;
|
||||
|
||||
@ -1127,7 +1127,7 @@ inode_fragments pcmaudio_categorizer_<LoggerPolicy>::categorize(
|
||||
&pcmaudio_categorizer_::check_wav64,
|
||||
// clang-format on
|
||||
}) {
|
||||
if ((this->*f)(fragments, path, data, mapper)) {
|
||||
if ((this->*f)(fragments, path.full_path(), data, mapper)) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -68,7 +68,7 @@ class fits_categorizer_fixture : public Base {
|
||||
po::store(parsed, vm);
|
||||
po::notify(vm);
|
||||
|
||||
catmgr = std::make_shared<writer::categorizer_manager>(lgr);
|
||||
catmgr = std::make_shared<writer::categorizer_manager>(lgr, "/");
|
||||
|
||||
catmgr->add(catreg.create(lgr, "fits", vm));
|
||||
}
|
||||
|
@ -92,7 +92,7 @@ class incompressible_categorizer_fixture : public Base {
|
||||
po::store(parsed, vm);
|
||||
po::notify(vm);
|
||||
|
||||
catmgr = std::make_shared<writer::categorizer_manager>(lgr);
|
||||
catmgr = std::make_shared<writer::categorizer_manager>(lgr, "/");
|
||||
|
||||
catmgr->add(catreg.create(lgr, "incompressible", vm));
|
||||
}
|
||||
|
@ -215,7 +215,7 @@ TEST(pcmaudio_categorizer, requirements) {
|
||||
test::test_logger logger(logger::INFO);
|
||||
boost::program_options::variables_map vm;
|
||||
auto& catreg = writer::categorizer_registry::instance();
|
||||
auto catmgr = writer::categorizer_manager(logger);
|
||||
auto catmgr = writer::categorizer_manager(logger, "/");
|
||||
|
||||
catmgr.add(catreg.create(logger, "pcmaudio", vm));
|
||||
|
||||
@ -294,7 +294,7 @@ TEST(pcmaudio_categorizer, requirements) {
|
||||
class pcmaudio_error_test : public testing::Test {
|
||||
public:
|
||||
test::test_logger logger{logger::VERBOSE};
|
||||
writer::categorizer_manager catmgr{logger};
|
||||
writer::categorizer_manager catmgr{logger, "/"};
|
||||
|
||||
auto categorize(pcmfile_builder const& builder) {
|
||||
// std::cout << folly::hexDump(builder.data.data(), builder.data.size());
|
||||
|
@ -1184,7 +1184,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
||||
split_to<std::vector<std::string>>(categorizer_list.value(), ',');
|
||||
|
||||
options.inode.categorizer_mgr =
|
||||
std::make_shared<writer::categorizer_manager>(lgr);
|
||||
std::make_shared<writer::categorizer_manager>(lgr, path);
|
||||
|
||||
for (auto const& name : categorizers) {
|
||||
options.inode.categorizer_mgr->add(catreg.create(lgr, name, vm));
|
||||
|
Loading…
x
Reference in New Issue
Block a user