refactor: use file_access abstraction in hotness categorizer

This commit is contained in:
Marcus Holland-Moritz 2025-04-08 11:18:41 +02:00
parent 5529c54acf
commit ca5c1bba0b
10 changed files with 39 additions and 27 deletions

View File

@ -43,6 +43,7 @@ class variables_map;
namespace dwarfs {
class file_access;
class logger;
namespace writer {
@ -224,8 +225,8 @@ class categorizer_info {
// Abstract factory interface for categorizers: adds a pure virtual create()
// on top of what categorizer_info provides.
class categorizer_factory : public categorizer_info {
public:
// Builds a categorizer from the logger `lgr` and the option map `vm`.
// Two parameter lists follow the return type because this is a diff hunk
// showing both forms of the declaration: one taking (lgr, vm) and one that
// additionally takes a shared, read-only file_access handle `fa`.
// NOTE(review): the factory overrides and call sites elsewhere in this change
// all have a variant using the extra `fa` argument, so the three-argument
// form appears to be the replacement - confirm against the applied file.
virtual std::unique_ptr<categorizer>
create(logger& lgr,
boost::program_options::variables_map const& vm) const = 0;
create(logger& lgr, boost::program_options::variables_map const& vm,
std::shared_ptr<file_access const> const& fa) const = 0;
};
class categorizer_registry {
@ -234,7 +235,8 @@ class categorizer_registry {
std::unique_ptr<categorizer>
create(logger& lgr, std::string const& name,
boost::program_options::variables_map const& vm) const;
boost::program_options::variables_map const& vm,
std::shared_ptr<file_access const> const& fa) const;
void add_options(boost::program_options::options_description& opts) const;

View File

@ -400,16 +400,16 @@ void categorizer_registry::register_factory(
}
}
// Creates the categorizer registered under `name`.
//
// Looks `name` up in factories_. If no factory is found, a runtime_error is
// raised through DWARFS_THROW with the message "unknown categorizer: <name>".
// Otherwise the request is delegated to the matching factory's create().
//
// This hunk shows two signatures and two return statements: the
// (lgr, name, vm) form, which calls create(lgr, vm), and the
// (lgr, name, vm, fa) form, which forwards the file_access handle `fa` to the
// factory unchanged. The registry itself never dereferences `fa`.
std::unique_ptr<categorizer>
categorizer_registry::create(logger& lgr, std::string const& name,
po::variables_map const& vm) const {
std::unique_ptr<categorizer> categorizer_registry::create(
logger& lgr, std::string const& name, po::variables_map const& vm,
std::shared_ptr<file_access const> const& fa) const {
auto it = factories_.find(name);
if (it == factories_.end()) {
DWARFS_THROW(runtime_error, "unknown categorizer: " + name);
}
return it->second->create(lgr, vm);
return it->second->create(lgr, vm, fa);
}
void categorizer_registry::add_options(po::options_description& opts) const {

View File

@ -454,7 +454,8 @@ class fits_categorizer_factory : public categorizer_factory {
}
// Creates a fits categorizer via make_unique_logging_object, passing only the
// logger. The option map and, in the longer signature shown here, the
// file_access handle are accepted to satisfy the factory interface but are
// unused: both parameter names are commented out.
std::unique_ptr<categorizer>
create(logger& lgr, po::variables_map const& /*vm*/) const override {
create(logger& lgr, po::variables_map const& /*vm*/,
std::shared_ptr<file_access const> const& /*fa*/) const override {
return make_unique_logging_object<categorizer, fits_categorizer_,
logger_policies>(lgr);
}

View File

@ -23,7 +23,6 @@
#include <atomic>
#include <cassert>
#include <cstring>
#include <fstream>
#include <numeric>
#include <unordered_set>
#include <vector>
@ -33,6 +32,7 @@
#include <fmt/format.h>
#include <dwarfs/error.h>
#include <dwarfs/file_access.h>
#include <dwarfs/logger.h>
#include <dwarfs/util.h>
#include <dwarfs/writer/categorizer.h>
@ -52,7 +52,8 @@ struct hotness_categorizer_config {
template <typename LoggerPolicy>
class hotness_categorizer_ final : public random_access_categorizer {
public:
hotness_categorizer_(logger& lgr, hotness_categorizer_config const& cfg);
hotness_categorizer_(logger& lgr, hotness_categorizer_config const& cfg,
std::shared_ptr<file_access const> const& fa);
std::span<std::string_view const> categories() const override;
@ -72,20 +73,23 @@ class hotness_categorizer_ final : public random_access_categorizer {
template <typename LoggerPolicy>
hotness_categorizer_<LoggerPolicy>::hotness_categorizer_(
logger& lgr, hotness_categorizer_config const& cfg)
logger& lgr, hotness_categorizer_config const& cfg,
std::shared_ptr<file_access const> const& fa)
: LOG_PROXY_INIT(lgr)
, cfg_{cfg} {
auto const& file = cfg_.hotness_list;
if (!file.empty()) {
std::ifstream ifs{file};
if (!ifs) {
DWARFS_THROW(runtime_error,
fmt::format("failed to open file '{}'", file));
std::error_code ec;
auto input = fa->open_input(file, ec);
if (ec) {
DWARFS_THROW(runtime_error, fmt::format("failed to open file '{}': {}",
file, ec.message()));
}
std::string line;
while (std::getline(ifs, line)) {
while (std::getline(input->is(), line)) {
auto const path = std::filesystem::path{line}.relative_path();
LOG_DEBUG << "hotness categorizer: adding path '" << path << "'";
if (!hotness_set_.emplace(path.string()).second) {
@ -118,7 +122,8 @@ inode_fragments hotness_categorizer_<LoggerPolicy>::categorize(
if (!hotness_set_.empty()) {
auto const rel_path = path.relative_path();
LOG_DEBUG << "hotness categorizer: checking path '" << rel_path << "'";
LOG_DEBUG << "hotness categorizer: checking path '" << rel_path << "' ('"
<< path.full_path() << "')";
if (auto it = hotness_set_.find(rel_path.string());
it != hotness_set_.end()) {
@ -163,9 +168,10 @@ class hotness_categorizer_factory : public categorizer_factory {
}
// Creates a hotness categorizer via make_unique_logging_object.
//
// The option map `vm` is unused (its name is commented out). The shorter form
// shown here passes (lgr, cfg_) to the categorizer; the longer form also
// receives the file_access handle `fa` and passes (lgr, cfg_, fa).
//
// NOTE(review): the hotness_categorizer_ constructor in this change calls
// fa->open_input() whenever cfg_.hotness_list is non-empty, and no null check
// on `fa` is visible. Tests in this change pass nullptr as `fa` to other
// factories - confirm that no caller passes a null `fa` to this factory while
// a hotness list is configured.
std::unique_ptr<categorizer>
create(logger& lgr, po::variables_map const& /*vm*/) const override {
create(logger& lgr, po::variables_map const& /*vm*/,
std::shared_ptr<file_access const> const& fa) const override {
return make_unique_logging_object<categorizer, hotness_categorizer_,
logger_policies>(lgr, cfg_);
logger_policies>(lgr, cfg_, fa);
}
private:

View File

@ -291,7 +291,8 @@ class incompressible_categorizer_factory : public categorizer_factory {
}
std::unique_ptr<categorizer>
create(logger& lgr, po::variables_map const& /*vm*/) const override {
create(logger& lgr, po::variables_map const& /*vm*/,
std::shared_ptr<file_access const> const& /*fa*/) const override {
auto cfg = cfg_;
cfg.min_input_size = parse_size_with_unit(min_input_size_str_);
cfg.block_size = parse_size_with_unit(block_size_str_);

View File

@ -1168,7 +1168,8 @@ class pcmaudio_categorizer_factory : public categorizer_factory {
}
// Creates a pcmaudio categorizer via make_unique_logging_object, passing only
// the logger. The option map and, in the longer signature shown here, the
// file_access handle are unused: both parameter names are commented out.
std::unique_ptr<categorizer>
create(logger& lgr, po::variables_map const& /*vm*/) const override {
create(logger& lgr, po::variables_map const& /*vm*/,
std::shared_ptr<file_access const> const& /*fa*/) const override {
return make_unique_logging_object<categorizer, pcmaudio_categorizer_,
logger_policies>(lgr);
}

View File

@ -70,7 +70,7 @@ class fits_categorizer_fixture : public Base {
catmgr = std::make_shared<writer::categorizer_manager>(lgr, "/");
catmgr->add(catreg.create(lgr, "fits", vm));
catmgr->add(catreg.create(lgr, "fits", vm, nullptr));
}
public:

View File

@ -94,7 +94,7 @@ class incompressible_categorizer_fixture : public Base {
catmgr = std::make_shared<writer::categorizer_manager>(lgr, "/");
catmgr->add(catreg.create(lgr, "incompressible", vm));
catmgr->add(catreg.create(lgr, "incompressible", vm, nullptr));
}
// void TearDown() override {

View File

@ -217,7 +217,7 @@ TEST(pcmaudio_categorizer, requirements) {
auto& catreg = writer::categorizer_registry::instance();
auto catmgr = writer::categorizer_manager(logger, "/");
catmgr.add(catreg.create(logger, "pcmaudio", vm));
catmgr.add(catreg.create(logger, "pcmaudio", vm, nullptr));
try {
catmgr.set_metadata_requirements(
@ -307,7 +307,7 @@ class pcmaudio_error_test : public testing::Test {
void SetUp() override {
boost::program_options::variables_map vm;
auto& catreg = writer::categorizer_registry::instance();
catmgr.add(catreg.create(logger, "pcmaudio", vm));
catmgr.add(catreg.create(logger, "pcmaudio", vm, nullptr));
catmgr.set_metadata_requirements(
catmgr.category_value("pcmaudio/waveform").value(),

View File

@ -762,7 +762,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
for (auto const& name : catreg.categorizer_names()) {
stream_logger lgr(iol.term, iol.err);
auto categorizer = catreg.create(lgr, name, vm);
auto categorizer = catreg.create(lgr, name, vm, iol.file);
iol.out << " [" << name << "]\n";
for (auto cat : categorizer->categories()) {
iol.out << " " << cat << "\n";
@ -1187,7 +1187,8 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
std::make_shared<writer::categorizer_manager>(lgr, path);
for (auto const& name : categorizers) {
options.inode.categorizer_mgr->add(catreg.create(lgr, name, vm));
options.inode.categorizer_mgr->add(
catreg.create(lgr, name, vm, iol.file));
}
}