refactor: replace script with separate filters and transformers

This commit is contained in:
Marcus Holland-Moritz 2024-08-02 21:44:25 +02:00
parent 7eb47649ff
commit 0987dba63d
14 changed files with 299 additions and 269 deletions

View File

@ -647,7 +647,6 @@ list(APPEND LIBDWARFS_READER_SRC
)
list(APPEND LIBDWARFS_WRITER_SRC
src/dwarfs/builtin_script.cpp
src/dwarfs/categorizer.cpp
src/dwarfs/category_parser.cpp
src/dwarfs/chmod_entry_transformer.cpp
@ -674,6 +673,7 @@ list(APPEND LIBDWARFS_WRITER_SRC
src/dwarfs/internal/scanner_progress.cpp
src/dwarfs/internal/similarity.cpp
src/dwarfs/internal/similarity_ordering.cpp
src/dwarfs/rule_based_entry_filter.cpp
src/dwarfs/scanner.cpp
src/dwarfs/segmenter.cpp
src/dwarfs/segmenter_factory.cpp

View File

@ -21,7 +21,6 @@
#pragma once
#include <cstdint>
#include <memory>
#include <string_view>

View File

@ -21,22 +21,20 @@
#pragma once
#include <memory>
#include <span>
namespace dwarfs {
class entry_interface;
class script {
// Verdict produced by entry_filter::filter() for a single entry.
enum class filter_action { keep, remove };
class entry_filter {
public:
virtual ~script() = default;
virtual ~entry_filter() = default;
virtual bool has_filter() const = 0;
virtual bool has_transform() const = 0;
virtual bool filter(entry_interface const& ei) = 0;
virtual void transform(entry_interface& ei) = 0;
virtual filter_action filter(entry_interface const& ei) const = 0;
};
} // namespace dwarfs

View File

@ -38,7 +38,6 @@ namespace dwarfs {
class logger;
class os_access;
class progress;
class script;
struct inode_options;

View File

@ -24,53 +24,39 @@
#include <filesystem>
#include <iosfwd>
#include <memory>
#include <string>
#include <string_view>
#include <dwarfs/script.h>
#include <dwarfs/entry_filter.h>
#include <dwarfs/file_stat.h>
namespace dwarfs {
class entry_transformer;
class file_access;
class logger;
class builtin_script : public script {
class rule_based_entry_filter : public entry_filter {
public:
builtin_script(logger& lgr, std::shared_ptr<file_access const> fa);
~builtin_script();
rule_based_entry_filter(logger& lgr, std::shared_ptr<file_access const> fa);
~rule_based_entry_filter();
void set_root_path(std::filesystem::path const& path) {
impl_->set_root_path(path);
}
void add_filter_rule(std::string const& rule) {
impl_->add_filter_rule(rule);
}
void add_rule(std::string_view rule) { impl_->add_rule(rule); }
void add_filter_rules(std::istream& is) { impl_->add_filter_rules(is); }
void add_rules(std::istream& is) { impl_->add_rules(is); }
void add_transformer(std::unique_ptr<entry_transformer>&& xfm) {
impl_->add_transformer(std::move(xfm));
}
bool has_filter() const override;
bool has_transform() const override;
bool filter(entry_interface const& ei) override;
void transform(entry_interface& ei) override;
filter_action filter(entry_interface const& ei) const override;
class impl {
public:
virtual ~impl() = default;
virtual void set_root_path(std::filesystem::path const& path) = 0;
virtual void add_filter_rule(std::string const& rule) = 0;
virtual void add_filter_rules(std::istream& is) = 0;
virtual void add_transformer(std::unique_ptr<entry_transformer>&& xfm) = 0;
virtual bool filter(entry_interface const& ei) = 0;
virtual void transform(entry_interface& ei) = 0;
virtual bool has_filter() const = 0;
virtual bool has_transform() const = 0;
virtual void add_rule(std::string_view rule) = 0;
virtual void add_rules(std::istream& is) = 0;
virtual filter_action filter(entry_interface const& ei) const = 0;
};
private:

View File

@ -31,22 +31,31 @@ namespace dwarfs {
struct scanner_options;
class entry_filter;
class entry_transformer;
class entry_factory;
class file_access;
class filesystem_writer;
class logger;
class os_access;
class writer_progress;
class script;
class segmenter_factory;
class thread_pool;
class scanner {
public:
scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf,
entry_factory& ef, os_access const& os, std::shared_ptr<script> scr,
entry_factory& ef, os_access const& os,
const scanner_options& options);
// Transfers ownership of `filter` to the scanner implementation (impl_).
void add_filter(std::unique_ptr<entry_filter> filter) {
impl_->add_filter(std::move(filter));
}
// Transfers ownership of `transformer` to the scanner implementation (impl_).
void add_transformer(std::unique_ptr<entry_transformer> transformer) {
impl_->add_transformer(std::move(transformer));
}
void scan(
filesystem_writer& fsw, const std::filesystem::path& path,
writer_progress& prog,
@ -59,6 +68,11 @@ class scanner {
public:
virtual ~impl() = default;
virtual void add_filter(std::unique_ptr<entry_filter> filter) = 0;
virtual void
add_transformer(std::unique_ptr<entry_transformer> transformer) = 0;
virtual void
scan(filesystem_writer& fsw, const std::filesystem::path& path,
writer_progress& prog,

View File

@ -21,6 +21,7 @@
#include <dwarfs/chmod_entry_transformer.h>
#include <dwarfs/entry_interface.h>
#include <dwarfs/entry_transformer.h>
#include <dwarfs/internal/chmod_transformer.h>

View File

@ -48,7 +48,6 @@
#include <dwarfs/mmif.h>
#include <dwarfs/options.h>
#include <dwarfs/os_access.h>
#include <dwarfs/script.h>
#include <dwarfs/util.h>
#include <dwarfs/internal/entry.h>

View File

@ -25,11 +25,10 @@
#include <fmt/format.h>
#include <dwarfs/builtin_script.h>
#include <dwarfs/entry_interface.h>
#include <dwarfs/entry_transformer.h>
#include <dwarfs/file_access.h>
#include <dwarfs/logger.h>
#include <dwarfs/rule_based_entry_filter.h>
#include <dwarfs/util.h>
namespace dwarfs {
@ -65,44 +64,34 @@ struct filter_rule {
};
template <typename LoggerPolicy>
class builtin_script_ : public builtin_script::impl {
class rule_based_entry_filter_ : public rule_based_entry_filter::impl {
public:
builtin_script_(logger& lgr, std::shared_ptr<file_access const> fa);
rule_based_entry_filter_(logger& lgr, std::shared_ptr<file_access const> fa);
void set_root_path(fs::path const& path) override;
void add_filter_rule(std::string const& rule) override;
void add_filter_rules(std::istream& is) override;
void add_transformer(std::unique_ptr<entry_transformer>&& xfm) override {
transformer_.emplace_back(std::move(xfm));
}
bool filter(entry_interface const& ei) override;
void transform(entry_interface& ei) override;
bool has_filter() const override { return !filter_.empty(); }
bool has_transform() const override { return !transformer_.empty(); }
void add_rule(std::string_view rule) override;
void add_rules(std::istream& is) override;
filter_action filter(entry_interface const& ei) const override;
private:
void add_filter_rule(std::unordered_set<std::string>& seen_files,
std::string const& rule);
void
add_rule(std::unordered_set<std::string>& seen_files, std::string_view rule);
void add_filter_rules(std::unordered_set<std::string>& seen_files,
std::istream& is);
void add_rules(std::unordered_set<std::string>& seen_files, std::istream& is);
filter_rule compile_filter_rule(std::string const& rule);
filter_rule compile_filter_rule(std::string_view rule);
LOG_PROXY_DECL(LoggerPolicy);
std::string root_path_;
std::vector<filter_rule> filter_;
std::vector<std::unique_ptr<entry_transformer>> transformer_;
std::shared_ptr<file_access const> fa_;
};
template <typename LoggerPolicy>
auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
-> filter_rule {
std::string r;
auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
std::string_view rule_sv) -> filter_rule {
std::string rule{rule_sv};
std::string re;
filter_rule::rule_type type;
auto* p = rule.c_str();
@ -125,15 +114,15 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
bool floating = *p && *p != '/';
if (floating) {
r += ".*/";
re += ".*/";
}
while (*p) {
switch (*p) {
case '\\':
r += *p++;
re += *p++;
if (p) {
r += *p++;
re += *p++;
}
continue;
@ -144,14 +133,14 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
}
switch (nstar) {
case 1:
if (r.ends_with('/') and (*p == '/' or *p == '\0')) {
r += "[^/]+";
if (re.ends_with('/') and (*p == '/' or *p == '\0')) {
re += "[^/]+";
} else {
r += "[^/]*";
re += "[^/]*";
}
break;
case 2:
r += ".*";
re += ".*";
break;
default:
throw std::runtime_error("too many *s");
@ -160,7 +149,7 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
continue;
case '?':
r += "[^/]";
re += "[^/]";
break;
case '.':
@ -172,32 +161,33 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
case '{':
case '}':
case '|':
r += '\\';
r += *p;
re += '\\';
re += *p;
break;
default:
r += *p;
re += *p;
break;
}
++p;
}
LOG_DEBUG << "'" << rule << "' -> '" << r << "' [floating=" << floating
LOG_DEBUG << "'" << rule << "' -> '" << re << "' [floating=" << floating
<< "]";
return filter_rule(type, floating, r, rule);
return filter_rule(type, floating, re, rule);
}
template <typename LoggerPolicy>
builtin_script_<LoggerPolicy>::builtin_script_(
rule_based_entry_filter_<LoggerPolicy>::rule_based_entry_filter_(
logger& lgr, std::shared_ptr<file_access const> fa)
: log_{lgr}
, fa_{std::move(fa)} {}
template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::set_root_path(fs::path const& path) {
void rule_based_entry_filter_<LoggerPolicy>::set_root_path(
fs::path const& path) {
// TODO: this whole thing needs to be windowsized
root_path_ = u8string_to_string(path.u8string());
@ -215,22 +205,29 @@ void builtin_script_<LoggerPolicy>::set_root_path(fs::path const& path) {
}
template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::add_filter_rule(std::string const& rule) {
void rule_based_entry_filter_<LoggerPolicy>::add_rule(std::string_view rule) {
std::unordered_set<std::string> seen_files;
add_filter_rule(seen_files, rule);
add_rule(seen_files, rule);
}
template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::add_filter_rules(std::istream& is) {
void rule_based_entry_filter_<LoggerPolicy>::add_rules(std::istream& is) {
std::unordered_set<std::string> seen_files;
add_filter_rules(seen_files, is);
add_rules(seen_files, is);
}
template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::add_filter_rule(
std::unordered_set<std::string>& seen_files, std::string const& rule) {
void rule_based_entry_filter_<LoggerPolicy>::add_rule(
std::unordered_set<std::string>& seen_files, std::string_view rule) {
if (rule.starts_with('.')) {
auto file = std::regex_replace(rule, std::regex("^. +"), "");
auto file_pos = rule.find_first_not_of(" \t", 1);
if (file_pos == std::string::npos) {
throw std::runtime_error(
fmt::format("no file specified in merge rule: {}", rule));
}
auto file = std::string(rule.substr(file_pos));
if (!seen_files.emplace(file).second) {
throw std::runtime_error(
@ -238,7 +235,7 @@ void builtin_script_<LoggerPolicy>::add_filter_rule(
}
auto ifs = fa_->open_input(file);
add_filter_rules(seen_files, ifs->is());
add_rules(seen_files, ifs->is());
seen_files.erase(file);
} else {
@ -247,7 +244,7 @@ void builtin_script_<LoggerPolicy>::add_filter_rule(
}
template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::add_filter_rules(
void rule_based_entry_filter_<LoggerPolicy>::add_rules(
std::unordered_set<std::string>& seen_files, std::istream& is) {
std::string line;
@ -258,12 +255,13 @@ void builtin_script_<LoggerPolicy>::add_filter_rules(
if (line.find_first_not_of(" \t") == std::string::npos) {
continue;
}
add_filter_rule(seen_files, line);
add_rule(seen_files, line);
}
}
template <typename LoggerPolicy>
bool builtin_script_<LoggerPolicy>::filter(entry_interface const& ei) {
filter_action rule_based_entry_filter_<LoggerPolicy>::filter(
entry_interface const& ei) const {
std::string path = ei.unix_dpath();
std::string relpath = path;
@ -278,42 +276,30 @@ bool builtin_script_<LoggerPolicy>::filter(entry_interface const& ei) {
<< r.rule << "'";
switch (r.type) {
case filter_rule::rule_type::include:
return true;
return filter_action::keep;
case filter_rule::rule_type::exclude:
return false;
return filter_action::remove;
}
}
}
LOG_TRACE << "[" << path << "] / [" << relpath << "] matched no rule";
return true;
}
template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::transform(entry_interface& ei) {
for (auto& xfm : transformer_) {
xfm->transform(ei);
}
return filter_action::keep;
}
} // namespace internal
builtin_script::builtin_script(logger& lgr,
std::shared_ptr<file_access const> fa)
: impl_(make_unique_logging_object<impl, internal::builtin_script_,
rule_based_entry_filter::rule_based_entry_filter(
logger& lgr, std::shared_ptr<file_access const> fa)
: impl_(make_unique_logging_object<impl, internal::rule_based_entry_filter_,
logger_policies>(lgr, std::move(fa))) {}
builtin_script::~builtin_script() = default;
rule_based_entry_filter::~rule_based_entry_filter() = default;
bool builtin_script::has_filter() const { return impl_->has_filter(); }
bool builtin_script::has_transform() const { return impl_->has_transform(); }
bool builtin_script::filter(entry_interface const& ei) {
filter_action rule_based_entry_filter::filter(entry_interface const& ei) const {
return impl_->filter(ei);
}
void builtin_script::transform(entry_interface& ei) { impl_->transform(ei); }
} // namespace dwarfs

View File

@ -40,6 +40,8 @@
#include <dwarfs/categorizer.h>
#include <dwarfs/entry_factory.h>
#include <dwarfs/entry_filter.h>
#include <dwarfs/entry_transformer.h>
#include <dwarfs/error.h>
#include <dwarfs/file_access.h>
#include <dwarfs/filesystem_writer.h>
@ -49,7 +51,6 @@
#include <dwarfs/options.h>
#include <dwarfs/os_access.h>
#include <dwarfs/scanner.h>
#include <dwarfs/script.h>
#include <dwarfs/segmenter_factory.h>
#include <dwarfs/thread_pool.h>
#include <dwarfs/util.h>
@ -291,9 +292,13 @@ template <typename LoggerPolicy>
class scanner_ final : public scanner::impl {
public:
scanner_(logger& lgr, worker_group& wg, segmenter_factory& sf,
entry_factory& ef, os_access const& os, std::shared_ptr<script> scr,
entry_factory& ef, os_access const& os,
const scanner_options& options);
void add_filter(std::unique_ptr<entry_filter> filter) override;
void add_transformer(std::unique_ptr<entry_transformer> transformer) override;
void scan(filesystem_writer& fs_writer, std::filesystem::path const& path,
writer_progress& wprog,
std::optional<std::span<std::filesystem::path const>> list,
@ -321,22 +326,32 @@ class scanner_ final : public scanner::impl {
segmenter_factory& segmenter_factory_;
entry_factory& entry_factory_;
os_access const& os_;
std::shared_ptr<script> script_;
std::vector<std::unique_ptr<entry_filter>> filters_;
std::vector<std::unique_ptr<entry_transformer>> transformers_;
};
// Takes ownership of `filter` and appends it to filters_.
template <typename LoggerPolicy>
void scanner_<LoggerPolicy>::add_filter(std::unique_ptr<entry_filter> filter) {
  filters_.emplace_back(std::move(filter));
}
// Takes ownership of `transformer` and appends it to transformers_.
template <typename LoggerPolicy>
void scanner_<LoggerPolicy>::add_transformer(
    std::unique_ptr<entry_transformer> transformer) {
  transformers_.emplace_back(std::move(transformer));
}
template <typename LoggerPolicy>
scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
segmenter_factory& sf, entry_factory& ef,
os_access const& os,
std::shared_ptr<script> scr,
const scanner_options& options)
: LOG_PROXY_INIT(lgr)
, wg_{wg}
, options_{options}
, segmenter_factory_{sf}
, entry_factory_{ef}
, os_{os}
, script_{std::move(scr)} {}
, os_{os} {}
template <typename LoggerPolicy>
std::shared_ptr<entry>
@ -345,15 +360,10 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
file_scanner& fs, bool debug_filter) {
try {
auto pe = entry_factory_.create(os_, name, parent);
bool exclude = false;
if (script_) {
if (script_->has_filter() && !script_->filter(*pe)) {
exclude = true;
} else if (script_->has_transform()) {
script_->transform(*pe);
}
}
bool const exclude =
std::any_of(filters_.begin(), filters_.end(), [&pe](auto const& f) {
return f->filter(*pe) == filter_action::remove;
});
if (debug_filter) {
(*options_.debug_filter_function)(exclude, *pe);
@ -367,11 +377,13 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
return nullptr;
}
if (pe) {
for (auto const& t : transformers_) {
t->transform(*pe);
}
switch (pe->type()) {
case entry::E_FILE:
if (!debug_filter && pe->size() > 0 &&
os_.access(pe->fs_path(), R_OK)) {
if (!debug_filter && pe->size() > 0 && os_.access(pe->fs_path(), R_OK)) {
LOG_ERROR << "cannot access " << pe->path_as_string()
<< ", creating empty file";
pe->override_size(0);
@ -435,7 +447,6 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
prog.errors++;
break;
}
}
return pe;
} catch (const std::system_error& e) {
@ -484,8 +495,8 @@ scanner_<LoggerPolicy>::scan_tree(std::filesystem::path const& path,
fmt::format("'{}' must be a directory", path.string()));
}
if (script_ && script_->has_transform()) {
script_->transform(*root);
for (auto const& t : transformers_) {
t->transform(*root);
}
std::deque<std::shared_ptr<entry>> queue({root});
@ -530,7 +541,7 @@ std::shared_ptr<entry>
scanner_<LoggerPolicy>::scan_list(std::filesystem::path const& path,
std::span<std::filesystem::path const> list,
progress& prog, file_scanner& fs) {
if (script_ && script_->has_filter()) {
if (!filters_.empty()) {
DWARFS_THROW(runtime_error, "cannot use filters with file lists");
}
@ -543,8 +554,8 @@ scanner_<LoggerPolicy>::scan_list(std::filesystem::path const& path,
fmt::format("'{}' must be a directory", path.string()));
}
if (script_ && script_->has_transform()) {
script_->transform(*root);
for (auto const& t : transformers_) {
t->transform(*root);
}
auto ensure_path = [this, &prog, &fs](std::filesystem::path const& path,
@ -1017,10 +1028,9 @@ void scanner_<LoggerPolicy>::scan(
scanner::scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf,
entry_factory& ef, os_access const& os,
std::shared_ptr<script> scr, const scanner_options& options)
const scanner_options& options)
: impl_(
make_unique_logging_object<impl, internal::scanner_, logger_policies>(
lgr, pool.get_worker_group(), sf, ef, os, std::move(scr),
options)) {}
lgr, pool.get_worker_group(), sf, ef, os, options)) {}
} // namespace dwarfs

View File

@ -55,7 +55,6 @@
#include <dwarfs/block_compressor.h>
#include <dwarfs/block_compressor_parser.h>
#include <dwarfs/builtin_script.h>
#include <dwarfs/categorizer.h>
#include <dwarfs/category_parser.h>
#include <dwarfs/checksum.h>
@ -77,8 +76,8 @@
#include <dwarfs/mmap.h>
#include <dwarfs/options.h>
#include <dwarfs/os_access.h>
#include <dwarfs/rule_based_entry_filter.h>
#include <dwarfs/scanner.h>
#include <dwarfs/script.h>
#include <dwarfs/segmenter_factory.h>
#include <dwarfs/string.h>
#include <dwarfs/terminal.h>
@ -928,18 +927,17 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
iol.term, iol.err, pg_mode,
recompress ? console_writer::REWRITE : console_writer::NORMAL, logopts);
std::shared_ptr<script> script;
if (!filter.empty() or vm.count("chmod")) {
auto bs = std::make_shared<builtin_script>(lgr, iol.file);
std::unique_ptr<rule_based_entry_filter> rule_filter;
if (!filter.empty()) {
bs->set_root_path(path);
rule_filter = std::make_unique<rule_based_entry_filter>(lgr, iol.file);
rule_filter->set_root_path(path);
for (auto const& rule : filter) {
auto srule = sys_string_to_string(rule);
try {
bs->add_filter_rule(srule);
rule_filter->add_rule(srule);
} catch (std::exception const& e) {
iol.err << "error: could not parse filter rule '" << srule
<< "': " << e.what() << "\n";
@ -948,24 +946,22 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
}
}
std::vector<std::unique_ptr<entry_transformer>> transformers;
if (vm.count("chmod")) {
if (chmod_str == "norm") {
chmod_str = "ug-st,=Xr";
}
auto chmod_exprs =
split_to<std::vector<std::string_view>>(chmod_str, ',');
auto chmod_exprs = split_to<std::vector<std::string_view>>(chmod_str, ',');
auto mask = get_current_umask();
for (auto expr : chmod_exprs) {
bs->add_transformer(create_chmod_entry_transformer(expr, mask));
transformers.push_back(create_chmod_entry_transformer(expr, mask));
}
}
script = bs;
}
if (vm.count("set-owner")) {
options.uid = uid;
}
@ -1339,7 +1335,15 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
thread_pool scanner_pool(lgr, *iol.os, "scanner", num_scanner_workers);
scanner s(lgr, scanner_pool, sf, ef, *iol.os, std::move(script), options);
scanner s(lgr, scanner_pool, sf, ef, *iol.os, options);
if (rule_filter) {
s.add_filter(std::move(rule_filter));
}
for (auto& t : transformers) {
s.add_transformer(std::move(t));
}
s.scan(*fsw, path, prog, input_list, iol.file);

View File

@ -126,8 +126,7 @@ std::string make_filesystem(::benchmark::State const& state) {
segmenter_factory sf(lgr, prog, cfg);
entry_factory ef;
scanner s(lgr, pool, sf, ef, *os, std::make_shared<test::script_mock>(),
options);
scanner s(lgr, pool, sf, ef, *os, options);
std::ostringstream oss;

View File

@ -36,7 +36,6 @@
#include <fmt/format.h>
#include <dwarfs/block_compressor.h>
#include <dwarfs/builtin_script.h>
#include <dwarfs/entry_factory.h>
#include <dwarfs/file_stat.h>
#include <dwarfs/file_type.h>
@ -47,6 +46,7 @@
#include <dwarfs/logger.h>
#include <dwarfs/mmif.h>
#include <dwarfs/options.h>
#include <dwarfs/rule_based_entry_filter.h>
#include <dwarfs/scanner.h>
#include <dwarfs/segmenter_factory.h>
#include <dwarfs/thread_pool.h>
@ -70,15 +70,17 @@ namespace {
std::string const default_file_hash_algo{"xxh3-128"};
// TODO: jeeeez, this is ugly :/
std::string
build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
std::string const& compression,
segmenter::config const& cfg = segmenter::config(),
scanner_options const& options = scanner_options(),
writer_progress* prog = nullptr,
std::shared_ptr<script> scr = nullptr,
std::shared_ptr<test::filter_transformer_data> ftd = nullptr,
std::optional<std::span<std::filesystem::path const>> input_list =
std::nullopt) {
std::nullopt,
std::unique_ptr<entry_filter> filter = nullptr) {
// force multithreading
thread_pool pool(lgr, *input, "worker", 4);
@ -99,7 +101,16 @@ build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
segmenter_factory sf(lgr, *prog, sf_cfg);
entry_factory ef;
scanner s(lgr, pool, sf, ef, *input, scr, options);
scanner s(lgr, pool, sf, ef, *input, options);
if (ftd) {
s.add_filter(std::make_unique<test::mock_filter>(ftd));
s.add_transformer(std::make_unique<test::mock_transformer>(ftd));
}
if (filter) {
s.add_filter(std::move(filter));
}
std::ostringstream oss;
@ -171,13 +182,13 @@ void basic_end_to_end_test(std::string const& compressor,
writer_progress wprog;
auto scr = std::make_shared<test::script_mock>();
auto ftd = std::make_shared<test::filter_transformer_data>();
auto fsimage =
build_dwarfs(lgr, input, compressor, cfg, options, &wprog, scr);
build_dwarfs(lgr, input, compressor, cfg, options, &wprog, ftd);
EXPECT_EQ(14, scr->filter_calls.size());
EXPECT_EQ(15, scr->transform_calls.size());
EXPECT_EQ(14, ftd->filter_calls.size());
EXPECT_EQ(15, ftd->transform_calls.size());
auto image_size = fsimage.size();
auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));
@ -911,15 +922,15 @@ class filter_test
: public testing::TestWithParam<dwarfs::test::filter_test_data> {
public:
test::test_logger lgr;
std::shared_ptr<builtin_script> scr;
std::unique_ptr<rule_based_entry_filter> rbf;
std::shared_ptr<test::test_file_access> tfa;
std::shared_ptr<test::os_access_mock> input;
void SetUp() override {
tfa = std::make_shared<test::test_file_access>();
scr = std::make_shared<builtin_script>(lgr, tfa);
scr->set_root_path("");
rbf = std::make_unique<rule_based_entry_filter>(lgr, tfa);
rbf->set_root_path("");
input = std::make_shared<test::os_access_mock>();
@ -943,7 +954,7 @@ class filter_test
void set_filter_rules(test::filter_test_data const& spec) {
std::istringstream iss(spec.filter());
scr->add_filter_rules(iss);
rbf->add_rules(iss);
}
std::string get_filter_debug_output(test::filter_test_data const& spec,
@ -963,7 +974,9 @@ class filter_test
thread_pool pool(lgr, *input, "worker", 1);
segmenter_factory sf(lgr, prog);
entry_factory ef;
scanner s(lgr, pool, sf, ef, *input, scr, options);
scanner s(lgr, pool, sf, ef, *input, options);
s.add_filter(std::move(rbf));
block_compressor bc("null");
std::ostringstream null;
@ -975,7 +988,7 @@ class filter_test
}
void TearDown() override {
scr.reset();
rbf.reset();
input.reset();
tfa.reset();
}
@ -991,7 +1004,8 @@ TEST_P(filter_test, filesystem) {
scanner_options options;
options.remove_empty_dirs = true;
auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, nullptr, scr);
auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, nullptr,
nullptr, std::nullopt, std::move(rbf));
auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));

View File

@ -38,11 +38,12 @@
#include <variant>
#include <vector>
#include <dwarfs/entry_filter.h>
#include <dwarfs/entry_interface.h>
#include <dwarfs/entry_transformer.h>
#include <dwarfs/file_access.h>
#include <dwarfs/file_stat.h>
#include <dwarfs/os_access.h>
#include <dwarfs/script.h>
#include <dwarfs/terminal.h>
#include <dwarfs/tool/iolayer.h>
@ -185,27 +186,20 @@ class os_access_mock : public os_access {
size_t map_file_delay_min_size_{0};
};
class script_mock : public script {
public:
bool has_filter() const override { return true; }
bool has_transform() const override { return true; }
bool filter(entry_interface const& ei) override {
filter_calls.push_back({ei.unix_dpath(), ei.name(), ei.size(),
ei.is_directory(), ei.get_permissions(),
ei.get_uid(), ei.get_gid(), ei.get_atime(),
ei.get_mtime(), ei.get_ctime()});
return true;
}
void transform(entry_interface& ei) override {
transform_calls.push_back({ei.unix_dpath(), ei.name(), ei.size(),
ei.is_directory(), ei.get_permissions(),
ei.get_uid(), ei.get_gid(), ei.get_atime(),
ei.get_mtime(), ei.get_ctime()});
}
struct filter_transformer_data {
struct entry_data {
entry_data(entry_interface const& ei)
: path{ei.unix_dpath()}
, name{ei.name()}
, size{ei.size()}
, is_directory{ei.is_directory()}
, mode{ei.get_permissions()}
, uid{ei.get_uid()}
, gid{ei.get_gid()}
, atime{ei.get_atime()}
, mtime{ei.get_mtime()}
, ctime{ei.get_ctime()} {}
std::string path;
std::string name;
size_t size;
@ -222,6 +216,33 @@ class script_mock : public script {
std::vector<entry_data> transform_calls;
};
// Test double for entry_filter: records every entry it is asked about in the
// shared filter_transformer_data and never removes anything.
class mock_filter : public entry_filter {
 public:
  // `explicit` prevents a shared_ptr from silently converting into a filter.
  explicit mock_filter(std::shared_ptr<filter_transformer_data> data)
      : data_{std::move(data)} {}

  // Appends an entry_data built from `ei` to data_->filter_calls and always
  // returns filter_action::keep. Marked `override` so the compiler rejects
  // this class if the signature ever drifts from entry_filter::filter().
  filter_action filter(entry_interface const& ei) const override {
    data_->filter_calls.emplace_back(ei);
    return filter_action::keep;
  }

 private:
  std::shared_ptr<filter_transformer_data> data_;
};
class mock_transformer : public entry_transformer {
public:
mock_transformer(std::shared_ptr<filter_transformer_data> data)
: data_{std::move(data)} {}
void transform(entry_interface& ei) {
data_->transform_calls.emplace_back(ei);
}
private:
std::shared_ptr<filter_transformer_data> data_;
};
class test_terminal : public terminal {
public:
test_terminal(std::ostream& out, std::ostream& err);