refactor: replace script with separate filters and transformers

This commit is contained in:
Marcus Holland-Moritz 2024-08-02 21:44:25 +02:00
parent 7eb47649ff
commit 0987dba63d
14 changed files with 299 additions and 269 deletions

View File

@ -647,7 +647,6 @@ list(APPEND LIBDWARFS_READER_SRC
) )
list(APPEND LIBDWARFS_WRITER_SRC list(APPEND LIBDWARFS_WRITER_SRC
src/dwarfs/builtin_script.cpp
src/dwarfs/categorizer.cpp src/dwarfs/categorizer.cpp
src/dwarfs/category_parser.cpp src/dwarfs/category_parser.cpp
src/dwarfs/chmod_entry_transformer.cpp src/dwarfs/chmod_entry_transformer.cpp
@ -674,6 +673,7 @@ list(APPEND LIBDWARFS_WRITER_SRC
src/dwarfs/internal/scanner_progress.cpp src/dwarfs/internal/scanner_progress.cpp
src/dwarfs/internal/similarity.cpp src/dwarfs/internal/similarity.cpp
src/dwarfs/internal/similarity_ordering.cpp src/dwarfs/internal/similarity_ordering.cpp
src/dwarfs/rule_based_entry_filter.cpp
src/dwarfs/scanner.cpp src/dwarfs/scanner.cpp
src/dwarfs/segmenter.cpp src/dwarfs/segmenter.cpp
src/dwarfs/segmenter_factory.cpp src/dwarfs/segmenter_factory.cpp

View File

@ -21,7 +21,6 @@
#pragma once #pragma once
#include <cstdint>
#include <memory> #include <memory>
#include <string_view> #include <string_view>

View File

@ -21,22 +21,20 @@
#pragma once #pragma once
#include <memory>
#include <span>
namespace dwarfs { namespace dwarfs {
class entry_interface; class entry_interface;
class script { enum class filter_action {
keep,
remove,
};
class entry_filter {
public: public:
virtual ~script() = default; virtual ~entry_filter() = default;
virtual bool has_filter() const = 0; virtual filter_action filter(entry_interface const& ei) const = 0;
virtual bool has_transform() const = 0;
virtual bool filter(entry_interface const& ei) = 0;
virtual void transform(entry_interface& ei) = 0;
}; };
} // namespace dwarfs } // namespace dwarfs

View File

@ -38,7 +38,6 @@ namespace dwarfs {
class logger; class logger;
class os_access; class os_access;
class progress; class progress;
class script;
struct inode_options; struct inode_options;

View File

@ -24,53 +24,39 @@
#include <filesystem> #include <filesystem>
#include <iosfwd> #include <iosfwd>
#include <memory> #include <memory>
#include <string> #include <string_view>
#include <dwarfs/script.h> #include <dwarfs/entry_filter.h>
#include <dwarfs/file_stat.h>
namespace dwarfs { namespace dwarfs {
class entry_transformer;
class file_access; class file_access;
class logger; class logger;
class builtin_script : public script { class rule_based_entry_filter : public entry_filter {
public: public:
builtin_script(logger& lgr, std::shared_ptr<file_access const> fa); rule_based_entry_filter(logger& lgr, std::shared_ptr<file_access const> fa);
~builtin_script(); ~rule_based_entry_filter();
void set_root_path(std::filesystem::path const& path) { void set_root_path(std::filesystem::path const& path) {
impl_->set_root_path(path); impl_->set_root_path(path);
} }
void add_filter_rule(std::string const& rule) { void add_rule(std::string_view rule) { impl_->add_rule(rule); }
impl_->add_filter_rule(rule);
}
void add_filter_rules(std::istream& is) { impl_->add_filter_rules(is); } void add_rules(std::istream& is) { impl_->add_rules(is); }
void add_transformer(std::unique_ptr<entry_transformer>&& xfm) { filter_action filter(entry_interface const& ei) const override;
impl_->add_transformer(std::move(xfm));
}
bool has_filter() const override;
bool has_transform() const override;
bool filter(entry_interface const& ei) override;
void transform(entry_interface& ei) override;
class impl { class impl {
public: public:
virtual ~impl() = default; virtual ~impl() = default;
virtual void set_root_path(std::filesystem::path const& path) = 0; virtual void set_root_path(std::filesystem::path const& path) = 0;
virtual void add_filter_rule(std::string const& rule) = 0; virtual void add_rule(std::string_view rule) = 0;
virtual void add_filter_rules(std::istream& is) = 0; virtual void add_rules(std::istream& is) = 0;
virtual void add_transformer(std::unique_ptr<entry_transformer>&& xfm) = 0; virtual filter_action filter(entry_interface const& ei) const = 0;
virtual bool filter(entry_interface const& ei) = 0;
virtual void transform(entry_interface& ei) = 0;
virtual bool has_filter() const = 0;
virtual bool has_transform() const = 0;
}; };
private: private:

View File

@ -31,22 +31,31 @@ namespace dwarfs {
struct scanner_options; struct scanner_options;
class entry_filter;
class entry_transformer;
class entry_factory; class entry_factory;
class file_access; class file_access;
class filesystem_writer; class filesystem_writer;
class logger; class logger;
class os_access; class os_access;
class writer_progress; class writer_progress;
class script;
class segmenter_factory; class segmenter_factory;
class thread_pool; class thread_pool;
class scanner { class scanner {
public: public:
scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf, scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf,
entry_factory& ef, os_access const& os, std::shared_ptr<script> scr, entry_factory& ef, os_access const& os,
const scanner_options& options); const scanner_options& options);
void add_filter(std::unique_ptr<entry_filter> filter) {
impl_->add_filter(std::move(filter));
}
void add_transformer(std::unique_ptr<entry_transformer> transformer) {
impl_->add_transformer(std::move(transformer));
}
void scan( void scan(
filesystem_writer& fsw, const std::filesystem::path& path, filesystem_writer& fsw, const std::filesystem::path& path,
writer_progress& prog, writer_progress& prog,
@ -59,6 +68,11 @@ class scanner {
public: public:
virtual ~impl() = default; virtual ~impl() = default;
virtual void add_filter(std::unique_ptr<entry_filter> filter) = 0;
virtual void
add_transformer(std::unique_ptr<entry_transformer> transformer) = 0;
virtual void virtual void
scan(filesystem_writer& fsw, const std::filesystem::path& path, scan(filesystem_writer& fsw, const std::filesystem::path& path,
writer_progress& prog, writer_progress& prog,

View File

@ -21,6 +21,7 @@
#include <dwarfs/chmod_entry_transformer.h> #include <dwarfs/chmod_entry_transformer.h>
#include <dwarfs/entry_interface.h> #include <dwarfs/entry_interface.h>
#include <dwarfs/entry_transformer.h>
#include <dwarfs/internal/chmod_transformer.h> #include <dwarfs/internal/chmod_transformer.h>

View File

@ -48,7 +48,6 @@
#include <dwarfs/mmif.h> #include <dwarfs/mmif.h>
#include <dwarfs/options.h> #include <dwarfs/options.h>
#include <dwarfs/os_access.h> #include <dwarfs/os_access.h>
#include <dwarfs/script.h>
#include <dwarfs/util.h> #include <dwarfs/util.h>
#include <dwarfs/internal/entry.h> #include <dwarfs/internal/entry.h>

View File

@ -25,11 +25,10 @@
#include <fmt/format.h> #include <fmt/format.h>
#include <dwarfs/builtin_script.h>
#include <dwarfs/entry_interface.h> #include <dwarfs/entry_interface.h>
#include <dwarfs/entry_transformer.h>
#include <dwarfs/file_access.h> #include <dwarfs/file_access.h>
#include <dwarfs/logger.h> #include <dwarfs/logger.h>
#include <dwarfs/rule_based_entry_filter.h>
#include <dwarfs/util.h> #include <dwarfs/util.h>
namespace dwarfs { namespace dwarfs {
@ -65,44 +64,34 @@ struct filter_rule {
}; };
template <typename LoggerPolicy> template <typename LoggerPolicy>
class builtin_script_ : public builtin_script::impl { class rule_based_entry_filter_ : public rule_based_entry_filter::impl {
public: public:
builtin_script_(logger& lgr, std::shared_ptr<file_access const> fa); rule_based_entry_filter_(logger& lgr, std::shared_ptr<file_access const> fa);
void set_root_path(fs::path const& path) override; void set_root_path(fs::path const& path) override;
void add_filter_rule(std::string const& rule) override; void add_rule(std::string_view rule) override;
void add_filter_rules(std::istream& is) override; void add_rules(std::istream& is) override;
filter_action filter(entry_interface const& ei) const override;
void add_transformer(std::unique_ptr<entry_transformer>&& xfm) override {
transformer_.emplace_back(std::move(xfm));
}
bool filter(entry_interface const& ei) override;
void transform(entry_interface& ei) override;
bool has_filter() const override { return !filter_.empty(); }
bool has_transform() const override { return !transformer_.empty(); }
private: private:
void add_filter_rule(std::unordered_set<std::string>& seen_files, void
std::string const& rule); add_rule(std::unordered_set<std::string>& seen_files, std::string_view rule);
void add_filter_rules(std::unordered_set<std::string>& seen_files, void add_rules(std::unordered_set<std::string>& seen_files, std::istream& is);
std::istream& is);
filter_rule compile_filter_rule(std::string const& rule); filter_rule compile_filter_rule(std::string_view rule);
LOG_PROXY_DECL(LoggerPolicy); LOG_PROXY_DECL(LoggerPolicy);
std::string root_path_; std::string root_path_;
std::vector<filter_rule> filter_; std::vector<filter_rule> filter_;
std::vector<std::unique_ptr<entry_transformer>> transformer_;
std::shared_ptr<file_access const> fa_; std::shared_ptr<file_access const> fa_;
}; };
template <typename LoggerPolicy> template <typename LoggerPolicy>
auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule) auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
-> filter_rule { std::string_view rule_sv) -> filter_rule {
std::string r; std::string rule{rule_sv};
std::string re;
filter_rule::rule_type type; filter_rule::rule_type type;
auto* p = rule.c_str(); auto* p = rule.c_str();
@ -125,15 +114,15 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
bool floating = *p && *p != '/'; bool floating = *p && *p != '/';
if (floating) { if (floating) {
r += ".*/"; re += ".*/";
} }
while (*p) { while (*p) {
switch (*p) { switch (*p) {
case '\\': case '\\':
r += *p++; re += *p++;
if (p) { if (p) {
r += *p++; re += *p++;
} }
continue; continue;
@ -144,14 +133,14 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
} }
switch (nstar) { switch (nstar) {
case 1: case 1:
if (r.ends_with('/') and (*p == '/' or *p == '\0')) { if (re.ends_with('/') and (*p == '/' or *p == '\0')) {
r += "[^/]+"; re += "[^/]+";
} else { } else {
r += "[^/]*"; re += "[^/]*";
} }
break; break;
case 2: case 2:
r += ".*"; re += ".*";
break; break;
default: default:
throw std::runtime_error("too many *s"); throw std::runtime_error("too many *s");
@ -160,7 +149,7 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
continue; continue;
case '?': case '?':
r += "[^/]"; re += "[^/]";
break; break;
case '.': case '.':
@ -172,32 +161,33 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
case '{': case '{':
case '}': case '}':
case '|': case '|':
r += '\\'; re += '\\';
r += *p; re += *p;
break; break;
default: default:
r += *p; re += *p;
break; break;
} }
++p; ++p;
} }
LOG_DEBUG << "'" << rule << "' -> '" << r << "' [floating=" << floating LOG_DEBUG << "'" << rule << "' -> '" << re << "' [floating=" << floating
<< "]"; << "]";
return filter_rule(type, floating, r, rule); return filter_rule(type, floating, re, rule);
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
builtin_script_<LoggerPolicy>::builtin_script_( rule_based_entry_filter_<LoggerPolicy>::rule_based_entry_filter_(
logger& lgr, std::shared_ptr<file_access const> fa) logger& lgr, std::shared_ptr<file_access const> fa)
: log_{lgr} : log_{lgr}
, fa_{std::move(fa)} {} , fa_{std::move(fa)} {}
template <typename LoggerPolicy> template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::set_root_path(fs::path const& path) { void rule_based_entry_filter_<LoggerPolicy>::set_root_path(
fs::path const& path) {
// TODO: this whole thing needs to be windowsized // TODO: this whole thing needs to be windowsized
root_path_ = u8string_to_string(path.u8string()); root_path_ = u8string_to_string(path.u8string());
@ -215,22 +205,29 @@ void builtin_script_<LoggerPolicy>::set_root_path(fs::path const& path) {
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::add_filter_rule(std::string const& rule) { void rule_based_entry_filter_<LoggerPolicy>::add_rule(std::string_view rule) {
std::unordered_set<std::string> seen_files; std::unordered_set<std::string> seen_files;
add_filter_rule(seen_files, rule); add_rule(seen_files, rule);
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::add_filter_rules(std::istream& is) { void rule_based_entry_filter_<LoggerPolicy>::add_rules(std::istream& is) {
std::unordered_set<std::string> seen_files; std::unordered_set<std::string> seen_files;
add_filter_rules(seen_files, is); add_rules(seen_files, is);
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::add_filter_rule( void rule_based_entry_filter_<LoggerPolicy>::add_rule(
std::unordered_set<std::string>& seen_files, std::string const& rule) { std::unordered_set<std::string>& seen_files, std::string_view rule) {
if (rule.starts_with('.')) { if (rule.starts_with('.')) {
auto file = std::regex_replace(rule, std::regex("^. +"), ""); auto file_pos = rule.find_first_not_of(" \t", 1);
if (file_pos == std::string::npos) {
throw std::runtime_error(
fmt::format("no file specified in merge rule: {}", rule));
}
auto file = std::string(rule.substr(file_pos));
if (!seen_files.emplace(file).second) { if (!seen_files.emplace(file).second) {
throw std::runtime_error( throw std::runtime_error(
@ -238,7 +235,7 @@ void builtin_script_<LoggerPolicy>::add_filter_rule(
} }
auto ifs = fa_->open_input(file); auto ifs = fa_->open_input(file);
add_filter_rules(seen_files, ifs->is()); add_rules(seen_files, ifs->is());
seen_files.erase(file); seen_files.erase(file);
} else { } else {
@ -247,7 +244,7 @@ void builtin_script_<LoggerPolicy>::add_filter_rule(
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::add_filter_rules( void rule_based_entry_filter_<LoggerPolicy>::add_rules(
std::unordered_set<std::string>& seen_files, std::istream& is) { std::unordered_set<std::string>& seen_files, std::istream& is) {
std::string line; std::string line;
@ -258,12 +255,13 @@ void builtin_script_<LoggerPolicy>::add_filter_rules(
if (line.find_first_not_of(" \t") == std::string::npos) { if (line.find_first_not_of(" \t") == std::string::npos) {
continue; continue;
} }
add_filter_rule(seen_files, line); add_rule(seen_files, line);
} }
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
bool builtin_script_<LoggerPolicy>::filter(entry_interface const& ei) { filter_action rule_based_entry_filter_<LoggerPolicy>::filter(
entry_interface const& ei) const {
std::string path = ei.unix_dpath(); std::string path = ei.unix_dpath();
std::string relpath = path; std::string relpath = path;
@ -278,42 +276,30 @@ bool builtin_script_<LoggerPolicy>::filter(entry_interface const& ei) {
<< r.rule << "'"; << r.rule << "'";
switch (r.type) { switch (r.type) {
case filter_rule::rule_type::include: case filter_rule::rule_type::include:
return true; return filter_action::keep;
case filter_rule::rule_type::exclude: case filter_rule::rule_type::exclude:
return false; return filter_action::remove;
} }
} }
} }
LOG_TRACE << "[" << path << "] / [" << relpath << "] matched no rule"; LOG_TRACE << "[" << path << "] / [" << relpath << "] matched no rule";
return true; return filter_action::keep;
}
template <typename LoggerPolicy>
void builtin_script_<LoggerPolicy>::transform(entry_interface& ei) {
for (auto& xfm : transformer_) {
xfm->transform(ei);
}
} }
} // namespace internal } // namespace internal
builtin_script::builtin_script(logger& lgr, rule_based_entry_filter::rule_based_entry_filter(
std::shared_ptr<file_access const> fa) logger& lgr, std::shared_ptr<file_access const> fa)
: impl_(make_unique_logging_object<impl, internal::builtin_script_, : impl_(make_unique_logging_object<impl, internal::rule_based_entry_filter_,
logger_policies>(lgr, std::move(fa))) {} logger_policies>(lgr, std::move(fa))) {}
builtin_script::~builtin_script() = default; rule_based_entry_filter::~rule_based_entry_filter() = default;
bool builtin_script::has_filter() const { return impl_->has_filter(); } filter_action rule_based_entry_filter::filter(entry_interface const& ei) const {
bool builtin_script::has_transform() const { return impl_->has_transform(); }
bool builtin_script::filter(entry_interface const& ei) {
return impl_->filter(ei); return impl_->filter(ei);
} }
void builtin_script::transform(entry_interface& ei) { impl_->transform(ei); }
} // namespace dwarfs } // namespace dwarfs

View File

@ -40,6 +40,8 @@
#include <dwarfs/categorizer.h> #include <dwarfs/categorizer.h>
#include <dwarfs/entry_factory.h> #include <dwarfs/entry_factory.h>
#include <dwarfs/entry_filter.h>
#include <dwarfs/entry_transformer.h>
#include <dwarfs/error.h> #include <dwarfs/error.h>
#include <dwarfs/file_access.h> #include <dwarfs/file_access.h>
#include <dwarfs/filesystem_writer.h> #include <dwarfs/filesystem_writer.h>
@ -49,7 +51,6 @@
#include <dwarfs/options.h> #include <dwarfs/options.h>
#include <dwarfs/os_access.h> #include <dwarfs/os_access.h>
#include <dwarfs/scanner.h> #include <dwarfs/scanner.h>
#include <dwarfs/script.h>
#include <dwarfs/segmenter_factory.h> #include <dwarfs/segmenter_factory.h>
#include <dwarfs/thread_pool.h> #include <dwarfs/thread_pool.h>
#include <dwarfs/util.h> #include <dwarfs/util.h>
@ -291,9 +292,13 @@ template <typename LoggerPolicy>
class scanner_ final : public scanner::impl { class scanner_ final : public scanner::impl {
public: public:
scanner_(logger& lgr, worker_group& wg, segmenter_factory& sf, scanner_(logger& lgr, worker_group& wg, segmenter_factory& sf,
entry_factory& ef, os_access const& os, std::shared_ptr<script> scr, entry_factory& ef, os_access const& os,
const scanner_options& options); const scanner_options& options);
void add_filter(std::unique_ptr<entry_filter> filter) override;
void add_transformer(std::unique_ptr<entry_transformer> transformer) override;
void scan(filesystem_writer& fs_writer, std::filesystem::path const& path, void scan(filesystem_writer& fs_writer, std::filesystem::path const& path,
writer_progress& wprog, writer_progress& wprog,
std::optional<std::span<std::filesystem::path const>> list, std::optional<std::span<std::filesystem::path const>> list,
@ -321,22 +326,32 @@ class scanner_ final : public scanner::impl {
segmenter_factory& segmenter_factory_; segmenter_factory& segmenter_factory_;
entry_factory& entry_factory_; entry_factory& entry_factory_;
os_access const& os_; os_access const& os_;
std::shared_ptr<script> script_; std::vector<std::unique_ptr<entry_filter>> filters_;
std::vector<std::unique_ptr<entry_transformer>> transformers_;
}; };
template <typename LoggerPolicy>
void scanner_<LoggerPolicy>::add_filter(std::unique_ptr<entry_filter> filter) {
filters_.push_back(std::move(filter));
}
template <typename LoggerPolicy>
void scanner_<LoggerPolicy>::add_transformer(
std::unique_ptr<entry_transformer> transformer) {
transformers_.push_back(std::move(transformer));
}
template <typename LoggerPolicy> template <typename LoggerPolicy>
scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg, scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
segmenter_factory& sf, entry_factory& ef, segmenter_factory& sf, entry_factory& ef,
os_access const& os, os_access const& os,
std::shared_ptr<script> scr,
const scanner_options& options) const scanner_options& options)
: LOG_PROXY_INIT(lgr) : LOG_PROXY_INIT(lgr)
, wg_{wg} , wg_{wg}
, options_{options} , options_{options}
, segmenter_factory_{sf} , segmenter_factory_{sf}
, entry_factory_{ef} , entry_factory_{ef}
, os_{os} , os_{os} {}
, script_{std::move(scr)} {}
template <typename LoggerPolicy> template <typename LoggerPolicy>
std::shared_ptr<entry> std::shared_ptr<entry>
@ -345,15 +360,10 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
file_scanner& fs, bool debug_filter) { file_scanner& fs, bool debug_filter) {
try { try {
auto pe = entry_factory_.create(os_, name, parent); auto pe = entry_factory_.create(os_, name, parent);
bool exclude = false; bool const exclude =
std::any_of(filters_.begin(), filters_.end(), [&pe](auto const& f) {
if (script_) { return f->filter(*pe) == filter_action::remove;
if (script_->has_filter() && !script_->filter(*pe)) { });
exclude = true;
} else if (script_->has_transform()) {
script_->transform(*pe);
}
}
if (debug_filter) { if (debug_filter) {
(*options_.debug_filter_function)(exclude, *pe); (*options_.debug_filter_function)(exclude, *pe);
@ -367,74 +377,75 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
return nullptr; return nullptr;
} }
if (pe) { for (auto const& t : transformers_) {
switch (pe->type()) { t->transform(*pe);
case entry::E_FILE: }
if (!debug_filter && pe->size() > 0 &&
os_.access(pe->fs_path(), R_OK)) {
LOG_ERROR << "cannot access " << pe->path_as_string()
<< ", creating empty file";
pe->override_size(0);
prog.errors++;
}
break;
case entry::E_DEVICE: switch (pe->type()) {
if (!options_.with_devices) { case entry::E_FILE:
return nullptr; if (!debug_filter && pe->size() > 0 && os_.access(pe->fs_path(), R_OK)) {
} LOG_ERROR << "cannot access " << pe->path_as_string()
break; << ", creating empty file";
pe->override_size(0);
case entry::E_OTHER:
if (!options_.with_specials) {
return nullptr;
}
break;
default:
break;
}
parent->add(pe);
switch (pe->type()) {
case entry::E_DIR:
// prog.current.store(pe.get());
prog.dirs_found++;
if (!debug_filter) {
pe->scan(os_, prog);
}
break;
case entry::E_FILE:
prog.files_found++;
if (!debug_filter) {
fs.scan(dynamic_cast<file*>(pe.get()));
}
break;
case entry::E_LINK:
prog.symlinks_found++;
if (!debug_filter) {
pe->scan(os_, prog);
}
prog.symlinks_scanned++;
break;
case entry::E_DEVICE:
case entry::E_OTHER:
prog.specials_found++;
if (!debug_filter) {
pe->scan(os_, prog);
}
break;
default:
LOG_ERROR << "unsupported entry type: " << int(pe->type()) << " ("
<< pe->fs_path() << ")";
prog.errors++; prog.errors++;
break;
} }
break;
case entry::E_DEVICE:
if (!options_.with_devices) {
return nullptr;
}
break;
case entry::E_OTHER:
if (!options_.with_specials) {
return nullptr;
}
break;
default:
break;
}
parent->add(pe);
switch (pe->type()) {
case entry::E_DIR:
// prog.current.store(pe.get());
prog.dirs_found++;
if (!debug_filter) {
pe->scan(os_, prog);
}
break;
case entry::E_FILE:
prog.files_found++;
if (!debug_filter) {
fs.scan(dynamic_cast<file*>(pe.get()));
}
break;
case entry::E_LINK:
prog.symlinks_found++;
if (!debug_filter) {
pe->scan(os_, prog);
}
prog.symlinks_scanned++;
break;
case entry::E_DEVICE:
case entry::E_OTHER:
prog.specials_found++;
if (!debug_filter) {
pe->scan(os_, prog);
}
break;
default:
LOG_ERROR << "unsupported entry type: " << int(pe->type()) << " ("
<< pe->fs_path() << ")";
prog.errors++;
break;
} }
return pe; return pe;
@ -484,8 +495,8 @@ scanner_<LoggerPolicy>::scan_tree(std::filesystem::path const& path,
fmt::format("'{}' must be a directory", path.string())); fmt::format("'{}' must be a directory", path.string()));
} }
if (script_ && script_->has_transform()) { for (auto const& t : transformers_) {
script_->transform(*root); t->transform(*root);
} }
std::deque<std::shared_ptr<entry>> queue({root}); std::deque<std::shared_ptr<entry>> queue({root});
@ -530,7 +541,7 @@ std::shared_ptr<entry>
scanner_<LoggerPolicy>::scan_list(std::filesystem::path const& path, scanner_<LoggerPolicy>::scan_list(std::filesystem::path const& path,
std::span<std::filesystem::path const> list, std::span<std::filesystem::path const> list,
progress& prog, file_scanner& fs) { progress& prog, file_scanner& fs) {
if (script_ && script_->has_filter()) { if (!filters_.empty()) {
DWARFS_THROW(runtime_error, "cannot use filters with file lists"); DWARFS_THROW(runtime_error, "cannot use filters with file lists");
} }
@ -543,8 +554,8 @@ scanner_<LoggerPolicy>::scan_list(std::filesystem::path const& path,
fmt::format("'{}' must be a directory", path.string())); fmt::format("'{}' must be a directory", path.string()));
} }
if (script_ && script_->has_transform()) { for (auto const& t : transformers_) {
script_->transform(*root); t->transform(*root);
} }
auto ensure_path = [this, &prog, &fs](std::filesystem::path const& path, auto ensure_path = [this, &prog, &fs](std::filesystem::path const& path,
@ -1017,10 +1028,9 @@ void scanner_<LoggerPolicy>::scan(
scanner::scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf, scanner::scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf,
entry_factory& ef, os_access const& os, entry_factory& ef, os_access const& os,
std::shared_ptr<script> scr, const scanner_options& options) const scanner_options& options)
: impl_( : impl_(
make_unique_logging_object<impl, internal::scanner_, logger_policies>( make_unique_logging_object<impl, internal::scanner_, logger_policies>(
lgr, pool.get_worker_group(), sf, ef, os, std::move(scr), lgr, pool.get_worker_group(), sf, ef, os, options)) {}
options)) {}
} // namespace dwarfs } // namespace dwarfs

View File

@ -55,7 +55,6 @@
#include <dwarfs/block_compressor.h> #include <dwarfs/block_compressor.h>
#include <dwarfs/block_compressor_parser.h> #include <dwarfs/block_compressor_parser.h>
#include <dwarfs/builtin_script.h>
#include <dwarfs/categorizer.h> #include <dwarfs/categorizer.h>
#include <dwarfs/category_parser.h> #include <dwarfs/category_parser.h>
#include <dwarfs/checksum.h> #include <dwarfs/checksum.h>
@ -77,8 +76,8 @@
#include <dwarfs/mmap.h> #include <dwarfs/mmap.h>
#include <dwarfs/options.h> #include <dwarfs/options.h>
#include <dwarfs/os_access.h> #include <dwarfs/os_access.h>
#include <dwarfs/rule_based_entry_filter.h>
#include <dwarfs/scanner.h> #include <dwarfs/scanner.h>
#include <dwarfs/script.h>
#include <dwarfs/segmenter_factory.h> #include <dwarfs/segmenter_factory.h>
#include <dwarfs/string.h> #include <dwarfs/string.h>
#include <dwarfs/terminal.h> #include <dwarfs/terminal.h>
@ -928,42 +927,39 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
iol.term, iol.err, pg_mode, iol.term, iol.err, pg_mode,
recompress ? console_writer::REWRITE : console_writer::NORMAL, logopts); recompress ? console_writer::REWRITE : console_writer::NORMAL, logopts);
std::shared_ptr<script> script; std::unique_ptr<rule_based_entry_filter> rule_filter;
if (!filter.empty() or vm.count("chmod")) { if (!filter.empty()) {
auto bs = std::make_shared<builtin_script>(lgr, iol.file); rule_filter = std::make_unique<rule_based_entry_filter>(lgr, iol.file);
if (!filter.empty()) { rule_filter->set_root_path(path);
bs->set_root_path(path);
for (auto const& rule : filter) { for (auto const& rule : filter) {
auto srule = sys_string_to_string(rule); auto srule = sys_string_to_string(rule);
try { try {
bs->add_filter_rule(srule); rule_filter->add_rule(srule);
} catch (std::exception const& e) { } catch (std::exception const& e) {
iol.err << "error: could not parse filter rule '" << srule iol.err << "error: could not parse filter rule '" << srule
<< "': " << e.what() << "\n"; << "': " << e.what() << "\n";
return 1; return 1;
}
} }
} }
}
if (vm.count("chmod")) { std::vector<std::unique_ptr<entry_transformer>> transformers;
if (chmod_str == "norm") {
chmod_str = "ug-st,=Xr";
}
auto chmod_exprs = if (vm.count("chmod")) {
split_to<std::vector<std::string_view>>(chmod_str, ','); if (chmod_str == "norm") {
chmod_str = "ug-st,=Xr";
auto mask = get_current_umask();
for (auto expr : chmod_exprs) {
bs->add_transformer(create_chmod_entry_transformer(expr, mask));
}
} }
script = bs; auto chmod_exprs = split_to<std::vector<std::string_view>>(chmod_str, ',');
auto mask = get_current_umask();
for (auto expr : chmod_exprs) {
transformers.push_back(create_chmod_entry_transformer(expr, mask));
}
} }
if (vm.count("set-owner")) { if (vm.count("set-owner")) {
@ -1339,7 +1335,15 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
thread_pool scanner_pool(lgr, *iol.os, "scanner", num_scanner_workers); thread_pool scanner_pool(lgr, *iol.os, "scanner", num_scanner_workers);
scanner s(lgr, scanner_pool, sf, ef, *iol.os, std::move(script), options); scanner s(lgr, scanner_pool, sf, ef, *iol.os, options);
if (rule_filter) {
s.add_filter(std::move(rule_filter));
}
for (auto& t : transformers) {
s.add_transformer(std::move(t));
}
s.scan(*fsw, path, prog, input_list, iol.file); s.scan(*fsw, path, prog, input_list, iol.file);

View File

@ -126,8 +126,7 @@ std::string make_filesystem(::benchmark::State const& state) {
segmenter_factory sf(lgr, prog, cfg); segmenter_factory sf(lgr, prog, cfg);
entry_factory ef; entry_factory ef;
scanner s(lgr, pool, sf, ef, *os, std::make_shared<test::script_mock>(), scanner s(lgr, pool, sf, ef, *os, options);
options);
std::ostringstream oss; std::ostringstream oss;

View File

@ -36,7 +36,6 @@
#include <fmt/format.h> #include <fmt/format.h>
#include <dwarfs/block_compressor.h> #include <dwarfs/block_compressor.h>
#include <dwarfs/builtin_script.h>
#include <dwarfs/entry_factory.h> #include <dwarfs/entry_factory.h>
#include <dwarfs/file_stat.h> #include <dwarfs/file_stat.h>
#include <dwarfs/file_type.h> #include <dwarfs/file_type.h>
@ -47,6 +46,7 @@
#include <dwarfs/logger.h> #include <dwarfs/logger.h>
#include <dwarfs/mmif.h> #include <dwarfs/mmif.h>
#include <dwarfs/options.h> #include <dwarfs/options.h>
#include <dwarfs/rule_based_entry_filter.h>
#include <dwarfs/scanner.h> #include <dwarfs/scanner.h>
#include <dwarfs/segmenter_factory.h> #include <dwarfs/segmenter_factory.h>
#include <dwarfs/thread_pool.h> #include <dwarfs/thread_pool.h>
@ -70,15 +70,17 @@ namespace {
std::string const default_file_hash_algo{"xxh3-128"}; std::string const default_file_hash_algo{"xxh3-128"};
// TODO: jeeeez, this is ugly :/
std::string std::string
build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input, build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
std::string const& compression, std::string const& compression,
segmenter::config const& cfg = segmenter::config(), segmenter::config const& cfg = segmenter::config(),
scanner_options const& options = scanner_options(), scanner_options const& options = scanner_options(),
writer_progress* prog = nullptr, writer_progress* prog = nullptr,
std::shared_ptr<script> scr = nullptr, std::shared_ptr<test::filter_transformer_data> ftd = nullptr,
std::optional<std::span<std::filesystem::path const>> input_list = std::optional<std::span<std::filesystem::path const>> input_list =
std::nullopt) { std::nullopt,
std::unique_ptr<entry_filter> filter = nullptr) {
// force multithreading // force multithreading
thread_pool pool(lgr, *input, "worker", 4); thread_pool pool(lgr, *input, "worker", 4);
@ -99,7 +101,16 @@ build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
segmenter_factory sf(lgr, *prog, sf_cfg); segmenter_factory sf(lgr, *prog, sf_cfg);
entry_factory ef; entry_factory ef;
scanner s(lgr, pool, sf, ef, *input, scr, options); scanner s(lgr, pool, sf, ef, *input, options);
if (ftd) {
s.add_filter(std::make_unique<test::mock_filter>(ftd));
s.add_transformer(std::make_unique<test::mock_transformer>(ftd));
}
if (filter) {
s.add_filter(std::move(filter));
}
std::ostringstream oss; std::ostringstream oss;
@ -171,13 +182,13 @@ void basic_end_to_end_test(std::string const& compressor,
writer_progress wprog; writer_progress wprog;
auto scr = std::make_shared<test::script_mock>(); auto ftd = std::make_shared<test::filter_transformer_data>();
auto fsimage = auto fsimage =
build_dwarfs(lgr, input, compressor, cfg, options, &wprog, scr); build_dwarfs(lgr, input, compressor, cfg, options, &wprog, ftd);
EXPECT_EQ(14, scr->filter_calls.size()); EXPECT_EQ(14, ftd->filter_calls.size());
EXPECT_EQ(15, scr->transform_calls.size()); EXPECT_EQ(15, ftd->transform_calls.size());
auto image_size = fsimage.size(); auto image_size = fsimage.size();
auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage)); auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));
@ -911,15 +922,15 @@ class filter_test
: public testing::TestWithParam<dwarfs::test::filter_test_data> { : public testing::TestWithParam<dwarfs::test::filter_test_data> {
public: public:
test::test_logger lgr; test::test_logger lgr;
std::shared_ptr<builtin_script> scr; std::unique_ptr<rule_based_entry_filter> rbf;
std::shared_ptr<test::test_file_access> tfa; std::shared_ptr<test::test_file_access> tfa;
std::shared_ptr<test::os_access_mock> input; std::shared_ptr<test::os_access_mock> input;
void SetUp() override { void SetUp() override {
tfa = std::make_shared<test::test_file_access>(); tfa = std::make_shared<test::test_file_access>();
scr = std::make_shared<builtin_script>(lgr, tfa); rbf = std::make_unique<rule_based_entry_filter>(lgr, tfa);
scr->set_root_path(""); rbf->set_root_path("");
input = std::make_shared<test::os_access_mock>(); input = std::make_shared<test::os_access_mock>();
@ -943,7 +954,7 @@ class filter_test
void set_filter_rules(test::filter_test_data const& spec) { void set_filter_rules(test::filter_test_data const& spec) {
std::istringstream iss(spec.filter()); std::istringstream iss(spec.filter());
scr->add_filter_rules(iss); rbf->add_rules(iss);
} }
std::string get_filter_debug_output(test::filter_test_data const& spec, std::string get_filter_debug_output(test::filter_test_data const& spec,
@ -963,7 +974,9 @@ class filter_test
thread_pool pool(lgr, *input, "worker", 1); thread_pool pool(lgr, *input, "worker", 1);
segmenter_factory sf(lgr, prog); segmenter_factory sf(lgr, prog);
entry_factory ef; entry_factory ef;
scanner s(lgr, pool, sf, ef, *input, scr, options); scanner s(lgr, pool, sf, ef, *input, options);
s.add_filter(std::move(rbf));
block_compressor bc("null"); block_compressor bc("null");
std::ostringstream null; std::ostringstream null;
@ -975,7 +988,7 @@ class filter_test
} }
void TearDown() override { void TearDown() override {
scr.reset(); rbf.reset();
input.reset(); input.reset();
tfa.reset(); tfa.reset();
} }
@ -991,7 +1004,8 @@ TEST_P(filter_test, filesystem) {
scanner_options options; scanner_options options;
options.remove_empty_dirs = true; options.remove_empty_dirs = true;
auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, nullptr, scr); auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, nullptr,
nullptr, std::nullopt, std::move(rbf));
auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage)); auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));

View File

@ -38,11 +38,12 @@
#include <variant> #include <variant>
#include <vector> #include <vector>
#include <dwarfs/entry_filter.h>
#include <dwarfs/entry_interface.h> #include <dwarfs/entry_interface.h>
#include <dwarfs/entry_transformer.h>
#include <dwarfs/file_access.h> #include <dwarfs/file_access.h>
#include <dwarfs/file_stat.h> #include <dwarfs/file_stat.h>
#include <dwarfs/os_access.h> #include <dwarfs/os_access.h>
#include <dwarfs/script.h>
#include <dwarfs/terminal.h> #include <dwarfs/terminal.h>
#include <dwarfs/tool/iolayer.h> #include <dwarfs/tool/iolayer.h>
@ -185,27 +186,20 @@ class os_access_mock : public os_access {
size_t map_file_delay_min_size_{0}; size_t map_file_delay_min_size_{0};
}; };
class script_mock : public script { struct filter_transformer_data {
public:
bool has_filter() const override { return true; }
bool has_transform() const override { return true; }
bool filter(entry_interface const& ei) override {
filter_calls.push_back({ei.unix_dpath(), ei.name(), ei.size(),
ei.is_directory(), ei.get_permissions(),
ei.get_uid(), ei.get_gid(), ei.get_atime(),
ei.get_mtime(), ei.get_ctime()});
return true;
}
void transform(entry_interface& ei) override {
transform_calls.push_back({ei.unix_dpath(), ei.name(), ei.size(),
ei.is_directory(), ei.get_permissions(),
ei.get_uid(), ei.get_gid(), ei.get_atime(),
ei.get_mtime(), ei.get_ctime()});
}
struct entry_data { struct entry_data {
// Builds a record from `ei` by reading each attribute once through its
// accessor: path, name, size, directory flag, permissions (stored as
// `mode`), uid, gid and the access/modification/change timestamps.
// The constructor is intentionally usable for in-place construction from
// an `entry_interface` (e.g. `emplace_back(ei)` on a vector of records).
entry_data(entry_interface const& ei)
: path{ei.unix_dpath()}
, name{ei.name()}
, size{ei.size()}
, is_directory{ei.is_directory()}
, mode{ei.get_permissions()}
, uid{ei.get_uid()}
, gid{ei.get_gid()}
, atime{ei.get_atime()}
, mtime{ei.get_mtime()}
, ctime{ei.get_ctime()} {}
std::string path; std::string path;
std::string name; std::string name;
size_t size; size_t size;
@ -222,6 +216,33 @@ class script_mock : public script {
std::vector<entry_data> transform_calls; std::vector<entry_data> transform_calls;
}; };
/// Test double for `entry_filter` that records every entry it is asked
/// about and never removes anything.
///
/// Each `filter()` call appends a record built from the entry to the
/// `filter_calls` list of the shared `filter_transformer_data`, so the test
/// that owns the other reference can inspect the calls afterwards.
class mock_filter : public entry_filter {
 public:
  /// @param data  Shared call log that `filter()` appends to. Must be
  ///              non-null: `filter()` dereferences it unconditionally.
  mock_filter(std::shared_ptr<filter_transformer_data> data)
      : data_{std::move(data)} {}

  /// Records `ei` in `filter_calls` and keeps the entry.
  ///
  /// Marked `override` so that a change to the base-class signature becomes
  /// a compile error instead of silently turning this into an unrelated
  /// overload.
  ///
  /// @return always `filter_action::keep`.
  filter_action filter(entry_interface const& ei) const override {
    data_->filter_calls.emplace_back(ei);
    return filter_action::keep;
  }

 private:
  // Shared with the test; the pointee is mutated even though `filter()` is
  // const, because only the pointer itself is const in that context.
  std::shared_ptr<filter_transformer_data> data_;
};
class mock_transformer : public entry_transformer {
public:
mock_transformer(std::shared_ptr<filter_transformer_data> data)
: data_{std::move(data)} {}
void transform(entry_interface& ei) {
data_->transform_calls.emplace_back(ei);
}
private:
std::shared_ptr<filter_transformer_data> data_;
};
class test_terminal : public terminal { class test_terminal : public terminal {
public: public:
test_terminal(std::ostream& out, std::ostream& err); test_terminal(std::ostream& out, std::ostream& err);