mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-04 02:06:22 -04:00
refactor: replace script with separate filters and transformers
This commit is contained in:
parent
7eb47649ff
commit
0987dba63d
@ -647,7 +647,6 @@ list(APPEND LIBDWARFS_READER_SRC
|
||||
)
|
||||
|
||||
list(APPEND LIBDWARFS_WRITER_SRC
|
||||
src/dwarfs/builtin_script.cpp
|
||||
src/dwarfs/categorizer.cpp
|
||||
src/dwarfs/category_parser.cpp
|
||||
src/dwarfs/chmod_entry_transformer.cpp
|
||||
@ -674,6 +673,7 @@ list(APPEND LIBDWARFS_WRITER_SRC
|
||||
src/dwarfs/internal/scanner_progress.cpp
|
||||
src/dwarfs/internal/similarity.cpp
|
||||
src/dwarfs/internal/similarity_ordering.cpp
|
||||
src/dwarfs/rule_based_entry_filter.cpp
|
||||
src/dwarfs/scanner.cpp
|
||||
src/dwarfs/segmenter.cpp
|
||||
src/dwarfs/segmenter_factory.cpp
|
||||
|
@ -21,7 +21,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string_view>
|
||||
|
||||
|
@ -21,22 +21,20 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
class entry_interface;
|
||||
|
||||
class script {
|
||||
/// Verdict returned by entry_filter::filter() for a single entry.
/// The scanner treats an entry as excluded as soon as any registered filter
/// returns `remove`; `keep` lets the entry pass that filter.
enum class filter_action {
|
||||
keep,
|
||||
remove,
|
||||
};
|
||||
|
||||
class entry_filter {
|
||||
public:
|
||||
virtual ~script() = default;
|
||||
virtual ~entry_filter() = default;
|
||||
|
||||
virtual bool has_filter() const = 0;
|
||||
virtual bool has_transform() const = 0;
|
||||
|
||||
virtual bool filter(entry_interface const& ei) = 0;
|
||||
virtual void transform(entry_interface& ei) = 0;
|
||||
virtual filter_action filter(entry_interface const& ei) const = 0;
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
@ -38,7 +38,6 @@ namespace dwarfs {
|
||||
class logger;
|
||||
class os_access;
|
||||
class progress;
|
||||
class script;
|
||||
|
||||
struct inode_options;
|
||||
|
||||
|
@ -24,53 +24,39 @@
|
||||
#include <filesystem>
|
||||
#include <iosfwd>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include <dwarfs/script.h>
|
||||
#include <dwarfs/entry_filter.h>
|
||||
#include <dwarfs/file_stat.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
class entry_transformer;
|
||||
class file_access;
|
||||
class logger;
|
||||
|
||||
class builtin_script : public script {
|
||||
class rule_based_entry_filter : public entry_filter {
|
||||
public:
|
||||
builtin_script(logger& lgr, std::shared_ptr<file_access const> fa);
|
||||
~builtin_script();
|
||||
rule_based_entry_filter(logger& lgr, std::shared_ptr<file_access const> fa);
|
||||
~rule_based_entry_filter();
|
||||
|
||||
void set_root_path(std::filesystem::path const& path) {
|
||||
impl_->set_root_path(path);
|
||||
}
|
||||
|
||||
void add_filter_rule(std::string const& rule) {
|
||||
impl_->add_filter_rule(rule);
|
||||
}
|
||||
void add_rule(std::string_view rule) { impl_->add_rule(rule); }
|
||||
|
||||
void add_filter_rules(std::istream& is) { impl_->add_filter_rules(is); }
|
||||
void add_rules(std::istream& is) { impl_->add_rules(is); }
|
||||
|
||||
void add_transformer(std::unique_ptr<entry_transformer>&& xfm) {
|
||||
impl_->add_transformer(std::move(xfm));
|
||||
}
|
||||
|
||||
bool has_filter() const override;
|
||||
bool has_transform() const override;
|
||||
|
||||
bool filter(entry_interface const& ei) override;
|
||||
void transform(entry_interface& ei) override;
|
||||
filter_action filter(entry_interface const& ei) const override;
|
||||
|
||||
class impl {
|
||||
public:
|
||||
virtual ~impl() = default;
|
||||
|
||||
virtual void set_root_path(std::filesystem::path const& path) = 0;
|
||||
virtual void add_filter_rule(std::string const& rule) = 0;
|
||||
virtual void add_filter_rules(std::istream& is) = 0;
|
||||
virtual void add_transformer(std::unique_ptr<entry_transformer>&& xfm) = 0;
|
||||
virtual bool filter(entry_interface const& ei) = 0;
|
||||
virtual void transform(entry_interface& ei) = 0;
|
||||
virtual bool has_filter() const = 0;
|
||||
virtual bool has_transform() const = 0;
|
||||
virtual void add_rule(std::string_view rule) = 0;
|
||||
virtual void add_rules(std::istream& is) = 0;
|
||||
virtual filter_action filter(entry_interface const& ei) const = 0;
|
||||
};
|
||||
|
||||
private:
|
@ -31,22 +31,31 @@ namespace dwarfs {
|
||||
|
||||
struct scanner_options;
|
||||
|
||||
class entry_filter;
|
||||
class entry_transformer;
|
||||
class entry_factory;
|
||||
class file_access;
|
||||
class filesystem_writer;
|
||||
class logger;
|
||||
class os_access;
|
||||
class writer_progress;
|
||||
class script;
|
||||
class segmenter_factory;
|
||||
class thread_pool;
|
||||
|
||||
class scanner {
|
||||
public:
|
||||
scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf,
|
||||
entry_factory& ef, os_access const& os, std::shared_ptr<script> scr,
|
||||
entry_factory& ef, os_access const& os,
|
||||
const scanner_options& options);
|
||||
|
||||
void add_filter(std::unique_ptr<entry_filter> filter) {
|
||||
impl_->add_filter(std::move(filter));
|
||||
}
|
||||
|
||||
void add_transformer(std::unique_ptr<entry_transformer> transformer) {
|
||||
impl_->add_transformer(std::move(transformer));
|
||||
}
|
||||
|
||||
void scan(
|
||||
filesystem_writer& fsw, const std::filesystem::path& path,
|
||||
writer_progress& prog,
|
||||
@ -59,6 +68,11 @@ class scanner {
|
||||
public:
|
||||
virtual ~impl() = default;
|
||||
|
||||
virtual void add_filter(std::unique_ptr<entry_filter> filter) = 0;
|
||||
|
||||
virtual void
|
||||
add_transformer(std::unique_ptr<entry_transformer> transformer) = 0;
|
||||
|
||||
virtual void
|
||||
scan(filesystem_writer& fsw, const std::filesystem::path& path,
|
||||
writer_progress& prog,
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include <dwarfs/chmod_entry_transformer.h>
|
||||
#include <dwarfs/entry_interface.h>
|
||||
#include <dwarfs/entry_transformer.h>
|
||||
|
||||
#include <dwarfs/internal/chmod_transformer.h>
|
||||
|
||||
|
@ -48,7 +48,6 @@
|
||||
#include <dwarfs/mmif.h>
|
||||
#include <dwarfs/options.h>
|
||||
#include <dwarfs/os_access.h>
|
||||
#include <dwarfs/script.h>
|
||||
#include <dwarfs/util.h>
|
||||
|
||||
#include <dwarfs/internal/entry.h>
|
||||
|
@ -25,11 +25,10 @@
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <dwarfs/builtin_script.h>
|
||||
#include <dwarfs/entry_interface.h>
|
||||
#include <dwarfs/entry_transformer.h>
|
||||
#include <dwarfs/file_access.h>
|
||||
#include <dwarfs/logger.h>
|
||||
#include <dwarfs/rule_based_entry_filter.h>
|
||||
#include <dwarfs/util.h>
|
||||
|
||||
namespace dwarfs {
|
||||
@ -65,44 +64,34 @@ struct filter_rule {
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
class builtin_script_ : public builtin_script::impl {
|
||||
class rule_based_entry_filter_ : public rule_based_entry_filter::impl {
|
||||
public:
|
||||
builtin_script_(logger& lgr, std::shared_ptr<file_access const> fa);
|
||||
rule_based_entry_filter_(logger& lgr, std::shared_ptr<file_access const> fa);
|
||||
|
||||
void set_root_path(fs::path const& path) override;
|
||||
void add_filter_rule(std::string const& rule) override;
|
||||
void add_filter_rules(std::istream& is) override;
|
||||
|
||||
void add_transformer(std::unique_ptr<entry_transformer>&& xfm) override {
|
||||
transformer_.emplace_back(std::move(xfm));
|
||||
}
|
||||
|
||||
bool filter(entry_interface const& ei) override;
|
||||
void transform(entry_interface& ei) override;
|
||||
|
||||
bool has_filter() const override { return !filter_.empty(); }
|
||||
bool has_transform() const override { return !transformer_.empty(); }
|
||||
void add_rule(std::string_view rule) override;
|
||||
void add_rules(std::istream& is) override;
|
||||
filter_action filter(entry_interface const& ei) const override;
|
||||
|
||||
private:
|
||||
void add_filter_rule(std::unordered_set<std::string>& seen_files,
|
||||
std::string const& rule);
|
||||
void
|
||||
add_rule(std::unordered_set<std::string>& seen_files, std::string_view rule);
|
||||
|
||||
void add_filter_rules(std::unordered_set<std::string>& seen_files,
|
||||
std::istream& is);
|
||||
void add_rules(std::unordered_set<std::string>& seen_files, std::istream& is);
|
||||
|
||||
filter_rule compile_filter_rule(std::string const& rule);
|
||||
filter_rule compile_filter_rule(std::string_view rule);
|
||||
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
std::string root_path_;
|
||||
std::vector<filter_rule> filter_;
|
||||
std::vector<std::unique_ptr<entry_transformer>> transformer_;
|
||||
std::shared_ptr<file_access const> fa_;
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
|
||||
-> filter_rule {
|
||||
std::string r;
|
||||
auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
|
||||
std::string_view rule_sv) -> filter_rule {
|
||||
std::string rule{rule_sv};
|
||||
std::string re;
|
||||
filter_rule::rule_type type;
|
||||
|
||||
auto* p = rule.c_str();
|
||||
@ -125,15 +114,15 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
|
||||
bool floating = *p && *p != '/';
|
||||
|
||||
if (floating) {
|
||||
r += ".*/";
|
||||
re += ".*/";
|
||||
}
|
||||
|
||||
while (*p) {
|
||||
switch (*p) {
|
||||
case '\\':
|
||||
r += *p++;
|
||||
re += *p++;
|
||||
if (p) {
|
||||
r += *p++;
|
||||
re += *p++;
|
||||
}
|
||||
continue;
|
||||
|
||||
@ -144,14 +133,14 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
|
||||
}
|
||||
switch (nstar) {
|
||||
case 1:
|
||||
if (r.ends_with('/') and (*p == '/' or *p == '\0')) {
|
||||
r += "[^/]+";
|
||||
if (re.ends_with('/') and (*p == '/' or *p == '\0')) {
|
||||
re += "[^/]+";
|
||||
} else {
|
||||
r += "[^/]*";
|
||||
re += "[^/]*";
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
r += ".*";
|
||||
re += ".*";
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("too many *s");
|
||||
@ -160,7 +149,7 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
|
||||
continue;
|
||||
|
||||
case '?':
|
||||
r += "[^/]";
|
||||
re += "[^/]";
|
||||
break;
|
||||
|
||||
case '.':
|
||||
@ -172,32 +161,33 @@ auto builtin_script_<LoggerPolicy>::compile_filter_rule(std::string const& rule)
|
||||
case '{':
|
||||
case '}':
|
||||
case '|':
|
||||
r += '\\';
|
||||
r += *p;
|
||||
re += '\\';
|
||||
re += *p;
|
||||
break;
|
||||
|
||||
default:
|
||||
r += *p;
|
||||
re += *p;
|
||||
break;
|
||||
}
|
||||
|
||||
++p;
|
||||
}
|
||||
|
||||
LOG_DEBUG << "'" << rule << "' -> '" << r << "' [floating=" << floating
|
||||
LOG_DEBUG << "'" << rule << "' -> '" << re << "' [floating=" << floating
|
||||
<< "]";
|
||||
|
||||
return filter_rule(type, floating, r, rule);
|
||||
return filter_rule(type, floating, re, rule);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
builtin_script_<LoggerPolicy>::builtin_script_(
|
||||
rule_based_entry_filter_<LoggerPolicy>::rule_based_entry_filter_(
|
||||
logger& lgr, std::shared_ptr<file_access const> fa)
|
||||
: log_{lgr}
|
||||
, fa_{std::move(fa)} {}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void builtin_script_<LoggerPolicy>::set_root_path(fs::path const& path) {
|
||||
void rule_based_entry_filter_<LoggerPolicy>::set_root_path(
|
||||
fs::path const& path) {
|
||||
// TODO: this whole thing needs to be windowsized
|
||||
root_path_ = u8string_to_string(path.u8string());
|
||||
|
||||
@ -215,22 +205,29 @@ void builtin_script_<LoggerPolicy>::set_root_path(fs::path const& path) {
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void builtin_script_<LoggerPolicy>::add_filter_rule(std::string const& rule) {
|
||||
void rule_based_entry_filter_<LoggerPolicy>::add_rule(std::string_view rule) {
|
||||
std::unordered_set<std::string> seen_files;
|
||||
add_filter_rule(seen_files, rule);
|
||||
add_rule(seen_files, rule);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void builtin_script_<LoggerPolicy>::add_filter_rules(std::istream& is) {
|
||||
void rule_based_entry_filter_<LoggerPolicy>::add_rules(std::istream& is) {
|
||||
std::unordered_set<std::string> seen_files;
|
||||
add_filter_rules(seen_files, is);
|
||||
add_rules(seen_files, is);
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void builtin_script_<LoggerPolicy>::add_filter_rule(
|
||||
std::unordered_set<std::string>& seen_files, std::string const& rule) {
|
||||
void rule_based_entry_filter_<LoggerPolicy>::add_rule(
|
||||
std::unordered_set<std::string>& seen_files, std::string_view rule) {
|
||||
if (rule.starts_with('.')) {
|
||||
auto file = std::regex_replace(rule, std::regex("^. +"), "");
|
||||
auto file_pos = rule.find_first_not_of(" \t", 1);
|
||||
|
||||
if (file_pos == std::string::npos) {
|
||||
throw std::runtime_error(
|
||||
fmt::format("no file specified in merge rule: {}", rule));
|
||||
}
|
||||
|
||||
auto file = std::string(rule.substr(file_pos));
|
||||
|
||||
if (!seen_files.emplace(file).second) {
|
||||
throw std::runtime_error(
|
||||
@ -238,7 +235,7 @@ void builtin_script_<LoggerPolicy>::add_filter_rule(
|
||||
}
|
||||
|
||||
auto ifs = fa_->open_input(file);
|
||||
add_filter_rules(seen_files, ifs->is());
|
||||
add_rules(seen_files, ifs->is());
|
||||
|
||||
seen_files.erase(file);
|
||||
} else {
|
||||
@ -247,7 +244,7 @@ void builtin_script_<LoggerPolicy>::add_filter_rule(
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void builtin_script_<LoggerPolicy>::add_filter_rules(
|
||||
void rule_based_entry_filter_<LoggerPolicy>::add_rules(
|
||||
std::unordered_set<std::string>& seen_files, std::istream& is) {
|
||||
std::string line;
|
||||
|
||||
@ -258,12 +255,13 @@ void builtin_script_<LoggerPolicy>::add_filter_rules(
|
||||
if (line.find_first_not_of(" \t") == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
add_filter_rule(seen_files, line);
|
||||
add_rule(seen_files, line);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
bool builtin_script_<LoggerPolicy>::filter(entry_interface const& ei) {
|
||||
filter_action rule_based_entry_filter_<LoggerPolicy>::filter(
|
||||
entry_interface const& ei) const {
|
||||
std::string path = ei.unix_dpath();
|
||||
std::string relpath = path;
|
||||
|
||||
@ -278,42 +276,30 @@ bool builtin_script_<LoggerPolicy>::filter(entry_interface const& ei) {
|
||||
<< r.rule << "'";
|
||||
switch (r.type) {
|
||||
case filter_rule::rule_type::include:
|
||||
return true;
|
||||
return filter_action::keep;
|
||||
|
||||
case filter_rule::rule_type::exclude:
|
||||
return false;
|
||||
return filter_action::remove;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LOG_TRACE << "[" << path << "] / [" << relpath << "] matched no rule";
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void builtin_script_<LoggerPolicy>::transform(entry_interface& ei) {
|
||||
for (auto& xfm : transformer_) {
|
||||
xfm->transform(ei);
|
||||
}
|
||||
return filter_action::keep;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
builtin_script::builtin_script(logger& lgr,
|
||||
std::shared_ptr<file_access const> fa)
|
||||
: impl_(make_unique_logging_object<impl, internal::builtin_script_,
|
||||
rule_based_entry_filter::rule_based_entry_filter(
|
||||
logger& lgr, std::shared_ptr<file_access const> fa)
|
||||
: impl_(make_unique_logging_object<impl, internal::rule_based_entry_filter_,
|
||||
logger_policies>(lgr, std::move(fa))) {}
|
||||
|
||||
builtin_script::~builtin_script() = default;
|
||||
rule_based_entry_filter::~rule_based_entry_filter() = default;
|
||||
|
||||
bool builtin_script::has_filter() const { return impl_->has_filter(); }
|
||||
bool builtin_script::has_transform() const { return impl_->has_transform(); }
|
||||
|
||||
bool builtin_script::filter(entry_interface const& ei) {
|
||||
filter_action rule_based_entry_filter::filter(entry_interface const& ei) const {
|
||||
return impl_->filter(ei);
|
||||
}
|
||||
|
||||
void builtin_script::transform(entry_interface& ei) { impl_->transform(ei); }
|
||||
|
||||
} // namespace dwarfs
|
@ -40,6 +40,8 @@
|
||||
|
||||
#include <dwarfs/categorizer.h>
|
||||
#include <dwarfs/entry_factory.h>
|
||||
#include <dwarfs/entry_filter.h>
|
||||
#include <dwarfs/entry_transformer.h>
|
||||
#include <dwarfs/error.h>
|
||||
#include <dwarfs/file_access.h>
|
||||
#include <dwarfs/filesystem_writer.h>
|
||||
@ -49,7 +51,6 @@
|
||||
#include <dwarfs/options.h>
|
||||
#include <dwarfs/os_access.h>
|
||||
#include <dwarfs/scanner.h>
|
||||
#include <dwarfs/script.h>
|
||||
#include <dwarfs/segmenter_factory.h>
|
||||
#include <dwarfs/thread_pool.h>
|
||||
#include <dwarfs/util.h>
|
||||
@ -291,9 +292,13 @@ template <typename LoggerPolicy>
|
||||
class scanner_ final : public scanner::impl {
|
||||
public:
|
||||
scanner_(logger& lgr, worker_group& wg, segmenter_factory& sf,
|
||||
entry_factory& ef, os_access const& os, std::shared_ptr<script> scr,
|
||||
entry_factory& ef, os_access const& os,
|
||||
const scanner_options& options);
|
||||
|
||||
void add_filter(std::unique_ptr<entry_filter> filter) override;
|
||||
|
||||
void add_transformer(std::unique_ptr<entry_transformer> transformer) override;
|
||||
|
||||
void scan(filesystem_writer& fs_writer, std::filesystem::path const& path,
|
||||
writer_progress& wprog,
|
||||
std::optional<std::span<std::filesystem::path const>> list,
|
||||
@ -321,22 +326,32 @@ class scanner_ final : public scanner::impl {
|
||||
segmenter_factory& segmenter_factory_;
|
||||
entry_factory& entry_factory_;
|
||||
os_access const& os_;
|
||||
std::shared_ptr<script> script_;
|
||||
std::vector<std::unique_ptr<entry_filter>> filters_;
|
||||
std::vector<std::unique_ptr<entry_transformer>> transformers_;
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void scanner_<LoggerPolicy>::add_filter(std::unique_ptr<entry_filter> filter) {
|
||||
filters_.push_back(std::move(filter));
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void scanner_<LoggerPolicy>::add_transformer(
|
||||
std::unique_ptr<entry_transformer> transformer) {
|
||||
transformers_.push_back(std::move(transformer));
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
|
||||
segmenter_factory& sf, entry_factory& ef,
|
||||
os_access const& os,
|
||||
std::shared_ptr<script> scr,
|
||||
const scanner_options& options)
|
||||
: LOG_PROXY_INIT(lgr)
|
||||
, wg_{wg}
|
||||
, options_{options}
|
||||
, segmenter_factory_{sf}
|
||||
, entry_factory_{ef}
|
||||
, os_{os}
|
||||
, script_{std::move(scr)} {}
|
||||
, os_{os} {}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
std::shared_ptr<entry>
|
||||
@ -345,15 +360,10 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
||||
file_scanner& fs, bool debug_filter) {
|
||||
try {
|
||||
auto pe = entry_factory_.create(os_, name, parent);
|
||||
bool exclude = false;
|
||||
|
||||
if (script_) {
|
||||
if (script_->has_filter() && !script_->filter(*pe)) {
|
||||
exclude = true;
|
||||
} else if (script_->has_transform()) {
|
||||
script_->transform(*pe);
|
||||
}
|
||||
}
|
||||
bool const exclude =
|
||||
std::any_of(filters_.begin(), filters_.end(), [&pe](auto const& f) {
|
||||
return f->filter(*pe) == filter_action::remove;
|
||||
});
|
||||
|
||||
if (debug_filter) {
|
||||
(*options_.debug_filter_function)(exclude, *pe);
|
||||
@ -367,11 +377,13 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (pe) {
|
||||
for (auto const& t : transformers_) {
|
||||
t->transform(*pe);
|
||||
}
|
||||
|
||||
switch (pe->type()) {
|
||||
case entry::E_FILE:
|
||||
if (!debug_filter && pe->size() > 0 &&
|
||||
os_.access(pe->fs_path(), R_OK)) {
|
||||
if (!debug_filter && pe->size() > 0 && os_.access(pe->fs_path(), R_OK)) {
|
||||
LOG_ERROR << "cannot access " << pe->path_as_string()
|
||||
<< ", creating empty file";
|
||||
pe->override_size(0);
|
||||
@ -435,7 +447,6 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
||||
prog.errors++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return pe;
|
||||
} catch (const std::system_error& e) {
|
||||
@ -484,8 +495,8 @@ scanner_<LoggerPolicy>::scan_tree(std::filesystem::path const& path,
|
||||
fmt::format("'{}' must be a directory", path.string()));
|
||||
}
|
||||
|
||||
if (script_ && script_->has_transform()) {
|
||||
script_->transform(*root);
|
||||
for (auto const& t : transformers_) {
|
||||
t->transform(*root);
|
||||
}
|
||||
|
||||
std::deque<std::shared_ptr<entry>> queue({root});
|
||||
@ -530,7 +541,7 @@ std::shared_ptr<entry>
|
||||
scanner_<LoggerPolicy>::scan_list(std::filesystem::path const& path,
|
||||
std::span<std::filesystem::path const> list,
|
||||
progress& prog, file_scanner& fs) {
|
||||
if (script_ && script_->has_filter()) {
|
||||
if (!filters_.empty()) {
|
||||
DWARFS_THROW(runtime_error, "cannot use filters with file lists");
|
||||
}
|
||||
|
||||
@ -543,8 +554,8 @@ scanner_<LoggerPolicy>::scan_list(std::filesystem::path const& path,
|
||||
fmt::format("'{}' must be a directory", path.string()));
|
||||
}
|
||||
|
||||
if (script_ && script_->has_transform()) {
|
||||
script_->transform(*root);
|
||||
for (auto const& t : transformers_) {
|
||||
t->transform(*root);
|
||||
}
|
||||
|
||||
auto ensure_path = [this, &prog, &fs](std::filesystem::path const& path,
|
||||
@ -1017,10 +1028,9 @@ void scanner_<LoggerPolicy>::scan(
|
||||
|
||||
scanner::scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf,
|
||||
entry_factory& ef, os_access const& os,
|
||||
std::shared_ptr<script> scr, const scanner_options& options)
|
||||
const scanner_options& options)
|
||||
: impl_(
|
||||
make_unique_logging_object<impl, internal::scanner_, logger_policies>(
|
||||
lgr, pool.get_worker_group(), sf, ef, os, std::move(scr),
|
||||
options)) {}
|
||||
lgr, pool.get_worker_group(), sf, ef, os, options)) {}
|
||||
|
||||
} // namespace dwarfs
|
||||
|
@ -55,7 +55,6 @@
|
||||
|
||||
#include <dwarfs/block_compressor.h>
|
||||
#include <dwarfs/block_compressor_parser.h>
|
||||
#include <dwarfs/builtin_script.h>
|
||||
#include <dwarfs/categorizer.h>
|
||||
#include <dwarfs/category_parser.h>
|
||||
#include <dwarfs/checksum.h>
|
||||
@ -77,8 +76,8 @@
|
||||
#include <dwarfs/mmap.h>
|
||||
#include <dwarfs/options.h>
|
||||
#include <dwarfs/os_access.h>
|
||||
#include <dwarfs/rule_based_entry_filter.h>
|
||||
#include <dwarfs/scanner.h>
|
||||
#include <dwarfs/script.h>
|
||||
#include <dwarfs/segmenter_factory.h>
|
||||
#include <dwarfs/string.h>
|
||||
#include <dwarfs/terminal.h>
|
||||
@ -928,18 +927,17 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
||||
iol.term, iol.err, pg_mode,
|
||||
recompress ? console_writer::REWRITE : console_writer::NORMAL, logopts);
|
||||
|
||||
std::shared_ptr<script> script;
|
||||
|
||||
if (!filter.empty() or vm.count("chmod")) {
|
||||
auto bs = std::make_shared<builtin_script>(lgr, iol.file);
|
||||
std::unique_ptr<rule_based_entry_filter> rule_filter;
|
||||
|
||||
if (!filter.empty()) {
|
||||
bs->set_root_path(path);
|
||||
rule_filter = std::make_unique<rule_based_entry_filter>(lgr, iol.file);
|
||||
|
||||
rule_filter->set_root_path(path);
|
||||
|
||||
for (auto const& rule : filter) {
|
||||
auto srule = sys_string_to_string(rule);
|
||||
try {
|
||||
bs->add_filter_rule(srule);
|
||||
rule_filter->add_rule(srule);
|
||||
} catch (std::exception const& e) {
|
||||
iol.err << "error: could not parse filter rule '" << srule
|
||||
<< "': " << e.what() << "\n";
|
||||
@ -948,24 +946,22 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<entry_transformer>> transformers;
|
||||
|
||||
if (vm.count("chmod")) {
|
||||
if (chmod_str == "norm") {
|
||||
chmod_str = "ug-st,=Xr";
|
||||
}
|
||||
|
||||
auto chmod_exprs =
|
||||
split_to<std::vector<std::string_view>>(chmod_str, ',');
|
||||
auto chmod_exprs = split_to<std::vector<std::string_view>>(chmod_str, ',');
|
||||
|
||||
auto mask = get_current_umask();
|
||||
|
||||
for (auto expr : chmod_exprs) {
|
||||
bs->add_transformer(create_chmod_entry_transformer(expr, mask));
|
||||
transformers.push_back(create_chmod_entry_transformer(expr, mask));
|
||||
}
|
||||
}
|
||||
|
||||
script = bs;
|
||||
}
|
||||
|
||||
if (vm.count("set-owner")) {
|
||||
options.uid = uid;
|
||||
}
|
||||
@ -1339,7 +1335,15 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
||||
|
||||
thread_pool scanner_pool(lgr, *iol.os, "scanner", num_scanner_workers);
|
||||
|
||||
scanner s(lgr, scanner_pool, sf, ef, *iol.os, std::move(script), options);
|
||||
scanner s(lgr, scanner_pool, sf, ef, *iol.os, options);
|
||||
|
||||
if (rule_filter) {
|
||||
s.add_filter(std::move(rule_filter));
|
||||
}
|
||||
|
||||
for (auto& t : transformers) {
|
||||
s.add_transformer(std::move(t));
|
||||
}
|
||||
|
||||
s.scan(*fsw, path, prog, input_list, iol.file);
|
||||
|
||||
|
@ -126,8 +126,7 @@ std::string make_filesystem(::benchmark::State const& state) {
|
||||
segmenter_factory sf(lgr, prog, cfg);
|
||||
entry_factory ef;
|
||||
|
||||
scanner s(lgr, pool, sf, ef, *os, std::make_shared<test::script_mock>(),
|
||||
options);
|
||||
scanner s(lgr, pool, sf, ef, *os, options);
|
||||
|
||||
std::ostringstream oss;
|
||||
|
||||
|
@ -36,7 +36,6 @@
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <dwarfs/block_compressor.h>
|
||||
#include <dwarfs/builtin_script.h>
|
||||
#include <dwarfs/entry_factory.h>
|
||||
#include <dwarfs/file_stat.h>
|
||||
#include <dwarfs/file_type.h>
|
||||
@ -47,6 +46,7 @@
|
||||
#include <dwarfs/logger.h>
|
||||
#include <dwarfs/mmif.h>
|
||||
#include <dwarfs/options.h>
|
||||
#include <dwarfs/rule_based_entry_filter.h>
|
||||
#include <dwarfs/scanner.h>
|
||||
#include <dwarfs/segmenter_factory.h>
|
||||
#include <dwarfs/thread_pool.h>
|
||||
@ -70,15 +70,17 @@ namespace {
|
||||
|
||||
std::string const default_file_hash_algo{"xxh3-128"};
|
||||
|
||||
// TODO: jeeeez, this is ugly :/
|
||||
std::string
|
||||
build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
|
||||
std::string const& compression,
|
||||
segmenter::config const& cfg = segmenter::config(),
|
||||
scanner_options const& options = scanner_options(),
|
||||
writer_progress* prog = nullptr,
|
||||
std::shared_ptr<script> scr = nullptr,
|
||||
std::shared_ptr<test::filter_transformer_data> ftd = nullptr,
|
||||
std::optional<std::span<std::filesystem::path const>> input_list =
|
||||
std::nullopt) {
|
||||
std::nullopt,
|
||||
std::unique_ptr<entry_filter> filter = nullptr) {
|
||||
// force multithreading
|
||||
thread_pool pool(lgr, *input, "worker", 4);
|
||||
|
||||
@ -99,7 +101,16 @@ build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
|
||||
segmenter_factory sf(lgr, *prog, sf_cfg);
|
||||
entry_factory ef;
|
||||
|
||||
scanner s(lgr, pool, sf, ef, *input, scr, options);
|
||||
scanner s(lgr, pool, sf, ef, *input, options);
|
||||
|
||||
if (ftd) {
|
||||
s.add_filter(std::make_unique<test::mock_filter>(ftd));
|
||||
s.add_transformer(std::make_unique<test::mock_transformer>(ftd));
|
||||
}
|
||||
|
||||
if (filter) {
|
||||
s.add_filter(std::move(filter));
|
||||
}
|
||||
|
||||
std::ostringstream oss;
|
||||
|
||||
@ -171,13 +182,13 @@ void basic_end_to_end_test(std::string const& compressor,
|
||||
|
||||
writer_progress wprog;
|
||||
|
||||
auto scr = std::make_shared<test::script_mock>();
|
||||
auto ftd = std::make_shared<test::filter_transformer_data>();
|
||||
|
||||
auto fsimage =
|
||||
build_dwarfs(lgr, input, compressor, cfg, options, &wprog, scr);
|
||||
build_dwarfs(lgr, input, compressor, cfg, options, &wprog, ftd);
|
||||
|
||||
EXPECT_EQ(14, scr->filter_calls.size());
|
||||
EXPECT_EQ(15, scr->transform_calls.size());
|
||||
EXPECT_EQ(14, ftd->filter_calls.size());
|
||||
EXPECT_EQ(15, ftd->transform_calls.size());
|
||||
|
||||
auto image_size = fsimage.size();
|
||||
auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));
|
||||
@ -911,15 +922,15 @@ class filter_test
|
||||
: public testing::TestWithParam<dwarfs::test::filter_test_data> {
|
||||
public:
|
||||
test::test_logger lgr;
|
||||
std::shared_ptr<builtin_script> scr;
|
||||
std::unique_ptr<rule_based_entry_filter> rbf;
|
||||
std::shared_ptr<test::test_file_access> tfa;
|
||||
std::shared_ptr<test::os_access_mock> input;
|
||||
|
||||
void SetUp() override {
|
||||
tfa = std::make_shared<test::test_file_access>();
|
||||
|
||||
scr = std::make_shared<builtin_script>(lgr, tfa);
|
||||
scr->set_root_path("");
|
||||
rbf = std::make_unique<rule_based_entry_filter>(lgr, tfa);
|
||||
rbf->set_root_path("");
|
||||
|
||||
input = std::make_shared<test::os_access_mock>();
|
||||
|
||||
@ -943,7 +954,7 @@ class filter_test
|
||||
|
||||
void set_filter_rules(test::filter_test_data const& spec) {
|
||||
std::istringstream iss(spec.filter());
|
||||
scr->add_filter_rules(iss);
|
||||
rbf->add_rules(iss);
|
||||
}
|
||||
|
||||
std::string get_filter_debug_output(test::filter_test_data const& spec,
|
||||
@ -963,7 +974,9 @@ class filter_test
|
||||
thread_pool pool(lgr, *input, "worker", 1);
|
||||
segmenter_factory sf(lgr, prog);
|
||||
entry_factory ef;
|
||||
scanner s(lgr, pool, sf, ef, *input, scr, options);
|
||||
scanner s(lgr, pool, sf, ef, *input, options);
|
||||
|
||||
s.add_filter(std::move(rbf));
|
||||
|
||||
block_compressor bc("null");
|
||||
std::ostringstream null;
|
||||
@ -975,7 +988,7 @@ class filter_test
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
scr.reset();
|
||||
rbf.reset();
|
||||
input.reset();
|
||||
tfa.reset();
|
||||
}
|
||||
@ -991,7 +1004,8 @@ TEST_P(filter_test, filesystem) {
|
||||
scanner_options options;
|
||||
options.remove_empty_dirs = true;
|
||||
|
||||
auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, nullptr, scr);
|
||||
auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, nullptr,
|
||||
nullptr, std::nullopt, std::move(rbf));
|
||||
|
||||
auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));
|
||||
|
||||
|
@ -38,11 +38,12 @@
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include <dwarfs/entry_filter.h>
|
||||
#include <dwarfs/entry_interface.h>
|
||||
#include <dwarfs/entry_transformer.h>
|
||||
#include <dwarfs/file_access.h>
|
||||
#include <dwarfs/file_stat.h>
|
||||
#include <dwarfs/os_access.h>
|
||||
#include <dwarfs/script.h>
|
||||
#include <dwarfs/terminal.h>
|
||||
#include <dwarfs/tool/iolayer.h>
|
||||
|
||||
@ -185,27 +186,20 @@ class os_access_mock : public os_access {
|
||||
size_t map_file_delay_min_size_{0};
|
||||
};
|
||||
|
||||
class script_mock : public script {
|
||||
public:
|
||||
bool has_filter() const override { return true; }
|
||||
bool has_transform() const override { return true; }
|
||||
|
||||
bool filter(entry_interface const& ei) override {
|
||||
filter_calls.push_back({ei.unix_dpath(), ei.name(), ei.size(),
|
||||
ei.is_directory(), ei.get_permissions(),
|
||||
ei.get_uid(), ei.get_gid(), ei.get_atime(),
|
||||
ei.get_mtime(), ei.get_ctime()});
|
||||
return true;
|
||||
}
|
||||
|
||||
void transform(entry_interface& ei) override {
|
||||
transform_calls.push_back({ei.unix_dpath(), ei.name(), ei.size(),
|
||||
ei.is_directory(), ei.get_permissions(),
|
||||
ei.get_uid(), ei.get_gid(), ei.get_atime(),
|
||||
ei.get_mtime(), ei.get_ctime()});
|
||||
}
|
||||
|
||||
struct filter_transformer_data {
|
||||
struct entry_data {
|
||||
entry_data(entry_interface const& ei)
|
||||
: path{ei.unix_dpath()}
|
||||
, name{ei.name()}
|
||||
, size{ei.size()}
|
||||
, is_directory{ei.is_directory()}
|
||||
, mode{ei.get_permissions()}
|
||||
, uid{ei.get_uid()}
|
||||
, gid{ei.get_gid()}
|
||||
, atime{ei.get_atime()}
|
||||
, mtime{ei.get_mtime()}
|
||||
, ctime{ei.get_ctime()} {}
|
||||
|
||||
std::string path;
|
||||
std::string name;
|
||||
size_t size;
|
||||
@ -222,6 +216,33 @@ class script_mock : public script {
|
||||
std::vector<entry_data> transform_calls;
|
||||
};
|
||||
|
||||
class mock_filter : public entry_filter {
|
||||
public:
|
||||
mock_filter(std::shared_ptr<filter_transformer_data> data)
|
||||
: data_{std::move(data)} {}
|
||||
|
||||
filter_action filter(entry_interface const& ei) const {
|
||||
data_->filter_calls.emplace_back(ei);
|
||||
return filter_action::keep;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<filter_transformer_data> data_;
|
||||
};
|
||||
|
||||
class mock_transformer : public entry_transformer {
|
||||
public:
|
||||
mock_transformer(std::shared_ptr<filter_transformer_data> data)
|
||||
: data_{std::move(data)} {}
|
||||
|
||||
void transform(entry_interface& ei) {
|
||||
data_->transform_calls.emplace_back(ei);
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<filter_transformer_data> data_;
|
||||
};
|
||||
|
||||
class test_terminal : public terminal {
|
||||
public:
|
||||
test_terminal(std::ostream& out, std::ostream& err);
|
||||
|
Loading…
x
Reference in New Issue
Block a user