diff --git a/CMakeLists.txt b/CMakeLists.txt
index ae8b96c7..24a425a0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -298,6 +298,7 @@ list(
src/dwarfs/block_cache.cpp
src/dwarfs/block_compressor.cpp
src/dwarfs/block_manager.cpp
+ src/dwarfs/builtin_script.cpp
src/dwarfs/checksum.cpp
src/dwarfs/console_writer.cpp
src/dwarfs/entry.cpp
diff --git a/doc/mkdwarfs.md b/doc/mkdwarfs.md
index 26e018c0..34de6b85 100644
--- a/doc/mkdwarfs.md
+++ b/doc/mkdwarfs.md
@@ -245,6 +245,18 @@ Most other options are concerned with compression tuning:
Last but not least, if scripting support is built into `mkdwarfs`, you can
choose `script` to let the script determine the order.
+- `-F`, `--filter=`*rule*:
+ Add a filter rule. This option can be specified multiple times.
+ See [FILTER RULES](#filter-rules) for more details.
+
+- `--debug-filter`[`=all`|`=excluded`|`=excluded-files`|`=files`|`=included`|`=included-files`]:
+ Show the effect of the filter rules without creating a file system.
+ If no argument is passed to the option, all included/excluded files and
+ directories are shown (same as with `all`). `files` will omit all
+ directories. `included` and `excluded` will only show the corresponding
+ set of files/directories. `included-files` and `excluded-files` work
+ like `included` and `excluded`, but again omit all directories.
+
- `--remove-empty-dirs`:
Removes all empty directories from the output file system, recursively.
This is particularly useful when using scripts that filter out a lot of
@@ -445,6 +457,67 @@ further compress the block. So if you're really desperately trying
to reduce the image size, enabling `all` packing would be an option
at the cost of using a lot more memory when using the filesystem.
+## FILTER RULES
+
+The filter rules have been inspired by the `rsync` utility. They
+look very similar, but there are differences. These rules are quite
+powerful, yet they're somewhat hard to get used to.
+
+There are only 3 different kinds of rules:
+
+- `+ `pattern
+ An "include" rule.
+
+- `- `pattern
+ An "exclude" rule.
+
+- `. `file
+ A merge file rule. Rules are read (recursively) from the specified
+ file; blank lines and lines starting with `#` are ignored.
+
+Ultimately, only include and exclude rules remain in the rule set,
+as each merge file rule is replaced in place by the rules it contains.
+
+The most important rule to remember when building a rule set is that
+all rules are applied strictly in order and processing stops at the
+first matching rule. If no rules match, the default is to include the
+entry.
+
+Patterns can be anchored or floating. Anchored patterns are patterns
+that start with a `/`. These patterns match relative to the file
+system root (i.e. the `--input` path). Floating patterns match in
+any directory in the hierarchy.
+
+Patterns ending with a `/` only match directories. All other patterns
+only match non-directories.
+
+Patterns support `?` and `*` wildcards matching a single character
+and any number of characters, respectively. These patterns don't match
+across directory separators (`/`).
+
+Patterns also support the `**` wildcard, which matches across directory
+separators.
+
+Patterns also support character classes such as `[EM]` (see the example below).
+
+Here's an example rule set:
+```
++ File/Spec/[EM]*.pm
+- unicore/**.pl
++ *.pl
+- *
+```
+This set of rules will include all files matching `File/Spec/[EM]*.pm`
+anywhere in the hierarchy. It will also include all `*.pl` files, except
+for those anywhere below a `unicore` directory. The last rule excludes
+all other files.
+
+This will likely leave a lot of empty directories around, but these can
+be removed using `--remove-empty-dirs`.
+
+You can use the `--debug-filter` option to show the sets of included
+and excluded files without building an actual file system.
+
## INTERNAL OPERATION
Internally, `mkdwarfs` runs in two completely separate phases. The first
diff --git a/include/dwarfs/builtin_script.h b/include/dwarfs/builtin_script.h
new file mode 100644
index 00000000..7ad13527
--- /dev/null
+++ b/include/dwarfs/builtin_script.h
@@ -0,0 +1,68 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see .
+ */
+
+#pragma once
+
+#include
+#include
+
+#include "dwarfs/inode.h"
+#include "dwarfs/script.h"
+
+namespace dwarfs {
+
+class logger;
+
+// Script implementation driven by filter rules; the work is done by `impl_`.
+class builtin_script : public script {
+ public:
+  explicit builtin_script(logger& lgr);
+  ~builtin_script();
+
+  void set_root_path(std::string const& path) { impl_->set_root_path(path); }
+  void add_filter_rule(std::string const& rule) {
+    impl_->add_filter_rule(rule);
+  }
+
+  bool has_configure() const override;
+  bool has_filter() const override;
+  bool has_transform() const override;
+  bool has_order() const override;
+  void configure(options_interface const& oi) override;
+  bool filter(entry_interface const& ei) override;
+  void transform(entry_interface& ei) override;
+  void order(inode_vector& iv) override;
+
+  // Interface of the object set_root_path()/add_filter_rule() forward to.
+  class impl {
+   public:
+    virtual ~impl() = default;
+    virtual void set_root_path(std::string const& path) = 0;
+    virtual void add_filter_rule(std::string const& rule) = 0;
+    virtual bool filter(entry_interface const& ei) = 0;
+    virtual bool has_filter() const = 0;
+  };
+
+ private:
+  std::unique_ptr impl_;
+};
+
+} // namespace dwarfs
diff --git a/include/dwarfs/entry.h b/include/dwarfs/entry.h
index d12c507f..bd160ba8 100644
--- a/include/dwarfs/entry.h
+++ b/include/dwarfs/entry.h
@@ -75,6 +75,7 @@ class entry : public entry_interface {
std::shared_ptr parent() const;
void set_name(const std::string& name);
std::string path() const override;
+ std::string dpath() const override;
const std::string& name() const override { return name_; }
size_t size() const override { return stat_.st_size; }
virtual type_t type() const = 0;
diff --git a/include/dwarfs/entry_interface.h b/include/dwarfs/entry_interface.h
index 1a1d1d8a..d184c641 100644
--- a/include/dwarfs/entry_interface.h
+++ b/include/dwarfs/entry_interface.h
@@ -30,6 +30,7 @@ namespace dwarfs {
class entry_interface : public object {
public:
virtual std::string path() const = 0;
+ virtual std::string dpath() const = 0;
virtual std::string const& name() const = 0;
virtual std::string type_string() const = 0;
virtual size_t size() const = 0;
diff --git a/include/dwarfs/options.h b/include/dwarfs/options.h
index 9d8b83b7..d2a7b160 100644
--- a/include/dwarfs/options.h
+++ b/include/dwarfs/options.h
@@ -23,6 +23,7 @@
#include
#include
+#include
#include
#include
@@ -30,6 +31,8 @@
namespace dwarfs {
+class entry;
+
enum class mlock_mode { NONE, TRY, MUST };
enum class cache_tidy_strategy { NONE, EXPIRY_TIME, BLOCK_SWAPPED_OUT };
@@ -108,6 +111,7 @@ struct scanner_options {
bool pack_symlinks_index{false};
bool force_pack_string_tables{false};
bool no_create_timestamp{true};
+ std::optional> debug_filter_function;
};
struct rewrite_options {
diff --git a/src/dwarfs/builtin_script.cpp b/src/dwarfs/builtin_script.cpp
new file mode 100644
index 00000000..da605723
--- /dev/null
+++ b/src/dwarfs/builtin_script.cpp
@@ -0,0 +1,266 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see .
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include "dwarfs/builtin_script.h"
+#include "dwarfs/entry_interface.h"
+#include "dwarfs/logger.h"
+
+namespace dwarfs {
+
+// One compiled filter rule: the verdict to apply and the regex that selects
+// the entries it applies to.
+struct filter_rule {
+  enum class rule_type { include, exclude };
+
+  // `regex_src` is compiled into `re`; `rule_text` is stored as given.
+  filter_rule(rule_type kind, bool is_floating, std::string const& regex_src,
+              std::string const& rule_text)
+      : type{kind}
+      , floating{is_floating}
+      , re{regex_src}
+      , rule{rule_text} {}
+
+  rule_type type;    // verdict returned when `re` matches
+  bool floating;     // pattern did not start with '/'
+  std::regex re;     // regex generated from the pattern
+  std::string rule;  // rule text, used in log output
+};
+
+// Implementation of builtin_script::impl; owns the compiled filter rules.
+template
+class builtin_script_ : public builtin_script::impl {
+ public:
+ builtin_script_(logger& lgr);
+
+ void set_root_path(std::string const& path) override;
+ void add_filter_rule(std::string const& rule) override;
+ bool filter(entry_interface const& ei) override;
+
+ bool has_filter() const override { return !filter_.empty(); }
+
+ private:
+ void add_filter_rule(std::unordered_set& seen_files,
+ std::string const& rule);
+
+ filter_rule compile_filter_rule(std::string const& rule);
+
+ LOG_PROXY_DECL(LoggerPolicy);
+ std::string root_path_; // prefix removed from paths for anchored rules
+ std::vector filter_; // rules in insertion order; filter() stops at first match
+};
+
+// Compiles a "+ pattern" / "- pattern" rule into a filter_rule (verdict plus
+// regex). Throws std::runtime_error if the rule does not start with '+' or
+// '-' or contains more than two consecutive '*'.
+template
+auto builtin_script_::compile_filter_rule(std::string const& rule)
+    -> filter_rule {
+  std::string r;
+  filter_rule::rule_type type;
+  auto* p = rule.c_str();
+  switch (*p) {
+  case '+':
+    type = filter_rule::rule_type::include;
+    break;
+  case '-':
+    type = filter_rule::rule_type::exclude;
+    break;
+  default:
+    throw std::runtime_error("rules must start with + or -");
+  }
+
+  while (*++p == ' ')  // skip the blanks between rule type and pattern
+    ;
+  // If the start of the pattern is not explicitly anchored, make it floating.
+  bool floating = *p && *p != '/';
+  if (floating) {
+    r += ".*/";
+  }
+
+  while (*p) {
+    switch (*p) {
+    case '\\':
+      // Copy the escape verbatim. Test *p, not p: the pointer is never null,
+      // so `if (p)` copied the terminator and stepped past the end of the rule.
+      r += *p++;
+      if (*p) {
+        r += *p++;
+      }
+      continue;
+
+    case '*': {
+      int nstar = 1;
+      while (*++p == '*') {
+        ++nstar;
+      }
+      switch (nstar) {
+      case 1:
+        if (r.ends_with('/') and (*p == '/' or *p == '\0')) {
+          r += "[^/]+";  // '*' is a whole path component: require one char
+        } else {
+          r += "[^/]*";
+        }
+        break;
+      case 2:
+        r += ".*";  // "**" may cross '/'
+        break;
+      default:
+        throw std::runtime_error("too many *s");
+      }
+    }
+      continue;  // p already points past the run of '*'
+
+    case '?':
+      r += "[^/]";
+      break;
+
+    case '.':
+    case '+':
+    case '^':
+    case '$':
+    case '(':
+    case ')':
+    case '{':
+    case '}':
+    case '|':
+      r += '\\';  // these are literal in a pattern, so escape them for regex
+      r += *p;
+      break;
+
+    default:
+      r += *p;  // includes '[' and ']', which are passed through unescaped
+      break;
+    }
+    ++p;
+  }
+
+  LOG_DEBUG << "'" << rule << "' -> '" << r << "' [floating=" << floating
+            << "]";
+
+  return filter_rule(type, floating, r, rule);
+}
+
+template
+builtin_script_::builtin_script_(logger& lgr)
+ : log_(lgr) {} // only initialises log_ from the given logger
+
+template
+void builtin_script_::set_root_path(std::string const& path) {
+ root_path_ = path; // filter() removes this prefix for anchored rules
+}
+
+template
+void builtin_script_::add_filter_rule(std::string const& rule) {
+ std::unordered_set seen_files; // starts empty for every top-level rule
+ add_filter_rule(seen_files, rule);
+}
+
+// Adds `rule` to filter_, or, for a rule starting with '.', adds every rule
+// read from the named file. Throws std::runtime_error on recursion/open errors.
+template
+void builtin_script_::add_filter_rule(
+ std::unordered_set& seen_files, std::string const& rule) {
+ if (rule.starts_with('.')) {
+ // Strip the leading character and the blanks after it to get the file name.
+ auto file = std::regex_replace(rule, std::regex("^. +"), "");
+
+ // seen_files holds the files currently being read; a duplicate is a cycle.
+ if (!seen_files.emplace(file).second) {
+ throw std::runtime_error(
+ fmt::format("recursion detected while opening file: {}", file));
+ }
+
+ std::ifstream ifs(file);
+ if (!ifs.is_open()) {
+ throw std::runtime_error(fmt::format("error opening file: {}", file));
+ }
+ std::string line;
+ while (std::getline(ifs, line)) {
+ if (line.starts_with('#')) { // comment line
+ continue;
+ }
+ if (line.find_first_not_of(" \t") == std::string::npos) { // empty or blank
+ continue;
+ }
+ add_filter_rule(seen_files, line);
+ }
+ seen_files.erase(file); // done with this file; it may be merged again later
+ } else {
+ filter_.push_back(compile_filter_rule(rule));
+ }
+}
+
+// Returns the verdict of the first rule whose regex matches the entry's
+// dpath(): true for include, false for exclude, true if no rule matches.
+template
+bool builtin_script_::filter(entry_interface const& ei) {
+ std::string path = ei.dpath();
+ std::string relpath = path;
+ if (relpath.size() >= root_path_.size()) {
+ assert(relpath.substr(0, root_path_.size()) == root_path_);
+ relpath.erase(0, root_path_.size());
+ }
+
+ for (const auto& r : filter_) {
+ // Floating rules match the full path, anchored rules the root-relative one.
+ // NOTE(review): the full path includes root_path_ itself - confirm intended.
+ if (std::regex_match(r.floating ? path : relpath, r.re)) {
+ LOG_TRACE << path << " matched rule '" << r.rule << "'";
+ switch (r.type) {
+ case filter_rule::rule_type::include:
+ return true;
+ case filter_rule::rule_type::exclude:
+ return false;
+ }
+ }
+ }
+ LOG_TRACE << path << " matched no rule";
+ return true;
+}
+
+builtin_script::builtin_script(logger& lgr)
+ : impl_(make_unique_logging_object(
+ lgr)) {} // impl_ is built by make_unique_logging_object from `lgr`
+
+builtin_script::~builtin_script() = default;
+// Of the four hooks only filtering is implemented; has_filter() asks impl_.
+bool builtin_script::has_configure() const { return false; }
+bool builtin_script::has_filter() const { return impl_->has_filter(); }
+bool builtin_script::has_transform() const { return false; }
+bool builtin_script::has_order() const { return false; }
+
+// configure/transform/order are unsupported (their has_*() return false).
+void builtin_script::configure(options_interface const&) { assert(false); }
+bool builtin_script::filter(entry_interface const& ei) {
+ return impl_->filter(ei);
+}
+
+void builtin_script::transform(entry_interface&) { assert(false); }
+void builtin_script::order(inode_vector&) { assert(false); }
+
+} // namespace dwarfs
diff --git a/src/dwarfs/entry.cpp b/src/dwarfs/entry.cpp
index fc589050..65aa99cf 100644
--- a/src/dwarfs/entry.cpp
+++ b/src/dwarfs/entry.cpp
@@ -66,6 +66,14 @@ std::string entry::path() const {
return name_;
}
+// Returns path() for this entry, with a '/' appended if the entry is a
+// directory.
+std::string entry::dpath() const {
+  std::string const base = path();
+  bool const is_dir = type() == E_DIR;
+  return is_dir ? base + '/' : base;
+}
+
std::string entry::type_string() const {
auto mode = stat_.st_mode;
diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp
index 607e0309..35f947b5 100644
--- a/src/dwarfs/scanner.cpp
+++ b/src/dwarfs/scanner.cpp
@@ -24,6 +24,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -676,6 +677,7 @@ std::shared_ptr
scanner_::scan_tree(const std::string& path, progress& prog,
file_scanner& fs) {
auto root = entry_->create(*os_, path);
+ bool const debug_filter = options_.debug_filter_function.has_value();
if (root->type() != entry::E_DIR) {
DWARFS_THROW(runtime_error, fmt::format("'{}' must be a directory", path));
@@ -704,18 +706,28 @@ scanner_::scan_tree(const std::string& path, progress& prog,
try {
auto pe = entry_->create(*os_, name, parent);
+ bool exclude = false;
if (script_) {
if (script_->has_filter() && !script_->filter(*pe)) {
- LOG_DEBUG << "skipping " << pe->path();
- continue;
- }
-
- if (script_->has_transform()) {
+ exclude = true;
+ } else if (script_->has_transform()) {
script_->transform(*pe);
}
}
+ if (debug_filter) {
+ (*options_.debug_filter_function)(exclude, pe.get());
+ }
+
+ if (exclude) {
+ if (!debug_filter) {
+ LOG_DEBUG << "excluding " << pe->dpath();
+ }
+
+ continue;
+ }
+
if (pe) {
switch (pe->type()) {
case entry::E_FILE:
@@ -748,25 +760,33 @@ scanner_::scan_tree(const std::string& path, progress& prog,
case entry::E_DIR:
// prog.current.store(pe.get());
prog.dirs_found++;
- pe->scan(*os_, prog);
+ if (!debug_filter) {
+ pe->scan(*os_, prog);
+ }
subdirs.push_back(pe);
break;
case entry::E_FILE:
prog.files_found++;
- fs.scan(dynamic_cast(pe.get()));
+ if (!debug_filter) {
+ fs.scan(dynamic_cast(pe.get()));
+ }
break;
case entry::E_LINK:
prog.symlinks_found++;
- pe->scan(*os_, prog);
+ if (!debug_filter) {
+ pe->scan(*os_, prog);
+ }
prog.symlinks_scanned++;
break;
case entry::E_DEVICE:
case entry::E_OTHER:
prog.specials_found++;
- pe->scan(*os_, prog);
+ if (!debug_filter) {
+ pe->scan(*os_, prog);
+ }
break;
default:
@@ -796,7 +816,9 @@ scanner_::scan_tree(const std::string& path, progress& prog,
template
void scanner_::scan(filesystem_writer& fsw,
const std::string& path, progress& prog) {
- LOG_INFO << "scanning " << path;
+ if (!options_.debug_filter_function) {
+ LOG_INFO << "scanning " << path;
+ }
prog.set_status_function(status_string);
@@ -806,6 +828,10 @@ void scanner_::scan(filesystem_writer& fsw,
auto root = scan_tree(path, prog, fs);
+ if (options_.debug_filter_function) {
+ return;
+ }
+
if (options_.remove_empty_dirs) {
LOG_INFO << "removing empty directories...";
auto d = dynamic_cast(root.get());
diff --git a/src/mkdwarfs.cpp b/src/mkdwarfs.cpp
index d9ba0893..bc914db0 100644
--- a/src/mkdwarfs.cpp
+++ b/src/mkdwarfs.cpp
@@ -57,6 +57,7 @@
#include "dwarfs/block_compressor.h"
#include "dwarfs/block_manager.h"
+#include "dwarfs/builtin_script.h"
#include "dwarfs/console_writer.h"
#include "dwarfs/entry.h"
#include "dwarfs/error.h"
@@ -93,6 +94,16 @@ namespace {
#endif
#endif
+enum class debug_filter_mode { // selects what debug_filter_output() prints
+ OFF, // has no entry in debug_filter_modes
+ INCLUDED, // included entries only, no prefix
+ INCLUDED_FILES, // like INCLUDED, directories omitted
+ EXCLUDED, // excluded entries only, no prefix
+ EXCLUDED_FILES, // like EXCLUDED, directories omitted
+ FILES, // both sets with "+ "/"- " prefix, directories omitted
+ ALL // both sets with "+ "/"- " prefix
+};
+
const std::map order_choices{
{"none", file_order_mode::NONE},
{"path", file_order_mode::PATH},
@@ -110,6 +121,15 @@ const std::map progress_modes{
{"unicode", console_writer::UNICODE},
};
+const std::map debug_filter_modes{ // accepted --debug-filter arguments
+ {"included", debug_filter_mode::INCLUDED},
+ {"included-files", debug_filter_mode::INCLUDED_FILES},
+ {"excluded", debug_filter_mode::EXCLUDED},
+ {"excluded-files", debug_filter_mode::EXCLUDED_FILES},
+ {"files", debug_filter_mode::FILES},
+ {"all", debug_filter_mode::ALL},
+};
+
const std::map time_resolutions{
{"sec", 1},
{"min", 60},
@@ -120,6 +140,32 @@ const std::map time_resolutions{
constexpr size_t min_block_size_bits{10};
constexpr size_t max_block_size_bits{30};
+// Writes pe->dpath() to `os` if `mode` selects entries with this verdict;
+// FILES and ALL additionally prefix the line with "+ " or "- ".
+void debug_filter_output(std::ostream& os, bool exclude, entry const* pe,
+                         debug_filter_mode mode) {
+  using dfm = debug_filter_mode;
+  bool const included_only =
+      mode == dfm::INCLUDED or mode == dfm::INCLUDED_FILES;
+  bool const excluded_only =
+      mode == dfm::EXCLUDED or mode == dfm::EXCLUDED_FILES;
+
+  // The entry's verdict is the opposite of the one the mode asks for.
+  if ((exclude and included_only) or (not exclude and excluded_only)) {
+    return;
+  }
+
+  bool const skip_dirs = mode == dfm::FILES or mode == dfm::INCLUDED_FILES or
+                         mode == dfm::EXCLUDED_FILES;
+  if (skip_dirs and pe->type() == entry::E_DIR) {
+    return;
+  }
+
+  bool const tagged = mode == dfm::FILES or mode == dfm::ALL;
+  char const* const tag = tagged ? (exclude ? "- " : "+ ") : "";
+  os << tag << pe->dpath() << "\n";
+}
+
} // namespace
namespace dwarfs {
@@ -339,7 +385,8 @@ int mkdwarfs(int argc, char** argv) {
std::string path, output, memory_limit, script_arg, compression, header,
schema_compression, metadata_compression, log_level_str, timestamp,
time_resolution, order, progress_mode, recompress_opts, pack_metadata,
- file_hash_algo;
+ file_hash_algo, debug_filter;
+ std::vector filter;
size_t num_workers;
bool no_progress = false, remove_header = false, no_section_index = false,
force_overwrite = false;
@@ -354,6 +401,10 @@ int mkdwarfs(int argc, char** argv) {
auto progress_desc = "progress mode (" +
(from(progress_modes) | get<0>() | unsplit(", ")) + ")";
+ auto debug_filter_desc =
+ "show effect of filter rules without producing an image (" +
+ (from(debug_filter_modes) | get<0>() | unsplit(", ")) + ")";
+
auto resolution_desc = "time resolution in seconds or (" +
(from(time_resolutions) | get<0>() | unsplit(", ")) +
")";
@@ -439,6 +490,12 @@ int mkdwarfs(int argc, char** argv) {
po::value(&script_arg),
"Python script for customization")
#endif
+ ("filter,F",
+ po::value>(&filter)->multitoken(),
+ "add filter rule")
+ ("debug-filter",
+ po::value(&debug_filter)->implicit_value("all"),
+ debug_filter_desc.c_str())
("remove-empty-dirs",
po::value(&options.remove_empty_dirs)->zero_tokens(),
"remove empty directories in file system")
@@ -498,7 +555,8 @@ int mkdwarfs(int argc, char** argv) {
return 1;
}
- if (vm.count("help") or !vm.count("input") or !vm.count("output")) {
+ if (vm.count("help") or !vm.count("input") or
+ (!vm.count("output") and !vm.count("debug-filter"))) {
size_t l_dc = 0, l_sc = 0, l_mc = 0, l_or = 0;
for (auto const& l : levels) {
l_dc = std::max(l_dc, l.data_compression.size());
@@ -683,6 +741,21 @@ int mkdwarfs(int argc, char** argv) {
worker_group wg_compress("compress", num_workers);
worker_group wg_scanner("scanner", num_workers);
+ if (vm.count("debug-filter")) {
+ if (auto it = debug_filter_modes.find(debug_filter);
+ it != debug_filter_modes.end()) {
+ options.debug_filter_function = [mode = it->second](bool exclude,
+ entry const* pe) {
+ debug_filter_output(std::cout, exclude, pe, mode);
+ };
+ no_progress = true;
+ } else {
+ std::cerr << "error: invalid filter debug mode '" << debug_filter
+ << "'\n";
+ return 1;
+ }
+ }
+
if (no_progress) {
progress_mode = "none";
}
@@ -728,6 +801,30 @@ int mkdwarfs(int argc, char** argv) {
}
#endif
+ if (!filter.empty()) {
+ if (script) {
+ std::cerr
+ << "error: scripts and filters are not simultaneously supported\n";
+ return 1;
+ }
+
+ auto bs = std::make_shared(lgr);
+
+ bs->set_root_path(path);
+
+ for (auto const& rule : filter) {
+ try {
+ bs->add_filter_rule(rule);
+ } catch (std::exception const& e) {
+ std::cerr << "error: could not parse filter rule '" << rule
+ << "': " << e.what() << "\n";
+ return 1;
+ }
+ }
+
+ script = bs;
+ }
+
bool force_similarity = false;
if (script && script->has_configure()) {
@@ -853,8 +950,15 @@ int mkdwarfs(int argc, char** argv) {
LOG_PROXY(debug_logger_policy, lgr);
- progress prog([&](const progress& p, bool last) { lgr.update(p, last); },
- interval_ms);
+ folly::Function updater;
+
+ if (options.debug_filter_function) {
+ updater = [](const progress&, bool) {};
+ } else {
+ updater = [&](const progress& p, bool last) { lgr.update(p, last); };
+ }
+
+ progress prog(std::move(updater), interval_ms);
block_compressor bc(compression);
block_compressor schema_bc(schema_compression);
@@ -869,21 +973,30 @@ int mkdwarfs(int argc, char** argv) {
<< " blocks with " << num_workers << " threads";
}
- if (std::filesystem::exists(output) && !force_overwrite) {
- std::cerr << "error: output file already exists, use --force to overwrite"
- << std::endl;
- return 1;
+ std::unique_ptr os;
+
+ if (!options.debug_filter_function) {
+ if (std::filesystem::exists(output) && !force_overwrite) {
+ std::cerr << "error: output file already exists, use --force to overwrite"
+ << std::endl;
+ return 1;
+ }
+
+ auto ofs = std::make_unique(output, std::ios::binary |
+ std::ios::trunc);
+
+ if (ofs->bad() || !ofs->is_open()) {
+ std::cerr << "error: cannot open output file '" << output
+ << "': " << strerror(errno) << std::endl;
+ return 1;
+ }
+
+ os = std::move(ofs);
+ } else {
+ os = std::make_unique();
}
- std::ofstream ofs(output, std::ios::binary | std::ios::trunc);
-
- if (ofs.bad() || !ofs.is_open()) {
- std::cerr << "error: cannot open output file '" << output
- << "': " << strerror(errno) << std::endl;
- return 1;
- }
-
- filesystem_writer fsw(ofs, lgr, wg_compress, prog, bc, schema_bc, metadata_bc,
+ filesystem_writer fsw(*os, lgr, wg_compress, prog, bc, schema_bc, metadata_bc,
fswopts, header_ifs.get());
auto ti = LOG_TIMED_INFO;
@@ -914,29 +1027,42 @@ int mkdwarfs(int argc, char** argv) {
return 1;
}
- LOG_INFO << "compression CPU time: "
- << time_with_unit(wg_compress.get_cpu_time());
-
- ofs.close();
-
- if (ofs.bad()) {
- LOG_ERROR << "failed to close output file '" << output
- << "': " << strerror(errno);
- return 1;
+ if (!options.debug_filter_function) {
+ LOG_INFO << "compression CPU time: "
+ << time_with_unit(wg_compress.get_cpu_time());
}
- std::ostringstream err;
+ if (auto ofs = dynamic_cast(os.get())) {
+ ofs->close();
- if (prog.errors) {
- err << "with " << prog.errors << " error";
- if (prog.errors > 1) {
- err << "s";
+ if (ofs->bad()) {
+ LOG_ERROR << "failed to close output file '" << output
+ << "': " << strerror(errno);
+ return 1;
}
+ } else if (auto oss = dynamic_cast(os.get())) {
+ assert(oss->str().empty());
} else {
- err << "without errors";
+ assert(false);
}
- ti << "filesystem " << (recompress ? "rewritten " : "created ") << err.str();
+ os.reset();
+
+ if (!options.debug_filter_function) {
+ std::ostringstream err;
+
+ if (prog.errors) {
+ err << "with " << prog.errors << " error";
+ if (prog.errors > 1) {
+ err << "s";
+ }
+ } else {
+ err << "without errors";
+ }
+
+ ti << "filesystem " << (recompress ? "rewritten " : "created ")
+ << err.str();
+ }
return prog.errors > 0;
}