mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 20:41:04 -04:00
refactor(scanner): clean up ctor
This commit is contained in:
parent
fc21c5d9d5
commit
42e7af8cd2
@ -43,9 +43,8 @@ class thread_pool;
|
|||||||
|
|
||||||
class scanner {
|
class scanner {
|
||||||
public:
|
public:
|
||||||
scanner(logger& lgr, thread_pool& pool, std::shared_ptr<segmenter_factory> sf,
|
scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf,
|
||||||
std::shared_ptr<entry_factory> ef,
|
entry_factory& ef, os_access const& os, std::shared_ptr<script> scr,
|
||||||
std::shared_ptr<os_access const> os, std::shared_ptr<script> scr,
|
|
||||||
const scanner_options& options);
|
const scanner_options& options);
|
||||||
|
|
||||||
void scan(
|
void scan(
|
||||||
|
@ -289,9 +289,8 @@ std::string status_string(progress const& p, size_t width) {
|
|||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
class scanner_ final : public scanner::impl {
|
class scanner_ final : public scanner::impl {
|
||||||
public:
|
public:
|
||||||
scanner_(logger& lgr, worker_group& wg, std::shared_ptr<segmenter_factory> sf,
|
scanner_(logger& lgr, worker_group& wg, segmenter_factory& sf,
|
||||||
std::shared_ptr<entry_factory> ef,
|
entry_factory& ef, os_access const& os, std::shared_ptr<script> scr,
|
||||||
std::shared_ptr<os_access const> os, std::shared_ptr<script> scr,
|
|
||||||
const scanner_options& options);
|
const scanner_options& options);
|
||||||
|
|
||||||
void scan(filesystem_writer& fsw, std::filesystem::path const& path,
|
void scan(filesystem_writer& fsw, std::filesystem::path const& path,
|
||||||
@ -318,26 +317,25 @@ class scanner_ final : public scanner::impl {
|
|||||||
LOG_PROXY_DECL(LoggerPolicy);
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
worker_group& wg_;
|
worker_group& wg_;
|
||||||
scanner_options const& options_;
|
scanner_options const& options_;
|
||||||
std::shared_ptr<segmenter_factory> segmenter_factory_;
|
segmenter_factory& segmenter_factory_;
|
||||||
std::shared_ptr<entry_factory> entry_factory_;
|
entry_factory& entry_factory_;
|
||||||
std::shared_ptr<os_access const> os_;
|
os_access const& os_;
|
||||||
std::shared_ptr<script> script_;
|
std::shared_ptr<script> script_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
|
scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
|
||||||
std::shared_ptr<segmenter_factory> sf,
|
segmenter_factory& sf, entry_factory& ef,
|
||||||
std::shared_ptr<entry_factory> ef,
|
os_access const& os,
|
||||||
std::shared_ptr<os_access const> os,
|
|
||||||
std::shared_ptr<script> scr,
|
std::shared_ptr<script> scr,
|
||||||
const scanner_options& options)
|
const scanner_options& options)
|
||||||
: LOG_PROXY_INIT(lgr)
|
: LOG_PROXY_INIT(lgr)
|
||||||
, wg_{wg}
|
, wg_{wg}
|
||||||
, options_{options}
|
, options_{options}
|
||||||
, segmenter_factory_{std::move(sf)}
|
, segmenter_factory_{sf}
|
||||||
, entry_factory_{std::move(ef)}
|
, entry_factory_{ef}
|
||||||
, os_(std::move(os))
|
, os_{os}
|
||||||
, script_(std::move(scr)) {}
|
, script_{std::move(scr)} {}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
std::shared_ptr<entry>
|
std::shared_ptr<entry>
|
||||||
@ -345,7 +343,7 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
|||||||
std::shared_ptr<dir> parent, progress& prog,
|
std::shared_ptr<dir> parent, progress& prog,
|
||||||
file_scanner& fs, bool debug_filter) {
|
file_scanner& fs, bool debug_filter) {
|
||||||
try {
|
try {
|
||||||
auto pe = entry_factory_->create(*os_, name, parent);
|
auto pe = entry_factory_.create(os_, name, parent);
|
||||||
bool exclude = false;
|
bool exclude = false;
|
||||||
|
|
||||||
if (script_) {
|
if (script_) {
|
||||||
@ -371,7 +369,7 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
|||||||
if (pe) {
|
if (pe) {
|
||||||
switch (pe->type()) {
|
switch (pe->type()) {
|
||||||
case entry::E_FILE:
|
case entry::E_FILE:
|
||||||
if (pe->size() > 0 && os_->access(pe->fs_path(), R_OK)) {
|
if (pe->size() > 0 && os_.access(pe->fs_path(), R_OK)) {
|
||||||
LOG_ERROR << "cannot access " << pe->path_as_string()
|
LOG_ERROR << "cannot access " << pe->path_as_string()
|
||||||
<< ", creating empty file";
|
<< ", creating empty file";
|
||||||
pe->override_size(0);
|
pe->override_size(0);
|
||||||
@ -402,7 +400,7 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
|||||||
// prog.current.store(pe.get());
|
// prog.current.store(pe.get());
|
||||||
prog.dirs_found++;
|
prog.dirs_found++;
|
||||||
if (!debug_filter) {
|
if (!debug_filter) {
|
||||||
pe->scan(*os_, prog);
|
pe->scan(os_, prog);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -416,7 +414,7 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
|||||||
case entry::E_LINK:
|
case entry::E_LINK:
|
||||||
prog.symlinks_found++;
|
prog.symlinks_found++;
|
||||||
if (!debug_filter) {
|
if (!debug_filter) {
|
||||||
pe->scan(*os_, prog);
|
pe->scan(os_, prog);
|
||||||
}
|
}
|
||||||
prog.symlinks_scanned++;
|
prog.symlinks_scanned++;
|
||||||
break;
|
break;
|
||||||
@ -425,7 +423,7 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
|||||||
case entry::E_OTHER:
|
case entry::E_OTHER:
|
||||||
prog.specials_found++;
|
prog.specials_found++;
|
||||||
if (!debug_filter) {
|
if (!debug_filter) {
|
||||||
pe->scan(*os_, prog);
|
pe->scan(os_, prog);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -450,7 +448,7 @@ void scanner_<LoggerPolicy>::dump_state(
|
|||||||
std::string_view env_var, std::string_view what,
|
std::string_view env_var, std::string_view what,
|
||||||
std::shared_ptr<file_access const> fa,
|
std::shared_ptr<file_access const> fa,
|
||||||
std::function<void(std::ostream&)> dumper) const {
|
std::function<void(std::ostream&)> dumper) const {
|
||||||
if (auto dumpfile = os_->getenv(env_var)) {
|
if (auto dumpfile = os_.getenv(env_var)) {
|
||||||
if (fa) {
|
if (fa) {
|
||||||
LOG_VERBOSE << "dumping " << what << " to " << *dumpfile;
|
LOG_VERBOSE << "dumping " << what << " to " << *dumpfile;
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
@ -474,7 +472,7 @@ template <typename LoggerPolicy>
|
|||||||
std::shared_ptr<entry>
|
std::shared_ptr<entry>
|
||||||
scanner_<LoggerPolicy>::scan_tree(std::filesystem::path const& path,
|
scanner_<LoggerPolicy>::scan_tree(std::filesystem::path const& path,
|
||||||
progress& prog, file_scanner& fs) {
|
progress& prog, file_scanner& fs) {
|
||||||
auto root = entry_factory_->create(*os_, path);
|
auto root = entry_factory_.create(os_, path);
|
||||||
bool const debug_filter = options_.debug_filter_function.has_value();
|
bool const debug_filter = options_.debug_filter_function.has_value();
|
||||||
|
|
||||||
if (root->type() != entry::E_DIR) {
|
if (root->type() != entry::E_DIR) {
|
||||||
@ -498,7 +496,7 @@ scanner_<LoggerPolicy>::scan_tree(std::filesystem::path const& path,
|
|||||||
auto ppath = parent->fs_path();
|
auto ppath = parent->fs_path();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto d = os_->opendir(ppath);
|
auto d = os_.opendir(ppath);
|
||||||
std::filesystem::path name;
|
std::filesystem::path name;
|
||||||
std::vector<std::shared_ptr<entry>> subdirs;
|
std::vector<std::shared_ptr<entry>> subdirs;
|
||||||
|
|
||||||
@ -534,7 +532,7 @@ scanner_<LoggerPolicy>::scan_list(std::filesystem::path const& path,
|
|||||||
|
|
||||||
auto ti = LOG_TIMED_INFO;
|
auto ti = LOG_TIMED_INFO;
|
||||||
|
|
||||||
auto root = entry_factory_->create(*os_, path);
|
auto root = entry_factory_.create(os_, path);
|
||||||
|
|
||||||
if (root->type() != entry::E_DIR) {
|
if (root->type() != entry::E_DIR) {
|
||||||
DWARFS_THROW(runtime_error,
|
DWARFS_THROW(runtime_error,
|
||||||
@ -619,10 +617,10 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
prog.set_status_function(status_string);
|
prog.set_status_function(status_string);
|
||||||
|
|
||||||
inode_manager im(LOG_GET_LOGGER, prog, options_.inode);
|
inode_manager im(LOG_GET_LOGGER, prog, options_.inode);
|
||||||
file_scanner fs(LOG_GET_LOGGER, wg_, *os_, im, prog,
|
file_scanner fs(LOG_GET_LOGGER, wg_, os_, im, prog,
|
||||||
{.hash_algo = options_.file_hash_algorithm,
|
{.hash_algo = options_.file_hash_algorithm,
|
||||||
.debug_inode_create = os_->getenv(kEnvVarDumpFilesRaw) ||
|
.debug_inode_create = os_.getenv(kEnvVarDumpFilesRaw) ||
|
||||||
os_->getenv(kEnvVarDumpFilesFinal)});
|
os_.getenv(kEnvVarDumpFilesFinal)});
|
||||||
|
|
||||||
auto root =
|
auto root =
|
||||||
list ? scan_list(path, *list, prog, fs) : scan_tree(path, prog, fs);
|
list ? scan_list(path, *list, prog, fs) : scan_tree(path, prog, fs);
|
||||||
@ -668,7 +666,7 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
// the file vectors are populated
|
// the file vectors are populated
|
||||||
if (im.has_invalid_inodes()) {
|
if (im.has_invalid_inodes()) {
|
||||||
LOG_INFO << "trying to recover any invalid inodes...";
|
LOG_INFO << "trying to recover any invalid inodes...";
|
||||||
im.try_scan_invalid(wg_, *os_);
|
im.try_scan_invalid(wg_, os_);
|
||||||
wg_.wait();
|
wg_.wait();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -738,8 +736,8 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
|
|
||||||
{
|
{
|
||||||
size_t const num_threads = options_.num_segmenter_workers;
|
size_t const num_threads = options_.num_segmenter_workers;
|
||||||
worker_group wg_ordering(LOG_GET_LOGGER, *os_, "ordering", num_threads);
|
worker_group wg_ordering(LOG_GET_LOGGER, os_, "ordering", num_threads);
|
||||||
worker_group wg_blockify(LOG_GET_LOGGER, *os_, "blockify", num_threads);
|
worker_group wg_blockify(LOG_GET_LOGGER, os_, "blockify", num_threads);
|
||||||
|
|
||||||
fsw.configure(frag_info.categories, num_threads);
|
fsw.configure(frag_info.categories, num_threads);
|
||||||
|
|
||||||
@ -763,7 +761,7 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
auto span = im.ordered_span(category, wg_ordering);
|
auto span = im.ordered_span(category, wg_ordering);
|
||||||
auto tv = LOG_CPU_TIMED_VERBOSE;
|
auto tv = LOG_CPU_TIMED_VERBOSE;
|
||||||
|
|
||||||
auto seg = segmenter_factory_->create(
|
auto seg = segmenter_factory_.create(
|
||||||
category, cat_size, cc, blockmgr,
|
category, cat_size, cc, blockmgr,
|
||||||
[category, meta, blockmgr, &fsw](auto block,
|
[category, meta, blockmgr, &fsw](auto block,
|
||||||
auto logical_block_num) {
|
auto logical_block_num) {
|
||||||
@ -785,7 +783,7 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
auto f = ino->any();
|
auto f = ino->any();
|
||||||
|
|
||||||
if (auto size = f->size(); size > 0 && !f->is_invalid()) {
|
if (auto size = f->size(); size > 0 && !f->is_invalid()) {
|
||||||
auto [mm, _, errors] = ino->mmap_any(*os_);
|
auto [mm, _, errors] = ino->mmap_any(os_);
|
||||||
|
|
||||||
if (mm) {
|
if (mm) {
|
||||||
file_off_t offset{0};
|
file_off_t offset{0};
|
||||||
@ -947,7 +945,7 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
mv2.gids() = ge_data.get_gids();
|
mv2.gids() = ge_data.get_gids();
|
||||||
mv2.modes() = ge_data.get_modes();
|
mv2.modes() = ge_data.get_modes();
|
||||||
mv2.timestamp_base() = ge_data.get_timestamp_base();
|
mv2.timestamp_base() = ge_data.get_timestamp_base();
|
||||||
mv2.block_size() = segmenter_factory_->get_block_size();
|
mv2.block_size() = segmenter_factory_.get_block_size();
|
||||||
mv2.total_fs_size() = prog.original_size;
|
mv2.total_fs_size() = prog.original_size;
|
||||||
mv2.total_hardlink_size() = prog.hardlink_size;
|
mv2.total_hardlink_size() = prog.hardlink_size;
|
||||||
mv2.options() = fsopts;
|
mv2.options() = fsopts;
|
||||||
@ -1012,14 +1010,12 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
|
|
||||||
scanner::scanner(logger& lgr, thread_pool& pool,
|
scanner::scanner(logger& lgr, thread_pool& pool, segmenter_factory& sf,
|
||||||
std::shared_ptr<segmenter_factory> sf,
|
entry_factory& ef, os_access const& os,
|
||||||
std::shared_ptr<entry_factory> ef,
|
|
||||||
std::shared_ptr<os_access const> os,
|
|
||||||
std::shared_ptr<script> scr, const scanner_options& options)
|
std::shared_ptr<script> scr, const scanner_options& options)
|
||||||
: impl_(
|
: impl_(
|
||||||
make_unique_logging_object<impl, internal::scanner_, logger_policies>(
|
make_unique_logging_object<impl, internal::scanner_, logger_policies>(
|
||||||
lgr, pool.get_worker_group(), std::move(sf), std::move(ef),
|
lgr, pool.get_worker_group(), sf, ef, os, std::move(scr),
|
||||||
std::move(os), std::move(scr), options)) {}
|
options)) {}
|
||||||
|
|
||||||
} // namespace dwarfs
|
} // namespace dwarfs
|
||||||
|
@ -1274,7 +1274,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
std::numeric_limits<size_t>::max(),
|
std::numeric_limits<size_t>::max(),
|
||||||
compress_niceness);
|
compress_niceness);
|
||||||
|
|
||||||
std::unique_ptr<filesystem_writer> fsw;
|
std::optional<filesystem_writer> fsw;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
std::ostream& fsw_os =
|
std::ostream& fsw_os =
|
||||||
@ -1285,9 +1285,8 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
},
|
},
|
||||||
[&](std::ostringstream& oss) -> std::ostream& { return oss; }};
|
[&](std::ostringstream& oss) -> std::ostream& { return oss; }};
|
||||||
|
|
||||||
fsw = std::make_unique<filesystem_writer>(
|
fsw.emplace(fsw_os, lgr, compress_pool, prog, schema_bc, metadata_bc,
|
||||||
fsw_os, lgr, compress_pool, prog, schema_bc, metadata_bc, history_bc,
|
history_bc, fswopts, header_ifs ? &header_ifs->is() : nullptr);
|
||||||
fswopts, header_ifs ? &header_ifs->is() : nullptr);
|
|
||||||
|
|
||||||
categorized_option<block_compressor> compression_opt;
|
categorized_option<block_compressor> compression_opt;
|
||||||
contextual_option_parser cop("--compression", compression_opt, cp,
|
contextual_option_parser cop("--compression", compression_opt, cp,
|
||||||
@ -1343,15 +1342,12 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
|
|||||||
input_filesystem->rewrite(prog, *fsw, *cat_resolver, rw_opts);
|
input_filesystem->rewrite(prog, *fsw, *cat_resolver, rw_opts);
|
||||||
compress_pool.wait();
|
compress_pool.wait();
|
||||||
} else {
|
} else {
|
||||||
auto sf = std::make_shared<segmenter_factory>(
|
segmenter_factory sf(lgr, prog, options.inode.categorizer_mgr, sf_config);
|
||||||
lgr, prog, options.inode.categorizer_mgr, sf_config);
|
entry_factory ef;
|
||||||
|
|
||||||
auto ef = std::make_shared<entry_factory>();
|
|
||||||
|
|
||||||
thread_pool scanner_pool(lgr, *iol.os, "scanner", num_scanner_workers);
|
thread_pool scanner_pool(lgr, *iol.os, "scanner", num_scanner_workers);
|
||||||
|
|
||||||
scanner s(lgr, scanner_pool, std::move(sf), std::move(ef), iol.os,
|
scanner s(lgr, scanner_pool, sf, ef, *iol.os, std::move(script), options);
|
||||||
std::move(script), options);
|
|
||||||
|
|
||||||
s.scan(*fsw, path, prog, input_list, iol.file);
|
s.scan(*fsw, path, prog, input_list, iol.file);
|
||||||
|
|
||||||
|
@ -123,10 +123,10 @@ std::string make_filesystem(::benchmark::State const& state) {
|
|||||||
thread_pool pool(lgr, *os, "writer", 4);
|
thread_pool pool(lgr, *os, "writer", 4);
|
||||||
writer_progress prog;
|
writer_progress prog;
|
||||||
|
|
||||||
auto sf = std::make_shared<segmenter_factory>(lgr, prog, cfg);
|
segmenter_factory sf(lgr, prog, cfg);
|
||||||
auto ef = std::make_shared<entry_factory>();
|
entry_factory ef;
|
||||||
|
|
||||||
scanner s(lgr, pool, sf, ef, os, std::make_shared<test::script_mock>(),
|
scanner s(lgr, pool, sf, ef, *os, std::make_shared<test::script_mock>(),
|
||||||
options);
|
options);
|
||||||
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
|
@ -94,10 +94,10 @@ build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
|
|||||||
sf_cfg.max_active_blocks.set_default(cfg.max_active_blocks);
|
sf_cfg.max_active_blocks.set_default(cfg.max_active_blocks);
|
||||||
sf_cfg.bloom_filter_size.set_default(cfg.bloom_filter_size);
|
sf_cfg.bloom_filter_size.set_default(cfg.bloom_filter_size);
|
||||||
|
|
||||||
auto sf = std::make_shared<segmenter_factory>(lgr, *prog, sf_cfg);
|
segmenter_factory sf(lgr, *prog, sf_cfg);
|
||||||
auto ef = std::make_shared<entry_factory>();
|
entry_factory ef;
|
||||||
|
|
||||||
scanner s(lgr, pool, sf, ef, input, scr, options);
|
scanner s(lgr, pool, sf, ef, *input, scr, options);
|
||||||
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
|
|
||||||
@ -937,10 +937,9 @@ class filter_test
|
|||||||
|
|
||||||
writer_progress prog;
|
writer_progress prog;
|
||||||
thread_pool pool(lgr, *input, "worker", 1);
|
thread_pool pool(lgr, *input, "worker", 1);
|
||||||
auto sf = std::make_shared<segmenter_factory>(lgr, prog,
|
segmenter_factory sf(lgr, prog, segmenter_factory::config{});
|
||||||
segmenter_factory::config{});
|
entry_factory ef;
|
||||||
auto ef = std::make_shared<entry_factory>();
|
scanner s(lgr, pool, sf, ef, *input, scr, options);
|
||||||
scanner s(lgr, pool, sf, ef, input, scr, options);
|
|
||||||
|
|
||||||
block_compressor bc("null");
|
block_compressor bc("null");
|
||||||
std::ostringstream null;
|
std::ostringstream null;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user