Logging & timing for file_scanner

This commit is contained in:
Marcus Holland-Moritz 2023-08-07 15:20:52 +02:00
parent b309d7165b
commit 94b875868e
3 changed files with 53 additions and 28 deletions

View File

@ -29,6 +29,7 @@ namespace dwarfs {
class file;
class inode_manager;
class logger;
class os_access;
class progress;
class worker_group;
@ -39,7 +40,7 @@ namespace detail {
class file_scanner {
public:
file_scanner(worker_group& wg, os_access& os, inode_manager& im,
file_scanner(logger& lgr, worker_group& wg, os_access& os, inode_manager& im,
std::optional<std::string> const& hash_algo, progress& prog);
void scan(file* p) { impl_->scan(p); }

View File

@ -39,9 +39,10 @@ namespace dwarfs::detail {
namespace {
class file_scanner_ : public file_scanner::impl {
template <typename LoggerPolicy>
class file_scanner_ final : public file_scanner::impl {
public:
file_scanner_(worker_group& wg, os_access& os, inode_manager& im,
file_scanner_(logger& lgr, worker_group& wg, os_access& os, inode_manager& im,
std::optional<std::string> const& hash_algo, progress& prog);
void scan(file* p) override;
@ -81,6 +82,7 @@ class file_scanner_ : public file_scanner::impl {
finalize_inodes(std::vector<std::pair<KeyType, inode::files_vector>>& ent,
uint32_t& inode_num, uint32_t& obj_num);
LOG_PROXY_DECL(LoggerPolicy);
worker_group& wg_;
os_access& os_;
inode_manager& im_;
@ -125,16 +127,19 @@ class file_scanner_ : public file_scanner::impl {
// it is still present in `unique_size_`. It will be removed
// from `unique_size_` after its hash has been stored.
file_scanner_::file_scanner_(worker_group& wg, os_access& os, inode_manager& im,
std::optional<std::string> const& hash_algo,
progress& prog)
: wg_(wg)
template <typename LoggerPolicy>
file_scanner_<LoggerPolicy>::file_scanner_(
logger& lgr, worker_group& wg, os_access& os, inode_manager& im,
std::optional<std::string> const& hash_algo, progress& prog)
: LOG_PROXY_INIT(lgr)
, wg_(wg)
, os_(os)
, im_(im)
, hash_algo_{hash_algo}
, prog_(prog) {}
void file_scanner_::scan(file* p) {
template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::scan(file* p) {
if (p->num_hard_links() > 1) {
auto& vec = hardlinks_[p->raw_inode_num()];
vec.push_back(p);
@ -162,7 +167,8 @@ void file_scanner_::scan(file* p) {
}
}
void file_scanner_::finalize(uint32_t& inode_num) {
template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::finalize(uint32_t& inode_num) {
uint32_t obj_num = 0;
assert(first_file_hashed_.empty());
@ -185,7 +191,8 @@ void file_scanner_::finalize(uint32_t& inode_num) {
}
}
void file_scanner_::scan_dedupe(file* p) {
template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
// We need no lock yet, as `unique_size_` is only manipulated from
// this thread.
auto size = p->size();
@ -285,7 +292,8 @@ void file_scanner_::scan_dedupe(file* p) {
}
}
void file_scanner_::hash_file(file* p) {
template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::hash_file(file* p) {
auto const size = p->size();
std::shared_ptr<mmif> mm;
@ -297,7 +305,8 @@ void file_scanner_::hash_file(file* p) {
p->scan(mm.get(), prog_, hash_algo_);
}
void file_scanner_::add_inode(file* p) {
template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::add_inode(file* p) {
assert(!p->get_inode());
auto inode = im_.create_inode();
@ -307,8 +316,11 @@ void file_scanner_::add_inode(file* p) {
im_.scan_background(wg_, os_, std::move(inode), p);
}
template <typename LoggerPolicy>
template <typename Lookup>
void file_scanner_::finalize_hardlinks(Lookup&& lookup) {
void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
auto ti = LOG_TIMED_INFO;
for (auto& kv : hardlinks_) {
auto& hlv = kv.second;
if (hlv.size() > 1) {
@ -322,13 +334,19 @@ void file_scanner_::finalize_hardlinks(Lookup&& lookup) {
}
hardlinks_.clear();
ti << "finalized " << hardlinks_.size() << " hardlinks";
}
template <typename LoggerPolicy>
template <bool UniqueOnly, typename KeyType>
void file_scanner_::finalize_files(
void file_scanner_<LoggerPolicy>::finalize_files(
folly::F14FastMap<KeyType, inode::files_vector>& fmap, uint32_t& inode_num,
uint32_t& obj_num) {
std::vector<std::pair<KeyType, inode::files_vector>> ent;
auto ti = LOG_TIMED_INFO;
ent.reserve(fmap.size());
fmap.eraseInto(
fmap.begin(), fmap.end(), [&ent](KeyType&& k, inode::files_vector&& fv) {
@ -339,6 +357,7 @@ void file_scanner_::finalize_files(
ent.emplace_back(std::move(k), std::move(fv));
}
});
std::sort(ent.begin(), ent.end(),
[](auto& left, auto& right) { return left.first < right.first; });
@ -348,10 +367,14 @@ void file_scanner_::finalize_files(
if constexpr (!UniqueOnly) {
finalize_inodes<false>(ent, inode_num, obj_num);
}
ti << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
<< "unique files";
}
template <typename LoggerPolicy>
template <bool Unique, typename KeyType>
void file_scanner_::finalize_inodes(
void file_scanner_<LoggerPolicy>::finalize_inodes(
std::vector<std::pair<KeyType, inode::files_vector>>& ent,
uint32_t& inode_num, uint32_t& obj_num) {
for (auto& p : ent) {
@ -395,9 +418,11 @@ void file_scanner_::finalize_inodes(
}
} // namespace
file_scanner::file_scanner(worker_group& wg, os_access& os, inode_manager& im,
file_scanner::file_scanner(logger& lgr, worker_group& wg, os_access& os,
inode_manager& im,
std::optional<std::string> const& hash_algo,
progress& prog)
: impl_{std::make_unique<file_scanner_>(wg, os, im, hash_algo, prog)} {}
: impl_{make_unique_logging_object<impl, file_scanner_, logger_policies>(
lgr, wg, os, im, hash_algo, prog)} {}
} // namespace dwarfs::detail

View File

@ -290,14 +290,13 @@ class scanner_ final : public scanner::impl {
progress& prog, detail::file_scanner& fs,
bool debug_filter = false);
LOG_PROXY_DECL(LoggerPolicy);
worker_group& wg_;
const segmenter::config& cfg_;
const scanner_options& options_;
std::shared_ptr<entry_factory> entry_;
std::shared_ptr<os_access> os_;
std::shared_ptr<script> script_;
worker_group& wg_;
logger& lgr_;
LOG_PROXY_DECL(LoggerPolicy);
};
template <typename LoggerPolicy>
@ -307,14 +306,13 @@ scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
std::shared_ptr<os_access> os,
std::shared_ptr<script> scr,
const scanner_options& options)
: cfg_(cfg)
: LOG_PROXY_INIT(lgr)
, wg_(wg)
, cfg_(cfg)
, options_(options)
, entry_(std::move(ef))
, os_(std::move(os))
, script_(std::move(scr))
, wg_(wg)
, lgr_(lgr)
, LOG_PROXY_INIT(lgr_) {}
, script_(std::move(scr)) {}
template <typename LoggerPolicy>
std::shared_ptr<entry>
@ -566,8 +564,9 @@ void scanner_<LoggerPolicy>::scan(
prog.set_status_function(status_string);
inode_manager im(lgr_, prog, options_.inode);
detail::file_scanner fs(wg_, *os_, im, options_.file_hash_algorithm, prog);
inode_manager im(LOG_GET_LOGGER, prog, options_.inode);
detail::file_scanner fs(LOG_GET_LOGGER, wg_, *os_, im,
options_.file_hash_algorithm, prog);
auto root =
list ? scan_list(path, *list, prog, fs) : scan_tree(path, prog, fs);
@ -658,7 +657,7 @@ void scanner_<LoggerPolicy>::scan(
});
LOG_INFO << "building blocks...";
segmenter bm(lgr_, prog, cfg_, os_, fsw);
segmenter bm(LOG_GET_LOGGER, prog, cfg_, os_, fsw);
{
worker_group blockify("blockify", 1, 1 << 20);