Logging & timing for file_scanner

This commit is contained in:
Marcus Holland-Moritz 2023-08-07 15:20:52 +02:00
parent b309d7165b
commit 94b875868e
3 changed files with 53 additions and 28 deletions

View File

@ -29,6 +29,7 @@ namespace dwarfs {
class file; class file;
class inode_manager; class inode_manager;
class logger;
class os_access; class os_access;
class progress; class progress;
class worker_group; class worker_group;
@ -39,7 +40,7 @@ namespace detail {
class file_scanner { class file_scanner {
public: public:
file_scanner(worker_group& wg, os_access& os, inode_manager& im, file_scanner(logger& lgr, worker_group& wg, os_access& os, inode_manager& im,
std::optional<std::string> const& hash_algo, progress& prog); std::optional<std::string> const& hash_algo, progress& prog);
void scan(file* p) { impl_->scan(p); } void scan(file* p) { impl_->scan(p); }

View File

@ -39,9 +39,10 @@ namespace dwarfs::detail {
namespace { namespace {
class file_scanner_ : public file_scanner::impl { template <typename LoggerPolicy>
class file_scanner_ final : public file_scanner::impl {
public: public:
file_scanner_(worker_group& wg, os_access& os, inode_manager& im, file_scanner_(logger& lgr, worker_group& wg, os_access& os, inode_manager& im,
std::optional<std::string> const& hash_algo, progress& prog); std::optional<std::string> const& hash_algo, progress& prog);
void scan(file* p) override; void scan(file* p) override;
@ -81,6 +82,7 @@ class file_scanner_ : public file_scanner::impl {
finalize_inodes(std::vector<std::pair<KeyType, inode::files_vector>>& ent, finalize_inodes(std::vector<std::pair<KeyType, inode::files_vector>>& ent,
uint32_t& inode_num, uint32_t& obj_num); uint32_t& inode_num, uint32_t& obj_num);
LOG_PROXY_DECL(LoggerPolicy);
worker_group& wg_; worker_group& wg_;
os_access& os_; os_access& os_;
inode_manager& im_; inode_manager& im_;
@ -125,16 +127,19 @@ class file_scanner_ : public file_scanner::impl {
// it is still present in `unique_size_`. It will be removed // it is still present in `unique_size_`. It will be removed
// from `unique_size_` after its hash has been stored. // from `unique_size_` after its hash has been stored.
// file_scanner_ constructor (old version in the left column, new templated
// version in the right column of this diff).
// The new version takes the logger as an additional first argument and
// initializes the log proxy with LOG_PROXY_INIT(lgr) before wg_; all other
// initializers are unchanged and just take over the constructor arguments.
// The constructor body itself is empty.
file_scanner_::file_scanner_(worker_group& wg, os_access& os, inode_manager& im, template <typename LoggerPolicy>
std::optional<std::string> const& hash_algo, file_scanner_<LoggerPolicy>::file_scanner_(
progress& prog) logger& lgr, worker_group& wg, os_access& os, inode_manager& im,
: wg_(wg) std::optional<std::string> const& hash_algo, progress& prog)
// The log proxy is initialized first, ahead of wg_.
: LOG_PROXY_INIT(lgr)
, wg_(wg)
, os_(os) , os_(os)
, im_(im) , im_(im)
, hash_algo_{hash_algo} , hash_algo_{hash_algo}
, prog_(prog) {} , prog_(prog) {}
void file_scanner_::scan(file* p) { template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::scan(file* p) {
if (p->num_hard_links() > 1) { if (p->num_hard_links() > 1) {
auto& vec = hardlinks_[p->raw_inode_num()]; auto& vec = hardlinks_[p->raw_inode_num()];
vec.push_back(p); vec.push_back(p);
@ -162,7 +167,8 @@ void file_scanner_::scan(file* p) {
} }
} }
void file_scanner_::finalize(uint32_t& inode_num) { template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::finalize(uint32_t& inode_num) {
uint32_t obj_num = 0; uint32_t obj_num = 0;
assert(first_file_hashed_.empty()); assert(first_file_hashed_.empty());
@ -185,7 +191,8 @@ void file_scanner_::finalize(uint32_t& inode_num) {
} }
} }
void file_scanner_::scan_dedupe(file* p) { template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
// We need no lock yet, as `unique_size_` is only manipulated from // We need no lock yet, as `unique_size_` is only manipulated from
// this thread. // this thread.
auto size = p->size(); auto size = p->size();
@ -285,7 +292,8 @@ void file_scanner_::scan_dedupe(file* p) {
} }
} }
void file_scanner_::hash_file(file* p) { template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::hash_file(file* p) {
auto const size = p->size(); auto const size = p->size();
std::shared_ptr<mmif> mm; std::shared_ptr<mmif> mm;
@ -297,7 +305,8 @@ void file_scanner_::hash_file(file* p) {
p->scan(mm.get(), prog_, hash_algo_); p->scan(mm.get(), prog_, hash_algo_);
} }
void file_scanner_::add_inode(file* p) { template <typename LoggerPolicy>
void file_scanner_<LoggerPolicy>::add_inode(file* p) {
assert(!p->get_inode()); assert(!p->get_inode());
auto inode = im_.create_inode(); auto inode = im_.create_inode();
@ -307,8 +316,11 @@ void file_scanner_::add_inode(file* p) {
im_.scan_background(wg_, os_, std::move(inode), p); im_.scan_background(wg_, os_, std::move(inode), p);
} }
// finalize_hardlinks: walks every entry of hardlinks_ and handles those whose
// file list holds more than one element, then empties hardlinks_. The
// per-entry handling itself is not visible here (it lies between the two
// hunks of this diff). The new version additionally creates a LOG_TIMED_INFO
// object at the start and streams a summary message into it at the end.
template <typename LoggerPolicy>
template <typename Lookup> template <typename Lookup>
void file_scanner_::finalize_hardlinks(Lookup&& lookup) { void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
auto ti = LOG_TIMED_INFO;
for (auto& kv : hardlinks_) { for (auto& kv : hardlinks_) {
auto& hlv = kv.second; auto& hlv = kv.second;
if (hlv.size() > 1) { if (hlv.size() > 1) {
@ -322,13 +334,19 @@ void file_scanner_::finalize_hardlinks(Lookup&& lookup) {
} }
hardlinks_.clear(); hardlinks_.clear();
// NOTE(review): hardlinks_.size() is read here after hardlinks_.clear() on
// the line above, so this message presumably always reports 0 hardlinks.
// Capture the count before clearing (or emit the message before the clear()
// call) -- confirm and fix.
ti << "finalized " << hardlinks_.size() << " hardlinks";
} }
template <typename LoggerPolicy>
template <bool UniqueOnly, typename KeyType> template <bool UniqueOnly, typename KeyType>
void file_scanner_::finalize_files( void file_scanner_<LoggerPolicy>::finalize_files(
folly::F14FastMap<KeyType, inode::files_vector>& fmap, uint32_t& inode_num, folly::F14FastMap<KeyType, inode::files_vector>& fmap, uint32_t& inode_num,
uint32_t& obj_num) { uint32_t& obj_num) {
std::vector<std::pair<KeyType, inode::files_vector>> ent; std::vector<std::pair<KeyType, inode::files_vector>> ent;
auto ti = LOG_TIMED_INFO;
ent.reserve(fmap.size()); ent.reserve(fmap.size());
fmap.eraseInto( fmap.eraseInto(
fmap.begin(), fmap.end(), [&ent](KeyType&& k, inode::files_vector&& fv) { fmap.begin(), fmap.end(), [&ent](KeyType&& k, inode::files_vector&& fv) {
@ -339,6 +357,7 @@ void file_scanner_::finalize_files(
ent.emplace_back(std::move(k), std::move(fv)); ent.emplace_back(std::move(k), std::move(fv));
} }
}); });
std::sort(ent.begin(), ent.end(), std::sort(ent.begin(), ent.end(),
[](auto& left, auto& right) { return left.first < right.first; }); [](auto& left, auto& right) { return left.first < right.first; });
@ -348,10 +367,14 @@ void file_scanner_::finalize_files(
if constexpr (!UniqueOnly) { if constexpr (!UniqueOnly) {
finalize_inodes<false>(ent, inode_num, obj_num); finalize_inodes<false>(ent, inode_num, obj_num);
} }
ti << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
<< "unique files";
} }
template <typename LoggerPolicy>
template <bool Unique, typename KeyType> template <bool Unique, typename KeyType>
void file_scanner_::finalize_inodes( void file_scanner_<LoggerPolicy>::finalize_inodes(
std::vector<std::pair<KeyType, inode::files_vector>>& ent, std::vector<std::pair<KeyType, inode::files_vector>>& ent,
uint32_t& inode_num, uint32_t& obj_num) { uint32_t& inode_num, uint32_t& obj_num) {
for (auto& p : ent) { for (auto& p : ent) {
@ -395,9 +418,11 @@ void file_scanner_::finalize_inodes(
} }
} // namespace } // namespace
// Public file_scanner constructor (old version left, new version right).
// The new version accepts a logger as its first argument and creates the
// implementation via make_unique_logging_object<impl, file_scanner_,
// logger_policies>, passing lgr followed by the original arguments, instead
// of calling std::make_unique<file_scanner_> directly.
// NOTE(review): presumably make_unique_logging_object selects the
// file_scanner_<LoggerPolicy> instantiation based on lgr -- confirm against
// its definition, which is not part of this diff.
file_scanner::file_scanner(worker_group& wg, os_access& os, inode_manager& im, file_scanner::file_scanner(logger& lgr, worker_group& wg, os_access& os,
inode_manager& im,
std::optional<std::string> const& hash_algo, std::optional<std::string> const& hash_algo,
progress& prog) progress& prog)
: impl_{std::make_unique<file_scanner_>(wg, os, im, hash_algo, prog)} {} : impl_{make_unique_logging_object<impl, file_scanner_, logger_policies>(
lgr, wg, os, im, hash_algo, prog)} {}
} // namespace dwarfs::detail } // namespace dwarfs::detail

View File

@ -290,14 +290,13 @@ class scanner_ final : public scanner::impl {
progress& prog, detail::file_scanner& fs, progress& prog, detail::file_scanner& fs,
bool debug_filter = false); bool debug_filter = false);
LOG_PROXY_DECL(LoggerPolicy);
worker_group& wg_;
const segmenter::config& cfg_; const segmenter::config& cfg_;
const scanner_options& options_; const scanner_options& options_;
std::shared_ptr<entry_factory> entry_; std::shared_ptr<entry_factory> entry_;
std::shared_ptr<os_access> os_; std::shared_ptr<os_access> os_;
std::shared_ptr<script> script_; std::shared_ptr<script> script_;
worker_group& wg_;
logger& lgr_;
LOG_PROXY_DECL(LoggerPolicy);
}; };
template <typename LoggerPolicy> template <typename LoggerPolicy>
@ -307,14 +306,13 @@ scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
std::shared_ptr<os_access> os, std::shared_ptr<os_access> os,
std::shared_ptr<script> scr, std::shared_ptr<script> scr,
const scanner_options& options) const scanner_options& options)
: cfg_(cfg) : LOG_PROXY_INIT(lgr)
, wg_(wg)
, cfg_(cfg)
, options_(options) , options_(options)
, entry_(std::move(ef)) , entry_(std::move(ef))
, os_(std::move(os)) , os_(std::move(os))
, script_(std::move(scr)) , script_(std::move(scr)) {}
, wg_(wg)
, lgr_(lgr)
, LOG_PROXY_INIT(lgr_) {}
template <typename LoggerPolicy> template <typename LoggerPolicy>
std::shared_ptr<entry> std::shared_ptr<entry>
@ -566,8 +564,9 @@ void scanner_<LoggerPolicy>::scan(
prog.set_status_function(status_string); prog.set_status_function(status_string);
inode_manager im(lgr_, prog, options_.inode); inode_manager im(LOG_GET_LOGGER, prog, options_.inode);
detail::file_scanner fs(wg_, *os_, im, options_.file_hash_algorithm, prog); detail::file_scanner fs(LOG_GET_LOGGER, wg_, *os_, im,
options_.file_hash_algorithm, prog);
auto root = auto root =
list ? scan_list(path, *list, prog, fs) : scan_tree(path, prog, fs); list ? scan_list(path, *list, prog, fs) : scan_tree(path, prog, fs);
@ -658,7 +657,7 @@ void scanner_<LoggerPolicy>::scan(
}); });
LOG_INFO << "building blocks..."; LOG_INFO << "building blocks...";
segmenter bm(lgr_, prog, cfg_, os_, fsw); segmenter bm(LOG_GET_LOGGER, prog, cfg_, os_, fsw);
{ {
worker_group blockify("blockify", 1, 1 << 20); worker_group blockify("blockify", 1, 1 << 20);