mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-14 06:48:39 -04:00
Logging & timing for file_scanner
This commit is contained in:
parent
b309d7165b
commit
94b875868e
@ -29,6 +29,7 @@ namespace dwarfs {
|
|||||||
|
|
||||||
class file;
|
class file;
|
||||||
class inode_manager;
|
class inode_manager;
|
||||||
|
class logger;
|
||||||
class os_access;
|
class os_access;
|
||||||
class progress;
|
class progress;
|
||||||
class worker_group;
|
class worker_group;
|
||||||
@ -39,7 +40,7 @@ namespace detail {
|
|||||||
|
|
||||||
class file_scanner {
|
class file_scanner {
|
||||||
public:
|
public:
|
||||||
file_scanner(worker_group& wg, os_access& os, inode_manager& im,
|
file_scanner(logger& lgr, worker_group& wg, os_access& os, inode_manager& im,
|
||||||
std::optional<std::string> const& hash_algo, progress& prog);
|
std::optional<std::string> const& hash_algo, progress& prog);
|
||||||
|
|
||||||
void scan(file* p) { impl_->scan(p); }
|
void scan(file* p) { impl_->scan(p); }
|
||||||
|
@ -39,9 +39,10 @@ namespace dwarfs::detail {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
class file_scanner_ : public file_scanner::impl {
|
template <typename LoggerPolicy>
|
||||||
|
class file_scanner_ final : public file_scanner::impl {
|
||||||
public:
|
public:
|
||||||
file_scanner_(worker_group& wg, os_access& os, inode_manager& im,
|
file_scanner_(logger& lgr, worker_group& wg, os_access& os, inode_manager& im,
|
||||||
std::optional<std::string> const& hash_algo, progress& prog);
|
std::optional<std::string> const& hash_algo, progress& prog);
|
||||||
|
|
||||||
void scan(file* p) override;
|
void scan(file* p) override;
|
||||||
@ -81,6 +82,7 @@ class file_scanner_ : public file_scanner::impl {
|
|||||||
finalize_inodes(std::vector<std::pair<KeyType, inode::files_vector>>& ent,
|
finalize_inodes(std::vector<std::pair<KeyType, inode::files_vector>>& ent,
|
||||||
uint32_t& inode_num, uint32_t& obj_num);
|
uint32_t& inode_num, uint32_t& obj_num);
|
||||||
|
|
||||||
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
worker_group& wg_;
|
worker_group& wg_;
|
||||||
os_access& os_;
|
os_access& os_;
|
||||||
inode_manager& im_;
|
inode_manager& im_;
|
||||||
@ -125,16 +127,19 @@ class file_scanner_ : public file_scanner::impl {
|
|||||||
// it is still present in `unique_size_`. It will be removed
|
// it is still present in `unique_size_`. It will be removed
|
||||||
// from `unique_size_` after its hash has been stored.
|
// from `unique_size_` after its hash has been stored.
|
||||||
|
|
||||||
file_scanner_::file_scanner_(worker_group& wg, os_access& os, inode_manager& im,
|
template <typename LoggerPolicy>
|
||||||
std::optional<std::string> const& hash_algo,
|
file_scanner_<LoggerPolicy>::file_scanner_(
|
||||||
progress& prog)
|
logger& lgr, worker_group& wg, os_access& os, inode_manager& im,
|
||||||
: wg_(wg)
|
std::optional<std::string> const& hash_algo, progress& prog)
|
||||||
|
: LOG_PROXY_INIT(lgr)
|
||||||
|
, wg_(wg)
|
||||||
, os_(os)
|
, os_(os)
|
||||||
, im_(im)
|
, im_(im)
|
||||||
, hash_algo_{hash_algo}
|
, hash_algo_{hash_algo}
|
||||||
, prog_(prog) {}
|
, prog_(prog) {}
|
||||||
|
|
||||||
void file_scanner_::scan(file* p) {
|
template <typename LoggerPolicy>
|
||||||
|
void file_scanner_<LoggerPolicy>::scan(file* p) {
|
||||||
if (p->num_hard_links() > 1) {
|
if (p->num_hard_links() > 1) {
|
||||||
auto& vec = hardlinks_[p->raw_inode_num()];
|
auto& vec = hardlinks_[p->raw_inode_num()];
|
||||||
vec.push_back(p);
|
vec.push_back(p);
|
||||||
@ -162,7 +167,8 @@ void file_scanner_::scan(file* p) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void file_scanner_::finalize(uint32_t& inode_num) {
|
template <typename LoggerPolicy>
|
||||||
|
void file_scanner_<LoggerPolicy>::finalize(uint32_t& inode_num) {
|
||||||
uint32_t obj_num = 0;
|
uint32_t obj_num = 0;
|
||||||
|
|
||||||
assert(first_file_hashed_.empty());
|
assert(first_file_hashed_.empty());
|
||||||
@ -185,7 +191,8 @@ void file_scanner_::finalize(uint32_t& inode_num) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void file_scanner_::scan_dedupe(file* p) {
|
template <typename LoggerPolicy>
|
||||||
|
void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
|
||||||
// We need no lock yet, as `unique_size_` is only manipulated from
|
// We need no lock yet, as `unique_size_` is only manipulated from
|
||||||
// this thread.
|
// this thread.
|
||||||
auto size = p->size();
|
auto size = p->size();
|
||||||
@ -285,7 +292,8 @@ void file_scanner_::scan_dedupe(file* p) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void file_scanner_::hash_file(file* p) {
|
template <typename LoggerPolicy>
|
||||||
|
void file_scanner_<LoggerPolicy>::hash_file(file* p) {
|
||||||
auto const size = p->size();
|
auto const size = p->size();
|
||||||
std::shared_ptr<mmif> mm;
|
std::shared_ptr<mmif> mm;
|
||||||
|
|
||||||
@ -297,7 +305,8 @@ void file_scanner_::hash_file(file* p) {
|
|||||||
p->scan(mm.get(), prog_, hash_algo_);
|
p->scan(mm.get(), prog_, hash_algo_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void file_scanner_::add_inode(file* p) {
|
template <typename LoggerPolicy>
|
||||||
|
void file_scanner_<LoggerPolicy>::add_inode(file* p) {
|
||||||
assert(!p->get_inode());
|
assert(!p->get_inode());
|
||||||
|
|
||||||
auto inode = im_.create_inode();
|
auto inode = im_.create_inode();
|
||||||
@ -307,8 +316,11 @@ void file_scanner_::add_inode(file* p) {
|
|||||||
im_.scan_background(wg_, os_, std::move(inode), p);
|
im_.scan_background(wg_, os_, std::move(inode), p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
template <typename Lookup>
|
template <typename Lookup>
|
||||||
void file_scanner_::finalize_hardlinks(Lookup&& lookup) {
|
void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
|
||||||
|
auto ti = LOG_TIMED_INFO;
|
||||||
|
|
||||||
for (auto& kv : hardlinks_) {
|
for (auto& kv : hardlinks_) {
|
||||||
auto& hlv = kv.second;
|
auto& hlv = kv.second;
|
||||||
if (hlv.size() > 1) {
|
if (hlv.size() > 1) {
|
||||||
@ -322,13 +334,19 @@ void file_scanner_::finalize_hardlinks(Lookup&& lookup) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
hardlinks_.clear();
|
hardlinks_.clear();
|
||||||
|
|
||||||
|
ti << "finalized " << hardlinks_.size() << " hardlinks";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
template <bool UniqueOnly, typename KeyType>
|
template <bool UniqueOnly, typename KeyType>
|
||||||
void file_scanner_::finalize_files(
|
void file_scanner_<LoggerPolicy>::finalize_files(
|
||||||
folly::F14FastMap<KeyType, inode::files_vector>& fmap, uint32_t& inode_num,
|
folly::F14FastMap<KeyType, inode::files_vector>& fmap, uint32_t& inode_num,
|
||||||
uint32_t& obj_num) {
|
uint32_t& obj_num) {
|
||||||
std::vector<std::pair<KeyType, inode::files_vector>> ent;
|
std::vector<std::pair<KeyType, inode::files_vector>> ent;
|
||||||
|
|
||||||
|
auto ti = LOG_TIMED_INFO;
|
||||||
|
|
||||||
ent.reserve(fmap.size());
|
ent.reserve(fmap.size());
|
||||||
fmap.eraseInto(
|
fmap.eraseInto(
|
||||||
fmap.begin(), fmap.end(), [&ent](KeyType&& k, inode::files_vector&& fv) {
|
fmap.begin(), fmap.end(), [&ent](KeyType&& k, inode::files_vector&& fv) {
|
||||||
@ -339,6 +357,7 @@ void file_scanner_::finalize_files(
|
|||||||
ent.emplace_back(std::move(k), std::move(fv));
|
ent.emplace_back(std::move(k), std::move(fv));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
std::sort(ent.begin(), ent.end(),
|
std::sort(ent.begin(), ent.end(),
|
||||||
[](auto& left, auto& right) { return left.first < right.first; });
|
[](auto& left, auto& right) { return left.first < right.first; });
|
||||||
|
|
||||||
@ -348,10 +367,14 @@ void file_scanner_::finalize_files(
|
|||||||
if constexpr (!UniqueOnly) {
|
if constexpr (!UniqueOnly) {
|
||||||
finalize_inodes<false>(ent, inode_num, obj_num);
|
finalize_inodes<false>(ent, inode_num, obj_num);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ti << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
|
||||||
|
<< "unique files";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
template <bool Unique, typename KeyType>
|
template <bool Unique, typename KeyType>
|
||||||
void file_scanner_::finalize_inodes(
|
void file_scanner_<LoggerPolicy>::finalize_inodes(
|
||||||
std::vector<std::pair<KeyType, inode::files_vector>>& ent,
|
std::vector<std::pair<KeyType, inode::files_vector>>& ent,
|
||||||
uint32_t& inode_num, uint32_t& obj_num) {
|
uint32_t& inode_num, uint32_t& obj_num) {
|
||||||
for (auto& p : ent) {
|
for (auto& p : ent) {
|
||||||
@ -395,9 +418,11 @@ void file_scanner_::finalize_inodes(
|
|||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
file_scanner::file_scanner(worker_group& wg, os_access& os, inode_manager& im,
|
file_scanner::file_scanner(logger& lgr, worker_group& wg, os_access& os,
|
||||||
|
inode_manager& im,
|
||||||
std::optional<std::string> const& hash_algo,
|
std::optional<std::string> const& hash_algo,
|
||||||
progress& prog)
|
progress& prog)
|
||||||
: impl_{std::make_unique<file_scanner_>(wg, os, im, hash_algo, prog)} {}
|
: impl_{make_unique_logging_object<impl, file_scanner_, logger_policies>(
|
||||||
|
lgr, wg, os, im, hash_algo, prog)} {}
|
||||||
|
|
||||||
} // namespace dwarfs::detail
|
} // namespace dwarfs::detail
|
||||||
|
@ -290,14 +290,13 @@ class scanner_ final : public scanner::impl {
|
|||||||
progress& prog, detail::file_scanner& fs,
|
progress& prog, detail::file_scanner& fs,
|
||||||
bool debug_filter = false);
|
bool debug_filter = false);
|
||||||
|
|
||||||
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
|
worker_group& wg_;
|
||||||
const segmenter::config& cfg_;
|
const segmenter::config& cfg_;
|
||||||
const scanner_options& options_;
|
const scanner_options& options_;
|
||||||
std::shared_ptr<entry_factory> entry_;
|
std::shared_ptr<entry_factory> entry_;
|
||||||
std::shared_ptr<os_access> os_;
|
std::shared_ptr<os_access> os_;
|
||||||
std::shared_ptr<script> script_;
|
std::shared_ptr<script> script_;
|
||||||
worker_group& wg_;
|
|
||||||
logger& lgr_;
|
|
||||||
LOG_PROXY_DECL(LoggerPolicy);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
@ -307,14 +306,13 @@ scanner_<LoggerPolicy>::scanner_(logger& lgr, worker_group& wg,
|
|||||||
std::shared_ptr<os_access> os,
|
std::shared_ptr<os_access> os,
|
||||||
std::shared_ptr<script> scr,
|
std::shared_ptr<script> scr,
|
||||||
const scanner_options& options)
|
const scanner_options& options)
|
||||||
: cfg_(cfg)
|
: LOG_PROXY_INIT(lgr)
|
||||||
|
, wg_(wg)
|
||||||
|
, cfg_(cfg)
|
||||||
, options_(options)
|
, options_(options)
|
||||||
, entry_(std::move(ef))
|
, entry_(std::move(ef))
|
||||||
, os_(std::move(os))
|
, os_(std::move(os))
|
||||||
, script_(std::move(scr))
|
, script_(std::move(scr)) {}
|
||||||
, wg_(wg)
|
|
||||||
, lgr_(lgr)
|
|
||||||
, LOG_PROXY_INIT(lgr_) {}
|
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
std::shared_ptr<entry>
|
std::shared_ptr<entry>
|
||||||
@ -566,8 +564,9 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
|
|
||||||
prog.set_status_function(status_string);
|
prog.set_status_function(status_string);
|
||||||
|
|
||||||
inode_manager im(lgr_, prog, options_.inode);
|
inode_manager im(LOG_GET_LOGGER, prog, options_.inode);
|
||||||
detail::file_scanner fs(wg_, *os_, im, options_.file_hash_algorithm, prog);
|
detail::file_scanner fs(LOG_GET_LOGGER, wg_, *os_, im,
|
||||||
|
options_.file_hash_algorithm, prog);
|
||||||
|
|
||||||
auto root =
|
auto root =
|
||||||
list ? scan_list(path, *list, prog, fs) : scan_tree(path, prog, fs);
|
list ? scan_list(path, *list, prog, fs) : scan_tree(path, prog, fs);
|
||||||
@ -658,7 +657,7 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
});
|
});
|
||||||
|
|
||||||
LOG_INFO << "building blocks...";
|
LOG_INFO << "building blocks...";
|
||||||
segmenter bm(lgr_, prog, cfg_, os_, fsw);
|
segmenter bm(LOG_GET_LOGGER, prog, cfg_, os_, fsw);
|
||||||
|
|
||||||
{
|
{
|
||||||
worker_group blockify("blockify", 1, 1 << 20);
|
worker_group blockify("blockify", 1, 1 << 20);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user