mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-13 06:16:55 -04:00
Improve logging
This commit is contained in:
parent
a9636a3f0b
commit
026f57ccb7
@ -58,6 +58,7 @@ class basic_array_similarity_element_view : public similarity_element_view {
|
||||
};
|
||||
|
||||
// Options bundle handed to the similarity ordering code: a logging context
// string plus two size limits.
struct similarity_ordering_options {
|
||||
// Text prepended to the log messages emitted by the similarity ordering
// (e.g. "build index", "sort index", "find duplicates"), so that output
// can be attributed to the caller that set it.
std::string context;
|
||||
// Defaults to 256.
// NOTE(review): presumably the maximum number of children per node in the
// cluster tree — confirm against the ordering implementation.
size_t max_children{256};
|
||||
// Defaults to 256.
// NOTE(review): presumably the maximum number of elements per cluster —
// confirm against the ordering implementation.
size_t max_cluster_size{256};
|
||||
};
|
||||
|
@ -320,7 +320,7 @@ void file_scanner_<LoggerPolicy>::add_inode(file* p) {
|
||||
template <typename LoggerPolicy>
|
||||
template <typename Lookup>
|
||||
void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
|
||||
auto ti = LOG_TIMED_INFO;
|
||||
auto tv = LOG_TIMED_VERBOSE;
|
||||
|
||||
for (auto& kv : hardlinks_) {
|
||||
auto& hlv = kv.second;
|
||||
@ -336,7 +336,7 @@ void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
|
||||
|
||||
hardlinks_.clear();
|
||||
|
||||
ti << "finalized " << hardlinks_.size() << " hardlinks";
|
||||
tv << "finalized " << hardlinks_.size() << " hardlinks";
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
@ -346,7 +346,7 @@ void file_scanner_<LoggerPolicy>::finalize_files(
|
||||
uint32_t& obj_num) {
|
||||
std::vector<std::pair<KeyType, inode::files_vector>> ent;
|
||||
|
||||
auto ti = LOG_TIMED_INFO;
|
||||
auto tv = LOG_TIMED_VERBOSE;
|
||||
|
||||
ent.reserve(fmap.size());
|
||||
fmap.eraseInto(
|
||||
@ -369,7 +369,7 @@ void file_scanner_<LoggerPolicy>::finalize_files(
|
||||
finalize_inodes<false>(ent, inode_num, obj_num);
|
||||
}
|
||||
|
||||
ti << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
|
||||
tv << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
|
||||
<< "unique files";
|
||||
}
|
||||
|
||||
|
@ -604,36 +604,37 @@ auto inode_manager_<LoggerPolicy>::ordered_span(fragment_category cat,
|
||||
|
||||
switch (opts.mode) {
|
||||
case file_order_mode::NONE:
|
||||
LOG_INFO << prefix << "keeping inode order";
|
||||
LOG_VERBOSE << prefix << "keeping inode order";
|
||||
break;
|
||||
|
||||
case file_order_mode::PATH: {
|
||||
LOG_INFO << prefix << "ordering " << span.size()
|
||||
LOG_VERBOSE << prefix << "ordering " << span.size()
|
||||
<< " inodes by path name...";
|
||||
auto ti = LOG_CPU_TIMED_INFO;
|
||||
auto tv = LOG_CPU_TIMED_VERBOSE;
|
||||
order.by_path(span);
|
||||
ti << prefix << span.size() << " inodes ordered";
|
||||
tv << prefix << span.size() << " inodes ordered";
|
||||
break;
|
||||
}
|
||||
|
||||
case file_order_mode::SIMILARITY: {
|
||||
LOG_INFO << prefix << "ordering " << span.size()
|
||||
LOG_VERBOSE << prefix << "ordering " << span.size()
|
||||
<< " inodes by similarity...";
|
||||
auto ti = LOG_CPU_TIMED_INFO;
|
||||
auto tv = LOG_CPU_TIMED_VERBOSE;
|
||||
order.by_similarity(span, cat);
|
||||
ti << prefix << span.size() << " inodes ordered";
|
||||
tv << prefix << span.size() << " inodes ordered";
|
||||
break;
|
||||
}
|
||||
|
||||
case file_order_mode::NILSIMSA: {
|
||||
LOG_INFO << prefix << "ordering " << span.size()
|
||||
LOG_VERBOSE << prefix << "ordering " << span.size()
|
||||
<< " inodes using nilsimsa similarity...";
|
||||
similarity_ordering_options soo;
|
||||
soo.context = prefix;
|
||||
soo.max_children = opts.nilsimsa_max_children;
|
||||
soo.max_cluster_size = opts.nilsimsa_max_cluster_size;
|
||||
auto ti = LOG_TIMED_INFO;
|
||||
auto tv = LOG_TIMED_VERBOSE;
|
||||
order.by_nilsimsa(wg, soo, span, cat);
|
||||
ti << prefix << span.size() << " inodes ordered";
|
||||
tv << prefix << span.size() << " inodes ordered";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -613,16 +613,9 @@ void scanner_<LoggerPolicy>::scan(
|
||||
|
||||
if (auto catmgr = options_.inode.categorizer_mgr) {
|
||||
for (auto const& ci : im.fragment_category_info()) {
|
||||
LOG_INFO << ci.fragment_count << " " << catmgr->category_name(ci.category)
|
||||
<< " fragments (" << size_with_unit(ci.total_size) << ")";
|
||||
}
|
||||
|
||||
for (auto const& cat : im.inode_categories()) {
|
||||
std::string str(catmgr->category_name(cat.value()));
|
||||
if (cat.has_subcategory()) {
|
||||
str += fmt::format("/{}", cat.subcategory());
|
||||
}
|
||||
LOG_INFO << str;
|
||||
LOG_VERBOSE << "found " << ci.fragment_count << " "
|
||||
<< catmgr->category_name(ci.category) << " fragments ("
|
||||
<< size_with_unit(ci.total_size) << ")";
|
||||
}
|
||||
}
|
||||
|
||||
@ -696,7 +689,7 @@ void scanner_<LoggerPolicy>::scan(
|
||||
wg_blockify.add_job(
|
||||
[this, catmgr, blockmgr, category, meta, cc, &prog, &fsw,
|
||||
span = im.ordered_span(category, wg_ordering)]() mutable {
|
||||
auto ti = LOG_CPU_TIMED_INFO;
|
||||
auto tv = LOG_CPU_TIMED_VERBOSE;
|
||||
|
||||
auto seg = segmenter_factory_->create(
|
||||
category, cc, blockmgr, [category, meta, &fsw](auto block) {
|
||||
@ -730,7 +723,7 @@ void scanner_<LoggerPolicy>::scan(
|
||||
|
||||
seg.finish();
|
||||
|
||||
ti << category_prefix(catmgr, category) << "segmenting";
|
||||
tv << category_prefix(catmgr, category) << "segmenting finished";
|
||||
});
|
||||
});
|
||||
}
|
||||
|
@ -276,7 +276,8 @@ auto similarity_ordering_<LoggerPolicy>::build_index(
|
||||
}
|
||||
index.shrink_to_fit();
|
||||
|
||||
tt << "build index: " << ev.size() << " -> " << index.size();
|
||||
tt << opts_.context << "build index: " << ev.size() << " -> "
|
||||
<< index.size();
|
||||
}
|
||||
|
||||
return index;
|
||||
@ -294,7 +295,7 @@ auto similarity_ordering_<LoggerPolicy>::find_duplicates(
|
||||
std::sort(index.begin(), index.end(),
|
||||
[&ev](auto a, auto b) { return ev.bitvec_less(a, b); });
|
||||
|
||||
tt << "sort index of " << index.size() << " elements";
|
||||
tt << opts_.context << "sort index of " << index.size() << " elements";
|
||||
}
|
||||
|
||||
{
|
||||
@ -315,8 +316,8 @@ auto similarity_ordering_<LoggerPolicy>::find_duplicates(
|
||||
index.erase(++dst, index.end());
|
||||
}
|
||||
|
||||
tt << "find duplicates: " << index.size() << " unique / " << dm.size()
|
||||
<< " groups";
|
||||
tt << opts_.context << "find duplicates: " << index.size() << " unique / "
|
||||
<< dm.size() << " groups";
|
||||
}
|
||||
|
||||
return dm;
|
||||
@ -494,8 +495,8 @@ void similarity_ordering_<LoggerPolicy>::cluster_by_distance(
|
||||
match->index.push_back(i);
|
||||
}
|
||||
|
||||
td << "cluster_by_distance: " << node.cluster().index.size() << " -> "
|
||||
<< children.size() << ")";
|
||||
td << opts_.context << "cluster_by_distance: " << node.cluster().index.size()
|
||||
<< " -> " << children.size() << ")";
|
||||
|
||||
node.v = std::move(children);
|
||||
}
|
||||
@ -550,7 +551,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
|
||||
duplicates_map& dup, index_type& ordered, std::string indent) const {
|
||||
if (node.is_leaf()) {
|
||||
for (auto e : node.cluster().index) {
|
||||
LOG_TRACE << indent << " " << ev.description(e) << " -> "
|
||||
LOG_TRACE << opts_.context << indent << " " << ev.description(e)
|
||||
<< " -> "
|
||||
<< node.cluster().centroid.distance_to(ev.get_bits(e));
|
||||
|
||||
ordered.push_back(e);
|
||||
@ -562,7 +564,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
|
||||
[&ev](auto a, auto b) { return ev.order_less(a, b); });
|
||||
|
||||
for (auto i : dupvec) {
|
||||
LOG_TRACE << indent << " + " << ev.description(i) << " -> "
|
||||
LOG_TRACE << opts_.context << indent << " + " << ev.description(i)
|
||||
<< " -> "
|
||||
<< node.cluster().centroid.distance_to(ev.get_bits(i));
|
||||
ordered.push_back(i);
|
||||
}
|
||||
@ -573,7 +576,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
|
||||
// step before collecting
|
||||
|
||||
for (auto const& [i, cn] : folly::enumerate(node.children())) {
|
||||
LOG_TRACE << indent << "[" << i << "] " << cn.description();
|
||||
LOG_TRACE << opts_.context << indent << "[" << i << "] "
|
||||
<< cn.description();
|
||||
collect_rec(cn, ev, dup, ordered, indent + " ");
|
||||
}
|
||||
}
|
||||
@ -592,7 +596,8 @@ void similarity_ordering_<LoggerPolicy>::order_impl(
|
||||
index = build_index(ev);
|
||||
}
|
||||
|
||||
LOG_INFO << "total distance before ordering: " << total_distance(ev, index);
|
||||
LOG_DEBUG << opts_.context
|
||||
<< "total distance before ordering: " << total_distance(ev, index);
|
||||
|
||||
size_t size_hint = index.size();
|
||||
auto duplicates = find_duplicates(ev, index);
|
||||
@ -602,14 +607,15 @@ void similarity_ordering_<LoggerPolicy>::order_impl(
|
||||
[this, size_hint, &ev, rec = std::move(rec), root,
|
||||
dup = std::move(duplicates)]() mutable {
|
||||
{
|
||||
auto ti = LOG_TIMED_INFO;
|
||||
auto tv = LOG_TIMED_VERBOSE;
|
||||
order_tree_rec(*root, ev);
|
||||
ti << "order_tree_rec";
|
||||
tv << opts_.context << "nilsimsa recursive ordering finished";
|
||||
}
|
||||
index_type rv;
|
||||
rv.reserve(size_hint);
|
||||
collect_rec(*root, ev, dup, rv, "");
|
||||
LOG_INFO << "total distance after ordering: " << total_distance(ev, rv);
|
||||
LOG_DEBUG << opts_.context << "total distance after ordering: "
|
||||
<< total_distance(ev, rv);
|
||||
rec.set_value(std::move(rv));
|
||||
});
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user