Improve logging

This commit is contained in:
Marcus Holland-Moritz 2023-08-19 16:00:59 +02:00
parent a9636a3f0b
commit 026f57ccb7
5 changed files with 43 additions and 42 deletions

View File

@ -58,6 +58,7 @@ class basic_array_similarity_element_view : public similarity_element_view {
}; };
struct similarity_ordering_options { struct similarity_ordering_options {
std::string context;
size_t max_children{256}; size_t max_children{256};
size_t max_cluster_size{256}; size_t max_cluster_size{256};
}; };

View File

@ -320,7 +320,7 @@ void file_scanner_<LoggerPolicy>::add_inode(file* p) {
template <typename LoggerPolicy> template <typename LoggerPolicy>
template <typename Lookup> template <typename Lookup>
void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) { void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
auto ti = LOG_TIMED_INFO; auto tv = LOG_TIMED_VERBOSE;
for (auto& kv : hardlinks_) { for (auto& kv : hardlinks_) {
auto& hlv = kv.second; auto& hlv = kv.second;
@ -336,7 +336,7 @@ void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
hardlinks_.clear(); hardlinks_.clear();
ti << "finalized " << hardlinks_.size() << " hardlinks"; tv << "finalized " << hardlinks_.size() << " hardlinks";
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
@ -346,7 +346,7 @@ void file_scanner_<LoggerPolicy>::finalize_files(
uint32_t& obj_num) { uint32_t& obj_num) {
std::vector<std::pair<KeyType, inode::files_vector>> ent; std::vector<std::pair<KeyType, inode::files_vector>> ent;
auto ti = LOG_TIMED_INFO; auto tv = LOG_TIMED_VERBOSE;
ent.reserve(fmap.size()); ent.reserve(fmap.size());
fmap.eraseInto( fmap.eraseInto(
@ -369,7 +369,7 @@ void file_scanner_<LoggerPolicy>::finalize_files(
finalize_inodes<false>(ent, inode_num, obj_num); finalize_inodes<false>(ent, inode_num, obj_num);
} }
ti << "finalized " << ent.size() << (UniqueOnly ? " " : " non-") tv << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
<< "unique files"; << "unique files";
} }

View File

@ -604,36 +604,37 @@ auto inode_manager_<LoggerPolicy>::ordered_span(fragment_category cat,
switch (opts.mode) { switch (opts.mode) {
case file_order_mode::NONE: case file_order_mode::NONE:
LOG_INFO << prefix << "keeping inode order"; LOG_VERBOSE << prefix << "keeping inode order";
break; break;
case file_order_mode::PATH: { case file_order_mode::PATH: {
LOG_INFO << prefix << "ordering " << span.size() LOG_VERBOSE << prefix << "ordering " << span.size()
<< " inodes by path name..."; << " inodes by path name...";
auto ti = LOG_CPU_TIMED_INFO; auto tv = LOG_CPU_TIMED_VERBOSE;
order.by_path(span); order.by_path(span);
ti << prefix << span.size() << " inodes ordered"; tv << prefix << span.size() << " inodes ordered";
break; break;
} }
case file_order_mode::SIMILARITY: { case file_order_mode::SIMILARITY: {
LOG_INFO << prefix << "ordering " << span.size() LOG_VERBOSE << prefix << "ordering " << span.size()
<< " inodes by similarity..."; << " inodes by similarity...";
auto ti = LOG_CPU_TIMED_INFO; auto tv = LOG_CPU_TIMED_VERBOSE;
order.by_similarity(span, cat); order.by_similarity(span, cat);
ti << prefix << span.size() << " inodes ordered"; tv << prefix << span.size() << " inodes ordered";
break; break;
} }
case file_order_mode::NILSIMSA: { case file_order_mode::NILSIMSA: {
LOG_INFO << prefix << "ordering " << span.size() LOG_VERBOSE << prefix << "ordering " << span.size()
<< " inodes using nilsimsa similarity..."; << " inodes using nilsimsa similarity...";
similarity_ordering_options soo; similarity_ordering_options soo;
soo.context = prefix;
soo.max_children = opts.nilsimsa_max_children; soo.max_children = opts.nilsimsa_max_children;
soo.max_cluster_size = opts.nilsimsa_max_cluster_size; soo.max_cluster_size = opts.nilsimsa_max_cluster_size;
auto ti = LOG_TIMED_INFO; auto tv = LOG_TIMED_VERBOSE;
order.by_nilsimsa(wg, soo, span, cat); order.by_nilsimsa(wg, soo, span, cat);
ti << prefix << span.size() << " inodes ordered"; tv << prefix << span.size() << " inodes ordered";
break; break;
} }
} }

View File

@ -613,16 +613,9 @@ void scanner_<LoggerPolicy>::scan(
if (auto catmgr = options_.inode.categorizer_mgr) { if (auto catmgr = options_.inode.categorizer_mgr) {
for (auto const& ci : im.fragment_category_info()) { for (auto const& ci : im.fragment_category_info()) {
LOG_INFO << ci.fragment_count << " " << catmgr->category_name(ci.category) LOG_VERBOSE << "found " << ci.fragment_count << " "
<< " fragments (" << size_with_unit(ci.total_size) << ")"; << catmgr->category_name(ci.category) << " fragments ("
} << size_with_unit(ci.total_size) << ")";
for (auto const& cat : im.inode_categories()) {
std::string str(catmgr->category_name(cat.value()));
if (cat.has_subcategory()) {
str += fmt::format("/{}", cat.subcategory());
}
LOG_INFO << str;
} }
} }
@ -696,7 +689,7 @@ void scanner_<LoggerPolicy>::scan(
wg_blockify.add_job( wg_blockify.add_job(
[this, catmgr, blockmgr, category, meta, cc, &prog, &fsw, [this, catmgr, blockmgr, category, meta, cc, &prog, &fsw,
span = im.ordered_span(category, wg_ordering)]() mutable { span = im.ordered_span(category, wg_ordering)]() mutable {
auto ti = LOG_CPU_TIMED_INFO; auto tv = LOG_CPU_TIMED_VERBOSE;
auto seg = segmenter_factory_->create( auto seg = segmenter_factory_->create(
category, cc, blockmgr, [category, meta, &fsw](auto block) { category, cc, blockmgr, [category, meta, &fsw](auto block) {
@ -730,7 +723,7 @@ void scanner_<LoggerPolicy>::scan(
seg.finish(); seg.finish();
ti << category_prefix(catmgr, category) << "segmenting"; tv << category_prefix(catmgr, category) << "segmenting finished";
}); });
}); });
} }

View File

@ -276,7 +276,8 @@ auto similarity_ordering_<LoggerPolicy>::build_index(
} }
index.shrink_to_fit(); index.shrink_to_fit();
tt << "build index: " << ev.size() << " -> " << index.size(); tt << opts_.context << "build index: " << ev.size() << " -> "
<< index.size();
} }
return index; return index;
@ -294,7 +295,7 @@ auto similarity_ordering_<LoggerPolicy>::find_duplicates(
std::sort(index.begin(), index.end(), std::sort(index.begin(), index.end(),
[&ev](auto a, auto b) { return ev.bitvec_less(a, b); }); [&ev](auto a, auto b) { return ev.bitvec_less(a, b); });
tt << "sort index of " << index.size() << " elements"; tt << opts_.context << "sort index of " << index.size() << " elements";
} }
{ {
@ -315,8 +316,8 @@ auto similarity_ordering_<LoggerPolicy>::find_duplicates(
index.erase(++dst, index.end()); index.erase(++dst, index.end());
} }
tt << "find duplicates: " << index.size() << " unique / " << dm.size() tt << opts_.context << "find duplicates: " << index.size() << " unique / "
<< " groups"; << dm.size() << " groups";
} }
return dm; return dm;
@ -494,8 +495,8 @@ void similarity_ordering_<LoggerPolicy>::cluster_by_distance(
match->index.push_back(i); match->index.push_back(i);
} }
td << "cluster_by_distance: " << node.cluster().index.size() << " -> " td << opts_.context << "cluster_by_distance: " << node.cluster().index.size()
<< children.size() << ")"; << " -> " << children.size() << ")";
node.v = std::move(children); node.v = std::move(children);
} }
@ -550,7 +551,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
duplicates_map& dup, index_type& ordered, std::string indent) const { duplicates_map& dup, index_type& ordered, std::string indent) const {
if (node.is_leaf()) { if (node.is_leaf()) {
for (auto e : node.cluster().index) { for (auto e : node.cluster().index) {
LOG_TRACE << indent << " " << ev.description(e) << " -> " LOG_TRACE << opts_.context << indent << " " << ev.description(e)
<< " -> "
<< node.cluster().centroid.distance_to(ev.get_bits(e)); << node.cluster().centroid.distance_to(ev.get_bits(e));
ordered.push_back(e); ordered.push_back(e);
@ -562,7 +564,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
[&ev](auto a, auto b) { return ev.order_less(a, b); }); [&ev](auto a, auto b) { return ev.order_less(a, b); });
for (auto i : dupvec) { for (auto i : dupvec) {
LOG_TRACE << indent << " + " << ev.description(i) << " -> " LOG_TRACE << opts_.context << indent << " + " << ev.description(i)
<< " -> "
<< node.cluster().centroid.distance_to(ev.get_bits(i)); << node.cluster().centroid.distance_to(ev.get_bits(i));
ordered.push_back(i); ordered.push_back(i);
} }
@ -573,7 +576,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
// step before collecting // step before collecting
for (auto const& [i, cn] : folly::enumerate(node.children())) { for (auto const& [i, cn] : folly::enumerate(node.children())) {
LOG_TRACE << indent << "[" << i << "] " << cn.description(); LOG_TRACE << opts_.context << indent << "[" << i << "] "
<< cn.description();
collect_rec(cn, ev, dup, ordered, indent + " "); collect_rec(cn, ev, dup, ordered, indent + " ");
} }
} }
@ -592,7 +596,8 @@ void similarity_ordering_<LoggerPolicy>::order_impl(
index = build_index(ev); index = build_index(ev);
} }
LOG_INFO << "total distance before ordering: " << total_distance(ev, index); LOG_DEBUG << opts_.context
<< "total distance before ordering: " << total_distance(ev, index);
size_t size_hint = index.size(); size_t size_hint = index.size();
auto duplicates = find_duplicates(ev, index); auto duplicates = find_duplicates(ev, index);
@ -602,14 +607,15 @@ void similarity_ordering_<LoggerPolicy>::order_impl(
[this, size_hint, &ev, rec = std::move(rec), root, [this, size_hint, &ev, rec = std::move(rec), root,
dup = std::move(duplicates)]() mutable { dup = std::move(duplicates)]() mutable {
{ {
auto ti = LOG_TIMED_INFO; auto tv = LOG_TIMED_VERBOSE;
order_tree_rec(*root, ev); order_tree_rec(*root, ev);
ti << "order_tree_rec"; tv << opts_.context << "nilsimsa recursive ordering finished";
} }
index_type rv; index_type rv;
rv.reserve(size_hint); rv.reserve(size_hint);
collect_rec(*root, ev, dup, rv, ""); collect_rec(*root, ev, dup, rv, "");
LOG_INFO << "total distance after ordering: " << total_distance(ev, rv); LOG_DEBUG << opts_.context << "total distance after ordering: "
<< total_distance(ev, rv);
rec.set_value(std::move(rv)); rec.set_value(std::move(rv));
}); });