Improve logging

This commit is contained in:
Marcus Holland-Moritz 2023-08-19 16:00:59 +02:00
parent a9636a3f0b
commit 026f57ccb7
5 changed files with 43 additions and 42 deletions

View File

@ -58,6 +58,7 @@ class basic_array_similarity_element_view : public similarity_element_view {
};
struct similarity_ordering_options {
std::string context;
size_t max_children{256};
size_t max_cluster_size{256};
};

View File

@ -320,7 +320,7 @@ void file_scanner_<LoggerPolicy>::add_inode(file* p) {
template <typename LoggerPolicy>
template <typename Lookup>
void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
auto ti = LOG_TIMED_INFO;
auto tv = LOG_TIMED_VERBOSE;
for (auto& kv : hardlinks_) {
auto& hlv = kv.second;
@ -336,7 +336,7 @@ void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
hardlinks_.clear();
ti << "finalized " << hardlinks_.size() << " hardlinks";
tv << "finalized " << hardlinks_.size() << " hardlinks";
}
template <typename LoggerPolicy>
@ -346,7 +346,7 @@ void file_scanner_<LoggerPolicy>::finalize_files(
uint32_t& obj_num) {
std::vector<std::pair<KeyType, inode::files_vector>> ent;
auto ti = LOG_TIMED_INFO;
auto tv = LOG_TIMED_VERBOSE;
ent.reserve(fmap.size());
fmap.eraseInto(
@ -369,7 +369,7 @@ void file_scanner_<LoggerPolicy>::finalize_files(
finalize_inodes<false>(ent, inode_num, obj_num);
}
ti << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
tv << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
<< "unique files";
}

View File

@ -604,36 +604,37 @@ auto inode_manager_<LoggerPolicy>::ordered_span(fragment_category cat,
switch (opts.mode) {
case file_order_mode::NONE:
LOG_INFO << prefix << "keeping inode order";
LOG_VERBOSE << prefix << "keeping inode order";
break;
case file_order_mode::PATH: {
LOG_INFO << prefix << "ordering " << span.size()
<< " inodes by path name...";
auto ti = LOG_CPU_TIMED_INFO;
LOG_VERBOSE << prefix << "ordering " << span.size()
<< " inodes by path name...";
auto tv = LOG_CPU_TIMED_VERBOSE;
order.by_path(span);
ti << prefix << span.size() << " inodes ordered";
tv << prefix << span.size() << " inodes ordered";
break;
}
case file_order_mode::SIMILARITY: {
LOG_INFO << prefix << "ordering " << span.size()
<< " inodes by similarity...";
auto ti = LOG_CPU_TIMED_INFO;
LOG_VERBOSE << prefix << "ordering " << span.size()
<< " inodes by similarity...";
auto tv = LOG_CPU_TIMED_VERBOSE;
order.by_similarity(span, cat);
ti << prefix << span.size() << " inodes ordered";
tv << prefix << span.size() << " inodes ordered";
break;
}
case file_order_mode::NILSIMSA: {
LOG_INFO << prefix << "ordering " << span.size()
<< " inodes using nilsimsa similarity...";
LOG_VERBOSE << prefix << "ordering " << span.size()
<< " inodes using nilsimsa similarity...";
similarity_ordering_options soo;
soo.context = prefix;
soo.max_children = opts.nilsimsa_max_children;
soo.max_cluster_size = opts.nilsimsa_max_cluster_size;
auto ti = LOG_TIMED_INFO;
auto tv = LOG_TIMED_VERBOSE;
order.by_nilsimsa(wg, soo, span, cat);
ti << prefix << span.size() << " inodes ordered";
tv << prefix << span.size() << " inodes ordered";
break;
}
}

View File

@ -613,16 +613,9 @@ void scanner_<LoggerPolicy>::scan(
if (auto catmgr = options_.inode.categorizer_mgr) {
for (auto const& ci : im.fragment_category_info()) {
LOG_INFO << ci.fragment_count << " " << catmgr->category_name(ci.category)
<< " fragments (" << size_with_unit(ci.total_size) << ")";
}
for (auto const& cat : im.inode_categories()) {
std::string str(catmgr->category_name(cat.value()));
if (cat.has_subcategory()) {
str += fmt::format("/{}", cat.subcategory());
}
LOG_INFO << str;
LOG_VERBOSE << "found " << ci.fragment_count << " "
<< catmgr->category_name(ci.category) << " fragments ("
<< size_with_unit(ci.total_size) << ")";
}
}
@ -696,7 +689,7 @@ void scanner_<LoggerPolicy>::scan(
wg_blockify.add_job(
[this, catmgr, blockmgr, category, meta, cc, &prog, &fsw,
span = im.ordered_span(category, wg_ordering)]() mutable {
auto ti = LOG_CPU_TIMED_INFO;
auto tv = LOG_CPU_TIMED_VERBOSE;
auto seg = segmenter_factory_->create(
category, cc, blockmgr, [category, meta, &fsw](auto block) {
@ -730,7 +723,7 @@ void scanner_<LoggerPolicy>::scan(
seg.finish();
ti << category_prefix(catmgr, category) << "segmenting";
tv << category_prefix(catmgr, category) << "segmenting finished";
});
});
}

View File

@ -276,7 +276,8 @@ auto similarity_ordering_<LoggerPolicy>::build_index(
}
index.shrink_to_fit();
tt << "build index: " << ev.size() << " -> " << index.size();
tt << opts_.context << "build index: " << ev.size() << " -> "
<< index.size();
}
return index;
@ -294,7 +295,7 @@ auto similarity_ordering_<LoggerPolicy>::find_duplicates(
std::sort(index.begin(), index.end(),
[&ev](auto a, auto b) { return ev.bitvec_less(a, b); });
tt << "sort index of " << index.size() << " elements";
tt << opts_.context << "sort index of " << index.size() << " elements";
}
{
@ -315,8 +316,8 @@ auto similarity_ordering_<LoggerPolicy>::find_duplicates(
index.erase(++dst, index.end());
}
tt << "find duplicates: " << index.size() << " unique / " << dm.size()
<< " groups";
tt << opts_.context << "find duplicates: " << index.size() << " unique / "
<< dm.size() << " groups";
}
return dm;
@ -494,8 +495,8 @@ void similarity_ordering_<LoggerPolicy>::cluster_by_distance(
match->index.push_back(i);
}
td << "cluster_by_distance: " << node.cluster().index.size() << " -> "
<< children.size() << ")";
td << opts_.context << "cluster_by_distance: " << node.cluster().index.size()
<< " -> " << children.size() << ")";
node.v = std::move(children);
}
@ -550,7 +551,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
duplicates_map& dup, index_type& ordered, std::string indent) const {
if (node.is_leaf()) {
for (auto e : node.cluster().index) {
LOG_TRACE << indent << " " << ev.description(e) << " -> "
LOG_TRACE << opts_.context << indent << " " << ev.description(e)
<< " -> "
<< node.cluster().centroid.distance_to(ev.get_bits(e));
ordered.push_back(e);
@ -562,7 +564,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
[&ev](auto a, auto b) { return ev.order_less(a, b); });
for (auto i : dupvec) {
LOG_TRACE << indent << " + " << ev.description(i) << " -> "
LOG_TRACE << opts_.context << indent << " + " << ev.description(i)
<< " -> "
<< node.cluster().centroid.distance_to(ev.get_bits(i));
ordered.push_back(i);
}
@ -573,7 +576,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
// step before collecting
for (auto const& [i, cn] : folly::enumerate(node.children())) {
LOG_TRACE << indent << "[" << i << "] " << cn.description();
LOG_TRACE << opts_.context << indent << "[" << i << "] "
<< cn.description();
collect_rec(cn, ev, dup, ordered, indent + " ");
}
}
@ -592,7 +596,8 @@ void similarity_ordering_<LoggerPolicy>::order_impl(
index = build_index(ev);
}
LOG_INFO << "total distance before ordering: " << total_distance(ev, index);
LOG_DEBUG << opts_.context
<< "total distance before ordering: " << total_distance(ev, index);
size_t size_hint = index.size();
auto duplicates = find_duplicates(ev, index);
@ -602,14 +607,15 @@ void similarity_ordering_<LoggerPolicy>::order_impl(
[this, size_hint, &ev, rec = std::move(rec), root,
dup = std::move(duplicates)]() mutable {
{
auto ti = LOG_TIMED_INFO;
auto tv = LOG_TIMED_VERBOSE;
order_tree_rec(*root, ev);
ti << "order_tree_rec";
tv << opts_.context << "nilsimsa recursive ordering finished";
}
index_type rv;
rv.reserve(size_hint);
collect_rec(*root, ev, dup, rv, "");
LOG_INFO << "total distance after ordering: " << total_distance(ev, rv);
LOG_DEBUG << opts_.context << "total distance after ordering: "
<< total_distance(ev, rv);
rec.set_value(std::move(rv));
});