mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-13 22:40:03 -04:00
Improve logging
This commit is contained in:
parent
a9636a3f0b
commit
026f57ccb7
@ -58,6 +58,7 @@ class basic_array_similarity_element_view : public similarity_element_view {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct similarity_ordering_options {
|
struct similarity_ordering_options {
|
||||||
|
std::string context;
|
||||||
size_t max_children{256};
|
size_t max_children{256};
|
||||||
size_t max_cluster_size{256};
|
size_t max_cluster_size{256};
|
||||||
};
|
};
|
||||||
|
@ -320,7 +320,7 @@ void file_scanner_<LoggerPolicy>::add_inode(file* p) {
|
|||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
template <typename Lookup>
|
template <typename Lookup>
|
||||||
void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
|
void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
|
||||||
auto ti = LOG_TIMED_INFO;
|
auto tv = LOG_TIMED_VERBOSE;
|
||||||
|
|
||||||
for (auto& kv : hardlinks_) {
|
for (auto& kv : hardlinks_) {
|
||||||
auto& hlv = kv.second;
|
auto& hlv = kv.second;
|
||||||
@ -336,7 +336,7 @@ void file_scanner_<LoggerPolicy>::finalize_hardlinks(Lookup&& lookup) {
|
|||||||
|
|
||||||
hardlinks_.clear();
|
hardlinks_.clear();
|
||||||
|
|
||||||
ti << "finalized " << hardlinks_.size() << " hardlinks";
|
tv << "finalized " << hardlinks_.size() << " hardlinks";
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
@ -346,7 +346,7 @@ void file_scanner_<LoggerPolicy>::finalize_files(
|
|||||||
uint32_t& obj_num) {
|
uint32_t& obj_num) {
|
||||||
std::vector<std::pair<KeyType, inode::files_vector>> ent;
|
std::vector<std::pair<KeyType, inode::files_vector>> ent;
|
||||||
|
|
||||||
auto ti = LOG_TIMED_INFO;
|
auto tv = LOG_TIMED_VERBOSE;
|
||||||
|
|
||||||
ent.reserve(fmap.size());
|
ent.reserve(fmap.size());
|
||||||
fmap.eraseInto(
|
fmap.eraseInto(
|
||||||
@ -369,7 +369,7 @@ void file_scanner_<LoggerPolicy>::finalize_files(
|
|||||||
finalize_inodes<false>(ent, inode_num, obj_num);
|
finalize_inodes<false>(ent, inode_num, obj_num);
|
||||||
}
|
}
|
||||||
|
|
||||||
ti << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
|
tv << "finalized " << ent.size() << (UniqueOnly ? " " : " non-")
|
||||||
<< "unique files";
|
<< "unique files";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -604,36 +604,37 @@ auto inode_manager_<LoggerPolicy>::ordered_span(fragment_category cat,
|
|||||||
|
|
||||||
switch (opts.mode) {
|
switch (opts.mode) {
|
||||||
case file_order_mode::NONE:
|
case file_order_mode::NONE:
|
||||||
LOG_INFO << prefix << "keeping inode order";
|
LOG_VERBOSE << prefix << "keeping inode order";
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case file_order_mode::PATH: {
|
case file_order_mode::PATH: {
|
||||||
LOG_INFO << prefix << "ordering " << span.size()
|
LOG_VERBOSE << prefix << "ordering " << span.size()
|
||||||
<< " inodes by path name...";
|
<< " inodes by path name...";
|
||||||
auto ti = LOG_CPU_TIMED_INFO;
|
auto tv = LOG_CPU_TIMED_VERBOSE;
|
||||||
order.by_path(span);
|
order.by_path(span);
|
||||||
ti << prefix << span.size() << " inodes ordered";
|
tv << prefix << span.size() << " inodes ordered";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case file_order_mode::SIMILARITY: {
|
case file_order_mode::SIMILARITY: {
|
||||||
LOG_INFO << prefix << "ordering " << span.size()
|
LOG_VERBOSE << prefix << "ordering " << span.size()
|
||||||
<< " inodes by similarity...";
|
<< " inodes by similarity...";
|
||||||
auto ti = LOG_CPU_TIMED_INFO;
|
auto tv = LOG_CPU_TIMED_VERBOSE;
|
||||||
order.by_similarity(span, cat);
|
order.by_similarity(span, cat);
|
||||||
ti << prefix << span.size() << " inodes ordered";
|
tv << prefix << span.size() << " inodes ordered";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case file_order_mode::NILSIMSA: {
|
case file_order_mode::NILSIMSA: {
|
||||||
LOG_INFO << prefix << "ordering " << span.size()
|
LOG_VERBOSE << prefix << "ordering " << span.size()
|
||||||
<< " inodes using nilsimsa similarity...";
|
<< " inodes using nilsimsa similarity...";
|
||||||
similarity_ordering_options soo;
|
similarity_ordering_options soo;
|
||||||
|
soo.context = prefix;
|
||||||
soo.max_children = opts.nilsimsa_max_children;
|
soo.max_children = opts.nilsimsa_max_children;
|
||||||
soo.max_cluster_size = opts.nilsimsa_max_cluster_size;
|
soo.max_cluster_size = opts.nilsimsa_max_cluster_size;
|
||||||
auto ti = LOG_TIMED_INFO;
|
auto tv = LOG_TIMED_VERBOSE;
|
||||||
order.by_nilsimsa(wg, soo, span, cat);
|
order.by_nilsimsa(wg, soo, span, cat);
|
||||||
ti << prefix << span.size() << " inodes ordered";
|
tv << prefix << span.size() << " inodes ordered";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -613,16 +613,9 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
|
|
||||||
if (auto catmgr = options_.inode.categorizer_mgr) {
|
if (auto catmgr = options_.inode.categorizer_mgr) {
|
||||||
for (auto const& ci : im.fragment_category_info()) {
|
for (auto const& ci : im.fragment_category_info()) {
|
||||||
LOG_INFO << ci.fragment_count << " " << catmgr->category_name(ci.category)
|
LOG_VERBOSE << "found " << ci.fragment_count << " "
|
||||||
<< " fragments (" << size_with_unit(ci.total_size) << ")";
|
<< catmgr->category_name(ci.category) << " fragments ("
|
||||||
}
|
<< size_with_unit(ci.total_size) << ")";
|
||||||
|
|
||||||
for (auto const& cat : im.inode_categories()) {
|
|
||||||
std::string str(catmgr->category_name(cat.value()));
|
|
||||||
if (cat.has_subcategory()) {
|
|
||||||
str += fmt::format("/{}", cat.subcategory());
|
|
||||||
}
|
|
||||||
LOG_INFO << str;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -696,7 +689,7 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
wg_blockify.add_job(
|
wg_blockify.add_job(
|
||||||
[this, catmgr, blockmgr, category, meta, cc, &prog, &fsw,
|
[this, catmgr, blockmgr, category, meta, cc, &prog, &fsw,
|
||||||
span = im.ordered_span(category, wg_ordering)]() mutable {
|
span = im.ordered_span(category, wg_ordering)]() mutable {
|
||||||
auto ti = LOG_CPU_TIMED_INFO;
|
auto tv = LOG_CPU_TIMED_VERBOSE;
|
||||||
|
|
||||||
auto seg = segmenter_factory_->create(
|
auto seg = segmenter_factory_->create(
|
||||||
category, cc, blockmgr, [category, meta, &fsw](auto block) {
|
category, cc, blockmgr, [category, meta, &fsw](auto block) {
|
||||||
@ -730,7 +723,7 @@ void scanner_<LoggerPolicy>::scan(
|
|||||||
|
|
||||||
seg.finish();
|
seg.finish();
|
||||||
|
|
||||||
ti << category_prefix(catmgr, category) << "segmenting";
|
tv << category_prefix(catmgr, category) << "segmenting finished";
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -276,7 +276,8 @@ auto similarity_ordering_<LoggerPolicy>::build_index(
|
|||||||
}
|
}
|
||||||
index.shrink_to_fit();
|
index.shrink_to_fit();
|
||||||
|
|
||||||
tt << "build index: " << ev.size() << " -> " << index.size();
|
tt << opts_.context << "build index: " << ev.size() << " -> "
|
||||||
|
<< index.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
return index;
|
return index;
|
||||||
@ -294,7 +295,7 @@ auto similarity_ordering_<LoggerPolicy>::find_duplicates(
|
|||||||
std::sort(index.begin(), index.end(),
|
std::sort(index.begin(), index.end(),
|
||||||
[&ev](auto a, auto b) { return ev.bitvec_less(a, b); });
|
[&ev](auto a, auto b) { return ev.bitvec_less(a, b); });
|
||||||
|
|
||||||
tt << "sort index of " << index.size() << " elements";
|
tt << opts_.context << "sort index of " << index.size() << " elements";
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -315,8 +316,8 @@ auto similarity_ordering_<LoggerPolicy>::find_duplicates(
|
|||||||
index.erase(++dst, index.end());
|
index.erase(++dst, index.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
tt << "find duplicates: " << index.size() << " unique / " << dm.size()
|
tt << opts_.context << "find duplicates: " << index.size() << " unique / "
|
||||||
<< " groups";
|
<< dm.size() << " groups";
|
||||||
}
|
}
|
||||||
|
|
||||||
return dm;
|
return dm;
|
||||||
@ -494,8 +495,8 @@ void similarity_ordering_<LoggerPolicy>::cluster_by_distance(
|
|||||||
match->index.push_back(i);
|
match->index.push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
td << "cluster_by_distance: " << node.cluster().index.size() << " -> "
|
td << opts_.context << "cluster_by_distance: " << node.cluster().index.size()
|
||||||
<< children.size() << ")";
|
<< " -> " << children.size() << ")";
|
||||||
|
|
||||||
node.v = std::move(children);
|
node.v = std::move(children);
|
||||||
}
|
}
|
||||||
@ -550,7 +551,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
|
|||||||
duplicates_map& dup, index_type& ordered, std::string indent) const {
|
duplicates_map& dup, index_type& ordered, std::string indent) const {
|
||||||
if (node.is_leaf()) {
|
if (node.is_leaf()) {
|
||||||
for (auto e : node.cluster().index) {
|
for (auto e : node.cluster().index) {
|
||||||
LOG_TRACE << indent << " " << ev.description(e) << " -> "
|
LOG_TRACE << opts_.context << indent << " " << ev.description(e)
|
||||||
|
<< " -> "
|
||||||
<< node.cluster().centroid.distance_to(ev.get_bits(e));
|
<< node.cluster().centroid.distance_to(ev.get_bits(e));
|
||||||
|
|
||||||
ordered.push_back(e);
|
ordered.push_back(e);
|
||||||
@ -562,7 +564,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
|
|||||||
[&ev](auto a, auto b) { return ev.order_less(a, b); });
|
[&ev](auto a, auto b) { return ev.order_less(a, b); });
|
||||||
|
|
||||||
for (auto i : dupvec) {
|
for (auto i : dupvec) {
|
||||||
LOG_TRACE << indent << " + " << ev.description(i) << " -> "
|
LOG_TRACE << opts_.context << indent << " + " << ev.description(i)
|
||||||
|
<< " -> "
|
||||||
<< node.cluster().centroid.distance_to(ev.get_bits(i));
|
<< node.cluster().centroid.distance_to(ev.get_bits(i));
|
||||||
ordered.push_back(i);
|
ordered.push_back(i);
|
||||||
}
|
}
|
||||||
@ -573,7 +576,8 @@ void similarity_ordering_<LoggerPolicy>::collect_rec(
|
|||||||
// step before collecting
|
// step before collecting
|
||||||
|
|
||||||
for (auto const& [i, cn] : folly::enumerate(node.children())) {
|
for (auto const& [i, cn] : folly::enumerate(node.children())) {
|
||||||
LOG_TRACE << indent << "[" << i << "] " << cn.description();
|
LOG_TRACE << opts_.context << indent << "[" << i << "] "
|
||||||
|
<< cn.description();
|
||||||
collect_rec(cn, ev, dup, ordered, indent + " ");
|
collect_rec(cn, ev, dup, ordered, indent + " ");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -592,7 +596,8 @@ void similarity_ordering_<LoggerPolicy>::order_impl(
|
|||||||
index = build_index(ev);
|
index = build_index(ev);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_INFO << "total distance before ordering: " << total_distance(ev, index);
|
LOG_DEBUG << opts_.context
|
||||||
|
<< "total distance before ordering: " << total_distance(ev, index);
|
||||||
|
|
||||||
size_t size_hint = index.size();
|
size_t size_hint = index.size();
|
||||||
auto duplicates = find_duplicates(ev, index);
|
auto duplicates = find_duplicates(ev, index);
|
||||||
@ -602,14 +607,15 @@ void similarity_ordering_<LoggerPolicy>::order_impl(
|
|||||||
[this, size_hint, &ev, rec = std::move(rec), root,
|
[this, size_hint, &ev, rec = std::move(rec), root,
|
||||||
dup = std::move(duplicates)]() mutable {
|
dup = std::move(duplicates)]() mutable {
|
||||||
{
|
{
|
||||||
auto ti = LOG_TIMED_INFO;
|
auto tv = LOG_TIMED_VERBOSE;
|
||||||
order_tree_rec(*root, ev);
|
order_tree_rec(*root, ev);
|
||||||
ti << "order_tree_rec";
|
tv << opts_.context << "nilsimsa recursive ordering finished";
|
||||||
}
|
}
|
||||||
index_type rv;
|
index_type rv;
|
||||||
rv.reserve(size_hint);
|
rv.reserve(size_hint);
|
||||||
collect_rec(*root, ev, dup, rv, "");
|
collect_rec(*root, ev, dup, rv, "");
|
||||||
LOG_INFO << "total distance after ordering: " << total_distance(ev, rv);
|
LOG_DEBUG << opts_.context << "total distance after ordering: "
|
||||||
|
<< total_distance(ev, rv);
|
||||||
rec.set_value(std::move(rv));
|
rec.set_value(std::move(rv));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user