diff --git a/include/dwarfs/similarity_ordering.h b/include/dwarfs/similarity_ordering.h index a1ba556f..cf35b439 100644 --- a/include/dwarfs/similarity_ordering.h +++ b/include/dwarfs/similarity_ordering.h @@ -58,6 +58,7 @@ class basic_array_similarity_element_view : public similarity_element_view { }; struct similarity_ordering_options { + std::string context; size_t max_children{256}; size_t max_cluster_size{256}; }; diff --git a/src/dwarfs/file_scanner.cpp b/src/dwarfs/file_scanner.cpp index d8552efd..36bb1e08 100644 --- a/src/dwarfs/file_scanner.cpp +++ b/src/dwarfs/file_scanner.cpp @@ -320,7 +320,7 @@ void file_scanner_::add_inode(file* p) { template template void file_scanner_::finalize_hardlinks(Lookup&& lookup) { - auto ti = LOG_TIMED_INFO; + auto tv = LOG_TIMED_VERBOSE; for (auto& kv : hardlinks_) { auto& hlv = kv.second; @@ -336,7 +336,7 @@ void file_scanner_::finalize_hardlinks(Lookup&& lookup) { hardlinks_.clear(); - ti << "finalized " << hardlinks_.size() << " hardlinks"; + tv << "finalized " << hardlinks_.size() << " hardlinks"; } template @@ -346,7 +346,7 @@ void file_scanner_::finalize_files( uint32_t& obj_num) { std::vector> ent; - auto ti = LOG_TIMED_INFO; + auto tv = LOG_TIMED_VERBOSE; ent.reserve(fmap.size()); fmap.eraseInto( @@ -369,7 +369,7 @@ void file_scanner_::finalize_files( finalize_inodes(ent, inode_num, obj_num); } - ti << "finalized " << ent.size() << (UniqueOnly ? " " : " non-") + tv << "finalized " << ent.size() << (UniqueOnly ? " " : " non-") << "unique files"; } diff --git a/src/dwarfs/inode_manager.cpp b/src/dwarfs/inode_manager.cpp index d418f9fc..45df9e17 100644 --- a/src/dwarfs/inode_manager.cpp +++ b/src/dwarfs/inode_manager.cpp @@ -604,36 +604,37 @@ auto inode_manager_::ordered_span(fragment_category cat, switch (opts.mode) { case file_order_mode::NONE: - LOG_INFO << prefix << "keeping inode order"; + LOG_VERBOSE << prefix << "keeping inode order"; break; case file_order_mode::PATH: { - LOG_INFO << prefix << "ordering " << span.size() - << " inodes by path name..."; - auto ti = LOG_CPU_TIMED_INFO; + LOG_VERBOSE << prefix << "ordering " << span.size() + << " inodes by path name..."; + auto tv = LOG_CPU_TIMED_VERBOSE; order.by_path(span); - ti << prefix << span.size() << " inodes ordered"; + tv << prefix << span.size() << " inodes ordered"; break; } case file_order_mode::SIMILARITY: { - LOG_INFO << prefix << "ordering " << span.size() - << " inodes by similarity..."; - auto ti = LOG_CPU_TIMED_INFO; + LOG_VERBOSE << prefix << "ordering " << span.size() + << " inodes by similarity..."; + auto tv = LOG_CPU_TIMED_VERBOSE; order.by_similarity(span, cat); - ti << prefix << span.size() << " inodes ordered"; + tv << prefix << span.size() << " inodes ordered"; break; } case file_order_mode::NILSIMSA: { - LOG_INFO << prefix << "ordering " << span.size() - << " inodes using nilsimsa similarity..."; + LOG_VERBOSE << prefix << "ordering " << span.size() + << " inodes using nilsimsa similarity..."; similarity_ordering_options soo; + soo.context = prefix; soo.max_children = opts.nilsimsa_max_children; soo.max_cluster_size = opts.nilsimsa_max_cluster_size; - auto ti = LOG_TIMED_INFO; + auto tv = LOG_TIMED_VERBOSE; order.by_nilsimsa(wg, soo, span, cat); - ti << prefix << span.size() << " inodes ordered"; + tv << prefix << span.size() << " inodes ordered"; break; } } diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index 0ac1b266..e39fedb7 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -613,16 +613,9 @@ void scanner_::scan( if (auto catmgr = options_.inode.categorizer_mgr) { for (auto const& ci : im.fragment_category_info()) { - LOG_INFO << ci.fragment_count << " " << catmgr->category_name(ci.category) - << " fragments (" << size_with_unit(ci.total_size) << ")"; - } - - for (auto const& cat : im.inode_categories()) { - std::string str(catmgr->category_name(cat.value())); - if (cat.has_subcategory()) { - str += fmt::format("/{}", cat.subcategory()); - } - LOG_INFO << str; + LOG_VERBOSE << "found " << ci.fragment_count << " " + << catmgr->category_name(ci.category) << " fragments (" + << size_with_unit(ci.total_size) << ")"; } } @@ -696,7 +689,7 @@ void scanner_::scan( wg_blockify.add_job( [this, catmgr, blockmgr, category, meta, cc, &prog, &fsw, span = im.ordered_span(category, wg_ordering)]() mutable { - auto ti = LOG_CPU_TIMED_INFO; + auto tv = LOG_CPU_TIMED_VERBOSE; auto seg = segmenter_factory_->create( category, cc, blockmgr, [category, meta, &fsw](auto block) { @@ -730,7 +723,7 @@ void scanner_::scan( seg.finish(); - ti << category_prefix(catmgr, category) << "segmenting"; + tv << category_prefix(catmgr, category) << "segmenting finished"; }); }); } diff --git a/src/dwarfs/similarity_ordering.cpp b/src/dwarfs/similarity_ordering.cpp index 9dcc945f..90855da9 100644 --- a/src/dwarfs/similarity_ordering.cpp +++ b/src/dwarfs/similarity_ordering.cpp @@ -276,7 +276,8 @@ auto similarity_ordering_::build_index( } index.shrink_to_fit(); - tt << "build index: " << ev.size() << " -> " << index.size(); + tt << opts_.context << "build index: " << ev.size() << " -> " + << index.size(); } return index; @@ -294,7 +295,7 @@ auto similarity_ordering_::find_duplicates( std::sort(index.begin(), index.end(), [&ev](auto a, auto b) { return ev.bitvec_less(a, b); }); - tt << "sort index of " << index.size() << " elements"; + tt << opts_.context << "sort index of " << index.size() << " elements"; } { @@ -315,8 +316,8 @@ auto similarity_ordering_::find_duplicates( index.erase(++dst, index.end()); } - tt << "find duplicates: " << index.size() << " unique / " << dm.size() - << " groups"; + tt << opts_.context << "find duplicates: " << index.size() << " unique / " + << dm.size() << " groups"; } return dm; @@ -494,8 +495,8 @@ void similarity_ordering_::cluster_by_distance( match->index.push_back(i); } - td << "cluster_by_distance: " << node.cluster().index.size() << " -> " - << children.size() << ")"; + td << opts_.context << "cluster_by_distance: " << node.cluster().index.size() + << " -> " << children.size() << ")"; node.v = std::move(children); } @@ -550,7 +551,8 @@ void similarity_ordering_::collect_rec( duplicates_map& dup, index_type& ordered, std::string indent) const { if (node.is_leaf()) { for (auto e : node.cluster().index) { - LOG_TRACE << indent << " " << ev.description(e) << " -> " + LOG_TRACE << opts_.context << indent << " " << ev.description(e) + << " -> " << node.cluster().centroid.distance_to(ev.get_bits(e)); ordered.push_back(e); @@ -562,7 +564,8 @@ void similarity_ordering_::collect_rec( [&ev](auto a, auto b) { return ev.order_less(a, b); }); for (auto i : dupvec) { - LOG_TRACE << indent << " + " << ev.description(i) << " -> " + LOG_TRACE << opts_.context << indent << " + " << ev.description(i) + << " -> " << node.cluster().centroid.distance_to(ev.get_bits(i)); ordered.push_back(i); } @@ -573,7 +576,8 @@ void similarity_ordering_::collect_rec( // step before collecting for (auto const& [i, cn] : folly::enumerate(node.children())) { - LOG_TRACE << indent << "[" << i << "] " << cn.description(); + LOG_TRACE << opts_.context << indent << "[" << i << "] " + << cn.description(); collect_rec(cn, ev, dup, ordered, indent + " "); } } @@ -592,7 +596,8 @@ void similarity_ordering_::order_impl( index = build_index(ev); } - LOG_INFO << "total distance before ordering: " << total_distance(ev, index); + LOG_DEBUG << opts_.context + << "total distance before ordering: " << total_distance(ev, index); size_t size_hint = index.size(); auto duplicates = find_duplicates(ev, index); @@ -602,14 +607,15 @@ void similarity_ordering_::order_impl( [this, size_hint, &ev, rec = std::move(rec), root, dup = std::move(duplicates)]() mutable { { - auto ti = LOG_TIMED_INFO; + auto tv = LOG_TIMED_VERBOSE; order_tree_rec(*root, ev); - ti << "order_tree_rec"; + tv << opts_.context << "nilsimsa recursive ordering finished"; } index_type rv; rv.reserve(size_hint); collect_rec(*root, ev, dup, rv, ""); - LOG_INFO << "total distance after ordering: " << total_distance(ev, rv); + LOG_DEBUG << opts_.context << "total distance after ordering: " + << total_distance(ev, rv); rec.set_value(std::move(rv)); });