diff --git a/src/dwarfs/block_manager.cpp b/src/dwarfs/block_manager.cpp index bcb0699a..1a8d5b87 100644 --- a/src/dwarfs/block_manager.cpp +++ b/src/dwarfs/block_manager.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -42,13 +43,6 @@ #include "dwarfs/progress.h" #include "dwarfs/util.h" -#define ENABLE_COLLISION_VECTOR 0 - -#if ENABLE_COLLISION_VECTOR -#include -#include -#endif - namespace dwarfs { namespace { @@ -57,14 +51,6 @@ namespace { const uint32_t blockhash_emtpy = 0; struct bm_stats { -#if ENABLE_COLLISION_VECTOR - bm_stats() - : collision_vec_size(1, 0, 256) {} - - folly::Histogram collision_vec_size; - size_t collisions2{0}; -#endif - size_t total_hashes{0}; size_t collisions{0}; size_t real_matches{0}; @@ -78,24 +64,14 @@ template struct block_hashes { static constexpr size_t max_coll_inline = 2; using blockhash_t = google::dense_hash_map; -#if ENABLE_COLLISION_VECTOR - using collision_vector = folly::small_vector; - using blockhash2_t = google::dense_hash_map; -#endif block_hashes(size_t size) : size(size) { values.set_empty_key(blockhash_emtpy); -#if ENABLE_COLLISION_VECTOR - values2.set_empty_key(blockhash_emtpy); -#endif } const size_t size; blockhash_t values; -#if ENABLE_COLLISION_VECTOR - blockhash2_t values2; -#endif }; template @@ -217,9 +193,6 @@ void block_manager_::finish_blocks() { static char const* const percent = "{:.2}%"; const auto& st = sti.second; log_.debug() << "blockhash window <" << sti.first << ">: " << st.collisions -#if ENABLE_COLLISION_VECTOR - << " (" << st.collisions2 << ")" -#endif << " collisions (" << fmt::format(percent, float(st.collisions) / st.total_hashes) << "), " << st.real_matches << " real matches, " @@ -229,17 +202,6 @@ void block_manager_::finish_blocks() { << "), " << size_with_unit(st.saved_bytes) << " saved (largest=" << size_with_unit(st.largest_block) << ")"; -#if ENABLE_COLLISION_VECTOR - if (st.collision_vec_size.computeTotalCount() > 0) { - log_.debug() - << "collision vector size p50: " - << st.collision_vec_size.getPercentileEstimate(0.5) - << ", p75: " << st.collision_vec_size.getPercentileEstimate(0.75) - << ", p90: " << st.collision_vec_size.getPercentileEstimate(0.9) - << ", p95: " << st.collision_vec_size.getPercentileEstimate(0.95) - << ", p99: " << st.collision_vec_size.getPercentileEstimate(0.99); - } -#endif } } @@ -250,13 +212,6 @@ void block_manager_::block_ready() { fsw_.write_block(std::move(tmp)); block_.reserve(block_size_); for (auto& bh : block_hashes_) { -#if ENABLE_COLLISION_VECTOR - bm_stats& stats = stats_.at(bh.size); - for (auto const& e : bh.values2) { - stats.collision_vec_size.addValue(e.second.size()); - } - bh.values2.clear(); -#endif bh.values.clear(); } ++current_block_; @@ -295,26 +250,22 @@ void block_manager_::update_hashes(const hash_map_type& hm, ++stats.total_hashes; if (hval != blockhash_emtpy) { - auto [it, success] = - bhi->values.insert(std::make_pair(hval, block_offset + off)); + if constexpr (std::is_same_v) { + bhi->values[hval] = block_offset + off; + } else { + auto new_offest = block_offset + off; + auto [it, success] = + bhi->values.insert(std::make_pair(hval, new_offest)); - if (!success) { -#if ENABLE_COLLISION_VECTOR - auto [it2, success2] = bhi->values2.insert( - std::make_pair(hval, block_hashes_t::collision_vector())); + if (!success) { + log_.trace() << "collision for hash=" << hval + << " (size=" << bhi->size << "): " << it->second + << " <-> " << new_offest; - if (!success2) { - log_.trace() << "2nd collision for hash=" << hval; - ++stats.collisions2; + ++stats.collisions; + + it->second = new_offest; } - - it2->second.push_back(block_offset + off); -#endif - log_.trace() << "collision for hash=" << hval - << " (size=" << bhi->size << "): " << it->second - << " <-> " << block_offset + off; - - ++stats.collisions; } } }