mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 20:41:04 -04:00
Refactor segmenting
This commit is contained in:
parent
1970da345e
commit
b4f559213f
@ -168,6 +168,12 @@ class block_manager_ : public block_manager::impl {
|
||||
size_t& block_offset, const uint8_t* data,
|
||||
const match_window& search_win) const;
|
||||
|
||||
void
|
||||
validate_match(const std::string& indent, const uint8_t* data,
|
||||
match_window& mw, const std::vector<cyclic_hash_t>& hashvec,
|
||||
bm_stats& stats, size_t block_size, size_t block_offset,
|
||||
size_t off, match_window& best, size_t& best_offset);
|
||||
|
||||
const block_manager::config& cfg_;
|
||||
const size_t block_size_;
|
||||
std::vector<size_t> blockhash_window_size_;
|
||||
@ -387,6 +393,35 @@ void block_manager_<LoggerPolicy>::segment_and_add_data(
|
||||
block_hashes_.rbegin(), block_hashes_.rend());
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void block_manager_<LoggerPolicy>::validate_match(
|
||||
const std::string& indent, const uint8_t* data, match_window& mw,
|
||||
const std::vector<cyclic_hash_t>& hashvec, bm_stats& stats,
|
||||
size_t block_size, size_t block_offset, size_t off, match_window& best,
|
||||
size_t& best_offset) {
|
||||
log_.trace() << indent << "potentially matched " << block_size
|
||||
<< " bytes at offset " << off << ", hash=" << hashvec[off];
|
||||
|
||||
match_window win(off, off + block_size);
|
||||
|
||||
if (get_match_window(indent, win, block_offset, data, mw)) {
|
||||
log_.trace() << indent << "definitely matched " << win.size()
|
||||
<< " bytes at offset " << win.first;
|
||||
++stats.real_matches;
|
||||
|
||||
// fuck yeah, we've got a block...
|
||||
|
||||
if (win.size() > best.size()) {
|
||||
best = win;
|
||||
best_offset = block_offset;
|
||||
}
|
||||
} else {
|
||||
log_.trace() << indent << "bad match: " << block_size << " bytes at offset "
|
||||
<< off << ", hash=" << hashvec[off];
|
||||
++stats.bad_matches;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void block_manager_<LoggerPolicy>::segment_and_add_data(
|
||||
const std::string& indent, const hash_map_type& hm,
|
||||
@ -413,33 +448,10 @@ void block_manager_<LoggerPolicy>::segment_and_add_data(
|
||||
match_window best;
|
||||
size_t best_offset = 0;
|
||||
|
||||
auto bhi = bhcur->values.find(hashvec[off]);
|
||||
|
||||
if (bhi != bhcur->values.end()) {
|
||||
log_.trace() << indent << "potentially matched " << bhcur->size
|
||||
<< " bytes at offset " << off
|
||||
<< ", hash=" << hashvec[off];
|
||||
|
||||
match_window win(off, off + bhcur->size);
|
||||
size_t block_offset = bhi->second;
|
||||
|
||||
if (get_match_window(indent, win, block_offset, data, mw)) {
|
||||
log_.trace() << indent << "definitely matched " << win.size()
|
||||
<< " bytes at offset " << win.first;
|
||||
++stats.real_matches;
|
||||
|
||||
// fuck yeah, we've got a block...
|
||||
|
||||
if (win.size() > best.size()) {
|
||||
best = win;
|
||||
best_offset = block_offset;
|
||||
}
|
||||
} else {
|
||||
log_.trace() << indent << "bad match: " << bhcur->size
|
||||
<< " bytes at offset " << off
|
||||
<< ", hash=" << hashvec[off];
|
||||
++stats.bad_matches;
|
||||
}
|
||||
if (auto bhi = bhcur->values.find(hashvec[off]);
|
||||
bhi != bhcur->values.end()) {
|
||||
validate_match(indent, data, mw, hashvec, stats, bhcur->size,
|
||||
bhi->second, off, best, best_offset);
|
||||
}
|
||||
|
||||
if (best.size() > 0) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user