mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-14 06:48:39 -04:00
Fix scanning logic in inode manager to perform sequential scans
This commit is contained in:
parent
6866a2fef7
commit
f7d3fca9e2
@ -160,37 +160,26 @@ class inode_ : public inode {
|
|||||||
catjob.set_total_size(mm->size());
|
catjob.set_total_size(mm->size());
|
||||||
catjob.categorize_random_access(mm->span());
|
catjob.categorize_random_access(mm->span());
|
||||||
|
|
||||||
if (catjob.best_result_found()) {
|
if (!catjob.best_result_found()) {
|
||||||
// This means the job won't be running any sequential categorizers
|
// We must perform a sequential categorizer scan before scanning the
|
||||||
// as the outcome cannot possibly be any better. As a consequence,
|
// fragments, because the ordering is category-dependent.
|
||||||
// we can already fetch the result here and scan the fragments
|
// TODO: we might be able to get away with a single scan if we
|
||||||
// instead of the whole file.
|
// optimistically assume the default category and perform
|
||||||
|
// both the sequential scan and the default-category order
|
||||||
|
// scan in parallel
|
||||||
|
scan_range(
|
||||||
|
mm, [&catjob](auto span) { catjob.categorize_sequential(span); });
|
||||||
|
}
|
||||||
|
|
||||||
fragments_ = catjob.result();
|
fragments_ = catjob.result();
|
||||||
|
|
||||||
if (fragments_.size() > 1) {
|
if (fragments_.size() > 1) {
|
||||||
scan_fragments(mm, opts);
|
scan_fragments(mm, opts);
|
||||||
} else {
|
|
||||||
scan_full(mm, opts);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fragments_.empty()) {
|
if (fragments_.size() <= 1) {
|
||||||
// If we get here, we haven't scanned anything yet, and we don't know
|
|
||||||
// if the file will be fragmented or not.
|
|
||||||
|
|
||||||
scan_full(mm, opts);
|
scan_full(mm, opts);
|
||||||
|
|
||||||
if (catjob) {
|
|
||||||
fragments_ = catjob.result();
|
|
||||||
|
|
||||||
if (fragments_.size() > 1) {
|
|
||||||
// This is the unfortunate case where we have to scan the
|
|
||||||
// individual fragments after having already done a full scan.
|
|
||||||
scan_fragments(mm, opts);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -329,6 +318,11 @@ class inode_ : public inode {
|
|||||||
scanner(mm->span(offset, size));
|
scanner(mm->span(offset, size));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void scan_range(mmif* mm, T&& scanner) {
|
||||||
|
scan_range(mm, 0, mm->size(), std::forward<T>(scanner));
|
||||||
|
}
|
||||||
|
|
||||||
void scan_fragments(mmif* mm, inode_options const& opts) {
|
void scan_fragments(mmif* mm, inode_options const& opts) {
|
||||||
assert(mm);
|
assert(mm);
|
||||||
assert(fragments_.size() > 1);
|
assert(fragments_.size() > 1);
|
||||||
@ -400,14 +394,14 @@ class inode_ : public inode {
|
|||||||
|
|
||||||
case file_order_mode::SIMILARITY: {
|
case file_order_mode::SIMILARITY: {
|
||||||
similarity sc;
|
similarity sc;
|
||||||
scan_range(mm, 0, mm->size(), sc);
|
scan_range(mm, sc);
|
||||||
similarity_hash_ = sc.finalize(); // TODO
|
similarity_hash_ = sc.finalize(); // TODO
|
||||||
similarity_.emplace<uint32_t>(sc.finalize());
|
similarity_.emplace<uint32_t>(sc.finalize());
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case file_order_mode::NILSIMSA: {
|
case file_order_mode::NILSIMSA: {
|
||||||
nilsimsa nc;
|
nilsimsa nc;
|
||||||
scan_range(mm, 0, mm->size(), nc);
|
scan_range(mm, nc);
|
||||||
// TODO: can we finalize in-place?
|
// TODO: can we finalize in-place?
|
||||||
nilsimsa::hash_type hash;
|
nilsimsa::hash_type hash;
|
||||||
nc.finalize(hash);
|
nc.finalize(hash);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user