Mirror of https://github.com/mhx/dwarfs.git, synced 2025-09-10 21:10:02 -04:00
refactor(file_scanner): improve comments and error checking
parent 8c3334cdc8
commit 2c5ef8ff23
@@ -288,6 +288,9 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
   uint64_t size = p->size();
   uint64_t start_hash{0};
 
+  LOG_TRACE << "scanning file " << p->path_as_string() << " [size=" << size
+            << "]";
+
   if (size >= kLargeFileThreshold) {
     if (!p->is_invalid()) {
       try {
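The `start_hash` above only comes into play for large files: hashing a short prefix lets the scanner rule out most non-duplicates without reading entire files. A minimal sketch of the idea, using a hypothetical FNV-1a hash over the first 4 KiB (the actual hash function and prefix length used by dwarfs are not shown in this diff):

#include <cstdint>
#include <fstream>
#include <string>

// Hypothetical prefix hash: FNV-1a over the first 4 KiB of a file.
// dwarfs' real start hash (function and window size) may differ.
uint64_t prefix_hash(std::string const& path) {
  constexpr size_t kPrefixLen = 4096;
  char buf[kPrefixLen];
  std::ifstream is(path, std::ios::binary);
  is.read(buf, kPrefixLen);
  auto n = static_cast<size_t>(is.gcount());
  uint64_t h = 14695981039346656037ull; // FNV offset basis
  for (size_t i = 0; i < n; ++i) {
    h ^= static_cast<unsigned char>(buf[i]);
    h *= 1099511628211ull; // FNV prime
  }
  return h;
}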
@@ -311,7 +314,7 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
                                inode::files_vector());
 
   if (is_new) {
-    // A file size that has never been seen before. We can safely
+    // A file (size, start_hash) that has never been seen before. We can safely
     // create a new inode and we'll keep track of the file.
     it->second.push_back(p);
 
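The `is_new` flag presumably comes from an emplace into a container keyed on the (size, start_hash) pair, which is exactly what the reworded comments now reflect. A sketch of that pattern, with hypothetical names (`unique_size_`, `track`) standing in for members this diff only hints at:

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

struct file; // forward declaration, as in the scanner

// Hypothetical container: dedupe candidates bucketed by (size, start_hash).
// The actual member name in dwarfs is not shown in this diff.
using files_vector = std::vector<file*>;
std::map<std::pair<uint64_t, uint64_t>, files_vector> unique_size_;

void track(file* p, uint64_t size, uint64_t start_hash) {
  auto [it, is_new] =
      unique_size_.emplace(std::make_pair(size, start_hash), files_vector());
  if (is_new) {
    // First file with this (size, start_hash): no duplicate possible yet.
    it->second.push_back(p);
  } else {
    // Seen before: p is potentially a duplicate and must be fully hashed.
  }
}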
@@ -320,13 +323,13 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
       add_inode(p, __LINE__);
     }
   } else {
-    // This file size has been seen before, so this is potentially
+    // This file (size, start_hash) has been seen before, so this is potentially
     // a duplicate.
 
     std::shared_ptr<condition_barrier> cv;
 
     if (it->second.empty()) {
-      // This is any file of this size after the second file
+      // This is any file of this (size, start_hash) after the second file
       std::lock_guard lock(mx_);
 
       if (auto ffi = first_file_hashed_.find(size);
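Files arriving after the second one only need the barrier if it still exists: once the first file's hash has been published to `by_hash_`, the entry is removed and later files can proceed directly. A sketch of that lookup under `mx_`, with hypothetical member declarations mirroring what the diff references:

#include <cstdint>
#include <map>
#include <memory>
#include <mutex>

class condition_barrier; // see the sketch further below

// Hypothetical members mirroring those referenced in the diff.
std::mutex mx_;
std::map<uint64_t, std::shared_ptr<condition_barrier>> first_file_hashed_;

std::shared_ptr<condition_barrier> find_barrier(uint64_t size) {
  std::lock_guard lock(mx_);
  if (auto ffi = first_file_hashed_.find(size);
      ffi != first_file_hashed_.end()) {
    return ffi->second; // first file still being hashed: caller must wait
  }
  return nullptr; // already published to by_hash_
}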
@@ -334,16 +337,18 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
         cv = ffi->second;
       }
     } else {
-      // This is the second file of this size. We now need to hash
-      // both the first and second file and ensure that the first
-      // file's hash is stored to `by_hash_` first. We set up a
-      // condition variable to synchronize insertion into `by_hash_`.
+      // This is the second file of this (size, start_hash). We now need to
+      // hash both the first and second file and ensure that the first file's
+      // hash is stored to `by_hash_` first. We set up a condition variable
+      // to synchronize insertion into `by_hash_`.
 
       cv = std::make_shared<condition_barrier>();
 
       {
         std::lock_guard lock(mx_);
-        first_file_hashed_.emplace(size, cv);
+        DWARFS_CHECK(
+            first_file_hashed_.emplace(size, cv).second,
+            "internal error: first file condition barrier already exists");
       }
 
       // Add a job for the first file
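`condition_barrier` itself is not defined in this diff; from its use here it only needs to let the first file's hashing job signal completion and later jobs block until that happens. A minimal sketch, assuming a one-shot set()/wait() interface (the real dwarfs type may differ):

#include <condition_variable>
#include <mutex>

// Minimal one-shot barrier built from a mutex and condition variable.
class condition_barrier {
 public:
  // Called once the first file's hash is stored in by_hash_.
  void set() {
    {
      std::lock_guard lock(mx_);
      ready_ = true;
    }
    cv_.notify_all();
  }

  // Called by later files that must wait for the first file's hash.
  void wait() {
    std::unique_lock lock(mx_);
    cv_.wait(lock, [this] { return ready_; });
  }

 private:
  std::mutex mx_;
  std::condition_variable cv_;
  bool ready_{false};
};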
@@ -359,7 +364,8 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
     by_raw_inode_[p->raw_inode_num()].push_back(p);
   } else {
     auto& ref = by_hash_[p->hash()];
-    assert(ref.empty());
+    DWARFS_CHECK(ref.empty(),
+                 "internal error: unexpected existing hash");
     ref.push_back(p);
   }
 
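Replacing assert with DWARFS_CHECK keeps the invariant checked in release builds, where NDEBUG compiles assert away entirely. The macro's definition is not part of this diff; a sketch of the usual shape of such a check, assuming it aborts with a message (the real macro likely reports more context):

#include <cstdio>
#include <cstdlib>

// Sketch of a release-mode invariant check; dwarfs' actual DWARFS_CHECK
// may throw or produce richer diagnostics instead of aborting directly.
#define DWARFS_CHECK(expr, message)                                      \
  do {                                                                   \
    if (!(expr)) {                                                       \
      std::fprintf(stderr, "check failed: %s (%s)\n", #expr, (message)); \
      std::abort();                                                      \
    }                                                                    \
  } while (false)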