mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 04:19:10 -04:00
fix(file_scanner): fix hardlink processing in presence of errors
This commit is contained in:
parent
9ce46057b4
commit
e16cce2a15
@ -234,20 +234,23 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
|
|||||||
uint64_t size = p->size();
|
uint64_t size = p->size();
|
||||||
uint64_t start_hash{0};
|
uint64_t start_hash{0};
|
||||||
|
|
||||||
if (size >= kLargeFileThreshold && !p->is_invalid()) {
|
if (size >= kLargeFileThreshold) {
|
||||||
try {
|
if (!p->is_invalid()) {
|
||||||
auto mm = os_.map_file(p->fs_path(), kLargeFileStartHashSize);
|
try {
|
||||||
checksum cs(checksum::algorithm::XXH3_64);
|
auto mm = os_.map_file(p->fs_path(), kLargeFileStartHashSize);
|
||||||
cs.update(mm->addr(), kLargeFileStartHashSize);
|
checksum cs(checksum::algorithm::XXH3_64);
|
||||||
cs.finalize(&start_hash);
|
cs.update(mm->addr(), kLargeFileStartHashSize);
|
||||||
file_start_hash_.emplace(p, start_hash);
|
cs.finalize(&start_hash);
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
LOG_ERROR << "failed to map file " << p->path_as_string() << ": "
|
LOG_ERROR << "failed to map file " << p->path_as_string() << ": "
|
||||||
<< folly::exceptionStr(std::current_exception())
|
<< folly::exceptionStr(std::current_exception())
|
||||||
<< ", creating empty file";
|
<< ", creating empty file";
|
||||||
++prog_.errors;
|
++prog_.errors;
|
||||||
p->set_invalid();
|
p->set_invalid();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
file_start_hash_.emplace(p, start_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto [it, is_new] = unique_size_.emplace(std::make_pair(size, start_hash),
|
auto [it, is_new] = unique_size_.emplace(std::make_pair(size, start_hash),
|
||||||
|
@ -281,12 +281,13 @@ class mkdwarfs_tester : public tester_common {
|
|||||||
return filesystem_v2(*lgr, *os, mm, opt);
|
return filesystem_v2(*lgr, *os, mm, opt);
|
||||||
}
|
}
|
||||||
|
|
||||||
filesystem_v2 fs_from_file(std::string path) {
|
filesystem_v2
|
||||||
|
fs_from_file(std::string path, filesystem_options const& opt = {}) {
|
||||||
auto fsimage = fa->get_file(path);
|
auto fsimage = fa->get_file(path);
|
||||||
if (!fsimage) {
|
if (!fsimage) {
|
||||||
throw std::runtime_error("file not found: " + path);
|
throw std::runtime_error("file not found: " + path);
|
||||||
}
|
}
|
||||||
return fs_from_data(std::move(fsimage.value()));
|
return fs_from_data(std::move(fsimage.value()), opt);
|
||||||
}
|
}
|
||||||
|
|
||||||
filesystem_v2 fs_from_stdout(filesystem_options const& opt = {}) {
|
filesystem_v2 fs_from_stdout(filesystem_options const& opt = {}) {
|
||||||
@ -2404,8 +2405,6 @@ class map_file_error_test : public testing::TestWithParam<char const*> {};
|
|||||||
TEST_P(map_file_error_test, delayed) {
|
TEST_P(map_file_error_test, delayed) {
|
||||||
std::string extra_args{GetParam()};
|
std::string extra_args{GetParam()};
|
||||||
|
|
||||||
// TODO: we must also simulate hardlinks here...
|
|
||||||
|
|
||||||
auto t = mkdwarfs_tester::create_empty();
|
auto t = mkdwarfs_tester::create_empty();
|
||||||
t.add_root_dir();
|
t.add_root_dir();
|
||||||
t.os->add_local_files(audio_data_dir);
|
t.os->add_local_files(audio_data_dir);
|
||||||
@ -2414,6 +2413,25 @@ TEST_P(map_file_error_test, delayed) {
|
|||||||
.max_name_len = 8,
|
.max_name_len = 8,
|
||||||
.with_errors = true});
|
.with_errors = true});
|
||||||
|
|
||||||
|
static constexpr size_t const kSizeSmall{1 << 10};
|
||||||
|
static constexpr size_t const kSizeLarge{1 << 20};
|
||||||
|
auto gen_small = [] { return test::loremipsum(kSizeLarge); };
|
||||||
|
auto gen_large = [] { return test::loremipsum(kSizeLarge); };
|
||||||
|
t.os->add("large_link1", {43, 0100755, 2, 1000, 100, kSizeLarge, 42, 0, 0, 0},
|
||||||
|
gen_large);
|
||||||
|
t.os->add("large_link2", {43, 0100755, 2, 1000, 100, kSizeLarge, 42, 0, 0, 0},
|
||||||
|
gen_large);
|
||||||
|
t.os->add("small_link1", {44, 0100755, 2, 1000, 100, kSizeSmall, 42, 0, 0, 0},
|
||||||
|
gen_small);
|
||||||
|
t.os->add("small_link2", {44, 0100755, 2, 1000, 100, kSizeSmall, 42, 0, 0, 0},
|
||||||
|
gen_small);
|
||||||
|
for (auto const& link :
|
||||||
|
{"large_link1", "large_link2", "small_link1", "small_link2"}) {
|
||||||
|
t.os->set_map_file_error(
|
||||||
|
fs::path{"/"} / link,
|
||||||
|
std::make_exception_ptr(std::runtime_error("map_file_error")), 0);
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
std::mt19937_64 rng{42};
|
std::mt19937_64 rng{42};
|
||||||
|
|
||||||
@ -2444,9 +2462,28 @@ TEST_P(map_file_error_test, delayed) {
|
|||||||
|
|
||||||
EXPECT_EQ(2, t.run(args)) << t.err();
|
EXPECT_EQ(2, t.run(args)) << t.err();
|
||||||
|
|
||||||
auto fs = t.fs_from_file("test.dwarfs");
|
auto fs = t.fs_from_file("test.dwarfs", {.metadata = {.enable_nlink = true}});
|
||||||
// fs.dump(std::cout, 2);
|
// fs.dump(std::cout, 2);
|
||||||
|
|
||||||
|
{
|
||||||
|
auto large_link1 = fs.find("/large_link1");
|
||||||
|
auto large_link2 = fs.find("/large_link2");
|
||||||
|
auto small_link1 = fs.find("/small_link1");
|
||||||
|
auto small_link2 = fs.find("/small_link2");
|
||||||
|
|
||||||
|
ASSERT_TRUE(large_link1);
|
||||||
|
ASSERT_TRUE(large_link2);
|
||||||
|
ASSERT_TRUE(small_link1);
|
||||||
|
ASSERT_TRUE(small_link2);
|
||||||
|
EXPECT_EQ(large_link1->inode_num(), large_link2->inode_num());
|
||||||
|
EXPECT_EQ(small_link1->inode_num(), small_link2->inode_num());
|
||||||
|
file_stat st;
|
||||||
|
ASSERT_EQ(0, fs.getattr(*large_link1, &st));
|
||||||
|
EXPECT_EQ(0, st.size);
|
||||||
|
ASSERT_EQ(0, fs.getattr(*small_link1, &st));
|
||||||
|
EXPECT_EQ(0, st.size);
|
||||||
|
}
|
||||||
|
|
||||||
std::unordered_map<fs::path, std::string, fs_path_hash> actual_files;
|
std::unordered_map<fs::path, std::string, fs_path_hash> actual_files;
|
||||||
fs.walk([&](auto const& dev) {
|
fs.walk([&](auto const& dev) {
|
||||||
auto iv = dev.inode();
|
auto iv = dev.inode();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user