mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-19 09:17:21 -04:00
fix: correctly handle delayed file access errors
This commit is contained in:
parent
bc72147689
commit
2d4d8e79c9
@ -142,6 +142,9 @@ class file : public entry {
|
||||
void set_inode_num(uint32_t ino) override;
|
||||
std::optional<uint32_t> const& inode_num() const override;
|
||||
|
||||
void set_invalid() { data_->invalid.store(true); }
|
||||
bool is_invalid() const { return data_->invalid.load(); }
|
||||
|
||||
uint32_t refcount() const { return data_->refcount; }
|
||||
|
||||
private:
|
||||
@ -150,6 +153,7 @@ class file : public entry {
|
||||
hash_type hash;
|
||||
uint32_t refcount{1};
|
||||
std::optional<uint32_t> inode_num;
|
||||
std::atomic<bool> invalid{false};
|
||||
};
|
||||
|
||||
std::shared_ptr<data> data_;
|
||||
|
@ -21,9 +21,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <exception>
|
||||
#include <iosfwd>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include <folly/small_vector.h>
|
||||
@ -41,6 +43,7 @@ class chunk;
|
||||
|
||||
class file;
|
||||
class mmif;
|
||||
class os_access;
|
||||
class progress;
|
||||
|
||||
struct inode_options;
|
||||
@ -61,10 +64,17 @@ class inode : public object {
|
||||
nilsimsa_similarity_hash(fragment_category cat) const = 0;
|
||||
virtual size_t size() const = 0;
|
||||
virtual file const* any() const = 0;
|
||||
virtual files_vector const& all() const = 0;
|
||||
virtual void
|
||||
append_chunks_to(std::vector<thrift::metadata::chunk>& vec) const = 0;
|
||||
virtual inode_fragments& fragments() = 0;
|
||||
virtual void dump(std::ostream& os, inode_options const& options) const = 0;
|
||||
virtual void set_scan_error(file const* fp, std::exception_ptr ep) = 0;
|
||||
virtual std::optional<std::pair<file const*, std::exception_ptr>>
|
||||
get_scan_error() const = 0;
|
||||
virtual std::tuple<std::unique_ptr<mmif>, file const*,
|
||||
std::vector<std::pair<file const*, std::exception_ptr>>>
|
||||
mmap_any(os_access const& os) const = 0;
|
||||
};
|
||||
|
||||
using sortable_inode_span =
|
||||
|
@ -85,6 +85,12 @@ class inode_manager {
|
||||
impl_->scan_background(wg, os, std::move(ino), p);
|
||||
}
|
||||
|
||||
bool has_invalid_inodes() const { return impl_->has_invalid_inodes(); }
|
||||
|
||||
void try_scan_invalid(worker_group& wg, os_access const& os) {
|
||||
impl_->try_scan_invalid(wg, os);
|
||||
}
|
||||
|
||||
void dump(std::ostream& os) const { impl_->dump(os); }
|
||||
|
||||
sortable_inode_span sortable_span() const { return impl_->sortable_span(); }
|
||||
@ -105,6 +111,8 @@ class inode_manager {
|
||||
virtual fragment_infos fragment_category_info() const = 0;
|
||||
virtual void scan_background(worker_group& wg, os_access const& os,
|
||||
std::shared_ptr<inode> ino, file* p) const = 0;
|
||||
virtual bool has_invalid_inodes() const = 0;
|
||||
virtual void try_scan_invalid(worker_group& wg, os_access const& os) = 0;
|
||||
virtual void dump(std::ostream& os) const = 0;
|
||||
virtual sortable_inode_span sortable_span() const = 0;
|
||||
virtual sortable_inode_span
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include <folly/String.h>
|
||||
#include <folly/container/F14Map.h>
|
||||
|
||||
#include "dwarfs/entry.h"
|
||||
@ -184,6 +185,7 @@ void file_scanner_<LoggerPolicy>::finalize(uint32_t& inode_num) {
|
||||
return unique_size_.at(p->size());
|
||||
});
|
||||
finalize_files<true>(unique_size_, inode_num, obj_num);
|
||||
finalize_files(by_raw_inode_, inode_num, obj_num);
|
||||
finalize_files(by_hash_, inode_num, obj_num);
|
||||
} else {
|
||||
finalize_hardlinks([this](file const* p) -> inode::files_vector& {
|
||||
@ -243,12 +245,15 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
|
||||
auto& ref = by_hash_[p->hash()];
|
||||
|
||||
assert(ref.empty());
|
||||
assert(p->get_inode());
|
||||
|
||||
if (p->is_invalid()) [[unlikely]] {
|
||||
by_raw_inode_[p->raw_inode_num()].push_back(p);
|
||||
} else {
|
||||
auto& ref = by_hash_[p->hash()];
|
||||
assert(ref.empty());
|
||||
ref.push_back(p);
|
||||
}
|
||||
|
||||
cv->set();
|
||||
|
||||
@ -274,6 +279,10 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
|
||||
cv->wait(lock);
|
||||
}
|
||||
|
||||
if (p->is_invalid()) [[unlikely]] {
|
||||
add_inode(p);
|
||||
by_raw_inode_[p->raw_inode_num()].push_back(p);
|
||||
} else {
|
||||
auto& ref = by_hash_[p->hash()];
|
||||
|
||||
if (ref.empty()) {
|
||||
@ -290,17 +299,31 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
|
||||
|
||||
ref.push_back(p);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void file_scanner_<LoggerPolicy>::hash_file(file* p) {
|
||||
if (p->is_invalid()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto const size = p->size();
|
||||
std::unique_ptr<mmif> mm;
|
||||
|
||||
if (size > 0) {
|
||||
try {
|
||||
mm = os_.map_file(p->fs_path(), size);
|
||||
} catch (...) {
|
||||
LOG_ERROR << "failed to map file " << p->path_as_string() << ": "
|
||||
<< folly::exceptionStr(std::current_exception())
|
||||
<< ", creating empty file";
|
||||
++prog_.errors;
|
||||
p->set_invalid();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
prog_.current.store(p);
|
||||
|
@ -203,9 +203,16 @@ class inode_ : public inode {
|
||||
if (files_.empty()) {
|
||||
DWARFS_THROW(runtime_error, "inode has no file (any)");
|
||||
}
|
||||
for (auto const& f : files_) {
|
||||
if (!f->is_invalid()) {
|
||||
return f;
|
||||
}
|
||||
}
|
||||
return files_.front();
|
||||
}
|
||||
|
||||
files_vector const& all() const override { return files_; }
|
||||
|
||||
void append_chunks_to(std::vector<chunk_type>& vec) const override {
|
||||
for (auto const& frag : fragments_) {
|
||||
auto chks = frag.chunks();
|
||||
@ -228,11 +235,21 @@ class inode_ : public inode {
|
||||
}
|
||||
};
|
||||
|
||||
os << "inode " << num() << " (" << any()->size() << " bytes):\n";
|
||||
std::string ino_num{"?"};
|
||||
|
||||
if (flags_ & kNumIsValid) {
|
||||
ino_num = std::to_string(num());
|
||||
}
|
||||
|
||||
os << "inode " << ino_num << " (" << any()->size() << " bytes):\n";
|
||||
os << " files:\n";
|
||||
|
||||
for (auto const& f : files_) {
|
||||
os << " " << f->path_as_string() << "\n";
|
||||
os << " " << f->path_as_string();
|
||||
if (f->is_invalid()) {
|
||||
os << " (invalid)";
|
||||
}
|
||||
os << "\n";
|
||||
}
|
||||
|
||||
os << " fragments:\n";
|
||||
@ -282,6 +299,43 @@ class inode_ : public inode {
|
||||
similarity_);
|
||||
}
|
||||
|
||||
void set_scan_error(file const* fp, std::exception_ptr ep) override {
|
||||
assert(!scan_error_);
|
||||
scan_error_ = std::make_unique<std::pair<file const*, std::exception_ptr>>(
|
||||
fp, std::move(ep));
|
||||
}
|
||||
|
||||
std::optional<std::pair<file const*, std::exception_ptr>>
|
||||
get_scan_error() const override {
|
||||
if (scan_error_) {
|
||||
return *scan_error_;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::tuple<std::unique_ptr<mmif>, file const*,
|
||||
std::vector<std::pair<file const*, std::exception_ptr>>>
|
||||
mmap_any(os_access const& os) const override {
|
||||
std::unique_ptr<mmif> mm;
|
||||
std::vector<std::pair<file const*, std::exception_ptr>> errors;
|
||||
file const* rfp{nullptr};
|
||||
|
||||
for (auto fp : files_) {
|
||||
if (!fp->is_invalid()) {
|
||||
try {
|
||||
mm = os.map_file(fp->fs_path(), fp->size());
|
||||
rfp = fp;
|
||||
break;
|
||||
} catch (...) {
|
||||
fp->set_invalid();
|
||||
errors.emplace_back(fp, std::current_exception());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {std::move(mm), rfp, std::move(errors)};
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<scanner_progress>
|
||||
make_progress_context(std::string_view context, mmif* mm, progress& prog,
|
||||
@ -457,6 +511,7 @@ class inode_ : public inode {
|
||||
uint32_t num_{0};
|
||||
inode_fragments fragments_;
|
||||
files_vector files_;
|
||||
std::unique_ptr<std::pair<file const*, std::exception_ptr>> scan_error_;
|
||||
|
||||
std::variant<
|
||||
// in case of no hashes at all
|
||||
@ -553,6 +608,10 @@ class inode_manager_ final : public inode_manager::impl {
|
||||
void scan_background(worker_group& wg, os_access const& os,
|
||||
std::shared_ptr<inode> ino, file* p) const override;
|
||||
|
||||
bool has_invalid_inodes() const override;
|
||||
|
||||
void try_scan_invalid(worker_group& wg, os_access const& os) override;
|
||||
|
||||
void dump(std::ostream& os) const override;
|
||||
|
||||
sortable_inode_span sortable_span() const override {
|
||||
@ -587,6 +646,7 @@ class inode_manager_ final : public inode_manager::impl {
|
||||
progress& prog_;
|
||||
inode_options opts_;
|
||||
bool const inodes_need_scanning_;
|
||||
std::atomic<size_t> mutable num_invalid_inodes_{0};
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
@ -603,18 +663,22 @@ void inode_manager_<LoggerPolicy>::scan_background(worker_group& wg,
|
||||
wg.add_job([this, &os, p, ino = std::move(ino)] {
|
||||
auto const size = p->size();
|
||||
std::shared_ptr<mmif> mm;
|
||||
if (size > 0) {
|
||||
|
||||
if (size > 0 && !p->is_invalid()) {
|
||||
try {
|
||||
mm = os.map_file(p->fs_path(), size);
|
||||
} catch (...) {
|
||||
LOG_ERROR << "failed to map file \"" << p->path_as_string()
|
||||
<< "\": " << folly::exceptionStr(std::current_exception())
|
||||
<< ", creating empty inode";
|
||||
++prog_.errors;
|
||||
p->override_size(0);
|
||||
// don't return here, we still need scan() to run
|
||||
p->set_invalid();
|
||||
// If this file *was* successfully mapped before, there's a slight
|
||||
// chance that there's another file with the same hash. We can only
|
||||
// figure this out later when all files have been hashed, so we
|
||||
// save the error and try again later (in `try_scan_invalid()`).
|
||||
ino->set_scan_error(p, std::current_exception());
|
||||
++num_invalid_inodes_;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ino->scan(mm.get(), opts_, prog_);
|
||||
update_prog(ino, p);
|
||||
});
|
||||
@ -624,6 +688,56 @@ void inode_manager_<LoggerPolicy>::scan_background(worker_group& wg,
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
bool inode_manager_<LoggerPolicy>::has_invalid_inodes() const {
|
||||
assert(inodes_need_scanning_ || num_invalid_inodes_.load() == 0);
|
||||
return num_invalid_inodes_.load() > 0;
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void inode_manager_<LoggerPolicy>::try_scan_invalid(worker_group& wg,
|
||||
os_access const& os) {
|
||||
LOG_VERBOSE << "trying to scan " << num_invalid_inodes_.load()
|
||||
<< " invalid inodes...";
|
||||
|
||||
for (auto const& ino : inodes_) {
|
||||
if (auto scan_err = ino->get_scan_error()) {
|
||||
std::vector<std::pair<file const*, std::exception_ptr>> errors;
|
||||
auto const& fv = ino->all();
|
||||
|
||||
if (fv.size() > 1) {
|
||||
auto [mm, p, err] = ino->mmap_any(os);
|
||||
|
||||
if (mm) {
|
||||
LOG_DEBUG << "successfully opened: " << p->path_as_string();
|
||||
|
||||
wg.add_job([this, p, ino, mm = std::move(mm)] {
|
||||
ino->scan(mm.get(), opts_, prog_);
|
||||
update_prog(ino, p);
|
||||
});
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
errors = std::move(err);
|
||||
}
|
||||
|
||||
ino->scan(nullptr, opts_, prog_);
|
||||
++prog_.inodes_scanned;
|
||||
++prog_.files_scanned;
|
||||
|
||||
errors.emplace_back(scan_err.value());
|
||||
|
||||
for (auto const& [fp, ep] : errors) {
|
||||
LOG_ERROR << "failed to map file \"" << fp->path_as_string()
|
||||
<< "\": " << folly::exceptionStr(ep)
|
||||
<< ", creating empty inode";
|
||||
++prog_.errors;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void inode_manager_<LoggerPolicy>::dump(std::ostream& os) const {
|
||||
for_each_inode_in_order(
|
||||
|
@ -354,7 +354,7 @@ scanner_<LoggerPolicy>::add_entry(std::filesystem::path const& name,
|
||||
if (pe) {
|
||||
switch (pe->type()) {
|
||||
case entry::E_FILE:
|
||||
if (os_->access(pe->fs_path(), R_OK)) {
|
||||
if (pe->size() > 0 && os_->access(pe->fs_path(), R_OK)) {
|
||||
LOG_ERROR << "cannot access " << pe->path_as_string()
|
||||
<< ", creating empty file";
|
||||
pe->override_size(0);
|
||||
@ -601,6 +601,7 @@ void scanner_<LoggerPolicy>::scan(
|
||||
root->accept(lsiv, true);
|
||||
|
||||
LOG_INFO << "waiting for background scanners...";
|
||||
|
||||
wg_.wait();
|
||||
|
||||
LOG_INFO << "scanning CPU time: " << time_with_unit(wg_.get_cpu_time());
|
||||
@ -609,6 +610,14 @@ void scanner_<LoggerPolicy>::scan(
|
||||
uint32_t first_device_inode = first_file_inode;
|
||||
fs.finalize(first_device_inode);
|
||||
|
||||
// this must be done after finalizing the inodes since this is when
|
||||
// the file vectors are populated
|
||||
if (im.has_invalid_inodes()) {
|
||||
LOG_INFO << "trying to recover any invalid inodes...";
|
||||
im.try_scan_invalid(wg_, *os_);
|
||||
wg_.wait();
|
||||
}
|
||||
|
||||
LOG_INFO << "saved " << size_with_unit(prog.saved_by_deduplication) << " / "
|
||||
<< size_with_unit(prog.original_size) << " in "
|
||||
<< prog.duplicate_files << "/" << prog.files_found
|
||||
@ -738,8 +747,10 @@ void scanner_<LoggerPolicy>::scan(
|
||||
// TODO: factor this code out
|
||||
auto f = ino->any();
|
||||
|
||||
if (auto size = f->size(); size > 0) {
|
||||
auto mm = os_->map_file(f->fs_path(), size);
|
||||
if (auto size = f->size(); size > 0 && !f->is_invalid()) {
|
||||
auto [mm, _, errors] = ino->mmap_any(*os_);
|
||||
|
||||
if (mm) {
|
||||
file_off_t offset{0};
|
||||
|
||||
for (auto& frag : ino->fragments()) {
|
||||
@ -751,6 +762,14 @@ void scanner_<LoggerPolicy>::scan(
|
||||
|
||||
offset += frag.size();
|
||||
}
|
||||
} else {
|
||||
for (auto& [fp, e] : errors) {
|
||||
LOG_ERROR << "failed to map file " << fp->path_as_string()
|
||||
<< ": " << folly::exceptionStr(e)
|
||||
<< ", creating empty inode";
|
||||
++prog.errors;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
prog.inodes_written++; // TODO: remove?
|
||||
|
@ -330,7 +330,7 @@ void os_access_mock::set_access_fail(fs::path const& path) {
|
||||
|
||||
void os_access_mock::set_map_file_error(std::filesystem::path const& path,
|
||||
std::exception_ptr ep,
|
||||
size_t after_n_attempts) {
|
||||
int after_n_attempts) {
|
||||
auto& e = map_file_errors_[path];
|
||||
e.ep = std::move(ep);
|
||||
e.remaining_successful_attempts = after_n_attempts;
|
||||
@ -435,13 +435,11 @@ os_access_mock::map_file(fs::path const& path, size_t size) const {
|
||||
if (auto de = find(path);
|
||||
de && de->status.type() == posix_file_type::regular) {
|
||||
if (auto it = map_file_errors_.find(path); it != map_file_errors_.end()) {
|
||||
size_t remaining = 0;
|
||||
do {
|
||||
remaining = it->second.remaining_successful_attempts.load();
|
||||
} while (remaining > 0 &&
|
||||
!it->second.remaining_successful_attempts.compare_exchange_weak(
|
||||
remaining, remaining - 1));
|
||||
if (remaining == 0) {
|
||||
int remaining = it->second.remaining_successful_attempts.load();
|
||||
while (!it->second.remaining_successful_attempts.compare_exchange_weak(
|
||||
remaining, remaining - 1)) {
|
||||
}
|
||||
if (remaining <= 0) {
|
||||
std::rethrow_exception(it->second.ep);
|
||||
}
|
||||
}
|
||||
@ -463,6 +461,16 @@ os_access_mock::map_file(fs::path const& path, size_t size) const {
|
||||
throw std::runtime_error(fmt::format("oops in map_file: {}", path.string()));
|
||||
}
|
||||
|
||||
std::set<std::filesystem::path> os_access_mock::get_failed_paths() const {
|
||||
std::set<std::filesystem::path> rv = access_fail_set_;
|
||||
for (auto const& [path, error] : map_file_errors_) {
|
||||
if (error.remaining_successful_attempts.load() < 0) {
|
||||
rv.emplace(path);
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
std::unique_ptr<mmif> os_access_mock::map_file(fs::path const& path) const {
|
||||
return map_file(path, std::numeric_limits<size_t>::max());
|
||||
}
|
||||
|
@ -94,7 +94,7 @@ class os_access_mock : public os_access {
|
||||
|
||||
void set_access_fail(std::filesystem::path const& path);
|
||||
void set_map_file_error(std::filesystem::path const& path,
|
||||
std::exception_ptr ep, size_t after_n_attempts = 0);
|
||||
std::exception_ptr ep, int after_n_attempts = 0);
|
||||
|
||||
void setenv(std::string name, std::string value);
|
||||
|
||||
@ -119,10 +119,12 @@ class os_access_mock : public os_access {
|
||||
|
||||
std::optional<std::string> getenv(std::string_view name) const override;
|
||||
|
||||
std::set<std::filesystem::path> get_failed_paths() const;
|
||||
|
||||
private:
|
||||
struct error_info {
|
||||
std::exception_ptr ep{};
|
||||
std::atomic<size_t> mutable remaining_successful_attempts{0};
|
||||
std::atomic<int> mutable remaining_successful_attempts{0};
|
||||
};
|
||||
|
||||
static std::vector<std::string> splitpath(std::filesystem::path const& path);
|
||||
|
@ -176,13 +176,13 @@ class mkdwarfs_tester : public tester_common {
|
||||
os->add("sock", {1005, 0140666, 1, 0, 0, 0, 0, 0, 0, 0}, std::string{});
|
||||
}
|
||||
|
||||
std::vector<fs::path> add_random_file_tree(
|
||||
std::vector<std::pair<fs::path, std::string>> add_random_file_tree(
|
||||
random_file_tree_options const& opt = random_file_tree_options{}) {
|
||||
size_t max_size{128 * static_cast<size_t>(opt.avg_size)};
|
||||
std::mt19937_64 rng{42};
|
||||
std::exponential_distribution<> size_dist{1 / opt.avg_size};
|
||||
std::uniform_int_distribution<> path_comp_size_dist{0, opt.max_name_len};
|
||||
std::vector<fs::path> paths;
|
||||
std::vector<std::pair<fs::path, std::string>> paths;
|
||||
|
||||
auto random_path_component = [&] {
|
||||
auto size = path_comp_size_dist(rng);
|
||||
@ -192,19 +192,42 @@ class mkdwarfs_tester : public tester_common {
|
||||
for (int x = 0; x < opt.dimension; ++x) {
|
||||
fs::path d1{random_path_component() + std::to_string(x)};
|
||||
os->add_dir(d1);
|
||||
|
||||
for (int y = 0; y < opt.dimension; ++y) {
|
||||
fs::path d2{d1 / (random_path_component() + std::to_string(y))};
|
||||
os->add_dir(d2);
|
||||
|
||||
for (int z = 0; z < opt.dimension; ++z) {
|
||||
fs::path f{d2 / (random_path_component() + std::to_string(z))};
|
||||
auto size = std::min(max_size, static_cast<size_t>(size_dist(rng)));
|
||||
os->add_file(f, size, true);
|
||||
paths.push_back(f);
|
||||
if (opt.with_errors && rng() % 4 == 0) {
|
||||
std::string data;
|
||||
|
||||
if (rng() % 2 == 0) {
|
||||
data = test::create_random_string(size, rng);
|
||||
} else {
|
||||
data = test::loremipsum(size);
|
||||
}
|
||||
|
||||
os->add_file(f, data);
|
||||
paths.emplace_back(f, data);
|
||||
|
||||
if (opt.with_errors) {
|
||||
auto failpath = fs::path{"/"} / f;
|
||||
switch (rng() % 8) {
|
||||
case 0:
|
||||
os->set_access_fail(failpath);
|
||||
[[fallthrough]];
|
||||
case 1:
|
||||
case 2:
|
||||
os->set_map_file_error(
|
||||
fs::path{"/"} / f,
|
||||
failpath,
|
||||
std::make_exception_ptr(std::runtime_error("map_file_error")),
|
||||
rng() % 4);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -213,11 +236,6 @@ class mkdwarfs_tester : public tester_common {
|
||||
return paths;
|
||||
}
|
||||
|
||||
std::vector<fs::path>
|
||||
add_random_file_tree(double avg_size, int dimension = 20) {
|
||||
return add_random_file_tree({.avg_size = avg_size, .dimension = dimension});
|
||||
}
|
||||
|
||||
void add_test_file_tree() {
|
||||
for (auto const& [stat, name] : test::test_dirtree()) {
|
||||
auto path = name.substr(name.size() == 5 ? 5 : 6);
|
||||
@ -566,10 +584,11 @@ TEST(mkdwarfs_test, input_list_large) {
|
||||
auto t = mkdwarfs_tester::create_empty();
|
||||
t.add_root_dir();
|
||||
auto paths = t.add_random_file_tree({.avg_size = 32.0, .dimension = 32});
|
||||
|
||||
{
|
||||
std::ostringstream os;
|
||||
for (auto const& p : paths) {
|
||||
os << p.string() << '\n';
|
||||
os << p.first.string() << '\n';
|
||||
}
|
||||
t.iol->set_in(os.str());
|
||||
}
|
||||
@ -578,7 +597,10 @@ TEST(mkdwarfs_test, input_list_large) {
|
||||
|
||||
auto fs = t.fs_from_stdout();
|
||||
|
||||
std::set<fs::path> expected{paths.begin(), paths.end()};
|
||||
std::set<fs::path> expected;
|
||||
std::transform(paths.begin(), paths.end(),
|
||||
std::inserter(expected, expected.end()),
|
||||
[](auto const& p) { return p.first; });
|
||||
std::set<fs::path> actual;
|
||||
|
||||
fs.walk([&](auto const& e) {
|
||||
@ -2133,15 +2155,112 @@ TEST(mkdwarfs_test, map_file_error) {
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr("filesystem created with 1 error"));
|
||||
}
|
||||
|
||||
// TODO: enable this once we know how to make it work reliably
|
||||
#if 0
|
||||
TEST(mkdwarfs_test, map_file_error_delayed) {
|
||||
mkdwarfs_tester t;
|
||||
t.os->set_map_file_error(
|
||||
"/somedir/ipsum.py",
|
||||
std::make_exception_ptr(std::runtime_error("map_file_error")), 1);
|
||||
class map_file_error_test : public testing::TestWithParam<char const*> {};
|
||||
|
||||
EXPECT_DEATH(t.run("-i / -o - --categorize --no-progress --log-level=error"),
|
||||
"");
|
||||
TEST_P(map_file_error_test, delayed) {
|
||||
std::string extra_args{GetParam()};
|
||||
|
||||
// TODO: we must also simulate hardlinks here...
|
||||
|
||||
auto t = mkdwarfs_tester::create_empty();
|
||||
t.add_root_dir();
|
||||
auto files = t.add_random_file_tree({.avg_size = 64.0,
|
||||
.dimension = 20,
|
||||
.max_name_len = 8,
|
||||
.with_errors = true});
|
||||
|
||||
// t.os->setenv("DWARFS_DUMP_INODES", "inodes.dump");
|
||||
// t.iol->use_real_terminal(true);
|
||||
|
||||
std::string args = "-i / -o test.dwarfs --no-progress --log-level=verbose";
|
||||
if (!extra_args.empty()) {
|
||||
args += " " + extra_args;
|
||||
}
|
||||
|
||||
EXPECT_EQ(2, t.run(args)) << t.err();
|
||||
|
||||
auto fs = t.fs_from_file("test.dwarfs");
|
||||
// fs.dump(std::cout, 2);
|
||||
|
||||
std::unordered_map<fs::path, std::string> actual_files;
|
||||
fs.walk([&](auto const& dev) {
|
||||
auto iv = dev.inode();
|
||||
if (iv.is_regular_file()) {
|
||||
std::string data;
|
||||
file_stat stat;
|
||||
ASSERT_EQ(0, fs.getattr(iv, &stat));
|
||||
data.resize(stat.size);
|
||||
ASSERT_EQ(data.size(), fs.read(iv.inode_num(), data.data(), data.size()));
|
||||
ASSERT_TRUE(actual_files.emplace(dev.fs_path(), std::move(data)).second);
|
||||
}
|
||||
});
|
||||
|
||||
// check that:
|
||||
// - all original files are present
|
||||
// - they're either empty (in case of errors) or have the original content
|
||||
|
||||
size_t num_non_empty = 0;
|
||||
|
||||
auto failed_expected = t.os->get_failed_paths();
|
||||
std::set<fs::path> failed_actual;
|
||||
|
||||
for (auto const& [path, data] : files) {
|
||||
auto it = actual_files.find(path);
|
||||
ASSERT_NE(actual_files.end(), it);
|
||||
if (!it->second.empty()) {
|
||||
EXPECT_EQ(data, it->second);
|
||||
++num_non_empty;
|
||||
} else if (!data.empty()) {
|
||||
failed_actual.insert(fs::path("/") / path);
|
||||
} else {
|
||||
failed_expected.erase(fs::path("/") / path);
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_LE(failed_actual.size(), failed_expected.size());
|
||||
|
||||
EXPECT_EQ(8000, files.size());
|
||||
EXPECT_GT(num_non_empty, 4000);
|
||||
|
||||
// Ensure that files which never had any errors are all present
|
||||
|
||||
std::set<fs::path> surprisingly_missing;
|
||||
|
||||
std::set_difference(
|
||||
failed_actual.begin(), failed_actual.end(), failed_expected.begin(),
|
||||
failed_expected.end(),
|
||||
std::inserter(surprisingly_missing, surprisingly_missing.begin()));
|
||||
|
||||
std::unordered_map<fs::path, std::string> original_files(files.begin(),
|
||||
files.end());
|
||||
|
||||
EXPECT_EQ(0, surprisingly_missing.size());
|
||||
|
||||
for (auto const& path : surprisingly_missing) {
|
||||
std::cout << "surprisingly missing: " << path << std::endl;
|
||||
auto it = original_files.find(path.relative_path());
|
||||
ASSERT_NE(original_files.end(), it);
|
||||
std::cout << "--- original (" << it->second.size() << " bytes) ---"
|
||||
<< std::endl;
|
||||
std::cout << folly::hexDump(it->second.data(), it->second.size())
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
std::array const map_file_error_args{
|
||||
"",
|
||||
"--categorize",
|
||||
"--order=revpath",
|
||||
"--order=revpath --categorize",
|
||||
"--file-hash=none",
|
||||
"--file-hash=none --categorize",
|
||||
"--file-hash=none --order=revpath",
|
||||
"--file-hash=none --order=revpath --categorize",
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(dwarfs, map_file_error_test,
|
||||
::testing::ValuesIn(map_file_error_args));
|
||||
|
Loading…
x
Reference in New Issue
Block a user