mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-08 20:12:56 -04:00
Support extracting corrupted file systems (fixes github #51)
This adds two new options `--continue-on-error` and `--disable-integrity-check` to `dwarfsextract` that allow extracting data from corrupted images.
This commit is contained in:
parent
8550c47873
commit
a326f533d5
@ -56,6 +56,18 @@ to disk:
|
||||
if no output directory is specified). For a full list of supported formats,
|
||||
see libarchive-formats(5).
|
||||
|
||||
- `--continue-on-error`:
|
||||
Try to continue with extraction even when errors are encountered. This
|
||||
only applies to errors when reading from the file system image. Errors
|
||||
when writing the extracted files will still be fatal.
|
||||
|
||||
- `--disable-integrity-check`:
|
||||
This option disables all block integrity checks on the file system data.
|
||||
There is a non-zero chance that this allows further data to be read from
|
||||
corrupted file systems. However, there's also a non-zero chance that it
|
||||
will completely crash the program. So please don't use this unless you
|
||||
know what you're doing.
|
||||
|
||||
- `-n`, `--num-workers=`*value*:
|
||||
Number of worker threads used for extracting the filesystem.
|
||||
|
||||
|
@ -30,6 +30,11 @@ namespace dwarfs {
|
||||
class filesystem_v2;
|
||||
class logger;
|
||||
|
||||
// Options accepted by filesystem_extractor::extract(); a default-constructed
// instance is used when the caller passes none.
struct filesystem_extractor_options {
|
||||
// Initial size of the extractor's byte semaphore and the upper bound on the
// size of a single chunk read from the file system before it is queued.
size_t max_queued_bytes{4096};
|
||||
// When true, exceptions other than archive_error raised while processing a
// queued chunk are logged as warnings and counted as soft errors instead of
// aborting; extract() then returns false once it has finished.
bool continue_on_error{false};
|
||||
};
|
||||
|
||||
class filesystem_extractor {
|
||||
public:
|
||||
filesystem_extractor(logger& lgr);
|
||||
@ -46,8 +51,10 @@ class filesystem_extractor {
|
||||
|
||||
void close() { return impl_->close(); }
|
||||
|
||||
void extract(filesystem_v2 const& fs, size_t max_queued_bytes) {
|
||||
return impl_->extract(fs, max_queued_bytes);
|
||||
bool
|
||||
extract(filesystem_v2 const& fs, filesystem_extractor_options const& opts =
|
||||
filesystem_extractor_options()) {
|
||||
return impl_->extract(fs, opts);
|
||||
}
|
||||
|
||||
class impl {
|
||||
@ -59,7 +66,8 @@ class filesystem_extractor {
|
||||
virtual void open_stream(std::ostream& os, std::string const& format) = 0;
|
||||
virtual void open_disk(std::string const& output) = 0;
|
||||
virtual void close() = 0;
|
||||
virtual void extract(filesystem_v2 const& fs, size_t max_queued_bytes) = 0;
|
||||
virtual bool extract(filesystem_v2 const& fs,
|
||||
filesystem_extractor_options const& opts) = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -43,6 +43,7 @@ struct block_cache_options {
|
||||
double decompress_ratio{1.0};
|
||||
bool mm_release{true};
|
||||
bool init_workers{true};
|
||||
bool disable_block_integrity_check{false};
|
||||
};
|
||||
|
||||
struct cache_tidy_config {
|
||||
|
@ -56,7 +56,7 @@ namespace dwarfs {
|
||||
class cached_block {
|
||||
public:
|
||||
cached_block(logger& lgr, fs_section const& b, std::shared_ptr<mmif> mm,
|
||||
bool release)
|
||||
bool release, bool disable_integrity_check)
|
||||
: decompressor_(std::make_unique<block_decompressor>(
|
||||
b.compression(), mm->as<uint8_t>(b.start()), b.length(), data_))
|
||||
, mm_(std::move(mm))
|
||||
@ -64,7 +64,7 @@ class cached_block {
|
||||
, LOG_PROXY_INIT(lgr)
|
||||
, release_(release)
|
||||
, uncompressed_size_{decompressor_->uncompressed_size()} {
|
||||
if (!section_.check_fast(*mm_)) {
|
||||
if (!disable_integrity_check && !section_.check_fast(*mm_)) {
|
||||
DWARFS_THROW(runtime_error, "block data integrity check failed");
|
||||
}
|
||||
}
|
||||
@ -483,7 +483,7 @@ class block_cache_ final : public block_cache::impl {
|
||||
|
||||
auto block = std::make_shared<cached_block>(
|
||||
LOG_GET_LOGGER, DWARFS_NOTHROW(block_.at(block_no)), mm_,
|
||||
options_.mm_release);
|
||||
options_.mm_release, options_.disable_block_integrity_check);
|
||||
++blocks_created_;
|
||||
|
||||
// Make a new set for the block
|
||||
|
@ -72,6 +72,11 @@ class cache_semaphore {
|
||||
int64_t size_{0};
|
||||
};
|
||||
|
||||
// Exception thrown when a libarchive call reports ARCHIVE_RETRY or
// ARCHIVE_FATAL. The extractor catches this type separately from all other
// exceptions and always counts it as a hard error, independent of the
// continue_on_error option.
class archive_error : public std::runtime_error {
|
||||
public:
|
||||
// Inherit all std::runtime_error constructors (message-only construction).
using std::runtime_error::runtime_error;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
@ -146,7 +151,8 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
|
||||
closefd(pipefd_[0]);
|
||||
}
|
||||
|
||||
void extract(filesystem_v2 const& fs, size_t max_queued_bytes) override;
|
||||
bool extract(filesystem_v2 const& fs,
|
||||
filesystem_extractor_options const& opts) override;
|
||||
|
||||
private:
|
||||
void closefd(int& fd) {
|
||||
@ -189,7 +195,7 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
|
||||
break;
|
||||
case ARCHIVE_RETRY:
|
||||
case ARCHIVE_FATAL:
|
||||
DWARFS_THROW(runtime_error, std::string(archive_error_string(a_)));
|
||||
throw archive_error(std::string(archive_error_string(a_)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -200,8 +206,8 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
||||
size_t max_queued_bytes) {
|
||||
bool filesystem_extractor_<LoggerPolicy>::extract(
|
||||
filesystem_v2 const& fs, filesystem_extractor_options const& opts) {
|
||||
DWARFS_CHECK(a_, "filesystem not opened");
|
||||
|
||||
auto lr = ::archive_entry_linkresolver_new();
|
||||
@ -217,11 +223,13 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
||||
worker_group archiver("archiver", 1);
|
||||
cache_semaphore sem;
|
||||
|
||||
LOG_DEBUG << "extractor semaphore size: " << max_queued_bytes << " bytes";
|
||||
LOG_DEBUG << "extractor semaphore size: " << opts.max_queued_bytes
|
||||
<< " bytes";
|
||||
|
||||
sem.post(max_queued_bytes);
|
||||
sem.post(opts.max_queued_bytes);
|
||||
|
||||
std::atomic<bool> abort{false};
|
||||
std::atomic<size_t> hard_error{0};
|
||||
std::atomic<size_t> soft_error{0};
|
||||
|
||||
auto do_archive = [&](::archive_entry* ae,
|
||||
inode_view entry) { // TODO: inode vs. entry
|
||||
@ -232,14 +240,16 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
||||
size_t pos = 0;
|
||||
size_t remain = size;
|
||||
|
||||
while (remain > 0 && !abort) {
|
||||
size_t bs = remain < max_queued_bytes ? remain : max_queued_bytes;
|
||||
while (remain > 0 && hard_error == 0) {
|
||||
size_t bs =
|
||||
remain < opts.max_queued_bytes ? remain : opts.max_queued_bytes;
|
||||
|
||||
sem.wait(bs);
|
||||
|
||||
if (auto ranges = fs.readv(fd, bs, pos)) {
|
||||
archiver.add_job([this, &sem, &abort, ranges = std::move(*ranges), ae,
|
||||
pos, remain, bs, size]() mutable {
|
||||
archiver.add_job([this, &sem, &hard_error, &soft_error, &opts,
|
||||
ranges = std::move(*ranges), ae, pos, remain, bs,
|
||||
size]() mutable {
|
||||
try {
|
||||
if (pos == 0) {
|
||||
LOG_DEBUG << "extracting " << ::archive_entry_pathname(ae)
|
||||
@ -256,9 +266,17 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
||||
archive_entry_free(ae);
|
||||
}
|
||||
sem.post(bs);
|
||||
} catch (archive_error const& e) {
|
||||
LOG_ERROR << folly::exceptionStr(e);
|
||||
++hard_error;
|
||||
} catch (...) {
|
||||
LOG_ERROR << folly::exceptionStr(std::current_exception());
|
||||
abort = true;
|
||||
if (opts.continue_on_error) {
|
||||
LOG_WARN << folly::exceptionStr(std::current_exception());
|
||||
++soft_error;
|
||||
} else {
|
||||
LOG_ERROR << folly::exceptionStr(std::current_exception());
|
||||
++hard_error;
|
||||
}
|
||||
archive_entry_free(ae);
|
||||
}
|
||||
});
|
||||
@ -273,13 +291,13 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
||||
remain -= bs;
|
||||
}
|
||||
} else {
|
||||
archiver.add_job([this, ae, &abort] {
|
||||
archiver.add_job([this, ae, &hard_error] {
|
||||
SCOPE_EXIT { ::archive_entry_free(ae); };
|
||||
try {
|
||||
check_result(::archive_write_header(a_, ae));
|
||||
} catch (...) {
|
||||
LOG_ERROR << folly::exceptionStr(std::current_exception());
|
||||
abort = true;
|
||||
hard_error = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
@ -287,7 +305,7 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
||||
|
||||
fs.walk_data_order([&](auto entry) {
|
||||
// TODO: we can surely early abort walk() somehow
|
||||
if (entry.is_root() || abort) {
|
||||
if (entry.is_root() || hard_error) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -329,7 +347,7 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
||||
|
||||
archiver.wait();
|
||||
|
||||
if (abort) {
|
||||
if (hard_error) {
|
||||
DWARFS_THROW(runtime_error, "extraction aborted");
|
||||
}
|
||||
|
||||
@ -340,6 +358,15 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
||||
if (ae) {
|
||||
DWARFS_THROW(runtime_error, "unexpected deferred entry");
|
||||
}
|
||||
|
||||
if (soft_error > 0) {
|
||||
LOG_ERROR << "extraction finished with " << soft_error << " error(s)";
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG_INFO << "extraction finished without errors";
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
filesystem_extractor::filesystem_extractor(logger& lgr)
|
||||
|
@ -45,6 +45,7 @@ int dwarfsextract(int argc, char** argv) {
|
||||
std::string filesystem, output, format, cache_size_str, log_level,
|
||||
image_offset;
|
||||
size_t num_workers;
|
||||
bool continue_on_error{false}, disable_integrity_check{false};
|
||||
|
||||
// clang-format off
|
||||
po::options_description opts("Command line options");
|
||||
@ -61,6 +62,12 @@ int dwarfsextract(int argc, char** argv) {
|
||||
("format,f",
|
||||
po::value<std::string>(&format),
|
||||
"output format")
|
||||
("continue-on-error",
|
||||
po::value<bool>(&continue_on_error)->zero_tokens(),
|
||||
"continue if errors are encountered")
|
||||
("disable-integrity-check",
|
||||
po::value<bool>(&disable_integrity_check)->zero_tokens(),
|
||||
"disable file system image block integrity check (dangerous)")
|
||||
("num-workers,n",
|
||||
po::value<size_t>(&num_workers)->default_value(4),
|
||||
"number of worker threads")
|
||||
@ -89,6 +96,8 @@ int dwarfsextract(int argc, char** argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int rv = 0;
|
||||
|
||||
try {
|
||||
auto level = logger::parse_level(log_level);
|
||||
stream_logger lgr(std::cerr, level, level >= logger::DEBUG);
|
||||
@ -103,6 +112,7 @@ int dwarfsextract(int argc, char** argv) {
|
||||
|
||||
fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str);
|
||||
fsopts.block_cache.num_workers = num_workers;
|
||||
fsopts.block_cache.disable_block_integrity_check = disable_integrity_check;
|
||||
fsopts.metadata.enable_nlink = true;
|
||||
|
||||
filesystem_v2 fs(lgr, std::make_shared<mmap>(filesystem), fsopts);
|
||||
@ -117,7 +127,12 @@ int dwarfsextract(int argc, char** argv) {
|
||||
fsx.open_archive(output, format);
|
||||
}
|
||||
|
||||
fsx.extract(fs, fsopts.block_cache.max_bytes);
|
||||
filesystem_extractor_options fsx_opts;
|
||||
|
||||
fsx_opts.max_queued_bytes = fsopts.block_cache.max_bytes;
|
||||
fsx_opts.continue_on_error = continue_on_error;
|
||||
|
||||
rv = fsx.extract(fs, fsx_opts) ? 0 : 2;
|
||||
|
||||
fsx.close();
|
||||
} catch (runtime_error const& e) {
|
||||
@ -131,7 +146,7 @@ int dwarfsextract(int argc, char** argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return rv;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -972,7 +972,7 @@ void check_compat(logger& lgr, filesystem_v2 const& fs,
|
||||
std::ostringstream oss;
|
||||
|
||||
EXPECT_NO_THROW(ext.open_stream(oss, "mtree"));
|
||||
EXPECT_NO_THROW(ext.extract(fs, 4096));
|
||||
EXPECT_NO_THROW(ext.extract(fs));
|
||||
EXPECT_NO_THROW(ext.close());
|
||||
|
||||
std::istringstream iss(oss.str());
|
||||
|
Loading…
x
Reference in New Issue
Block a user