mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-09 20:41:04 -04:00
Support extracting corrupted file systems (fixes github #51)
This adds two new options to `dwarfsextract`, `--continue-on-error` and `--disable-integrity-check`, that allow extracting data from corrupted images.
This commit is contained in:
parent
8550c47873
commit
a326f533d5
@ -56,6 +56,18 @@ to disk:
|
|||||||
if no output directory is specified). For a full list of supported formats,
|
if no output directory is specified). For a full list of supported formats,
|
||||||
see libarchive-formats(5).
|
see libarchive-formats(5).
|
||||||
|
|
||||||
|
- `--continue-on-error`:
|
||||||
|
Try to continue with extraction even when errors are encountered. This
|
||||||
|
only applies to errors that occur while reading from the file system image. Errors
|
||||||
|
when writing the extracted files will still be fatal.
|
||||||
|
|
||||||
|
- `--disable-integrity-check`:
|
||||||
|
This option disables all block integrity checks on the file system data.
|
||||||
|
There is a non-zero chance that this allows further data to be read from
|
||||||
|
corrupted file systems. However, there's also a non-zero chance that it
|
||||||
|
will completely crash the program. So please don't use this unless you
|
||||||
|
know what you're doing.
|
||||||
|
|
||||||
- `-n`, `--num-workers=`*value*:
|
- `-n`, `--num-workers=`*value*:
|
||||||
Number of worker threads used for extracting the filesystem.
|
Number of worker threads used for extracting the filesystem.
|
||||||
|
|
||||||
|
@ -30,6 +30,11 @@ namespace dwarfs {
|
|||||||
class filesystem_v2;
|
class filesystem_v2;
|
||||||
class logger;
|
class logger;
|
||||||
|
|
||||||
|
struct filesystem_extractor_options {
|
||||||
|
size_t max_queued_bytes{4096};
|
||||||
|
bool continue_on_error{false};
|
||||||
|
};
|
||||||
|
|
||||||
class filesystem_extractor {
|
class filesystem_extractor {
|
||||||
public:
|
public:
|
||||||
filesystem_extractor(logger& lgr);
|
filesystem_extractor(logger& lgr);
|
||||||
@ -46,8 +51,10 @@ class filesystem_extractor {
|
|||||||
|
|
||||||
void close() { return impl_->close(); }
|
void close() { return impl_->close(); }
|
||||||
|
|
||||||
void extract(filesystem_v2 const& fs, size_t max_queued_bytes) {
|
bool
|
||||||
return impl_->extract(fs, max_queued_bytes);
|
extract(filesystem_v2 const& fs, filesystem_extractor_options const& opts =
|
||||||
|
filesystem_extractor_options()) {
|
||||||
|
return impl_->extract(fs, opts);
|
||||||
}
|
}
|
||||||
|
|
||||||
class impl {
|
class impl {
|
||||||
@ -59,7 +66,8 @@ class filesystem_extractor {
|
|||||||
virtual void open_stream(std::ostream& os, std::string const& format) = 0;
|
virtual void open_stream(std::ostream& os, std::string const& format) = 0;
|
||||||
virtual void open_disk(std::string const& output) = 0;
|
virtual void open_disk(std::string const& output) = 0;
|
||||||
virtual void close() = 0;
|
virtual void close() = 0;
|
||||||
virtual void extract(filesystem_v2 const& fs, size_t max_queued_bytes) = 0;
|
virtual bool extract(filesystem_v2 const& fs,
|
||||||
|
filesystem_extractor_options const& opts) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -43,6 +43,7 @@ struct block_cache_options {
|
|||||||
double decompress_ratio{1.0};
|
double decompress_ratio{1.0};
|
||||||
bool mm_release{true};
|
bool mm_release{true};
|
||||||
bool init_workers{true};
|
bool init_workers{true};
|
||||||
|
bool disable_block_integrity_check{false};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct cache_tidy_config {
|
struct cache_tidy_config {
|
||||||
|
@ -56,7 +56,7 @@ namespace dwarfs {
|
|||||||
class cached_block {
|
class cached_block {
|
||||||
public:
|
public:
|
||||||
cached_block(logger& lgr, fs_section const& b, std::shared_ptr<mmif> mm,
|
cached_block(logger& lgr, fs_section const& b, std::shared_ptr<mmif> mm,
|
||||||
bool release)
|
bool release, bool disable_integrity_check)
|
||||||
: decompressor_(std::make_unique<block_decompressor>(
|
: decompressor_(std::make_unique<block_decompressor>(
|
||||||
b.compression(), mm->as<uint8_t>(b.start()), b.length(), data_))
|
b.compression(), mm->as<uint8_t>(b.start()), b.length(), data_))
|
||||||
, mm_(std::move(mm))
|
, mm_(std::move(mm))
|
||||||
@ -64,7 +64,7 @@ class cached_block {
|
|||||||
, LOG_PROXY_INIT(lgr)
|
, LOG_PROXY_INIT(lgr)
|
||||||
, release_(release)
|
, release_(release)
|
||||||
, uncompressed_size_{decompressor_->uncompressed_size()} {
|
, uncompressed_size_{decompressor_->uncompressed_size()} {
|
||||||
if (!section_.check_fast(*mm_)) {
|
if (!disable_integrity_check && !section_.check_fast(*mm_)) {
|
||||||
DWARFS_THROW(runtime_error, "block data integrity check failed");
|
DWARFS_THROW(runtime_error, "block data integrity check failed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -483,7 +483,7 @@ class block_cache_ final : public block_cache::impl {
|
|||||||
|
|
||||||
auto block = std::make_shared<cached_block>(
|
auto block = std::make_shared<cached_block>(
|
||||||
LOG_GET_LOGGER, DWARFS_NOTHROW(block_.at(block_no)), mm_,
|
LOG_GET_LOGGER, DWARFS_NOTHROW(block_.at(block_no)), mm_,
|
||||||
options_.mm_release);
|
options_.mm_release, options_.disable_block_integrity_check);
|
||||||
++blocks_created_;
|
++blocks_created_;
|
||||||
|
|
||||||
// Make a new set for the block
|
// Make a new set for the block
|
||||||
|
@ -72,6 +72,11 @@ class cache_semaphore {
|
|||||||
int64_t size_{0};
|
int64_t size_{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class archive_error : public std::runtime_error {
|
||||||
|
public:
|
||||||
|
using std::runtime_error::runtime_error;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
@ -146,7 +151,8 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
|
|||||||
closefd(pipefd_[0]);
|
closefd(pipefd_[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void extract(filesystem_v2 const& fs, size_t max_queued_bytes) override;
|
bool extract(filesystem_v2 const& fs,
|
||||||
|
filesystem_extractor_options const& opts) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void closefd(int& fd) {
|
void closefd(int& fd) {
|
||||||
@ -189,7 +195,7 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
|
|||||||
break;
|
break;
|
||||||
case ARCHIVE_RETRY:
|
case ARCHIVE_RETRY:
|
||||||
case ARCHIVE_FATAL:
|
case ARCHIVE_FATAL:
|
||||||
DWARFS_THROW(runtime_error, std::string(archive_error_string(a_)));
|
throw archive_error(std::string(archive_error_string(a_)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -200,8 +206,8 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
bool filesystem_extractor_<LoggerPolicy>::extract(
|
||||||
size_t max_queued_bytes) {
|
filesystem_v2 const& fs, filesystem_extractor_options const& opts) {
|
||||||
DWARFS_CHECK(a_, "filesystem not opened");
|
DWARFS_CHECK(a_, "filesystem not opened");
|
||||||
|
|
||||||
auto lr = ::archive_entry_linkresolver_new();
|
auto lr = ::archive_entry_linkresolver_new();
|
||||||
@ -217,11 +223,13 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
|||||||
worker_group archiver("archiver", 1);
|
worker_group archiver("archiver", 1);
|
||||||
cache_semaphore sem;
|
cache_semaphore sem;
|
||||||
|
|
||||||
LOG_DEBUG << "extractor semaphore size: " << max_queued_bytes << " bytes";
|
LOG_DEBUG << "extractor semaphore size: " << opts.max_queued_bytes
|
||||||
|
<< " bytes";
|
||||||
|
|
||||||
sem.post(max_queued_bytes);
|
sem.post(opts.max_queued_bytes);
|
||||||
|
|
||||||
std::atomic<bool> abort{false};
|
std::atomic<size_t> hard_error{0};
|
||||||
|
std::atomic<size_t> soft_error{0};
|
||||||
|
|
||||||
auto do_archive = [&](::archive_entry* ae,
|
auto do_archive = [&](::archive_entry* ae,
|
||||||
inode_view entry) { // TODO: inode vs. entry
|
inode_view entry) { // TODO: inode vs. entry
|
||||||
@ -232,14 +240,16 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
|||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
size_t remain = size;
|
size_t remain = size;
|
||||||
|
|
||||||
while (remain > 0 && !abort) {
|
while (remain > 0 && hard_error == 0) {
|
||||||
size_t bs = remain < max_queued_bytes ? remain : max_queued_bytes;
|
size_t bs =
|
||||||
|
remain < opts.max_queued_bytes ? remain : opts.max_queued_bytes;
|
||||||
|
|
||||||
sem.wait(bs);
|
sem.wait(bs);
|
||||||
|
|
||||||
if (auto ranges = fs.readv(fd, bs, pos)) {
|
if (auto ranges = fs.readv(fd, bs, pos)) {
|
||||||
archiver.add_job([this, &sem, &abort, ranges = std::move(*ranges), ae,
|
archiver.add_job([this, &sem, &hard_error, &soft_error, &opts,
|
||||||
pos, remain, bs, size]() mutable {
|
ranges = std::move(*ranges), ae, pos, remain, bs,
|
||||||
|
size]() mutable {
|
||||||
try {
|
try {
|
||||||
if (pos == 0) {
|
if (pos == 0) {
|
||||||
LOG_DEBUG << "extracting " << ::archive_entry_pathname(ae)
|
LOG_DEBUG << "extracting " << ::archive_entry_pathname(ae)
|
||||||
@ -256,9 +266,17 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
|||||||
archive_entry_free(ae);
|
archive_entry_free(ae);
|
||||||
}
|
}
|
||||||
sem.post(bs);
|
sem.post(bs);
|
||||||
|
} catch (archive_error const& e) {
|
||||||
|
LOG_ERROR << folly::exceptionStr(e);
|
||||||
|
++hard_error;
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
|
if (opts.continue_on_error) {
|
||||||
|
LOG_WARN << folly::exceptionStr(std::current_exception());
|
||||||
|
++soft_error;
|
||||||
|
} else {
|
||||||
LOG_ERROR << folly::exceptionStr(std::current_exception());
|
LOG_ERROR << folly::exceptionStr(std::current_exception());
|
||||||
abort = true;
|
++hard_error;
|
||||||
|
}
|
||||||
archive_entry_free(ae);
|
archive_entry_free(ae);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@ -273,13 +291,13 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
|||||||
remain -= bs;
|
remain -= bs;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
archiver.add_job([this, ae, &abort] {
|
archiver.add_job([this, ae, &hard_error] {
|
||||||
SCOPE_EXIT { ::archive_entry_free(ae); };
|
SCOPE_EXIT { ::archive_entry_free(ae); };
|
||||||
try {
|
try {
|
||||||
check_result(::archive_write_header(a_, ae));
|
check_result(::archive_write_header(a_, ae));
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
LOG_ERROR << folly::exceptionStr(std::current_exception());
|
LOG_ERROR << folly::exceptionStr(std::current_exception());
|
||||||
abort = true;
|
hard_error = true;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -287,7 +305,7 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
|||||||
|
|
||||||
fs.walk_data_order([&](auto entry) {
|
fs.walk_data_order([&](auto entry) {
|
||||||
// TODO: we can surely early abort walk() somehow
|
// TODO: we can surely early abort walk() somehow
|
||||||
if (entry.is_root() || abort) {
|
if (entry.is_root() || hard_error) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -329,7 +347,7 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
|||||||
|
|
||||||
archiver.wait();
|
archiver.wait();
|
||||||
|
|
||||||
if (abort) {
|
if (hard_error) {
|
||||||
DWARFS_THROW(runtime_error, "extraction aborted");
|
DWARFS_THROW(runtime_error, "extraction aborted");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -340,6 +358,15 @@ void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2 const& fs,
|
|||||||
if (ae) {
|
if (ae) {
|
||||||
DWARFS_THROW(runtime_error, "unexpected deferred entry");
|
DWARFS_THROW(runtime_error, "unexpected deferred entry");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (soft_error > 0) {
|
||||||
|
LOG_ERROR << "extraction finished with " << soft_error << " error(s)";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_INFO << "extraction finished without errors";
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
filesystem_extractor::filesystem_extractor(logger& lgr)
|
filesystem_extractor::filesystem_extractor(logger& lgr)
|
||||||
|
@ -45,6 +45,7 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
std::string filesystem, output, format, cache_size_str, log_level,
|
std::string filesystem, output, format, cache_size_str, log_level,
|
||||||
image_offset;
|
image_offset;
|
||||||
size_t num_workers;
|
size_t num_workers;
|
||||||
|
bool continue_on_error{false}, disable_integrity_check{false};
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
po::options_description opts("Command line options");
|
po::options_description opts("Command line options");
|
||||||
@ -61,6 +62,12 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
("format,f",
|
("format,f",
|
||||||
po::value<std::string>(&format),
|
po::value<std::string>(&format),
|
||||||
"output format")
|
"output format")
|
||||||
|
("continue-on-error",
|
||||||
|
po::value<bool>(&continue_on_error)->zero_tokens(),
|
||||||
|
"continue if errors are encountered")
|
||||||
|
("disable-integrity-check",
|
||||||
|
po::value<bool>(&disable_integrity_check)->zero_tokens(),
|
||||||
|
"disable file system image block integrity check (dangerous)")
|
||||||
("num-workers,n",
|
("num-workers,n",
|
||||||
po::value<size_t>(&num_workers)->default_value(4),
|
po::value<size_t>(&num_workers)->default_value(4),
|
||||||
"number of worker threads")
|
"number of worker threads")
|
||||||
@ -89,6 +96,8 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int rv = 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto level = logger::parse_level(log_level);
|
auto level = logger::parse_level(log_level);
|
||||||
stream_logger lgr(std::cerr, level, level >= logger::DEBUG);
|
stream_logger lgr(std::cerr, level, level >= logger::DEBUG);
|
||||||
@ -103,6 +112,7 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
|
|
||||||
fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str);
|
fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str);
|
||||||
fsopts.block_cache.num_workers = num_workers;
|
fsopts.block_cache.num_workers = num_workers;
|
||||||
|
fsopts.block_cache.disable_block_integrity_check = disable_integrity_check;
|
||||||
fsopts.metadata.enable_nlink = true;
|
fsopts.metadata.enable_nlink = true;
|
||||||
|
|
||||||
filesystem_v2 fs(lgr, std::make_shared<mmap>(filesystem), fsopts);
|
filesystem_v2 fs(lgr, std::make_shared<mmap>(filesystem), fsopts);
|
||||||
@ -117,7 +127,12 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
fsx.open_archive(output, format);
|
fsx.open_archive(output, format);
|
||||||
}
|
}
|
||||||
|
|
||||||
fsx.extract(fs, fsopts.block_cache.max_bytes);
|
filesystem_extractor_options fsx_opts;
|
||||||
|
|
||||||
|
fsx_opts.max_queued_bytes = fsopts.block_cache.max_bytes;
|
||||||
|
fsx_opts.continue_on_error = continue_on_error;
|
||||||
|
|
||||||
|
rv = fsx.extract(fs, fsx_opts) ? 0 : 2;
|
||||||
|
|
||||||
fsx.close();
|
fsx.close();
|
||||||
} catch (runtime_error const& e) {
|
} catch (runtime_error const& e) {
|
||||||
@ -131,7 +146,7 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -972,7 +972,7 @@ void check_compat(logger& lgr, filesystem_v2 const& fs,
|
|||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
|
|
||||||
EXPECT_NO_THROW(ext.open_stream(oss, "mtree"));
|
EXPECT_NO_THROW(ext.open_stream(oss, "mtree"));
|
||||||
EXPECT_NO_THROW(ext.extract(fs, 4096));
|
EXPECT_NO_THROW(ext.extract(fs));
|
||||||
EXPECT_NO_THROW(ext.close());
|
EXPECT_NO_THROW(ext.close());
|
||||||
|
|
||||||
std::istringstream iss(oss.str());
|
std::istringstream iss(oss.str());
|
||||||
|
Loading…
x
Reference in New Issue
Block a user