mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-14 14:59:52 -04:00
feat(filesystem): load image even if non-metadata sections are corrupt
This commit is contained in:
parent
91368d0550
commit
fd005e0e06
@ -272,25 +272,51 @@ using section_map = std::unordered_map<section_type, std::vector<fs_section>>;
|
||||
|
||||
size_t
|
||||
get_uncompressed_section_size(std::shared_ptr<mmif> mm, fs_section const& sec) {
|
||||
if (sec.compression() == compression_type::NONE) {
|
||||
return sec.length();
|
||||
}
|
||||
|
||||
if (!sec.check_fast(*mm)) {
|
||||
DWARFS_THROW(
|
||||
runtime_error,
|
||||
fmt::format("attempt to access damaged {} section", sec.name()));
|
||||
}
|
||||
|
||||
std::vector<uint8_t> tmp;
|
||||
block_decompressor bd(sec.compression(), mm->as<uint8_t>(sec.start()),
|
||||
sec.length(), tmp);
|
||||
auto span = sec.data(*mm);
|
||||
block_decompressor bd(sec.compression(), span.data(), span.size(), tmp);
|
||||
return bd.uncompressed_size();
|
||||
}
|
||||
|
||||
std::optional<size_t>
|
||||
try_get_uncompressed_section_size(std::shared_ptr<mmif> mm,
|
||||
fs_section const& sec) {
|
||||
if (sec.check_fast(*mm)) {
|
||||
try {
|
||||
return get_uncompressed_section_size(mm, sec);
|
||||
} catch (std::exception const&) {
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Returns a view of the payload of `section` within the mapped image `mm`.
//
// The section must pass its fast checksum; otherwise DWARFS_CHECK fails with
// an "attempt to access damaged ... section" message (the exact failure mode
// is defined by the DWARFS_CHECK macro, which is not visible here).
//
// When the section is stored uncompressed and `force_buffer` is false, the
// returned span is whatever `section.data(*mm)` yields and `buffer` is left
// untouched. In every other case the payload is run through
// block_decompressor::decompress(), the result is stored in `buffer`, and the
// returned span refers to `buffer` - so `buffer` must outlive the returned
// span.
std::span<uint8_t const>
get_section_data(std::shared_ptr<mmif> mm, fs_section const& section,
                 std::vector<uint8_t>& buffer, bool force_buffer) {
  DWARFS_CHECK(
      section.check_fast(*mm),
      fmt::format("attempt to access damaged {} section", section.name()));

  auto span = section.data(*mm);
  auto compression = section.compression();

  if (!force_buffer && compression == compression_type::NONE) {
    return span;
  }

  buffer =
      block_decompressor::decompress(compression, span.data(), span.size());

  return buffer;
}
|
||||
@ -538,7 +564,17 @@ filesystem_<LoggerPolicy>::filesystem_(
|
||||
check_section(*s);
|
||||
|
||||
if (!s->check_fast(*mm_)) {
|
||||
DWARFS_THROW(runtime_error, "checksum error in section: " + s->name());
|
||||
switch (s->type()) {
|
||||
case section_type::METADATA_V2:
|
||||
case section_type::METADATA_V2_SCHEMA:
|
||||
DWARFS_THROW(runtime_error,
|
||||
"checksum error in section: " + s->name());
|
||||
break;
|
||||
|
||||
default:
|
||||
LOG_WARN << "checksum error in section: " << s->name();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
sections[s->type()].push_back(*s);
|
||||
@ -560,8 +596,10 @@ filesystem_<LoggerPolicy>::filesystem_(
|
||||
|
||||
if (auto it = sections.find(section_type::HISTORY); it != sections.end()) {
|
||||
for (auto& section : it->second) {
|
||||
std::vector<uint8_t> buffer;
|
||||
history_.parse_append(get_section_data(mm_, section, buffer, false));
|
||||
if (section.check_fast(*mm_)) {
|
||||
std::vector<uint8_t> buffer;
|
||||
history_.parse_append(get_section_data(mm_, section, buffer, false));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -811,8 +849,17 @@ void filesystem_<LoggerPolicy>::dump(std::ostream& os, int detail_level) const {
|
||||
while (auto sp = parser.next_section()) {
|
||||
auto const& s = *sp;
|
||||
|
||||
auto uncompressed_size = get_uncompressed_section_size(mm_, s);
|
||||
float compression_ratio = float(s.length()) / uncompressed_size;
|
||||
std::string block_size;
|
||||
|
||||
if (auto uncompressed_size = try_get_uncompressed_section_size(mm_, s)) {
|
||||
float compression_ratio = float(s.length()) / uncompressed_size.value();
|
||||
block_size =
|
||||
fmt::format("blocksize={}, ratio={:.2f}%",
|
||||
uncompressed_size.value(), 100.0 * compression_ratio);
|
||||
} else {
|
||||
block_size = fmt::format("blocksize={} (estimate)", s.length());
|
||||
}
|
||||
|
||||
std::string category;
|
||||
|
||||
if (s.type() == section_type::BLOCK) {
|
||||
@ -822,9 +869,8 @@ void filesystem_<LoggerPolicy>::dump(std::ostream& os, int detail_level) const {
|
||||
++block_no;
|
||||
}
|
||||
|
||||
os << "SECTION " << s.description() << ", blocksize=" << uncompressed_size
|
||||
<< ", ratio=" << fmt::format("{:.2f}%", 100.0 * compression_ratio)
|
||||
<< category << "\n";
|
||||
os << "SECTION " << s.description() << ", " << block_size << category
|
||||
<< "\n";
|
||||
}
|
||||
}
|
||||
|
||||
@ -874,18 +920,21 @@ filesystem_<LoggerPolicy>::info_as_dynamic(int detail_level) const {
|
||||
while (auto sp = parser.next_section()) {
|
||||
auto const& s = *sp;
|
||||
|
||||
auto uncompressed_size = get_uncompressed_section_size(mm_, s);
|
||||
float compression_ratio = float(s.length()) / uncompressed_size;
|
||||
bool checksum_ok = s.check_fast(*mm_);
|
||||
|
||||
folly::dynamic section_info = folly::dynamic::object
|
||||
// clang-format off
|
||||
("type", s.name())
|
||||
("size", uncompressed_size)
|
||||
("compressed_size", s.length())
|
||||
("ratio", compression_ratio)
|
||||
("checksum_ok", checksum_ok)
|
||||
// clang-format on
|
||||
;
|
||||
|
||||
if (auto uncompressed_size = try_get_uncompressed_section_size(mm_, s)) {
|
||||
section_info["size"] = uncompressed_size.value();
|
||||
section_info["ratio"] = float(s.length()) / uncompressed_size.value();
|
||||
}
|
||||
|
||||
if (s.type() == section_type::BLOCK) {
|
||||
if (auto catstr = meta_.get_block_category(block_no)) {
|
||||
section_info["category"] = catstr.value();
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <folly/String.h>
|
||||
#include <folly/container/Enumerate.h>
|
||||
#include <folly/json.h>
|
||||
|
||||
#include "dwarfs/filesystem_v2.h"
|
||||
@ -1781,7 +1782,7 @@ TEST(dwarfsck_test, check_fail) {
|
||||
EXPECT_EQ(0, t.run({"image.dwarfs", "--check-integrity"})) << t.err();
|
||||
}
|
||||
|
||||
std::map<std::string, size_t> section_offsets;
|
||||
std::vector<std::pair<std::string, size_t>> section_offsets;
|
||||
|
||||
{
|
||||
auto t = dwarfsck_tester::create_with_image(image);
|
||||
@ -1795,33 +1796,84 @@ TEST(dwarfsck_test, check_fail) {
|
||||
for (auto const& section : info["sections"]) {
|
||||
auto type = section["type"].asString();
|
||||
auto size = section["compressed_size"].asInt();
|
||||
section_offsets[type] = offset;
|
||||
section_offsets.emplace_back(type, offset);
|
||||
offset += section_header_size + size;
|
||||
}
|
||||
|
||||
EXPECT_EQ(image.size(), offset);
|
||||
}
|
||||
|
||||
size_t index = 0;
|
||||
|
||||
for (auto const& [type, offset] : section_offsets) {
|
||||
bool const is_metadata_section =
|
||||
type == "METADATA_V2" || type == "METADATA_V2_SCHEMA";
|
||||
bool const is_block = type == "BLOCK";
|
||||
auto corrupt_image = image;
|
||||
// flip a bit right after the header
|
||||
corrupt_image[offset + section_header_size] ^= 0x01;
|
||||
|
||||
// std::cout << "corrupting section: " << type << " @ " << offset << "\n";
|
||||
|
||||
{
|
||||
test::test_logger lgr;
|
||||
auto make_fs = [&] {
|
||||
return filesystem_v2{lgr,
|
||||
std::make_shared<test::mmap_mock>(corrupt_image)};
|
||||
};
|
||||
if (is_metadata_section) {
|
||||
EXPECT_THAT([&] { make_fs(); },
|
||||
::testing::ThrowsMessage<dwarfs::runtime_error>(
|
||||
::testing::HasSubstr(fmt::format(
|
||||
"checksum error in section: {}", type))));
|
||||
} else {
|
||||
auto fs = make_fs();
|
||||
auto& log = lgr.get_log();
|
||||
if (is_block) {
|
||||
EXPECT_EQ(0, log.size());
|
||||
} else {
|
||||
ASSERT_EQ(1, log.size());
|
||||
EXPECT_THAT(log[0].output,
|
||||
::testing::HasSubstr(
|
||||
fmt::format("checksum error in section: {}", type)));
|
||||
}
|
||||
auto info = fs.info_as_dynamic(3);
|
||||
ASSERT_EQ(1, info.count("sections"));
|
||||
ASSERT_EQ(section_offsets.size(), info["sections"].size());
|
||||
for (auto const& [i, section] : folly::enumerate(info["sections"])) {
|
||||
EXPECT_EQ(section["checksum_ok"].asBool(), i != index)
|
||||
<< type << ", " << index;
|
||||
}
|
||||
auto dump = fs.dump(3);
|
||||
EXPECT_THAT(dump, ::testing::HasSubstr("CHECKSUM ERROR"));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto t = dwarfsck_tester::create_with_image(corrupt_image);
|
||||
|
||||
// for blocks, we skip checks with --no-check
|
||||
if (type == "BLOCK") {
|
||||
EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
|
||||
EXPECT_GT(t.out().size(), 100) << t.out();
|
||||
} else {
|
||||
if (is_metadata_section) {
|
||||
EXPECT_EQ(1, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
|
||||
EXPECT_EQ(0, t.out().size()) << t.out();
|
||||
} else {
|
||||
EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
|
||||
}
|
||||
|
||||
// for blocks, we skip checks with --no-check
|
||||
if (!is_block) {
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||
"checksum error in section: {}", type)));
|
||||
}
|
||||
|
||||
// std::cout << "[" << type << ", nocheck]\n" << t.out() << std::endl;
|
||||
auto json = t.out();
|
||||
|
||||
// std::cout << "[" << type << ", nocheck]\n" << json << "\n";
|
||||
|
||||
if (is_metadata_section) {
|
||||
EXPECT_EQ(0, json.size()) << json;
|
||||
} else {
|
||||
EXPECT_GT(json.size(), 100) << json;
|
||||
EXPECT_NO_THROW(folly::parseJson(json)) << json;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
@ -1829,16 +1881,19 @@ TEST(dwarfsck_test, check_fail) {
|
||||
|
||||
EXPECT_EQ(1, t.run({"image.dwarfs", "-j"})) << t.err();
|
||||
|
||||
if (type == "BLOCK") {
|
||||
EXPECT_GT(t.out().size(), 100) << t.out();
|
||||
} else {
|
||||
EXPECT_EQ(0, t.out().size()) << t.out();
|
||||
}
|
||||
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||
"checksum error in section: {}", type)));
|
||||
|
||||
// std::cout << "[" << type << "]\n" << t.out() << std::endl;
|
||||
auto json = t.out();
|
||||
|
||||
// std::cout << "[" << type << "]\n" << json << "\n";
|
||||
|
||||
if (is_metadata_section) {
|
||||
EXPECT_EQ(0, json.size()) << json;
|
||||
} else {
|
||||
EXPECT_GT(json.size(), 100) << json;
|
||||
EXPECT_NO_THROW(folly::parseJson(json)) << json;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
@ -1847,18 +1902,42 @@ TEST(dwarfsck_test, check_fail) {
|
||||
EXPECT_EQ(1, t.run({"image.dwarfs", "--check-integrity", "-j"}))
|
||||
<< t.err();
|
||||
|
||||
if (type == "BLOCK") {
|
||||
EXPECT_GT(t.out().size(), 100) << t.out();
|
||||
if (is_block) {
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||
"integrity check error in section: BLOCK")));
|
||||
} else {
|
||||
EXPECT_EQ(0, t.out().size()) << t.out();
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||
"checksum error in section: {}", type)));
|
||||
}
|
||||
|
||||
// std::cout << "[" << type << ", integrity]\n" << t.out() << std::endl;
|
||||
auto json = t.out();
|
||||
|
||||
// std::cout << "[" << type << ", integrity]\n" << json << "\n";
|
||||
|
||||
if (is_metadata_section) {
|
||||
EXPECT_EQ(0, json.size()) << json;
|
||||
} else {
|
||||
EXPECT_GT(json.size(), 100) << json;
|
||||
EXPECT_NO_THROW(folly::parseJson(json)) << json;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto t = dwarfsck_tester::create_with_image(corrupt_image);
|
||||
|
||||
EXPECT_EQ(1, t.run({"image.dwarfs", "-d3"})) << t.err();
|
||||
|
||||
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||
"checksum error in section: {}", type)));
|
||||
|
||||
if (is_metadata_section) {
|
||||
EXPECT_EQ(0, t.out().size()) << t.out();
|
||||
} else {
|
||||
EXPECT_THAT(t.out(), ::testing::HasSubstr("CHECKSUM ERROR"));
|
||||
}
|
||||
}
|
||||
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user