From fd005e0e06d2156fd2757c72c993a779dcfa945b Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Tue, 16 Jan 2024 14:21:05 +0100 Subject: [PATCH] feat(filesystem): load image even if non-metadata sections are corrupt --- src/dwarfs/filesystem_v2.cpp | 87 +++++++++++++++++++------ test/tool_main_test.cpp | 119 +++++++++++++++++++++++++++++------ 2 files changed, 167 insertions(+), 39 deletions(-) diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp index a82ffc96..7c52474f 100644 --- a/src/dwarfs/filesystem_v2.cpp +++ b/src/dwarfs/filesystem_v2.cpp @@ -272,25 +272,51 @@ using section_map = std::unordered_map>; size_t get_uncompressed_section_size(std::shared_ptr mm, fs_section const& sec) { + if (sec.compression() == compression_type::NONE) { + return sec.length(); + } + + if (!sec.check_fast(*mm)) { + DWARFS_THROW( + runtime_error, + fmt::format("attempt to access damaged {} section", sec.name())); + } + std::vector tmp; - block_decompressor bd(sec.compression(), mm->as(sec.start()), - sec.length(), tmp); + auto span = sec.data(*mm); + block_decompressor bd(sec.compression(), span.data(), span.size(), tmp); return bd.uncompressed_size(); } +std::optional +try_get_uncompressed_section_size(std::shared_ptr mm, + fs_section const& sec) { + if (sec.check_fast(*mm)) { + try { + return get_uncompressed_section_size(mm, sec); + } catch (std::exception const&) { + } + } + + return std::nullopt; +} + std::span get_section_data(std::shared_ptr mm, fs_section const& section, std::vector& buffer, bool force_buffer) { + DWARFS_CHECK( + section.check_fast(*mm), + fmt::format("attempt to access damaged {} section", section.name())); + + auto span = section.data(*mm); auto compression = section.compression(); - auto start = section.start(); - auto length = section.length(); if (!force_buffer && compression == compression_type::NONE) { - return mm->span(start, length); + return span; } - buffer = block_decompressor::decompress(compression, mm->as(start), - length); + buffer = + block_decompressor::decompress(compression, span.data(), span.size()); return buffer; } @@ -538,7 +564,17 @@ filesystem_::filesystem_( check_section(*s); if (!s->check_fast(*mm_)) { - DWARFS_THROW(runtime_error, "checksum error in section: " + s->name()); + switch (s->type()) { + case section_type::METADATA_V2: + case section_type::METADATA_V2_SCHEMA: + DWARFS_THROW(runtime_error, + "checksum error in section: " + s->name()); + break; + + default: + LOG_WARN << "checksum error in section: " << s->name(); + break; + } } sections[s->type()].push_back(*s); @@ -560,8 +596,10 @@ filesystem_::filesystem_( if (auto it = sections.find(section_type::HISTORY); it != sections.end()) { for (auto& section : it->second) { - std::vector buffer; - history_.parse_append(get_section_data(mm_, section, buffer, false)); + if (section.check_fast(*mm_)) { + std::vector buffer; + history_.parse_append(get_section_data(mm_, section, buffer, false)); + } } } } @@ -811,8 +849,17 @@ void filesystem_::dump(std::ostream& os, int detail_level) const { while (auto sp = parser.next_section()) { auto const& s = *sp; - auto uncompressed_size = get_uncompressed_section_size(mm_, s); - float compression_ratio = float(s.length()) / uncompressed_size; + std::string block_size; + + if (auto uncompressed_size = try_get_uncompressed_section_size(mm_, s)) { + float compression_ratio = float(s.length()) / uncompressed_size.value(); + block_size = + fmt::format("blocksize={}, ratio={:.2f}%", + uncompressed_size.value(), 100.0 * compression_ratio); + } else { + block_size = fmt::format("blocksize={} (estimate)", s.length()); + } + std::string category; if (s.type() == section_type::BLOCK) { @@ -822,9 +869,8 @@ void filesystem_::dump(std::ostream& os, int detail_level) const { ++block_no; } - os << "SECTION " << s.description() << ", blocksize=" << uncompressed_size - << ", ratio=" << fmt::format("{:.2f}%", 100.0 * compression_ratio) - << category << "\n"; + os << "SECTION " << s.description() << ", " << block_size << category + << "\n"; } } @@ -874,18 +920,21 @@ filesystem_::info_as_dynamic(int detail_level) const { while (auto sp = parser.next_section()) { auto const& s = *sp; - auto uncompressed_size = get_uncompressed_section_size(mm_, s); - float compression_ratio = float(s.length()) / uncompressed_size; + bool checksum_ok = s.check_fast(*mm_); folly::dynamic section_info = folly::dynamic::object // clang-format off ("type", s.name()) - ("size", uncompressed_size) ("compressed_size", s.length()) - ("ratio", compression_ratio) + ("checksum_ok", checksum_ok) // clang-format on ; + if (auto uncompressed_size = try_get_uncompressed_section_size(mm_, s)) { + section_info["size"] = uncompressed_size.value(); + section_info["ratio"] = float(s.length()) / uncompressed_size.value(); + } + if (s.type() == section_type::BLOCK) { if (auto catstr = meta_.get_block_category(block_no)) { section_info["category"] = catstr.value(); diff --git a/test/tool_main_test.cpp b/test/tool_main_test.cpp index de8d53fa..494c09a2 100644 --- a/test/tool_main_test.cpp +++ b/test/tool_main_test.cpp @@ -34,6 +34,7 @@ #include #include +#include #include #include "dwarfs/filesystem_v2.h" @@ -1781,7 +1782,7 @@ TEST(dwarfsck_test, check_fail) { EXPECT_EQ(0, t.run({"image.dwarfs", "--check-integrity"})) << t.err(); } - std::map section_offsets; + std::vector> section_offsets; { auto t = dwarfsck_tester::create_with_image(image); @@ -1795,33 +1796,84 @@ TEST(dwarfsck_test, check_fail) { for (auto const& section : info["sections"]) { auto type = section["type"].asString(); auto size = section["compressed_size"].asInt(); - section_offsets[type] = offset; + section_offsets.emplace_back(type, offset); offset += section_header_size + size; } EXPECT_EQ(image.size(), offset); } + size_t index = 0; + for (auto const& [type, offset] : section_offsets) { + bool const is_metadata_section = + type == "METADATA_V2" || type == "METADATA_V2_SCHEMA"; + bool const is_block = type == "BLOCK"; auto corrupt_image = image; // flip a bit right after the header corrupt_image[offset + section_header_size] ^= 0x01; + // std::cout << "corrupting section: " << type << " @ " << offset << "\n"; + + { + test::test_logger lgr; + auto make_fs = [&] { + return filesystem_v2{lgr, + std::make_shared(corrupt_image)}; + }; + if (is_metadata_section) { + EXPECT_THAT([&] { make_fs(); }, + ::testing::ThrowsMessage( + ::testing::HasSubstr(fmt::format( + "checksum error in section: {}", type)))); + } else { + auto fs = make_fs(); + auto& log = lgr.get_log(); + if (is_block) { + EXPECT_EQ(0, log.size()); + } else { + ASSERT_EQ(1, log.size()); + EXPECT_THAT(log[0].output, + ::testing::HasSubstr( + fmt::format("checksum error in section: {}", type))); + } + auto info = fs.info_as_dynamic(3); + ASSERT_EQ(1, info.count("sections")); + ASSERT_EQ(section_offsets.size(), info["sections"].size()); + for (auto const& [i, section] : folly::enumerate(info["sections"])) { + EXPECT_EQ(section["checksum_ok"].asBool(), i != index) + << type << ", " << index; + } + auto dump = fs.dump(3); + EXPECT_THAT(dump, ::testing::HasSubstr("CHECKSUM ERROR")); + } + } + { auto t = dwarfsck_tester::create_with_image(corrupt_image); - // for blocks, we skip checks with --no-check - if (type == "BLOCK") { - EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err(); - EXPECT_GT(t.out().size(), 100) << t.out(); - } else { + if (is_metadata_section) { EXPECT_EQ(1, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err(); - EXPECT_EQ(0, t.out().size()) << t.out(); + } else { + EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err(); + } + + // for blocks, we skip checks with --no-check + if (!is_block) { EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format( "checksum error in section: {}", type))); } - // std::cout << "[" << type << ", nocheck]\n" << t.out() << std::endl; + auto json = t.out(); + + // std::cout << "[" << type << ", nocheck]\n" << json << "\n"; + + if (is_metadata_section) { + EXPECT_EQ(0, json.size()) << json; + } else { + EXPECT_GT(json.size(), 100) << json; + EXPECT_NO_THROW(folly::parseJson(json)) << json; + } } { @@ -1829,16 +1881,19 @@ TEST(dwarfsck_test, check_fail) { EXPECT_EQ(1, t.run({"image.dwarfs", "-j"})) << t.err(); - if (type == "BLOCK") { - EXPECT_GT(t.out().size(), 100) << t.out(); - } else { - EXPECT_EQ(0, t.out().size()) << t.out(); - } - EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format( "checksum error in section: {}", type))); - // std::cout << "[" << type << "]\n" << t.out() << std::endl; + auto json = t.out(); + + // std::cout << "[" << type << "]\n" << json << "\n"; + + if (is_metadata_section) { + EXPECT_EQ(0, json.size()) << json; + } else { + EXPECT_GT(json.size(), 100) << json; + EXPECT_NO_THROW(folly::parseJson(json)) << json; + } } { @@ -1847,18 +1902,42 @@ TEST(dwarfsck_test, check_fail) { EXPECT_EQ(1, t.run({"image.dwarfs", "--check-integrity", "-j"})) << t.err(); - if (type == "BLOCK") { - EXPECT_GT(t.out().size(), 100) << t.out(); + if (is_block) { EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format( "integrity check error in section: BLOCK"))); } else { - EXPECT_EQ(0, t.out().size()) << t.out(); EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format( "checksum error in section: {}", type))); } - // std::cout << "[" << type << ", integrity]\n" << t.out() << std::endl; + auto json = t.out(); + + // std::cout << "[" << type << ", integrity]\n" << json << "\n"; + + if (is_metadata_section) { + EXPECT_EQ(0, json.size()) << json; + } else { + EXPECT_GT(json.size(), 100) << json; + EXPECT_NO_THROW(folly::parseJson(json)) << json; + } } + + { + auto t = dwarfsck_tester::create_with_image(corrupt_image); + + EXPECT_EQ(1, t.run({"image.dwarfs", "-d3"})) << t.err(); + + EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format( + "checksum error in section: {}", type))); + + if (is_metadata_section) { + EXPECT_EQ(0, t.out().size()) << t.out(); + } else { + EXPECT_THAT(t.out(), ::testing::HasSubstr("CHECKSUM ERROR")); + } + } + + ++index; } }