feat(filesystem): load image even if non-metadata sections are corrupt

This commit is contained in:
Marcus Holland-Moritz 2024-01-16 14:21:05 +01:00
parent 91368d0550
commit fd005e0e06
2 changed files with 167 additions and 39 deletions

View File

@ -272,25 +272,51 @@ using section_map = std::unordered_map<section_type, std::vector<fs_section>>;
/**
 * Determine the uncompressed size of a section's payload.
 *
 * Sections stored with compression_type::NONE report their stored length
 * directly; no checksum is evaluated on that path. For every other
 * compression type the section must pass its fast check before the
 * decompressor is asked for the uncompressed size.
 *
 * \param mm   memory mapping that backs the section
 * \param sec  section to inspect
 * \return     uncompressed payload size
 * \throws runtime_error (via DWARFS_THROW) if a compressed section fails
 *         its fast check; anything block_decompressor throws propagates.
 */
size_t
get_uncompressed_section_size(std::shared_ptr<mmif> mm, fs_section const& sec) {
  if (sec.compression() == compression_type::NONE) {
    return sec.length();
  }

  // Refuse to hand damaged data to the decompressor.
  if (!sec.check_fast(*mm)) {
    DWARFS_THROW(
        runtime_error,
        fmt::format("attempt to access damaged {} section", sec.name()));
  }

  // Target buffer handed to the decompressor's constructor; nothing is
  // decompressed in this function, only the size is queried.
  std::vector<uint8_t> tmp;

  // Obtain the payload through the section accessor rather than computing
  // pointer/length pairs from the raw mapping by hand.
  auto span = sec.data(*mm);
  block_decompressor bd(sec.compression(), span.data(), span.size(), tmp);

  return bd.uncompressed_size();
}
/**
 * Best-effort variant of get_uncompressed_section_size().
 *
 * \param mm   memory mapping that backs the section
 * \param sec  section to inspect
 * \return     the uncompressed size, or std::nullopt if the section fails
 *             its fast check or the size query throws a std::exception
 */
std::optional<size_t>
try_get_uncompressed_section_size(std::shared_ptr<mmif> mm,
                                  fs_section const& sec) {
  // A section that fails its fast check is never passed on.
  if (!sec.check_fast(*mm)) {
    return std::nullopt;
  }

  try {
    return get_uncompressed_section_size(mm, sec);
  } catch (std::exception const&) {
    // Deliberately swallowed: callers treat "unknown size" as a normal
    // outcome and fall back to other information.
    return std::nullopt;
  }
}
/**
 * Return a view of a section's (uncompressed) payload.
 *
 * \param mm            memory mapping that backs the section
 * \param section       section whose data is requested
 * \param buffer        receives the decompressed bytes whenever the data
 *                      cannot be served straight from the mapping
 * \param force_buffer  if true, always go through `buffer`, even for
 *                      sections stored with compression_type::NONE
 * \return a span over the mapping itself (uncompressed section and
 *         `force_buffer` false) or over `buffer` otherwise; in the latter
 *         case the span is only valid as long as `buffer` is left untouched
 *
 * The section must pass its fast check; this is enforced with DWARFS_CHECK.
 */
std::span<uint8_t const>
get_section_data(std::shared_ptr<mmif> mm, fs_section const& section,
                 std::vector<uint8_t>& buffer, bool force_buffer) {
  DWARFS_CHECK(
      section.check_fast(*mm),
      fmt::format("attempt to access damaged {} section", section.name()));

  auto span = section.data(*mm);
  auto compression = section.compression();

  // Uncompressed data can be handed out directly from the mapping.
  if (!force_buffer && compression == compression_type::NONE) {
    return span;
  }

  // Decompress exactly once, reading from the section's own data span.
  buffer =
      block_decompressor::decompress(compression, span.data(), span.size());

  return buffer;
}
@ -538,7 +564,17 @@ filesystem_<LoggerPolicy>::filesystem_(
check_section(*s);
if (!s->check_fast(*mm_)) {
DWARFS_THROW(runtime_error, "checksum error in section: " + s->name());
switch (s->type()) {
case section_type::METADATA_V2:
case section_type::METADATA_V2_SCHEMA:
DWARFS_THROW(runtime_error,
"checksum error in section: " + s->name());
break;
default:
LOG_WARN << "checksum error in section: " << s->name();
break;
}
}
sections[s->type()].push_back(*s);
@ -560,8 +596,10 @@ filesystem_<LoggerPolicy>::filesystem_(
if (auto it = sections.find(section_type::HISTORY); it != sections.end()) {
for (auto& section : it->second) {
std::vector<uint8_t> buffer;
history_.parse_append(get_section_data(mm_, section, buffer, false));
if (section.check_fast(*mm_)) {
std::vector<uint8_t> buffer;
history_.parse_append(get_section_data(mm_, section, buffer, false));
}
}
}
}
@ -811,8 +849,17 @@ void filesystem_<LoggerPolicy>::dump(std::ostream& os, int detail_level) const {
while (auto sp = parser.next_section()) {
auto const& s = *sp;
auto uncompressed_size = get_uncompressed_section_size(mm_, s);
float compression_ratio = float(s.length()) / uncompressed_size;
std::string block_size;
if (auto uncompressed_size = try_get_uncompressed_section_size(mm_, s)) {
float compression_ratio = float(s.length()) / uncompressed_size.value();
block_size =
fmt::format("blocksize={}, ratio={:.2f}%",
uncompressed_size.value(), 100.0 * compression_ratio);
} else {
block_size = fmt::format("blocksize={} (estimate)", s.length());
}
std::string category;
if (s.type() == section_type::BLOCK) {
@ -822,9 +869,8 @@ void filesystem_<LoggerPolicy>::dump(std::ostream& os, int detail_level) const {
++block_no;
}
os << "SECTION " << s.description() << ", blocksize=" << uncompressed_size
<< ", ratio=" << fmt::format("{:.2f}%", 100.0 * compression_ratio)
<< category << "\n";
os << "SECTION " << s.description() << ", " << block_size << category
<< "\n";
}
}
@ -874,18 +920,21 @@ filesystem_<LoggerPolicy>::info_as_dynamic(int detail_level) const {
while (auto sp = parser.next_section()) {
auto const& s = *sp;
auto uncompressed_size = get_uncompressed_section_size(mm_, s);
float compression_ratio = float(s.length()) / uncompressed_size;
bool checksum_ok = s.check_fast(*mm_);
folly::dynamic section_info = folly::dynamic::object
// clang-format off
("type", s.name())
("size", uncompressed_size)
("compressed_size", s.length())
("ratio", compression_ratio)
("checksum_ok", checksum_ok)
// clang-format on
;
if (auto uncompressed_size = try_get_uncompressed_section_size(mm_, s)) {
section_info["size"] = uncompressed_size.value();
section_info["ratio"] = float(s.length()) / uncompressed_size.value();
}
if (s.type() == section_type::BLOCK) {
if (auto catstr = meta_.get_block_category(block_no)) {
section_info["category"] = catstr.value();

View File

@ -34,6 +34,7 @@
#include <fmt/format.h>
#include <folly/String.h>
#include <folly/container/Enumerate.h>
#include <folly/json.h>
#include "dwarfs/filesystem_v2.h"
@ -1781,7 +1782,7 @@ TEST(dwarfsck_test, check_fail) {
EXPECT_EQ(0, t.run({"image.dwarfs", "--check-integrity"})) << t.err();
}
std::map<std::string, size_t> section_offsets;
std::vector<std::pair<std::string, size_t>> section_offsets;
{
auto t = dwarfsck_tester::create_with_image(image);
@ -1795,33 +1796,84 @@ TEST(dwarfsck_test, check_fail) {
for (auto const& section : info["sections"]) {
auto type = section["type"].asString();
auto size = section["compressed_size"].asInt();
section_offsets[type] = offset;
section_offsets.emplace_back(type, offset);
offset += section_header_size + size;
}
EXPECT_EQ(image.size(), offset);
}
size_t index = 0;
for (auto const& [type, offset] : section_offsets) {
bool const is_metadata_section =
type == "METADATA_V2" || type == "METADATA_V2_SCHEMA";
bool const is_block = type == "BLOCK";
auto corrupt_image = image;
// flip a bit right after the header
corrupt_image[offset + section_header_size] ^= 0x01;
// std::cout << "corrupting section: " << type << " @ " << offset << "\n";
{
test::test_logger lgr;
auto make_fs = [&] {
return filesystem_v2{lgr,
std::make_shared<test::mmap_mock>(corrupt_image)};
};
if (is_metadata_section) {
EXPECT_THAT([&] { make_fs(); },
::testing::ThrowsMessage<dwarfs::runtime_error>(
::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type))));
} else {
auto fs = make_fs();
auto& log = lgr.get_log();
if (is_block) {
EXPECT_EQ(0, log.size());
} else {
ASSERT_EQ(1, log.size());
EXPECT_THAT(log[0].output,
::testing::HasSubstr(
fmt::format("checksum error in section: {}", type)));
}
auto info = fs.info_as_dynamic(3);
ASSERT_EQ(1, info.count("sections"));
ASSERT_EQ(section_offsets.size(), info["sections"].size());
for (auto const& [i, section] : folly::enumerate(info["sections"])) {
EXPECT_EQ(section["checksum_ok"].asBool(), i != index)
<< type << ", " << index;
}
auto dump = fs.dump(3);
EXPECT_THAT(dump, ::testing::HasSubstr("CHECKSUM ERROR"));
}
}
{
auto t = dwarfsck_tester::create_with_image(corrupt_image);
// for blocks, we skip checks with --no-check
if (type == "BLOCK") {
EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
EXPECT_GT(t.out().size(), 100) << t.out();
} else {
if (is_metadata_section) {
EXPECT_EQ(1, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
EXPECT_EQ(0, t.out().size()) << t.out();
} else {
EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
}
// for blocks, we skip checks with --no-check
if (!is_block) {
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type)));
}
// std::cout << "[" << type << ", nocheck]\n" << t.out() << std::endl;
auto json = t.out();
// std::cout << "[" << type << ", nocheck]\n" << json << "\n";
if (is_metadata_section) {
EXPECT_EQ(0, json.size()) << json;
} else {
EXPECT_GT(json.size(), 100) << json;
EXPECT_NO_THROW(folly::parseJson(json)) << json;
}
}
{
@ -1829,16 +1881,19 @@ TEST(dwarfsck_test, check_fail) {
EXPECT_EQ(1, t.run({"image.dwarfs", "-j"})) << t.err();
if (type == "BLOCK") {
EXPECT_GT(t.out().size(), 100) << t.out();
} else {
EXPECT_EQ(0, t.out().size()) << t.out();
}
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type)));
// std::cout << "[" << type << "]\n" << t.out() << std::endl;
auto json = t.out();
// std::cout << "[" << type << "]\n" << json << "\n";
if (is_metadata_section) {
EXPECT_EQ(0, json.size()) << json;
} else {
EXPECT_GT(json.size(), 100) << json;
EXPECT_NO_THROW(folly::parseJson(json)) << json;
}
}
{
@ -1847,18 +1902,42 @@ TEST(dwarfsck_test, check_fail) {
EXPECT_EQ(1, t.run({"image.dwarfs", "--check-integrity", "-j"}))
<< t.err();
if (type == "BLOCK") {
EXPECT_GT(t.out().size(), 100) << t.out();
if (is_block) {
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"integrity check error in section: BLOCK")));
} else {
EXPECT_EQ(0, t.out().size()) << t.out();
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type)));
}
// std::cout << "[" << type << ", integrity]\n" << t.out() << std::endl;
auto json = t.out();
// std::cout << "[" << type << ", integrity]\n" << json << "\n";
if (is_metadata_section) {
EXPECT_EQ(0, json.size()) << json;
} else {
EXPECT_GT(json.size(), 100) << json;
EXPECT_NO_THROW(folly::parseJson(json)) << json;
}
}
{
auto t = dwarfsck_tester::create_with_image(corrupt_image);
EXPECT_EQ(1, t.run({"image.dwarfs", "-d3"})) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type)));
if (is_metadata_section) {
EXPECT_EQ(0, t.out().size()) << t.out();
} else {
EXPECT_THAT(t.out(), ::testing::HasSubstr("CHECKSUM ERROR"));
}
}
++index;
}
}