mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-15 07:16:13 -04:00
feat(filesystem): load image even if non-metadata sections are corrupt
This commit is contained in:
parent
91368d0550
commit
fd005e0e06
@ -272,25 +272,51 @@ using section_map = std::unordered_map<section_type, std::vector<fs_section>>;
|
|||||||
|
|
||||||
size_t
|
size_t
|
||||||
get_uncompressed_section_size(std::shared_ptr<mmif> mm, fs_section const& sec) {
|
get_uncompressed_section_size(std::shared_ptr<mmif> mm, fs_section const& sec) {
|
||||||
|
if (sec.compression() == compression_type::NONE) {
|
||||||
|
return sec.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!sec.check_fast(*mm)) {
|
||||||
|
DWARFS_THROW(
|
||||||
|
runtime_error,
|
||||||
|
fmt::format("attempt to access damaged {} section", sec.name()));
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> tmp;
|
std::vector<uint8_t> tmp;
|
||||||
block_decompressor bd(sec.compression(), mm->as<uint8_t>(sec.start()),
|
auto span = sec.data(*mm);
|
||||||
sec.length(), tmp);
|
block_decompressor bd(sec.compression(), span.data(), span.size(), tmp);
|
||||||
return bd.uncompressed_size();
|
return bd.uncompressed_size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<size_t>
|
||||||
|
try_get_uncompressed_section_size(std::shared_ptr<mmif> mm,
|
||||||
|
fs_section const& sec) {
|
||||||
|
if (sec.check_fast(*mm)) {
|
||||||
|
try {
|
||||||
|
return get_uncompressed_section_size(mm, sec);
|
||||||
|
} catch (std::exception const&) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
std::span<uint8_t const>
|
std::span<uint8_t const>
|
||||||
get_section_data(std::shared_ptr<mmif> mm, fs_section const& section,
|
get_section_data(std::shared_ptr<mmif> mm, fs_section const& section,
|
||||||
std::vector<uint8_t>& buffer, bool force_buffer) {
|
std::vector<uint8_t>& buffer, bool force_buffer) {
|
||||||
|
DWARFS_CHECK(
|
||||||
|
section.check_fast(*mm),
|
||||||
|
fmt::format("attempt to access damaged {} section", section.name()));
|
||||||
|
|
||||||
|
auto span = section.data(*mm);
|
||||||
auto compression = section.compression();
|
auto compression = section.compression();
|
||||||
auto start = section.start();
|
|
||||||
auto length = section.length();
|
|
||||||
|
|
||||||
if (!force_buffer && compression == compression_type::NONE) {
|
if (!force_buffer && compression == compression_type::NONE) {
|
||||||
return mm->span(start, length);
|
return span;
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer = block_decompressor::decompress(compression, mm->as<uint8_t>(start),
|
buffer =
|
||||||
length);
|
block_decompressor::decompress(compression, span.data(), span.size());
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
@ -538,7 +564,17 @@ filesystem_<LoggerPolicy>::filesystem_(
|
|||||||
check_section(*s);
|
check_section(*s);
|
||||||
|
|
||||||
if (!s->check_fast(*mm_)) {
|
if (!s->check_fast(*mm_)) {
|
||||||
DWARFS_THROW(runtime_error, "checksum error in section: " + s->name());
|
switch (s->type()) {
|
||||||
|
case section_type::METADATA_V2:
|
||||||
|
case section_type::METADATA_V2_SCHEMA:
|
||||||
|
DWARFS_THROW(runtime_error,
|
||||||
|
"checksum error in section: " + s->name());
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
LOG_WARN << "checksum error in section: " << s->name();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sections[s->type()].push_back(*s);
|
sections[s->type()].push_back(*s);
|
||||||
@ -560,8 +596,10 @@ filesystem_<LoggerPolicy>::filesystem_(
|
|||||||
|
|
||||||
if (auto it = sections.find(section_type::HISTORY); it != sections.end()) {
|
if (auto it = sections.find(section_type::HISTORY); it != sections.end()) {
|
||||||
for (auto& section : it->second) {
|
for (auto& section : it->second) {
|
||||||
std::vector<uint8_t> buffer;
|
if (section.check_fast(*mm_)) {
|
||||||
history_.parse_append(get_section_data(mm_, section, buffer, false));
|
std::vector<uint8_t> buffer;
|
||||||
|
history_.parse_append(get_section_data(mm_, section, buffer, false));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -811,8 +849,17 @@ void filesystem_<LoggerPolicy>::dump(std::ostream& os, int detail_level) const {
|
|||||||
while (auto sp = parser.next_section()) {
|
while (auto sp = parser.next_section()) {
|
||||||
auto const& s = *sp;
|
auto const& s = *sp;
|
||||||
|
|
||||||
auto uncompressed_size = get_uncompressed_section_size(mm_, s);
|
std::string block_size;
|
||||||
float compression_ratio = float(s.length()) / uncompressed_size;
|
|
||||||
|
if (auto uncompressed_size = try_get_uncompressed_section_size(mm_, s)) {
|
||||||
|
float compression_ratio = float(s.length()) / uncompressed_size.value();
|
||||||
|
block_size =
|
||||||
|
fmt::format("blocksize={}, ratio={:.2f}%",
|
||||||
|
uncompressed_size.value(), 100.0 * compression_ratio);
|
||||||
|
} else {
|
||||||
|
block_size = fmt::format("blocksize={} (estimate)", s.length());
|
||||||
|
}
|
||||||
|
|
||||||
std::string category;
|
std::string category;
|
||||||
|
|
||||||
if (s.type() == section_type::BLOCK) {
|
if (s.type() == section_type::BLOCK) {
|
||||||
@ -822,9 +869,8 @@ void filesystem_<LoggerPolicy>::dump(std::ostream& os, int detail_level) const {
|
|||||||
++block_no;
|
++block_no;
|
||||||
}
|
}
|
||||||
|
|
||||||
os << "SECTION " << s.description() << ", blocksize=" << uncompressed_size
|
os << "SECTION " << s.description() << ", " << block_size << category
|
||||||
<< ", ratio=" << fmt::format("{:.2f}%", 100.0 * compression_ratio)
|
<< "\n";
|
||||||
<< category << "\n";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -874,18 +920,21 @@ filesystem_<LoggerPolicy>::info_as_dynamic(int detail_level) const {
|
|||||||
while (auto sp = parser.next_section()) {
|
while (auto sp = parser.next_section()) {
|
||||||
auto const& s = *sp;
|
auto const& s = *sp;
|
||||||
|
|
||||||
auto uncompressed_size = get_uncompressed_section_size(mm_, s);
|
bool checksum_ok = s.check_fast(*mm_);
|
||||||
float compression_ratio = float(s.length()) / uncompressed_size;
|
|
||||||
|
|
||||||
folly::dynamic section_info = folly::dynamic::object
|
folly::dynamic section_info = folly::dynamic::object
|
||||||
// clang-format off
|
// clang-format off
|
||||||
("type", s.name())
|
("type", s.name())
|
||||||
("size", uncompressed_size)
|
|
||||||
("compressed_size", s.length())
|
("compressed_size", s.length())
|
||||||
("ratio", compression_ratio)
|
("checksum_ok", checksum_ok)
|
||||||
// clang-format on
|
// clang-format on
|
||||||
;
|
;
|
||||||
|
|
||||||
|
if (auto uncompressed_size = try_get_uncompressed_section_size(mm_, s)) {
|
||||||
|
section_info["size"] = uncompressed_size.value();
|
||||||
|
section_info["ratio"] = float(s.length()) / uncompressed_size.value();
|
||||||
|
}
|
||||||
|
|
||||||
if (s.type() == section_type::BLOCK) {
|
if (s.type() == section_type::BLOCK) {
|
||||||
if (auto catstr = meta_.get_block_category(block_no)) {
|
if (auto catstr = meta_.get_block_category(block_no)) {
|
||||||
section_info["category"] = catstr.value();
|
section_info["category"] = catstr.value();
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include <folly/String.h>
|
#include <folly/String.h>
|
||||||
|
#include <folly/container/Enumerate.h>
|
||||||
#include <folly/json.h>
|
#include <folly/json.h>
|
||||||
|
|
||||||
#include "dwarfs/filesystem_v2.h"
|
#include "dwarfs/filesystem_v2.h"
|
||||||
@ -1781,7 +1782,7 @@ TEST(dwarfsck_test, check_fail) {
|
|||||||
EXPECT_EQ(0, t.run({"image.dwarfs", "--check-integrity"})) << t.err();
|
EXPECT_EQ(0, t.run({"image.dwarfs", "--check-integrity"})) << t.err();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<std::string, size_t> section_offsets;
|
std::vector<std::pair<std::string, size_t>> section_offsets;
|
||||||
|
|
||||||
{
|
{
|
||||||
auto t = dwarfsck_tester::create_with_image(image);
|
auto t = dwarfsck_tester::create_with_image(image);
|
||||||
@ -1795,33 +1796,84 @@ TEST(dwarfsck_test, check_fail) {
|
|||||||
for (auto const& section : info["sections"]) {
|
for (auto const& section : info["sections"]) {
|
||||||
auto type = section["type"].asString();
|
auto type = section["type"].asString();
|
||||||
auto size = section["compressed_size"].asInt();
|
auto size = section["compressed_size"].asInt();
|
||||||
section_offsets[type] = offset;
|
section_offsets.emplace_back(type, offset);
|
||||||
offset += section_header_size + size;
|
offset += section_header_size + size;
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPECT_EQ(image.size(), offset);
|
EXPECT_EQ(image.size(), offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t index = 0;
|
||||||
|
|
||||||
for (auto const& [type, offset] : section_offsets) {
|
for (auto const& [type, offset] : section_offsets) {
|
||||||
|
bool const is_metadata_section =
|
||||||
|
type == "METADATA_V2" || type == "METADATA_V2_SCHEMA";
|
||||||
|
bool const is_block = type == "BLOCK";
|
||||||
auto corrupt_image = image;
|
auto corrupt_image = image;
|
||||||
// flip a bit right after the header
|
// flip a bit right after the header
|
||||||
corrupt_image[offset + section_header_size] ^= 0x01;
|
corrupt_image[offset + section_header_size] ^= 0x01;
|
||||||
|
|
||||||
|
// std::cout << "corrupting section: " << type << " @ " << offset << "\n";
|
||||||
|
|
||||||
|
{
|
||||||
|
test::test_logger lgr;
|
||||||
|
auto make_fs = [&] {
|
||||||
|
return filesystem_v2{lgr,
|
||||||
|
std::make_shared<test::mmap_mock>(corrupt_image)};
|
||||||
|
};
|
||||||
|
if (is_metadata_section) {
|
||||||
|
EXPECT_THAT([&] { make_fs(); },
|
||||||
|
::testing::ThrowsMessage<dwarfs::runtime_error>(
|
||||||
|
::testing::HasSubstr(fmt::format(
|
||||||
|
"checksum error in section: {}", type))));
|
||||||
|
} else {
|
||||||
|
auto fs = make_fs();
|
||||||
|
auto& log = lgr.get_log();
|
||||||
|
if (is_block) {
|
||||||
|
EXPECT_EQ(0, log.size());
|
||||||
|
} else {
|
||||||
|
ASSERT_EQ(1, log.size());
|
||||||
|
EXPECT_THAT(log[0].output,
|
||||||
|
::testing::HasSubstr(
|
||||||
|
fmt::format("checksum error in section: {}", type)));
|
||||||
|
}
|
||||||
|
auto info = fs.info_as_dynamic(3);
|
||||||
|
ASSERT_EQ(1, info.count("sections"));
|
||||||
|
ASSERT_EQ(section_offsets.size(), info["sections"].size());
|
||||||
|
for (auto const& [i, section] : folly::enumerate(info["sections"])) {
|
||||||
|
EXPECT_EQ(section["checksum_ok"].asBool(), i != index)
|
||||||
|
<< type << ", " << index;
|
||||||
|
}
|
||||||
|
auto dump = fs.dump(3);
|
||||||
|
EXPECT_THAT(dump, ::testing::HasSubstr("CHECKSUM ERROR"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
auto t = dwarfsck_tester::create_with_image(corrupt_image);
|
auto t = dwarfsck_tester::create_with_image(corrupt_image);
|
||||||
|
|
||||||
// for blocks, we skip checks with --no-check
|
if (is_metadata_section) {
|
||||||
if (type == "BLOCK") {
|
|
||||||
EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
|
|
||||||
EXPECT_GT(t.out().size(), 100) << t.out();
|
|
||||||
} else {
|
|
||||||
EXPECT_EQ(1, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
|
EXPECT_EQ(1, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
|
||||||
EXPECT_EQ(0, t.out().size()) << t.out();
|
} else {
|
||||||
|
EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
|
||||||
|
}
|
||||||
|
|
||||||
|
// for blocks, we skip checks with --no-check
|
||||||
|
if (!is_block) {
|
||||||
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||||
"checksum error in section: {}", type)));
|
"checksum error in section: {}", type)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// std::cout << "[" << type << ", nocheck]\n" << t.out() << std::endl;
|
auto json = t.out();
|
||||||
|
|
||||||
|
// std::cout << "[" << type << ", nocheck]\n" << json << "\n";
|
||||||
|
|
||||||
|
if (is_metadata_section) {
|
||||||
|
EXPECT_EQ(0, json.size()) << json;
|
||||||
|
} else {
|
||||||
|
EXPECT_GT(json.size(), 100) << json;
|
||||||
|
EXPECT_NO_THROW(folly::parseJson(json)) << json;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -1829,16 +1881,19 @@ TEST(dwarfsck_test, check_fail) {
|
|||||||
|
|
||||||
EXPECT_EQ(1, t.run({"image.dwarfs", "-j"})) << t.err();
|
EXPECT_EQ(1, t.run({"image.dwarfs", "-j"})) << t.err();
|
||||||
|
|
||||||
if (type == "BLOCK") {
|
|
||||||
EXPECT_GT(t.out().size(), 100) << t.out();
|
|
||||||
} else {
|
|
||||||
EXPECT_EQ(0, t.out().size()) << t.out();
|
|
||||||
}
|
|
||||||
|
|
||||||
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||||
"checksum error in section: {}", type)));
|
"checksum error in section: {}", type)));
|
||||||
|
|
||||||
// std::cout << "[" << type << "]\n" << t.out() << std::endl;
|
auto json = t.out();
|
||||||
|
|
||||||
|
// std::cout << "[" << type << "]\n" << json << "\n";
|
||||||
|
|
||||||
|
if (is_metadata_section) {
|
||||||
|
EXPECT_EQ(0, json.size()) << json;
|
||||||
|
} else {
|
||||||
|
EXPECT_GT(json.size(), 100) << json;
|
||||||
|
EXPECT_NO_THROW(folly::parseJson(json)) << json;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -1847,18 +1902,42 @@ TEST(dwarfsck_test, check_fail) {
|
|||||||
EXPECT_EQ(1, t.run({"image.dwarfs", "--check-integrity", "-j"}))
|
EXPECT_EQ(1, t.run({"image.dwarfs", "--check-integrity", "-j"}))
|
||||||
<< t.err();
|
<< t.err();
|
||||||
|
|
||||||
if (type == "BLOCK") {
|
if (is_block) {
|
||||||
EXPECT_GT(t.out().size(), 100) << t.out();
|
|
||||||
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||||
"integrity check error in section: BLOCK")));
|
"integrity check error in section: BLOCK")));
|
||||||
} else {
|
} else {
|
||||||
EXPECT_EQ(0, t.out().size()) << t.out();
|
|
||||||
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||||
"checksum error in section: {}", type)));
|
"checksum error in section: {}", type)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// std::cout << "[" << type << ", integrity]\n" << t.out() << std::endl;
|
auto json = t.out();
|
||||||
|
|
||||||
|
// std::cout << "[" << type << ", integrity]\n" << json << "\n";
|
||||||
|
|
||||||
|
if (is_metadata_section) {
|
||||||
|
EXPECT_EQ(0, json.size()) << json;
|
||||||
|
} else {
|
||||||
|
EXPECT_GT(json.size(), 100) << json;
|
||||||
|
EXPECT_NO_THROW(folly::parseJson(json)) << json;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto t = dwarfsck_tester::create_with_image(corrupt_image);
|
||||||
|
|
||||||
|
EXPECT_EQ(1, t.run({"image.dwarfs", "-d3"})) << t.err();
|
||||||
|
|
||||||
|
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
|
||||||
|
"checksum error in section: {}", type)));
|
||||||
|
|
||||||
|
if (is_metadata_section) {
|
||||||
|
EXPECT_EQ(0, t.out().size()) << t.out();
|
||||||
|
} else {
|
||||||
|
EXPECT_THAT(t.out(), ::testing::HasSubstr("CHECKSUM ERROR"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
++index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user