From 8831009a52bb8a96200a01cc55d4bc1d86ff762a Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Wed, 7 Feb 2024 17:53:25 +0100 Subject: [PATCH] feat(dwarfsck): add --list, --checksum, --verbose (fixes gh #192) --- doc/dwarfsck.md | 16 +++++ src/dwarfsck_main.cpp | 145 ++++++++++++++++++++++++++++++++++++++-- test/tool_main_test.cpp | 86 ++++++++++++++++++++++++ 3 files changed, 243 insertions(+), 4 deletions(-) diff --git a/doc/dwarfsck.md b/doc/dwarfsck.md index d941a417..d7958687 100644 --- a/doc/dwarfsck.md +++ b/doc/dwarfsck.md @@ -25,6 +25,9 @@ with a non-zero exit code. - `-q`, `--quiet`: Don't produce any output unless there is an error. +- `-v`, `--verbose`: + Produce verbose output, where applicable. + - `-O`, `--image-offset=`*value*|`auto`: Specify the byte offset at which the filesystem is located in the image. Use `auto` to detect the offset automatically. This is also the default. @@ -36,6 +39,19 @@ with a non-zero exit code. header is present, the program will exit with exit code 2 and emit a warning. +- `-l`, `--list`: + List all entries in the file system image. Uses output similar to `tar -t`. + With `--verbose`, also print details about each entry. + +- `--checksum=`*name*: + Produce a checksum using the specified algorithm for each regular file in + the file system image. This can be used to easily verify the file system + image against local files, e.g.: + +``` +dwarfsck --checksum=sha512 /tmp/fs.dwarfs | sha512sum --check +``` + - `-n`, `--num-workers=`*value*: Number of worker threads used for integrity checking. diff --git a/src/dwarfsck_main.cpp b/src/dwarfsck_main.cpp index dd6e366e..3f7ea979 100644 --- a/src/dwarfsck_main.cpp +++ b/src/dwarfsck_main.cpp @@ -19,18 +19,25 @@ * along with dwarfs. If not, see . 
*/

+#include
 #include
 #include
+#include
 #include
 #include

 #include

+#include
+#include
+
 #include
+#include
 #include
 #include
 #include

+#include "dwarfs/checksum.h"
 #include "dwarfs/error.h"
 #include "dwarfs/file_access.h"
 #include "dwarfs/filesystem_v2.h"
@@ -41,24 +48,164 @@
 #include "dwarfs/os_access.h"
 #include "dwarfs/tool.h"
 #include "dwarfs/util.h"
+#include "dwarfs/worker_group.h"
 #include "dwarfs_tool_main.h"

 namespace dwarfs {

 namespace po = boost::program_options;

+namespace {
+
+/**
+ * List all entries of the file system image on `iol.out`.
+ *
+ * Without `verbose`, one sanitized path is printed per line and entries
+ * with an empty path are skipped. With `verbose`, every entry is printed
+ * with its mode string, uid/gid, size and modification time, and symbolic
+ * links additionally show their target.
+ */
+void do_list_files(filesystem_v2& fs, iolayer const& iol, bool verbose) {
+  if (!verbose) {
+    // The plain listing needs neither attributes nor column widths, so
+    // skip the metadata pass that only the verbose layout requires.
+    fs.walk([&](auto const& de) {
+      auto name = de.unix_path();
+      utf8_sanitize(name);
+
+      if (!name.empty()) {
+        iol.out << name << "\n";
+      }
+    });
+
+    return;
+  }
+
+  // Width in characters of the largest id in `vec`; zero for an empty list
+  // so that the end iterator is never dereferenced.
+  auto max_width = [](auto const& vec) -> size_t {
+    auto max = std::max_element(vec.begin(), vec.end());
+    return max == vec.end() ? 0 : std::to_string(*max).size();
+  };
+
+  auto const uid_width = max_width(fs.get_all_uids());
+  auto const gid_width = max_width(fs.get_all_gids());
+
+  // First pass: find the largest size so the size column can be aligned.
+  file_stat::off_type max_inode_size{0};
+  fs.walk([&](auto const& de) {
+    file_stat st;
+    fs.getattr(de.inode(), &st);
+    max_inode_size = std::max(max_inode_size, st.size);
+  });
+
+  auto const inode_size_width = fmt::format("{:L}", max_inode_size).size();
+
+  // Second pass: print one line per entry.
+  fs.walk([&](auto const& de) {
+    auto iv = de.inode();
+    file_stat st;
+    fs.getattr(iv, &st);
+    auto name = de.unix_path();
+    utf8_sanitize(name);
+
+    if (iv.is_symlink()) {
+      auto target = fs.readlink(iv).value();
+      utf8_sanitize(target);
+      name += " -> " + target;
+    }
+
+    iol.out << fmt::format(
+        "{3} {4:{0}}/{5:{1}} {6:{2}L} {7:%Y-%m-%d %H:%M} {8}\n", uid_width,
+        gid_width, inode_size_width, iv.mode_string(), iv.getuid(),
+        iv.getgid(), st.size, fmt::localtime(st.mtime), name);
+  });
+}
+
+/**
+ * Print one "<hexdigest> <path>" line to `iol.out` for every regular file
+ * in the file system image.
+ *
+ * Files are visited using `walk_data_order()` and hashed by jobs on a worker
+ * group with `num_workers` workers, so lines may be written in any order.
+ * Failures are logged via `lgr` and the affected file produces no output.
+ */
+void do_checksum(logger& lgr, filesystem_v2& fs, iolayer const& iol,
+                 std::string const& algo, size_t num_workers) {
+  LOG_PROXY(debug_logger_policy, lgr);
+
+  worker_group wg{lgr, *iol.os, "checksum", num_workers};
+  std::mutex mx; // serializes writes to iol.out across jobs
+
+  fs.walk_data_order([&](auto const& de) {
+    auto iv = de.inode();
+    if (iv.is_regular_file()) {
+      // `de` and `iv` are captured by value as the job may run after this
+      // callback has returned; `wg.wait()` below keeps the rest alive.
+      wg.add_job([&, de, iv] {
+        file_stat st;
+
+        if (fs.getattr(iv, &st) != 0) {
+          LOG_ERROR << "failed to get attributes for inode " << iv.inode_num();
+          return;
+        }
+
+        auto ranges = fs.readv(iv.inode_num(), st.size);
+
+        if (!ranges) {
+          LOG_ERROR << "failed to read inode " << iv.inode_num() << ": "
+                    << std::strerror(-ranges.error());
+          return;
+        }
+
+        checksum cs(algo);
+
+        for (auto& fut : ranges.value()) {
+          try {
+            auto range = fut.get();
+            cs.update(range.data(), range.size());
+          } catch (std::exception const& e) {
+            LOG_ERROR << "error reading data from inode " << iv.inode_num()
+                      << ": " << e.what();
+            return;
+          }
+        }
+
+        auto output = fmt::format("{} {}\n", cs.hexdigest(), de.unix_path());
+
+        {
+          std::lock_guard lock(mx);
+          iol.out << output;
+        }
+      });
+    }
+  });
+
+  wg.wait();
+}
+
+} // namespace
+
 int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
+  using namespace folly::gen;
+
   const size_t num_cpu = std::max(folly::hardware_concurrency(), 1u);

-  std::string input, export_metadata, image_offset;
+  auto algo_list = checksum::available_algorithms();
+  auto checksum_desc = "print checksums for all files (" +
+                       (from(algo_list) | unsplit(", ")) + ")";
+
+  std::string input, export_metadata, image_offset, checksum_algo;
   logger_options logopts;
   size_t num_workers;
   int detail;
   bool quiet{false};
+  bool verbose{false};
   bool output_json{false};
   bool check_integrity{false};
   bool no_check{false};
   bool print_header{false};
+  bool list_files{false};

   // clang-format off
   po::options_description opts("Command line options");
@@ -72,12 +219,21 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
     ("quiet,q",
         po::value(&quiet)->zero_tokens(),
         "don't print anything unless an error occurs")
+    ("verbose,v",
+        po::value(&verbose)->zero_tokens(),
+        "produce verbose output")
     ("image-offset,O",
         po::value(&image_offset)->default_value("auto"),
         "filesystem image offset in bytes")
     ("print-header,H",
         po::value(&print_header)->zero_tokens(),
"print filesystem header to stdout and exit") + ("list,l", + po::value(&list_files)->zero_tokens(), + "list all files and exit") + ("checksum", + po::value(&checksum_algo), + checksum_desc.c_str()) ("num-workers,n", po::value(&num_workers)->default_value(num_cpu), "number of reader worker threads") @@ -138,10 +261,16 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) { return 1; } + if (vm.count("checksum") && !checksum::is_available(checksum_algo)) { + LOG_WARN << "checksum algorithm not available: " << checksum_algo; + return 1; + } + if (print_header && - (output_json || !export_metadata.empty() || check_integrity)) { + (output_json || !export_metadata.empty() || check_integrity || + list_files || !checksum_algo.empty())) { LOG_WARN << "--print-header is mutually exclusive with --json, " - "--export-metadata and --check-integrity"; + "--export-metadata, --check-integrity, --list and --checksum"; return 1; } @@ -191,7 +320,7 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) { : filesystem_check_level::CHECKSUM; auto errors = no_check ? 
0 : fs.check(level, num_workers);

-        if (!quiet) {
+        if (!quiet && !list_files && checksum_algo.empty()) {
           if (output_json) {
             iol.out << folly::toPrettyJson(fs.info_as_dynamic(detail)) << "\n";
           } else {
@@ -199,6 +328,14 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
           }
         }

+        if (list_files) {
+          do_list_files(fs, iol, verbose);
+        }
+
+        if (!checksum_algo.empty()) {
+          do_checksum(lgr, fs, iol, checksum_algo, num_workers);
+        }
+
         if (errors > 0) {
           return 1;
         }
diff --git a/test/tool_main_test.cpp b/test/tool_main_test.cpp
index a2948d96..6fc3c496 100644
--- a/test/tool_main_test.cpp
+++ b/test/tool_main_test.cpp
@@ -31,6 +31,7 @@

 #include
 #include
+#include
 #include
 #include

@@ -2054,6 +2055,99 @@ TEST(dwarfsck_test, export_metadata_close_error) {
               ::testing::HasSubstr("failed to close metadata output file"));
 }

+// An unknown checksum algorithm must be rejected with an error message.
+TEST(dwarfsck_test, checksum_algorithm_not_available) {
+  auto t = dwarfsck_tester::create_with_image();
+  EXPECT_NE(0, t.run({"image.dwarfs", "--checksum=grmpf"})) << t.err();
+  EXPECT_THAT(t.err(),
+              ::testing::HasSubstr("checksum algorithm not available: grmpf"));
+}
+
+// `--list` prints one path per line; the result is compared as a set, so
+// the order of the lines is not checked here.
+TEST(dwarfsck_test, list_files) {
+  auto t = dwarfsck_tester::create_with_image();
+  EXPECT_EQ(0, t.run({"image.dwarfs", "--list"})) << t.err();
+  auto out = t.out();
+
+  std::set<std::string> files;
+  folly::splitTo<std::string>('\n', out, std::inserter(files, files.end()),
+                              true);
+
+  std::set<std::string> const expected{
+      "test.pl", "somelink", "somedir", "foo.pl",
+      "bar.pl", "baz.pl", "ipsum.txt", "somedir/ipsum.py",
+      "somedir/bad", "somedir/empty", "empty",
+  };
+
+  EXPECT_EQ(expected, files);
+}
+
+// `--list --verbose` prints one line per entry; spot-check a directory, a
+// regular file and a symbolic link.
+TEST(dwarfsck_test, list_files_verbose) {
+  auto t = dwarfsck_tester::create_with_image();
+  EXPECT_EQ(0, t.run({"image.dwarfs", "--list", "--verbose"})) << t.err();
+  auto out = t.out();
+
+  auto num_lines = std::count(out.begin(), out.end(), '\n');
+  EXPECT_EQ(12, num_lines);
+
+  // Id and size columns are padded to a common width, so match runs of
+  // whitespace rather than a fixed number of spaces.
+  std::vector<std::string> expected_re{
+      fmt::format("drwxrwxrwx\\s+1000/100\\s+8\\s+{:%Y-%m-%d %H:%M}\\s*\n",
+                  fmt::localtime(2)),
+      fmt::format(
+          "-rw-------\\s+1337/\\s*0\\s+{:L}\\s+{:%Y-%m-%d %H:%M}\\s+baz.pl\n",
+          23456, fmt::localtime(8002)),
+      fmt::format("lrwxrwxrwx\\s+1000/100\\s+16\\s+{:%Y-%m-%d "
+                  "%H:%M}\\s+somelink -> somedir/ipsum.py\n",
+                  fmt::localtime(2002)),
+  };
+
+  for (auto const& str : expected_re) {
+    std::regex re{str};
+    EXPECT_TRUE(std::regex_search(out, re)) << "[" << str << "]\n" << out;
+  }
+}
+
+// `--checksum` prints one "<digest> <path>" line per regular file.
+TEST(dwarfsck_test, checksum_files) {
+  auto t = dwarfsck_tester::create_with_image();
+  EXPECT_EQ(0, t.run({"image.dwarfs", "--checksum=md5"})) << t.err();
+  auto out = t.out();
+
+  auto num_lines = std::count(out.begin(), out.end(), '\n');
+  EXPECT_EQ(8, num_lines);
+
+  std::map<std::string, std::string> actual;
+  std::vector<std::string> lines;
+  folly::split('\n', out, lines);
+
+  for (auto const& line : lines) {
+    if (line.empty()) {
+      continue;
+    }
+    std::string file, hash;
+    folly::split(" ", line, hash, file);
+    EXPECT_TRUE(actual.emplace(file, hash).second);
+  }
+
+  std::map<std::string, std::string> const expected{
+      {"empty", "d41d8cd98f00b204e9800998ecf8427e"},
+      {"somedir/empty", "d41d8cd98f00b204e9800998ecf8427e"},
+      {"test.pl", "d41d8cd98f00b204e9800998ecf8427e"},
+      {"baz.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
+      {"somedir/ipsum.py", "70fe813c36ed50ebd7f4991857683676"},
+      {"foo.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
+      {"bar.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
+      {"ipsum.txt", "0782b6a546cedd8be8fc86ac47dc6d96"},
+  };
+
+  EXPECT_EQ(expected, actual);
+}
+
 class mkdwarfs_sim_order_test : public testing::TestWithParam {};

 TEST(mkdwarfs_test, max_similarity_size) {