diff --git a/doc/dwarfsck.md b/doc/dwarfsck.md
index d941a417..d7958687 100644
--- a/doc/dwarfsck.md
+++ b/doc/dwarfsck.md
@@ -25,6 +25,9 @@ with a non-zero exit code.
- `-q`, `--quiet`:
Don't produce any output unless there is an error.
+- `-v`, `--verbose`:
+ Produce verbose output, where applicable.
+
- `-O`, `--image-offset=`*value*|`auto`:
Specify the byte offset at which the filesystem is located in the image.
Use `auto` to detect the offset automatically. This is also the default.
@@ -36,6 +39,19 @@ with a non-zero exit code.
header is present, the program will exit with exit code 2 and emit a
warning.
+- `-l`, `--list`:
+ List all entries in the file system image. The output is similar to that of `tar -t`.
+ With `--verbose`, also print details about each entry.
+
+- `--checksum=`*name*:
+ Produce a checksum using the specified algorithm for each regular file in
+ the file system image. This can be used to easily verify the file system
+ image against local files, e.g.:
+
+```
+dwarfsck --checksum=sha512 /tmp/fs.dwarfs | sha512sum --check
+```
+
- `-n`, `--num-workers=`*value*:
Number of worker threads used for integrity checking.
diff --git a/src/dwarfsck_main.cpp b/src/dwarfsck_main.cpp
index dd6e366e..3f7ea979 100644
--- a/src/dwarfsck_main.cpp
+++ b/src/dwarfsck_main.cpp
@@ -19,18 +19,25 @@
* along with dwarfs. If not, see .
*/
+#include
#include
#include
+#include
#include
#include
#include
+#include
+#include
+
#include
+#include
#include
#include
#include
+#include "dwarfs/checksum.h"
#include "dwarfs/error.h"
#include "dwarfs/file_access.h"
#include "dwarfs/filesystem_v2.h"
@@ -41,24 +48,131 @@
#include "dwarfs/os_access.h"
#include "dwarfs/tool.h"
#include "dwarfs/util.h"
+#include "dwarfs/worker_group.h"
#include "dwarfs_tool_main.h"
namespace dwarfs {
namespace po = boost::program_options;
+namespace {
+
+void do_list_files(filesystem_v2& fs, iolayer const& iol, bool verbose) {
+ auto max_width = [](auto const& vec) {
+ auto max = std::max_element(vec.begin(), vec.end());
+ return std::to_string(*max).size();
+ };
+
+ auto const uid_width = max_width(fs.get_all_uids());
+ auto const gid_width = max_width(fs.get_all_gids());
+
+ file_stat::off_type max_inode_size{0};
+ fs.walk([&](auto const& de) {
+ file_stat st;
+ fs.getattr(de.inode(), &st);
+ max_inode_size = std::max(max_inode_size, st.size);
+ });
+
+ auto const inode_size_width = fmt::format("{:L}", max_inode_size).size();
+
+ fs.walk([&](auto const& de) {
+ auto iv = de.inode();
+ file_stat st;
+ fs.getattr(iv, &st);
+ auto name = de.unix_path();
+ utf8_sanitize(name);
+
+ if (verbose) {
+ if (iv.is_symlink()) {
+ auto target = fs.readlink(iv).value();
+ utf8_sanitize(target);
+ name += " -> " + target;
+ }
+
+ iol.out << fmt::format(
+ "{3} {4:{0}}/{5:{1}} {6:{2}L} {7:%Y-%m-%d %H:%M} {8}\n", uid_width,
+ gid_width, inode_size_width, iv.mode_string(), iv.getuid(),
+ iv.getgid(), st.size, fmt::localtime(st.mtime), name);
+ } else if (!name.empty()) {
+ iol.out << name << "\n";
+ }
+ });
+}
+
+void do_checksum(logger& lgr, filesystem_v2& fs, iolayer const& iol,
+ std::string const& algo, size_t num_workers) {
+ LOG_PROXY(debug_logger_policy, lgr);
+
+ worker_group wg{lgr, *iol.os, "checksum", num_workers};
+ std::mutex mx;
+
+ fs.walk_data_order([&](auto const& de) {
+ auto iv = de.inode();
+ if (iv.is_regular_file()) {
+ wg.add_job([&, de, iv] {
+ file_stat st;
+
+ if (fs.getattr(de.inode(), &st) != 0) {
+ LOG_ERROR << "failed to get attributes for inode " << iv.inode_num();
+ return;
+ }
+
+ auto ranges = fs.readv(iv.inode_num(), st.size);
+
+ if (!ranges) {
+ LOG_ERROR << "failed to read inode " << iv.inode_num() << ": "
+ << std::strerror(-ranges.error());
+ return;
+ }
+
+ checksum cs(algo);
+
+ for (auto& fut : ranges.value()) {
+ try {
+ auto range = fut.get();
+ cs.update(range.data(), range.size());
+ } catch (std::exception const& e) {
+ LOG_ERROR << "error reading data from inode " << iv.inode_num()
+ << ": " << e.what();
+ return;
+ }
+ }
+
+ auto output = fmt::format("{} {}\n", cs.hexdigest(), de.unix_path());
+
+ {
+ std::lock_guard lock(mx);
+ iol.out << output;
+ }
+ });
+ }
+ });
+
+ wg.wait();
+}
+
+} // namespace
+
int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
+ using namespace folly::gen;
+
const size_t num_cpu = std::max(folly::hardware_concurrency(), 1u);
- std::string input, export_metadata, image_offset;
+ auto algo_list = checksum::available_algorithms();
+ auto checksum_desc = "print checksums for all files (" +
+ (from(algo_list) | unsplit(", ")) + ")";
+
+ std::string input, export_metadata, image_offset, checksum_algo;
logger_options logopts;
size_t num_workers;
int detail;
bool quiet{false};
+ bool verbose{false};
bool output_json{false};
bool check_integrity{false};
bool no_check{false};
bool print_header{false};
+ bool list_files{false};
// clang-format off
po::options_description opts("Command line options");
@@ -72,12 +186,21 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
("quiet,q",
po::value(&quiet)->zero_tokens(),
"don't print anything unless an error occurs")
+ ("verbose,v",
+ po::value(&verbose)->zero_tokens(),
+ "produce verbose output")
("image-offset,O",
po::value(&image_offset)->default_value("auto"),
"filesystem image offset in bytes")
("print-header,H",
po::value(&print_header)->zero_tokens(),
"print filesystem header to stdout and exit")
+ ("list,l",
+ po::value(&list_files)->zero_tokens(),
+ "list all files and exit")
+ ("checksum",
+ po::value(&checksum_algo),
+ checksum_desc.c_str())
("num-workers,n",
po::value(&num_workers)->default_value(num_cpu),
"number of reader worker threads")
@@ -138,10 +261,16 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
return 1;
}
+ if (vm.count("checksum") && !checksum::is_available(checksum_algo)) {
+ LOG_WARN << "checksum algorithm not available: " << checksum_algo;
+ return 1;
+ }
+
if (print_header &&
- (output_json || !export_metadata.empty() || check_integrity)) {
+ (output_json || !export_metadata.empty() || check_integrity ||
+ list_files || !checksum_algo.empty())) {
LOG_WARN << "--print-header is mutually exclusive with --json, "
- "--export-metadata and --check-integrity";
+ "--export-metadata, --check-integrity, --list and --checksum";
return 1;
}
@@ -191,7 +320,7 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
: filesystem_check_level::CHECKSUM;
auto errors = no_check ? 0 : fs.check(level, num_workers);
- if (!quiet) {
+ if (!quiet && !list_files && checksum_algo.empty()) {
if (output_json) {
iol.out << folly::toPrettyJson(fs.info_as_dynamic(detail)) << "\n";
} else {
@@ -199,6 +328,14 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
}
}
+ if (list_files) {
+ do_list_files(fs, iol, verbose);
+ }
+
+ if (!checksum_algo.empty()) {
+ do_checksum(lgr, fs, iol, checksum_algo, num_workers);
+ }
+
if (errors > 0) {
return 1;
}
diff --git a/test/tool_main_test.cpp b/test/tool_main_test.cpp
index a2948d96..6fc3c496 100644
--- a/test/tool_main_test.cpp
+++ b/test/tool_main_test.cpp
@@ -31,6 +31,7 @@
#include
#include
+#include
#include
#include
@@ -2054,6 +2055,91 @@ TEST(dwarfsck_test, export_metadata_close_error) {
::testing::HasSubstr("failed to close metadata output file"));
}
+TEST(dwarfsck_test, checksum_algorithm_not_available) {
+ auto t = dwarfsck_tester::create_with_image();
+ EXPECT_NE(0, t.run({"image.dwarfs", "--checksum=grmpf"})) << t.err();
+ EXPECT_THAT(t.err(),
+ ::testing::HasSubstr("checksum algorithm not available: grmpf"));
+}
+
+TEST(dwarfsck_test, list_files) {
+ auto t = dwarfsck_tester::create_with_image();
+ EXPECT_EQ(0, t.run({"image.dwarfs", "--list"})) << t.err();
+ auto out = t.out();
+
+ std::set files;
+ folly::splitTo('\n', out, std::inserter(files, files.end()),
+ true);
+
+ std::set const expected{
+ "test.pl", "somelink", "somedir", "foo.pl",
+ "bar.pl", "baz.pl", "ipsum.txt", "somedir/ipsum.py",
+ "somedir/bad", "somedir/empty", "empty",
+ };
+
+ EXPECT_EQ(expected, files);
+}
+
+TEST(dwarfsck_test, list_files_verbose) {
+ auto t = dwarfsck_tester::create_with_image();
+ EXPECT_EQ(0, t.run({"image.dwarfs", "--list", "--verbose"})) << t.err();
+ auto out = t.out();
+
+ auto num_lines = std::count(out.begin(), out.end(), '\n');
+ EXPECT_EQ(12, num_lines);
+
+ std::vector expected_re{
+ fmt::format("drwxrwxrwx\\s+1000/100\\s+8\\s+{:%Y-%m-%d %H:%M}\\s*\n",
+ fmt::localtime(2)),
+ fmt::format(
+ "-rw-------\\s+1337/ 0\\s+{:L}\\s+{:%Y-%m-%d %H:%M}\\s+baz.pl\n",
+ 23456, fmt::localtime(8002)),
+ fmt::format("lrwxrwxrwx\\s+1000/100\\s+16\\s+{:%Y-%m-%d "
+ "%H:%M}\\s+somelink -> somedir/ipsum.py\n",
+ fmt::localtime(2002)),
+ };
+
+ for (auto const& str : expected_re) {
+ std::regex re{str};
+ EXPECT_TRUE(std::regex_search(out, re)) << "[" << str << "]\n" << out;
+ }
+}
+
+TEST(dwarfsck_test, checksum_files) {
+ auto t = dwarfsck_tester::create_with_image();
+ EXPECT_EQ(0, t.run({"image.dwarfs", "--checksum=md5"})) << t.err();
+ auto out = t.out();
+
+ auto num_lines = std::count(out.begin(), out.end(), '\n');
+ EXPECT_EQ(8, num_lines);
+
+ std::map actual;
+ std::vector lines;
+ folly::split('\n', out, lines);
+
+ for (auto const& line : lines) {
+ if (line.empty()) {
+ continue;
+ }
+ std::string file, hash;
+ folly::split(" ", line, hash, file);
+ EXPECT_TRUE(actual.emplace(file, hash).second);
+ }
+
+ std::map const expected{
+ {"empty", "d41d8cd98f00b204e9800998ecf8427e"},
+ {"somedir/empty", "d41d8cd98f00b204e9800998ecf8427e"},
+ {"test.pl", "d41d8cd98f00b204e9800998ecf8427e"},
+ {"baz.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
+ {"somedir/ipsum.py", "70fe813c36ed50ebd7f4991857683676"},
+ {"foo.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
+ {"bar.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
+ {"ipsum.txt", "0782b6a546cedd8be8fc86ac47dc6d96"},
+ };
+
+ EXPECT_EQ(expected, actual);
+}
+
class mkdwarfs_sim_order_test : public testing::TestWithParam {};
TEST(mkdwarfs_test, max_similarity_size) {