mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-17 16:31:27 -04:00
feat(dwarfsck): add --list, --checksum, --verbose (fixes gh #192)
This commit is contained in:
parent
d6bd917fae
commit
8831009a52
@ -25,6 +25,9 @@ with a non-zero exit code.
|
||||
- `-q`, `--quiet`:
|
||||
Don't produce any output unless there is an error.
|
||||
|
||||
- `-v`, `--verbose`:
|
||||
Produce verbose output, where applicable.
|
||||
|
||||
- `-O`, `--image-offset=`*value*|`auto`:
|
||||
Specify the byte offset at which the filesystem is located in the image.
|
||||
Use `auto` to detect the offset automatically. This is also the default.
|
||||
@ -36,6 +39,19 @@ with a non-zero exit code.
|
||||
header is present, the program will exit with exit code 2 and emit a
|
||||
warning.
|
||||
|
||||
- `-l`, `--list`:
|
||||
List all entries in the file system image. Uses output similar to `tar -t`.
|
||||
With `--verbose`, also print the mode, user/group ID, size and modification
time of each entry, as well as the target of symbolic links.
|
||||
|
||||
- `--checksum=`*name*:
|
||||
Produce a checksum using the specified algorithm for each regular file in
|
||||
the file system image. This can be used to easily verify the file system
|
||||
image against local files, e.g.:
|
||||
|
||||
```
|
||||
dwarfsck --checksum=sha512 /tmp/fs.dwarfs | sha512sum --check
|
||||
```
|
||||
|
||||
- `-n`, `--num-workers=`*value*:
|
||||
Number of worker threads used for integrity checking and for computing
checksums with `--checksum`.
|
||||
|
||||
|
@ -19,18 +19,25 @@
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <fmt/chrono.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <folly/String.h>
|
||||
#include <folly/gen/String.h>
|
||||
#include <folly/json.h>
|
||||
#include <folly/portability/Unistd.h>
|
||||
#include <folly/system/HardwareConcurrency.h>
|
||||
|
||||
#include "dwarfs/checksum.h"
|
||||
#include "dwarfs/error.h"
|
||||
#include "dwarfs/file_access.h"
|
||||
#include "dwarfs/filesystem_v2.h"
|
||||
@ -41,24 +48,131 @@
|
||||
#include "dwarfs/os_access.h"
|
||||
#include "dwarfs/tool.h"
|
||||
#include "dwarfs/util.h"
|
||||
#include "dwarfs/worker_group.h"
|
||||
#include "dwarfs_tool_main.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * Print a listing of all entries in the file system image to `iol.out`.
 *
 * Without `verbose`, the path of each entry is written on a line of its own;
 * entries whose path is empty are skipped.
 *
 * With `verbose`, every entry (including one with an empty path, presumably
 * the root directory) is written as a single line containing the mode string,
 * uid/gid, size, modification time and path. For symbolic links, the path is
 * followed by " -> " and the link target. The uid, gid and size columns are
 * padded to the width of the largest value found in the image.
 *
 * Paths and link targets are passed through utf8_sanitize() before output.
 *
 * \param fs       file system whose entries are listed
 * \param iol      I/O layer providing the output stream
 * \param verbose  emit the detailed, column-aligned format
 */
void do_list_files(filesystem_v2& fs, iolayer const& iol, bool verbose) {
  if (!verbose) {
    // The plain listing needs neither attributes nor column widths, so the
    // sizing pass and the per-entry getattr() calls are skipped entirely.
    fs.walk([&](auto const& de) {
      auto name = de.unix_path();
      utf8_sanitize(name);

      if (!name.empty()) {
        iol.out << name << "\n";
      }
    });

    return;
  }

  // Number of characters needed to print the largest value in `vec`.
  auto max_width = [](auto const& vec) -> std::string::size_type {
    auto max = std::max_element(vec.begin(), vec.end());

    if (max == vec.end()) {
      // Empty input: dereferencing end() would be undefined behaviour.
      return 1;
    }

    return std::to_string(*max).size();
  };

  auto const uid_width = max_width(fs.get_all_uids());
  auto const gid_width = max_width(fs.get_all_gids());

  // First pass: find the largest size so the size column can be aligned.
  file_stat::off_type max_inode_size{0};

  fs.walk([&](auto const& de) {
    file_stat st;
    fs.getattr(de.inode(), &st);
    max_inode_size = std::max(max_inode_size, st.size);
  });

  // Sizes are printed with the `L` (locale) specifier, so the width has to be
  // measured on the identically formatted value.
  auto const inode_size_width = fmt::format("{:L}", max_inode_size).size();

  // Second pass: emit one formatted line per entry.
  fs.walk([&](auto const& de) {
    auto iv = de.inode();
    file_stat st;
    fs.getattr(iv, &st);
    auto name = de.unix_path();
    utf8_sanitize(name);

    if (iv.is_symlink()) {
      auto target = fs.readlink(iv).value();
      utf8_sanitize(target);
      name += " -> " + target;
    }

    // Arguments 0-2 are the dynamic column widths, 3-8 the actual fields.
    iol.out << fmt::format(
        "{3} {4:{0}}/{5:{1}} {6:{2}L} {7:%Y-%m-%d %H:%M} {8}\n", uid_width,
        gid_width, inode_size_width, iv.mode_string(), iv.getuid(),
        iv.getgid(), st.size, fmt::localtime(st.mtime), name);
  });
}
|
||||
|
||||
/**
 * Compute a checksum for every regular file in the image and print it.
 *
 * Entries are visited via `fs.walk_data_order()`. For each regular file a job
 * is queued on a worker group with `num_workers` workers. The job reads the
 * file contents, feeds them to a `checksum` object for `algo` and writes one
 * line of the form "<hexdigest> <path>" to `iol.out`.
 *
 * A failure to stat or read a file is logged as an error and that file is
 * skipped. Nothing is returned, so such failures are not reported back to
 * the caller.
 *
 * \param lgr          logger used for diagnostics and by the worker group
 * \param fs           file system whose files are checksummed
 * \param iol          I/O layer providing the output stream and OS access
 * \param algo         name of the checksum algorithm
 * \param num_workers  number of workers in the worker group
 */
void do_checksum(logger& lgr, filesystem_v2& fs, iolayer const& iol,
                 std::string const& algo, size_t num_workers) {
  LOG_PROXY(debug_logger_policy, lgr);

  worker_group workers{lgr, *iol.os, "checksum", num_workers};
  std::mutex out_mutex; // serializes writes to iol.out across jobs

  fs.walk_data_order([&](auto const& de) {
    auto iv = de.inode();

    // Only regular files get a checksum line.
    if (!iv.is_regular_file()) {
      return;
    }

    // `de` and `iv` are copied into the job; everything else is captured by
    // reference and is still in scope when workers.wait() is called below.
    workers.add_job([&, de, iv] {
      file_stat attr;

      if (fs.getattr(de.inode(), &attr) != 0) {
        LOG_ERROR << "failed to get attributes for inode " << iv.inode_num();
        return;
      }

      auto chunks = fs.readv(iv.inode_num(), attr.size);

      if (!chunks) {
        LOG_ERROR << "failed to read inode " << iv.inode_num() << ": "
                  << std::strerror(-chunks.error());
        return;
      }

      checksum hasher(algo);

      for (auto& pending : chunks.value()) {
        try {
          auto block = pending.get();
          hasher.update(block.data(), block.size());
        } catch (std::exception const& e) {
          LOG_ERROR << "error reading data from inode " << iv.inode_num()
                    << ": " << e.what();
          return;
        }
      }

      // Build the complete line first so the lock is held only for the write.
      auto line = fmt::format("{} {}\n", hasher.hexdigest(), de.unix_path());

      std::lock_guard guard(out_mutex);
      iol.out << line;
    });
  });

  workers.wait();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
|
||||
using namespace folly::gen;
|
||||
|
||||
const size_t num_cpu = std::max(folly::hardware_concurrency(), 1u);
|
||||
|
||||
std::string input, export_metadata, image_offset;
|
||||
auto algo_list = checksum::available_algorithms();
|
||||
auto checksum_desc = "print checksums for all files (" +
|
||||
(from(algo_list) | unsplit(", ")) + ")";
|
||||
|
||||
std::string input, export_metadata, image_offset, checksum_algo;
|
||||
logger_options logopts;
|
||||
size_t num_workers;
|
||||
int detail;
|
||||
bool quiet{false};
|
||||
bool verbose{false};
|
||||
bool output_json{false};
|
||||
bool check_integrity{false};
|
||||
bool no_check{false};
|
||||
bool print_header{false};
|
||||
bool list_files{false};
|
||||
|
||||
// clang-format off
|
||||
po::options_description opts("Command line options");
|
||||
@ -72,12 +186,21 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
|
||||
("quiet,q",
|
||||
po::value<bool>(&quiet)->zero_tokens(),
|
||||
"don't print anything unless an error occurs")
|
||||
("verbose,v",
|
||||
po::value<bool>(&verbose)->zero_tokens(),
|
||||
"produce verbose output")
|
||||
("image-offset,O",
|
||||
po::value<std::string>(&image_offset)->default_value("auto"),
|
||||
"filesystem image offset in bytes")
|
||||
("print-header,H",
|
||||
po::value<bool>(&print_header)->zero_tokens(),
|
||||
"print filesystem header to stdout and exit")
|
||||
("list,l",
|
||||
po::value<bool>(&list_files)->zero_tokens(),
|
||||
"list all files and exit")
|
||||
("checksum",
|
||||
po::value<std::string>(&checksum_algo),
|
||||
checksum_desc.c_str())
|
||||
("num-workers,n",
|
||||
po::value<size_t>(&num_workers)->default_value(num_cpu),
|
||||
"number of reader worker threads")
|
||||
@ -138,10 +261,16 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (vm.count("checksum") && !checksum::is_available(checksum_algo)) {
|
||||
LOG_WARN << "checksum algorithm not available: " << checksum_algo;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (print_header &&
|
||||
(output_json || !export_metadata.empty() || check_integrity)) {
|
||||
(output_json || !export_metadata.empty() || check_integrity ||
|
||||
list_files || !checksum_algo.empty())) {
|
||||
LOG_WARN << "--print-header is mutually exclusive with --json, "
|
||||
"--export-metadata and --check-integrity";
|
||||
"--export-metadata, --check-integrity, --list and --checksum";
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -191,7 +320,7 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
|
||||
: filesystem_check_level::CHECKSUM;
|
||||
auto errors = no_check ? 0 : fs.check(level, num_workers);
|
||||
|
||||
if (!quiet) {
|
||||
if (!quiet && !list_files && checksum_algo.empty()) {
|
||||
if (output_json) {
|
||||
iol.out << folly::toPrettyJson(fs.info_as_dynamic(detail)) << "\n";
|
||||
} else {
|
||||
@ -199,6 +328,14 @@ int dwarfsck_main(int argc, sys_char** argv, iolayer const& iol) {
|
||||
}
|
||||
}
|
||||
|
||||
if (list_files) {
|
||||
do_list_files(fs, iol, verbose);
|
||||
}
|
||||
|
||||
if (!checksum_algo.empty()) {
|
||||
do_checksum(lgr, fs, iol, checksum_algo, num_workers);
|
||||
}
|
||||
|
||||
if (errors > 0) {
|
||||
return 1;
|
||||
}
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <fmt/chrono.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <folly/FileUtil.h>
|
||||
@ -2054,6 +2055,91 @@ TEST(dwarfsck_test, export_metadata_close_error) {
|
||||
::testing::HasSubstr("failed to close metadata output file"));
|
||||
}
|
||||
|
||||
// Requesting an unknown checksum algorithm must fail with a non-zero exit
// code and name the rejected algorithm on the error stream.
TEST(dwarfsck_test, checksum_algorithm_not_available) {
  auto tester = dwarfsck_tester::create_with_image();

  auto const exit_code = tester.run({"image.dwarfs", "--checksum=grmpf"});
  EXPECT_NE(0, exit_code) << tester.err();

  auto const errors = tester.err();
  EXPECT_THAT(errors, ::testing::HasSubstr(
                          "checksum algorithm not available: grmpf"));
}
|
||||
|
||||
// `--list` without `--verbose` prints exactly one path per line; the set of
// printed paths must equal the set of entries in the test image.
TEST(dwarfsck_test, list_files) {
  auto tester = dwarfsck_tester::create_with_image();

  auto const exit_code = tester.run({"image.dwarfs", "--list"});
  EXPECT_EQ(0, exit_code) << tester.err();

  auto listing = tester.out();

  // Order is irrelevant, so compare as sets; empty pieces (such as the one
  // after the trailing newline) are dropped by the final `true` argument.
  std::set<std::string> listed;
  folly::splitTo<std::string>('\n', listing,
                              std::inserter(listed, listed.end()), true);

  std::set<std::string> const expected{
      "test.pl",          "somelink",    "somedir",
      "foo.pl",           "bar.pl",      "baz.pl",
      "ipsum.txt",        "somedir/ipsum.py",
      "somedir/bad",      "somedir/empty",
      "empty",
  };

  EXPECT_EQ(expected, listed);
}
|
||||
|
||||
// `--list --verbose` prints one detailed line per entry. The test checks the
// total line count and spot-checks a directory, a regular file and a symlink
// line against regular expressions.
TEST(dwarfsck_test, list_files_verbose) {
  auto t = dwarfsck_tester::create_with_image();
  EXPECT_EQ(0, t.run({"image.dwarfs", "--list", "--verbose"})) << t.err();
  auto out = t.out();

  // One more line than the eleven paths of the non-verbose listing: the
  // verbose output also contains a line for the entry with an empty name.
  auto num_lines = std::count(out.begin(), out.end(), '\n');
  EXPECT_EQ(12, num_lines);

  // Timestamps are rendered through the same fmt/localtime path as the tool
  // uses, so the expectations do not depend on the local time zone.
  std::vector<std::string> expected_re{
      // Entry with an empty name: nothing but optional whitespace follows
      // the timestamp.
      fmt::format("drwxrwxrwx\\s+1000/100\\s+8\\s+{:%Y-%m-%d %H:%M}\\s*\n",
                  fmt::localtime(2)),
      // The gid column is padded to the width of the largest gid, so the
      // padding before `0` is matched with `\s*` rather than a fixed number
      // of spaces. Literal dots in file names are escaped.
      fmt::format("-rw-------\\s+1337/\\s*0\\s+{:L}\\s+{:%Y-%m-%d "
                  "%H:%M}\\s+baz\\.pl\n",
                  23456, fmt::localtime(8002)),
      fmt::format("lrwxrwxrwx\\s+1000/100\\s+16\\s+{:%Y-%m-%d "
                  "%H:%M}\\s+somelink -> somedir/ipsum\\.py\n",
                  fmt::localtime(2002)),
  };

  for (auto const& str : expected_re) {
    std::regex re{str};
    EXPECT_TRUE(std::regex_search(out, re)) << "[" << str << "]\n" << out;
  }
}
|
||||
|
||||
// `--checksum=md5` prints one "<digest> <path>" line per regular file; the
// resulting path -> digest mapping must match the known MD5 sums of the
// files in the test image.
TEST(dwarfsck_test, checksum_files) {
  auto tester = dwarfsck_tester::create_with_image();

  auto const exit_code = tester.run({"image.dwarfs", "--checksum=md5"});
  EXPECT_EQ(0, exit_code) << tester.err();

  auto output = tester.out();

  auto const line_count = std::count(output.begin(), output.end(), '\n');
  EXPECT_EQ(8, line_count);

  std::vector<std::string_view> rows;
  folly::split('\n', output, rows);

  // Lines are collected into a map so the comparison does not depend on
  // their order in the output.
  std::map<std::string, std::string> digests;

  for (auto const& row : rows) {
    if (!row.empty()) {
      std::string path, digest;
      folly::split(" ", row, digest, path);
      // Every path must show up exactly once.
      EXPECT_TRUE(digests.emplace(path, digest).second);
    }
  }

  std::map<std::string, std::string> const expected{
      {"empty", "d41d8cd98f00b204e9800998ecf8427e"},
      {"somedir/empty", "d41d8cd98f00b204e9800998ecf8427e"},
      {"test.pl", "d41d8cd98f00b204e9800998ecf8427e"},
      {"baz.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
      {"somedir/ipsum.py", "70fe813c36ed50ebd7f4991857683676"},
      {"foo.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
      {"bar.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
      {"ipsum.txt", "0782b6a546cedd8be8fc86ac47dc6d96"},
  };

  EXPECT_EQ(expected, digests);
}
|
||||
|
||||
// Value-parameterized gtest fixture; each instantiation receives a
// `char const*` parameter. The fixture itself adds no members.
class mkdwarfs_sim_order_test : public testing::TestWithParam<char const*> {};
|
||||
|
||||
TEST(mkdwarfs_test, max_similarity_size) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user