feat(dwarfsextract): support --format-filters and --format=auto

This commit is contained in:
Marcus Holland-Moritz 2025-05-27 09:52:59 +02:00
parent bd1c068f3e
commit 19fcdbdd03
7 changed files with 131 additions and 40 deletions

View File

@ -65,11 +65,17 @@ to disk:
The archive format to produce. If this is left empty or unspecified, The archive format to produce. If this is left empty or unspecified,
files will be extracted to the output directory (or the current directory files will be extracted to the output directory (or the current directory
if no output directory is specified). For a full list of supported formats, if no output directory is specified). For a full list of supported formats,
see libarchive-formats(5). see libarchive-formats(5). If an output file is specified, `--format=auto`
can be used to automatically determine the format and filters from the
file name.
- `--format-filters=`*filters*:
Comma-separated list of libarchive filters to apply to the selected format.
With `--format=auto`, filters are selected automatically based on the file
name. Combining `--format=auto` with `--format-filters` is not supported.
- `--format-options=`*options*: - `--format-options=`*options*:
Comma-separated libarchive options for the specific output format/filters.
Comma-separated libarchive options for the specific output format.
The options are passed to libarchive. For a full list of options for each The options are passed to libarchive. For a full list of options for each
output format, see archive_write_set_options(3). output format, see archive_write_set_options(3).

View File

@ -34,6 +34,7 @@
#include <ostream> #include <ostream>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <vector>
namespace dwarfs { namespace dwarfs {
@ -56,21 +57,28 @@ struct filesystem_extractor_options {
std::function<void(std::string_view, uint64_t, uint64_t)> progress; std::function<void(std::string_view, uint64_t, uint64_t)> progress;
}; };
// Describes the archive output requested from filesystem_extractor's
// open_archive() / open_stream(): the format name, any filters, and the
// option string.
struct filesystem_extractor_archive_format {
// Archive format name handed to archive_write_set_format_by_name(). The
// special value "auto" makes the extractor derive format and filters from
// the output file name instead.
std::string name;
// Filter names, added one by one via archive_write_add_filter_by_name() in
// the order given. Must be empty when `name` is "auto".
std::vector<std::string> filters{};
// Option string handed to archive_write_set_options(); may be empty.
std::string options{};
// Returns a human-readable summary of the form
// "<name>[ (<filter>, ...)][ with options '<options>']", used in log output.
std::string description() const;
};
class filesystem_extractor { class filesystem_extractor {
public: public:
filesystem_extractor(logger& lgr, os_access const& os); filesystem_extractor(logger& lgr, os_access const& os);
static void add_library_dependencies(library_dependencies& deps); static void add_library_dependencies(library_dependencies& deps);
void void open_archive(std::filesystem::path const& output,
open_archive(std::filesystem::path const& output, std::string const& format, filesystem_extractor_archive_format const& format) {
std::string const& format_options = "") { impl_->open_archive(output, format);
impl_->open_archive(output, format, format_options);
} }
void open_stream(std::ostream& os, std::string const& format, void open_stream(std::ostream& os,
std::string const& format_options = "") { filesystem_extractor_archive_format const& format) {
impl_->open_stream(os, format, format_options); impl_->open_stream(os, format);
} }
void open_disk(std::filesystem::path const& output) { void open_disk(std::filesystem::path const& output) {
@ -97,10 +105,11 @@ class filesystem_extractor {
virtual ~impl() = default; virtual ~impl() = default;
virtual void virtual void
open_archive(std::filesystem::path const& output, std::string const& format, open_archive(std::filesystem::path const& output,
std::string const& format_options = "") = 0; filesystem_extractor_archive_format const& format) = 0;
virtual void open_stream(std::ostream& os, std::string const& format, virtual void
std::string const& format_options) = 0; open_stream(std::ostream& os,
filesystem_extractor_archive_format const& format) = 0;
virtual void open_disk(std::filesystem::path const& output) = 0; virtual void open_disk(std::filesystem::path const& output) = 0;
virtual void close() = 0; virtual void close() = 0;
virtual bool virtual bool

View File

@ -42,6 +42,11 @@
#include <archive.h> #include <archive.h>
#include <archive_entry.h> #include <archive_entry.h>
#include <fmt/format.h>
#if FMT_VERSION >= 110000
#include <fmt/ranges.h>
#endif
#include <folly/ExceptionString.h> #include <folly/ExceptionString.h>
#include <folly/portability/Fcntl.h> #include <folly/portability/Fcntl.h>
#include <folly/portability/Unistd.h> #include <folly/portability/Unistd.h>
@ -122,21 +127,17 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
} }
} }
void void open_archive(std::filesystem::path const& output [[maybe_unused]],
open_archive(std::filesystem::path const& output [[maybe_unused]], filesystem_extractor_archive_format const& format
std::string const& format [[maybe_unused]], [[maybe_unused]]) override {
std::string const& format_options [[maybe_unused]]) override {
#ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT #ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
DWARFS_THROW(runtime_error, "open_archive() not supported in this build"); DWARFS_THROW(runtime_error, "open_archive() not supported in this build");
#else #else
LOG_DEBUG << "opening archive file in " << format LOG_DEBUG << "opening archive file in " << format.description();
<< " format with options '" << format_options << "'";
a_ = ::archive_write_new(); a_ = ::archive_write_new();
check_result(::archive_write_set_format_by_name(a_, format.c_str())); configure_format(format, &output);
check_result(::archive_write_set_options(a_, format_options.c_str()));
check_result(::archive_write_set_bytes_in_last_block(a_, 1));
#ifdef _WIN32 #ifdef _WIN32
check_result(::archive_write_open_filename_w( check_result(::archive_write_open_filename_w(
@ -148,10 +149,9 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
#endif #endif
} }
void void open_stream(std::ostream& os [[maybe_unused]],
open_stream(std::ostream& os [[maybe_unused]], filesystem_extractor_archive_format const& format
std::string const& format [[maybe_unused]], [[maybe_unused]]) override {
std::string const& format_options [[maybe_unused]]) override {
#ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT #ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
DWARFS_THROW(runtime_error, "open_stream() not supported in this build"); DWARFS_THROW(runtime_error, "open_stream() not supported in this build");
#else #else
@ -168,14 +168,12 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
iot_ = std::make_unique<std::thread>( iot_ = std::make_unique<std::thread>(
[this, &os, fd = pipefd_[0]] { pump(os, fd); }); [this, &os, fd = pipefd_[0]] { pump(os, fd); });
LOG_DEBUG << "opening archive stream in " << format LOG_DEBUG << "opening archive stream in " << format.description();
<< " format with options '" << format_options << "'";
a_ = ::archive_write_new(); a_ = ::archive_write_new();
check_result(::archive_write_set_format_by_name(a_, format.c_str())); configure_format(format);
check_result(::archive_write_set_options(a_, format_options.c_str()));
check_result(::archive_write_set_bytes_in_last_block(a_, 1));
check_result(::archive_write_open_fd(a_, pipefd_[1])); check_result(::archive_write_open_fd(a_, pipefd_[1]));
#endif #endif
} }
@ -221,6 +219,37 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
filesystem_extractor_options const& opts) override; filesystem_extractor_options const& opts) override;
private: private:
// Applies the requested format, filters and options to the archive writer
// `a_`, then sets the last-block size to 1 byte.
//
// With format.name == "auto", format and filters are derived from the file
// name of `output`; this requires a non-empty output path and an empty
// filter list, otherwise a runtime_error is thrown. For any other name, the
// format is selected by name and each filter is added in the order given.
//
// Compiles to a no-op when DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT is set.
void configure_format(filesystem_extractor_archive_format const& format
                      [[maybe_unused]],
                      std::filesystem::path const* output
                      [[maybe_unused]] = nullptr) {
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
  bool const detect_from_file_name = format.name == "auto";

  if (detect_from_file_name) {
    bool const have_output_path = output != nullptr && !output->empty();

    if (!have_output_path) {
      DWARFS_THROW(runtime_error, "auto format requires output path");
    }

    bool const have_explicit_filters = !format.filters.empty();

    if (have_explicit_filters) {
      DWARFS_THROW(runtime_error, "auto format does not support filters");
    }

    // Only the final path component is used for extension-based detection.
    auto file_name = output->filename().string();

    LOG_DEBUG << "setting archive format by extension for " << file_name;

    check_result(
        ::archive_write_set_format_filter_by_ext(a_, file_name.c_str()));
  } else {
    check_result(::archive_write_set_format_by_name(a_, format.name.c_str()));

    for (auto const& filter_name : format.filters) {
      check_result(
          ::archive_write_add_filter_by_name(a_, filter_name.c_str()));
    }
  }

  // Options and block size are applied after format and filters, in both
  // the auto and the explicit case.
  check_result(::archive_write_set_options(a_, format.options.c_str()));
  check_result(::archive_write_set_bytes_in_last_block(a_, 1));
#endif
}
void closefd(int& fd) { void closefd(int& fd) {
if (fd >= 0) { if (fd >= 0) {
if (::close(fd) != 0) { if (::close(fd) != 0) {
@ -517,6 +546,20 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
} // namespace internal } // namespace internal
// Builds a human-readable summary of this format for log messages:
// the format name, followed by " (f1, f2, ...)" if any filters are set,
// followed by " with options '<options>'" if the option string is non-empty.
std::string filesystem_extractor_archive_format::description() const {
  std::string const filter_part =
      filters.empty() ? std::string{}
                      : fmt::format(" ({})", fmt::join(filters, ", "));

  std::string const option_part =
      options.empty() ? std::string{} : " with options '" + options + "'";

  return name + filter_part + option_part;
}
filesystem_extractor::filesystem_extractor(logger& lgr, os_access const& os) filesystem_extractor::filesystem_extractor(logger& lgr, os_access const& os)
: impl_(make_unique_logging_object<filesystem_extractor::impl, : impl_(make_unique_logging_object<filesystem_extractor::impl,
internal::filesystem_extractor_, internal::filesystem_extractor_,

View File

@ -1135,7 +1135,7 @@ void check_compat(logger& lgr [[maybe_unused]], reader::filesystem_v2 const& fs,
utility::filesystem_extractor ext(lgr, os); utility::filesystem_extractor ext(lgr, os);
std::ostringstream oss; std::ostringstream oss;
EXPECT_NO_THROW(ext.open_stream(oss, "mtree")); EXPECT_NO_THROW(ext.open_stream(oss, {.name = "mtree"}));
EXPECT_NO_THROW(ext.extract(fs)); EXPECT_NO_THROW(ext.extract(fs));
EXPECT_NO_THROW(ext.close()); EXPECT_NO_THROW(ext.close());

View File

@ -194,6 +194,7 @@ TEST_P(manpage_coverage_test, options) {
if (tool_name == "dwarfsextract") { if (tool_name == "dwarfsextract") {
#ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT #ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
man_opts.erase("format"); man_opts.erase("format");
man_opts.erase("format-filters");
man_opts.erase("format-options"); man_opts.erase("format-options");
#endif #endif
man_opts.erase("pattern"); man_opts.erase("pattern");

View File

@ -2263,6 +2263,32 @@ TEST(dwarfsextract_test, mtree) {
EXPECT_THAT(out, ::testing::HasSubstr("sha256digest=")); EXPECT_THAT(out, ::testing::HasSubstr("sha256digest="));
} }
// Runs dwarfsextract with `-f gnutar --format-filters zstd` and a zstd
// compression-level option, then checks that libarchive can open the
// captured output as a zstd-filtered GNU tar archive and read its first
// entry header.
TEST(dwarfsextract_test, filters) {
  auto t = dwarfsextract_tester::create_with_image();
  ASSERT_EQ(0, t.run({"-i", "image.dwarfs", "-f", "gnutar", "--format-filters",
                      "zstd", "--format-options", "zstd:compression-level=3"}))
      << t.err();

  auto out = t.out();

  auto ar = ::archive_read_new();

  // Pin format and filter explicitly instead of relying on auto-detection,
  // so the test fails if the output is not exactly gnutar + zstd.
  ASSERT_EQ(ARCHIVE_OK,
            ::archive_read_set_format(ar, ARCHIVE_FORMAT_TAR_GNUTAR))
      << ::archive_error_string(ar);
  // ARCHIVE_WARN is tolerated when appending the zstd filter.
  ASSERT_THAT(::archive_read_append_filter(ar, ARCHIVE_FILTER_ZSTD),
              ::testing::AnyOf(ARCHIVE_OK, ARCHIVE_WARN))
      << ::archive_error_string(ar);
  ASSERT_EQ(ARCHIVE_OK, ::archive_read_open_memory(ar, out.data(), out.size()))
      << ::archive_error_string(ar);

  struct archive_entry* entry = nullptr;
  int ret = ::archive_read_next_header(ar, &entry);
  EXPECT_EQ(ARCHIVE_OK, ret) << ::archive_error_string(ar);

  // Deliberately no archive_error_string(ar) here: the failure message is
  // evaluated only after archive_read_free() has run, at which point `ar`
  // has been released and must not be dereferenced.
  EXPECT_EQ(ARCHIVE_OK, ::archive_read_free(ar));
}
TEST(dwarfsextract_test, patterns) { TEST(dwarfsextract_test, patterns) {
auto mkdt = mkdwarfs_tester::create_empty(); auto mkdt = mkdwarfs_tester::create_empty();
mkdt.add_test_file_tree(); mkdt.add_test_file_tree();

View File

@ -72,7 +72,8 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
std::string cache_size_str, image_offset; std::string cache_size_str, image_offset;
logger_options logopts; logger_options logopts;
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT #ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
std::string format, format_options; utility::filesystem_extractor_archive_format format;
std::string format_filters;
#endif #endif
#if DWARFS_PERFMON_ENABLED #if DWARFS_PERFMON_ENABLED
std::string perfmon_str; std::string perfmon_str;
@ -98,11 +99,14 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
"filesystem image offset in bytes") "filesystem image offset in bytes")
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT #ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
("format,f", ("format,f",
po::value<std::string>(&format), po::value<std::string>(&format.name),
"output format") "output format")
("format-filters",
po::value<std::string>(&format_filters),
"comma-separated libarchive format filters")
("format-options", ("format-options",
po::value<std::string>(&format_options), po::value<std::string>(&format.options),
"comma-separated libarchive options for the specific output format") "options for the specific libarchive format/filters")
#endif #endif
("continue-on-error", ("continue-on-error",
po::value<bool>(&continue_on_error)->zero_tokens(), po::value<bool>(&continue_on_error)->zero_tokens(),
@ -210,7 +214,7 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
utility::filesystem_extractor fsx(lgr, *iol.os); utility::filesystem_extractor fsx(lgr, *iol.os);
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT #ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
if (format.empty()) { if (format.name.empty()) {
#endif #endif
fsx.open_disk(iol.os->canonical(output)); fsx.open_disk(iol.os->canonical(output));
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT #ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
@ -230,10 +234,12 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
} }
} }
split_to(format_filters, ',', format.filters);
if (stream) { if (stream) {
fsx.open_stream(*stream, format, format_options); fsx.open_stream(*stream, format);
} else { } else {
fsx.open_archive(iol.os->canonical(output), format, format_options); fsx.open_archive(iol.os->canonical(output), format);
} }
} }
#endif #endif