From 19fcdbdd0399309bab81095b2513da6065d67df5 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Tue, 27 May 2025 09:52:59 +0200 Subject: [PATCH] feat(dwarfsextract): support `--format-filters` and `--format=auto` --- doc/dwarfsextract.md | 12 ++- include/dwarfs/utility/filesystem_extractor.h | 31 +++++--- src/utility/filesystem_extractor.cpp | 79 ++++++++++++++----- test/compat_test.cpp | 2 +- test/manpage_test.cpp | 1 + test/tool_main_test.cpp | 26 ++++++ tools/src/dwarfsextract_main.cpp | 20 +++-- 7 files changed, 131 insertions(+), 40 deletions(-) diff --git a/doc/dwarfsextract.md b/doc/dwarfsextract.md index da03d12e..1d55b9fb 100644 --- a/doc/dwarfsextract.md +++ b/doc/dwarfsextract.md @@ -65,11 +65,17 @@ to disk: The archive format to produce. If this is left empty or unspecified, files will be extracted to the output directory (or the current directory if no output directory is specified). For a full list of supported formats, - see libarchive-formats(5). + see libarchive-formats(5). If an output file is specified, `--format=auto` + can be used to automatically determine the format and filters from the + file name. + +- `--format-filters=`*filters*: + Comma-separated list of libarchive filters to apply to the selected format. + Filters will be selected based on the file name with `--format=auto`. It is + not supported to use both `--format=auto` and `--format-filters`. - `--format-options=`*options*: - - Comma-separated libarchive options for the specific output format. + Comma-separated libarchive options for the specific output format/filters. The options are passed to libarchive. For a full list of options for each output format, see archive_write_set_options(3). diff --git a/include/dwarfs/utility/filesystem_extractor.h b/include/dwarfs/utility/filesystem_extractor.h index 33ee7806..e4a03fed 100644 --- a/include/dwarfs/utility/filesystem_extractor.h +++ b/include/dwarfs/utility/filesystem_extractor.h @@ -34,6 +34,7 @@ #include #include #include +#include namespace dwarfs { @@ -56,21 +57,28 @@ struct filesystem_extractor_options { std::function progress; }; +struct filesystem_extractor_archive_format { + std::string name; + std::vector filters{}; + std::string options{}; + + std::string description() const; +}; + class filesystem_extractor { public: filesystem_extractor(logger& lgr, os_access const& os); static void add_library_dependencies(library_dependencies& deps); - void - open_archive(std::filesystem::path const& output, std::string const& format, - std::string const& format_options = "") { - impl_->open_archive(output, format, format_options); + void open_archive(std::filesystem::path const& output, + filesystem_extractor_archive_format const& format) { + impl_->open_archive(output, format); } - void open_stream(std::ostream& os, std::string const& format, - std::string const& format_options = "") { - impl_->open_stream(os, format, format_options); + void open_stream(std::ostream& os, + filesystem_extractor_archive_format const& format) { + impl_->open_stream(os, format); } void open_disk(std::filesystem::path const& output) { @@ -97,10 +105,11 @@ class filesystem_extractor { virtual ~impl() = default; virtual void - open_archive(std::filesystem::path const& output, std::string const& format, - std::string const& format_options = "") = 0; - virtual void open_stream(std::ostream& os, std::string const& format, - std::string const& format_options) = 0; + open_archive(std::filesystem::path const& output, + filesystem_extractor_archive_format const& format) = 0; + virtual void + open_stream(std::ostream& os, + filesystem_extractor_archive_format const& format) = 0; virtual void open_disk(std::filesystem::path const& output) = 0; virtual void close() = 0; virtual bool diff --git a/src/utility/filesystem_extractor.cpp b/src/utility/filesystem_extractor.cpp index 9bef0c9e..cda75c22 100644 --- a/src/utility/filesystem_extractor.cpp +++ b/src/utility/filesystem_extractor.cpp @@ -42,6 +42,11 @@ #include #include +#include +#if FMT_VERSION >= 110000 +#include +#endif + #include #include #include @@ -122,21 +127,17 @@ class filesystem_extractor_ final : public filesystem_extractor::impl { } } - void - open_archive(std::filesystem::path const& output [[maybe_unused]], - std::string const& format [[maybe_unused]], - std::string const& format_options [[maybe_unused]]) override { + void open_archive(std::filesystem::path const& output [[maybe_unused]], + filesystem_extractor_archive_format const& format + [[maybe_unused]]) override { #ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT DWARFS_THROW(runtime_error, "open_archive() not supported in this build"); #else - LOG_DEBUG << "opening archive file in " << format - << " format with options '" << format_options << "'"; + LOG_DEBUG << "opening archive file in " << format.description(); a_ = ::archive_write_new(); - check_result(::archive_write_set_format_by_name(a_, format.c_str())); - check_result(::archive_write_set_options(a_, format_options.c_str())); - check_result(::archive_write_set_bytes_in_last_block(a_, 1)); + configure_format(format, &output); #ifdef _WIN32 check_result(::archive_write_open_filename_w( @@ -148,10 +149,9 @@ class filesystem_extractor_ final : public filesystem_extractor::impl { #endif } - void - open_stream(std::ostream& os [[maybe_unused]], - std::string const& format [[maybe_unused]], - std::string const& format_options [[maybe_unused]]) override { + void open_stream(std::ostream& os [[maybe_unused]], + filesystem_extractor_archive_format const& format + [[maybe_unused]]) override { #ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT DWARFS_THROW(runtime_error, "open_stream() not supported in this build"); #else @@ -168,14 +168,12 @@ class filesystem_extractor_ final : public filesystem_extractor::impl { iot_ = std::make_unique( [this, &os, fd = pipefd_[0]] { pump(os, fd); }); - LOG_DEBUG << "opening archive stream in " << format - << " format with options '" << format_options << "'"; + LOG_DEBUG << "opening archive stream in " << format.description(); a_ = ::archive_write_new(); - check_result(::archive_write_set_format_by_name(a_, format.c_str())); - check_result(::archive_write_set_options(a_, format_options.c_str())); - check_result(::archive_write_set_bytes_in_last_block(a_, 1)); + configure_format(format); + check_result(::archive_write_open_fd(a_, pipefd_[1])); #endif } @@ -221,6 +219,37 @@ class filesystem_extractor_ final : public filesystem_extractor::impl { filesystem_extractor_options const& opts) override; private: + void configure_format(filesystem_extractor_archive_format const& format + [[maybe_unused]], + std::filesystem::path const* output + [[maybe_unused]] = nullptr) { +#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT + if (format.name == "auto") { + if (!output || output->empty()) { + DWARFS_THROW(runtime_error, "auto format requires output path"); + } + + if (!format.filters.empty()) { + DWARFS_THROW(runtime_error, "auto format does not support filters"); + } + + auto fn = output->filename().string(); + + LOG_DEBUG << "setting archive format by extension for " << fn; + check_result(::archive_write_set_format_filter_by_ext(a_, fn.c_str())); + } else { + check_result(::archive_write_set_format_by_name(a_, format.name.c_str())); + + for (auto const& filter : format.filters) { + check_result(::archive_write_add_filter_by_name(a_, filter.c_str())); + } + } + + check_result(::archive_write_set_options(a_, format.options.c_str())); + check_result(::archive_write_set_bytes_in_last_block(a_, 1)); +#endif + } + void closefd(int& fd) { if (fd >= 0) { if (::close(fd) != 0) { @@ -517,6 +546,20 @@ bool filesystem_extractor_::extract( } // namespace internal +std::string filesystem_extractor_archive_format::description() const { + std::string desc = name; + + if (!filters.empty()) { + desc += fmt::format(" ({})", fmt::join(filters, ", ")); + } + + if (!options.empty()) { + desc += " with options '" + options + "'"; + } + + return desc; +} + filesystem_extractor::filesystem_extractor(logger& lgr, os_access const& os) : impl_(make_unique_logging_object(&format), + po::value(&format.name), "output format") + ("format-filters", + po::value(&format_filters), + "comma-separated libarchive format filters") ("format-options", - po::value(&format_options), - "comma-separated libarchive options for the specific output format") + po::value(&format.options), + "options for the specific libarchive format/filters") #endif ("continue-on-error", po::value(&continue_on_error)->zero_tokens(), @@ -210,7 +214,7 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) { utility::filesystem_extractor fsx(lgr, *iol.os); #ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT - if (format.empty()) { + if (format.name.empty()) { #endif fsx.open_disk(iol.os->canonical(output)); #ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT @@ -230,10 +234,12 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) { } } + split_to(format_filters, ',', format.filters); + if (stream) { - fsx.open_stream(*stream, format, format_options); + fsx.open_stream(*stream, format); } else { - fsx.open_archive(iol.os->canonical(output), format, format_options); + fsx.open_archive(iol.os->canonical(output), format); } } #endif