feat(dwarfsextract): support --format-filters and --format=auto

This commit is contained in:
Marcus Holland-Moritz 2025-05-27 09:52:59 +02:00
parent bd1c068f3e
commit 19fcdbdd03
7 changed files with 131 additions and 40 deletions

View File

@ -65,11 +65,17 @@ to disk:
The archive format to produce. If this is left empty or unspecified,
files will be extracted to the output directory (or the current directory
if no output directory is specified). For a full list of supported formats,
see libarchive-formats(5).
see libarchive-formats(5). If an output file is specified, `--format=auto`
can be used to automatically determine the format and filters from the
file name.
- `--format-filters=`*filters*:
Comma-separated list of libarchive filters to apply to the selected format.
With `--format=auto`, filters are selected automatically based on the file
name, so combining `--format=auto` with `--format-filters` is not supported.
- `--format-options=`*options*:
Comma-separated libarchive options for the specific output format.
Comma-separated libarchive options for the specific output format/filters.
The options are passed to libarchive. For a full list of options for each
output format, see archive_write_set_options(3).

View File

@ -34,6 +34,7 @@
#include <ostream>
#include <string>
#include <string_view>
#include <vector>
namespace dwarfs {
@ -56,21 +57,28 @@ struct filesystem_extractor_options {
std::function<void(std::string_view, uint64_t, uint64_t)> progress;
};
// Describes the archive output requested from filesystem_extractor:
// a format name plus optional filters and an option string.
struct filesystem_extractor_archive_format {
// Name of the archive format. The extractor implementation treats the
// special value "auto" as "derive format and filters from the output
// file name".
std::string name;
// Names of the filters to add to the archive writer, in the order given.
// The extractor implementation rejects a non-empty list when `name` is
// "auto".
std::vector<std::string> filters{};
// Option string handed to the archive writer; empty means no options.
std::string options{};
// Returns a human-readable summary of name, filters and options.
std::string description() const;
};
class filesystem_extractor {
public:
filesystem_extractor(logger& lgr, os_access const& os);
static void add_library_dependencies(library_dependencies& deps);
void
open_archive(std::filesystem::path const& output, std::string const& format,
std::string const& format_options = "") {
impl_->open_archive(output, format, format_options);
void open_archive(std::filesystem::path const& output,
filesystem_extractor_archive_format const& format) {
impl_->open_archive(output, format);
}
void open_stream(std::ostream& os, std::string const& format,
std::string const& format_options = "") {
impl_->open_stream(os, format, format_options);
void open_stream(std::ostream& os,
filesystem_extractor_archive_format const& format) {
impl_->open_stream(os, format);
}
void open_disk(std::filesystem::path const& output) {
@ -97,10 +105,11 @@ class filesystem_extractor {
virtual ~impl() = default;
virtual void
open_archive(std::filesystem::path const& output, std::string const& format,
std::string const& format_options = "") = 0;
virtual void open_stream(std::ostream& os, std::string const& format,
std::string const& format_options) = 0;
open_archive(std::filesystem::path const& output,
filesystem_extractor_archive_format const& format) = 0;
virtual void
open_stream(std::ostream& os,
filesystem_extractor_archive_format const& format) = 0;
virtual void open_disk(std::filesystem::path const& output) = 0;
virtual void close() = 0;
virtual bool

View File

@ -42,6 +42,11 @@
#include <archive.h>
#include <archive_entry.h>
#include <fmt/format.h>
#if FMT_VERSION >= 110000
#include <fmt/ranges.h>
#endif
#include <folly/ExceptionString.h>
#include <folly/portability/Fcntl.h>
#include <folly/portability/Unistd.h>
@ -122,21 +127,17 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
}
}
void
open_archive(std::filesystem::path const& output [[maybe_unused]],
std::string const& format [[maybe_unused]],
std::string const& format_options [[maybe_unused]]) override {
void open_archive(std::filesystem::path const& output [[maybe_unused]],
filesystem_extractor_archive_format const& format
[[maybe_unused]]) override {
#ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
DWARFS_THROW(runtime_error, "open_archive() not supported in this build");
#else
LOG_DEBUG << "opening archive file in " << format
<< " format with options '" << format_options << "'";
LOG_DEBUG << "opening archive file in " << format.description();
a_ = ::archive_write_new();
check_result(::archive_write_set_format_by_name(a_, format.c_str()));
check_result(::archive_write_set_options(a_, format_options.c_str()));
check_result(::archive_write_set_bytes_in_last_block(a_, 1));
configure_format(format, &output);
#ifdef _WIN32
check_result(::archive_write_open_filename_w(
@ -148,10 +149,9 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
#endif
}
void
open_stream(std::ostream& os [[maybe_unused]],
std::string const& format [[maybe_unused]],
std::string const& format_options [[maybe_unused]]) override {
void open_stream(std::ostream& os [[maybe_unused]],
filesystem_extractor_archive_format const& format
[[maybe_unused]]) override {
#ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
DWARFS_THROW(runtime_error, "open_stream() not supported in this build");
#else
@ -168,14 +168,12 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
iot_ = std::make_unique<std::thread>(
[this, &os, fd = pipefd_[0]] { pump(os, fd); });
LOG_DEBUG << "opening archive stream in " << format
<< " format with options '" << format_options << "'";
LOG_DEBUG << "opening archive stream in " << format.description();
a_ = ::archive_write_new();
check_result(::archive_write_set_format_by_name(a_, format.c_str()));
check_result(::archive_write_set_options(a_, format_options.c_str()));
check_result(::archive_write_set_bytes_in_last_block(a_, 1));
configure_format(format);
check_result(::archive_write_open_fd(a_, pipefd_[1]));
#endif
}
@ -221,6 +219,37 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
filesystem_extractor_options const& opts) override;
private:
// Configures the libarchive writer `a_` according to `format`.
//
// With the special format name "auto", both format and filters are derived
// from the file name of `output`; this requires a non-empty output path and
// an empty filter list, otherwise a runtime_error is thrown. Any other name
// is selected as an explicit format, followed by the filters in the order
// given. In both cases the option string is applied afterwards, followed by
// archive_write_set_bytes_in_last_block(a_, 1).
//
// The body is compiled out when DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
// is defined.
void configure_format(filesystem_extractor_archive_format const& format
                      [[maybe_unused]],
                      std::filesystem::path const* output
                      [[maybe_unused]] = nullptr) {
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
  if (format.name != "auto") {
    // Explicit format: select it by name, then add the requested filters.
    check_result(::archive_write_set_format_by_name(a_, format.name.c_str()));

    for (auto const& filter_name : format.filters) {
      check_result(
          ::archive_write_add_filter_by_name(a_, filter_name.c_str()));
    }
  } else {
    bool const have_output = output != nullptr && !output->empty();

    if (!have_output) {
      DWARFS_THROW(runtime_error, "auto format requires output path");
    }

    if (!format.filters.empty()) {
      DWARFS_THROW(runtime_error, "auto format does not support filters");
    }

    // Only the final path component is passed on for detection.
    auto const file_name = output->filename().string();

    LOG_DEBUG << "setting archive format by extension for " << file_name;

    check_result(
        ::archive_write_set_format_filter_by_ext(a_, file_name.c_str()));
  }

  check_result(::archive_write_set_options(a_, format.options.c_str()));
  check_result(::archive_write_set_bytes_in_last_block(a_, 1));
#endif
}
void closefd(int& fd) {
if (fd >= 0) {
if (::close(fd) != 0) {
@ -517,6 +546,20 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
} // namespace internal
// Builds a human-readable summary of this format for log messages:
// the format name, then the filter list in parentheses (if any), then the
// quoted option string (if any).
std::string filesystem_extractor_archive_format::description() const {
  auto summary = name;

  if (!filters.empty()) {
    summary.append(fmt::format(" ({})", fmt::join(filters, ", ")));
  }

  if (!options.empty()) {
    summary.append(" with options '").append(options).append("'");
  }

  return summary;
}
filesystem_extractor::filesystem_extractor(logger& lgr, os_access const& os)
: impl_(make_unique_logging_object<filesystem_extractor::impl,
internal::filesystem_extractor_,

View File

@ -1135,7 +1135,7 @@ void check_compat(logger& lgr [[maybe_unused]], reader::filesystem_v2 const& fs,
utility::filesystem_extractor ext(lgr, os);
std::ostringstream oss;
EXPECT_NO_THROW(ext.open_stream(oss, "mtree"));
EXPECT_NO_THROW(ext.open_stream(oss, {.name = "mtree"}));
EXPECT_NO_THROW(ext.extract(fs));
EXPECT_NO_THROW(ext.close());

View File

@ -194,6 +194,7 @@ TEST_P(manpage_coverage_test, options) {
if (tool_name == "dwarfsextract") {
#ifdef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
man_opts.erase("format");
man_opts.erase("format-filters");
man_opts.erase("format-options");
#endif
man_opts.erase("pattern");

View File

@ -2263,6 +2263,32 @@ TEST(dwarfsextract_test, mtree) {
EXPECT_THAT(out, ::testing::HasSubstr("sha256digest="));
}
// Extracts the test image as a zstd-filtered GNU tar stream and checks that
// libarchive can open the result with exactly that format/filter pair and
// read the first entry header.
TEST(dwarfsextract_test, filters) {
  auto tester = dwarfsextract_tester::create_with_image();

  ASSERT_EQ(0, tester.run({"-i", "image.dwarfs", "-f", "gnutar",
                           "--format-filters", "zstd", "--format-options",
                           "zstd:compression-level=3"}))
      << tester.err();

  auto archive_data = tester.out();

  auto reader = ::archive_read_new();

  // Pin the reader to the expected format and filter instead of relying on
  // auto-detection.
  ASSERT_EQ(ARCHIVE_OK,
            ::archive_read_set_format(reader, ARCHIVE_FORMAT_TAR_GNUTAR))
      << ::archive_error_string(reader);
  ASSERT_THAT(::archive_read_append_filter(reader, ARCHIVE_FILTER_ZSTD),
              ::testing::AnyOf(ARCHIVE_OK, ARCHIVE_WARN))
      << ::archive_error_string(reader);

  ASSERT_EQ(ARCHIVE_OK,
            ::archive_read_open_memory(reader, archive_data.data(),
                                       archive_data.size()))
      << ::archive_error_string(reader);

  struct archive_entry* first_entry = nullptr;

  EXPECT_EQ(ARCHIVE_OK, ::archive_read_next_header(reader, &first_entry))
      << ::archive_error_string(reader);

  EXPECT_EQ(ARCHIVE_OK, ::archive_read_free(reader))
      << ::archive_error_string(reader);
}
TEST(dwarfsextract_test, patterns) {
auto mkdt = mkdwarfs_tester::create_empty();
mkdt.add_test_file_tree();

View File

@ -72,7 +72,8 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
std::string cache_size_str, image_offset;
logger_options logopts;
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
std::string format, format_options;
utility::filesystem_extractor_archive_format format;
std::string format_filters;
#endif
#if DWARFS_PERFMON_ENABLED
std::string perfmon_str;
@ -98,11 +99,14 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
"filesystem image offset in bytes")
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
("format,f",
po::value<std::string>(&format),
po::value<std::string>(&format.name),
"output format")
("format-filters",
po::value<std::string>(&format_filters),
"comma-separated libarchive format filters")
("format-options",
po::value<std::string>(&format_options),
"comma-separated libarchive options for the specific output format")
po::value<std::string>(&format.options),
"options for the specific libarchive format/filters")
#endif
("continue-on-error",
po::value<bool>(&continue_on_error)->zero_tokens(),
@ -210,7 +214,7 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
utility::filesystem_extractor fsx(lgr, *iol.os);
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
if (format.empty()) {
if (format.name.empty()) {
#endif
fsx.open_disk(iol.os->canonical(output));
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
@ -230,10 +234,12 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
}
}
split_to(format_filters, ',', format.filters);
if (stream) {
fsx.open_stream(*stream, format, format_options);
fsx.open_stream(*stream, format);
} else {
fsx.open_archive(iol.os->canonical(output), format, format_options);
fsx.open_archive(iol.os->canonical(output), format);
}
}
#endif