diff --git a/CMakeLists.txt b/CMakeLists.txt
index 63e64c4b..49824c37 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -90,6 +90,7 @@ pkg_check_modules(FUSE IMPORTED_TARGET fuse>=2.9.9)
pkg_check_modules(FUSE3 IMPORTED_TARGET fuse3>=3.4.1)
pkg_check_modules(LIBLZ4 IMPORTED_TARGET liblz4>=1.8.3)
pkg_check_modules(LIBLZMA IMPORTED_TARGET liblzma>=5.2.4)
+# libarchive is mandatory: dwarfsextract links PkgConfig::LIBARCHIVE
+# unconditionally, so fail here with a clear message if it is missing.
+pkg_check_modules(LIBARCHIVE REQUIRED IMPORTED_TARGET libarchive>=3.1.2)
if(NOT FUSE_FOUND AND NOT FUSE3_FOUND)
message(FATAL_ERROR "No FUSE or FUSE3 library found")
@@ -221,8 +222,11 @@ add_library(dwarfs ${LIBDWARFS_SRC})
add_executable(mkdwarfs src/mkdwarfs.cpp)
add_executable(dwarfsck src/dwarfsck.cpp)
add_executable(dwarfsbench src/dwarfsbench.cpp)
+add_executable(dwarfsextract src/dwarfsextract.cpp)
-list(APPEND BINARY_TARGETS mkdwarfs dwarfsck dwarfsbench)
+list(APPEND BINARY_TARGETS mkdwarfs dwarfsck dwarfsbench dwarfsextract)
+
+target_link_libraries(dwarfsextract PkgConfig::LIBARCHIVE)
if(FUSE3_FOUND)
add_executable(dwarfs-bin src/dwarfs.cpp)
@@ -473,7 +477,7 @@ add_custom_target(
googletest-* _CPack_Packages install_manifest.txt share)
install(
- TARGETS mkdwarfs dwarfsck dwarfsbench
+ TARGETS mkdwarfs dwarfsck dwarfsbench dwarfsextract
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
diff --git a/src/dwarfsextract.cpp b/src/dwarfsextract.cpp
new file mode 100644
index 00000000..284a370e
--- /dev/null
+++ b/src/dwarfsextract.cpp
@@ -0,0 +1,232 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include
+#include
+
+#include
+
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include "dwarfs/filesystem_v2.h"
+#include "dwarfs/fstypes.h"
+#include "dwarfs/logger.h"
+#include "dwarfs/mmap.h"
+#include "dwarfs/options.h"
+#include "dwarfs/util.h"
+#include "dwarfs/version.h"
+
+namespace po = boost::program_options;
+
+using namespace dwarfs;
+
+namespace {
+
+/**
+ * Entry point of the dwarfsextract tool.
+ *
+ * Parses the command line, opens the filesystem image given by --input and
+ * walks all of its entries, handing each one to libarchive:
+ *
+ *  - without --format, entries are written to disk (after changing into the
+ *    --output directory, if one was given);
+ *  - with --format, entries are written to an archive of that format, either
+ *    to the --output file or, if that is empty or "-", to a null filename,
+ *    which libarchive treats as standard output.
+ *
+ * Hard links are routed through a libarchive link resolver so that each
+ * archive format gets them in the representation it expects.
+ *
+ * \param argc  argument count as passed to main()
+ * \param argv  argument vector as passed to main()
+ * \return 0 on success or when only the usage text was printed, 1 if the
+ *         command line could not be parsed or extraction failed
+ */
+int dwarfsextract(int argc, char** argv) {
+  std::string filesystem, output, format, cache_size_str, log_level;
+  size_t num_workers;
+
+  // clang-format off
+  po::options_description opts("Command line options");
+  opts.add_options()
+    ("input,i",
+        po::value<std::string>(&filesystem),
+        "input filesystem file")
+    ("output,o",
+        po::value<std::string>(&output),
+        "output file or directory")
+    ("format,f",
+        po::value<std::string>(&format),
+        "output format")
+    ("num-workers,n",
+        po::value<size_t>(&num_workers)->default_value(1),
+        "number of worker threads")
+    ("cache-size,s",
+        po::value<std::string>(&cache_size_str)->default_value("256m"),
+        "block cache size")
+    ("log-level,l",
+        po::value<std::string>(&log_level)->default_value("warn"),
+        "log level (error, warn, info, debug, trace)")
+    ("help,h",
+        "output help message and exit");
+  // clang-format on
+
+  po::variables_map vm;
+
+  try {
+    po::store(po::parse_command_line(argc, argv, opts), vm);
+    po::notify(vm);
+  } catch (po::error const& e) {
+    std::cerr << "error: " << e.what() << std::endl;
+    return 1;
+  }
+
+  // Without an input image there is nothing to do, so show the usage text.
+  if (vm.count("help") or !vm.count("input")) {
+    std::cerr << "dwarfsextract (" << PRJ_GIT_ID << ")\n\n"
+              << opts << std::endl;
+    return 0;
+  }
+
+  try {
+    stream_logger lgr(std::cerr, logger::parse_level(log_level));
+    filesystem_options fsopts;
+
+    fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str);
+    fsopts.block_cache.num_workers = num_workers;
+    fsopts.metadata.enable_nlink = true;
+
+    // NOTE(review): assumes the mapping class from dwarfs/mmap.h is
+    // dwarfs::mmap; spelled fully qualified to avoid clashing with ::mmap().
+    dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem),
+                             fsopts);
+
+    // The LOG_* macros below refer to this object by the name `log_`.
+    // NOTE(review): logging policy assumed to be debug_logger_policy --
+    // confirm against dwarfs/logger.h.
+    log_proxy<debug_logger_policy> log_(lgr);
+
+    // Owning handles for the libarchive objects, so that nothing leaks when
+    // an exception unwinds out of this block.
+    using archive_ptr =
+        std::unique_ptr<struct ::archive, decltype(&::archive_write_free)>;
+    using resolver_ptr =
+        std::unique_ptr<struct ::archive_entry_linkresolver,
+                        decltype(&::archive_entry_linkresolver_free)>;
+    using entry_ptr =
+        std::unique_ptr<struct ::archive_entry, decltype(&::archive_entry_free)>;
+
+    archive_ptr writer(nullptr, &::archive_write_free);
+    struct ::archive* a = nullptr; // non-owning alias of writer.get()
+
+    // archive_error_string() may return a null pointer when no message has
+    // been recorded; never construct a std::string from that.
+    auto error_text = [&]() -> std::string {
+      char const* msg = a ? ::archive_error_string(a) : nullptr;
+      return msg ? std::string(msg) : std::string("unknown libarchive error");
+    };
+
+    // Translate a libarchive status code: warnings are logged, every hard
+    // failure (including ARCHIVE_FAILED) aborts the extraction.
+    auto check_result = [&](int res) {
+      switch (res) {
+      case ARCHIVE_OK:
+        break;
+      case ARCHIVE_WARN:
+        LOG_WARN << error_text();
+        break;
+      case ARCHIVE_RETRY:
+      case ARCHIVE_FAILED:
+      case ARCHIVE_FATAL:
+        DWARFS_THROW(runtime_error, error_text());
+      }
+    };
+
+    if (format.empty()) {
+      // No archive format requested: extract straight to disk.
+      if (!output.empty()) {
+        if (::chdir(output.c_str()) != 0) {
+          // Save errno before building the message can disturb it.
+          int const err = errno;
+          DWARFS_THROW(runtime_error,
+                       output + ": " + std::string(strerror(err)));
+        }
+      }
+
+      writer.reset(::archive_write_disk_new());
+      a = writer.get();
+
+      if (!a) {
+        DWARFS_THROW(runtime_error, "archive_write_disk_new() failed");
+      }
+
+      check_result(::archive_write_disk_set_options(
+          a,
+          ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
+    } else {
+      writer.reset(::archive_write_new());
+      a = writer.get();
+
+      if (!a) {
+        DWARFS_THROW(runtime_error, "archive_write_new() failed");
+      }
+
+      check_result(::archive_write_set_format_by_name(a, format.c_str()));
+      check_result(::archive_write_open_filename(
+          a, vm.count("output") && !output.empty() && output != "-"
+                 ? output.c_str()
+                 : nullptr));
+    }
+
+    resolver_ptr resolver(::archive_entry_linkresolver_new(),
+                          &::archive_entry_linkresolver_free);
+    auto lr = resolver.get();
+
+    if (!lr) {
+      DWARFS_THROW(runtime_error, "archive_entry_linkresolver_new() failed");
+    }
+
+    ::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a));
+
+    // Write one entry: header first, then the file contents if the header
+    // announces any.
+    // NOTE(review): the whole file is buffered in memory and the results of
+    // fs.open()/fs.read() are not checked -- confirm their error reporting
+    // and consider chunked reads.
+    auto do_archive = [&](::archive_entry* ae, entry_view entry) {
+      check_result(::archive_write_header(a, ae));
+      if (auto size = ::archive_entry_size(ae); size > 0) {
+        std::vector<char> buf(size);
+        int fh = fs.open(entry);
+        fs.read(fh, buf.data(), buf.size());
+        // archive_write_data() returns a byte count on success; only a
+        // negative value is a status code.
+        if (auto written = ::archive_write_data(a, buf.data(), buf.size());
+            written < 0) {
+          check_result(static_cast<int>(written));
+        }
+      }
+    };
+
+    fs.walk([&](auto entry, auto parent) {
+      // Inode 0 is skipped.
+      if (entry.inode() == 0) {
+        return;
+      }
+
+      entry_ptr ae(::archive_entry_new(), &::archive_entry_free);
+
+      if (!ae) {
+        DWARFS_THROW(runtime_error, "archive_entry_new() failed");
+      }
+
+      struct ::stat stbuf;
+
+      if (fs.getattr(entry, &stbuf) != 0) {
+        DWARFS_THROW(runtime_error, "getattr() failed");
+      }
+
+      std::string path = parent.path();
+      if (!path.empty()) {
+        path += '/';
+      }
+      path += entry.name();
+
+      ::archive_entry_set_pathname(ae.get(), path.c_str());
+      ::archive_entry_copy_stat(ae.get(), &stbuf);
+
+      if (S_ISLNK(entry.mode())) {
+        std::string link;
+        if (fs.readlink(entry, &link) != 0) {
+          LOG_ERROR << "readlink() failed";
+        }
+        ::archive_entry_set_symlink(ae.get(), link.c_str());
+      }
+
+      // The resolver may keep the entry (leaving a null pointer behind),
+      // hand back a different one, and additionally release a previously
+      // deferred entry. Ownership therefore moves through raw pointers for
+      // the duration of the call.
+      ::archive_entry* primary = ae.release();
+      ::archive_entry* deferred = nullptr;
+      ::archive_entry_linkify(lr, &primary, &deferred);
+      ae.reset(primary);
+      entry_ptr spare(deferred, &::archive_entry_free);
+
+      if (ae) {
+        do_archive(ae.get(), entry);
+      }
+
+      if (spare) {
+        auto ev = fs.find(::archive_entry_ino(spare.get()));
+        if (ev) {
+          LOG_DEBUG << "archiving spare "
+                    << ::archive_entry_pathname(spare.get());
+          do_archive(spare.get(), *ev);
+        } else {
+          // Without a matching filesystem entry there is no data to write,
+          // so report the problem and skip this entry.
+          LOG_ERROR << "find() failed";
+        }
+      }
+    });
+
+    // As we're visiting *all* hardlinks, we should never see any deferred
+    // entries.
+    ::archive_entry* pending = nullptr;
+    ::archive_entry* pending_spare = nullptr;
+    ::archive_entry_linkify(lr, &pending, &pending_spare);
+    entry_ptr leftover(pending, &::archive_entry_free);
+    entry_ptr leftover_spare(pending_spare, &::archive_entry_free);
+    if (leftover) {
+      DWARFS_THROW(runtime_error, "unexpected deferred entry");
+    }
+
+    resolver.reset();
+
+    // Close explicitly while the handle is still alive, so that a failure
+    // can be reported with its error string; the handle itself is released
+    // when `writer` goes out of scope.
+    check_result(::archive_write_close(a));
+  } catch (runtime_error const& e) {
+    std::cerr << "error: " << e.what() << std::endl;
+    return 1;
+  } catch (system_error const& e) {
+    std::cerr << "error: " << e.what() << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
+
+} // namespace
+
+/**
+ * Program entry point: hands a callable that runs dwarfsextract() with the
+ * process arguments to dwarfs::safe_main() and returns its result.
+ */
+int main(int argc, char** argv) {
+  auto run_tool = [&] { return dwarfsextract(argc, argv); };
+  return dwarfs::safe_main(run_tool);
+}