From 000dc33aa8737bbc2302cf2fbdf3a86c41d259ab Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sun, 28 Feb 2021 21:22:24 +0100 Subject: [PATCH] Initial version of dwarfsextract --- CMakeLists.txt | 8 +- src/dwarfsextract.cpp | 232 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 src/dwarfsextract.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 63e64c4b..49824c37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,6 +90,7 @@ pkg_check_modules(FUSE IMPORTED_TARGET fuse>=2.9.9) pkg_check_modules(FUSE3 IMPORTED_TARGET fuse3>=3.4.1) pkg_check_modules(LIBLZ4 IMPORTED_TARGET liblz4>=1.8.3) pkg_check_modules(LIBLZMA IMPORTED_TARGET liblzma>=5.2.4) +pkg_check_modules(LIBARCHIVE IMPORTED_TARGET libarchive>=3.1.2) if(NOT FUSE_FOUND AND NOT FUSE3_FOUND) message(FATAL_ERROR "No FUSE or FUSE3 library found") @@ -221,8 +222,11 @@ add_library(dwarfs ${LIBDWARFS_SRC}) add_executable(mkdwarfs src/mkdwarfs.cpp) add_executable(dwarfsck src/dwarfsck.cpp) add_executable(dwarfsbench src/dwarfsbench.cpp) +add_executable(dwarfsextract src/dwarfsextract.cpp) -list(APPEND BINARY_TARGETS mkdwarfs dwarfsck dwarfsbench) +list(APPEND BINARY_TARGETS mkdwarfs dwarfsck dwarfsbench dwarfsextract) + +target_link_libraries(dwarfsextract PkgConfig::LIBARCHIVE) if(FUSE3_FOUND) add_executable(dwarfs-bin src/dwarfs.cpp) @@ -473,7 +477,7 @@ add_custom_target( googletest-* _CPack_Packages install_manifest.txt share) install( - TARGETS mkdwarfs dwarfsck dwarfsbench + TARGETS mkdwarfs dwarfsck dwarfsbench dwarfsextract RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) diff --git a/src/dwarfsextract.cpp b/src/dwarfsextract.cpp new file mode 100644 index 00000000..284a370e --- /dev/null +++ b/src/dwarfsextract.cpp @@ -0,0 +1,232 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of 
dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): the targets of the angle-bracket includes were lost when this
+// patch was extracted. They are reconstructed here from the symbols used
+// below; the two folly headers are a best guess -- confirm against the
+// original commit.
+#include <cerrno>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <boost/program_options.hpp>
+
+#include <archive.h>
+#include <archive_entry.h>
+
+#include <folly/Conv.h>
+#include <folly/String.h>
+
+#include "dwarfs/filesystem_v2.h"
+#include "dwarfs/fstypes.h"
+#include "dwarfs/logger.h"
+#include "dwarfs/mmap.h"
+#include "dwarfs/options.h"
+#include "dwarfs/util.h"
+#include "dwarfs/version.h"
+
+namespace po = boost::program_options;
+
+using namespace dwarfs;
+
+namespace {
+
+/**
+ * Extract the contents of a DwarFS image using libarchive.
+ *
+ * Without `--format`, entries are written to disk through an
+ * `archive_write_disk` handle, after changing into `--output` if one was
+ * given. With `--format`, an archive of that libarchive format is written to
+ * `--output`; an empty output or "-" passes a null filename to
+ * `archive_write_open_filename()`.
+ *
+ * \param argc  argument count as passed to main()
+ * \param argv  argument vector as passed to main()
+ * \return 0 on success or after printing the usage text, 1 if the command
+ *         line could not be parsed or extraction failed with a
+ *         `runtime_error` / `system_error`
+ */
+int dwarfsextract(int argc, char** argv) {
+  std::string filesystem, output, format, cache_size_str, log_level;
+  size_t num_workers;
+
+  // clang-format off
+  po::options_description opts("Command line options");
+  opts.add_options()
+    ("input,i",
+        po::value<std::string>(&filesystem),
+        "input filesystem file")
+    ("output,o",
+        po::value<std::string>(&output),
+        "output file or directory")
+    ("format,f",
+        po::value<std::string>(&format),
+        "output format")
+    ("num-workers,n",
+        po::value<size_t>(&num_workers)->default_value(1),
+        "number of worker threads")
+    ("cache-size,s",
+        po::value<std::string>(&cache_size_str)->default_value("256m"),
+        "block cache size")
+    ("log-level,l",
+        po::value<std::string>(&log_level)->default_value("warn"),
+        "log level (error, warn, info, debug, trace)")
+    ("help,h",
+        "output help message and exit");
+  // clang-format on
+
+  po::variables_map vm;
+
+  try {
+    po::store(po::parse_command_line(argc, argv, opts), vm);
+    po::notify(vm);
+  } catch (po::error const& e) {
+    std::cerr << "error: " << e.what() << std::endl;
+    return 1;
+  }
+
+  if (vm.count("help") or !vm.count("input")) {
+    std::cerr << "dwarfsextract (" << PRJ_GIT_ID << ")\n\n"
+              << opts << std::endl;
+    return 0;
+  }
+
+  try {
+    stream_logger lgr(std::cerr, logger::parse_level(log_level));
+    filesystem_options fsopts;
+
+    fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str);
+    fsopts.block_cache.num_workers = num_workers;
+    fsopts.metadata.enable_nlink = true;
+
+    dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem),
+                             fsopts);
+
+    // The LOG_* macros below expect a proxy named `log_`.
+    log_proxy<debug_logger_policy> log_(lgr);
+    struct ::archive* a = nullptr;
+
+    // archive_error_string() may return NULL; never construct a std::string
+    // from a null pointer.
+    auto error_text = [&]() -> std::string {
+      char const* msg = a ? ::archive_error_string(a) : nullptr;
+      return msg ? std::string(msg) : std::string("unknown libarchive error");
+    };
+
+    // Map a libarchive status code to "ok", a logged warning, or an
+    // exception. Every code other than OK/WARN (RETRY, FAILED, FATAL, ...)
+    // is an error, so that ARCHIVE_FAILED is no longer silently ignored.
+    auto check_result = [&](int res) {
+      switch (res) {
+      case ARCHIVE_OK:
+        break;
+      case ARCHIVE_WARN:
+        LOG_WARN << error_text();
+        break;
+      default:
+        DWARFS_THROW(runtime_error, error_text());
+      }
+    };
+
+    if (format.empty()) {
+      // No archive format requested: extract straight to disk, relative to
+      // the output directory if one was given.
+      if (!output.empty()) {
+        if (::chdir(output.c_str()) != 0) {
+          DWARFS_THROW(runtime_error,
+                       output + ": " + std::string(strerror(errno)));
+        }
+      }
+
+      a = ::archive_write_disk_new();
+      if (!a) {
+        DWARFS_THROW(runtime_error, "archive_write_disk_new() failed");
+      }
+
+      check_result(::archive_write_disk_set_options(
+          a,
+          ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
+    } else {
+      a = ::archive_write_new();
+      if (!a) {
+        DWARFS_THROW(runtime_error, "archive_write_new() failed");
+      }
+
+      check_result(::archive_write_set_format_by_name(a, format.c_str()));
+
+      // An empty output or "-" selects the null filename.
+      bool const to_file =
+          vm.count("output") && !output.empty() && output != "-";
+      check_result(::archive_write_open_filename(
+          a, to_file ? output.c_str() : nullptr));
+    }
+
+    auto lr = ::archive_entry_linkresolver_new();
+    if (!lr) {
+      DWARFS_THROW(runtime_error, "archive_entry_linkresolver_new() failed");
+    }
+
+    ::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a));
+
+    ::archive_entry* spare = nullptr;
+
+    // Write the header for `ae` and, for regular files with a non-zero
+    // size, the file contents read from the image.
+    auto do_archive = [&](::archive_entry* ae, entry_view entry) {
+      check_result(::archive_write_header(a, ae));
+
+      auto const size = ::archive_entry_size(ae);
+
+      // Only regular files carry data. Symlinks and directories can report
+      // a non-zero size through stat, but there is nothing to read for them.
+      if (!S_ISREG(entry.mode()) || size <= 0) {
+        return;
+      }
+
+      // NOTE(review): the whole file is buffered in memory; consider
+      // chunked reads for very large files.
+      std::vector<char> buf(size);
+
+      // NOTE(review): assumes open() yields a negative handle on failure and
+      // read() returns the number of bytes read or a negative value on
+      // error -- confirm against filesystem_v2.
+      int const fh = fs.open(entry);
+      if (fh < 0) {
+        DWARFS_THROW(runtime_error, "open() failed");
+      }
+
+      auto const rv = fs.read(fh, buf.data(), buf.size());
+      if (rv < 0) {
+        DWARFS_THROW(runtime_error, "read() failed");
+      }
+      if (static_cast<size_t>(rv) != buf.size()) {
+        DWARFS_THROW(runtime_error, "short read");
+      }
+
+      // archive_write_data() returns a byte count on success (possibly 0
+      // for disk writers) and a negative status code on failure, so only
+      // negative values are passed to check_result().
+      auto const written = ::archive_write_data(a, buf.data(), buf.size());
+      if (written < 0) {
+        check_result(static_cast<int>(written));
+      }
+    };
+
+    fs.walk([&](auto entry, auto parent) {
+      // Entries with inode 0 are skipped.
+      if (entry.inode() == 0) {
+        return;
+      }
+
+      auto ae = ::archive_entry_new();
+      struct ::stat stbuf;
+
+      if (fs.getattr(entry, &stbuf) != 0) {
+        DWARFS_THROW(runtime_error, "getattr() failed");
+      }
+
+      std::string path = parent.path();
+      if (!path.empty()) {
+        path += '/';
+      }
+      path += entry.name();
+
+      ::archive_entry_set_pathname(ae, path.c_str());
+      ::archive_entry_copy_stat(ae, &stbuf);
+
+      if (S_ISLNK(entry.mode())) {
+        std::string link;
+        // Best effort: a failed readlink() is reported and the entry is
+        // written with an empty link target.
+        if (fs.readlink(entry, &link) != 0) {
+          LOG_ERROR << "readlink() failed";
+        }
+        ::archive_entry_set_symlink(ae, link.c_str());
+      }
+
+      // The link resolver may defer `ae` (setting it to null) and/or hand
+      // back a previously deferred entry in `spare`.
+      ::archive_entry_linkify(lr, &ae, &spare);
+
+      if (ae) {
+        do_archive(ae, entry);
+        ::archive_entry_free(ae);
+      }
+
+      if (spare) {
+        auto ev = fs.find(::archive_entry_ino(spare));
+        if (!ev) {
+          // Without the entry there is no data to write; dereferencing the
+          // empty result would be undefined behaviour, so bail out.
+          ::archive_entry_free(spare);
+          spare = nullptr;
+          DWARFS_THROW(runtime_error, "find() failed");
+        }
+        LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
+        do_archive(spare, *ev);
+        ::archive_entry_free(spare);
+        spare = nullptr;
+      }
+    });
+
+    // As we're visiting *all* hardlinks, we should never see any deferred
+    // entries.
+    ::archive_entry* ae = nullptr;
+    ::archive_entry_linkify(lr, &ae, &spare);
+    if (ae) {
+      DWARFS_THROW(runtime_error, "unexpected deferred entry");
+    }
+
+    ::archive_entry_linkresolver_free(lr);
+    check_result(::archive_write_free(a));
+  } catch (runtime_error const& e) {
+    std::cerr << "error: " << e.what() << std::endl;
+    return 1;
+  } catch (system_error const& e) {
+    std::cerr << "error: " << e.what() << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
+
+} // namespace
+
+/// Program entry point: runs dwarfsextract() through dwarfs::safe_main().
+int main(int argc, char** argv) {
+  return dwarfs::safe_main([&] { return dwarfsextract(argc, argv); });
+}