Initial version of dwarfsextract

This commit is contained in:
Marcus Holland-Moritz 2021-02-28 21:22:24 +01:00
parent 40fa1edc8a
commit 000dc33aa8
2 changed files with 238 additions and 2 deletions

View File

@ -90,6 +90,7 @@ pkg_check_modules(FUSE IMPORTED_TARGET fuse>=2.9.9)
pkg_check_modules(FUSE3 IMPORTED_TARGET fuse3>=3.4.1) pkg_check_modules(FUSE3 IMPORTED_TARGET fuse3>=3.4.1)
pkg_check_modules(LIBLZ4 IMPORTED_TARGET liblz4>=1.8.3) pkg_check_modules(LIBLZ4 IMPORTED_TARGET liblz4>=1.8.3)
pkg_check_modules(LIBLZMA IMPORTED_TARGET liblzma>=5.2.4) pkg_check_modules(LIBLZMA IMPORTED_TARGET liblzma>=5.2.4)
pkg_check_modules(LIBARCHIVE IMPORTED_TARGET libarchive>=3.1.2)
if(NOT FUSE_FOUND AND NOT FUSE3_FOUND) if(NOT FUSE_FOUND AND NOT FUSE3_FOUND)
message(FATAL_ERROR "No FUSE or FUSE3 library found") message(FATAL_ERROR "No FUSE or FUSE3 library found")
@ -221,8 +222,11 @@ add_library(dwarfs ${LIBDWARFS_SRC})
add_executable(mkdwarfs src/mkdwarfs.cpp) add_executable(mkdwarfs src/mkdwarfs.cpp)
add_executable(dwarfsck src/dwarfsck.cpp) add_executable(dwarfsck src/dwarfsck.cpp)
add_executable(dwarfsbench src/dwarfsbench.cpp) add_executable(dwarfsbench src/dwarfsbench.cpp)
add_executable(dwarfsextract src/dwarfsextract.cpp)
list(APPEND BINARY_TARGETS mkdwarfs dwarfsck dwarfsbench) list(APPEND BINARY_TARGETS mkdwarfs dwarfsck dwarfsbench dwarfsextract)
target_link_libraries(dwarfsextract PkgConfig::LIBARCHIVE)
if(FUSE3_FOUND) if(FUSE3_FOUND)
add_executable(dwarfs-bin src/dwarfs.cpp) add_executable(dwarfs-bin src/dwarfs.cpp)
@ -473,7 +477,7 @@ add_custom_target(
googletest-* _CPack_Packages install_manifest.txt share) googletest-* _CPack_Packages install_manifest.txt share)
install( install(
TARGETS mkdwarfs dwarfsck dwarfsbench TARGETS mkdwarfs dwarfsck dwarfsbench dwarfsextract
RUNTIME DESTINATION bin RUNTIME DESTINATION bin
LIBRARY DESTINATION lib LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib) ARCHIVE DESTINATION lib)

232
src/dwarfsextract.cpp Normal file
View File

@ -0,0 +1,232 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <cstring>
#include <exception>
#include <unistd.h>
#include <boost/program_options.hpp>
#include <folly/Conv.h>
#include <folly/String.h>
#include <archive.h>
#include <archive_entry.h>
#include "dwarfs/filesystem_v2.h"
#include "dwarfs/fstypes.h"
#include "dwarfs/logger.h"
#include "dwarfs/mmap.h"
#include "dwarfs/options.h"
#include "dwarfs/util.h"
#include "dwarfs/version.h"
namespace po = boost::program_options;
using namespace dwarfs;
namespace {
int dwarfsextract(int argc, char** argv) {
std::string filesystem, output, format, cache_size_str, log_level;
size_t num_workers;
// clang-format off
po::options_description opts("Command line options");
opts.add_options()
("input,i",
po::value<std::string>(&filesystem),
"input filesystem file")
("output,o",
po::value<std::string>(&output),
"output file or directory")
("format,f",
po::value<std::string>(&format),
"output format")
("num-workers,n",
po::value<size_t>(&num_workers)->default_value(1),
"number of worker threads")
("cache-size,s",
po::value<std::string>(&cache_size_str)->default_value("256m"),
"block cache size")
("log-level,l",
po::value<std::string>(&log_level)->default_value("warn"),
"log level (error, warn, info, debug, trace)")
("help,h",
"output help message and exit");
// clang-format on
po::variables_map vm;
try {
po::store(po::parse_command_line(argc, argv, opts), vm);
po::notify(vm);
} catch (po::error const& e) {
std::cerr << "error: " << e.what() << std::endl;
return 1;
}
if (vm.count("help") or !vm.count("input")) {
std::cerr << "dwarfsextract (" << PRJ_GIT_ID << ")\n\n"
<< opts << std::endl;
return 0;
}
try {
stream_logger lgr(std::cerr, logger::parse_level(log_level));
filesystem_options fsopts;
fsopts.block_cache.max_bytes = parse_size_with_unit(cache_size_str);
fsopts.block_cache.num_workers = num_workers;
fsopts.metadata.enable_nlink = true;
dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem),
fsopts);
log_proxy<debug_logger_policy> log_(lgr);
struct ::archive* a;
auto check_result = [&](int res) {
switch (res) {
case ARCHIVE_OK:
break;
case ARCHIVE_WARN:
LOG_WARN << std::string(archive_error_string(a));
break;
case ARCHIVE_RETRY:
case ARCHIVE_FATAL:
DWARFS_THROW(runtime_error, std::string(archive_error_string(a)));
}
};
if (format.empty()) {
if (!output.empty()) {
if (::chdir(output.c_str()) != 0) {
DWARFS_THROW(runtime_error,
output + ": " + std::string(strerror(errno)));
}
}
a = ::archive_write_disk_new();
check_result(::archive_write_disk_set_options(
a,
ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
} else {
a = ::archive_write_new();
check_result(::archive_write_set_format_by_name(a, format.c_str()));
check_result(::archive_write_open_filename(
a, vm.count("output") && !output.empty() && output != "-"
? output.c_str()
: nullptr));
}
auto lr = ::archive_entry_linkresolver_new();
::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a));
::archive_entry* spare = nullptr;
auto do_archive = [&](::archive_entry* ae, entry_view entry) {
check_result(::archive_write_header(a, ae));
if (auto size = ::archive_entry_size(ae); size > 0) {
std::vector<char> buf(size);
int fh = fs.open(entry);
fs.read(fh, buf.data(), buf.size());
check_result(::archive_write_data(a, buf.data(), buf.size()));
}
};
fs.walk([&](auto entry, auto parent) {
if (entry.inode() == 0) {
return;
}
auto ae = ::archive_entry_new();
struct ::stat stbuf;
if (fs.getattr(entry, &stbuf) != 0) {
DWARFS_THROW(runtime_error, "getattr() failed");
}
std::string path = parent.path();
if (!path.empty()) {
path += '/';
}
path += entry.name();
::archive_entry_set_pathname(ae, path.c_str());
::archive_entry_copy_stat(ae, &stbuf);
if (S_ISLNK(entry.mode())) {
std::string link;
if (fs.readlink(entry, &link) != 0) {
LOG_ERROR << "readlink() failed";
}
::archive_entry_set_symlink(ae, link.c_str());
}
::archive_entry_linkify(lr, &ae, &spare);
if (ae) {
do_archive(ae, entry);
::archive_entry_free(ae);
}
if (spare) {
auto ev = fs.find(::archive_entry_ino(spare));
if (!ev) {
LOG_ERROR << "find() failed";
}
LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
do_archive(spare, *ev);
::archive_entry_free(spare);
}
});
// As we're visiting *all* hardlinks, we should never see any deferred
// entries.
::archive_entry* ae = nullptr;
::archive_entry_linkify(lr, &ae, &spare);
if (ae) {
DWARFS_THROW(runtime_error, "unexpected deferred entry");
}
::archive_entry_linkresolver_free(lr);
check_result(::archive_write_free(a));
} catch (runtime_error const& e) {
std::cerr << "error: " << e.what() << std::endl;
return 1;
} catch (system_error const& e) {
std::cerr << "error: " << e.what() << std::endl;
return 1;
}
return 0;
}
} // namespace
int main(int argc, char** argv) {
return dwarfs::safe_main([&] { return dwarfsextract(argc, argv); });
}