diff --git a/CMakeLists.txt b/CMakeLists.txt
index f795f0ec..43004166 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -222,6 +222,7 @@ list(
   src/dwarfs/console_writer.cpp
   src/dwarfs/entry.cpp
   src/dwarfs/error.cpp
+  src/dwarfs/filesystem_extractor.cpp
   src/dwarfs/filesystem_v2.cpp
   src/dwarfs/filesystem_writer.cpp
   src/dwarfs/fstypes.cpp
diff --git a/include/dwarfs/filesystem_extractor.h b/include/dwarfs/filesystem_extractor.h
new file mode 100644
index 00000000..eeeaf819
--- /dev/null
+++ b/include/dwarfs/filesystem_extractor.h
@@ -0,0 +1,92 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <string>
+
+namespace dwarfs {
+
+class filesystem_v2;
+class logger;
+
+/**
+ * Extracts the contents of a filesystem_v2 image using libarchive.
+ *
+ * A target must be opened with open_archive() or open_disk() before
+ * extract() is called. All calls are forwarded to an implementation
+ * object created by the constructor.
+ */
+class filesystem_extractor {
+ public:
+  explicit filesystem_extractor(logger& lgr);
+
+  /**
+   * Start writing an archive.
+   *
+   * \param output archive file name; if empty, a null file name is
+   *               passed to libarchive
+   * \param format libarchive format name
+   */
+  void open_archive(std::string const& output, std::string const& format) {
+    impl_->open_archive(output, format);
+  }
+
+  /**
+   * Start extracting to disk.
+   *
+   * \param output target directory; if non-empty, the working directory
+   *               of the process is changed to it
+   */
+  void open_disk(std::string const& output) { impl_->open_disk(output); }
+
+  /** Finish writing and release the libarchive handle, if any. */
+  void close() { impl_->close(); }
+
+  /**
+   * Write all entries of \p fs to the opened target.
+   *
+   * \param fs               filesystem to extract
+   * \param max_queued_bytes initial byte budget for queued file data
+   */
+  void extract(filesystem_v2& fs, size_t max_queued_bytes) {
+    impl_->extract(fs, max_queued_bytes);
+  }
+
+  /** Interface implemented by the logger policy specific extractor. */
+  class impl {
+   public:
+    virtual ~impl() = default;
+
+    virtual void
+    open_archive(std::string const& output, std::string const& format) = 0;
+    virtual void open_disk(std::string const& output) = 0;
+    virtual void close() = 0;
+    virtual void extract(filesystem_v2& fs, size_t max_queued_bytes) = 0;
+  };
+
+ private:
+  std::unique_ptr<impl> impl_;
+};
+
+} // namespace dwarfs
diff --git a/src/dwarfs/filesystem_extractor.cpp b/src/dwarfs/filesystem_extractor.cpp
new file mode 100644
index 00000000..128aed45
--- /dev/null
+++ b/src/dwarfs/filesystem_extractor.cpp
@@ -0,0 +1,347 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <cerrno>
+#include <condition_variable>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <mutex>
+#include <string>
+
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <archive.h>
+#include <archive_entry.h>
+
+#include "dwarfs/filesystem_extractor.h"
+#include "dwarfs/filesystem_v2.h"
+#include "dwarfs/fstypes.h"
+#include "dwarfs/logger.h"
+#include "dwarfs/options.h"
+#include "dwarfs/worker_group.h"
+
+namespace dwarfs {
+
+namespace {
+
+/**
+ * Byte budget shared by the reading thread and the archiver thread.
+ *
+ * wait(n) removes n bytes from the budget, post(n) adds n bytes. wait()
+ * also proceeds with fewer than n bytes available if count_ is positive,
+ * i.e. if more post() than wait() calls have completed; presumably so
+ * that a request larger than the whole budget cannot block forever.
+ */
+class cache_semaphore {
+ public:
+  void post(int64_t n) {
+    {
+      std::lock_guard<std::mutex> lock(mx_);
+      size_ += n;
+      ++count_;
+    }
+    condition_.notify_one();
+  }
+
+  void wait(int64_t n) {
+    std::unique_lock<std::mutex> lock(mx_);
+    condition_.wait(lock, [&] { return size_ >= n || count_ > 0; });
+    size_ -= n;
+    --count_;
+  }
+
+ private:
+  std::mutex mx_;
+  std::condition_variable condition_;
+  int64_t count_{0};
+  int64_t size_{0};
+};
+
+/** Returns bytes to a cache_semaphore when it goes out of scope. */
+class budget_guard {
+ public:
+  budget_guard(cache_semaphore& sem, int64_t n)
+      : sem_{sem}
+      , n_{n} {}
+  ~budget_guard() { sem_.post(n_); }
+
+  budget_guard(budget_guard const&) = delete;
+  budget_guard& operator=(budget_guard const&) = delete;
+
+ private:
+  cache_semaphore& sem_;
+  int64_t n_;
+};
+
+struct entry_deleter {
+  void operator()(::archive_entry* ae) const { ::archive_entry_free(ae); }
+};
+
+struct linkresolver_deleter {
+  void operator()(::archive_entry_linkresolver* lr) const {
+    ::archive_entry_linkresolver_free(lr);
+  }
+};
+
+using entry_ptr = std::unique_ptr<::archive_entry, entry_deleter>;
+using linkresolver_ptr =
+    std::unique_ptr<::archive_entry_linkresolver, linkresolver_deleter>;
+
+} // namespace
+
+template <typename LoggerPolicy>
+class filesystem_extractor_ final : public filesystem_extractor::impl {
+ public:
+  explicit filesystem_extractor_(logger& lgr)
+      : log_{lgr} {}
+
+  ~filesystem_extractor_() override {
+    try {
+      close();
+    } catch (std::exception const& e) {
+      LOG_ERROR << "close() failed in destructor: " << e.what();
+    } catch (...) {
+      LOG_ERROR << "close() failed in destructor";
+    }
+  }
+
+  void
+  open_archive(std::string const& output, std::string const& format) override {
+    // Don't leak a handle left over from a previous open_*() call.
+    close();
+
+    a_ = ::archive_write_new();
+    DWARFS_CHECK(a_, "archive_write_new() failed");
+
+    check_result(::archive_write_set_format_by_name(a_, format.c_str()));
+    check_result(::archive_write_open_filename(
+        a_, output.empty() ? nullptr : output.c_str()));
+  }
+
+  void open_disk(std::string const& output) override {
+    // Don't leak a handle left over from a previous open_*() call.
+    close();
+
+    if (!output.empty()) {
+      if (::chdir(output.c_str()) != 0) {
+        // Save errno before building the message can overwrite it.
+        int const err = errno;
+        DWARFS_THROW(runtime_error,
+                     output + ": " + std::string(std::strerror(err)));
+      }
+    }
+
+    a_ = ::archive_write_disk_new();
+    DWARFS_CHECK(a_, "archive_write_disk_new() failed");
+
+    check_result(::archive_write_disk_set_options(
+        a_,
+        ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
+  }
+
+  void close() override {
+    if (!a_) {
+      return;
+    }
+
+    // Close explicitly so that the error message can be read while the
+    // handle is still valid, then release the handle unconditionally so
+    // that a failing close() can never lead to a second free.
+    int const res = ::archive_write_close(a_);
+    std::string const msg = res < ARCHIVE_OK ? error_string() : std::string();
+    ::archive_write_free(a_);
+    a_ = nullptr;
+
+    handle_result(res, msg);
+  }
+
+  void extract(filesystem_v2& fs, size_t max_queued_bytes) override;
+
+ private:
+  std::string error_string() const {
+    auto const* msg = ::archive_error_string(a_);
+    return msg ? std::string(msg) : std::string("unknown libarchive error");
+  }
+
+  // Logs warnings and throws for every other negative result; ARCHIVE_OK
+  // and positive values are accepted silently.
+  void handle_result(int res, std::string const& msg) {
+    if (res == ARCHIVE_WARN) {
+      LOG_WARN << msg;
+    } else if (res < ARCHIVE_OK) {
+      // This includes ARCHIVE_FAILED.
+      DWARFS_THROW(runtime_error, msg);
+    }
+  }
+
+  void check_result(int res) {
+    if (res < ARCHIVE_OK) {
+      handle_result(res, error_string());
+    }
+  }
+
+  // archive_write_data() returns a byte count on success; only negative
+  // values are status codes.
+  void write_data(void const* data, size_t size) {
+    if (auto const rv = ::archive_write_data(a_, data, size); rv < 0) {
+      check_result(static_cast<int>(rv));
+    }
+  }
+
+  log_proxy<LoggerPolicy> log_;
+  struct ::archive* a_{nullptr};
+};
+
+template <typename LoggerPolicy>
+void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2& fs,
+                                                  size_t max_queued_bytes) {
+  DWARFS_CHECK(a_, "filesystem not opened");
+
+  linkresolver_ptr lr{::archive_entry_linkresolver_new()};
+  DWARFS_CHECK(lr != nullptr, "archive_entry_linkresolver_new() failed");
+
+  ::archive_entry_linkresolver_set_strategy(lr.get(), ::archive_format(a_));
+
+  ::archive_entry* spare = nullptr;
+
+  // `sem` is declared before `archiver` so that it is destroyed after it;
+  // queued jobs hold a reference to `sem`.
+  // NOTE(review): this assumes ~worker_group() waits for its thread.
+  cache_semaphore sem;
+  worker_group archiver("archiver", 1);
+
+  sem.post(max_queued_bytes);
+
+  // Takes ownership of `ae` and queues it for writing.
+  auto do_archive = [&](::archive_entry* ae, entry_view entry) {
+    entry_ptr owner{ae};
+
+    if (auto size = ::archive_entry_size(ae);
+        S_ISREG(entry.mode()) && size > 0) {
+      auto fd = fs.open(entry);
+
+      sem.wait(size);
+
+      if (auto ranges = fs.readv(fd, size, 0)) {
+        archiver.add_job(
+            [this, &sem, ranges = std::move(*ranges), ae, size]() mutable {
+              // Both guards also run if writing throws: the budget is
+              // returned first, then the entry is freed.
+              entry_ptr entry_guard{ae};
+              budget_guard budget{sem, size};
+              check_result(::archive_write_header(a_, ae));
+              for (auto& r : ranges) {
+                auto br = r.get();
+                write_data(br.data(), br.size());
+              }
+            });
+        (void)owner.release(); // the job owns the entry now
+      } else {
+        LOG_ERROR << "error reading inode [" << fd
+                  << "]: " << std::strerror(-ranges.error());
+        // Nothing was queued, so give the reserved bytes back here;
+        // `owner` frees the entry.
+        sem.post(size);
+      }
+    } else {
+      archiver.add_job([this, ae] {
+        entry_ptr entry_guard{ae};
+        check_result(::archive_write_header(a_, ae));
+      });
+      (void)owner.release(); // the job owns the entry now
+    }
+  };
+
+  fs.walk_inode_order([&](auto entry, auto parent) {
+    if (entry.inode() == 0) {
+      return;
+    }
+
+    struct ::stat stbuf;
+
+    if (fs.getattr(entry, &stbuf) != 0) {
+      DWARFS_THROW(runtime_error, "getattr() failed");
+    }
+
+    std::string path;
+    path.reserve(256);
+    parent.append_path_to(path);
+    if (!path.empty()) {
+      path += '/';
+    }
+    path += entry.name();
+
+    bool const is_symlink = S_ISLNK(entry.mode());
+    std::string link;
+    if (is_symlink && fs.readlink(entry, &link) != 0) {
+      LOG_ERROR << "readlink() failed";
+    }
+
+    // Allocate the entry only after all calls into `fs`, so that it is not
+    // leaked if one of them throws.
+    auto ae = ::archive_entry_new();
+    DWARFS_CHECK(ae, "archive_entry_new() failed");
+
+    ::archive_entry_set_pathname(ae, path.c_str());
+    ::archive_entry_copy_stat(ae, &stbuf);
+    if (is_symlink) {
+      ::archive_entry_set_symlink(ae, link.c_str());
+    }
+
+    ::archive_entry_linkify(lr.get(), &ae, &spare);
+
+    if (ae) {
+      do_archive(ae, entry);
+    }
+
+    if (spare) {
+      if (auto ev = fs.find(::archive_entry_ino(spare))) {
+        LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
+        do_archive(spare, *ev);
+      } else {
+        // Without an entry there is nothing to read the data from; free
+        // the spare instead of dereferencing an empty result.
+        LOG_ERROR << "find() failed";
+        ::archive_entry_free(spare);
+      }
+      spare = nullptr;
+    }
+  });
+
+  archiver.wait();
+
+  // As we're visiting *all* hardlinks, we should never see any deferred
+  // entries.
+  ::archive_entry* ae = nullptr;
+  ::archive_entry_linkify(lr.get(), &ae, &spare);
+  if (ae) {
+    DWARFS_THROW(runtime_error, "unexpected deferred entry");
+  }
+}
+
+filesystem_extractor::filesystem_extractor(logger& lgr)
+    : impl_(make_unique_logging_object<filesystem_extractor::impl,
+                                       filesystem_extractor_, logger_policies>(
+          lgr)) {}
+
+} // namespace dwarfs
diff --git a/src/dwarfsextract.cpp b/src/dwarfsextract.cpp
index fb6ded61..c7c3bdb2 100644
--- a/src/dwarfsextract.cpp
+++ b/src/dwarfsextract.cpp
@@ -19,34 +19,21 @@
  * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/ -#include -#include -#include #include -#include -#include -#include -#include +#include +#include #include -#include #include -#include -#include - -#include -#include - +#include "dwarfs/filesystem_extractor.h" #include "dwarfs/filesystem_v2.h" -#include "dwarfs/fstypes.h" #include "dwarfs/logger.h" #include "dwarfs/mmap.h" #include "dwarfs/options.h" #include "dwarfs/util.h" #include "dwarfs/version.h" -#include "dwarfs/worker_group.h" namespace po = boost::program_options; @@ -54,33 +41,6 @@ using namespace dwarfs; namespace { -class cache_semaphore { - public: - void post(int64_t n) { - { - std::lock_guard lock(mx_); - size_ += n; - ++count_; - } - condition_.notify_one(); - } - - void wait(int64_t n) { - std::unique_lock lock(mx_); - while (size_ < n && count_ <= 0) { - condition_.wait(lock); - } - size_ -= n; - --count_; - } - - private: - std::mutex mx_; - std::condition_variable condition_; - int64_t count_{0}; - int64_t size_{0}; -}; - int dwarfsextract(int argc, char** argv) { std::string filesystem, output, format, cache_size_str, log_level; size_t num_workers; @@ -134,153 +94,31 @@ int dwarfsextract(int argc, char** argv) { fsopts.block_cache.num_workers = num_workers; fsopts.metadata.enable_nlink = true; - dwarfs::filesystem_v2 fs(lgr, std::make_shared(filesystem), - fsopts); + filesystem_v2 fs(lgr, std::make_shared(filesystem), fsopts); + filesystem_extractor fsx(lgr); - log_proxy log_(lgr); - struct ::archive* a; - - auto check_result = [&](int res) { - switch (res) { - case ARCHIVE_OK: - break; - case ARCHIVE_WARN: - LOG_WARN << std::string(archive_error_string(a)); - break; - case ARCHIVE_RETRY: - case ARCHIVE_FATAL: - DWARFS_THROW(runtime_error, std::string(archive_error_string(a))); - } - }; - - if (format.empty()) { - if (!output.empty()) { - if (::chdir(output.c_str()) != 0) { - DWARFS_THROW(runtime_error, - output + ": " + std::string(strerror(errno))); - } - } - - a = ::archive_write_disk_new(); - - 
check_result(::archive_write_disk_set_options( - a, - ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME)); - } else { - a = ::archive_write_new(); - - check_result(::archive_write_set_format_by_name(a, format.c_str())); - check_result(::archive_write_open_filename( - a, vm.count("output") && !output.empty() && output != "-" - ? output.c_str() - : nullptr)); - } - - auto lr = ::archive_entry_linkresolver_new(); - - ::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a)); - - ::archive_entry* spare = nullptr; - - worker_group archiver("archiver", 1); - cache_semaphore sem; + size_t max_queued_bytes = 0; { struct ::statvfs buf; fs.statvfs(&buf); - sem.post(fsopts.block_cache.max_bytes > buf.f_bsize - ? fsopts.block_cache.max_bytes - buf.f_bsize - : 0); + if (fsopts.block_cache.max_bytes > buf.f_bsize) { + max_queued_bytes = fsopts.block_cache.max_bytes - buf.f_bsize; + } } - auto do_archive = [&](::archive_entry* ae, entry_view entry) { - if (auto size = ::archive_entry_size(ae); - S_ISREG(entry.mode()) && size > 0) { - auto fd = fs.open(entry); - sem.wait(size); - auto ranges = fs.readv(fd, size, 0); - if (!ranges) { - LOG_ERROR << "error reading inode [" << fd - << "]: " << ::strerror(-ranges.error()); - return; - } - archiver.add_job([&sem, &check_result, ranges = std::move(*ranges), a, - ae, size]() mutable { - check_result(::archive_write_header(a, ae)); - for (auto& r : ranges) { - auto br = r.get(); - check_result(::archive_write_data(a, br.data(), br.size())); - } - sem.post(size); - ::archive_entry_free(ae); - }); - } else { - archiver.add_job([&check_result, a, ae] { - check_result(::archive_write_header(a, ae)); - ::archive_entry_free(ae); - }); + if (format.empty()) { + fsx.open_disk(output); + } else { + if (output == "-") { + output.clear(); } - }; - - fs.walk_inode_order([&](auto entry, auto parent) { - if (entry.inode() == 0) { - return; - } - - auto ae = ::archive_entry_new(); - struct ::stat stbuf; - - if 
(fs.getattr(entry, &stbuf) != 0) { - DWARFS_THROW(runtime_error, "getattr() failed"); - } - - std::string path; - path.reserve(256); - parent.append_path_to(path); - if (!path.empty()) { - path += '/'; - } - path += entry.name(); - - ::archive_entry_set_pathname(ae, path.c_str()); - ::archive_entry_copy_stat(ae, &stbuf); - - if (S_ISLNK(entry.mode())) { - std::string link; - if (fs.readlink(entry, &link) != 0) { - LOG_ERROR << "readlink() failed"; - } - ::archive_entry_set_symlink(ae, link.c_str()); - } - - ::archive_entry_linkify(lr, &ae, &spare); - - if (ae) { - do_archive(ae, entry); - } - - if (spare) { - auto ev = fs.find(::archive_entry_ino(spare)); - if (!ev) { - LOG_ERROR << "find() failed"; - } - LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare); - do_archive(spare, *ev); - } - }); - - archiver.wait(); - - // As we're visiting *all* hardlinks, we should never see any deferred - // entries. - ::archive_entry* ae = nullptr; - ::archive_entry_linkify(lr, &ae, &spare); - if (ae) { - DWARFS_THROW(runtime_error, "unexpected deferred entry"); + fsx.open_archive(output, format); } - ::archive_entry_linkresolver_free(lr); - check_result(::archive_write_free(a)); + fsx.extract(fs, max_queued_bytes); + + fsx.close(); } catch (runtime_error const& e) { std::cerr << "error: " << e.what() << std::endl; return 1;