diff --git a/CMakeLists.txt b/CMakeLists.txt
index f795f0ec..43004166 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -222,6 +222,7 @@ list(
src/dwarfs/console_writer.cpp
src/dwarfs/entry.cpp
src/dwarfs/error.cpp
+ src/dwarfs/filesystem_extractor.cpp
src/dwarfs/filesystem_v2.cpp
src/dwarfs/filesystem_writer.cpp
src/dwarfs/fstypes.cpp
diff --git a/include/dwarfs/filesystem_extractor.h b/include/dwarfs/filesystem_extractor.h
new file mode 100644
index 00000000..eeeaf819
--- /dev/null
+++ b/include/dwarfs/filesystem_extractor.h
@@ -0,0 +1,63 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see .
+ */
+
+#pragma once
+
+#include
+#include
+
+namespace dwarfs {
+
+class filesystem_v2;
+class logger;
+
+class filesystem_extractor {
+ public:
+ filesystem_extractor(logger& lgr);
+
+ void open_archive(std::string const& output, std::string const& format) {
+ return impl_->open_archive(output, format);
+ }
+
+ void open_disk(std::string const& output) { return impl_->open_disk(output); }
+
+ void close() { return impl_->close(); }
+
+ void extract(filesystem_v2& fs, size_t max_queued_bytes) {
+ return impl_->extract(fs, max_queued_bytes);
+ }
+
+ class impl {
+ public:
+ virtual ~impl() = default;
+
+ virtual void
+ open_archive(std::string const& output, std::string const& format) = 0;
+ virtual void open_disk(std::string const& output) = 0;
+ virtual void close() = 0;
+ virtual void extract(filesystem_v2& fs, size_t max_queued_bytes) = 0;
+ };
+
+ private:
+ std::unique_ptr impl_;
+};
+
+} // namespace dwarfs
diff --git a/src/dwarfs/filesystem_extractor.cpp b/src/dwarfs/filesystem_extractor.cpp
new file mode 100644
index 00000000..128aed45
--- /dev/null
+++ b/src/dwarfs/filesystem_extractor.cpp
@@ -0,0 +1,248 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see .
+ */
+
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include "dwarfs/filesystem_extractor.h"
+#include "dwarfs/filesystem_v2.h"
+#include "dwarfs/fstypes.h"
+#include "dwarfs/logger.h"
+#include "dwarfs/options.h"
+#include "dwarfs/worker_group.h"
+
+namespace dwarfs {
+
+namespace {
+
+class cache_semaphore {
+ public:
+ void post(int64_t n) {
+ {
+ std::lock_guard lock(mx_);
+ size_ += n;
+ ++count_;
+ }
+ condition_.notify_one();
+ }
+
+ void wait(int64_t n) {
+ std::unique_lock lock(mx_);
+ while (size_ < n && count_ <= 0) {
+ condition_.wait(lock);
+ }
+ size_ -= n;
+ --count_;
+ }
+
+ private:
+ std::mutex mx_;
+ std::condition_variable condition_;
+ int64_t count_{0};
+ int64_t size_{0};
+};
+
+} // namespace
+
+template
+class filesystem_extractor_ : public filesystem_extractor::impl {
+ public:
+ filesystem_extractor_(logger& lgr)
+ : log_{lgr} {}
+
+ ~filesystem_extractor_() override {
+ try {
+ close();
+ } catch (std::exception const& e) {
+ LOG_ERROR << "close() failed in destructor: " << e.what();
+ } catch (...) {
+ LOG_ERROR << "close() failed in destructor";
+ }
+ }
+
+ void
+ open_archive(std::string const& output, std::string const& format) override {
+ a_ = ::archive_write_new();
+
+ check_result(::archive_write_set_format_by_name(a_, format.c_str()));
+ check_result(::archive_write_open_filename(
+ a_, output.empty() ? nullptr : output.c_str()));
+ }
+
+ void open_disk(std::string const& output) override {
+ if (!output.empty()) {
+ if (::chdir(output.c_str()) != 0) {
+ DWARFS_THROW(runtime_error,
+ output + ": " + std::string(strerror(errno)));
+ }
+ }
+
+ a_ = ::archive_write_disk_new();
+
+ check_result(::archive_write_disk_set_options(
+ a_,
+ ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
+ }
+
+ void close() override {
+ if (a_) {
+ check_result(::archive_write_free(a_));
+ a_ = nullptr;
+ }
+ }
+
+ void extract(filesystem_v2& fs, size_t max_queued_bytes) override;
+
+ private:
+ void check_result(int res) {
+ switch (res) {
+ case ARCHIVE_OK:
+ break;
+ case ARCHIVE_WARN:
+ LOG_WARN << std::string(archive_error_string(a_));
+ break;
+ case ARCHIVE_RETRY:
+ case ARCHIVE_FATAL:
+ DWARFS_THROW(runtime_error, std::string(archive_error_string(a_)));
+ }
+ }
+
+ log_proxy log_;
+ struct ::archive* a_{nullptr};
+};
+
+template
+void filesystem_extractor_::extract(filesystem_v2& fs,
+ size_t max_queued_bytes) {
+ DWARFS_CHECK(a_, "filesystem not opened");
+
+ auto lr = ::archive_entry_linkresolver_new();
+
+ ::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a_));
+
+ ::archive_entry* spare = nullptr;
+
+ worker_group archiver("archiver", 1);
+ cache_semaphore sem;
+
+ sem.post(max_queued_bytes);
+
+ auto do_archive = [&](::archive_entry* ae, entry_view entry) {
+ if (auto size = ::archive_entry_size(ae);
+ S_ISREG(entry.mode()) && size > 0) {
+ auto fd = fs.open(entry);
+
+ sem.wait(size);
+
+ if (auto ranges = fs.readv(fd, size, 0)) {
+ archiver.add_job(
+ [this, &sem, ranges = std::move(*ranges), ae, size]() mutable {
+ check_result(::archive_write_header(a_, ae));
+ for (auto& r : ranges) {
+ auto br = r.get();
+ check_result(::archive_write_data(a_, br.data(), br.size()));
+ }
+ sem.post(size);
+ ::archive_entry_free(ae);
+ });
+ } else {
+ LOG_ERROR << "error reading inode [" << fd
+ << "]: " << ::strerror(-ranges.error());
+ }
+ } else {
+ archiver.add_job([this, ae] {
+ check_result(::archive_write_header(a_, ae));
+ ::archive_entry_free(ae);
+ });
+ }
+ };
+
+ fs.walk_inode_order([&](auto entry, auto parent) {
+ if (entry.inode() == 0) {
+ return;
+ }
+
+ auto ae = ::archive_entry_new();
+ struct ::stat stbuf;
+
+ if (fs.getattr(entry, &stbuf) != 0) {
+ DWARFS_THROW(runtime_error, "getattr() failed");
+ }
+
+ std::string path;
+ path.reserve(256);
+ parent.append_path_to(path);
+ if (!path.empty()) {
+ path += '/';
+ }
+ path += entry.name();
+
+ ::archive_entry_set_pathname(ae, path.c_str());
+ ::archive_entry_copy_stat(ae, &stbuf);
+
+ if (S_ISLNK(entry.mode())) {
+ std::string link;
+ if (fs.readlink(entry, &link) != 0) {
+ LOG_ERROR << "readlink() failed";
+ }
+ ::archive_entry_set_symlink(ae, link.c_str());
+ }
+
+ ::archive_entry_linkify(lr, &ae, &spare);
+
+ if (ae) {
+ do_archive(ae, entry);
+ }
+
+ if (spare) {
+ auto ev = fs.find(::archive_entry_ino(spare));
+ if (!ev) {
+ LOG_ERROR << "find() failed";
+ }
+ LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
+ do_archive(spare, *ev);
+ }
+ });
+
+ archiver.wait();
+
+ // As we're visiting *all* hardlinks, we should never see any deferred
+ // entries.
+ ::archive_entry* ae = nullptr;
+ ::archive_entry_linkify(lr, &ae, &spare);
+ if (ae) {
+ DWARFS_THROW(runtime_error, "unexpected deferred entry");
+ }
+
+ ::archive_entry_linkresolver_free(lr);
+}
+
+filesystem_extractor::filesystem_extractor(logger& lgr)
+ : impl_(make_unique_logging_object(
+ lgr)) {}
+
+} // namespace dwarfs
diff --git a/src/dwarfsextract.cpp b/src/dwarfsextract.cpp
index fb6ded61..c7c3bdb2 100644
--- a/src/dwarfsextract.cpp
+++ b/src/dwarfsextract.cpp
@@ -19,34 +19,21 @@
* along with dwarfs. If not, see .
*/
-#include
-#include
-#include
#include
-#include
-#include
-#include
-#include
+#include
+#include
#include
-#include
#include
-#include
-#include
-
-#include
-#include
-
+#include "dwarfs/filesystem_extractor.h"
#include "dwarfs/filesystem_v2.h"
-#include "dwarfs/fstypes.h"
#include "dwarfs/logger.h"
#include "dwarfs/mmap.h"
#include "dwarfs/options.h"
#include "dwarfs/util.h"
#include "dwarfs/version.h"
-#include "dwarfs/worker_group.h"
namespace po = boost::program_options;
@@ -54,33 +41,6 @@ using namespace dwarfs;
namespace {
-class cache_semaphore {
- public:
- void post(int64_t n) {
- {
- std::lock_guard lock(mx_);
- size_ += n;
- ++count_;
- }
- condition_.notify_one();
- }
-
- void wait(int64_t n) {
- std::unique_lock lock(mx_);
- while (size_ < n && count_ <= 0) {
- condition_.wait(lock);
- }
- size_ -= n;
- --count_;
- }
-
- private:
- std::mutex mx_;
- std::condition_variable condition_;
- int64_t count_{0};
- int64_t size_{0};
-};
-
int dwarfsextract(int argc, char** argv) {
std::string filesystem, output, format, cache_size_str, log_level;
size_t num_workers;
@@ -134,153 +94,31 @@ int dwarfsextract(int argc, char** argv) {
fsopts.block_cache.num_workers = num_workers;
fsopts.metadata.enable_nlink = true;
- dwarfs::filesystem_v2 fs(lgr, std::make_shared(filesystem),
- fsopts);
+ filesystem_v2 fs(lgr, std::make_shared(filesystem), fsopts);
+ filesystem_extractor fsx(lgr);
- log_proxy log_(lgr);
- struct ::archive* a;
-
- auto check_result = [&](int res) {
- switch (res) {
- case ARCHIVE_OK:
- break;
- case ARCHIVE_WARN:
- LOG_WARN << std::string(archive_error_string(a));
- break;
- case ARCHIVE_RETRY:
- case ARCHIVE_FATAL:
- DWARFS_THROW(runtime_error, std::string(archive_error_string(a)));
- }
- };
-
- if (format.empty()) {
- if (!output.empty()) {
- if (::chdir(output.c_str()) != 0) {
- DWARFS_THROW(runtime_error,
- output + ": " + std::string(strerror(errno)));
- }
- }
-
- a = ::archive_write_disk_new();
-
- check_result(::archive_write_disk_set_options(
- a,
- ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
- } else {
- a = ::archive_write_new();
-
- check_result(::archive_write_set_format_by_name(a, format.c_str()));
- check_result(::archive_write_open_filename(
- a, vm.count("output") && !output.empty() && output != "-"
- ? output.c_str()
- : nullptr));
- }
-
- auto lr = ::archive_entry_linkresolver_new();
-
- ::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a));
-
- ::archive_entry* spare = nullptr;
-
- worker_group archiver("archiver", 1);
- cache_semaphore sem;
+ size_t max_queued_bytes = 0;
{
struct ::statvfs buf;
fs.statvfs(&buf);
- sem.post(fsopts.block_cache.max_bytes > buf.f_bsize
- ? fsopts.block_cache.max_bytes - buf.f_bsize
- : 0);
+ if (fsopts.block_cache.max_bytes > buf.f_bsize) {
+ max_queued_bytes = fsopts.block_cache.max_bytes - buf.f_bsize;
+ }
}
- auto do_archive = [&](::archive_entry* ae, entry_view entry) {
- if (auto size = ::archive_entry_size(ae);
- S_ISREG(entry.mode()) && size > 0) {
- auto fd = fs.open(entry);
- sem.wait(size);
- auto ranges = fs.readv(fd, size, 0);
- if (!ranges) {
- LOG_ERROR << "error reading inode [" << fd
- << "]: " << ::strerror(-ranges.error());
- return;
- }
- archiver.add_job([&sem, &check_result, ranges = std::move(*ranges), a,
- ae, size]() mutable {
- check_result(::archive_write_header(a, ae));
- for (auto& r : ranges) {
- auto br = r.get();
- check_result(::archive_write_data(a, br.data(), br.size()));
- }
- sem.post(size);
- ::archive_entry_free(ae);
- });
- } else {
- archiver.add_job([&check_result, a, ae] {
- check_result(::archive_write_header(a, ae));
- ::archive_entry_free(ae);
- });
+ if (format.empty()) {
+ fsx.open_disk(output);
+ } else {
+ if (output == "-") {
+ output.clear();
}
- };
-
- fs.walk_inode_order([&](auto entry, auto parent) {
- if (entry.inode() == 0) {
- return;
- }
-
- auto ae = ::archive_entry_new();
- struct ::stat stbuf;
-
- if (fs.getattr(entry, &stbuf) != 0) {
- DWARFS_THROW(runtime_error, "getattr() failed");
- }
-
- std::string path;
- path.reserve(256);
- parent.append_path_to(path);
- if (!path.empty()) {
- path += '/';
- }
- path += entry.name();
-
- ::archive_entry_set_pathname(ae, path.c_str());
- ::archive_entry_copy_stat(ae, &stbuf);
-
- if (S_ISLNK(entry.mode())) {
- std::string link;
- if (fs.readlink(entry, &link) != 0) {
- LOG_ERROR << "readlink() failed";
- }
- ::archive_entry_set_symlink(ae, link.c_str());
- }
-
- ::archive_entry_linkify(lr, &ae, &spare);
-
- if (ae) {
- do_archive(ae, entry);
- }
-
- if (spare) {
- auto ev = fs.find(::archive_entry_ino(spare));
- if (!ev) {
- LOG_ERROR << "find() failed";
- }
- LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
- do_archive(spare, *ev);
- }
- });
-
- archiver.wait();
-
- // As we're visiting *all* hardlinks, we should never see any deferred
- // entries.
- ::archive_entry* ae = nullptr;
- ::archive_entry_linkify(lr, &ae, &spare);
- if (ae) {
- DWARFS_THROW(runtime_error, "unexpected deferred entry");
+ fsx.open_archive(output, format);
}
- ::archive_entry_linkresolver_free(lr);
- check_result(::archive_write_free(a));
+ fsx.extract(fs, max_queued_bytes);
+
+ fsx.close();
} catch (runtime_error const& e) {
std::cerr << "error: " << e.what() << std::endl;
return 1;