Create filesystem_extractor class

This commit is contained in:
Marcus Holland-Moritz 2021-03-06 12:32:56 +01:00
parent 77a409f97a
commit 06f0d3ff07
4 changed files with 330 additions and 180 deletions

View File

@ -222,6 +222,7 @@ list(
src/dwarfs/console_writer.cpp
src/dwarfs/entry.cpp
src/dwarfs/error.cpp
src/dwarfs/filesystem_extractor.cpp
src/dwarfs/filesystem_v2.cpp
src/dwarfs/filesystem_writer.cpp
src/dwarfs/fstypes.cpp

View File

@ -0,0 +1,63 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <memory>
#include <string>
namespace dwarfs {
class filesystem_v2;
class logger;
class filesystem_extractor {
public:
filesystem_extractor(logger& lgr);
void open_archive(std::string const& output, std::string const& format) {
return impl_->open_archive(output, format);
}
void open_disk(std::string const& output) { return impl_->open_disk(output); }
void close() { return impl_->close(); }
void extract(filesystem_v2& fs, size_t max_queued_bytes) {
return impl_->extract(fs, max_queued_bytes);
}
class impl {
public:
virtual ~impl() = default;
virtual void
open_archive(std::string const& output, std::string const& format) = 0;
virtual void open_disk(std::string const& output) = 0;
virtual void close() = 0;
virtual void extract(filesystem_v2& fs, size_t max_queued_bytes) = 0;
};
private:
std::unique_ptr<impl> impl_;
};
} // namespace dwarfs

View File

@ -0,0 +1,248 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <condition_variable>
#include <mutex>
#include <unistd.h>
#include <archive.h>
#include <archive_entry.h>
#include "dwarfs/filesystem_extractor.h"
#include "dwarfs/filesystem_v2.h"
#include "dwarfs/fstypes.h"
#include "dwarfs/logger.h"
#include "dwarfs/options.h"
#include "dwarfs/worker_group.h"
namespace dwarfs {
namespace {
class cache_semaphore {
public:
void post(int64_t n) {
{
std::lock_guard lock(mx_);
size_ += n;
++count_;
}
condition_.notify_one();
}
void wait(int64_t n) {
std::unique_lock lock(mx_);
while (size_ < n && count_ <= 0) {
condition_.wait(lock);
}
size_ -= n;
--count_;
}
private:
std::mutex mx_;
std::condition_variable condition_;
int64_t count_{0};
int64_t size_{0};
};
} // namespace
template <typename LoggerPolicy>
class filesystem_extractor_ : public filesystem_extractor::impl {
public:
filesystem_extractor_(logger& lgr)
: log_{lgr} {}
~filesystem_extractor_() override {
try {
close();
} catch (std::exception const& e) {
LOG_ERROR << "close() failed in destructor: " << e.what();
} catch (...) {
LOG_ERROR << "close() failed in destructor";
}
}
void
open_archive(std::string const& output, std::string const& format) override {
a_ = ::archive_write_new();
check_result(::archive_write_set_format_by_name(a_, format.c_str()));
check_result(::archive_write_open_filename(
a_, output.empty() ? nullptr : output.c_str()));
}
void open_disk(std::string const& output) override {
if (!output.empty()) {
if (::chdir(output.c_str()) != 0) {
DWARFS_THROW(runtime_error,
output + ": " + std::string(strerror(errno)));
}
}
a_ = ::archive_write_disk_new();
check_result(::archive_write_disk_set_options(
a_,
ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
}
void close() override {
if (a_) {
check_result(::archive_write_free(a_));
a_ = nullptr;
}
}
void extract(filesystem_v2& fs, size_t max_queued_bytes) override;
private:
void check_result(int res) {
switch (res) {
case ARCHIVE_OK:
break;
case ARCHIVE_WARN:
LOG_WARN << std::string(archive_error_string(a_));
break;
case ARCHIVE_RETRY:
case ARCHIVE_FATAL:
DWARFS_THROW(runtime_error, std::string(archive_error_string(a_)));
}
}
log_proxy<debug_logger_policy> log_;
struct ::archive* a_{nullptr};
};
template <typename LoggerPolicy>
void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2& fs,
size_t max_queued_bytes) {
DWARFS_CHECK(a_, "filesystem not opened");
auto lr = ::archive_entry_linkresolver_new();
::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a_));
::archive_entry* spare = nullptr;
worker_group archiver("archiver", 1);
cache_semaphore sem;
sem.post(max_queued_bytes);
auto do_archive = [&](::archive_entry* ae, entry_view entry) {
if (auto size = ::archive_entry_size(ae);
S_ISREG(entry.mode()) && size > 0) {
auto fd = fs.open(entry);
sem.wait(size);
if (auto ranges = fs.readv(fd, size, 0)) {
archiver.add_job(
[this, &sem, ranges = std::move(*ranges), ae, size]() mutable {
check_result(::archive_write_header(a_, ae));
for (auto& r : ranges) {
auto br = r.get();
check_result(::archive_write_data(a_, br.data(), br.size()));
}
sem.post(size);
::archive_entry_free(ae);
});
} else {
LOG_ERROR << "error reading inode [" << fd
<< "]: " << ::strerror(-ranges.error());
}
} else {
archiver.add_job([this, ae] {
check_result(::archive_write_header(a_, ae));
::archive_entry_free(ae);
});
}
};
fs.walk_inode_order([&](auto entry, auto parent) {
if (entry.inode() == 0) {
return;
}
auto ae = ::archive_entry_new();
struct ::stat stbuf;
if (fs.getattr(entry, &stbuf) != 0) {
DWARFS_THROW(runtime_error, "getattr() failed");
}
std::string path;
path.reserve(256);
parent.append_path_to(path);
if (!path.empty()) {
path += '/';
}
path += entry.name();
::archive_entry_set_pathname(ae, path.c_str());
::archive_entry_copy_stat(ae, &stbuf);
if (S_ISLNK(entry.mode())) {
std::string link;
if (fs.readlink(entry, &link) != 0) {
LOG_ERROR << "readlink() failed";
}
::archive_entry_set_symlink(ae, link.c_str());
}
::archive_entry_linkify(lr, &ae, &spare);
if (ae) {
do_archive(ae, entry);
}
if (spare) {
auto ev = fs.find(::archive_entry_ino(spare));
if (!ev) {
LOG_ERROR << "find() failed";
}
LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
do_archive(spare, *ev);
}
});
archiver.wait();
// As we're visiting *all* hardlinks, we should never see any deferred
// entries.
::archive_entry* ae = nullptr;
::archive_entry_linkify(lr, &ae, &spare);
if (ae) {
DWARFS_THROW(runtime_error, "unexpected deferred entry");
}
::archive_entry_linkresolver_free(lr);
}
filesystem_extractor::filesystem_extractor(logger& lgr)
: impl_(make_unique_logging_object<filesystem_extractor::impl,
filesystem_extractor_, logger_policies>(
lgr)) {}
} // namespace dwarfs

View File

@ -19,34 +19,21 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <condition_variable>
#include <cstring>
#include <exception>
#include <future>
#include <memory>
#include <mutex>
#include <vector>
#include <iostream>
#include <string>
#include <sys/statvfs.h>
#include <unistd.h>
#include <boost/program_options.hpp>
#include <folly/Conv.h>
#include <folly/String.h>
#include <archive.h>
#include <archive_entry.h>
#include "dwarfs/filesystem_extractor.h"
#include "dwarfs/filesystem_v2.h"
#include "dwarfs/fstypes.h"
#include "dwarfs/logger.h"
#include "dwarfs/mmap.h"
#include "dwarfs/options.h"
#include "dwarfs/util.h"
#include "dwarfs/version.h"
#include "dwarfs/worker_group.h"
namespace po = boost::program_options;
@ -54,33 +41,6 @@ using namespace dwarfs;
namespace {
class cache_semaphore {
public:
void post(int64_t n) {
{
std::lock_guard lock(mx_);
size_ += n;
++count_;
}
condition_.notify_one();
}
void wait(int64_t n) {
std::unique_lock lock(mx_);
while (size_ < n && count_ <= 0) {
condition_.wait(lock);
}
size_ -= n;
--count_;
}
private:
std::mutex mx_;
std::condition_variable condition_;
int64_t count_{0};
int64_t size_{0};
};
int dwarfsextract(int argc, char** argv) {
std::string filesystem, output, format, cache_size_str, log_level;
size_t num_workers;
@ -134,153 +94,31 @@ int dwarfsextract(int argc, char** argv) {
fsopts.block_cache.num_workers = num_workers;
fsopts.metadata.enable_nlink = true;
dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem),
fsopts);
filesystem_v2 fs(lgr, std::make_shared<mmap>(filesystem), fsopts);
filesystem_extractor fsx(lgr);
log_proxy<debug_logger_policy> log_(lgr);
struct ::archive* a;
auto check_result = [&](int res) {
switch (res) {
case ARCHIVE_OK:
break;
case ARCHIVE_WARN:
LOG_WARN << std::string(archive_error_string(a));
break;
case ARCHIVE_RETRY:
case ARCHIVE_FATAL:
DWARFS_THROW(runtime_error, std::string(archive_error_string(a)));
}
};
if (format.empty()) {
if (!output.empty()) {
if (::chdir(output.c_str()) != 0) {
DWARFS_THROW(runtime_error,
output + ": " + std::string(strerror(errno)));
}
}
a = ::archive_write_disk_new();
check_result(::archive_write_disk_set_options(
a,
ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
} else {
a = ::archive_write_new();
check_result(::archive_write_set_format_by_name(a, format.c_str()));
check_result(::archive_write_open_filename(
a, vm.count("output") && !output.empty() && output != "-"
? output.c_str()
: nullptr));
}
auto lr = ::archive_entry_linkresolver_new();
::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a));
::archive_entry* spare = nullptr;
worker_group archiver("archiver", 1);
cache_semaphore sem;
size_t max_queued_bytes = 0;
{
struct ::statvfs buf;
fs.statvfs(&buf);
sem.post(fsopts.block_cache.max_bytes > buf.f_bsize
? fsopts.block_cache.max_bytes - buf.f_bsize
: 0);
if (fsopts.block_cache.max_bytes > buf.f_bsize) {
max_queued_bytes = fsopts.block_cache.max_bytes - buf.f_bsize;
}
}
auto do_archive = [&](::archive_entry* ae, entry_view entry) {
if (auto size = ::archive_entry_size(ae);
S_ISREG(entry.mode()) && size > 0) {
auto fd = fs.open(entry);
sem.wait(size);
auto ranges = fs.readv(fd, size, 0);
if (!ranges) {
LOG_ERROR << "error reading inode [" << fd
<< "]: " << ::strerror(-ranges.error());
return;
}
archiver.add_job([&sem, &check_result, ranges = std::move(*ranges), a,
ae, size]() mutable {
check_result(::archive_write_header(a, ae));
for (auto& r : ranges) {
auto br = r.get();
check_result(::archive_write_data(a, br.data(), br.size()));
}
sem.post(size);
::archive_entry_free(ae);
});
} else {
archiver.add_job([&check_result, a, ae] {
check_result(::archive_write_header(a, ae));
::archive_entry_free(ae);
});
if (format.empty()) {
fsx.open_disk(output);
} else {
if (output == "-") {
output.clear();
}
};
fs.walk_inode_order([&](auto entry, auto parent) {
if (entry.inode() == 0) {
return;
}
auto ae = ::archive_entry_new();
struct ::stat stbuf;
if (fs.getattr(entry, &stbuf) != 0) {
DWARFS_THROW(runtime_error, "getattr() failed");
}
std::string path;
path.reserve(256);
parent.append_path_to(path);
if (!path.empty()) {
path += '/';
}
path += entry.name();
::archive_entry_set_pathname(ae, path.c_str());
::archive_entry_copy_stat(ae, &stbuf);
if (S_ISLNK(entry.mode())) {
std::string link;
if (fs.readlink(entry, &link) != 0) {
LOG_ERROR << "readlink() failed";
}
::archive_entry_set_symlink(ae, link.c_str());
}
::archive_entry_linkify(lr, &ae, &spare);
if (ae) {
do_archive(ae, entry);
}
if (spare) {
auto ev = fs.find(::archive_entry_ino(spare));
if (!ev) {
LOG_ERROR << "find() failed";
}
LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
do_archive(spare, *ev);
}
});
archiver.wait();
// As we're visiting *all* hardlinks, we should never see any deferred
// entries.
::archive_entry* ae = nullptr;
::archive_entry_linkify(lr, &ae, &spare);
if (ae) {
DWARFS_THROW(runtime_error, "unexpected deferred entry");
fsx.open_archive(output, format);
}
::archive_entry_linkresolver_free(lr);
check_result(::archive_write_free(a));
fsx.extract(fs, max_queued_bytes);
fsx.close();
} catch (runtime_error const& e) {
std::cerr << "error: " << e.what() << std::endl;
return 1;