mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-11 21:35:27 -04:00
Create filesystem_extractor class
This commit is contained in:
parent
77a409f97a
commit
06f0d3ff07
@ -222,6 +222,7 @@ list(
|
||||
src/dwarfs/console_writer.cpp
|
||||
src/dwarfs/entry.cpp
|
||||
src/dwarfs/error.cpp
|
||||
src/dwarfs/filesystem_extractor.cpp
|
||||
src/dwarfs/filesystem_v2.cpp
|
||||
src/dwarfs/filesystem_writer.cpp
|
||||
src/dwarfs/fstypes.cpp
|
||||
|
63
include/dwarfs/filesystem_extractor.h
Normal file
63
include/dwarfs/filesystem_extractor.h
Normal file
@ -0,0 +1,63 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
class filesystem_v2;
|
||||
class logger;
|
||||
|
||||
class filesystem_extractor {
|
||||
public:
|
||||
filesystem_extractor(logger& lgr);
|
||||
|
||||
void open_archive(std::string const& output, std::string const& format) {
|
||||
return impl_->open_archive(output, format);
|
||||
}
|
||||
|
||||
void open_disk(std::string const& output) { return impl_->open_disk(output); }
|
||||
|
||||
void close() { return impl_->close(); }
|
||||
|
||||
void extract(filesystem_v2& fs, size_t max_queued_bytes) {
|
||||
return impl_->extract(fs, max_queued_bytes);
|
||||
}
|
||||
|
||||
class impl {
|
||||
public:
|
||||
virtual ~impl() = default;
|
||||
|
||||
virtual void
|
||||
open_archive(std::string const& output, std::string const& format) = 0;
|
||||
virtual void open_disk(std::string const& output) = 0;
|
||||
virtual void close() = 0;
|
||||
virtual void extract(filesystem_v2& fs, size_t max_queued_bytes) = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
std::unique_ptr<impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
248
src/dwarfs/filesystem_extractor.cpp
Normal file
248
src/dwarfs/filesystem_extractor.cpp
Normal file
@ -0,0 +1,248 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
|
||||
#include "dwarfs/filesystem_extractor.h"
|
||||
#include "dwarfs/filesystem_v2.h"
|
||||
#include "dwarfs/fstypes.h"
|
||||
#include "dwarfs/logger.h"
|
||||
#include "dwarfs/options.h"
|
||||
#include "dwarfs/worker_group.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
namespace {
|
||||
|
||||
class cache_semaphore {
|
||||
public:
|
||||
void post(int64_t n) {
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
size_ += n;
|
||||
++count_;
|
||||
}
|
||||
condition_.notify_one();
|
||||
}
|
||||
|
||||
void wait(int64_t n) {
|
||||
std::unique_lock lock(mx_);
|
||||
while (size_ < n && count_ <= 0) {
|
||||
condition_.wait(lock);
|
||||
}
|
||||
size_ -= n;
|
||||
--count_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mx_;
|
||||
std::condition_variable condition_;
|
||||
int64_t count_{0};
|
||||
int64_t size_{0};
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
class filesystem_extractor_ : public filesystem_extractor::impl {
|
||||
public:
|
||||
filesystem_extractor_(logger& lgr)
|
||||
: log_{lgr} {}
|
||||
|
||||
~filesystem_extractor_() override {
|
||||
try {
|
||||
close();
|
||||
} catch (std::exception const& e) {
|
||||
LOG_ERROR << "close() failed in destructor: " << e.what();
|
||||
} catch (...) {
|
||||
LOG_ERROR << "close() failed in destructor";
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
open_archive(std::string const& output, std::string const& format) override {
|
||||
a_ = ::archive_write_new();
|
||||
|
||||
check_result(::archive_write_set_format_by_name(a_, format.c_str()));
|
||||
check_result(::archive_write_open_filename(
|
||||
a_, output.empty() ? nullptr : output.c_str()));
|
||||
}
|
||||
|
||||
void open_disk(std::string const& output) override {
|
||||
if (!output.empty()) {
|
||||
if (::chdir(output.c_str()) != 0) {
|
||||
DWARFS_THROW(runtime_error,
|
||||
output + ": " + std::string(strerror(errno)));
|
||||
}
|
||||
}
|
||||
|
||||
a_ = ::archive_write_disk_new();
|
||||
|
||||
check_result(::archive_write_disk_set_options(
|
||||
a_,
|
||||
ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
|
||||
}
|
||||
|
||||
void close() override {
|
||||
if (a_) {
|
||||
check_result(::archive_write_free(a_));
|
||||
a_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void extract(filesystem_v2& fs, size_t max_queued_bytes) override;
|
||||
|
||||
private:
|
||||
void check_result(int res) {
|
||||
switch (res) {
|
||||
case ARCHIVE_OK:
|
||||
break;
|
||||
case ARCHIVE_WARN:
|
||||
LOG_WARN << std::string(archive_error_string(a_));
|
||||
break;
|
||||
case ARCHIVE_RETRY:
|
||||
case ARCHIVE_FATAL:
|
||||
DWARFS_THROW(runtime_error, std::string(archive_error_string(a_)));
|
||||
}
|
||||
}
|
||||
|
||||
log_proxy<debug_logger_policy> log_;
|
||||
struct ::archive* a_{nullptr};
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_extractor_<LoggerPolicy>::extract(filesystem_v2& fs,
|
||||
size_t max_queued_bytes) {
|
||||
DWARFS_CHECK(a_, "filesystem not opened");
|
||||
|
||||
auto lr = ::archive_entry_linkresolver_new();
|
||||
|
||||
::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a_));
|
||||
|
||||
::archive_entry* spare = nullptr;
|
||||
|
||||
worker_group archiver("archiver", 1);
|
||||
cache_semaphore sem;
|
||||
|
||||
sem.post(max_queued_bytes);
|
||||
|
||||
auto do_archive = [&](::archive_entry* ae, entry_view entry) {
|
||||
if (auto size = ::archive_entry_size(ae);
|
||||
S_ISREG(entry.mode()) && size > 0) {
|
||||
auto fd = fs.open(entry);
|
||||
|
||||
sem.wait(size);
|
||||
|
||||
if (auto ranges = fs.readv(fd, size, 0)) {
|
||||
archiver.add_job(
|
||||
[this, &sem, ranges = std::move(*ranges), ae, size]() mutable {
|
||||
check_result(::archive_write_header(a_, ae));
|
||||
for (auto& r : ranges) {
|
||||
auto br = r.get();
|
||||
check_result(::archive_write_data(a_, br.data(), br.size()));
|
||||
}
|
||||
sem.post(size);
|
||||
::archive_entry_free(ae);
|
||||
});
|
||||
} else {
|
||||
LOG_ERROR << "error reading inode [" << fd
|
||||
<< "]: " << ::strerror(-ranges.error());
|
||||
}
|
||||
} else {
|
||||
archiver.add_job([this, ae] {
|
||||
check_result(::archive_write_header(a_, ae));
|
||||
::archive_entry_free(ae);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
fs.walk_inode_order([&](auto entry, auto parent) {
|
||||
if (entry.inode() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto ae = ::archive_entry_new();
|
||||
struct ::stat stbuf;
|
||||
|
||||
if (fs.getattr(entry, &stbuf) != 0) {
|
||||
DWARFS_THROW(runtime_error, "getattr() failed");
|
||||
}
|
||||
|
||||
std::string path;
|
||||
path.reserve(256);
|
||||
parent.append_path_to(path);
|
||||
if (!path.empty()) {
|
||||
path += '/';
|
||||
}
|
||||
path += entry.name();
|
||||
|
||||
::archive_entry_set_pathname(ae, path.c_str());
|
||||
::archive_entry_copy_stat(ae, &stbuf);
|
||||
|
||||
if (S_ISLNK(entry.mode())) {
|
||||
std::string link;
|
||||
if (fs.readlink(entry, &link) != 0) {
|
||||
LOG_ERROR << "readlink() failed";
|
||||
}
|
||||
::archive_entry_set_symlink(ae, link.c_str());
|
||||
}
|
||||
|
||||
::archive_entry_linkify(lr, &ae, &spare);
|
||||
|
||||
if (ae) {
|
||||
do_archive(ae, entry);
|
||||
}
|
||||
|
||||
if (spare) {
|
||||
auto ev = fs.find(::archive_entry_ino(spare));
|
||||
if (!ev) {
|
||||
LOG_ERROR << "find() failed";
|
||||
}
|
||||
LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
|
||||
do_archive(spare, *ev);
|
||||
}
|
||||
});
|
||||
|
||||
archiver.wait();
|
||||
|
||||
// As we're visiting *all* hardlinks, we should never see any deferred
|
||||
// entries.
|
||||
::archive_entry* ae = nullptr;
|
||||
::archive_entry_linkify(lr, &ae, &spare);
|
||||
if (ae) {
|
||||
DWARFS_THROW(runtime_error, "unexpected deferred entry");
|
||||
}
|
||||
|
||||
::archive_entry_linkresolver_free(lr);
|
||||
}
|
||||
|
||||
filesystem_extractor::filesystem_extractor(logger& lgr)
|
||||
: impl_(make_unique_logging_object<filesystem_extractor::impl,
|
||||
filesystem_extractor_, logger_policies>(
|
||||
lgr)) {}
|
||||
|
||||
} // namespace dwarfs
|
@ -19,34 +19,21 @@
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <condition_variable>
|
||||
#include <cstring>
|
||||
#include <exception>
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include <sys/statvfs.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <folly/Conv.h>
|
||||
#include <folly/String.h>
|
||||
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
|
||||
#include "dwarfs/filesystem_extractor.h"
|
||||
#include "dwarfs/filesystem_v2.h"
|
||||
#include "dwarfs/fstypes.h"
|
||||
#include "dwarfs/logger.h"
|
||||
#include "dwarfs/mmap.h"
|
||||
#include "dwarfs/options.h"
|
||||
#include "dwarfs/util.h"
|
||||
#include "dwarfs/version.h"
|
||||
#include "dwarfs/worker_group.h"
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
@ -54,33 +41,6 @@ using namespace dwarfs;
|
||||
|
||||
namespace {
|
||||
|
||||
class cache_semaphore {
|
||||
public:
|
||||
void post(int64_t n) {
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
size_ += n;
|
||||
++count_;
|
||||
}
|
||||
condition_.notify_one();
|
||||
}
|
||||
|
||||
void wait(int64_t n) {
|
||||
std::unique_lock lock(mx_);
|
||||
while (size_ < n && count_ <= 0) {
|
||||
condition_.wait(lock);
|
||||
}
|
||||
size_ -= n;
|
||||
--count_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mx_;
|
||||
std::condition_variable condition_;
|
||||
int64_t count_{0};
|
||||
int64_t size_{0};
|
||||
};
|
||||
|
||||
int dwarfsextract(int argc, char** argv) {
|
||||
std::string filesystem, output, format, cache_size_str, log_level;
|
||||
size_t num_workers;
|
||||
@ -134,153 +94,31 @@ int dwarfsextract(int argc, char** argv) {
|
||||
fsopts.block_cache.num_workers = num_workers;
|
||||
fsopts.metadata.enable_nlink = true;
|
||||
|
||||
dwarfs::filesystem_v2 fs(lgr, std::make_shared<dwarfs::mmap>(filesystem),
|
||||
fsopts);
|
||||
filesystem_v2 fs(lgr, std::make_shared<mmap>(filesystem), fsopts);
|
||||
filesystem_extractor fsx(lgr);
|
||||
|
||||
log_proxy<debug_logger_policy> log_(lgr);
|
||||
struct ::archive* a;
|
||||
|
||||
auto check_result = [&](int res) {
|
||||
switch (res) {
|
||||
case ARCHIVE_OK:
|
||||
break;
|
||||
case ARCHIVE_WARN:
|
||||
LOG_WARN << std::string(archive_error_string(a));
|
||||
break;
|
||||
case ARCHIVE_RETRY:
|
||||
case ARCHIVE_FATAL:
|
||||
DWARFS_THROW(runtime_error, std::string(archive_error_string(a)));
|
||||
}
|
||||
};
|
||||
|
||||
if (format.empty()) {
|
||||
if (!output.empty()) {
|
||||
if (::chdir(output.c_str()) != 0) {
|
||||
DWARFS_THROW(runtime_error,
|
||||
output + ": " + std::string(strerror(errno)));
|
||||
}
|
||||
}
|
||||
|
||||
a = ::archive_write_disk_new();
|
||||
|
||||
check_result(::archive_write_disk_set_options(
|
||||
a,
|
||||
ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME));
|
||||
} else {
|
||||
a = ::archive_write_new();
|
||||
|
||||
check_result(::archive_write_set_format_by_name(a, format.c_str()));
|
||||
check_result(::archive_write_open_filename(
|
||||
a, vm.count("output") && !output.empty() && output != "-"
|
||||
? output.c_str()
|
||||
: nullptr));
|
||||
}
|
||||
|
||||
auto lr = ::archive_entry_linkresolver_new();
|
||||
|
||||
::archive_entry_linkresolver_set_strategy(lr, ::archive_format(a));
|
||||
|
||||
::archive_entry* spare = nullptr;
|
||||
|
||||
worker_group archiver("archiver", 1);
|
||||
cache_semaphore sem;
|
||||
size_t max_queued_bytes = 0;
|
||||
|
||||
{
|
||||
struct ::statvfs buf;
|
||||
fs.statvfs(&buf);
|
||||
sem.post(fsopts.block_cache.max_bytes > buf.f_bsize
|
||||
? fsopts.block_cache.max_bytes - buf.f_bsize
|
||||
: 0);
|
||||
if (fsopts.block_cache.max_bytes > buf.f_bsize) {
|
||||
max_queued_bytes = fsopts.block_cache.max_bytes - buf.f_bsize;
|
||||
}
|
||||
}
|
||||
|
||||
auto do_archive = [&](::archive_entry* ae, entry_view entry) {
|
||||
if (auto size = ::archive_entry_size(ae);
|
||||
S_ISREG(entry.mode()) && size > 0) {
|
||||
auto fd = fs.open(entry);
|
||||
sem.wait(size);
|
||||
auto ranges = fs.readv(fd, size, 0);
|
||||
if (!ranges) {
|
||||
LOG_ERROR << "error reading inode [" << fd
|
||||
<< "]: " << ::strerror(-ranges.error());
|
||||
return;
|
||||
}
|
||||
archiver.add_job([&sem, &check_result, ranges = std::move(*ranges), a,
|
||||
ae, size]() mutable {
|
||||
check_result(::archive_write_header(a, ae));
|
||||
for (auto& r : ranges) {
|
||||
auto br = r.get();
|
||||
check_result(::archive_write_data(a, br.data(), br.size()));
|
||||
}
|
||||
sem.post(size);
|
||||
::archive_entry_free(ae);
|
||||
});
|
||||
} else {
|
||||
archiver.add_job([&check_result, a, ae] {
|
||||
check_result(::archive_write_header(a, ae));
|
||||
::archive_entry_free(ae);
|
||||
});
|
||||
if (format.empty()) {
|
||||
fsx.open_disk(output);
|
||||
} else {
|
||||
if (output == "-") {
|
||||
output.clear();
|
||||
}
|
||||
};
|
||||
|
||||
fs.walk_inode_order([&](auto entry, auto parent) {
|
||||
if (entry.inode() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto ae = ::archive_entry_new();
|
||||
struct ::stat stbuf;
|
||||
|
||||
if (fs.getattr(entry, &stbuf) != 0) {
|
||||
DWARFS_THROW(runtime_error, "getattr() failed");
|
||||
}
|
||||
|
||||
std::string path;
|
||||
path.reserve(256);
|
||||
parent.append_path_to(path);
|
||||
if (!path.empty()) {
|
||||
path += '/';
|
||||
}
|
||||
path += entry.name();
|
||||
|
||||
::archive_entry_set_pathname(ae, path.c_str());
|
||||
::archive_entry_copy_stat(ae, &stbuf);
|
||||
|
||||
if (S_ISLNK(entry.mode())) {
|
||||
std::string link;
|
||||
if (fs.readlink(entry, &link) != 0) {
|
||||
LOG_ERROR << "readlink() failed";
|
||||
}
|
||||
::archive_entry_set_symlink(ae, link.c_str());
|
||||
}
|
||||
|
||||
::archive_entry_linkify(lr, &ae, &spare);
|
||||
|
||||
if (ae) {
|
||||
do_archive(ae, entry);
|
||||
}
|
||||
|
||||
if (spare) {
|
||||
auto ev = fs.find(::archive_entry_ino(spare));
|
||||
if (!ev) {
|
||||
LOG_ERROR << "find() failed";
|
||||
}
|
||||
LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
|
||||
do_archive(spare, *ev);
|
||||
}
|
||||
});
|
||||
|
||||
archiver.wait();
|
||||
|
||||
// As we're visiting *all* hardlinks, we should never see any deferred
|
||||
// entries.
|
||||
::archive_entry* ae = nullptr;
|
||||
::archive_entry_linkify(lr, &ae, &spare);
|
||||
if (ae) {
|
||||
DWARFS_THROW(runtime_error, "unexpected deferred entry");
|
||||
fsx.open_archive(output, format);
|
||||
}
|
||||
|
||||
::archive_entry_linkresolver_free(lr);
|
||||
check_result(::archive_write_free(a));
|
||||
fsx.extract(fs, max_queued_bytes);
|
||||
|
||||
fsx.close();
|
||||
} catch (runtime_error const& e) {
|
||||
std::cerr << "error: " << e.what() << std::endl;
|
||||
return 1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user