mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-10 13:04:15 -04:00
Use asynchronous filesystem access to speed up extraction
This commit is contained in:
parent
12b949525d
commit
1c080920d0
@ -20,8 +20,12 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <condition_variable>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <exception>
|
#include <exception>
|
||||||
|
#include <future>
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <sys/statvfs.h>
|
#include <sys/statvfs.h>
|
||||||
@ -42,6 +46,7 @@
|
|||||||
#include "dwarfs/options.h"
|
#include "dwarfs/options.h"
|
||||||
#include "dwarfs/util.h"
|
#include "dwarfs/util.h"
|
||||||
#include "dwarfs/version.h"
|
#include "dwarfs/version.h"
|
||||||
|
#include "dwarfs/worker_group.h"
|
||||||
|
|
||||||
namespace po = boost::program_options;
|
namespace po = boost::program_options;
|
||||||
|
|
||||||
@ -49,6 +54,30 @@ using namespace dwarfs;
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
/**
 * Minimal counting semaphore built on a mutex and a condition variable.
 *
 * The counter starts at zero. `post(n)` adds `n` credits, `wait(n)` blocks
 * until at least `n` credits are available and then removes them.
 */
class semaphore {
 public:
  /**
   * Add `n` credits and wake up blocked waiters.
   *
   * All waiters are notified (not just one): a single post can satisfy
   * several waiters at once, and a waiter that is woken but still needs
   * more credits than are available must not swallow the only wake-up.
   */
  void post(uint64_t n = 1) {
    {
      std::lock_guard lock(mx_);
      count_ += n;
    }
    // Notify outside the critical section so woken threads can take the
    // mutex immediately.
    condition_.notify_all();
  }

  /**
   * Block until at least `n` credits are available, then consume them.
   */
  void wait(uint64_t n = 1) {
    std::unique_lock lock(mx_);
    // The predicate form re-checks the counter after every wake-up, which
    // also covers spurious wake-ups.
    condition_.wait(lock, [&] { return count_ >= n; });
    count_ -= n;
  }

 private:
  std::mutex mx_;                      // guards count_
  std::condition_variable condition_;  // signalled whenever count_ grows
  uint64_t count_{0};                  // currently available credits
};
|
||||||
|
|
||||||
int dwarfsextract(int argc, char** argv) {
|
int dwarfsextract(int argc, char** argv) {
|
||||||
std::string filesystem, output, format, cache_size_str, log_level;
|
std::string filesystem, output, format, cache_size_str, log_level;
|
||||||
size_t num_workers;
|
size_t num_workers;
|
||||||
@ -66,7 +95,7 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
po::value<std::string>(&format),
|
po::value<std::string>(&format),
|
||||||
"output format")
|
"output format")
|
||||||
("num-workers,n",
|
("num-workers,n",
|
||||||
po::value<size_t>(&num_workers)->default_value(1),
|
po::value<size_t>(&num_workers)->default_value(4),
|
||||||
"number of worker threads")
|
"number of worker threads")
|
||||||
("cache-size,s",
|
("cache-size,s",
|
||||||
po::value<std::string>(&cache_size_str)->default_value("256m"),
|
po::value<std::string>(&cache_size_str)->default_value("256m"),
|
||||||
@ -150,15 +179,36 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
|
|
||||||
::archive_entry* spare = nullptr;
|
::archive_entry* spare = nullptr;
|
||||||
|
|
||||||
|
worker_group archiver("archiver", 1);
|
||||||
|
semaphore sem;
|
||||||
|
sem.post(fsopts.block_cache.max_bytes);
|
||||||
|
|
||||||
auto do_archive = [&](::archive_entry* ae, entry_view entry) {
|
auto do_archive = [&](::archive_entry* ae, entry_view entry) {
|
||||||
check_result(::archive_write_header(a, ae));
|
if (auto size = ::archive_entry_size(ae);
|
||||||
if (auto size = ::archive_entry_size(ae); size > 0) {
|
S_ISREG(entry.mode()) && size > 0) {
|
||||||
int fh = fs.open(entry);
|
auto fd = fs.open(entry);
|
||||||
iovec_read_buf irb;
|
sem.wait(size);
|
||||||
fs.readv(fh, irb, size, 0);
|
auto ranges = fs.readv(fd, size, 0);
|
||||||
for (auto const& iov : irb.buf) {
|
if (!ranges) {
|
||||||
check_result(::archive_write_data(a, iov.iov_base, iov.iov_len));
|
LOG_ERROR << "error reading inode [" << fd
|
||||||
|
<< "]: " << ::strerror(-ranges.error());
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
archiver.add_job([&sem, &check_result, ranges = std::move(*ranges), a,
|
||||||
|
ae, size]() mutable {
|
||||||
|
check_result(::archive_write_header(a, ae));
|
||||||
|
for (auto& r : ranges) {
|
||||||
|
auto br = r.get();
|
||||||
|
check_result(::archive_write_data(a, br.data(), br.size()));
|
||||||
|
}
|
||||||
|
sem.post(size);
|
||||||
|
::archive_entry_free(ae);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
archiver.add_job([&check_result, a, ae] {
|
||||||
|
check_result(::archive_write_header(a, ae));
|
||||||
|
::archive_entry_free(ae);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -197,7 +247,6 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
|
|
||||||
if (ae) {
|
if (ae) {
|
||||||
do_archive(ae, entry);
|
do_archive(ae, entry);
|
||||||
::archive_entry_free(ae);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (spare) {
|
if (spare) {
|
||||||
@ -207,10 +256,11 @@ int dwarfsextract(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
|
LOG_DEBUG << "archiving spare " << ::archive_entry_pathname(spare);
|
||||||
do_archive(spare, *ev);
|
do_archive(spare, *ev);
|
||||||
::archive_entry_free(spare);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
archiver.wait();
|
||||||
|
|
||||||
// As we're visiting *all* hardlinks, we should never see any deferred
|
// As we're visiting *all* hardlinks, we should never see any deferred
|
||||||
// entries.
|
// entries.
|
||||||
::archive_entry* ae = nullptr;
|
::archive_entry* ae = nullptr;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user