mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-08 20:12:56 -04:00
Allow rewriting with selective recompression
This commit is contained in:
parent
8328ccd048
commit
3081117de4
@ -111,11 +111,21 @@ Most other options are concerned with compression tuning:
|
||||
care about mount time, you can safely choose `lzma` compression here, as
|
||||
the data will only have to be decompressed once when mounting the image.
|
||||
|
||||
* `--recompress`:
|
||||
Take an existing DwarFS filesystem and recompress it using a different
|
||||
compression algorithm. Note that *only* the compression algorithm, i.e.
|
||||
the `--compression` option, has an impact on the new filesystem. Other
|
||||
options, e.g. `--block-size-bits`, have no impact.
|
||||
* `--recompress`[`=all|block|metadata|none`]:
|
||||
Take an existing DwarFS file system and recompress it using different
|
||||
compression algorithms. If no argument or `all` is given, all sections
|
||||
in the file system image will be recompressed. Note that *only* the
|
||||
compression algorithms, i.e. the `--compression`, `--schema-compression`
|
||||
and `--metadata-compression` options, have an impact on how the new file
|
||||
system is written. Other options, e.g. `--block-size-bits` or `--order`,
|
||||
have no impact. If `none` is given as an argument, none of the sections
|
||||
will be recompressed, but the file system is still rewritten in the
|
||||
latest file system format. This is an easy way of upgrading an old file
|
||||
system image to a new format. If `block` or `metadata` is given, only
|
||||
the block sections (i.e. the actual file data) or the metadata sections
|
||||
are recompressed, respectively. This can be useful if you want to switch from compressed
|
||||
metadata to uncompressed metadata without having to rebuild or recompress
|
||||
all the other data.
|
||||
|
||||
* `--set-owner=`*uid*:
|
||||
Set the owner for all entities in the file system. This can reduce the
|
||||
|
@ -44,6 +44,7 @@ struct statvfs;
|
||||
namespace dwarfs {
|
||||
|
||||
struct filesystem_options;
|
||||
struct rewrite_options;
|
||||
struct iovec_read_buf;
|
||||
|
||||
class filesystem_writer;
|
||||
@ -61,7 +62,7 @@ class filesystem_v2 {
|
||||
int inode_offset = 0);
|
||||
|
||||
static void rewrite(logger& lgr, progress& prog, std::shared_ptr<mmif> mm,
|
||||
filesystem_writer& writer);
|
||||
filesystem_writer& writer, rewrite_options const& opts);
|
||||
|
||||
static void identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os,
|
||||
int detail_level = 0);
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <folly/Range.h>
|
||||
|
||||
#include "dwarfs/fstypes.h"
|
||||
#include "dwarfs/worker_group.h"
|
||||
|
||||
@ -64,11 +66,6 @@ class filesystem_writer {
|
||||
const block_compressor& schema_bc,
|
||||
const block_compressor& metadata_bc, size_t max_queue_size);
|
||||
|
||||
// section create_block();
|
||||
// section create_metadata();
|
||||
|
||||
// void add_section(section&& section);
|
||||
|
||||
void write_block(std::vector<uint8_t>&& data) {
|
||||
impl_->write_block(std::move(data));
|
||||
}
|
||||
@ -81,6 +78,11 @@ class filesystem_writer {
|
||||
impl_->write_metadata_v2(std::move(data));
|
||||
}
|
||||
|
||||
// Forwards a section whose payload `data` is passed along as-is, together
// with its section type and the compression type it is tagged with, to the
// writer implementation.
void write_compressed_section(section_type type,
                              compression_type compression,
                              folly::ByteRange data)
{
  impl_->write_compressed_section(type, compression, data);
}
|
||||
|
||||
void flush() { impl_->flush(); }
|
||||
|
||||
size_t size() const { return impl_->size(); }
|
||||
@ -92,6 +94,9 @@ class filesystem_writer {
|
||||
virtual void write_block(std::vector<uint8_t>&& data) = 0;
|
||||
virtual void write_metadata_v2_schema(std::vector<uint8_t>&& data) = 0;
|
||||
virtual void write_metadata_v2(std::vector<uint8_t>&& data) = 0;
|
||||
virtual void
|
||||
write_compressed_section(section_type type, compression_type compression,
|
||||
folly::ByteRange data) = 0;
|
||||
virtual void flush() = 0;
|
||||
virtual size_t size() const = 0;
|
||||
};
|
||||
|
@ -24,6 +24,8 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include <folly/Range.h>
|
||||
|
||||
#include "dwarfs/fstypes.h"
|
||||
|
||||
namespace dwarfs {
|
||||
@ -42,6 +44,7 @@ class fs_section {
|
||||
std::string description() const { return impl_->description(); }
|
||||
bool check_fast(mmif& mm) const { return impl_->check_fast(mm); }
|
||||
bool verify(mmif& mm) const { return impl_->verify(mm); }
|
||||
// Returns the byte range the implementation reports for this section
// within the mapping `mm`.
folly::ByteRange data(mmif& mm) const
{
  return impl_->data(mm);
}
|
||||
|
||||
size_t end() const { return start() + length(); }
|
||||
|
||||
@ -57,6 +60,7 @@ class fs_section {
|
||||
virtual std::string description() const = 0;
|
||||
virtual bool check_fast(mmif& mm) const = 0;
|
||||
virtual bool verify(mmif& mm) const = 0;
|
||||
virtual folly::ByteRange data(mmif& mm) const = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -75,6 +75,11 @@ struct scanner_options {
|
||||
inode_options inode;
|
||||
};
|
||||
|
||||
/**
 * Options controlling which sections are recompressed when an existing
 * file system image is rewritten. Both flags are off by default.
 */
struct rewrite_options {
  // Recompress block sections instead of copying them through unchanged.
  bool recompress_block = false;

  // Recompress the metadata sections instead of copying them through
  // unchanged.
  bool recompress_metadata = false;
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, file_order_mode mode);
|
||||
|
||||
mlock_mode parse_mlock_mode(std::string_view mode);
|
||||
|
@ -382,13 +382,15 @@ filesystem_v2::filesystem_v2(logger& lgr, std::shared_ptr<mmif> mm,
|
||||
lgr, std::move(mm), options, stat_defaults, inode_offset)) {}
|
||||
|
||||
void filesystem_v2::rewrite(logger& lgr, progress& prog,
|
||||
std::shared_ptr<mmif> mm,
|
||||
filesystem_writer& writer) {
|
||||
std::shared_ptr<mmif> mm, filesystem_writer& writer,
|
||||
rewrite_options const& opts) {
|
||||
// TODO:
|
||||
LOG_PROXY(debug_logger_policy, lgr);
|
||||
filesystem_parser parser(mm);
|
||||
|
||||
std::vector<section_type> section_types;
|
||||
section_map sections;
|
||||
size_t total_block_size = 0;
|
||||
|
||||
while (auto s = parser.next_section()) {
|
||||
if (!s->check_fast(*mm)) {
|
||||
@ -400,36 +402,55 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
|
||||
}
|
||||
if (s->type() == section_type::BLOCK) {
|
||||
++prog.block_count;
|
||||
total_block_size += s->length();
|
||||
} else {
|
||||
if (!sections.emplace(s->type(), *s).second) {
|
||||
DWARFS_THROW(runtime_error, "duplicate section: " + s->name());
|
||||
}
|
||||
section_types.push_back(s->type());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint8_t> schema_raw;
|
||||
std::vector<uint8_t> meta_raw;
|
||||
auto meta = make_metadata(lgr, mm, sections, schema_raw, meta_raw,
|
||||
metadata_options(), nullptr, 0, true);
|
||||
|
||||
struct ::statvfs stbuf;
|
||||
meta.statvfs(&stbuf);
|
||||
prog.original_size = stbuf.f_blocks * stbuf.f_frsize;
|
||||
if (opts.recompress_metadata) {
|
||||
auto meta = make_metadata(lgr, mm, sections, schema_raw, meta_raw,
|
||||
metadata_options(), nullptr, 0, true);
|
||||
|
||||
struct ::statvfs stbuf;
|
||||
meta.statvfs(&stbuf);
|
||||
prog.original_size = stbuf.f_blocks * stbuf.f_frsize;
|
||||
} else {
|
||||
prog.original_size = total_block_size;
|
||||
}
|
||||
|
||||
parser.rewind();
|
||||
|
||||
while (auto s = parser.next_section()) {
|
||||
// TODO: multi-thread this?
|
||||
if (s->type() == section_type::BLOCK) {
|
||||
auto block = block_decompressor::decompress(
|
||||
s->compression(), mm->as<uint8_t>(s->start()), s->length());
|
||||
prog.filesystem_size += block.size();
|
||||
writer.write_block(std::move(block));
|
||||
if (opts.recompress_block) {
|
||||
auto block = block_decompressor::decompress(
|
||||
s->compression(), mm->as<uint8_t>(s->start()), s->length());
|
||||
prog.filesystem_size += block.size();
|
||||
writer.write_block(std::move(block));
|
||||
} else {
|
||||
writer.write_compressed_section(s->type(), s->compression(),
|
||||
s->data(*mm));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer.write_metadata_v2_schema(std::move(schema_raw));
|
||||
writer.write_metadata_v2(std::move(meta_raw));
|
||||
if (opts.recompress_metadata) {
|
||||
writer.write_metadata_v2_schema(std::move(schema_raw));
|
||||
writer.write_metadata_v2(std::move(meta_raw));
|
||||
} else {
|
||||
for (auto type : section_types) {
|
||||
auto& sec = DWARFS_NOTHROW(sections.at(type));
|
||||
writer.write_compressed_section(type, sec.compression(), sec.data(*mm));
|
||||
}
|
||||
}
|
||||
|
||||
writer.flush();
|
||||
}
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
#include <folly/Range.h>
|
||||
#include <folly/system/ThreadName.h>
|
||||
|
||||
#include "dwarfs/block_compressor.h"
|
||||
@ -41,20 +42,56 @@
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
namespace {
|
||||
|
||||
class fsblock {
|
||||
public:
|
||||
fsblock(logger& lgr, section_type type, const block_compressor& bc,
|
||||
std::vector<uint8_t>&& data);
|
||||
|
||||
fsblock(section_type type, compression_type compression,
|
||||
folly::ByteRange data);
|
||||
|
||||
void compress(worker_group& wg) { impl_->compress(wg); }
|
||||
void wait_until_compressed() { impl_->wait_until_compressed(); }
|
||||
section_type type() const { return impl_->type(); }
|
||||
compression_type compression() const { return impl_->compression(); }
|
||||
folly::ByteRange data() const { return impl_->data(); }
|
||||
size_t uncompressed_size() const { return impl_->uncompressed_size(); }
|
||||
size_t size() const { return impl_->size(); }
|
||||
|
||||
class impl {
|
||||
public:
|
||||
virtual ~impl() = default;
|
||||
|
||||
virtual void compress(worker_group& wg) = 0;
|
||||
virtual void wait_until_compressed() = 0;
|
||||
virtual section_type type() const = 0;
|
||||
virtual compression_type compression() const = 0;
|
||||
virtual folly::ByteRange data() const = 0;
|
||||
virtual size_t uncompressed_size() const = 0;
|
||||
virtual size_t size() const = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
std::unique_ptr<impl> impl_;
|
||||
};
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
class raw_fsblock : public fsblock::impl {
|
||||
private:
|
||||
class state {
|
||||
public:
|
||||
state(std::vector<uint8_t>&& data)
|
||||
state(std::vector<uint8_t>&& data, logger& lgr)
|
||||
: compressed_(false)
|
||||
, data_(std::move(data)) {}
|
||||
, data_(std::move(data))
|
||||
, LOG_PROXY_INIT(lgr) {}
|
||||
|
||||
template <typename LogProxy>
|
||||
void compress(const block_compressor& bc, LogProxy& lp) {
|
||||
void compress(const block_compressor& bc) {
|
||||
std::vector<uint8_t> tmp;
|
||||
|
||||
{
|
||||
auto td = lp.timed_trace();
|
||||
auto td = LOG_TIMED_TRACE;
|
||||
|
||||
tmp = bc.compress(data_);
|
||||
|
||||
@ -87,50 +124,83 @@ class fsblock {
|
||||
std::condition_variable cond_;
|
||||
std::atomic<bool> compressed_;
|
||||
std::vector<uint8_t> data_;
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
};
|
||||
|
||||
public:
|
||||
fsblock(section_type type, const block_compressor& bc,
|
||||
std::vector<uint8_t>&& data)
|
||||
raw_fsblock(logger& lgr, section_type type, const block_compressor& bc,
|
||||
std::vector<uint8_t>&& data)
|
||||
: type_(type)
|
||||
, bc_(bc)
|
||||
, uncompressed_size_(data.size())
|
||||
, state_(std::make_shared<state>(std::move(data))) {}
|
||||
, state_(std::make_shared<state>(std::move(data), lgr))
|
||||
, LOG_PROXY_INIT(lgr) {}
|
||||
|
||||
template <typename LogProxy>
|
||||
void compress(worker_group& wg, LogProxy& lp) {
|
||||
lp.trace() << "block queued for compression";
|
||||
void compress(worker_group& wg) override {
|
||||
LOG_TRACE << "block queued for compression";
|
||||
|
||||
std::shared_ptr<state> s = state_;
|
||||
|
||||
wg.add_job([&, s] {
|
||||
lp.trace() << "block compression started";
|
||||
s->compress(bc_, lp);
|
||||
LOG_TRACE << "block compression started";
|
||||
s->compress(bc_);
|
||||
});
|
||||
}
|
||||
|
||||
void wait_until_compressed() { state_->wait(); }
|
||||
void wait_until_compressed() override { state_->wait(); }
|
||||
|
||||
section_type type() const { return type_; }
|
||||
section_type type() const override { return type_; }
|
||||
|
||||
compression_type compression() const { return bc_.type(); }
|
||||
compression_type compression() const override { return bc_.type(); }
|
||||
|
||||
const std::vector<uint8_t>& data() const {
|
||||
return state_->data();
|
||||
;
|
||||
}
|
||||
folly::ByteRange data() const override { return state_->data(); }
|
||||
|
||||
size_t uncompressed_size() const { return uncompressed_size_; }
|
||||
size_t uncompressed_size() const override { return uncompressed_size_; }
|
||||
|
||||
size_t size() const { return state_->size(); }
|
||||
size_t size() const override { return state_->size(); }
|
||||
|
||||
private:
|
||||
const section_type type_;
|
||||
block_compressor const& bc_;
|
||||
const size_t uncompressed_size_;
|
||||
std::shared_ptr<state> state_;
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
};
|
||||
|
||||
class compressed_fsblock : public fsblock::impl {
|
||||
public:
|
||||
compressed_fsblock(section_type type, compression_type compression,
|
||||
folly::ByteRange range)
|
||||
: type_(type)
|
||||
, compression_(compression)
|
||||
, range_(range) {}
|
||||
|
||||
void compress(worker_group&) override {}
|
||||
void wait_until_compressed() override {}
|
||||
|
||||
section_type type() const override { return type_; }
|
||||
compression_type compression() const override { return compression_; }
|
||||
|
||||
folly::ByteRange data() const override { return range_; }
|
||||
|
||||
size_t uncompressed_size() const override { return range_.size(); }
|
||||
size_t size() const override { return range_.size(); }
|
||||
|
||||
private:
|
||||
const section_type type_;
|
||||
const compression_type compression_;
|
||||
folly::ByteRange range_;
|
||||
};
|
||||
|
||||
// Creates a block from raw `data`; the implementation is a raw_fsblock
// instantiated via make_unique_logging_object for the logger `lgr`.
fsblock::fsblock(logger& lgr, section_type type, const block_compressor& bc,
                 std::vector<uint8_t>&& data) {
  impl_ = make_unique_logging_object<impl, raw_fsblock, logger_policies>(
      lgr, type, bc, std::move(data));
}
|
||||
|
||||
// Creates a block from already-compressed `data`; the implementation is a
// compressed_fsblock that simply keeps the given range.
fsblock::fsblock(section_type type, compression_type compression,
                 folly::ByteRange data) {
  impl_ = std::make_unique<compressed_fsblock>(type, compression, data);
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
class filesystem_writer_ : public filesystem_writer::impl {
|
||||
public:
|
||||
@ -144,6 +214,8 @@ class filesystem_writer_ : public filesystem_writer::impl {
|
||||
void write_block(std::vector<uint8_t>&& data) override;
|
||||
void write_metadata_v2_schema(std::vector<uint8_t>&& data) override;
|
||||
void write_metadata_v2(std::vector<uint8_t>&& data) override;
|
||||
void write_compressed_section(section_type type, compression_type compression,
|
||||
folly::ByteRange data) override;
|
||||
void flush() override;
|
||||
size_t size() const override { return os_.tellp(); }
|
||||
|
||||
@ -151,11 +223,11 @@ class filesystem_writer_ : public filesystem_writer::impl {
|
||||
void write_section(section_type type, std::vector<uint8_t>&& data,
|
||||
block_compressor const& bc);
|
||||
void write(section_type type, compression_type compression,
|
||||
const std::vector<uint8_t>& data);
|
||||
folly::ByteRange range);
|
||||
void write(const char* data, size_t size);
|
||||
template <typename T>
|
||||
void write(const T& obj);
|
||||
void write(const std::vector<uint8_t>& data);
|
||||
void write(folly::ByteRange range);
|
||||
void writer_thread();
|
||||
size_t mem_used() const;
|
||||
|
||||
@ -166,7 +238,7 @@ class filesystem_writer_ : public filesystem_writer::impl {
|
||||
const block_compressor& schema_bc_;
|
||||
const block_compressor& metadata_bc_;
|
||||
const size_t max_queue_size_;
|
||||
log_proxy<LoggerPolicy> log_;
|
||||
LOG_PROXY_DECL(LoggerPolicy);
|
||||
std::deque<std::unique_ptr<fsblock>> queue_;
|
||||
mutable std::mutex mx_;
|
||||
std::condition_variable cond_;
|
||||
@ -187,7 +259,7 @@ filesystem_writer_<LoggerPolicy>::filesystem_writer_(
|
||||
, schema_bc_(schema_bc)
|
||||
, metadata_bc_(metadata_bc)
|
||||
, max_queue_size_(max_queue_size)
|
||||
, log_(lgr)
|
||||
, LOG_PROXY_INIT(lgr)
|
||||
, flush_(false)
|
||||
, writer_thread_(&filesystem_writer_::writer_thread, this) {}
|
||||
|
||||
@ -263,14 +335,14 @@ void filesystem_writer_<LoggerPolicy>::write(const T& obj) {
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write(const std::vector<uint8_t>& data) {
|
||||
write(reinterpret_cast<const char*>(&data[0]), data.size());
|
||||
void filesystem_writer_<LoggerPolicy>::write(folly::ByteRange range) {
|
||||
write(reinterpret_cast<const char*>(range.data()), range.size());
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write(section_type type,
|
||||
compression_type compression,
|
||||
const std::vector<uint8_t>& data) {
|
||||
folly::ByteRange range) {
|
||||
section_header_v2 sh;
|
||||
::memcpy(&sh.magic[0], "DWARFS", 6);
|
||||
sh.major = MAJOR_VERSION;
|
||||
@ -278,22 +350,22 @@ void filesystem_writer_<LoggerPolicy>::write(section_type type,
|
||||
sh.number = section_number_++;
|
||||
sh.type = static_cast<uint16_t>(type);
|
||||
sh.compression = static_cast<uint16_t>(compression);
|
||||
sh.length = data.size();
|
||||
sh.length = range.size();
|
||||
|
||||
checksum xxh(checksum::algorithm::XXH3_64);
|
||||
xxh.update(&sh.number,
|
||||
sizeof(section_header_v2) - offsetof(section_header_v2, number));
|
||||
xxh.update(data.data(), data.size());
|
||||
xxh.update(range.data(), range.size());
|
||||
DWARFS_CHECK(xxh.finalize(&sh.xxh3_64), "XXH3-64 checksum failed");
|
||||
|
||||
checksum sha(checksum::algorithm::SHA2_512_256);
|
||||
sha.update(&sh.xxh3_64,
|
||||
sizeof(section_header_v2) - offsetof(section_header_v2, xxh3_64));
|
||||
sha.update(data.data(), data.size());
|
||||
sha.update(range.data(), range.size());
|
||||
DWARFS_CHECK(sha.finalize(&sh.sha2_512_256), "SHA512/256 checksum failed");
|
||||
|
||||
write(sh);
|
||||
write(data);
|
||||
write(range);
|
||||
|
||||
if (type == section_type::BLOCK) {
|
||||
prog_.blocks_written++;
|
||||
@ -312,9 +384,23 @@ void filesystem_writer_<LoggerPolicy>::write_section(
|
||||
}
|
||||
}
|
||||
|
||||
auto fsb = std::make_unique<fsblock>(type, bc, std::move(data));
|
||||
auto fsb =
|
||||
std::make_unique<fsblock>(LOG_GET_LOGGER, type, bc, std::move(data));
|
||||
|
||||
fsb->compress(wg_, log_);
|
||||
fsb->compress(wg_);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mx_);
|
||||
queue_.push_back(std::move(fsb));
|
||||
}
|
||||
|
||||
cond_.notify_one();
|
||||
}
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_compressed_section(
|
||||
section_type type, compression_type compression, folly::ByteRange data) {
|
||||
auto fsb = std::make_unique<fsblock>(type, compression, data);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mx_);
|
||||
@ -359,6 +445,8 @@ void filesystem_writer_<LoggerPolicy>::flush() {
|
||||
writer_thread_.join();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
filesystem_writer::filesystem_writer(std::ostream& os, logger& lgr,
|
||||
worker_group& wg, progress& prog,
|
||||
const block_compressor& bc,
|
||||
|
@ -36,13 +36,20 @@ class fs_section_v1 : public fs_section::impl {
|
||||
|
||||
size_t start() const override { return start_; }
|
||||
size_t length() const override { return hdr_.length; }
|
||||
|
||||
compression_type compression() const override { return hdr_.compression; }
|
||||
section_type type() const override { return hdr_.type; }
|
||||
|
||||
std::string name() const override { return get_section_name(hdr_.type); }
|
||||
std::string description() const override { return hdr_.to_string(); }
|
||||
|
||||
bool check_fast(mmif&) const override { return true; }
|
||||
bool verify(mmif&) const override { return true; }
|
||||
|
||||
// Returns a view of this section's payload: hdr_.length bytes of the
// mapping `mm`, beginning at offset start_.
folly::ByteRange data(mmif& mm) const override {
  auto const* payload = mm.as<uint8_t>(start_);
  return folly::ByteRange(payload, hdr_.length);
}
|
||||
|
||||
private:
|
||||
size_t start_;
|
||||
section_header hdr_;
|
||||
@ -54,16 +61,21 @@ class fs_section_v2 : public fs_section::impl {
|
||||
|
||||
size_t start() const override { return start_; }
|
||||
size_t length() const override { return hdr_.length; }
|
||||
|
||||
compression_type compression() const override {
|
||||
return static_cast<compression_type>(hdr_.compression);
|
||||
}
|
||||
|
||||
section_type type() const override {
|
||||
return static_cast<section_type>(hdr_.type);
|
||||
}
|
||||
|
||||
std::string name() const override {
|
||||
return get_section_name(static_cast<section_type>(hdr_.type));
|
||||
}
|
||||
|
||||
std::string description() const override { return hdr_.to_string(); }
|
||||
|
||||
bool check_fast(mmif& mm) const override {
|
||||
auto hdr_cs_len =
|
||||
sizeof(section_header_v2) - offsetof(section_header_v2, number);
|
||||
@ -71,6 +83,7 @@ class fs_section_v2 : public fs_section::impl {
|
||||
mm.as<void>(start_ - hdr_cs_len),
|
||||
hdr_.length + hdr_cs_len, &hdr_.xxh3_64);
|
||||
}
|
||||
|
||||
bool verify(mmif& mm) const override {
|
||||
auto hdr_sha_len =
|
||||
sizeof(section_header_v2) - offsetof(section_header_v2, xxh3_64);
|
||||
@ -79,6 +92,10 @@ class fs_section_v2 : public fs_section::impl {
|
||||
hdr_.length + hdr_sha_len, &hdr_.sha2_512_256);
|
||||
}
|
||||
|
||||
// Returns a view of this section's payload: hdr_.length bytes of the
// mapping `mm`, beginning at offset start_.
folly::ByteRange data(mmif& mm) const override {
  auto const* payload = mm.as<uint8_t>(start_);
  return folly::ByteRange(payload, hdr_.length);
}
|
||||
|
||||
private:
|
||||
size_t start_;
|
||||
section_header_v2 hdr_;
|
||||
|
@ -295,9 +295,9 @@ int mkdwarfs(int argc, char** argv) {
|
||||
block_manager::config cfg;
|
||||
std::string path, output, window_sizes, memory_limit, script_arg, compression,
|
||||
schema_compression, metadata_compression, log_level_str, timestamp,
|
||||
time_resolution, order, progress_mode;
|
||||
time_resolution, order, progress_mode, recompress_opts;
|
||||
size_t num_workers, max_scanner_workers;
|
||||
bool recompress = false, no_progress = false;
|
||||
bool no_progress = false;
|
||||
unsigned level;
|
||||
uint16_t uid, gid;
|
||||
|
||||
@ -347,8 +347,8 @@ int mkdwarfs(int argc, char** argv) {
|
||||
po::value<std::string>(&metadata_compression),
|
||||
"metadata compression algorithm")
|
||||
("recompress",
|
||||
po::value<bool>(&recompress)->zero_tokens(),
|
||||
"recompress an existing filesystem")
|
||||
po::value<std::string>(&recompress_opts)->implicit_value("all"),
|
||||
"recompress an existing filesystem (none, block, metadata, all)")
|
||||
("set-owner",
|
||||
po::value<uint16_t>(&uid),
|
||||
"set owner (uid) for whole file system")
|
||||
@ -516,6 +516,24 @@ int mkdwarfs(int argc, char** argv) {
|
||||
order = defaults.order;
|
||||
}
|
||||
|
||||
bool recompress = vm.count("recompress");
|
||||
rewrite_options rw_opts;
|
||||
if (recompress) {
|
||||
std::unordered_map<std::string, unsigned> const modes{
|
||||
{"all", 3},
|
||||
{"metadata", 2},
|
||||
{"block", 1},
|
||||
{"none", 0},
|
||||
};
|
||||
if (auto it = modes.find(recompress_opts); it != modes.end()) {
|
||||
rw_opts.recompress_block = it->second & 1;
|
||||
rw_opts.recompress_metadata = it->second & 2;
|
||||
} else {
|
||||
std::cerr << "invalid recompress mode: " << recompress_opts << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> order_opts;
|
||||
boost::split(order_opts, order, boost::is_any_of(":"));
|
||||
|
||||
@ -721,8 +739,8 @@ int mkdwarfs(int argc, char** argv) {
|
||||
auto ti = LOG_TIMED_INFO;
|
||||
|
||||
if (recompress) {
|
||||
filesystem_v2::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path),
|
||||
fsw);
|
||||
filesystem_v2::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path), fsw,
|
||||
rw_opts);
|
||||
wg_writer.wait();
|
||||
} else {
|
||||
options.inode.with_similarity =
|
||||
|
BIN
test/compat-v0.3.0.dwarfs
Normal file
BIN
test/compat-v0.3.0.dwarfs
Normal file
Binary file not shown.
@ -36,6 +36,7 @@
|
||||
#include "dwarfs/scanner.h"
|
||||
#include "dwarfs/script.h"
|
||||
#include "loremipsum.h"
|
||||
#include "mmap_mock.h"
|
||||
|
||||
namespace dwarfs {
|
||||
namespace test {
|
||||
@ -90,26 +91,6 @@ std::map<std::string, simplestat> statmap{
|
||||
};
|
||||
} // namespace
|
||||
|
||||
class mmap_mock : public mmif {
|
||||
public:
|
||||
mmap_mock(const std::string& data)
|
||||
: m_data(data) {}
|
||||
|
||||
void const* addr() const override { return m_data.data(); }
|
||||
|
||||
size_t size() const override { return m_data.size(); }
|
||||
|
||||
boost::system::error_code lock(off_t, size_t) override {
|
||||
return boost::system::error_code();
|
||||
}
|
||||
boost::system::error_code release(off_t, size_t) override {
|
||||
return boost::system::error_code();
|
||||
}
|
||||
|
||||
private:
|
||||
const std::string m_data;
|
||||
};
|
||||
|
||||
class os_access_mock : public os_access {
|
||||
public:
|
||||
std::shared_ptr<dir_reader> opendir(const std::string& path) const override {
|
||||
|
@ -23,14 +23,21 @@
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include <folly/json.h>
|
||||
|
||||
#include "dwarfs/block_compressor.h"
|
||||
#include "dwarfs/filesystem_v2.h"
|
||||
#include "dwarfs/filesystem_writer.h"
|
||||
#include "dwarfs/logger.h"
|
||||
#include "dwarfs/mmap.h"
|
||||
#include "dwarfs/options.h"
|
||||
#include "dwarfs/progress.h"
|
||||
#include "dwarfs/worker_group.h"
|
||||
|
||||
#include "mmap_mock.h"
|
||||
|
||||
namespace {
|
||||
|
||||
@ -153,6 +160,7 @@ char const* reference = R"(
|
||||
std::vector<std::string> versions{
|
||||
"0.2.0",
|
||||
"0.2.3",
|
||||
"0.3.0",
|
||||
};
|
||||
|
||||
} // namespace
|
||||
@ -173,3 +181,39 @@ TEST_P(compat, backwards_compatibility) {
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(dwarfs, compat, ::testing::ValuesIn(versions));
|
||||
|
||||
class rewrite
|
||||
: public testing::TestWithParam<std::tuple<std::string, bool, bool>> {};
|
||||
|
||||
// Rewrites the compatibility image of the given version with the given
// combination of block/metadata recompression, then loads the rewritten
// image and checks that its metadata still equals the reference JSON.
TEST_P(rewrite, filesystem_rewrite) {
  auto [version, recompress_block, recompress_metadata] = GetParam();

  std::ostringstream log_output;
  stream_logger lgr(log_output);

  std::string image_path(TEST_DATA_DIR "/compat-v");
  image_path += version;
  image_path += ".dwarfs";

  rewrite_options rw_opts;
  rw_opts.recompress_block = recompress_block;
  rw_opts.recompress_metadata = recompress_metadata;

  worker_group workers("rewriter", 2);
  block_compressor null_bc("null");
  progress prog([](const progress&, bool) {}, 1000);
  std::ostringstream rewritten, idss;
  filesystem_writer writer(rewritten, lgr, workers, prog, null_bc, 64 << 20);

  // Identify the original image, then rewrite it into `rewritten`.
  filesystem_v2::identify(lgr, std::make_shared<mmap>(image_path), idss);
  filesystem_v2::rewrite(lgr, prog, std::make_shared<mmap>(image_path),
                         writer, rw_opts);

  // The rewritten image must be identifiable and loadable from memory.
  filesystem_v2::identify(
      lgr, std::make_shared<test::mmap_mock>(rewritten.str()), idss);
  filesystem_v2 rewritten_fs(
      lgr, std::make_shared<test::mmap_mock>(rewritten.str()));

  auto actual = rewritten_fs.metadata_as_dynamic();
  auto expected = folly::parseJson(reference);
  EXPECT_EQ(expected, actual);
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(dwarfs, rewrite,
|
||||
::testing::Combine(::testing::ValuesIn(versions),
|
||||
::testing::Bool(),
|
||||
::testing::Bool()));
|
||||
|
48
test/mmap_mock.h
Normal file
48
test/mmap_mock.h
Normal file
@ -0,0 +1,48 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once

#include <string>
#include <utility>

#include "dwarfs/mmif.h"
|
||||
|
||||
namespace dwarfs {
|
||||
namespace test {
|
||||
|
||||
class mmap_mock : public mmif {
|
||||
public:
|
||||
mmap_mock(const std::string& data)
|
||||
: m_data(data) {}
|
||||
|
||||
void const* addr() const override { return m_data.data(); }
|
||||
|
||||
size_t size() const override { return m_data.size(); }
|
||||
|
||||
boost::system::error_code lock(off_t, size_t) override {
|
||||
return boost::system::error_code();
|
||||
}
|
||||
boost::system::error_code release(off_t, size_t) override {
|
||||
return boost::system::error_code();
|
||||
}
|
||||
|
||||
private:
|
||||
const std::string m_data;
|
||||
};
|
||||
|
||||
} // namespace test
|
||||
} // namespace dwarfs
|
Loading…
x
Reference in New Issue
Block a user