mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-10 13:04:15 -04:00
Allow rewriting with selective recompression
This commit is contained in:
parent
8328ccd048
commit
3081117de4
@ -111,11 +111,21 @@ Most other options are concerned with compression tuning:
|
|||||||
care about mount time, you can safely choose `lzma` compression here, as
|
care about mount time, you can safely choose `lzma` compression here, as
|
||||||
the data will only have to be decompressed once when mounting the image.
|
the data will only have to be decompressed once when mounting the image.
|
||||||
|
|
||||||
* `--recompress`:
|
* `--recompress`[`=all|block|metadata|none`]:
|
||||||
Take an existing DwarFS filesystem and recompress it using a different
|
Take an existing DwarFS file system and recompress it using different
|
||||||
compression algorithm. Note that *only* the compression algorithm, i.e.
|
compression algorithms. If no argument or `all` is given, all sections
|
||||||
the `--compression` option, has an impact on the new filesystem. Other
|
in the file system image will be recompressed. Note that *only* the
|
||||||
options, e.g. `--block-size-bits`, have no impact.
|
compression algorithms, i.e. the `--compression`, `--schema-compression`
|
||||||
|
and `--metadata-compression` options, have an impact on how the new file
|
||||||
|
system is written. Other options, e.g. `--block-size-bits` or `--order`,
|
||||||
|
have no impact. If `none` is given as an argument, none of the sections
|
||||||
|
will be recompressed, but the file system is still rewritten in the
|
||||||
|
latest file system format. This is an easy way of upgrading an old file
|
||||||
|
system image to a new format. If `block` or `metadata` is given, only
|
||||||
|
the block sections (i.e. the actual file data) or the metadata sections
|
||||||
|
are recompressed. This can be useful if you want to switch from compressed
|
||||||
|
metadata to uncompressed metadata without having to rebuild or recompress
|
||||||
|
all the other data.
|
||||||
|
|
||||||
* `--set-owner=`*uid*:
|
* `--set-owner=`*uid*:
|
||||||
Set the owner for all entities in the file system. This can reduce the
|
Set the owner for all entities in the file system. This can reduce the
|
||||||
|
@ -44,6 +44,7 @@ struct statvfs;
|
|||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
|
|
||||||
struct filesystem_options;
|
struct filesystem_options;
|
||||||
|
struct rewrite_options;
|
||||||
struct iovec_read_buf;
|
struct iovec_read_buf;
|
||||||
|
|
||||||
class filesystem_writer;
|
class filesystem_writer;
|
||||||
@ -61,7 +62,7 @@ class filesystem_v2 {
|
|||||||
int inode_offset = 0);
|
int inode_offset = 0);
|
||||||
|
|
||||||
static void rewrite(logger& lgr, progress& prog, std::shared_ptr<mmif> mm,
|
static void rewrite(logger& lgr, progress& prog, std::shared_ptr<mmif> mm,
|
||||||
filesystem_writer& writer);
|
filesystem_writer& writer, rewrite_options const& opts);
|
||||||
|
|
||||||
static void identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os,
|
static void identify(logger& lgr, std::shared_ptr<mmif> mm, std::ostream& os,
|
||||||
int detail_level = 0);
|
int detail_level = 0);
|
||||||
|
@ -28,6 +28,8 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <folly/Range.h>
|
||||||
|
|
||||||
#include "dwarfs/fstypes.h"
|
#include "dwarfs/fstypes.h"
|
||||||
#include "dwarfs/worker_group.h"
|
#include "dwarfs/worker_group.h"
|
||||||
|
|
||||||
@ -64,11 +66,6 @@ class filesystem_writer {
|
|||||||
const block_compressor& schema_bc,
|
const block_compressor& schema_bc,
|
||||||
const block_compressor& metadata_bc, size_t max_queue_size);
|
const block_compressor& metadata_bc, size_t max_queue_size);
|
||||||
|
|
||||||
// section create_block();
|
|
||||||
// section create_metadata();
|
|
||||||
|
|
||||||
// void add_section(section&& section);
|
|
||||||
|
|
||||||
void write_block(std::vector<uint8_t>&& data) {
|
void write_block(std::vector<uint8_t>&& data) {
|
||||||
impl_->write_block(std::move(data));
|
impl_->write_block(std::move(data));
|
||||||
}
|
}
|
||||||
@ -81,6 +78,11 @@ class filesystem_writer {
|
|||||||
impl_->write_metadata_v2(std::move(data));
|
impl_->write_metadata_v2(std::move(data));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void write_compressed_section(section_type type, compression_type compression,
|
||||||
|
folly::ByteRange data) {
|
||||||
|
impl_->write_compressed_section(type, compression, data);
|
||||||
|
}
|
||||||
|
|
||||||
void flush() { impl_->flush(); }
|
void flush() { impl_->flush(); }
|
||||||
|
|
||||||
size_t size() const { return impl_->size(); }
|
size_t size() const { return impl_->size(); }
|
||||||
@ -92,6 +94,9 @@ class filesystem_writer {
|
|||||||
virtual void write_block(std::vector<uint8_t>&& data) = 0;
|
virtual void write_block(std::vector<uint8_t>&& data) = 0;
|
||||||
virtual void write_metadata_v2_schema(std::vector<uint8_t>&& data) = 0;
|
virtual void write_metadata_v2_schema(std::vector<uint8_t>&& data) = 0;
|
||||||
virtual void write_metadata_v2(std::vector<uint8_t>&& data) = 0;
|
virtual void write_metadata_v2(std::vector<uint8_t>&& data) = 0;
|
||||||
|
virtual void
|
||||||
|
write_compressed_section(section_type type, compression_type compression,
|
||||||
|
folly::ByteRange data) = 0;
|
||||||
virtual void flush() = 0;
|
virtual void flush() = 0;
|
||||||
virtual size_t size() const = 0;
|
virtual size_t size() const = 0;
|
||||||
};
|
};
|
||||||
|
@ -24,6 +24,8 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include <folly/Range.h>
|
||||||
|
|
||||||
#include "dwarfs/fstypes.h"
|
#include "dwarfs/fstypes.h"
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
@ -42,6 +44,7 @@ class fs_section {
|
|||||||
std::string description() const { return impl_->description(); }
|
std::string description() const { return impl_->description(); }
|
||||||
bool check_fast(mmif& mm) const { return impl_->check_fast(mm); }
|
bool check_fast(mmif& mm) const { return impl_->check_fast(mm); }
|
||||||
bool verify(mmif& mm) const { return impl_->verify(mm); }
|
bool verify(mmif& mm) const { return impl_->verify(mm); }
|
||||||
|
folly::ByteRange data(mmif& mm) const { return impl_->data(mm); }
|
||||||
|
|
||||||
size_t end() const { return start() + length(); }
|
size_t end() const { return start() + length(); }
|
||||||
|
|
||||||
@ -57,6 +60,7 @@ class fs_section {
|
|||||||
virtual std::string description() const = 0;
|
virtual std::string description() const = 0;
|
||||||
virtual bool check_fast(mmif& mm) const = 0;
|
virtual bool check_fast(mmif& mm) const = 0;
|
||||||
virtual bool verify(mmif& mm) const = 0;
|
virtual bool verify(mmif& mm) const = 0;
|
||||||
|
virtual folly::ByteRange data(mmif& mm) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -75,6 +75,11 @@ struct scanner_options {
|
|||||||
inode_options inode;
|
inode_options inode;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct rewrite_options {
|
||||||
|
bool recompress_block{false};
|
||||||
|
bool recompress_metadata{false};
|
||||||
|
};
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, file_order_mode mode);
|
std::ostream& operator<<(std::ostream& os, file_order_mode mode);
|
||||||
|
|
||||||
mlock_mode parse_mlock_mode(std::string_view mode);
|
mlock_mode parse_mlock_mode(std::string_view mode);
|
||||||
|
@ -382,13 +382,15 @@ filesystem_v2::filesystem_v2(logger& lgr, std::shared_ptr<mmif> mm,
|
|||||||
lgr, std::move(mm), options, stat_defaults, inode_offset)) {}
|
lgr, std::move(mm), options, stat_defaults, inode_offset)) {}
|
||||||
|
|
||||||
void filesystem_v2::rewrite(logger& lgr, progress& prog,
|
void filesystem_v2::rewrite(logger& lgr, progress& prog,
|
||||||
std::shared_ptr<mmif> mm,
|
std::shared_ptr<mmif> mm, filesystem_writer& writer,
|
||||||
filesystem_writer& writer) {
|
rewrite_options const& opts) {
|
||||||
// TODO:
|
// TODO:
|
||||||
LOG_PROXY(debug_logger_policy, lgr);
|
LOG_PROXY(debug_logger_policy, lgr);
|
||||||
filesystem_parser parser(mm);
|
filesystem_parser parser(mm);
|
||||||
|
|
||||||
|
std::vector<section_type> section_types;
|
||||||
section_map sections;
|
section_map sections;
|
||||||
|
size_t total_block_size = 0;
|
||||||
|
|
||||||
while (auto s = parser.next_section()) {
|
while (auto s = parser.next_section()) {
|
||||||
if (!s->check_fast(*mm)) {
|
if (!s->check_fast(*mm)) {
|
||||||
@ -400,36 +402,55 @@ void filesystem_v2::rewrite(logger& lgr, progress& prog,
|
|||||||
}
|
}
|
||||||
if (s->type() == section_type::BLOCK) {
|
if (s->type() == section_type::BLOCK) {
|
||||||
++prog.block_count;
|
++prog.block_count;
|
||||||
|
total_block_size += s->length();
|
||||||
} else {
|
} else {
|
||||||
if (!sections.emplace(s->type(), *s).second) {
|
if (!sections.emplace(s->type(), *s).second) {
|
||||||
DWARFS_THROW(runtime_error, "duplicate section: " + s->name());
|
DWARFS_THROW(runtime_error, "duplicate section: " + s->name());
|
||||||
}
|
}
|
||||||
|
section_types.push_back(s->type());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> schema_raw;
|
std::vector<uint8_t> schema_raw;
|
||||||
std::vector<uint8_t> meta_raw;
|
std::vector<uint8_t> meta_raw;
|
||||||
|
|
||||||
|
if (opts.recompress_metadata) {
|
||||||
auto meta = make_metadata(lgr, mm, sections, schema_raw, meta_raw,
|
auto meta = make_metadata(lgr, mm, sections, schema_raw, meta_raw,
|
||||||
metadata_options(), nullptr, 0, true);
|
metadata_options(), nullptr, 0, true);
|
||||||
|
|
||||||
struct ::statvfs stbuf;
|
struct ::statvfs stbuf;
|
||||||
meta.statvfs(&stbuf);
|
meta.statvfs(&stbuf);
|
||||||
prog.original_size = stbuf.f_blocks * stbuf.f_frsize;
|
prog.original_size = stbuf.f_blocks * stbuf.f_frsize;
|
||||||
|
} else {
|
||||||
|
prog.original_size = total_block_size;
|
||||||
|
}
|
||||||
|
|
||||||
parser.rewind();
|
parser.rewind();
|
||||||
|
|
||||||
while (auto s = parser.next_section()) {
|
while (auto s = parser.next_section()) {
|
||||||
// TODO: multi-thread this?
|
// TODO: multi-thread this?
|
||||||
if (s->type() == section_type::BLOCK) {
|
if (s->type() == section_type::BLOCK) {
|
||||||
|
if (opts.recompress_block) {
|
||||||
auto block = block_decompressor::decompress(
|
auto block = block_decompressor::decompress(
|
||||||
s->compression(), mm->as<uint8_t>(s->start()), s->length());
|
s->compression(), mm->as<uint8_t>(s->start()), s->length());
|
||||||
prog.filesystem_size += block.size();
|
prog.filesystem_size += block.size();
|
||||||
writer.write_block(std::move(block));
|
writer.write_block(std::move(block));
|
||||||
|
} else {
|
||||||
|
writer.write_compressed_section(s->type(), s->compression(),
|
||||||
|
s->data(*mm));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (opts.recompress_metadata) {
|
||||||
writer.write_metadata_v2_schema(std::move(schema_raw));
|
writer.write_metadata_v2_schema(std::move(schema_raw));
|
||||||
writer.write_metadata_v2(std::move(meta_raw));
|
writer.write_metadata_v2(std::move(meta_raw));
|
||||||
|
} else {
|
||||||
|
for (auto type : section_types) {
|
||||||
|
auto& sec = DWARFS_NOTHROW(sections.at(type));
|
||||||
|
writer.write_compressed_section(type, sec.compression(), sec.data(*mm));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
writer.flush();
|
writer.flush();
|
||||||
}
|
}
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
|
#include <folly/Range.h>
|
||||||
#include <folly/system/ThreadName.h>
|
#include <folly/system/ThreadName.h>
|
||||||
|
|
||||||
#include "dwarfs/block_compressor.h"
|
#include "dwarfs/block_compressor.h"
|
||||||
@ -41,20 +42,56 @@
|
|||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
class fsblock {
|
class fsblock {
|
||||||
|
public:
|
||||||
|
fsblock(logger& lgr, section_type type, const block_compressor& bc,
|
||||||
|
std::vector<uint8_t>&& data);
|
||||||
|
|
||||||
|
fsblock(section_type type, compression_type compression,
|
||||||
|
folly::ByteRange data);
|
||||||
|
|
||||||
|
void compress(worker_group& wg) { impl_->compress(wg); }
|
||||||
|
void wait_until_compressed() { impl_->wait_until_compressed(); }
|
||||||
|
section_type type() const { return impl_->type(); }
|
||||||
|
compression_type compression() const { return impl_->compression(); }
|
||||||
|
folly::ByteRange data() const { return impl_->data(); }
|
||||||
|
size_t uncompressed_size() const { return impl_->uncompressed_size(); }
|
||||||
|
size_t size() const { return impl_->size(); }
|
||||||
|
|
||||||
|
class impl {
|
||||||
|
public:
|
||||||
|
virtual ~impl() = default;
|
||||||
|
|
||||||
|
virtual void compress(worker_group& wg) = 0;
|
||||||
|
virtual void wait_until_compressed() = 0;
|
||||||
|
virtual section_type type() const = 0;
|
||||||
|
virtual compression_type compression() const = 0;
|
||||||
|
virtual folly::ByteRange data() const = 0;
|
||||||
|
virtual size_t uncompressed_size() const = 0;
|
||||||
|
virtual size_t size() const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unique_ptr<impl> impl_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
class raw_fsblock : public fsblock::impl {
|
||||||
private:
|
private:
|
||||||
class state {
|
class state {
|
||||||
public:
|
public:
|
||||||
state(std::vector<uint8_t>&& data)
|
state(std::vector<uint8_t>&& data, logger& lgr)
|
||||||
: compressed_(false)
|
: compressed_(false)
|
||||||
, data_(std::move(data)) {}
|
, data_(std::move(data))
|
||||||
|
, LOG_PROXY_INIT(lgr) {}
|
||||||
|
|
||||||
template <typename LogProxy>
|
void compress(const block_compressor& bc) {
|
||||||
void compress(const block_compressor& bc, LogProxy& lp) {
|
|
||||||
std::vector<uint8_t> tmp;
|
std::vector<uint8_t> tmp;
|
||||||
|
|
||||||
{
|
{
|
||||||
auto td = lp.timed_trace();
|
auto td = LOG_TIMED_TRACE;
|
||||||
|
|
||||||
tmp = bc.compress(data_);
|
tmp = bc.compress(data_);
|
||||||
|
|
||||||
@ -87,50 +124,83 @@ class fsblock {
|
|||||||
std::condition_variable cond_;
|
std::condition_variable cond_;
|
||||||
std::atomic<bool> compressed_;
|
std::atomic<bool> compressed_;
|
||||||
std::vector<uint8_t> data_;
|
std::vector<uint8_t> data_;
|
||||||
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
fsblock(section_type type, const block_compressor& bc,
|
raw_fsblock(logger& lgr, section_type type, const block_compressor& bc,
|
||||||
std::vector<uint8_t>&& data)
|
std::vector<uint8_t>&& data)
|
||||||
: type_(type)
|
: type_(type)
|
||||||
, bc_(bc)
|
, bc_(bc)
|
||||||
, uncompressed_size_(data.size())
|
, uncompressed_size_(data.size())
|
||||||
, state_(std::make_shared<state>(std::move(data))) {}
|
, state_(std::make_shared<state>(std::move(data), lgr))
|
||||||
|
, LOG_PROXY_INIT(lgr) {}
|
||||||
|
|
||||||
template <typename LogProxy>
|
void compress(worker_group& wg) override {
|
||||||
void compress(worker_group& wg, LogProxy& lp) {
|
LOG_TRACE << "block queued for compression";
|
||||||
lp.trace() << "block queued for compression";
|
|
||||||
|
|
||||||
std::shared_ptr<state> s = state_;
|
std::shared_ptr<state> s = state_;
|
||||||
|
|
||||||
wg.add_job([&, s] {
|
wg.add_job([&, s] {
|
||||||
lp.trace() << "block compression started";
|
LOG_TRACE << "block compression started";
|
||||||
s->compress(bc_, lp);
|
s->compress(bc_);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void wait_until_compressed() { state_->wait(); }
|
void wait_until_compressed() override { state_->wait(); }
|
||||||
|
|
||||||
section_type type() const { return type_; }
|
section_type type() const override { return type_; }
|
||||||
|
|
||||||
compression_type compression() const { return bc_.type(); }
|
compression_type compression() const override { return bc_.type(); }
|
||||||
|
|
||||||
const std::vector<uint8_t>& data() const {
|
folly::ByteRange data() const override { return state_->data(); }
|
||||||
return state_->data();
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t uncompressed_size() const { return uncompressed_size_; }
|
size_t uncompressed_size() const override { return uncompressed_size_; }
|
||||||
|
|
||||||
size_t size() const { return state_->size(); }
|
size_t size() const override { return state_->size(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const section_type type_;
|
const section_type type_;
|
||||||
block_compressor const& bc_;
|
block_compressor const& bc_;
|
||||||
const size_t uncompressed_size_;
|
const size_t uncompressed_size_;
|
||||||
std::shared_ptr<state> state_;
|
std::shared_ptr<state> state_;
|
||||||
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class compressed_fsblock : public fsblock::impl {
|
||||||
|
public:
|
||||||
|
compressed_fsblock(section_type type, compression_type compression,
|
||||||
|
folly::ByteRange range)
|
||||||
|
: type_(type)
|
||||||
|
, compression_(compression)
|
||||||
|
, range_(range) {}
|
||||||
|
|
||||||
|
void compress(worker_group&) override {}
|
||||||
|
void wait_until_compressed() override {}
|
||||||
|
|
||||||
|
section_type type() const override { return type_; }
|
||||||
|
compression_type compression() const override { return compression_; }
|
||||||
|
|
||||||
|
folly::ByteRange data() const override { return range_; }
|
||||||
|
|
||||||
|
size_t uncompressed_size() const override { return range_.size(); }
|
||||||
|
size_t size() const override { return range_.size(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
const section_type type_;
|
||||||
|
const compression_type compression_;
|
||||||
|
folly::ByteRange range_;
|
||||||
|
};
|
||||||
|
|
||||||
|
fsblock::fsblock(logger& lgr, section_type type, const block_compressor& bc,
|
||||||
|
std::vector<uint8_t>&& data)
|
||||||
|
: impl_(make_unique_logging_object<impl, raw_fsblock, logger_policies>(
|
||||||
|
lgr, type, bc, std::move(data))) {}
|
||||||
|
|
||||||
|
fsblock::fsblock(section_type type, compression_type compression,
|
||||||
|
folly::ByteRange data)
|
||||||
|
: impl_(std::make_unique<compressed_fsblock>(type, compression, data)) {}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
class filesystem_writer_ : public filesystem_writer::impl {
|
class filesystem_writer_ : public filesystem_writer::impl {
|
||||||
public:
|
public:
|
||||||
@ -144,6 +214,8 @@ class filesystem_writer_ : public filesystem_writer::impl {
|
|||||||
void write_block(std::vector<uint8_t>&& data) override;
|
void write_block(std::vector<uint8_t>&& data) override;
|
||||||
void write_metadata_v2_schema(std::vector<uint8_t>&& data) override;
|
void write_metadata_v2_schema(std::vector<uint8_t>&& data) override;
|
||||||
void write_metadata_v2(std::vector<uint8_t>&& data) override;
|
void write_metadata_v2(std::vector<uint8_t>&& data) override;
|
||||||
|
void write_compressed_section(section_type type, compression_type compression,
|
||||||
|
folly::ByteRange data) override;
|
||||||
void flush() override;
|
void flush() override;
|
||||||
size_t size() const override { return os_.tellp(); }
|
size_t size() const override { return os_.tellp(); }
|
||||||
|
|
||||||
@ -151,11 +223,11 @@ class filesystem_writer_ : public filesystem_writer::impl {
|
|||||||
void write_section(section_type type, std::vector<uint8_t>&& data,
|
void write_section(section_type type, std::vector<uint8_t>&& data,
|
||||||
block_compressor const& bc);
|
block_compressor const& bc);
|
||||||
void write(section_type type, compression_type compression,
|
void write(section_type type, compression_type compression,
|
||||||
const std::vector<uint8_t>& data);
|
folly::ByteRange range);
|
||||||
void write(const char* data, size_t size);
|
void write(const char* data, size_t size);
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void write(const T& obj);
|
void write(const T& obj);
|
||||||
void write(const std::vector<uint8_t>& data);
|
void write(folly::ByteRange range);
|
||||||
void writer_thread();
|
void writer_thread();
|
||||||
size_t mem_used() const;
|
size_t mem_used() const;
|
||||||
|
|
||||||
@ -166,7 +238,7 @@ class filesystem_writer_ : public filesystem_writer::impl {
|
|||||||
const block_compressor& schema_bc_;
|
const block_compressor& schema_bc_;
|
||||||
const block_compressor& metadata_bc_;
|
const block_compressor& metadata_bc_;
|
||||||
const size_t max_queue_size_;
|
const size_t max_queue_size_;
|
||||||
log_proxy<LoggerPolicy> log_;
|
LOG_PROXY_DECL(LoggerPolicy);
|
||||||
std::deque<std::unique_ptr<fsblock>> queue_;
|
std::deque<std::unique_ptr<fsblock>> queue_;
|
||||||
mutable std::mutex mx_;
|
mutable std::mutex mx_;
|
||||||
std::condition_variable cond_;
|
std::condition_variable cond_;
|
||||||
@ -187,7 +259,7 @@ filesystem_writer_<LoggerPolicy>::filesystem_writer_(
|
|||||||
, schema_bc_(schema_bc)
|
, schema_bc_(schema_bc)
|
||||||
, metadata_bc_(metadata_bc)
|
, metadata_bc_(metadata_bc)
|
||||||
, max_queue_size_(max_queue_size)
|
, max_queue_size_(max_queue_size)
|
||||||
, log_(lgr)
|
, LOG_PROXY_INIT(lgr)
|
||||||
, flush_(false)
|
, flush_(false)
|
||||||
, writer_thread_(&filesystem_writer_::writer_thread, this) {}
|
, writer_thread_(&filesystem_writer_::writer_thread, this) {}
|
||||||
|
|
||||||
@ -263,14 +335,14 @@ void filesystem_writer_<LoggerPolicy>::write(const T& obj) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
void filesystem_writer_<LoggerPolicy>::write(const std::vector<uint8_t>& data) {
|
void filesystem_writer_<LoggerPolicy>::write(folly::ByteRange range) {
|
||||||
write(reinterpret_cast<const char*>(&data[0]), data.size());
|
write(reinterpret_cast<const char*>(range.data()), range.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename LoggerPolicy>
|
template <typename LoggerPolicy>
|
||||||
void filesystem_writer_<LoggerPolicy>::write(section_type type,
|
void filesystem_writer_<LoggerPolicy>::write(section_type type,
|
||||||
compression_type compression,
|
compression_type compression,
|
||||||
const std::vector<uint8_t>& data) {
|
folly::ByteRange range) {
|
||||||
section_header_v2 sh;
|
section_header_v2 sh;
|
||||||
::memcpy(&sh.magic[0], "DWARFS", 6);
|
::memcpy(&sh.magic[0], "DWARFS", 6);
|
||||||
sh.major = MAJOR_VERSION;
|
sh.major = MAJOR_VERSION;
|
||||||
@ -278,22 +350,22 @@ void filesystem_writer_<LoggerPolicy>::write(section_type type,
|
|||||||
sh.number = section_number_++;
|
sh.number = section_number_++;
|
||||||
sh.type = static_cast<uint16_t>(type);
|
sh.type = static_cast<uint16_t>(type);
|
||||||
sh.compression = static_cast<uint16_t>(compression);
|
sh.compression = static_cast<uint16_t>(compression);
|
||||||
sh.length = data.size();
|
sh.length = range.size();
|
||||||
|
|
||||||
checksum xxh(checksum::algorithm::XXH3_64);
|
checksum xxh(checksum::algorithm::XXH3_64);
|
||||||
xxh.update(&sh.number,
|
xxh.update(&sh.number,
|
||||||
sizeof(section_header_v2) - offsetof(section_header_v2, number));
|
sizeof(section_header_v2) - offsetof(section_header_v2, number));
|
||||||
xxh.update(data.data(), data.size());
|
xxh.update(range.data(), range.size());
|
||||||
DWARFS_CHECK(xxh.finalize(&sh.xxh3_64), "XXH3-64 checksum failed");
|
DWARFS_CHECK(xxh.finalize(&sh.xxh3_64), "XXH3-64 checksum failed");
|
||||||
|
|
||||||
checksum sha(checksum::algorithm::SHA2_512_256);
|
checksum sha(checksum::algorithm::SHA2_512_256);
|
||||||
sha.update(&sh.xxh3_64,
|
sha.update(&sh.xxh3_64,
|
||||||
sizeof(section_header_v2) - offsetof(section_header_v2, xxh3_64));
|
sizeof(section_header_v2) - offsetof(section_header_v2, xxh3_64));
|
||||||
sha.update(data.data(), data.size());
|
sha.update(range.data(), range.size());
|
||||||
DWARFS_CHECK(sha.finalize(&sh.sha2_512_256), "SHA512/256 checksum failed");
|
DWARFS_CHECK(sha.finalize(&sh.sha2_512_256), "SHA512/256 checksum failed");
|
||||||
|
|
||||||
write(sh);
|
write(sh);
|
||||||
write(data);
|
write(range);
|
||||||
|
|
||||||
if (type == section_type::BLOCK) {
|
if (type == section_type::BLOCK) {
|
||||||
prog_.blocks_written++;
|
prog_.blocks_written++;
|
||||||
@ -312,9 +384,23 @@ void filesystem_writer_<LoggerPolicy>::write_section(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto fsb = std::make_unique<fsblock>(type, bc, std::move(data));
|
auto fsb =
|
||||||
|
std::make_unique<fsblock>(LOG_GET_LOGGER, type, bc, std::move(data));
|
||||||
|
|
||||||
fsb->compress(wg_, log_);
|
fsb->compress(wg_);
|
||||||
|
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mx_);
|
||||||
|
queue_.push_back(std::move(fsb));
|
||||||
|
}
|
||||||
|
|
||||||
|
cond_.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename LoggerPolicy>
|
||||||
|
void filesystem_writer_<LoggerPolicy>::write_compressed_section(
|
||||||
|
section_type type, compression_type compression, folly::ByteRange data) {
|
||||||
|
auto fsb = std::make_unique<fsblock>(type, compression, data);
|
||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(mx_);
|
std::lock_guard<std::mutex> lock(mx_);
|
||||||
@ -359,6 +445,8 @@ void filesystem_writer_<LoggerPolicy>::flush() {
|
|||||||
writer_thread_.join();
|
writer_thread_.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
filesystem_writer::filesystem_writer(std::ostream& os, logger& lgr,
|
filesystem_writer::filesystem_writer(std::ostream& os, logger& lgr,
|
||||||
worker_group& wg, progress& prog,
|
worker_group& wg, progress& prog,
|
||||||
const block_compressor& bc,
|
const block_compressor& bc,
|
||||||
|
@ -36,13 +36,20 @@ class fs_section_v1 : public fs_section::impl {
|
|||||||
|
|
||||||
size_t start() const override { return start_; }
|
size_t start() const override { return start_; }
|
||||||
size_t length() const override { return hdr_.length; }
|
size_t length() const override { return hdr_.length; }
|
||||||
|
|
||||||
compression_type compression() const override { return hdr_.compression; }
|
compression_type compression() const override { return hdr_.compression; }
|
||||||
section_type type() const override { return hdr_.type; }
|
section_type type() const override { return hdr_.type; }
|
||||||
|
|
||||||
std::string name() const override { return get_section_name(hdr_.type); }
|
std::string name() const override { return get_section_name(hdr_.type); }
|
||||||
std::string description() const override { return hdr_.to_string(); }
|
std::string description() const override { return hdr_.to_string(); }
|
||||||
|
|
||||||
bool check_fast(mmif&) const override { return true; }
|
bool check_fast(mmif&) const override { return true; }
|
||||||
bool verify(mmif&) const override { return true; }
|
bool verify(mmif&) const override { return true; }
|
||||||
|
|
||||||
|
folly::ByteRange data(mmif& mm) const override {
|
||||||
|
return folly::ByteRange(mm.as<uint8_t>(start_), hdr_.length);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t start_;
|
size_t start_;
|
||||||
section_header hdr_;
|
section_header hdr_;
|
||||||
@ -54,16 +61,21 @@ class fs_section_v2 : public fs_section::impl {
|
|||||||
|
|
||||||
size_t start() const override { return start_; }
|
size_t start() const override { return start_; }
|
||||||
size_t length() const override { return hdr_.length; }
|
size_t length() const override { return hdr_.length; }
|
||||||
|
|
||||||
compression_type compression() const override {
|
compression_type compression() const override {
|
||||||
return static_cast<compression_type>(hdr_.compression);
|
return static_cast<compression_type>(hdr_.compression);
|
||||||
}
|
}
|
||||||
|
|
||||||
section_type type() const override {
|
section_type type() const override {
|
||||||
return static_cast<section_type>(hdr_.type);
|
return static_cast<section_type>(hdr_.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string name() const override {
|
std::string name() const override {
|
||||||
return get_section_name(static_cast<section_type>(hdr_.type));
|
return get_section_name(static_cast<section_type>(hdr_.type));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string description() const override { return hdr_.to_string(); }
|
std::string description() const override { return hdr_.to_string(); }
|
||||||
|
|
||||||
bool check_fast(mmif& mm) const override {
|
bool check_fast(mmif& mm) const override {
|
||||||
auto hdr_cs_len =
|
auto hdr_cs_len =
|
||||||
sizeof(section_header_v2) - offsetof(section_header_v2, number);
|
sizeof(section_header_v2) - offsetof(section_header_v2, number);
|
||||||
@ -71,6 +83,7 @@ class fs_section_v2 : public fs_section::impl {
|
|||||||
mm.as<void>(start_ - hdr_cs_len),
|
mm.as<void>(start_ - hdr_cs_len),
|
||||||
hdr_.length + hdr_cs_len, &hdr_.xxh3_64);
|
hdr_.length + hdr_cs_len, &hdr_.xxh3_64);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool verify(mmif& mm) const override {
|
bool verify(mmif& mm) const override {
|
||||||
auto hdr_sha_len =
|
auto hdr_sha_len =
|
||||||
sizeof(section_header_v2) - offsetof(section_header_v2, xxh3_64);
|
sizeof(section_header_v2) - offsetof(section_header_v2, xxh3_64);
|
||||||
@ -79,6 +92,10 @@ class fs_section_v2 : public fs_section::impl {
|
|||||||
hdr_.length + hdr_sha_len, &hdr_.sha2_512_256);
|
hdr_.length + hdr_sha_len, &hdr_.sha2_512_256);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
folly::ByteRange data(mmif& mm) const override {
|
||||||
|
return folly::ByteRange(mm.as<uint8_t>(start_), hdr_.length);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t start_;
|
size_t start_;
|
||||||
section_header_v2 hdr_;
|
section_header_v2 hdr_;
|
||||||
|
@ -295,9 +295,9 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
block_manager::config cfg;
|
block_manager::config cfg;
|
||||||
std::string path, output, window_sizes, memory_limit, script_arg, compression,
|
std::string path, output, window_sizes, memory_limit, script_arg, compression,
|
||||||
schema_compression, metadata_compression, log_level_str, timestamp,
|
schema_compression, metadata_compression, log_level_str, timestamp,
|
||||||
time_resolution, order, progress_mode;
|
time_resolution, order, progress_mode, recompress_opts;
|
||||||
size_t num_workers, max_scanner_workers;
|
size_t num_workers, max_scanner_workers;
|
||||||
bool recompress = false, no_progress = false;
|
bool no_progress = false;
|
||||||
unsigned level;
|
unsigned level;
|
||||||
uint16_t uid, gid;
|
uint16_t uid, gid;
|
||||||
|
|
||||||
@ -347,8 +347,8 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
po::value<std::string>(&metadata_compression),
|
po::value<std::string>(&metadata_compression),
|
||||||
"metadata compression algorithm")
|
"metadata compression algorithm")
|
||||||
("recompress",
|
("recompress",
|
||||||
po::value<bool>(&recompress)->zero_tokens(),
|
po::value<std::string>(&recompress_opts)->implicit_value("all"),
|
||||||
"recompress an existing filesystem")
|
"recompress an existing filesystem (none, block, metadata, all)")
|
||||||
("set-owner",
|
("set-owner",
|
||||||
po::value<uint16_t>(&uid),
|
po::value<uint16_t>(&uid),
|
||||||
"set owner (uid) for whole file system")
|
"set owner (uid) for whole file system")
|
||||||
@ -516,6 +516,24 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
order = defaults.order;
|
order = defaults.order;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool recompress = vm.count("recompress");
|
||||||
|
rewrite_options rw_opts;
|
||||||
|
if (recompress) {
|
||||||
|
std::unordered_map<std::string, unsigned> const modes{
|
||||||
|
{"all", 3},
|
||||||
|
{"metadata", 2},
|
||||||
|
{"block", 1},
|
||||||
|
{"none", 0},
|
||||||
|
};
|
||||||
|
if (auto it = modes.find(recompress_opts); it != modes.end()) {
|
||||||
|
rw_opts.recompress_block = it->second & 1;
|
||||||
|
rw_opts.recompress_metadata = it->second & 2;
|
||||||
|
} else {
|
||||||
|
std::cerr << "invalid recompress mode: " << recompress_opts << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<std::string> order_opts;
|
std::vector<std::string> order_opts;
|
||||||
boost::split(order_opts, order, boost::is_any_of(":"));
|
boost::split(order_opts, order, boost::is_any_of(":"));
|
||||||
|
|
||||||
@ -721,8 +739,8 @@ int mkdwarfs(int argc, char** argv) {
|
|||||||
auto ti = LOG_TIMED_INFO;
|
auto ti = LOG_TIMED_INFO;
|
||||||
|
|
||||||
if (recompress) {
|
if (recompress) {
|
||||||
filesystem_v2::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path),
|
filesystem_v2::rewrite(lgr, prog, std::make_shared<dwarfs::mmap>(path), fsw,
|
||||||
fsw);
|
rw_opts);
|
||||||
wg_writer.wait();
|
wg_writer.wait();
|
||||||
} else {
|
} else {
|
||||||
options.inode.with_similarity =
|
options.inode.with_similarity =
|
||||||
|
BIN
test/compat-v0.3.0.dwarfs
Normal file
BIN
test/compat-v0.3.0.dwarfs
Normal file
Binary file not shown.
@ -36,6 +36,7 @@
|
|||||||
#include "dwarfs/scanner.h"
|
#include "dwarfs/scanner.h"
|
||||||
#include "dwarfs/script.h"
|
#include "dwarfs/script.h"
|
||||||
#include "loremipsum.h"
|
#include "loremipsum.h"
|
||||||
|
#include "mmap_mock.h"
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
namespace test {
|
namespace test {
|
||||||
@ -90,26 +91,6 @@ std::map<std::string, simplestat> statmap{
|
|||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
class mmap_mock : public mmif {
|
|
||||||
public:
|
|
||||||
mmap_mock(const std::string& data)
|
|
||||||
: m_data(data) {}
|
|
||||||
|
|
||||||
void const* addr() const override { return m_data.data(); }
|
|
||||||
|
|
||||||
size_t size() const override { return m_data.size(); }
|
|
||||||
|
|
||||||
boost::system::error_code lock(off_t, size_t) override {
|
|
||||||
return boost::system::error_code();
|
|
||||||
}
|
|
||||||
boost::system::error_code release(off_t, size_t) override {
|
|
||||||
return boost::system::error_code();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
const std::string m_data;
|
|
||||||
};
|
|
||||||
|
|
||||||
class os_access_mock : public os_access {
|
class os_access_mock : public os_access {
|
||||||
public:
|
public:
|
||||||
std::shared_ptr<dir_reader> opendir(const std::string& path) const override {
|
std::shared_ptr<dir_reader> opendir(const std::string& path) const override {
|
||||||
|
@ -23,14 +23,21 @@
|
|||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <folly/json.h>
|
#include <folly/json.h>
|
||||||
|
|
||||||
|
#include "dwarfs/block_compressor.h"
|
||||||
#include "dwarfs/filesystem_v2.h"
|
#include "dwarfs/filesystem_v2.h"
|
||||||
|
#include "dwarfs/filesystem_writer.h"
|
||||||
#include "dwarfs/logger.h"
|
#include "dwarfs/logger.h"
|
||||||
#include "dwarfs/mmap.h"
|
#include "dwarfs/mmap.h"
|
||||||
#include "dwarfs/options.h"
|
#include "dwarfs/options.h"
|
||||||
|
#include "dwarfs/progress.h"
|
||||||
|
#include "dwarfs/worker_group.h"
|
||||||
|
|
||||||
|
#include "mmap_mock.h"
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -153,6 +160,7 @@ char const* reference = R"(
|
|||||||
std::vector<std::string> versions{
|
std::vector<std::string> versions{
|
||||||
"0.2.0",
|
"0.2.0",
|
||||||
"0.2.3",
|
"0.2.3",
|
||||||
|
"0.3.0",
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
@ -173,3 +181,39 @@ TEST_P(compat, backwards_compatibility) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(dwarfs, compat, ::testing::ValuesIn(versions));
|
INSTANTIATE_TEST_SUITE_P(dwarfs, compat, ::testing::ValuesIn(versions));
|
||||||
|
|
||||||
|
class rewrite
|
||||||
|
: public testing::TestWithParam<std::tuple<std::string, bool, bool>> {};
|
||||||
|
|
||||||
|
TEST_P(rewrite, filesystem_rewrite) {
|
||||||
|
auto [version, recompress_block, recompress_metadata] = GetParam();
|
||||||
|
|
||||||
|
std::ostringstream oss;
|
||||||
|
stream_logger lgr(oss);
|
||||||
|
auto filename = std::string(TEST_DATA_DIR "/compat-v") + version + ".dwarfs";
|
||||||
|
|
||||||
|
rewrite_options opts;
|
||||||
|
opts.recompress_block = recompress_block;
|
||||||
|
opts.recompress_metadata = recompress_metadata;
|
||||||
|
|
||||||
|
worker_group wg("rewriter", 2);
|
||||||
|
block_compressor bc("null");
|
||||||
|
progress prog([](const progress&, bool) {}, 1000);
|
||||||
|
std::ostringstream rewritten, idss;
|
||||||
|
filesystem_writer fsw(rewritten, lgr, wg, prog, bc, 64 << 20);
|
||||||
|
filesystem_v2::identify(lgr, std::make_shared<mmap>(filename), idss);
|
||||||
|
filesystem_v2::rewrite(lgr, prog, std::make_shared<mmap>(filename), fsw,
|
||||||
|
opts);
|
||||||
|
|
||||||
|
filesystem_v2::identify(
|
||||||
|
lgr, std::make_shared<test::mmap_mock>(rewritten.str()), idss);
|
||||||
|
filesystem_v2 fs(lgr, std::make_shared<test::mmap_mock>(rewritten.str()));
|
||||||
|
auto meta = fs.metadata_as_dynamic();
|
||||||
|
auto ref = folly::parseJson(reference);
|
||||||
|
EXPECT_EQ(ref, meta);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(dwarfs, rewrite,
|
||||||
|
::testing::Combine(::testing::ValuesIn(versions),
|
||||||
|
::testing::Bool(),
|
||||||
|
::testing::Bool()));
|
||||||
|
48
test/mmap_mock.h
Normal file
48
test/mmap_mock.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of dwarfs.
|
||||||
|
*
|
||||||
|
* dwarfs is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* dwarfs is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "dwarfs/mmif.h"
|
||||||
|
|
||||||
|
namespace dwarfs {
|
||||||
|
namespace test {
|
||||||
|
|
||||||
|
class mmap_mock : public mmif {
|
||||||
|
public:
|
||||||
|
mmap_mock(const std::string& data)
|
||||||
|
: m_data(data) {}
|
||||||
|
|
||||||
|
void const* addr() const override { return m_data.data(); }
|
||||||
|
|
||||||
|
size_t size() const override { return m_data.size(); }
|
||||||
|
|
||||||
|
boost::system::error_code lock(off_t, size_t) override {
|
||||||
|
return boost::system::error_code();
|
||||||
|
}
|
||||||
|
boost::system::error_code release(off_t, size_t) override {
|
||||||
|
return boost::system::error_code();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const std::string m_data;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace test
|
||||||
|
} // namespace dwarfs
|
Loading…
x
Reference in New Issue
Block a user