From c2da034983ef3c996b96f944e357c63f1f3f46c3 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sat, 22 Jul 2023 17:59:23 +0200 Subject: [PATCH] Compression metadata --- include/dwarfs/block_compressor.h | 22 ++++++++++++++++------ src/dwarfs/compression/brotli.cpp | 11 +++++++---- src/dwarfs/compression/lz4.cpp | 11 +++++++---- src/dwarfs/compression/lzma.cpp | 12 ++++++++---- src/dwarfs/compression/null.cpp | 9 ++++++--- src/dwarfs/compression/zstd.cpp | 14 +++++++++----- src/dwarfs/filesystem_writer.cpp | 4 +++- 7 files changed, 56 insertions(+), 27 deletions(-) diff --git a/include/dwarfs/block_compressor.h b/include/dwarfs/block_compressor.h index 07329a05..375f3d99 100644 --- a/include/dwarfs/block_compressor.h +++ b/include/dwarfs/block_compressor.h @@ -33,6 +33,8 @@ #include #include +#include + #include "dwarfs/compression.h" namespace dwarfs { @@ -55,18 +57,24 @@ class block_compressor { block_compressor(block_compressor&& bc) = default; block_compressor& operator=(block_compressor&& rhs) = default; - std::vector compress(std::vector const& data) const { - return impl_->compress(data); + std::vector + compress(std::vector const& data, folly::dynamic meta) const { + return impl_->compress(data, std::move(meta)); } - std::vector compress(std::vector&& data) const { - return impl_->compress(std::move(data)); + std::vector + compress(std::vector&& data, folly::dynamic meta) const { + return impl_->compress(std::move(data), std::move(meta)); } compression_type type() const { return impl_->type(); } std::string describe() const { return impl_->describe(); } + bool check_metadata(folly::dynamic meta) const { + return impl_->check_metadata(std::move(meta)); + } + class impl { public: virtual ~impl() = default; @@ -74,12 +82,14 @@ class block_compressor { virtual std::unique_ptr clone() const = 0; virtual std::vector - compress(const std::vector& data) const = 0; + compress(const std::vector& data, folly::dynamic meta) const = 0; virtual std::vector - compress(std::vector&& data) const = 0; + compress(std::vector&& data, folly::dynamic meta) const = 0; virtual compression_type type() const = 0; virtual std::string describe() const = 0; + + virtual bool check_metadata(folly::dynamic meta) const = 0; }; private: diff --git a/src/dwarfs/compression/brotli.cpp b/src/dwarfs/compression/brotli.cpp index f096ec89..6eaa3a6d 100644 --- a/src/dwarfs/compression/brotli.cpp +++ b/src/dwarfs/compression/brotli.cpp @@ -49,8 +49,8 @@ class brotli_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector - compress(const std::vector& data) const override { + std::vector compress(const std::vector& data, + folly::dynamic /*meta*/) const override { std::vector compressed; compressed.resize(folly::kMaxVarintLength64 + ::BrotliEncoderMaxCompressedSize(data.size())); @@ -69,8 +69,9 @@ class brotli_block_compressor final : public block_compressor::impl { return compressed; } - std::vector compress(std::vector&& data) const override { - return compress(data); + std::vector + compress(std::vector&& data, folly::dynamic meta) const override { + return compress(data, std::move(meta)); } compression_type type() const override { return compression_type::BROTLI; } @@ -79,6 +80,8 @@ class brotli_block_compressor final : public block_compressor::impl { return fmt::format("brotli [quality={}, lgwin={}]", quality_, window_bits_); } + bool check_metadata(folly::dynamic /*meta*/) const override { return true; } + private: uint32_t const quality_; uint32_t const window_bits_; diff --git a/src/dwarfs/compression/lz4.cpp b/src/dwarfs/compression/lz4.cpp index e40e12b0..7a0c7fc0 100644 --- a/src/dwarfs/compression/lz4.cpp +++ b/src/dwarfs/compression/lz4.cpp @@ -66,8 +66,8 @@ class lz4_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector - compress(const std::vector& data) const override { + std::vector compress(const std::vector& data, + folly::dynamic /*meta*/) const override { std::vector compressed( sizeof(uint32_t) + LZ4_compressBound(folly::to(data.size()))); *reinterpret_cast(&compressed[0]) = data.size(); @@ -84,14 +84,17 @@ class lz4_block_compressor final : public block_compressor::impl { return compressed; } - std::vector compress(std::vector&& data) const override { - return compress(data); + std::vector + compress(std::vector&& data, folly::dynamic meta) const override { + return compress(data, std::move(meta)); } compression_type type() const override { return compression_type::LZ4; } std::string describe() const override { return Policy::describe(level_); } + bool check_metadata(folly::dynamic /*meta*/) const override { return true; } + private: const int level_; }; diff --git a/src/dwarfs/compression/lzma.cpp b/src/dwarfs/compression/lzma.cpp index 6941fc6b..675de239 100644 --- a/src/dwarfs/compression/lzma.cpp +++ b/src/dwarfs/compression/lzma.cpp @@ -63,16 +63,19 @@ class lzma_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } + std::vector compress(const std::vector& data, + folly::dynamic meta) const override; std::vector - compress(const std::vector& data) const override; - std::vector compress(std::vector&& data) const override { - return compress(data); + compress(std::vector&& data, folly::dynamic meta) const override { + return compress(data, std::move(meta)); } compression_type type() const override { return compression_type::LZMA; } std::string describe() const override { return description_; } + bool check_metadata(folly::dynamic /*meta*/) const override { return true; } + private: std::vector compress(const std::vector& data, const lzma_filter* filters) const; @@ -174,7 +177,8 @@ lzma_block_compressor::compress(const std::vector& data, } std::vector -lzma_block_compressor::compress(const std::vector& data) const { +lzma_block_compressor::compress(const std::vector& data, + folly::dynamic /*meta*/) const { std::vector best = compress(data, &filters_[1]); if (filters_[0].id != LZMA_VLI_UNKNOWN) { diff --git a/src/dwarfs/compression/null.cpp b/src/dwarfs/compression/null.cpp index bb5418f7..b473cbaf 100644 --- a/src/dwarfs/compression/null.cpp +++ b/src/dwarfs/compression/null.cpp @@ -37,18 +37,21 @@ class null_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector - compress(const std::vector& data) const override { + std::vector compress(const std::vector& data, + folly::dynamic /*meta*/) const override { return data; } - std::vector compress(std::vector&& data) const override { + std::vector compress(std::vector&& data, + folly::dynamic /*meta*/) const override { return std::move(data); } compression_type type() const override { return compression_type::NONE; } std::string describe() const override { return "null"; } + + bool check_metadata(folly::dynamic /*meta*/) const override { return true; } }; class null_block_decompressor final : public block_decompressor::impl { diff --git a/src/dwarfs/compression/zstd.cpp b/src/dwarfs/compression/zstd.cpp index aff83759..6eb332c6 100644 --- a/src/dwarfs/compression/zstd.cpp +++ b/src/dwarfs/compression/zstd.cpp @@ -54,11 +54,12 @@ class zstd_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector - compress(const std::vector& data) const override; + std::vector compress(const std::vector& data, + folly::dynamic meta) const override; - std::vector compress(std::vector&& data) const override { - return compress(data); + std::vector + compress(std::vector&& data, folly::dynamic meta) const override { + return compress(data, std::move(meta)); } compression_type type() const override { return compression_type::ZSTD; } @@ -67,6 +68,8 @@ class zstd_block_compressor final : public block_compressor::impl { return fmt::format("zstd [level={}]", level_); } + bool check_metadata(folly::dynamic /*meta*/) const override { return true; } + private: class scoped_context; @@ -143,7 +146,8 @@ std::weak_ptr zstd_block_compressor::s_ctxmgr; std::vector -zstd_block_compressor::compress(const std::vector& data) const { +zstd_block_compressor::compress(const std::vector& data, + folly::dynamic /*meta*/) const { std::vector compressed(ZSTD_compressBound(data.size())); scoped_context ctx(*ctxmgr_); auto size = ZSTD_compressCCtx(ctx.get(), compressed.data(), compressed.size(), diff --git a/src/dwarfs/filesystem_writer.cpp b/src/dwarfs/filesystem_writer.cpp index 8b7c2136..8f208dfd 100644 --- a/src/dwarfs/filesystem_writer.cpp +++ b/src/dwarfs/filesystem_writer.cpp @@ -101,7 +101,9 @@ class raw_fsblock : public fsblock::impl { wg.add_job([this, prom = std::move(prom)]() mutable { try { - auto tmp = std::make_shared(bc_.compress(data_->vec())); + // TODO: metadata + auto tmp = std::make_shared( + bc_.compress(data_->vec(), folly::dynamic())); { std::lock_guard lock(mx_);