From 39950e085eb0238d4bc72debd3a9250eefb4ae7f Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Mon, 31 Mar 2025 12:01:46 +0200 Subject: [PATCH] refactor: new `start_compression` API for more flexible buffer handling --- cmake/libdwarfs.cmake | 1 + include/dwarfs/block_compressor.h | 21 +++----- include/dwarfs/byte_buffer.h | 4 ++ include/dwarfs/vector_byte_buffer.h | 1 + src/block_compressor.cpp | 29 ++++++++--- src/compression/base.cpp | 50 ++++++++++++++++++ src/compression/base.h | 35 +++++++++++++ src/compression/brotli.cpp | 42 ++++++--------- src/compression/flac.cpp | 47 +++++++++-------- src/compression/lz4.cpp | 37 +++++-------- src/compression/lzma.cpp | 27 +++------- src/compression/null.cpp | 31 ++++------- src/compression/ricepp.cpp | 30 ++++------- src/compression/zstd.cpp | 30 ++++------- src/reader/filesystem_v2.cpp | 4 +- src/reader/internal/cached_block.cpp | 13 ++--- src/vector_byte_buffer.cpp | 6 +++ src/writer/filesystem_writer.cpp | 77 ++++++++++++++-------------- test/flac_compressor_test.cpp | 1 + 19 files changed, 258 insertions(+), 228 deletions(-) create mode 100644 src/compression/base.cpp create mode 100644 src/compression/base.h diff --git a/cmake/libdwarfs.cmake b/cmake/libdwarfs.cmake index 3fa666b8..4b1a04bf 100644 --- a/cmake/libdwarfs.cmake +++ b/cmake/libdwarfs.cmake @@ -61,6 +61,7 @@ add_library( $/src/version.cpp + src/compression/base.cpp src/compression/null.cpp src/compression/zstd.cpp $<$:src/compression/lzma.cpp> diff --git a/include/dwarfs/block_compressor.h b/include/dwarfs/block_compressor.h index 582f776e..29761c58 100644 --- a/include/dwarfs/block_compressor.h +++ b/include/dwarfs/block_compressor.h @@ -35,9 +35,9 @@ #include #include +#include #include #include -#include namespace dwarfs { @@ -109,8 +109,9 @@ class block_compressor { class block_decompressor { public: - block_decompressor(compression_type type, std::span data, - mutable_byte_buffer target); + block_decompressor(compression_type type, std::span data); + + shared_byte_buffer start_decompression(mutable_byte_buffer target); bool decompress_frame(size_t frame_size = BUFSIZ) { return impl_->decompress_frame(frame_size); @@ -123,17 +124,13 @@ class block_decompressor { std::optional metadata() const { return impl_->metadata(); } static shared_byte_buffer - decompress(compression_type type, std::span data) { - auto target = vector_byte_buffer::create(); - block_decompressor bd(type, data, target); - bd.decompress_frame(bd.uncompressed_size()); - return target.share(); - } + decompress(compression_type type, std::span data); class impl { public: virtual ~impl() = default; + virtual void start_decompression(mutable_byte_buffer target) = 0; virtual bool decompress_frame(size_t frame_size) = 0; virtual size_t uncompressed_size() const = 0; virtual std::optional metadata() const = 0; @@ -160,8 +157,7 @@ class compression_factory : public compression_info { virtual std::unique_ptr make_compressor(option_map& om) const = 0; virtual std::unique_ptr - make_decompressor(std::span data, - mutable_byte_buffer target) const = 0; + make_decompressor(std::span data) const = 0; }; namespace detail { @@ -178,8 +174,7 @@ class compression_registry { std::unique_ptr make_compressor(std::string_view spec) const; std::unique_ptr - make_decompressor(compression_type type, std::span data, - mutable_byte_buffer target) const; + make_decompressor(compression_type type, std::span data) const; void for_each_algorithm( std::function const& fn) diff --git a/include/dwarfs/byte_buffer.h b/include/dwarfs/byte_buffer.h index 24153ea6..caa36411 100644 --- a/include/dwarfs/byte_buffer.h +++ b/include/dwarfs/byte_buffer.h @@ -125,10 +125,14 @@ class mutable_byte_buffer { public: using value_type = uint8_t; + mutable_byte_buffer() = default; + explicit mutable_byte_buffer( std::shared_ptr bb) : bb_{std::move(bb)} {} + explicit operator bool() const noexcept { return static_cast(bb_); } + uint8_t const* data() const { return bb_->data(); } uint8_t* data() { return bb_->mutable_data(); } diff --git a/include/dwarfs/vector_byte_buffer.h b/include/dwarfs/vector_byte_buffer.h index f80a4288..5fc595cc 100644 --- a/include/dwarfs/vector_byte_buffer.h +++ b/include/dwarfs/vector_byte_buffer.h @@ -31,6 +31,7 @@ class vector_byte_buffer { public: static mutable_byte_buffer create(); static mutable_byte_buffer create(size_t size); + static mutable_byte_buffer create_reserve(size_t size); static mutable_byte_buffer create(std::string_view data); static mutable_byte_buffer create(std::span data); static mutable_byte_buffer create(std::vector&& data); diff --git a/src/block_compressor.cpp b/src/block_compressor.cpp index 3c1ae60f..19fd0830 100644 --- a/src/block_compressor.cpp +++ b/src/block_compressor.cpp @@ -33,6 +33,7 @@ #include #include #include +#include namespace dwarfs { @@ -41,10 +42,25 @@ block_compressor::block_compressor(std::string const& spec) { } block_decompressor::block_decompressor(compression_type type, - std::span data, - mutable_byte_buffer target) { - impl_ = compression_registry::instance().make_decompressor(type, data, - std::move(target)); + std::span data) { + impl_ = compression_registry::instance().make_decompressor(type, data); +} + +shared_byte_buffer +block_decompressor::decompress(compression_type type, + std::span data) { + block_decompressor bd(type, data); + auto target = vector_byte_buffer::create_reserve(bd.uncompressed_size()); + bd.start_decompression(target); + bd.decompress_frame(bd.uncompressed_size()); + return target.share(); +} + +shared_byte_buffer +block_decompressor::start_decompression(mutable_byte_buffer target) { + impl_->start_decompression(target); + target.freeze_location(); + return target.share(); } compression_registry& compression_registry::instance() { @@ -92,8 +108,7 @@ compression_registry::make_compressor(std::string_view spec) const { std::unique_ptr compression_registry::make_decompressor(compression_type type, - std::span data, - mutable_byte_buffer target) const { + std::span data) const { auto fit = factories_.find(type); if (fit == factories_.end()) { @@ -101,7 +116,7 @@ compression_registry::make_decompressor(compression_type type, "unsupported compression type: " + get_compression_name(type)); } - return fit->second->make_decompressor(data, std::move(target)); + return fit->second->make_decompressor(data); } void compression_registry::for_each_algorithm( diff --git a/src/compression/base.cpp b/src/compression/base.cpp new file mode 100644 index 00000000..0e2f7a8b --- /dev/null +++ b/src/compression/base.cpp @@ -0,0 +1,50 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#include + +#include + +#include "base.h" + +namespace dwarfs { + +void block_decompressor_base::start_decompression(mutable_byte_buffer target) { + DWARFS_CHECK(!decompressed_, "decompression already started"); + + decompressed_ = std::move(target); + + auto size = this->uncompressed_size(); + + try { + decompressed_.reserve(size); + } catch (std::bad_alloc const&) { + DWARFS_THROW( + runtime_error, + fmt::format("could not reserve {} bytes for decompressed block", size)); + } +} + +std::optional block_decompressor_base::metadata() const { + return std::nullopt; +} + +} // namespace dwarfs diff --git a/src/compression/base.h b/src/compression/base.h new file mode 100644 index 00000000..51956faa --- /dev/null +++ b/src/compression/base.h @@ -0,0 +1,35 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. + * + * dwarfs is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dwarfs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with dwarfs. If not, see . + */ + +#include + +namespace dwarfs { + +class block_decompressor_base : public block_decompressor::impl { + public: + void start_decompression(mutable_byte_buffer target) override; + std::optional metadata() const override; + + protected: + mutable_byte_buffer decompressed_; +}; + +} // namespace dwarfs diff --git a/src/compression/brotli.cpp b/src/compression/brotli.cpp index 31aff400..f6b95282 100644 --- a/src/compression/brotli.cpp +++ b/src/compression/brotli.cpp @@ -26,13 +26,14 @@ #include -#include #include #include #include #include #include +#include "base.h" + namespace dwarfs { namespace { @@ -87,14 +88,12 @@ class brotli_block_compressor final : public block_compressor::impl { uint32_t const window_bits_; }; -class brotli_block_decompressor final : public block_decompressor::impl { +class brotli_block_decompressor final : public block_decompressor_base { public: - brotli_block_decompressor(std::span data, - mutable_byte_buffer target) - : decompressed_{std::move(target)} - , uncompressed_size_{varint::decode(data)} - , data_{data.data()} - , size_{data.size()} + brotli_block_decompressor(std::span data) + : uncompressed_size_{varint::decode(data)} + , brotli_data_{data.data()} + , brotli_size_{data.size()} , decoder_{::BrotliDecoderCreateInstance(nullptr, nullptr, nullptr), &::BrotliDecoderDestroyInstance} { if (!decoder_) { @@ -104,21 +103,13 @@ class brotli_block_decompressor final : public block_decompressor::impl { BROTLI_DECODER_PARAM_LARGE_WINDOW, 1)) { DWARFS_THROW(runtime_error, "could not set brotli decoder parameter"); } - try { - decompressed_.reserve(uncompressed_size_); - } catch (std::bad_alloc const&) { - DWARFS_THROW( - runtime_error, - fmt::format("could not reserve {} bytes for decompressed block", - uncompressed_size_)); - } } compression_type type() const override { return compression_type::BROTLI; } - std::optional metadata() const override { return std::nullopt; } - bool decompress_frame(size_t frame_size) override { + DWARFS_CHECK(decompressed_, "decompression not started"); + size_t pos = decompressed_.size(); if (pos + frame_size > uncompressed_size_) { @@ -132,8 +123,9 @@ class brotli_block_decompressor final : public block_decompressor::impl { decompressed_.resize(pos + frame_size); uint8_t* next_out = decompressed_.data() + pos; - auto res = ::BrotliDecoderDecompressStream(decoder_.get(), &size_, &data_, - &frame_size, &next_out, nullptr); + auto res = ::BrotliDecoderDecompressStream(decoder_.get(), &brotli_size_, + &brotli_data_, &frame_size, + &next_out, nullptr); if (res == BROTLI_DECODER_RESULT_ERROR) { DWARFS_THROW(runtime_error, @@ -153,10 +145,9 @@ class brotli_block_decompressor final : public block_decompressor::impl { ::BrotliDecoderGetErrorCode(decoder_.get())); } - mutable_byte_buffer decompressed_; size_t const uncompressed_size_; - uint8_t const* data_; - size_t size_; + uint8_t const* brotli_data_; + size_t brotli_size_; std::unique_ptr decoder_; }; @@ -199,9 +190,8 @@ class brotli_compression_factory : public compression_factory { } std::unique_ptr - make_decompressor(std::span data, - mutable_byte_buffer target) const override { - return std::make_unique(data, std::move(target)); + make_decompressor(std::span data) const override { + return std::make_unique(data); } private: diff --git a/src/compression/flac.cpp b/src/compression/flac.cpp index de2ac87b..a92240da 100644 --- a/src/compression/flac.cpp +++ b/src/compression/flac.cpp @@ -32,7 +32,6 @@ #include -#include #include #include #include @@ -42,6 +41,8 @@ #include +#include "base.h" + namespace dwarfs { namespace { @@ -98,10 +99,9 @@ class dwarfs_flac_stream_encoder final : public FLAC::Encoder::Stream { class dwarfs_flac_stream_decoder final : public FLAC::Decoder::Stream { public: dwarfs_flac_stream_decoder( - mutable_byte_buffer target, std::span data, + std::span data, thrift::compression::flac_block_header const& header) - : target_{std::move(target)} - , data_{data} + : data_{data} , header_{header} , bytes_per_sample_{(header_.flags().value() & kBytesPerSampleMask) + 1} , xfm_{header_.flags().value() & kFlagBigEndian @@ -115,6 +115,11 @@ class dwarfs_flac_stream_decoder final : public FLAC::Decoder::Stream { : pcm_sample_padding::Msb, bytes_per_sample_, header_.bits_per_sample().value()} {} + void set_target(mutable_byte_buffer target) { + DWARFS_CHECK(!target_, "target buffer already set"); + target_ = std::move(target); + } + // NOLINTBEGIN(cppcoreguidelines-avoid-c-arrays) ::FLAC__StreamDecoderReadStatus read_callback(FLAC__byte buffer[], size_t* bytes) override { @@ -147,6 +152,8 @@ class dwarfs_flac_stream_decoder final : public FLAC::Decoder::Stream { } } + DWARFS_CHECK(target_, "target buffer not set"); + auto pos = target_.size(); size_t size = channels * samples * bytes_per_sample_; @@ -384,15 +391,12 @@ class flac_block_compressor final : public block_compressor::impl { bool const exhaustive_; }; -class flac_block_decompressor final : public block_decompressor::impl { +class flac_block_decompressor final : public block_decompressor_base { public: - flac_block_decompressor(std::span data, - mutable_byte_buffer target) - : decompressed_{std::move(target)} - , uncompressed_size_{varint::decode(data)} + flac_block_decompressor(std::span data) + : uncompressed_size_{varint::decode(data)} , header_{decode_header(data)} - , decoder_{std::make_unique(decompressed_, - data, header_)} { + , decoder_{std::make_unique(data, header_)} { decoder_->set_md5_checking(false); decoder_->set_metadata_ignore_all(); @@ -402,16 +406,11 @@ class flac_block_decompressor final : public block_decompressor::impl { fmt::format("[FLAC] could not initialize decoder: {}", FLAC__StreamDecoderInitStatusString[status])); } + } - try { - decompressed_.reserve(uncompressed_size_); - } catch (std::bad_alloc const&) { - DWARFS_THROW( - runtime_error, - fmt::format( - "[FLAC] could not reserve {} bytes for decompressed block", - uncompressed_size_)); - } + void start_decompression(mutable_byte_buffer target) override { + block_decompressor_base::start_decompression(std::move(target)); + decoder_->set_target(decompressed_); } compression_type type() const override { return compression_type::FLAC; } @@ -430,6 +429,8 @@ class flac_block_decompressor final : public block_decompressor::impl { } bool decompress_frame(size_t frame_size) override { + DWARFS_CHECK(decompressed_, "decompression not started"); + size_t pos = decompressed_.size(); if (pos + frame_size > uncompressed_size_) { @@ -471,7 +472,6 @@ class flac_block_decompressor final : public block_decompressor::impl { return hdr; } - mutable_byte_buffer decompressed_; size_t const uncompressed_size_; thrift::compression::flac_block_header const header_; std::unique_ptr decoder_; @@ -508,9 +508,8 @@ class flac_compression_factory : public compression_factory { } std::unique_ptr - make_decompressor(std::span data, - mutable_byte_buffer target) const override { - return std::make_unique(data, std::move(target)); + make_decompressor(std::span data) const override { + return std::make_unique(data); } private: diff --git a/src/compression/lz4.cpp b/src/compression/lz4.cpp index 715ebd29..99351add 100644 --- a/src/compression/lz4.cpp +++ b/src/compression/lz4.cpp @@ -24,13 +24,14 @@ #include -#include #include #include #include #include #include +#include "base.h" + namespace dwarfs { namespace { @@ -107,28 +108,17 @@ class lz4_block_compressor final : public block_compressor::impl { int const level_; }; -class lz4_block_decompressor final : public block_decompressor::impl { +class lz4_block_decompressor final : public block_decompressor_base { public: - lz4_block_decompressor(std::span data, - mutable_byte_buffer target) - : decompressed_(std::move(target)) - , data_(data.subspan(sizeof(uint32_t))) - , uncompressed_size_(get_uncompressed_size(data.data())) { - try { - decompressed_.reserve(uncompressed_size_); - } catch (std::bad_alloc const&) { - DWARFS_THROW( - runtime_error, - fmt::format("could not reserve {} bytes for decompressed block", - uncompressed_size_)); - } - } + lz4_block_decompressor(std::span data) + : data_(data.subspan(sizeof(uint32_t))) + , uncompressed_size_(get_uncompressed_size(data.data())) {} compression_type type() const override { return compression_type::LZ4; } - std::optional metadata() const override { return std::nullopt; } - bool decompress_frame(size_t) override { + DWARFS_CHECK(decompressed_, "decompression not started"); + if (!error_.empty()) { DWARFS_THROW(runtime_error, error_); } @@ -158,7 +148,6 @@ class lz4_block_decompressor final : public block_decompressor::impl { return size; } - mutable_byte_buffer decompressed_; std::span data_; size_t const uncompressed_size_; std::string error_; @@ -188,9 +177,8 @@ class lz4_compression_factory : public compression_factory { } std::unique_ptr - make_decompressor(std::span data, - mutable_byte_buffer target) const override { - return std::make_unique(data, std::move(target)); + make_decompressor(std::span data) const override { + return std::make_unique(data); } private: @@ -225,9 +213,8 @@ class lz4hc_compression_factory : public compression_factory { } std::unique_ptr - make_decompressor(std::span data, - mutable_byte_buffer target) const override { - return std::make_unique(data, std::move(target)); + make_decompressor(std::span data) const override { + return std::make_unique(data); } private: diff --git a/src/compression/lzma.cpp b/src/compression/lzma.cpp index a35d23b8..f0ca1c69 100644 --- a/src/compression/lzma.cpp +++ b/src/compression/lzma.cpp @@ -31,7 +31,6 @@ #include #include -#include #include #include #include @@ -39,6 +38,8 @@ #include #include +#include "base.h" + namespace dwarfs { namespace { @@ -260,12 +261,10 @@ lzma_block_compressor::compress(shared_byte_buffer const& data, return best; } -class lzma_block_decompressor final : public block_decompressor::impl { +class lzma_block_decompressor final : public block_decompressor_base { public: - lzma_block_decompressor(std::span data, - mutable_byte_buffer target) + lzma_block_decompressor(std::span data) : stream_(LZMA_STREAM_INIT) - , decompressed_(std::move(target)) , uncompressed_size_(get_uncompressed_size(data.data(), data.size())) { stream_.next_in = data.data(); stream_.avail_in = data.size(); @@ -274,23 +273,15 @@ class lzma_block_decompressor final : public block_decompressor::impl { DWARFS_THROW(runtime_error, fmt::format("lzma_stream_decoder: {}", lzma_error_string(ret))); } - try { - decompressed_.reserve(uncompressed_size_); - } catch (std::bad_alloc const&) { - DWARFS_THROW( - runtime_error, - fmt::format("could not reserve {} bytes for decompressed block", - uncompressed_size_)); - } } ~lzma_block_decompressor() override { lzma_end(&stream_); } compression_type type() const override { return compression_type::LZMA; } - std::optional metadata() const override { return std::nullopt; } - bool decompress_frame(size_t frame_size) override { + DWARFS_CHECK(decompressed_, "decompression not started"); + if (!error_.empty()) { DWARFS_THROW(runtime_error, error_); } @@ -333,7 +324,6 @@ class lzma_block_decompressor final : public block_decompressor::impl { static size_t get_uncompressed_size(uint8_t const* data, size_t size); lzma_stream stream_; - mutable_byte_buffer decompressed_; size_t const uncompressed_size_; std::string error_; }; @@ -426,9 +416,8 @@ class lzma_compression_factory : public compression_factory { } std::unique_ptr - make_decompressor(std::span data, - mutable_byte_buffer target) const override { - return std::make_unique(data, std::move(target)); + make_decompressor(std::span data) const override { + return std::make_unique(data); } private: diff --git a/src/compression/null.cpp b/src/compression/null.cpp index 97e58c47..10069377 100644 --- a/src/compression/null.cpp +++ b/src/compression/null.cpp @@ -23,11 +23,12 @@ #include -#include #include #include #include +#include "base.h" + namespace dwarfs { namespace { @@ -58,28 +59,16 @@ class null_block_compressor final : public block_compressor::impl { } }; -class null_block_decompressor final : public block_decompressor::impl { +class null_block_decompressor final : public block_decompressor_base { public: - null_block_decompressor(std::span data, - mutable_byte_buffer target) - : decompressed_(std::move(target)) - , data_(data) { - // TODO: we shouldn't have to copy this to memory at all... - try { - decompressed_.reserve(data.size()); - } catch (std::bad_alloc const&) { - DWARFS_THROW( - runtime_error, - fmt::format("could not reserve {} bytes for decompressed block", - data.size())); - } - } + null_block_decompressor(std::span data) + : data_(data) {} compression_type type() const override { return compression_type::NONE; } - std::optional metadata() const override { return std::nullopt; } - bool decompress_frame(size_t frame_size) override { + DWARFS_CHECK(decompressed_, "decompression not started"); + if (decompressed_.size() + frame_size > data_.size()) { frame_size = data_.size() - decompressed_.size(); } @@ -98,7 +87,6 @@ class null_block_decompressor final : public block_decompressor::impl { size_t uncompressed_size() const override { return data_.size(); } private: - mutable_byte_buffer decompressed_; std::span data_; }; @@ -122,9 +110,8 @@ class null_compression_factory : public compression_factory { } std::unique_ptr - make_decompressor(std::span data, - mutable_byte_buffer target) const override { - return std::make_unique(data, std::move(target)); + make_decompressor(std::span data) const override { + return std::make_unique(data); } private: diff --git a/src/compression/ricepp.cpp b/src/compression/ricepp.cpp index 17872fbb..2fb68f2e 100644 --- a/src/compression/ricepp.cpp +++ b/src/compression/ricepp.cpp @@ -27,14 +27,16 @@ #include -#include #include #include #include #include +#include #include +#include "base.h" + namespace dwarfs { namespace { @@ -165,12 +167,10 @@ class ricepp_block_compressor final : public block_compressor::impl { size_t const block_size_; }; -class ricepp_block_decompressor final : public block_decompressor::impl { +class ricepp_block_decompressor final : public block_decompressor_base { public: - ricepp_block_decompressor(std::span data, - mutable_byte_buffer target) - : decompressed_{std::move(target)} - , uncompressed_size_{varint::decode(data)} + ricepp_block_decompressor(std::span data) + : uncompressed_size_{varint::decode(data)} , header_{decode_header(data)} , data_{data} , codec_{ricepp::create_codec( @@ -184,16 +184,6 @@ class ricepp_block_decompressor final : public block_decompressor::impl { fmt::format("[RICEPP] unsupported bytes per sample: {}", header_.bytes_per_sample().value())); } - - try { - decompressed_.reserve(uncompressed_size_); - } catch (std::bad_alloc const&) { - DWARFS_THROW( - runtime_error, - fmt::format( - "[RICEPP] could not reserve {} bytes for decompressed block", - uncompressed_size_)); - } } compression_type type() const override { return compression_type::RICEPP; } @@ -209,6 +199,8 @@ class ricepp_block_decompressor final : public block_decompressor::impl { } bool decompress_frame(size_t) override { + DWARFS_CHECK(decompressed_, "decompression not started"); + if (!codec_) { return false; } @@ -243,7 +235,6 @@ class ricepp_block_decompressor final : public block_decompressor::impl { return hdr; } - mutable_byte_buffer decompressed_; size_t const uncompressed_size_; thrift::compression::ricepp_block_header const header_; std::span data_; @@ -277,9 +268,8 @@ class ricepp_compression_factory : public compression_factory { } std::unique_ptr - make_decompressor(std::span data, - mutable_byte_buffer target) const override { - return std::make_unique(data, std::move(target)); + make_decompressor(std::span data) const override { + return std::make_unique(data); } private: diff --git a/src/compression/zstd.cpp b/src/compression/zstd.cpp index b3fd88ec..5a90d7ac 100644 --- a/src/compression/zstd.cpp +++ b/src/compression/zstd.cpp @@ -25,13 +25,14 @@ #include -#include #include #include #include #include #include +#include "base.h" + #if ZSTD_VERSION_MAJOR > 1 || \ (ZSTD_VERSION_MAJOR == 1 && ZSTD_VERSION_MINOR >= 4) #define ZSTD_MIN_LEVEL ZSTD_minCLevel() @@ -108,12 +109,10 @@ zstd_block_compressor::compress(shared_byte_buffer const& data, return compressed.share(); } -class zstd_block_decompressor final : public block_decompressor::impl { +class zstd_block_decompressor final : public block_decompressor_base { public: - zstd_block_decompressor(std::span data, - mutable_byte_buffer target) - : decompressed_(std::move(target)) - , data_(data) + zstd_block_decompressor(std::span data) + : data_(data) , uncompressed_size_(ZSTD_getFrameContentSize(data.data(), data.size())) { switch (uncompressed_size_) { case ZSTD_CONTENTSIZE_UNKNOWN: @@ -127,22 +126,13 @@ class zstd_block_decompressor final : public block_decompressor::impl { default: break; } - - try { - decompressed_.reserve(uncompressed_size_); - } catch (std::bad_alloc const&) { - DWARFS_THROW( - runtime_error, - fmt::format("could not reserve {} bytes for decompressed block", - uncompressed_size_)); - } } compression_type type() const override { return compression_type::ZSTD; } - std::optional metadata() const override { return std::nullopt; } - bool decompress_frame(size_t /*frame_size*/) override { + DWARFS_CHECK(decompressed_, "decompression not started"); + if (!error_.empty()) { DWARFS_THROW(runtime_error, error_); } @@ -163,7 +153,6 @@ class zstd_block_decompressor final : public block_decompressor::impl { size_t uncompressed_size() const override { return uncompressed_size_; } private: - mutable_byte_buffer decompressed_; std::span data_; unsigned long long const uncompressed_size_; std::string error_; @@ -198,9 +187,8 @@ class zstd_compression_factory : public compression_factory { } std::unique_ptr - make_decompressor(std::span data, - mutable_byte_buffer target) const override { - return std::make_unique(data, std::move(target)); + make_decompressor(std::span data) const override { + return std::make_unique(data); } private: diff --git a/src/reader/filesystem_v2.cpp b/src/reader/filesystem_v2.cpp index da0fe919..ed687bf2 100644 --- a/src/reader/filesystem_v2.cpp +++ b/src/reader/filesystem_v2.cpp @@ -103,9 +103,7 @@ block_decompressor get_block_decompressor(mmif& mm, fs_section const& sec) { fmt::format("attempt to access damaged {} section", sec.name())); } - auto tmp = vector_byte_buffer::create(); - auto span = sec.data(mm); - return {sec.compression(), span, tmp}; + return {sec.compression(), sec.data(mm)}; } std::optional diff --git a/src/reader/internal/cached_block.cpp b/src/reader/internal/cached_block.cpp index bdc875b0..2020f984 100644 --- a/src/reader/internal/cached_block.cpp +++ b/src/reader/internal/cached_block.cpp @@ -47,9 +47,9 @@ class cached_block_ final : public cached_block { cached_block_(logger& lgr, fs_section const& b, std::shared_ptr mm, bool release, bool disable_integrity_check) - : data_{vector_byte_buffer::create()} - , decompressor_{std::make_unique( - b.compression(), mm->span(b.start(), b.length()), data_)} + : decompressor_{std::make_unique( + b.compression(), mm->span(b.start(), b.length()))} + , data_{decompressor_->start_decompression(vector_byte_buffer::create())} , mm_(std::move(mm)) , section_(b) , LOG_PROXY_INIT(lgr) @@ -101,11 +101,6 @@ class cached_block_ final : public cached_block { try_release(); } - if (pos == 0) { - // Freeze the location of the data buffer - data_.freeze_location(); - } - pos = data_.size(); range_end_.store(pos, std::memory_order_release); } @@ -149,8 +144,8 @@ class cached_block_ final : public cached_block { } std::atomic range_end_{0}; - mutable_byte_buffer data_; std::unique_ptr decompressor_; + shared_byte_buffer data_; std::shared_ptr mm_; fs_section section_; LOG_PROXY_DECL(LoggerPolicy); diff --git a/src/vector_byte_buffer.cpp b/src/vector_byte_buffer.cpp index f5cbdc6e..69ba2fe7 100644 --- a/src/vector_byte_buffer.cpp +++ b/src/vector_byte_buffer.cpp @@ -116,6 +116,12 @@ mutable_byte_buffer vector_byte_buffer::create(size_t size) { return mutable_byte_buffer{std::make_shared(size)}; } +mutable_byte_buffer vector_byte_buffer::create_reserve(size_t size) { + auto rv = std::make_shared(); + rv->reserve(size); + return mutable_byte_buffer{std::move(rv)}; +} + mutable_byte_buffer vector_byte_buffer::create(std::string_view data) { return mutable_byte_buffer{std::make_shared(data)}; } diff --git a/src/writer/filesystem_writer.cpp b/src/writer/filesystem_writer.cpp index 83996d19..bcd89d1a 100644 --- a/src/writer/filesystem_writer.cpp +++ b/src/writer/filesystem_writer.cpp @@ -385,46 +385,46 @@ class rewritten_fsblock : public fsblock::impl { std::promise prom; future_ = prom.get_future(); - wg.add_job( - [this, prom = std::move(prom), meta = std::move(meta)]() mutable { - try { - shared_byte_buffer block; + wg.add_job([this, prom = std::move(prom), + meta = std::move(meta)]() mutable { + try { + shared_byte_buffer block; - { - // TODO: we don't have to do this for uncompressed blocks - auto buffer = vector_byte_buffer::create(); - block_decompressor bd(data_comp_type_, data_, buffer); - bd.decompress_frame(bd.uncompressed_size()); + { + // TODO: we don't have to do this for uncompressed blocks + block_decompressor bd(data_comp_type_, data_); + auto buffer = bd.start_decompression(vector_byte_buffer::create()); + bd.decompress_frame(bd.uncompressed_size()); - if (!meta) { - meta = bd.metadata(); - } - - pctx_->bytes_in += buffer.size(); // TODO: data_.size()? - - try { - if (meta) { - block = bc_.compress(buffer.share(), *meta); - } else { - block = bc_.compress(buffer.share()); - } - } catch (bad_compression_ratio_error const&) { - comp_type_ = compression_type::NONE; - } - } - - pctx_->bytes_out += block.size(); - - { - std::lock_guard lock(mx_); - block_data_.emplace(std::move(block)); - } - - prom.set_value(); - } catch (...) { - prom.set_exception(std::current_exception()); + if (!meta) { + meta = bd.metadata(); } - }); + + pctx_->bytes_in += buffer.size(); // TODO: data_.size()? + + try { + if (meta) { + block = bc_.compress(buffer, *meta); + } else { + block = bc_.compress(buffer); + } + } catch (bad_compression_ratio_error const&) { + comp_type_ = compression_type::NONE; + } + } + + pctx_->bytes_out += block.size(); + + { + std::lock_guard lock(mx_); + block_data_.emplace(std::move(block)); + } + + prom.set_value(); + } catch (...) { + prom.set_exception(std::current_exception()); + } + }); } void wait_until_compressed() override { future_.get(); } @@ -894,8 +894,7 @@ void filesystem_writer_::check_block_compression( if (auto reqstr = bc->metadata_requirements(); !reqstr.empty()) { auto req = compression_metadata_requirements{reqstr}; - auto tmp = vector_byte_buffer::create(); - block_decompressor bd(compression, data, tmp); + block_decompressor bd(compression, data); try { req.check(bd.metadata()); diff --git a/test/flac_compressor_test.cpp b/test/flac_compressor_test.cpp index 0ee4afc4..6d481ff8 100644 --- a/test/flac_compressor_test.cpp +++ b/test/flac_compressor_test.cpp @@ -28,6 +28,7 @@ #include #include +#include using namespace dwarfs;