diff --git a/include/dwarfs/block_compressor.h b/include/dwarfs/block_compressor.h index 40662913..582f776e 100644 --- a/include/dwarfs/block_compressor.h +++ b/include/dwarfs/block_compressor.h @@ -61,12 +61,12 @@ class block_compressor { block_compressor(block_compressor&& bc) = default; block_compressor& operator=(block_compressor&& rhs) = default; - std::vector compress(std::span data) const { + shared_byte_buffer compress(shared_byte_buffer const& data) const { return impl_->compress(data, nullptr); } - std::vector - compress(std::span data, std::string const& metadata) const { + shared_byte_buffer + compress(shared_byte_buffer const& data, std::string const& metadata) const { return impl_->compress(data, &metadata); } @@ -91,9 +91,8 @@ class block_compressor { virtual std::unique_ptr clone() const = 0; - virtual std::vector - compress(std::span data, - std::string const* metadata) const = 0; + virtual shared_byte_buffer compress(shared_byte_buffer const& data, + std::string const* metadata) const = 0; virtual compression_type type() const = 0; virtual std::string describe() const = 0; @@ -152,7 +151,7 @@ class compression_info { virtual std::string_view name() const = 0; virtual std::string_view description() const = 0; - virtual std::vector const& options() const = 0; + virtual std::vector const& options() const = 0; // TODO: span? virtual std::set library_dependencies() const = 0; }; diff --git a/include/dwarfs/byte_buffer.h b/include/dwarfs/byte_buffer.h index 4073e718..96c4fa73 100644 --- a/include/dwarfs/byte_buffer.h +++ b/include/dwarfs/byte_buffer.h @@ -25,6 +25,7 @@ #include #include #include +#include namespace dwarfs { @@ -45,15 +46,23 @@ class byte_buffer_interface { public: virtual ~byte_buffer_interface() = default; + virtual uint8_t const* data() const = 0; + virtual size_t size() const = 0; virtual std::span span() const = 0; }; class mutable_byte_buffer_interface : public byte_buffer_interface { public: + virtual uint8_t* mutable_data() = 0; virtual std::span mutable_span() = 0; virtual void clear() = 0; virtual void reserve(size_t size) = 0; virtual void resize(size_t size) = 0; + virtual void shrink_to_fit() = 0; + + // TODO: See if we can do without this. This will *only* be implemented + // in the vector_byte_buffer, other implementations will throw. + virtual std::vector& raw_vector() = 0; }; class shared_byte_buffer { @@ -63,14 +72,16 @@ class shared_byte_buffer { explicit shared_byte_buffer(std::shared_ptr bb) : bb_{std::move(bb)} {} - uint8_t const* data() const { return span().data(); } + uint8_t const* data() const { return bb_->data(); } - size_t size() const { return span().size(); } + size_t size() const { return bb_->size(); } - bool empty() const { return span().empty(); } + bool empty() const { return bb_->size() == 0; } std::span span() const { return bb_->span(); } + void swap(shared_byte_buffer& other) noexcept { std::swap(bb_, other.bb_); } + template friend bool operator==(shared_byte_buffer const& lhs, T const& rhs) { return detail::compare_spans(lhs.span(), {rhs.data(), rhs.size()}) == @@ -78,6 +89,7 @@ class shared_byte_buffer { } template + requires(!std::same_as) friend bool operator==(T const& lhs, shared_byte_buffer const& rhs) { return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()) == std::strong_ordering::equal; @@ -90,6 +102,7 @@ class shared_byte_buffer { } template + requires(!std::same_as) friend std::strong_ordering operator<=>(T const& lhs, shared_byte_buffer const& rhs) { return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()); @@ -105,13 +118,13 @@ class mutable_byte_buffer { std::shared_ptr bb) : bb_{std::move(bb)} {} - uint8_t const* data() const { return span().data(); } + uint8_t const* data() const { return bb_->data(); } - uint8_t* data() { return span().data(); } + uint8_t* data() { return bb_->mutable_data(); } - size_t size() const { return span().size(); } + size_t size() const { return bb_->size(); } - bool empty() const { return span().empty(); } + bool empty() const { return bb_->size() == 0; } std::span span() const { return bb_->span(); } @@ -123,6 +136,12 @@ class mutable_byte_buffer { void resize(size_t size) { bb_->resize(size); } + void shrink_to_fit() { bb_->shrink_to_fit(); } + + std::vector& raw_vector() { return bb_->raw_vector(); } + + void swap(mutable_byte_buffer& other) noexcept { std::swap(bb_, other.bb_); } + template friend bool operator==(mutable_byte_buffer const& lhs, T const& rhs) { return detail::compare_spans(lhs.span(), {rhs.data(), rhs.size()}) == @@ -130,6 +149,7 @@ class mutable_byte_buffer { } template + requires(!std::same_as) friend bool operator==(T const& lhs, mutable_byte_buffer const& rhs) { return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()) == std::strong_ordering::equal; @@ -142,6 +162,7 @@ class mutable_byte_buffer { } template + requires(!std::same_as) friend std::strong_ordering operator<=>(T const& lhs, mutable_byte_buffer const& rhs) { return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()); diff --git a/include/dwarfs/history.h b/include/dwarfs/history.h index 9ee4e018..e9ebe948 100644 --- a/include/dwarfs/history.h +++ b/include/dwarfs/history.h @@ -30,6 +30,7 @@ #include +#include #include namespace dwarfs { @@ -50,7 +51,7 @@ class history { thrift::history::history const& get() const { return *history_; } void append(std::optional> args); size_t size() const; - std::vector serialize() const; + shared_byte_buffer serialize() const; void dump(std::ostream& os) const; nlohmann::json as_json() const; diff --git a/include/dwarfs/vector_byte_buffer.h b/include/dwarfs/vector_byte_buffer.h index c2e54094..f80a4288 100644 --- a/include/dwarfs/vector_byte_buffer.h +++ b/include/dwarfs/vector_byte_buffer.h @@ -21,6 +21,8 @@ #pragma once +#include + #include namespace dwarfs { @@ -28,6 +30,10 @@ namespace dwarfs { class vector_byte_buffer { public: static mutable_byte_buffer create(); + static mutable_byte_buffer create(size_t size); + static mutable_byte_buffer create(std::string_view data); + static mutable_byte_buffer create(std::span data); + static mutable_byte_buffer create(std::vector&& data); }; } // namespace dwarfs diff --git a/include/dwarfs/writer/internal/block_data.h b/include/dwarfs/writer/internal/block_data.h deleted file mode 100644 index e9ea346b..00000000 --- a/include/dwarfs/writer/internal/block_data.h +++ /dev/null @@ -1,54 +0,0 @@ -/* vim:set ts=2 sw=2 sts=2 et: */ -/** - * \author Marcus Holland-Moritz (github@mhxnet.de) - * \copyright Copyright (c) Marcus Holland-Moritz - * - * This file is part of dwarfs. - * - * dwarfs is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * dwarfs is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with dwarfs. If not, see . - */ - -#pragma once - -#include -#include -#include - -namespace dwarfs::writer::internal { - -class block_data { - public: - block_data() = default; - explicit block_data(std::vector&& vec) - : vec_{std::move(vec)} {} - explicit block_data(std::string_view str) - : vec_{str.begin(), str.end()} {} - - std::vector const& vec() const { return vec_; } - std::vector& vec() { return vec_; } - - void reserve(size_t size) { vec_.reserve(size); } - - uint8_t const* data() const { return vec_.data(); } - uint8_t* data() { return vec_.data(); } - - size_t size() const { return vec_.size(); } - - bool empty() const { return vec_.empty(); } - - private: - std::vector vec_; -}; - -} // namespace dwarfs::writer::internal diff --git a/include/dwarfs/writer/internal/filesystem_writer_detail.h b/include/dwarfs/writer/internal/filesystem_writer_detail.h index f8eff04d..cecd3677 100644 --- a/include/dwarfs/writer/internal/filesystem_writer_detail.h +++ b/include/dwarfs/writer/internal/filesystem_writer_detail.h @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -45,8 +46,6 @@ class fs_section; namespace writer::internal { -class block_data; - class filesystem_writer_detail { public: virtual ~filesystem_writer_detail() = default; @@ -71,14 +70,13 @@ class filesystem_writer_detail { virtual void configure_rewrite(size_t filesystem_size, size_t block_count) = 0; virtual void copy_header(std::span header) = 0; - virtual void - write_block(fragment_category cat, std::shared_ptr&& data, - physical_block_cb_type physical_block_cb, - std::optional meta = std::nullopt) = 0; + virtual void write_block(fragment_category cat, shared_byte_buffer data, + physical_block_cb_type physical_block_cb, + std::optional meta = std::nullopt) = 0; virtual void finish_category(fragment_category cat) = 0; - virtual void write_metadata_v2_schema(std::shared_ptr&& data) = 0; - virtual void write_metadata_v2(std::shared_ptr&& data) = 0; - virtual void write_history(std::shared_ptr&& data) = 0; + virtual void write_metadata_v2_schema(shared_byte_buffer data) = 0; + virtual void write_metadata_v2(shared_byte_buffer data) = 0; + virtual void write_history(shared_byte_buffer data) = 0; virtual void check_block_compression( compression_type compression, std::span data, std::optional cat = std::nullopt) = 0; diff --git a/include/dwarfs/writer/internal/metadata_freezer.h b/include/dwarfs/writer/internal/metadata_freezer.h index 59fdea10..ed8aa36f 100644 --- a/include/dwarfs/writer/internal/metadata_freezer.h +++ b/include/dwarfs/writer/internal/metadata_freezer.h @@ -23,7 +23,8 @@ #include #include -#include + +#include namespace dwarfs { @@ -35,7 +36,7 @@ namespace writer::internal { class metadata_freezer { public: - static std::pair, std::vector> + static std::pair freeze(thrift::metadata::metadata const& data); }; diff --git a/include/dwarfs/writer/segmenter.h b/include/dwarfs/writer/segmenter.h index 9d49e2ea..ebb8c122 100644 --- a/include/dwarfs/writer/segmenter.h +++ b/include/dwarfs/writer/segmenter.h @@ -26,6 +26,8 @@ #include #include +#include + namespace dwarfs { struct compression_constraints; @@ -38,7 +40,6 @@ class writer_progress; namespace internal { -class block_data; class block_manager; class chunkable; @@ -55,8 +56,8 @@ class segmenter { unsigned block_size_bits{22}; }; - using block_ready_cb = std::function, size_t logical_block_num)>; + using block_ready_cb = + std::function; segmenter(logger& lgr, writer_progress& prog, std::shared_ptr blkmgr, config const& cfg, diff --git a/src/compression/brotli.cpp b/src/compression/brotli.cpp index 6d62329e..31aff400 100644 --- a/src/compression/brotli.cpp +++ b/src/compression/brotli.cpp @@ -31,6 +31,7 @@ #include #include #include +#include namespace dwarfs { @@ -48,10 +49,9 @@ class brotli_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector - compress(std::span data, - std::string const* /*metadata*/) const override { - std::vector compressed; + shared_byte_buffer compress(shared_byte_buffer const& data, + std::string const* /*metadata*/) const override { + auto compressed = vector_byte_buffer::create(); // TODO: make configurable compressed.resize(varint::max_size + ::BrotliEncoderMaxCompressedSize(data.size())); size_t size_size = varint::encode(data.size(), compressed.data()); @@ -66,7 +66,7 @@ class brotli_block_compressor final : public block_compressor::impl { throw bad_compression_ratio_error(); } compressed.shrink_to_fit(); - return compressed; + return compressed.share(); } compression_type type() const override { return compression_type::BROTLI; } diff --git a/src/compression/flac.cpp b/src/compression/flac.cpp index 4b34de7c..de2ac87b 100644 --- a/src/compression/flac.cpp +++ b/src/compression/flac.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -53,7 +54,7 @@ constexpr size_t const kBlockSize{65536}; class dwarfs_flac_stream_encoder final : public FLAC::Encoder::Stream { public: - explicit dwarfs_flac_stream_encoder(std::vector& data) + explicit dwarfs_flac_stream_encoder(mutable_byte_buffer& data) : data_{data} , pos_{data_.size()} {} @@ -90,7 +91,7 @@ class dwarfs_flac_stream_encoder final : public FLAC::Encoder::Stream { } private: - std::vector& data_; + mutable_byte_buffer& data_; size_t pos_; }; @@ -208,8 +209,8 @@ class flac_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector compress(std::span data, - std::string const* metadata) const override { + shared_byte_buffer compress(shared_byte_buffer const& data, + std::string const* metadata) const override { if (!metadata) { DWARFS_THROW(runtime_error, "internal error: flac compression requires metadata"); @@ -265,7 +266,7 @@ class flac_block_compressor final : public block_compressor::impl { pcm_pad = pcm_sample_padding::Msb; } - std::vector compressed; + auto compressed = vector_byte_buffer::create(); // TODO: make configurable { using namespace ::apache::thrift; @@ -286,7 +287,7 @@ class flac_block_compressor final : public block_compressor::impl { CompactSerializer::serialize(hdr, &hdrbuf); compressed.resize(pos + hdrbuf.size()); - ::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size()); + ::memcpy(compressed.data() + pos, hdrbuf.data(), hdrbuf.size()); } dwarfs_flac_stream_encoder encoder(compressed); @@ -341,7 +342,7 @@ class flac_block_compressor final : public block_compressor::impl { compressed.shrink_to_fit(); - return compressed; + return compressed.share(); } compression_type type() const override { return compression_type::FLAC; } diff --git a/src/compression/lz4.cpp b/src/compression/lz4.cpp index f20efbbb..715ebd29 100644 --- a/src/compression/lz4.cpp +++ b/src/compression/lz4.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace dwarfs { @@ -69,18 +70,18 @@ class lz4_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector - compress(std::span data, - std::string const* /*metadata*/) const override { - std::vector compressed(sizeof(uint32_t) + - LZ4_compressBound(to(data.size()))); + shared_byte_buffer compress(shared_byte_buffer const& data, + std::string const* /*metadata*/) const override { + auto compressed = vector_byte_buffer::create(); // TODO: make configurable + compressed.resize(sizeof(uint32_t) + + LZ4_compressBound(to(data.size()))); // TODO: this should have been a varint; also, if we ever support // big-endian systems, we'll have to properly convert this uint32_t size = data.size(); std::memcpy(compressed.data(), &size, sizeof(size)); - auto csize = Policy::compress(data.data(), &compressed[sizeof(uint32_t)], - data.size(), - compressed.size() - sizeof(uint32_t), level_); + auto csize = Policy::compress( + data.data(), compressed.data() + sizeof(uint32_t), data.size(), + compressed.size() - sizeof(uint32_t), level_); if (csize == 0) { DWARFS_THROW(runtime_error, "error during compression"); } @@ -88,7 +89,7 @@ class lz4_block_compressor final : public block_compressor::impl { throw bad_compression_ratio_error(); } compressed.resize(sizeof(uint32_t) + csize); - return compressed; + return compressed.share(); } compression_type type() const override { return compression_type::LZ4; } diff --git a/src/compression/lzma.cpp b/src/compression/lzma.cpp index d00eeeac..a35d23b8 100644 --- a/src/compression/lzma.cpp +++ b/src/compression/lzma.cpp @@ -37,6 +37,7 @@ #include #include #include +#include namespace dwarfs { @@ -111,8 +112,8 @@ class lzma_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector compress(std::span data, - std::string const* metadata) const override; + shared_byte_buffer compress(shared_byte_buffer const& data, + std::string const* metadata) const override; compression_type type() const override { return compression_type::LZMA; } @@ -126,8 +127,8 @@ class lzma_block_compressor final : public block_compressor::impl { } private: - std::vector - compress(std::span data, lzma_filter const* filters) const; + shared_byte_buffer + compress(shared_byte_buffer const& data, lzma_filter const* filters) const; static uint32_t get_preset(unsigned level, bool extreme) { uint32_t preset = level; @@ -197,8 +198,8 @@ lzma_block_compressor::lzma_block_compressor(option_map& om) { } } -std::vector -lzma_block_compressor::compress(std::span data, +shared_byte_buffer +lzma_block_compressor::compress(shared_byte_buffer const& data, lzma_filter const* filters) const { lzma_stream s = LZMA_STREAM_INIT; @@ -210,7 +211,8 @@ lzma_block_compressor::compress(std::span data, lzma_action action = LZMA_FINISH; - std::vector compressed(data.size() - 1); + auto compressed = vector_byte_buffer::create(); // TODO: make configurable + compressed.resize(data.size() - 1); s.next_in = data.data(); s.avail_in = data.size(); @@ -234,21 +236,21 @@ lzma_block_compressor::compress(std::span data, lzma_error_string(ret))); } - return compressed; + return compressed.share(); } -std::vector -lzma_block_compressor::compress(std::span data, +shared_byte_buffer +lzma_block_compressor::compress(shared_byte_buffer const& data, std::string const* /*metadata*/) const { auto lzma_opts = opt_lzma_; std::array filters{{{binary_vli_, nullptr}, {LZMA_FILTER_LZMA2, &lzma_opts}, {LZMA_VLI_UNKNOWN, nullptr}}}; - std::vector best = compress(data, &filters[1]); + auto best = compress(data, &filters[1]); if (filters[0].id != LZMA_VLI_UNKNOWN) { - std::vector compressed = compress(data, filters.data()); + auto compressed = compress(data, filters.data()); if (compressed.size() < best.size()) { best.swap(compressed); diff --git a/src/compression/null.cpp b/src/compression/null.cpp index cf5f8681..97e58c47 100644 --- a/src/compression/null.cpp +++ b/src/compression/null.cpp @@ -41,11 +41,9 @@ class null_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - // TODO: we should not have to copy the data here... - std::vector - compress(std::span data, - std::string const* /*metadata*/) const override { - return std::vector(data.begin(), data.end()); + shared_byte_buffer compress(shared_byte_buffer const& data, + std::string const* /*metadata*/) const override { + return data; } compression_type type() const override { return compression_type::NONE; } diff --git a/src/compression/ricepp.cpp b/src/compression/ricepp.cpp index 22dc58df..17872fbb 100644 --- a/src/compression/ricepp.cpp +++ b/src/compression/ricepp.cpp @@ -52,8 +52,8 @@ class ricepp_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector compress(std::span data, - std::string const* metadata) const override { + shared_byte_buffer compress(shared_byte_buffer const& data, + std::string const* metadata) const override { if (!metadata) { DWARFS_THROW(runtime_error, "internal error: ricepp compression requires metadata"); @@ -88,8 +88,10 @@ class ricepp_block_compressor final : public block_compressor::impl { .unused_lsb_count = static_cast(unused_lsb_count), }); - std::vector compressed; + auto compressed = vector_byte_buffer::create(); // TODO: make configurable + // TODO: see if we can resize just once... + // TODO: maybe the mutable_byte_buffer interface can have .append()? { using namespace ::apache::thrift; @@ -111,7 +113,7 @@ class ricepp_block_compressor final : public block_compressor::impl { CompactSerializer::serialize(hdr, &hdrbuf); compressed.resize(pos + hdrbuf.size()); - ::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size()); + ::memcpy(compressed.data() + pos, hdrbuf.data(), hdrbuf.size()); } std::span input{ @@ -121,13 +123,11 @@ class ricepp_block_compressor final : public block_compressor::impl { size_t header_size = compressed.size(); compressed.resize(header_size + codec->worst_case_encoded_bytes(input)); - std::span buffer(compressed); - - auto output = codec->encode(buffer.subspan(header_size), input); + auto output = codec->encode(compressed.span().subspan(header_size), input); compressed.resize(header_size + output.size()); compressed.shrink_to_fit(); - return compressed; + return compressed.share(); } compression_type type() const override { return compression_type::RICEPP; } diff --git a/src/compression/zstd.cpp b/src/compression/zstd.cpp index def9c289..b3fd88ec 100644 --- a/src/compression/zstd.cpp +++ b/src/compression/zstd.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #if ZSTD_VERSION_MAJOR > 1 || \ @@ -54,8 +55,8 @@ class zstd_block_compressor final : public block_compressor::impl { return std::make_unique(*this); } - std::vector compress(std::span data, - std::string const* metadata) const override; + shared_byte_buffer compress(shared_byte_buffer const& data, + std::string const* metadata) const override; compression_type type() const override { return compression_type::ZSTD; } @@ -87,10 +88,11 @@ class zstd_block_compressor final : public block_compressor::impl { int const level_; }; -std::vector -zstd_block_compressor::compress(std::span data, +shared_byte_buffer +zstd_block_compressor::compress(shared_byte_buffer const& data, std::string const* /*metadata*/) const { - std::vector compressed(ZSTD_compressBound(data.size())); + auto compressed = vector_byte_buffer::create(); // TODO: make configurable + compressed.resize(ZSTD_compressBound(data.size())); auto ctx = ctxmgr_->make_context(); auto size = ZSTD_compressCCtx(ctx.get(), compressed.data(), compressed.size(), data.data(), data.size(), level_); @@ -103,7 +105,7 @@ zstd_block_compressor::compress(std::span data, } compressed.resize(size); compressed.shrink_to_fit(); - return compressed; + return compressed.share(); } class zstd_block_decompressor final : public block_decompressor::impl { diff --git a/src/history.cpp b/src/history.cpp index cfbdc6a8..edde7c34 100644 --- a/src/history.cpp +++ b/src/history.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -80,10 +81,10 @@ void history::append(std::optional> args) { size_t history::size() const { return history_->entries()->size(); } -std::vector history::serialize() const { +shared_byte_buffer history::serialize() const { std::string buf; ::apache::thrift::CompactSerializer::serialize(*history_, &buf); - return {buf.begin(), buf.end()}; + return vector_byte_buffer::create(buf).share(); } void history::dump(std::ostream& os) const { diff --git a/src/mapped_byte_buffer.cpp b/src/mapped_byte_buffer.cpp index 4736bd3c..5ca91935 100644 --- a/src/mapped_byte_buffer.cpp +++ b/src/mapped_byte_buffer.cpp @@ -32,6 +32,10 @@ class mapped_byte_buffer_impl : public byte_buffer_interface { : data_{data} , mm_{std::move(mm)} {} + size_t size() const override { return data_.size(); } + + uint8_t const* data() const override { return data_.data(); } + std::span span() const override { return {data_.data(), data_.size()}; } diff --git a/src/reader/filesystem_v2.cpp b/src/reader/filesystem_v2.cpp index 72a1e925..da0fe919 100644 --- a/src/reader/filesystem_v2.cpp +++ b/src/reader/filesystem_v2.cpp @@ -53,7 +53,6 @@ #include #include #include -#include #include namespace dwarfs::reader { diff --git a/src/reader/internal/cached_block.cpp b/src/reader/internal/cached_block.cpp index 1b40e028..0f037b45 100644 --- a/src/reader/internal/cached_block.cpp +++ b/src/reader/internal/cached_block.cpp @@ -78,6 +78,9 @@ class cached_block_ final : public cached_block { // This can be called from any thread size_t range_end() const override { return range_end_.load(); } + // TODO: The code relies on the fact that the data_ buffer is never + // reallocated once block decompression has started. I would like to + // somehow enforce that this cannot happen. uint8_t const* data() const override { return data_.data(); } void decompress_until(size_t end) override { diff --git a/src/utility/rewrite_filesystem.cpp b/src/utility/rewrite_filesystem.cpp index 85acf331..47efd17f 100644 --- a/src/utility/rewrite_filesystem.cpp +++ b/src/utility/rewrite_filesystem.cpp @@ -28,7 +28,6 @@ #include #include -#include #include namespace dwarfs::utility { @@ -38,7 +37,6 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs, dwarfs::writer::category_resolver const& cat_resolver, rewrite_options const& opts) { using dwarfs::writer::fragment_category; - using dwarfs::writer::internal::block_data; LOG_PROXY(debug_logger_policy, lgr); @@ -174,7 +172,7 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs, case section_type::HISTORY: if (opts.enable_history) { history hist{opts.history}; - hist.parse(fs.get_history().serialize()); + hist.parse(fs.get_history().serialize().span()); hist.append(opts.command_line_arguments); LOG_VERBOSE << "updating " << get_section_name(s->type()) << " (" @@ -182,7 +180,7 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs, << "), compressing using '" << writer.get_compressor(s->type()).describe() << "'"; - writer.write_history(std::make_shared(hist.serialize())); + writer.write_history(hist.serialize()); } else { LOG_VERBOSE << "removing " << get_section_name(s->type()); } diff --git a/src/vector_byte_buffer.cpp b/src/vector_byte_buffer.cpp index 35b7a634..a007da6d 100644 --- a/src/vector_byte_buffer.cpp +++ b/src/vector_byte_buffer.cpp @@ -29,6 +29,22 @@ namespace { class vector_byte_buffer_impl : public mutable_byte_buffer_interface { public: + vector_byte_buffer_impl() = default; + explicit vector_byte_buffer_impl(size_t size) + : data_(size) {} + explicit vector_byte_buffer_impl(std::string_view data) + : data_{data.begin(), data.end()} {} + explicit vector_byte_buffer_impl(std::span data) + : data_{data.begin(), data.end()} {} + explicit vector_byte_buffer_impl(std::vector&& data) + : data_{std::move(data)} {} + + size_t size() const override { return data_.size(); } + + uint8_t const* data() const override { return data_.data(); } + + uint8_t* mutable_data() override { return data_.data(); } + std::span span() const override { return {data_.data(), data_.size()}; } @@ -43,6 +59,10 @@ class vector_byte_buffer_impl : public mutable_byte_buffer_interface { void resize(size_t size) override { data_.resize(size); } + void shrink_to_fit() override { data_.shrink_to_fit(); } + + std::vector& raw_vector() override { return data_; } + private: std::vector data_; }; @@ -53,4 +73,21 @@ mutable_byte_buffer vector_byte_buffer::create() { return mutable_byte_buffer{std::make_shared()}; } +mutable_byte_buffer vector_byte_buffer::create(size_t size) { + return mutable_byte_buffer{std::make_shared(size)}; +} + +mutable_byte_buffer vector_byte_buffer::create(std::string_view data) { + return mutable_byte_buffer{std::make_shared(data)}; +} + +mutable_byte_buffer vector_byte_buffer::create(std::span data) { + return mutable_byte_buffer{std::make_shared(data)}; +} + +mutable_byte_buffer vector_byte_buffer::create(std::vector&& data) { + return mutable_byte_buffer{ + std::make_shared(std::move(data))}; +} + } // namespace dwarfs diff --git a/src/writer/filesystem_writer.cpp b/src/writer/filesystem_writer.cpp index 6382f34b..60047825 100644 --- a/src/writer/filesystem_writer.cpp +++ b/src/writer/filesystem_writer.cpp @@ -48,7 +48,6 @@ #include #include -#include #include #include #include @@ -122,8 +121,7 @@ class compression_progress : public progress::context { class fsblock { public: fsblock(section_type type, block_compressor const& bc, - std::shared_ptr&& data, - std::shared_ptr pctx, + shared_byte_buffer data, std::shared_ptr pctx, folly::Function set_block_cb = nullptr); fsblock(section_type type, compression_type compression, @@ -198,12 +196,12 @@ class fsblock_merger_policy { class raw_fsblock : public fsblock::impl { public: raw_fsblock(section_type type, block_compressor const& bc, - std::shared_ptr&& data, + shared_byte_buffer data, std::shared_ptr pctx, folly::Function set_block_cb) : type_{type} , bc_{bc} - , uncompressed_size_{data->size()} + , uncompressed_size_{data.size()} , data_{std::move(data)} , comp_type_{bc_.type()} , pctx_{std::move(pctx)} @@ -215,30 +213,30 @@ class raw_fsblock : public fsblock::impl { std::promise prom; future_ = prom.get_future(); - wg.add_job([this, prom = std::move(prom), - meta = std::move(meta)]() mutable { - try { - std::shared_ptr tmp; + wg.add_job( + [this, prom = std::move(prom), meta = std::move(meta)]() mutable { + try { + shared_byte_buffer tmp; - if (meta) { - tmp = std::make_shared(bc_.compress(data_->vec(), *meta)); - } else { - tmp = std::make_shared(bc_.compress(data_->vec())); - } + if (meta) { + tmp = bc_.compress(data_, *meta); + } else { + tmp = bc_.compress(data_); + } - pctx_->bytes_in += data_->vec().size(); - pctx_->bytes_out += tmp->vec().size(); + pctx_->bytes_in += data_.size(); + pctx_->bytes_out += tmp.size(); - { - std::lock_guard lock(mx_); - data_.swap(tmp); - } - } catch (bad_compression_ratio_error const&) { - comp_type_ = compression_type::NONE; - } + { + std::lock_guard lock(mx_); + data_.swap(tmp); + } + } catch (bad_compression_ratio_error const&) { + comp_type_ = compression_type::NONE; + } - prom.set_value(); - }); + prom.set_value(); + }); } void wait_until_compressed() override { future_.wait(); } @@ -249,13 +247,13 @@ class raw_fsblock : public fsblock::impl { std::string description() const override { return bc_.describe(); } - std::span data() const override { return data_->vec(); } + std::span data() const override { return data_.span(); } size_t uncompressed_size() const override { return uncompressed_size_; } size_t size() const override { std::lock_guard lock(mx_); - return data_->size(); + return data_.size(); } void set_block_no(uint32_t number) override { @@ -291,7 +289,7 @@ class raw_fsblock : public fsblock::impl { block_compressor const& bc_; size_t const uncompressed_size_; mutable std::recursive_mutex mx_; - std::shared_ptr data_; + shared_byte_buffer data_; std::future future_; std::optional number_; std::optional mutable header_; @@ -382,7 +380,7 @@ class rewritten_fsblock : public fsblock::impl { wg.add_job( [this, prom = std::move(prom), meta = std::move(meta)]() mutable { try { - std::vector block; + shared_byte_buffer block; { // TODO: we don't have to do this for uncompressed blocks @@ -398,9 +396,9 @@ class rewritten_fsblock : public fsblock::impl { try { if (meta) { - block = bc_.compress(buffer.span(), *meta); + block = bc_.compress(buffer.share(), *meta); } else { - block = bc_.compress(buffer.span()); + block = bc_.compress(buffer.share()); } } catch (bad_compression_ratio_error const&) { comp_type_ = compression_type::NONE; @@ -411,7 +409,7 @@ class rewritten_fsblock : public fsblock::impl { { std::lock_guard lock(mx_); - block_data_.swap(block); + block_data_.emplace(std::move(block)); } prom.set_value(); @@ -429,13 +427,18 @@ class rewritten_fsblock : public fsblock::impl { std::string description() const override { return bc_.describe(); } - std::span data() const override { return block_data_; } + std::span data() const override { + std::lock_guard lock(mx_); + return block_data_.value().span(); + } size_t uncompressed_size() const override { return data_.size(); } size_t size() const override { std::lock_guard lock(mx_); - return block_data_.size(); + // TODO: this should not be called when block_data_ is not set, figure + // out who calls this + return block_data_.has_value() ? block_data_->size() : 0; } void set_block_no(uint32_t number) override { @@ -467,7 +470,7 @@ class rewritten_fsblock : public fsblock::impl { block_compressor const& bc_; mutable std::recursive_mutex mx_; std::span data_; - std::vector block_data_; + std::optional block_data_; std::future future_; std::optional number_; std::optional mutable header_; @@ -477,7 +480,7 @@ class rewritten_fsblock : public fsblock::impl { }; fsblock::fsblock(section_type type, block_compressor const& bc, - std::shared_ptr&& data, + shared_byte_buffer data, std::shared_ptr pctx, folly::Function set_block_cb) : impl_(std::make_unique(type, bc, std::move(data), @@ -573,13 +576,13 @@ class filesystem_writer_ final : public filesystem_writer_detail { size_t max_active_slots) override; void configure_rewrite(size_t filesystem_size, size_t block_count) override; void copy_header(std::span header) override; - void write_block(fragment_category cat, std::shared_ptr&& data, + void write_block(fragment_category cat, shared_byte_buffer data, physical_block_cb_type physical_block_cb, std::optional meta) override; void finish_category(fragment_category cat) override; - void write_metadata_v2_schema(std::shared_ptr&& data) override; - void write_metadata_v2(std::shared_ptr&& data) override; - void write_history(std::shared_ptr&& data) override; + void write_metadata_v2_schema(shared_byte_buffer data) override; + void write_metadata_v2(shared_byte_buffer data) override; + void write_history(shared_byte_buffer data) override; void check_block_compression( compression_type compression, std::span data, std::optional cat) override; @@ -600,12 +603,11 @@ class filesystem_writer_ final : public filesystem_writer_detail { block_compressor const& compressor_for_category(fragment_category::value_type cat) const; void - write_block_impl(fragment_category cat, std::shared_ptr&& data, + write_block_impl(fragment_category cat, shared_byte_buffer data, block_compressor const& bc, std::optional meta, physical_block_cb_type physical_block_cb); void on_block_merged(block_holder_type holder); - void - write_section_impl(section_type type, std::shared_ptr&& data); + void write_section_impl(section_type type, shared_byte_buffer data); void write(fsblock const& fsb); void write(char const* data, size_t size); template @@ -779,9 +781,8 @@ filesystem_writer_::compressor_for_category( template void filesystem_writer_::write_block_impl( - fragment_category cat, std::shared_ptr&& data, - block_compressor const& bc, std::optional meta, - physical_block_cb_type physical_block_cb) { + fragment_category cat, shared_byte_buffer data, block_compressor const& bc, + std::optional meta, physical_block_cb_type physical_block_cb) { if (!merger_) { DWARFS_THROW(runtime_error, "filesystem_writer not configured"); } @@ -840,7 +841,7 @@ void filesystem_writer_::finish_category(fragment_category cat) { template void filesystem_writer_::write_section_impl( - section_type type, std::shared_ptr&& data) { + section_type type, shared_byte_buffer data) { auto& bc = get_compressor(type, std::nullopt); uint32_t number; @@ -1071,7 +1072,7 @@ void filesystem_writer_::copy_header( template void filesystem_writer_::write_block( - fragment_category cat, std::shared_ptr&& data, + fragment_category cat, shared_byte_buffer data, physical_block_cb_type physical_block_cb, std::optional meta) { write_block_impl(cat, std::move(data), compressor_for_category(cat.value()), std::move(meta), std::move(physical_block_cb)); @@ -1079,19 +1080,18 @@ void filesystem_writer_::write_block( template void filesystem_writer_::write_metadata_v2_schema( - std::shared_ptr&& data) { + shared_byte_buffer data) { write_section_impl(section_type::METADATA_V2_SCHEMA, std::move(data)); } template void filesystem_writer_::write_metadata_v2( - std::shared_ptr&& data) { + shared_byte_buffer data) { write_section_impl(section_type::METADATA_V2, std::move(data)); } template -void filesystem_writer_::write_history( - std::shared_ptr&& data) { +void filesystem_writer_::write_history(shared_byte_buffer data) { write_section_impl(section_type::HISTORY, std::move(data)); } diff --git a/src/writer/internal/metadata_freezer.cpp b/src/writer/internal/metadata_freezer.cpp index 924a39fa..e8fd2958 100644 --- a/src/writer/internal/metadata_freezer.cpp +++ b/src/writer/internal/metadata_freezer.cpp @@ -22,6 +22,8 @@ #include #include +#include + #include #include @@ -34,8 +36,7 @@ namespace dwarfs::writer::internal { namespace { template -std::pair, std::vector> -freeze_to_buffer(T const& x) { +std::pair freeze_to_buffer(T const& x) { using namespace ::apache::thrift::frozen; Layout layout; @@ -44,24 +45,22 @@ freeze_to_buffer(T const& x) { std::string schema; serializeRootLayout(layout, schema); - size_t schema_size = schema.size(); - auto schema_begin = reinterpret_cast(schema.data()); - std::vector schema_buffer(schema_begin, schema_begin + schema_size); + auto schema_buffer = vector_byte_buffer::create(schema); - std::vector data_buffer; - data_buffer.resize(content_size, 0); + auto data_buffer = vector_byte_buffer::create(content_size); folly::MutableByteRange content_range(data_buffer.data(), data_buffer.size()); ByteRangeFreezer::freeze(layout, x, content_range); data_buffer.resize(data_buffer.size() - content_range.size()); + data_buffer.shrink_to_fit(); - return {schema_buffer, data_buffer}; + return {schema_buffer.share(), data_buffer.share()}; } } // namespace -std::pair, std::vector> +std::pair metadata_freezer::freeze(thrift::metadata::metadata const& data) { return freeze_to_buffer(data); } diff --git a/src/writer/scanner.cpp b/src/writer/scanner.cpp index c940483b..27e1ce24 100644 --- a/src/writer/scanner.cpp +++ b/src/writer/scanner.cpp @@ -63,7 +63,6 @@ #include #include #include -#include #include #include #include @@ -1107,13 +1106,13 @@ void scanner_::scan( LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size()); - fsw.write_metadata_v2_schema(std::make_shared(std::move(schema))); - fsw.write_metadata_v2(std::make_shared(std::move(data))); + fsw.write_metadata_v2_schema(schema); + fsw.write_metadata_v2(data); if (options_.enable_history) { history hist(options_.history); hist.append(options_.command_line_arguments); - fsw.write_history(std::make_shared(hist.serialize())); + fsw.write_history(hist.serialize()); } LOG_INFO << "waiting for compression to finish..."; diff --git a/src/writer/segmenter.cpp b/src/writer/segmenter.cpp index 3b047ccb..9f6fc6d8 100644 --- a/src/writer/segmenter.cpp +++ b/src/writer/segmenter.cpp @@ -46,10 +46,10 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -576,23 +576,23 @@ class active_block : private GranularityPolicy { , filter_(bloom_filter_size) , repseqmap_{repseqmap} , repeating_collisions_{repcoll} - , data_{std::make_shared()} { + , data_{vector_byte_buffer::create()} { DWARFS_CHECK((window_step & window_step_mask_) == 0, "window step size not a power of two"); - data_->reserve(this->frames_to_bytes(capacity_in_frames_)); + data_.reserve(this->frames_to_bytes(capacity_in_frames_)); } DWARFS_FORCE_INLINE size_t num() const { return num_; } DWARFS_FORCE_INLINE size_t size_in_frames() const { - return this->bytes_to_frames(data_->size()); + return this->bytes_to_frames(data_.size()); } DWARFS_FORCE_INLINE bool full() const { return size_in_frames() == capacity_in_frames_; } - DWARFS_FORCE_INLINE std::shared_ptr data() const { return data_; } + DWARFS_FORCE_INLINE mutable_byte_buffer data() const { return data_; } DWARFS_FORCE_INLINE void append_bytes(std::span data, bloom_filter& global_filter); @@ -637,7 +637,7 @@ class active_block : private GranularityPolicy { fast_multimap offsets_; repeating_sequence_map_type const& repseqmap_; repeating_collisions_map_type& repeating_collisions_; - std::shared_ptr data_; + mutable_byte_buffer data_; }; class segmenter_progress : public progress::context { @@ -841,7 +841,7 @@ DWARFS_FORCE_INLINE bool active_block::is_existing_repeating_sequence( hash_t hashval, size_t offset) { if (auto it = repseqmap_.find(hashval); it != repseqmap_.end()) [[unlikely]] { - auto& raw = data_->vec(); + auto& raw = data_.raw_vector(); auto winbeg = raw.begin() + frames_to_bytes(offset); auto winend = winbeg + frames_to_bytes(window_size_); auto byte = *winbeg; @@ -881,7 +881,7 @@ active_block::append_bytes( granular_span_adapter>(data); auto v = this->template create< - granular_vector_adapter>(data_->vec()); + granular_vector_adapter>(data_.raw_vector()); auto offset = v.size(); @@ -920,7 +920,7 @@ void segment_match::verify_and_extend( size_t pos, size_t len, size_t begin, size_t end) { auto v = this->template create< granular_vector_adapter>( - block_->data()->vec()); + block_->data().raw_vector()); // First, check if the regions actually match if (v.compare(offset_, data.subspan(pos, len)) == 0) { @@ -1034,7 +1034,7 @@ DWARFS_FORCE_INLINE void segmenter_::block_ready() { auto& block = blocks_.back(); block.finalize(stats_); - block_ready_(block.data(), block.num()); + block_ready_(block.data().share(), block.num()); ++prog_.block_count; } diff --git a/test/flac_compressor_test.cpp b/test/flac_compressor_test.cpp index c3cf5ff4..0ee4afc4 100644 --- a/test/flac_compressor_test.cpp +++ b/test/flac_compressor_test.cpp @@ -60,7 +60,7 @@ std::vector multiplex(std::vector> const& in) { } template -std::vector +shared_byte_buffer make_test_data(int channels, int samples, int bytes, int bits, pcm_sample_endianness end, pcm_sample_signedness sig, pcm_sample_padding pad) { @@ -70,10 +70,11 @@ make_test_data(int channels, int samples, int bytes, int bits, make_sine(bits, samples, 3.1 * ((599 * (c + 1)) % 256))); } auto muxed = multiplex(data); - std::vector out(bytes * channels * samples); + auto out = vector_byte_buffer::create(); + out.resize(bytes * channels * samples); pcm_sample_transformer xfm(end, sig, pad, bytes, bits); - xfm.pack(out, muxed); - return out; + xfm.pack(out.span(), muxed); + return out.share(); } struct data_params { @@ -151,7 +152,7 @@ TEST(flac_compressor, basic) { EXPECT_LT(compressed.size(), data.size() / 2); auto decompressed = - block_decompressor::decompress(compression_type::FLAC, compressed); + block_decompressor::decompress(compression_type::FLAC, compressed.span()); EXPECT_EQ(data, decompressed); } @@ -184,7 +185,7 @@ TEST_P(flac_param, combinations) { EXPECT_LT(compressed.size(), data.size() / 2); auto decompressed = - block_decompressor::decompress(compression_type::FLAC, compressed); + block_decompressor::decompress(compression_type::FLAC, compressed.span()); EXPECT_EQ(data, decompressed); } diff --git a/test/ricepp_compressor_test.cpp b/test/ricepp_compressor_test.cpp index fad9ac5c..12ceb18f 100644 --- a/test/ricepp_compressor_test.cpp +++ b/test/ricepp_compressor_test.cpp @@ -37,6 +37,7 @@ #include #include +#include using namespace dwarfs; @@ -61,8 +62,7 @@ generate_random_data(std::mt19937_64& rng, size_t count, } template -std::vector -make_test_data(int components, int pixels, int unused_lsb) { +shared_byte_buffer make_test_data(int components, int pixels, int unused_lsb) { std::mt19937_64 rng(42); std::uniform_int_distribution any_value( 0, std::numeric_limits::max()); @@ -93,11 +93,11 @@ make_test_data(int components, int pixels, int unused_lsb) { } } - std::vector out; + auto out = vector_byte_buffer::create(); out.resize(tmp.size() * sizeof(ValueType)); std::memcpy(out.data(), tmp.data(), out.size()); - return out; + return out.share(); } struct data_params { @@ -151,8 +151,8 @@ TEST_P(ricepp_param, combinations) { EXPECT_LT(compressed.size(), 7 * data.size() / 10); - auto decompressed = - block_decompressor::decompress(compression_type::RICEPP, compressed); + auto decompressed = block_decompressor::decompress(compression_type::RICEPP, + compressed.span()); ASSERT_EQ(data.size(), decompressed.size()); EXPECT_EQ(data, decompressed); diff --git a/test/segmenter_benchmark.cpp b/test/segmenter_benchmark.cpp index 2ff07170..5779024a 100644 --- a/test/segmenter_benchmark.cpp +++ b/test/segmenter_benchmark.cpp @@ -28,7 +28,6 @@ #include #include -#include #include #include @@ -145,13 +144,12 @@ void run_segmenter_test(unsigned iters, unsigned granularity, dwarfs::writer::writer_progress prog; auto blkmgr = std::make_shared(); - std::vector> written; + std::vector written; dwarfs::writer::segmenter seg( lgr, prog, blkmgr, cfg, cc, total_size, - [&written, - blkmgr](std::shared_ptr blk, - auto logical_block_num) { + [&written, blkmgr](dwarfs::shared_byte_buffer blk, + auto logical_block_num) { auto physical_block_num = written.size(); written.push_back(blk); blkmgr->set_written_block(logical_block_num, physical_block_num, 0); @@ -167,7 +165,7 @@ void run_segmenter_test(unsigned iters, unsigned granularity, size_t segmented [[maybe_unused]]{0}; for (auto const& blk : written) { - segmented += blk->size(); + segmented += blk.size(); } // std::cerr << total_size << " -> " << segmented << fmt::format("