refactor: replace block_data with byte_buffer

This commit is contained in:
Marcus Holland-Moritz 2025-03-29 16:52:54 +01:00
parent a19145388c
commit 06f8728cc3
28 changed files with 251 additions and 235 deletions

View File

@ -61,12 +61,12 @@ class block_compressor {
block_compressor(block_compressor&& bc) = default; block_compressor(block_compressor&& bc) = default;
block_compressor& operator=(block_compressor&& rhs) = default; block_compressor& operator=(block_compressor&& rhs) = default;
std::vector<uint8_t> compress(std::span<uint8_t const> data) const { shared_byte_buffer compress(shared_byte_buffer const& data) const {
return impl_->compress(data, nullptr); return impl_->compress(data, nullptr);
} }
std::vector<uint8_t> shared_byte_buffer
compress(std::span<uint8_t const> data, std::string const& metadata) const { compress(shared_byte_buffer const& data, std::string const& metadata) const {
return impl_->compress(data, &metadata); return impl_->compress(data, &metadata);
} }
@ -91,9 +91,8 @@ class block_compressor {
virtual std::unique_ptr<impl> clone() const = 0; virtual std::unique_ptr<impl> clone() const = 0;
virtual std::vector<uint8_t> virtual shared_byte_buffer compress(shared_byte_buffer const& data,
compress(std::span<uint8_t const> data, std::string const* metadata) const = 0;
std::string const* metadata) const = 0;
virtual compression_type type() const = 0; virtual compression_type type() const = 0;
virtual std::string describe() const = 0; virtual std::string describe() const = 0;
@ -152,7 +151,7 @@ class compression_info {
virtual std::string_view name() const = 0; virtual std::string_view name() const = 0;
virtual std::string_view description() const = 0; virtual std::string_view description() const = 0;
virtual std::vector<std::string> const& options() const = 0; virtual std::vector<std::string> const& options() const = 0; // TODO: span?
virtual std::set<std::string> library_dependencies() const = 0; virtual std::set<std::string> library_dependencies() const = 0;
}; };

View File

@ -25,6 +25,7 @@
#include <concepts> #include <concepts>
#include <memory> #include <memory>
#include <span> #include <span>
#include <vector>
namespace dwarfs { namespace dwarfs {
@ -45,15 +46,23 @@ class byte_buffer_interface {
public: public:
virtual ~byte_buffer_interface() = default; virtual ~byte_buffer_interface() = default;
virtual uint8_t const* data() const = 0;
virtual size_t size() const = 0;
virtual std::span<uint8_t const> span() const = 0; virtual std::span<uint8_t const> span() const = 0;
}; };
class mutable_byte_buffer_interface : public byte_buffer_interface { class mutable_byte_buffer_interface : public byte_buffer_interface {
public: public:
virtual uint8_t* mutable_data() = 0;
virtual std::span<uint8_t> mutable_span() = 0; virtual std::span<uint8_t> mutable_span() = 0;
virtual void clear() = 0; virtual void clear() = 0;
virtual void reserve(size_t size) = 0; virtual void reserve(size_t size) = 0;
virtual void resize(size_t size) = 0; virtual void resize(size_t size) = 0;
virtual void shrink_to_fit() = 0;
// TODO: See if we can do without this. This will *only* be implemented
// in the vector_byte_buffer, other implementations will throw.
virtual std::vector<uint8_t>& raw_vector() = 0;
}; };
class shared_byte_buffer { class shared_byte_buffer {
@ -63,14 +72,16 @@ class shared_byte_buffer {
explicit shared_byte_buffer(std::shared_ptr<byte_buffer_interface const> bb) explicit shared_byte_buffer(std::shared_ptr<byte_buffer_interface const> bb)
: bb_{std::move(bb)} {} : bb_{std::move(bb)} {}
uint8_t const* data() const { return span().data(); } uint8_t const* data() const { return bb_->data(); }
size_t size() const { return span().size(); } size_t size() const { return bb_->size(); }
bool empty() const { return span().empty(); } bool empty() const { return bb_->size() == 0; }
std::span<uint8_t const> span() const { return bb_->span(); } std::span<uint8_t const> span() const { return bb_->span(); }
void swap(shared_byte_buffer& other) noexcept { std::swap(bb_, other.bb_); }
template <detail::byte_range T> template <detail::byte_range T>
friend bool operator==(shared_byte_buffer const& lhs, T const& rhs) { friend bool operator==(shared_byte_buffer const& lhs, T const& rhs) {
return detail::compare_spans(lhs.span(), {rhs.data(), rhs.size()}) == return detail::compare_spans(lhs.span(), {rhs.data(), rhs.size()}) ==
@ -78,6 +89,7 @@ class shared_byte_buffer {
} }
template <detail::byte_range T> template <detail::byte_range T>
requires(!std::same_as<T, shared_byte_buffer>)
friend bool operator==(T const& lhs, shared_byte_buffer const& rhs) { friend bool operator==(T const& lhs, shared_byte_buffer const& rhs) {
return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()) == return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()) ==
std::strong_ordering::equal; std::strong_ordering::equal;
@ -90,6 +102,7 @@ class shared_byte_buffer {
} }
template <detail::byte_range T> template <detail::byte_range T>
requires(!std::same_as<T, shared_byte_buffer>)
friend std::strong_ordering friend std::strong_ordering
operator<=>(T const& lhs, shared_byte_buffer const& rhs) { operator<=>(T const& lhs, shared_byte_buffer const& rhs) {
return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()); return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span());
@ -105,13 +118,13 @@ class mutable_byte_buffer {
std::shared_ptr<mutable_byte_buffer_interface> bb) std::shared_ptr<mutable_byte_buffer_interface> bb)
: bb_{std::move(bb)} {} : bb_{std::move(bb)} {}
uint8_t const* data() const { return span().data(); } uint8_t const* data() const { return bb_->data(); }
uint8_t* data() { return span().data(); } uint8_t* data() { return bb_->mutable_data(); }
size_t size() const { return span().size(); } size_t size() const { return bb_->size(); }
bool empty() const { return span().empty(); } bool empty() const { return bb_->size() == 0; }
std::span<uint8_t const> span() const { return bb_->span(); } std::span<uint8_t const> span() const { return bb_->span(); }
@ -123,6 +136,12 @@ class mutable_byte_buffer {
void resize(size_t size) { bb_->resize(size); } void resize(size_t size) { bb_->resize(size); }
void shrink_to_fit() { bb_->shrink_to_fit(); }
std::vector<uint8_t>& raw_vector() { return bb_->raw_vector(); }
void swap(mutable_byte_buffer& other) noexcept { std::swap(bb_, other.bb_); }
template <detail::byte_range T> template <detail::byte_range T>
friend bool operator==(mutable_byte_buffer const& lhs, T const& rhs) { friend bool operator==(mutable_byte_buffer const& lhs, T const& rhs) {
return detail::compare_spans(lhs.span(), {rhs.data(), rhs.size()}) == return detail::compare_spans(lhs.span(), {rhs.data(), rhs.size()}) ==
@ -130,6 +149,7 @@ class mutable_byte_buffer {
} }
template <detail::byte_range T> template <detail::byte_range T>
requires(!std::same_as<T, mutable_byte_buffer>)
friend bool operator==(T const& lhs, mutable_byte_buffer const& rhs) { friend bool operator==(T const& lhs, mutable_byte_buffer const& rhs) {
return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()) == return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()) ==
std::strong_ordering::equal; std::strong_ordering::equal;
@ -142,6 +162,7 @@ class mutable_byte_buffer {
} }
template <detail::byte_range T> template <detail::byte_range T>
requires(!std::same_as<T, mutable_byte_buffer>)
friend std::strong_ordering friend std::strong_ordering
operator<=>(T const& lhs, mutable_byte_buffer const& rhs) { operator<=>(T const& lhs, mutable_byte_buffer const& rhs) {
return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()); return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span());

View File

@ -30,6 +30,7 @@
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include <dwarfs/byte_buffer.h>
#include <dwarfs/history_config.h> #include <dwarfs/history_config.h>
namespace dwarfs { namespace dwarfs {
@ -50,7 +51,7 @@ class history {
thrift::history::history const& get() const { return *history_; } thrift::history::history const& get() const { return *history_; }
void append(std::optional<std::vector<std::string>> args); void append(std::optional<std::vector<std::string>> args);
size_t size() const; size_t size() const;
std::vector<uint8_t> serialize() const; shared_byte_buffer serialize() const;
void dump(std::ostream& os) const; void dump(std::ostream& os) const;
nlohmann::json as_json() const; nlohmann::json as_json() const;

View File

@ -21,6 +21,8 @@
#pragma once #pragma once
#include <string>
#include <dwarfs/byte_buffer.h> #include <dwarfs/byte_buffer.h>
namespace dwarfs { namespace dwarfs {
@ -28,6 +30,10 @@ namespace dwarfs {
class vector_byte_buffer { class vector_byte_buffer {
public: public:
static mutable_byte_buffer create(); static mutable_byte_buffer create();
static mutable_byte_buffer create(size_t size);
static mutable_byte_buffer create(std::string_view data);
static mutable_byte_buffer create(std::span<uint8_t const> data);
static mutable_byte_buffer create(std::vector<uint8_t>&& data);
}; };
} // namespace dwarfs } // namespace dwarfs

View File

@ -1,54 +0,0 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
#include <string_view>
#include <vector>
namespace dwarfs::writer::internal {
/**
 * Owning container for the bytes of a single block.
 *
 * This is a thin wrapper around a `std::vector<uint8_t>`; every member
 * simply forwards to the wrapped vector.
 */
class block_data {
 public:
  /// Creates a block that holds no bytes.
  block_data() = default;

  /// Takes over the contents of `vec` by moving from it.
  explicit block_data(std::vector<uint8_t>&& vec)
    : vec_(std::move(vec)) {}

  /// Copies the characters of `str` into the block, one byte per character.
  explicit block_data(std::string_view str)
    : vec_(str.begin(), str.end()) {}

  /// Number of bytes currently stored.
  size_t size() const { return vec_.size(); }

  /// True if no bytes are stored.
  bool empty() const { return vec_.empty(); }

  /// Requests capacity for at least `size` bytes; the stored bytes and
  /// the reported size are unchanged.
  void reserve(size_t size) { vec_.reserve(size); }

  /// Pointer to the first stored byte (read-only / writable overloads).
  uint8_t const* data() const { return vec_.data(); }
  uint8_t* data() { return vec_.data(); }

  /// Direct access to the wrapped vector (read-only / writable overloads).
  std::vector<uint8_t> const& vec() const { return vec_; }
  std::vector<uint8_t>& vec() { return vec_; }

 private:
  std::vector<uint8_t> vec_; // backing storage for the block's bytes
};
} // namespace dwarfs::writer::internal

View File

@ -31,6 +31,7 @@
#include <vector> #include <vector>
#include <dwarfs/block_compressor.h> #include <dwarfs/block_compressor.h>
#include <dwarfs/byte_buffer.h>
#include <dwarfs/compression_constraints.h> #include <dwarfs/compression_constraints.h>
#include <dwarfs/fstypes.h> #include <dwarfs/fstypes.h>
#include <dwarfs/writer/fragment_category.h> #include <dwarfs/writer/fragment_category.h>
@ -45,8 +46,6 @@ class fs_section;
namespace writer::internal { namespace writer::internal {
class block_data;
class filesystem_writer_detail { class filesystem_writer_detail {
public: public:
virtual ~filesystem_writer_detail() = default; virtual ~filesystem_writer_detail() = default;
@ -71,14 +70,13 @@ class filesystem_writer_detail {
virtual void virtual void
configure_rewrite(size_t filesystem_size, size_t block_count) = 0; configure_rewrite(size_t filesystem_size, size_t block_count) = 0;
virtual void copy_header(std::span<uint8_t const> header) = 0; virtual void copy_header(std::span<uint8_t const> header) = 0;
virtual void virtual void write_block(fragment_category cat, shared_byte_buffer data,
write_block(fragment_category cat, std::shared_ptr<block_data>&& data, physical_block_cb_type physical_block_cb,
physical_block_cb_type physical_block_cb, std::optional<std::string> meta = std::nullopt) = 0;
std::optional<std::string> meta = std::nullopt) = 0;
virtual void finish_category(fragment_category cat) = 0; virtual void finish_category(fragment_category cat) = 0;
virtual void write_metadata_v2_schema(std::shared_ptr<block_data>&& data) = 0; virtual void write_metadata_v2_schema(shared_byte_buffer data) = 0;
virtual void write_metadata_v2(std::shared_ptr<block_data>&& data) = 0; virtual void write_metadata_v2(shared_byte_buffer data) = 0;
virtual void write_history(std::shared_ptr<block_data>&& data) = 0; virtual void write_history(shared_byte_buffer data) = 0;
virtual void check_block_compression( virtual void check_block_compression(
compression_type compression, std::span<uint8_t const> data, compression_type compression, std::span<uint8_t const> data,
std::optional<fragment_category::value_type> cat = std::nullopt) = 0; std::optional<fragment_category::value_type> cat = std::nullopt) = 0;

View File

@ -23,7 +23,8 @@
#include <cstdint> #include <cstdint>
#include <utility> #include <utility>
#include <vector>
#include <dwarfs/byte_buffer.h>
namespace dwarfs { namespace dwarfs {
@ -35,7 +36,7 @@ namespace writer::internal {
class metadata_freezer { class metadata_freezer {
public: public:
static std::pair<std::vector<uint8_t>, std::vector<uint8_t>> static std::pair<shared_byte_buffer, shared_byte_buffer>
freeze(thrift::metadata::metadata const& data); freeze(thrift::metadata::metadata const& data);
}; };

View File

@ -26,6 +26,8 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include <dwarfs/byte_buffer.h>
namespace dwarfs { namespace dwarfs {
struct compression_constraints; struct compression_constraints;
@ -38,7 +40,6 @@ class writer_progress;
namespace internal { namespace internal {
class block_data;
class block_manager; class block_manager;
class chunkable; class chunkable;
@ -55,8 +56,8 @@ class segmenter {
unsigned block_size_bits{22}; unsigned block_size_bits{22};
}; };
using block_ready_cb = std::function<void( using block_ready_cb =
std::shared_ptr<internal::block_data>, size_t logical_block_num)>; std::function<void(shared_byte_buffer, size_t logical_block_num)>;
segmenter(logger& lgr, writer_progress& prog, segmenter(logger& lgr, writer_progress& prog,
std::shared_ptr<internal::block_manager> blkmgr, config const& cfg, std::shared_ptr<internal::block_manager> blkmgr, config const& cfg,

View File

@ -31,6 +31,7 @@
#include <dwarfs/fstypes.h> #include <dwarfs/fstypes.h>
#include <dwarfs/option_map.h> #include <dwarfs/option_map.h>
#include <dwarfs/varint.h> #include <dwarfs/varint.h>
#include <dwarfs/vector_byte_buffer.h>
namespace dwarfs { namespace dwarfs {
@ -48,10 +49,9 @@ class brotli_block_compressor final : public block_compressor::impl {
return std::make_unique<brotli_block_compressor>(*this); return std::make_unique<brotli_block_compressor>(*this);
} }
std::vector<uint8_t> shared_byte_buffer compress(shared_byte_buffer const& data,
compress(std::span<uint8_t const> data, std::string const* /*metadata*/) const override {
std::string const* /*metadata*/) const override { auto compressed = vector_byte_buffer::create(); // TODO: make configurable
std::vector<uint8_t> compressed;
compressed.resize(varint::max_size + compressed.resize(varint::max_size +
::BrotliEncoderMaxCompressedSize(data.size())); ::BrotliEncoderMaxCompressedSize(data.size()));
size_t size_size = varint::encode(data.size(), compressed.data()); size_t size_size = varint::encode(data.size(), compressed.data());
@ -66,7 +66,7 @@ class brotli_block_compressor final : public block_compressor::impl {
throw bad_compression_ratio_error(); throw bad_compression_ratio_error();
} }
compressed.shrink_to_fit(); compressed.shrink_to_fit();
return compressed; return compressed.share();
} }
compression_type type() const override { return compression_type::BROTLI; } compression_type type() const override { return compression_type::BROTLI; }

View File

@ -38,6 +38,7 @@
#include <dwarfs/option_map.h> #include <dwarfs/option_map.h>
#include <dwarfs/pcm_sample_transformer.h> #include <dwarfs/pcm_sample_transformer.h>
#include <dwarfs/varint.h> #include <dwarfs/varint.h>
#include <dwarfs/vector_byte_buffer.h>
#include <dwarfs/gen-cpp2/compression_types.h> #include <dwarfs/gen-cpp2/compression_types.h>
@ -53,7 +54,7 @@ constexpr size_t const kBlockSize{65536};
class dwarfs_flac_stream_encoder final : public FLAC::Encoder::Stream { class dwarfs_flac_stream_encoder final : public FLAC::Encoder::Stream {
public: public:
explicit dwarfs_flac_stream_encoder(std::vector<uint8_t>& data) explicit dwarfs_flac_stream_encoder(mutable_byte_buffer& data)
: data_{data} : data_{data}
, pos_{data_.size()} {} , pos_{data_.size()} {}
@ -90,7 +91,7 @@ class dwarfs_flac_stream_encoder final : public FLAC::Encoder::Stream {
} }
private: private:
std::vector<uint8_t>& data_; mutable_byte_buffer& data_;
size_t pos_; size_t pos_;
}; };
@ -208,8 +209,8 @@ class flac_block_compressor final : public block_compressor::impl {
return std::make_unique<flac_block_compressor>(*this); return std::make_unique<flac_block_compressor>(*this);
} }
std::vector<uint8_t> compress(std::span<uint8_t const> data, shared_byte_buffer compress(shared_byte_buffer const& data,
std::string const* metadata) const override { std::string const* metadata) const override {
if (!metadata) { if (!metadata) {
DWARFS_THROW(runtime_error, DWARFS_THROW(runtime_error,
"internal error: flac compression requires metadata"); "internal error: flac compression requires metadata");
@ -265,7 +266,7 @@ class flac_block_compressor final : public block_compressor::impl {
pcm_pad = pcm_sample_padding::Msb; pcm_pad = pcm_sample_padding::Msb;
} }
std::vector<uint8_t> compressed; auto compressed = vector_byte_buffer::create(); // TODO: make configurable
{ {
using namespace ::apache::thrift; using namespace ::apache::thrift;
@ -286,7 +287,7 @@ class flac_block_compressor final : public block_compressor::impl {
CompactSerializer::serialize(hdr, &hdrbuf); CompactSerializer::serialize(hdr, &hdrbuf);
compressed.resize(pos + hdrbuf.size()); compressed.resize(pos + hdrbuf.size());
::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size()); ::memcpy(compressed.data() + pos, hdrbuf.data(), hdrbuf.size());
} }
dwarfs_flac_stream_encoder encoder(compressed); dwarfs_flac_stream_encoder encoder(compressed);
@ -341,7 +342,7 @@ class flac_block_compressor final : public block_compressor::impl {
compressed.shrink_to_fit(); compressed.shrink_to_fit();
return compressed; return compressed.share();
} }
compression_type type() const override { return compression_type::FLAC; } compression_type type() const override { return compression_type::FLAC; }

View File

@ -29,6 +29,7 @@
#include <dwarfs/error.h> #include <dwarfs/error.h>
#include <dwarfs/fstypes.h> #include <dwarfs/fstypes.h>
#include <dwarfs/option_map.h> #include <dwarfs/option_map.h>
#include <dwarfs/vector_byte_buffer.h>
namespace dwarfs { namespace dwarfs {
@ -69,18 +70,18 @@ class lz4_block_compressor final : public block_compressor::impl {
return std::make_unique<lz4_block_compressor>(*this); return std::make_unique<lz4_block_compressor>(*this);
} }
std::vector<uint8_t> shared_byte_buffer compress(shared_byte_buffer const& data,
compress(std::span<uint8_t const> data, std::string const* /*metadata*/) const override {
std::string const* /*metadata*/) const override { auto compressed = vector_byte_buffer::create(); // TODO: make configurable
std::vector<uint8_t> compressed(sizeof(uint32_t) + compressed.resize(sizeof(uint32_t) +
LZ4_compressBound(to<int>(data.size()))); LZ4_compressBound(to<int>(data.size())));
// TODO: this should have been a varint; also, if we ever support // TODO: this should have been a varint; also, if we ever support
// big-endian systems, we'll have to properly convert this // big-endian systems, we'll have to properly convert this
uint32_t size = data.size(); uint32_t size = data.size();
std::memcpy(compressed.data(), &size, sizeof(size)); std::memcpy(compressed.data(), &size, sizeof(size));
auto csize = Policy::compress(data.data(), &compressed[sizeof(uint32_t)], auto csize = Policy::compress(
data.size(), data.data(), compressed.data() + sizeof(uint32_t), data.size(),
compressed.size() - sizeof(uint32_t), level_); compressed.size() - sizeof(uint32_t), level_);
if (csize == 0) { if (csize == 0) {
DWARFS_THROW(runtime_error, "error during compression"); DWARFS_THROW(runtime_error, "error during compression");
} }
@ -88,7 +89,7 @@ class lz4_block_compressor final : public block_compressor::impl {
throw bad_compression_ratio_error(); throw bad_compression_ratio_error();
} }
compressed.resize(sizeof(uint32_t) + csize); compressed.resize(sizeof(uint32_t) + csize);
return compressed; return compressed.share();
} }
compression_type type() const override { return compression_type::LZ4; } compression_type type() const override { return compression_type::LZ4; }

View File

@ -37,6 +37,7 @@
#include <dwarfs/option_map.h> #include <dwarfs/option_map.h>
#include <dwarfs/sorted_array_map.h> #include <dwarfs/sorted_array_map.h>
#include <dwarfs/types.h> #include <dwarfs/types.h>
#include <dwarfs/vector_byte_buffer.h>
namespace dwarfs { namespace dwarfs {
@ -111,8 +112,8 @@ class lzma_block_compressor final : public block_compressor::impl {
return std::make_unique<lzma_block_compressor>(*this); return std::make_unique<lzma_block_compressor>(*this);
} }
std::vector<uint8_t> compress(std::span<uint8_t const> data, shared_byte_buffer compress(shared_byte_buffer const& data,
std::string const* metadata) const override; std::string const* metadata) const override;
compression_type type() const override { return compression_type::LZMA; } compression_type type() const override { return compression_type::LZMA; }
@ -126,8 +127,8 @@ class lzma_block_compressor final : public block_compressor::impl {
} }
private: private:
std::vector<uint8_t> shared_byte_buffer
compress(std::span<uint8_t const> data, lzma_filter const* filters) const; compress(shared_byte_buffer const& data, lzma_filter const* filters) const;
static uint32_t get_preset(unsigned level, bool extreme) { static uint32_t get_preset(unsigned level, bool extreme) {
uint32_t preset = level; uint32_t preset = level;
@ -197,8 +198,8 @@ lzma_block_compressor::lzma_block_compressor(option_map& om) {
} }
} }
std::vector<uint8_t> shared_byte_buffer
lzma_block_compressor::compress(std::span<uint8_t const> data, lzma_block_compressor::compress(shared_byte_buffer const& data,
lzma_filter const* filters) const { lzma_filter const* filters) const {
lzma_stream s = LZMA_STREAM_INIT; lzma_stream s = LZMA_STREAM_INIT;
@ -210,7 +211,8 @@ lzma_block_compressor::compress(std::span<uint8_t const> data,
lzma_action action = LZMA_FINISH; lzma_action action = LZMA_FINISH;
std::vector<uint8_t> compressed(data.size() - 1); auto compressed = vector_byte_buffer::create(); // TODO: make configurable
compressed.resize(data.size() - 1);
s.next_in = data.data(); s.next_in = data.data();
s.avail_in = data.size(); s.avail_in = data.size();
@ -234,21 +236,21 @@ lzma_block_compressor::compress(std::span<uint8_t const> data,
lzma_error_string(ret))); lzma_error_string(ret)));
} }
return compressed; return compressed.share();
} }
std::vector<uint8_t> shared_byte_buffer
lzma_block_compressor::compress(std::span<uint8_t const> data, lzma_block_compressor::compress(shared_byte_buffer const& data,
std::string const* /*metadata*/) const { std::string const* /*metadata*/) const {
auto lzma_opts = opt_lzma_; auto lzma_opts = opt_lzma_;
std::array<lzma_filter, 3> filters{{{binary_vli_, nullptr}, std::array<lzma_filter, 3> filters{{{binary_vli_, nullptr},
{LZMA_FILTER_LZMA2, &lzma_opts}, {LZMA_FILTER_LZMA2, &lzma_opts},
{LZMA_VLI_UNKNOWN, nullptr}}}; {LZMA_VLI_UNKNOWN, nullptr}}};
std::vector<uint8_t> best = compress(data, &filters[1]); auto best = compress(data, &filters[1]);
if (filters[0].id != LZMA_VLI_UNKNOWN) { if (filters[0].id != LZMA_VLI_UNKNOWN) {
std::vector<uint8_t> compressed = compress(data, filters.data()); auto compressed = compress(data, filters.data());
if (compressed.size() < best.size()) { if (compressed.size() < best.size()) {
best.swap(compressed); best.swap(compressed);

View File

@ -41,11 +41,9 @@ class null_block_compressor final : public block_compressor::impl {
return std::make_unique<null_block_compressor>(*this); return std::make_unique<null_block_compressor>(*this);
} }
// TODO: we should not have to copy the data here... shared_byte_buffer compress(shared_byte_buffer const& data,
std::vector<uint8_t> std::string const* /*metadata*/) const override {
compress(std::span<uint8_t const> data, return data;
std::string const* /*metadata*/) const override {
return std::vector<uint8_t>(data.begin(), data.end());
} }
compression_type type() const override { return compression_type::NONE; } compression_type type() const override { return compression_type::NONE; }

View File

@ -52,8 +52,8 @@ class ricepp_block_compressor final : public block_compressor::impl {
return std::make_unique<ricepp_block_compressor>(*this); return std::make_unique<ricepp_block_compressor>(*this);
} }
std::vector<uint8_t> compress(std::span<uint8_t const> data, shared_byte_buffer compress(shared_byte_buffer const& data,
std::string const* metadata) const override { std::string const* metadata) const override {
if (!metadata) { if (!metadata) {
DWARFS_THROW(runtime_error, DWARFS_THROW(runtime_error,
"internal error: ricepp compression requires metadata"); "internal error: ricepp compression requires metadata");
@ -88,8 +88,10 @@ class ricepp_block_compressor final : public block_compressor::impl {
.unused_lsb_count = static_cast<unsigned>(unused_lsb_count), .unused_lsb_count = static_cast<unsigned>(unused_lsb_count),
}); });
std::vector<uint8_t> compressed; auto compressed = vector_byte_buffer::create(); // TODO: make configurable
// TODO: see if we can resize just once...
// TODO: maybe the mutable_byte_buffer interface can have .append()?
{ {
using namespace ::apache::thrift; using namespace ::apache::thrift;
@ -111,7 +113,7 @@ class ricepp_block_compressor final : public block_compressor::impl {
CompactSerializer::serialize(hdr, &hdrbuf); CompactSerializer::serialize(hdr, &hdrbuf);
compressed.resize(pos + hdrbuf.size()); compressed.resize(pos + hdrbuf.size());
::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size()); ::memcpy(compressed.data() + pos, hdrbuf.data(), hdrbuf.size());
} }
std::span<pixel_type const> input{ std::span<pixel_type const> input{
@ -121,13 +123,11 @@ class ricepp_block_compressor final : public block_compressor::impl {
size_t header_size = compressed.size(); size_t header_size = compressed.size();
compressed.resize(header_size + codec->worst_case_encoded_bytes(input)); compressed.resize(header_size + codec->worst_case_encoded_bytes(input));
std::span<uint8_t> buffer(compressed); auto output = codec->encode(compressed.span().subspan(header_size), input);
auto output = codec->encode(buffer.subspan(header_size), input);
compressed.resize(header_size + output.size()); compressed.resize(header_size + output.size());
compressed.shrink_to_fit(); compressed.shrink_to_fit();
return compressed; return compressed.share();
} }
compression_type type() const override { return compression_type::RICEPP; } compression_type type() const override { return compression_type::RICEPP; }

View File

@ -29,6 +29,7 @@
#include <dwarfs/error.h> #include <dwarfs/error.h>
#include <dwarfs/fstypes.h> #include <dwarfs/fstypes.h>
#include <dwarfs/option_map.h> #include <dwarfs/option_map.h>
#include <dwarfs/vector_byte_buffer.h>
#include <dwarfs/zstd_context_manager.h> #include <dwarfs/zstd_context_manager.h>
#if ZSTD_VERSION_MAJOR > 1 || \ #if ZSTD_VERSION_MAJOR > 1 || \
@ -54,8 +55,8 @@ class zstd_block_compressor final : public block_compressor::impl {
return std::make_unique<zstd_block_compressor>(*this); return std::make_unique<zstd_block_compressor>(*this);
} }
std::vector<uint8_t> compress(std::span<uint8_t const> data, shared_byte_buffer compress(shared_byte_buffer const& data,
std::string const* metadata) const override; std::string const* metadata) const override;
compression_type type() const override { return compression_type::ZSTD; } compression_type type() const override { return compression_type::ZSTD; }
@ -87,10 +88,11 @@ class zstd_block_compressor final : public block_compressor::impl {
int const level_; int const level_;
}; };
std::vector<uint8_t> shared_byte_buffer
zstd_block_compressor::compress(std::span<uint8_t const> data, zstd_block_compressor::compress(shared_byte_buffer const& data,
std::string const* /*metadata*/) const { std::string const* /*metadata*/) const {
std::vector<uint8_t> compressed(ZSTD_compressBound(data.size())); auto compressed = vector_byte_buffer::create(); // TODO: make configurable
compressed.resize(ZSTD_compressBound(data.size()));
auto ctx = ctxmgr_->make_context(); auto ctx = ctxmgr_->make_context();
auto size = ZSTD_compressCCtx(ctx.get(), compressed.data(), compressed.size(), auto size = ZSTD_compressCCtx(ctx.get(), compressed.data(), compressed.size(),
data.data(), data.size(), level_); data.data(), data.size(), level_);
@ -103,7 +105,7 @@ zstd_block_compressor::compress(std::span<uint8_t const> data,
} }
compressed.resize(size); compressed.resize(size);
compressed.shrink_to_fit(); compressed.shrink_to_fit();
return compressed; return compressed.share();
} }
class zstd_block_decompressor final : public block_decompressor::impl { class zstd_block_decompressor final : public block_decompressor::impl {

View File

@ -29,6 +29,7 @@
#include <dwarfs/config.h> #include <dwarfs/config.h>
#include <dwarfs/history.h> #include <dwarfs/history.h>
#include <dwarfs/library_dependencies.h> #include <dwarfs/library_dependencies.h>
#include <dwarfs/vector_byte_buffer.h>
#include <dwarfs/version.h> #include <dwarfs/version.h>
#include <dwarfs/gen-cpp2/history_types.h> #include <dwarfs/gen-cpp2/history_types.h>
@ -80,10 +81,10 @@ void history::append(std::optional<std::vector<std::string>> args) {
size_t history::size() const { return history_->entries()->size(); } size_t history::size() const { return history_->entries()->size(); }
std::vector<uint8_t> history::serialize() const { shared_byte_buffer history::serialize() const {
std::string buf; std::string buf;
::apache::thrift::CompactSerializer::serialize(*history_, &buf); ::apache::thrift::CompactSerializer::serialize(*history_, &buf);
return {buf.begin(), buf.end()}; return vector_byte_buffer::create(buf).share();
} }
void history::dump(std::ostream& os) const { void history::dump(std::ostream& os) const {

View File

@ -32,6 +32,10 @@ class mapped_byte_buffer_impl : public byte_buffer_interface {
: data_{data} : data_{data}
, mm_{std::move(mm)} {} , mm_{std::move(mm)} {}
size_t size() const override { return data_.size(); }
uint8_t const* data() const override { return data_.data(); }
std::span<uint8_t const> span() const override { std::span<uint8_t const> span() const override {
return {data_.data(), data_.size()}; return {data_.data(), data_.size()};
} }

View File

@ -53,7 +53,6 @@
#include <dwarfs/reader/internal/filesystem_parser.h> #include <dwarfs/reader/internal/filesystem_parser.h>
#include <dwarfs/reader/internal/inode_reader_v2.h> #include <dwarfs/reader/internal/inode_reader_v2.h>
#include <dwarfs/reader/internal/metadata_v2.h> #include <dwarfs/reader/internal/metadata_v2.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/filesystem_writer_detail.h> #include <dwarfs/writer/internal/filesystem_writer_detail.h>
namespace dwarfs::reader { namespace dwarfs::reader {

View File

@ -78,6 +78,9 @@ class cached_block_ final : public cached_block {
// This can be called from any thread // This can be called from any thread
size_t range_end() const override { return range_end_.load(); } size_t range_end() const override { return range_end_.load(); }
// TODO: The code relies on the fact that the data_ buffer is never
// reallocated once block decompression has started. I would like to
// somehow enforce that this cannot happen.
uint8_t const* data() const override { return data_.data(); } uint8_t const* data() const override { return data_.data(); }
void decompress_until(size_t end) override { void decompress_until(size_t end) override {

View File

@ -28,7 +28,6 @@
#include <dwarfs/writer/filesystem_writer.h> #include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/reader/internal/filesystem_parser.h> #include <dwarfs/reader/internal/filesystem_parser.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/filesystem_writer_detail.h> #include <dwarfs/writer/internal/filesystem_writer_detail.h>
namespace dwarfs::utility { namespace dwarfs::utility {
@ -38,7 +37,6 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
dwarfs::writer::category_resolver const& cat_resolver, dwarfs::writer::category_resolver const& cat_resolver,
rewrite_options const& opts) { rewrite_options const& opts) {
using dwarfs::writer::fragment_category; using dwarfs::writer::fragment_category;
using dwarfs::writer::internal::block_data;
LOG_PROXY(debug_logger_policy, lgr); LOG_PROXY(debug_logger_policy, lgr);
@ -174,7 +172,7 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
case section_type::HISTORY: case section_type::HISTORY:
if (opts.enable_history) { if (opts.enable_history) {
history hist{opts.history}; history hist{opts.history};
hist.parse(fs.get_history().serialize()); hist.parse(fs.get_history().serialize().span());
hist.append(opts.command_line_arguments); hist.append(opts.command_line_arguments);
LOG_VERBOSE << "updating " << get_section_name(s->type()) << " (" LOG_VERBOSE << "updating " << get_section_name(s->type()) << " ("
@ -182,7 +180,7 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
<< "), compressing using '" << "), compressing using '"
<< writer.get_compressor(s->type()).describe() << "'"; << writer.get_compressor(s->type()).describe() << "'";
writer.write_history(std::make_shared<block_data>(hist.serialize())); writer.write_history(hist.serialize());
} else { } else {
LOG_VERBOSE << "removing " << get_section_name(s->type()); LOG_VERBOSE << "removing " << get_section_name(s->type());
} }

View File

@ -29,6 +29,22 @@ namespace {
class vector_byte_buffer_impl : public mutable_byte_buffer_interface { class vector_byte_buffer_impl : public mutable_byte_buffer_interface {
public: public:
vector_byte_buffer_impl() = default;
explicit vector_byte_buffer_impl(size_t size)
: data_(size) {}
explicit vector_byte_buffer_impl(std::string_view data)
: data_{data.begin(), data.end()} {}
explicit vector_byte_buffer_impl(std::span<uint8_t const> data)
: data_{data.begin(), data.end()} {}
explicit vector_byte_buffer_impl(std::vector<uint8_t>&& data)
: data_{std::move(data)} {}
size_t size() const override { return data_.size(); }
uint8_t const* data() const override { return data_.data(); }
uint8_t* mutable_data() override { return data_.data(); }
std::span<uint8_t const> span() const override { std::span<uint8_t const> span() const override {
return {data_.data(), data_.size()}; return {data_.data(), data_.size()};
} }
@ -43,6 +59,10 @@ class vector_byte_buffer_impl : public mutable_byte_buffer_interface {
void resize(size_t size) override { data_.resize(size); } void resize(size_t size) override { data_.resize(size); }
void shrink_to_fit() override { data_.shrink_to_fit(); }
std::vector<uint8_t>& raw_vector() override { return data_; }
private: private:
std::vector<uint8_t> data_; std::vector<uint8_t> data_;
}; };
@ -53,4 +73,21 @@ mutable_byte_buffer vector_byte_buffer::create() {
return mutable_byte_buffer{std::make_shared<vector_byte_buffer_impl>()}; return mutable_byte_buffer{std::make_shared<vector_byte_buffer_impl>()};
} }
mutable_byte_buffer vector_byte_buffer::create(size_t size) {
return mutable_byte_buffer{std::make_shared<vector_byte_buffer_impl>(size)};
}
mutable_byte_buffer vector_byte_buffer::create(std::string_view data) {
return mutable_byte_buffer{std::make_shared<vector_byte_buffer_impl>(data)};
}
mutable_byte_buffer vector_byte_buffer::create(std::span<uint8_t const> data) {
return mutable_byte_buffer{std::make_shared<vector_byte_buffer_impl>(data)};
}
mutable_byte_buffer vector_byte_buffer::create(std::vector<uint8_t>&& data) {
return mutable_byte_buffer{
std::make_shared<vector_byte_buffer_impl>(std::move(data))};
}
} // namespace dwarfs } // namespace dwarfs

View File

@ -48,7 +48,6 @@
#include <dwarfs/internal/fs_section.h> #include <dwarfs/internal/fs_section.h>
#include <dwarfs/internal/worker_group.h> #include <dwarfs/internal/worker_group.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/filesystem_writer_detail.h> #include <dwarfs/writer/internal/filesystem_writer_detail.h>
#include <dwarfs/writer/internal/multi_queue_block_merger.h> #include <dwarfs/writer/internal/multi_queue_block_merger.h>
#include <dwarfs/writer/internal/progress.h> #include <dwarfs/writer/internal/progress.h>
@ -122,8 +121,7 @@ class compression_progress : public progress::context {
class fsblock { class fsblock {
public: public:
fsblock(section_type type, block_compressor const& bc, fsblock(section_type type, block_compressor const& bc,
std::shared_ptr<block_data>&& data, shared_byte_buffer data, std::shared_ptr<compression_progress> pctx,
std::shared_ptr<compression_progress> pctx,
folly::Function<void(size_t)> set_block_cb = nullptr); folly::Function<void(size_t)> set_block_cb = nullptr);
fsblock(section_type type, compression_type compression, fsblock(section_type type, compression_type compression,
@ -198,12 +196,12 @@ class fsblock_merger_policy {
class raw_fsblock : public fsblock::impl { class raw_fsblock : public fsblock::impl {
public: public:
raw_fsblock(section_type type, block_compressor const& bc, raw_fsblock(section_type type, block_compressor const& bc,
std::shared_ptr<block_data>&& data, shared_byte_buffer data,
std::shared_ptr<compression_progress> pctx, std::shared_ptr<compression_progress> pctx,
folly::Function<void(size_t)> set_block_cb) folly::Function<void(size_t)> set_block_cb)
: type_{type} : type_{type}
, bc_{bc} , bc_{bc}
, uncompressed_size_{data->size()} , uncompressed_size_{data.size()}
, data_{std::move(data)} , data_{std::move(data)}
, comp_type_{bc_.type()} , comp_type_{bc_.type()}
, pctx_{std::move(pctx)} , pctx_{std::move(pctx)}
@ -215,30 +213,30 @@ class raw_fsblock : public fsblock::impl {
std::promise<void> prom; std::promise<void> prom;
future_ = prom.get_future(); future_ = prom.get_future();
wg.add_job([this, prom = std::move(prom), wg.add_job(
meta = std::move(meta)]() mutable { [this, prom = std::move(prom), meta = std::move(meta)]() mutable {
try { try {
std::shared_ptr<block_data> tmp; shared_byte_buffer tmp;
if (meta) { if (meta) {
tmp = std::make_shared<block_data>(bc_.compress(data_->vec(), *meta)); tmp = bc_.compress(data_, *meta);
} else { } else {
tmp = std::make_shared<block_data>(bc_.compress(data_->vec())); tmp = bc_.compress(data_);
} }
pctx_->bytes_in += data_->vec().size(); pctx_->bytes_in += data_.size();
pctx_->bytes_out += tmp->vec().size(); pctx_->bytes_out += tmp.size();
{ {
std::lock_guard lock(mx_); std::lock_guard lock(mx_);
data_.swap(tmp); data_.swap(tmp);
} }
} catch (bad_compression_ratio_error const&) { } catch (bad_compression_ratio_error const&) {
comp_type_ = compression_type::NONE; comp_type_ = compression_type::NONE;
} }
prom.set_value(); prom.set_value();
}); });
} }
void wait_until_compressed() override { future_.wait(); } void wait_until_compressed() override { future_.wait(); }
@ -249,13 +247,13 @@ class raw_fsblock : public fsblock::impl {
std::string description() const override { return bc_.describe(); } std::string description() const override { return bc_.describe(); }
std::span<uint8_t const> data() const override { return data_->vec(); } std::span<uint8_t const> data() const override { return data_.span(); }
size_t uncompressed_size() const override { return uncompressed_size_; } size_t uncompressed_size() const override { return uncompressed_size_; }
size_t size() const override { size_t size() const override {
std::lock_guard lock(mx_); std::lock_guard lock(mx_);
return data_->size(); return data_.size();
} }
void set_block_no(uint32_t number) override { void set_block_no(uint32_t number) override {
@ -291,7 +289,7 @@ class raw_fsblock : public fsblock::impl {
block_compressor const& bc_; block_compressor const& bc_;
size_t const uncompressed_size_; size_t const uncompressed_size_;
mutable std::recursive_mutex mx_; mutable std::recursive_mutex mx_;
std::shared_ptr<block_data> data_; shared_byte_buffer data_;
std::future<void> future_; std::future<void> future_;
std::optional<uint32_t> number_; std::optional<uint32_t> number_;
std::optional<section_header_v2> mutable header_; std::optional<section_header_v2> mutable header_;
@ -382,7 +380,7 @@ class rewritten_fsblock : public fsblock::impl {
wg.add_job( wg.add_job(
[this, prom = std::move(prom), meta = std::move(meta)]() mutable { [this, prom = std::move(prom), meta = std::move(meta)]() mutable {
try { try {
std::vector<uint8_t> block; shared_byte_buffer block;
{ {
// TODO: we don't have to do this for uncompressed blocks // TODO: we don't have to do this for uncompressed blocks
@ -398,9 +396,9 @@ class rewritten_fsblock : public fsblock::impl {
try { try {
if (meta) { if (meta) {
block = bc_.compress(buffer.span(), *meta); block = bc_.compress(buffer.share(), *meta);
} else { } else {
block = bc_.compress(buffer.span()); block = bc_.compress(buffer.share());
} }
} catch (bad_compression_ratio_error const&) { } catch (bad_compression_ratio_error const&) {
comp_type_ = compression_type::NONE; comp_type_ = compression_type::NONE;
@ -411,7 +409,7 @@ class rewritten_fsblock : public fsblock::impl {
{ {
std::lock_guard lock(mx_); std::lock_guard lock(mx_);
block_data_.swap(block); block_data_.emplace(std::move(block));
} }
prom.set_value(); prom.set_value();
@ -429,13 +427,18 @@ class rewritten_fsblock : public fsblock::impl {
std::string description() const override { return bc_.describe(); } std::string description() const override { return bc_.describe(); }
std::span<uint8_t const> data() const override { return block_data_; } std::span<uint8_t const> data() const override {
std::lock_guard lock(mx_);
return block_data_.value().span();
}
size_t uncompressed_size() const override { return data_.size(); } size_t uncompressed_size() const override { return data_.size(); }
size_t size() const override { size_t size() const override {
std::lock_guard lock(mx_); std::lock_guard lock(mx_);
return block_data_.size(); // TODO: this should not be called when block_data_ is not set, figure
// out who calls this
return block_data_.has_value() ? block_data_->size() : 0;
} }
void set_block_no(uint32_t number) override { void set_block_no(uint32_t number) override {
@ -467,7 +470,7 @@ class rewritten_fsblock : public fsblock::impl {
block_compressor const& bc_; block_compressor const& bc_;
mutable std::recursive_mutex mx_; mutable std::recursive_mutex mx_;
std::span<uint8_t const> data_; std::span<uint8_t const> data_;
std::vector<uint8_t> block_data_; std::optional<shared_byte_buffer> block_data_;
std::future<void> future_; std::future<void> future_;
std::optional<uint32_t> number_; std::optional<uint32_t> number_;
std::optional<section_header_v2> mutable header_; std::optional<section_header_v2> mutable header_;
@ -477,7 +480,7 @@ class rewritten_fsblock : public fsblock::impl {
}; };
fsblock::fsblock(section_type type, block_compressor const& bc, fsblock::fsblock(section_type type, block_compressor const& bc,
std::shared_ptr<block_data>&& data, shared_byte_buffer data,
std::shared_ptr<compression_progress> pctx, std::shared_ptr<compression_progress> pctx,
folly::Function<void(size_t)> set_block_cb) folly::Function<void(size_t)> set_block_cb)
: impl_(std::make_unique<raw_fsblock>(type, bc, std::move(data), : impl_(std::make_unique<raw_fsblock>(type, bc, std::move(data),
@ -573,13 +576,13 @@ class filesystem_writer_ final : public filesystem_writer_detail {
size_t max_active_slots) override; size_t max_active_slots) override;
void configure_rewrite(size_t filesystem_size, size_t block_count) override; void configure_rewrite(size_t filesystem_size, size_t block_count) override;
void copy_header(std::span<uint8_t const> header) override; void copy_header(std::span<uint8_t const> header) override;
void write_block(fragment_category cat, std::shared_ptr<block_data>&& data, void write_block(fragment_category cat, shared_byte_buffer data,
physical_block_cb_type physical_block_cb, physical_block_cb_type physical_block_cb,
std::optional<std::string> meta) override; std::optional<std::string> meta) override;
void finish_category(fragment_category cat) override; void finish_category(fragment_category cat) override;
void write_metadata_v2_schema(std::shared_ptr<block_data>&& data) override; void write_metadata_v2_schema(shared_byte_buffer data) override;
void write_metadata_v2(std::shared_ptr<block_data>&& data) override; void write_metadata_v2(shared_byte_buffer data) override;
void write_history(std::shared_ptr<block_data>&& data) override; void write_history(shared_byte_buffer data) override;
void check_block_compression( void check_block_compression(
compression_type compression, std::span<uint8_t const> data, compression_type compression, std::span<uint8_t const> data,
std::optional<fragment_category::value_type> cat) override; std::optional<fragment_category::value_type> cat) override;
@ -600,12 +603,11 @@ class filesystem_writer_ final : public filesystem_writer_detail {
block_compressor const& block_compressor const&
compressor_for_category(fragment_category::value_type cat) const; compressor_for_category(fragment_category::value_type cat) const;
void void
write_block_impl(fragment_category cat, std::shared_ptr<block_data>&& data, write_block_impl(fragment_category cat, shared_byte_buffer data,
block_compressor const& bc, std::optional<std::string> meta, block_compressor const& bc, std::optional<std::string> meta,
physical_block_cb_type physical_block_cb); physical_block_cb_type physical_block_cb);
void on_block_merged(block_holder_type holder); void on_block_merged(block_holder_type holder);
void void write_section_impl(section_type type, shared_byte_buffer data);
write_section_impl(section_type type, std::shared_ptr<block_data>&& data);
void write(fsblock const& fsb); void write(fsblock const& fsb);
void write(char const* data, size_t size); void write(char const* data, size_t size);
template <typename T> template <typename T>
@ -779,9 +781,8 @@ filesystem_writer_<LoggerPolicy>::compressor_for_category(
template <typename LoggerPolicy> template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_block_impl( void filesystem_writer_<LoggerPolicy>::write_block_impl(
fragment_category cat, std::shared_ptr<block_data>&& data, fragment_category cat, shared_byte_buffer data, block_compressor const& bc,
block_compressor const& bc, std::optional<std::string> meta, std::optional<std::string> meta, physical_block_cb_type physical_block_cb) {
physical_block_cb_type physical_block_cb) {
if (!merger_) { if (!merger_) {
DWARFS_THROW(runtime_error, "filesystem_writer not configured"); DWARFS_THROW(runtime_error, "filesystem_writer not configured");
} }
@ -840,7 +841,7 @@ void filesystem_writer_<LoggerPolicy>::finish_category(fragment_category cat) {
template <typename LoggerPolicy> template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_section_impl( void filesystem_writer_<LoggerPolicy>::write_section_impl(
section_type type, std::shared_ptr<block_data>&& data) { section_type type, shared_byte_buffer data) {
auto& bc = get_compressor(type, std::nullopt); auto& bc = get_compressor(type, std::nullopt);
uint32_t number; uint32_t number;
@ -1071,7 +1072,7 @@ void filesystem_writer_<LoggerPolicy>::copy_header(
template <typename LoggerPolicy> template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_block( void filesystem_writer_<LoggerPolicy>::write_block(
fragment_category cat, std::shared_ptr<block_data>&& data, fragment_category cat, shared_byte_buffer data,
physical_block_cb_type physical_block_cb, std::optional<std::string> meta) { physical_block_cb_type physical_block_cb, std::optional<std::string> meta) {
write_block_impl(cat, std::move(data), compressor_for_category(cat.value()), write_block_impl(cat, std::move(data), compressor_for_category(cat.value()),
std::move(meta), std::move(physical_block_cb)); std::move(meta), std::move(physical_block_cb));
@ -1079,19 +1080,18 @@ void filesystem_writer_<LoggerPolicy>::write_block(
template <typename LoggerPolicy> template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_metadata_v2_schema( void filesystem_writer_<LoggerPolicy>::write_metadata_v2_schema(
std::shared_ptr<block_data>&& data) { shared_byte_buffer data) {
write_section_impl(section_type::METADATA_V2_SCHEMA, std::move(data)); write_section_impl(section_type::METADATA_V2_SCHEMA, std::move(data));
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_metadata_v2( void filesystem_writer_<LoggerPolicy>::write_metadata_v2(
std::shared_ptr<block_data>&& data) { shared_byte_buffer data) {
write_section_impl(section_type::METADATA_V2, std::move(data)); write_section_impl(section_type::METADATA_V2, std::move(data));
} }
template <typename LoggerPolicy> template <typename LoggerPolicy>
void filesystem_writer_<LoggerPolicy>::write_history( void filesystem_writer_<LoggerPolicy>::write_history(shared_byte_buffer data) {
std::shared_ptr<block_data>&& data) {
write_section_impl(section_type::HISTORY, std::move(data)); write_section_impl(section_type::HISTORY, std::move(data));
} }

View File

@ -22,6 +22,8 @@
#include <thrift/lib/cpp2/frozen/FrozenUtil.h> #include <thrift/lib/cpp2/frozen/FrozenUtil.h>
#include <thrift/lib/cpp2/protocol/Serializer.h> #include <thrift/lib/cpp2/protocol/Serializer.h>
#include <dwarfs/vector_byte_buffer.h>
#include <dwarfs/writer/internal/metadata_freezer.h> #include <dwarfs/writer/internal/metadata_freezer.h>
#include <dwarfs/gen-cpp2/metadata_layouts.h> #include <dwarfs/gen-cpp2/metadata_layouts.h>
@ -34,8 +36,7 @@ namespace dwarfs::writer::internal {
namespace { namespace {
template <class T> template <class T>
std::pair<std::vector<uint8_t>, std::vector<uint8_t>> std::pair<shared_byte_buffer, shared_byte_buffer> freeze_to_buffer(T const& x) {
freeze_to_buffer(T const& x) {
using namespace ::apache::thrift::frozen; using namespace ::apache::thrift::frozen;
Layout<T> layout; Layout<T> layout;
@ -44,24 +45,22 @@ freeze_to_buffer(T const& x) {
std::string schema; std::string schema;
serializeRootLayout(layout, schema); serializeRootLayout(layout, schema);
size_t schema_size = schema.size(); auto schema_buffer = vector_byte_buffer::create(schema);
auto schema_begin = reinterpret_cast<uint8_t const*>(schema.data());
std::vector<uint8_t> schema_buffer(schema_begin, schema_begin + schema_size);
std::vector<uint8_t> data_buffer; auto data_buffer = vector_byte_buffer::create(content_size);
data_buffer.resize(content_size, 0);
folly::MutableByteRange content_range(data_buffer.data(), data_buffer.size()); folly::MutableByteRange content_range(data_buffer.data(), data_buffer.size());
ByteRangeFreezer::freeze(layout, x, content_range); ByteRangeFreezer::freeze(layout, x, content_range);
data_buffer.resize(data_buffer.size() - content_range.size()); data_buffer.resize(data_buffer.size() - content_range.size());
data_buffer.shrink_to_fit();
return {schema_buffer, data_buffer}; return {schema_buffer.share(), data_buffer.share()};
} }
} // namespace } // namespace
std::pair<std::vector<uint8_t>, std::vector<uint8_t>> std::pair<shared_byte_buffer, shared_byte_buffer>
metadata_freezer::freeze(thrift::metadata::metadata const& data) { metadata_freezer::freeze(thrift::metadata::metadata const& data) {
return freeze_to_buffer(data); return freeze_to_buffer(data);
} }

View File

@ -63,7 +63,6 @@
#include <dwarfs/internal/features.h> #include <dwarfs/internal/features.h>
#include <dwarfs/internal/string_table.h> #include <dwarfs/internal/string_table.h>
#include <dwarfs/internal/worker_group.h> #include <dwarfs/internal/worker_group.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/block_manager.h> #include <dwarfs/writer/internal/block_manager.h>
#include <dwarfs/writer/internal/entry.h> #include <dwarfs/writer/internal/entry.h>
#include <dwarfs/writer/internal/file_scanner.h> #include <dwarfs/writer/internal/file_scanner.h>
@ -1107,13 +1106,13 @@ void scanner_<LoggerPolicy>::scan(
LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size()); LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size());
fsw.write_metadata_v2_schema(std::make_shared<block_data>(std::move(schema))); fsw.write_metadata_v2_schema(schema);
fsw.write_metadata_v2(std::make_shared<block_data>(std::move(data))); fsw.write_metadata_v2(data);
if (options_.enable_history) { if (options_.enable_history) {
history hist(options_.history); history hist(options_.history);
hist.append(options_.command_line_arguments); hist.append(options_.command_line_arguments);
fsw.write_history(std::make_shared<block_data>(hist.serialize())); fsw.write_history(hist.serialize());
} }
LOG_INFO << "waiting for compression to finish..."; LOG_INFO << "waiting for compression to finish...";

View File

@ -46,10 +46,10 @@
#include <dwarfs/error.h> #include <dwarfs/error.h>
#include <dwarfs/logger.h> #include <dwarfs/logger.h>
#include <dwarfs/util.h> #include <dwarfs/util.h>
#include <dwarfs/vector_byte_buffer.h>
#include <dwarfs/writer/segmenter.h> #include <dwarfs/writer/segmenter.h>
#include <dwarfs/writer/writer_progress.h> #include <dwarfs/writer/writer_progress.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/block_manager.h> #include <dwarfs/writer/internal/block_manager.h>
#include <dwarfs/writer/internal/chunkable.h> #include <dwarfs/writer/internal/chunkable.h>
#include <dwarfs/writer/internal/cyclic_hash.h> #include <dwarfs/writer/internal/cyclic_hash.h>
@ -576,23 +576,23 @@ class active_block : private GranularityPolicy {
, filter_(bloom_filter_size) , filter_(bloom_filter_size)
, repseqmap_{repseqmap} , repseqmap_{repseqmap}
, repeating_collisions_{repcoll} , repeating_collisions_{repcoll}
, data_{std::make_shared<block_data>()} { , data_{vector_byte_buffer::create()} {
DWARFS_CHECK((window_step & window_step_mask_) == 0, DWARFS_CHECK((window_step & window_step_mask_) == 0,
"window step size not a power of two"); "window step size not a power of two");
data_->reserve(this->frames_to_bytes(capacity_in_frames_)); data_.reserve(this->frames_to_bytes(capacity_in_frames_));
} }
DWARFS_FORCE_INLINE size_t num() const { return num_; } DWARFS_FORCE_INLINE size_t num() const { return num_; }
DWARFS_FORCE_INLINE size_t size_in_frames() const { DWARFS_FORCE_INLINE size_t size_in_frames() const {
return this->bytes_to_frames(data_->size()); return this->bytes_to_frames(data_.size());
} }
DWARFS_FORCE_INLINE bool full() const { DWARFS_FORCE_INLINE bool full() const {
return size_in_frames() == capacity_in_frames_; return size_in_frames() == capacity_in_frames_;
} }
DWARFS_FORCE_INLINE std::shared_ptr<block_data> data() const { return data_; } DWARFS_FORCE_INLINE mutable_byte_buffer data() const { return data_; }
DWARFS_FORCE_INLINE void DWARFS_FORCE_INLINE void
append_bytes(std::span<uint8_t const> data, bloom_filter& global_filter); append_bytes(std::span<uint8_t const> data, bloom_filter& global_filter);
@ -637,7 +637,7 @@ class active_block : private GranularityPolicy {
fast_multimap<hash_t, offset_t, num_inline_offsets> offsets_; fast_multimap<hash_t, offset_t, num_inline_offsets> offsets_;
repeating_sequence_map_type const& repseqmap_; repeating_sequence_map_type const& repseqmap_;
repeating_collisions_map_type& repeating_collisions_; repeating_collisions_map_type& repeating_collisions_;
std::shared_ptr<block_data> data_; mutable_byte_buffer data_;
}; };
class segmenter_progress : public progress::context { class segmenter_progress : public progress::context {
@ -841,7 +841,7 @@ DWARFS_FORCE_INLINE bool
active_block<LoggerPolicy, GranularityPolicy>::is_existing_repeating_sequence( active_block<LoggerPolicy, GranularityPolicy>::is_existing_repeating_sequence(
hash_t hashval, size_t offset) { hash_t hashval, size_t offset) {
if (auto it = repseqmap_.find(hashval); it != repseqmap_.end()) [[unlikely]] { if (auto it = repseqmap_.find(hashval); it != repseqmap_.end()) [[unlikely]] {
auto& raw = data_->vec(); auto& raw = data_.raw_vector();
auto winbeg = raw.begin() + frames_to_bytes(offset); auto winbeg = raw.begin() + frames_to_bytes(offset);
auto winend = winbeg + frames_to_bytes(window_size_); auto winend = winbeg + frames_to_bytes(window_size_);
auto byte = *winbeg; auto byte = *winbeg;
@ -881,7 +881,7 @@ active_block<LoggerPolicy, GranularityPolicy>::append_bytes(
granular_span_adapter<uint8_t const, GranularityPolicy>>(data); granular_span_adapter<uint8_t const, GranularityPolicy>>(data);
auto v = this->template create< auto v = this->template create<
granular_vector_adapter<uint8_t, GranularityPolicy>>(data_->vec()); granular_vector_adapter<uint8_t, GranularityPolicy>>(data_.raw_vector());
auto offset = v.size(); auto offset = v.size();
@ -920,7 +920,7 @@ void segment_match<LoggerPolicy, GranularityPolicy>::verify_and_extend(
size_t pos, size_t len, size_t begin, size_t end) { size_t pos, size_t len, size_t begin, size_t end) {
auto v = this->template create< auto v = this->template create<
granular_vector_adapter<uint8_t, GranularityPolicy>>( granular_vector_adapter<uint8_t, GranularityPolicy>>(
block_->data()->vec()); block_->data().raw_vector());
// First, check if the regions actually match // First, check if the regions actually match
if (v.compare(offset_, data.subspan(pos, len)) == 0) { if (v.compare(offset_, data.subspan(pos, len)) == 0) {
@ -1034,7 +1034,7 @@ DWARFS_FORCE_INLINE void
segmenter_<LoggerPolicy, SegmentingPolicy>::block_ready() { segmenter_<LoggerPolicy, SegmentingPolicy>::block_ready() {
auto& block = blocks_.back(); auto& block = blocks_.back();
block.finalize(stats_); block.finalize(stats_);
block_ready_(block.data(), block.num()); block_ready_(block.data().share(), block.num());
++prog_.block_count; ++prog_.block_count;
} }

View File

@ -60,7 +60,7 @@ std::vector<T> multiplex(std::vector<std::vector<T>> const& in) {
} }
template <typename T = int32_t> template <typename T = int32_t>
std::vector<uint8_t> shared_byte_buffer
make_test_data(int channels, int samples, int bytes, int bits, make_test_data(int channels, int samples, int bytes, int bits,
pcm_sample_endianness end, pcm_sample_signedness sig, pcm_sample_endianness end, pcm_sample_signedness sig,
pcm_sample_padding pad) { pcm_sample_padding pad) {
@ -70,10 +70,11 @@ make_test_data(int channels, int samples, int bytes, int bits,
make_sine<T>(bits, samples, 3.1 * ((599 * (c + 1)) % 256))); make_sine<T>(bits, samples, 3.1 * ((599 * (c + 1)) % 256)));
} }
auto muxed = multiplex(data); auto muxed = multiplex(data);
std::vector<uint8_t> out(bytes * channels * samples); auto out = vector_byte_buffer::create();
out.resize(bytes * channels * samples);
pcm_sample_transformer<T> xfm(end, sig, pad, bytes, bits); pcm_sample_transformer<T> xfm(end, sig, pad, bytes, bits);
xfm.pack(out, muxed); xfm.pack(out.span(), muxed);
return out; return out.share();
} }
struct data_params { struct data_params {
@ -151,7 +152,7 @@ TEST(flac_compressor, basic) {
EXPECT_LT(compressed.size(), data.size() / 2); EXPECT_LT(compressed.size(), data.size() / 2);
auto decompressed = auto decompressed =
block_decompressor::decompress(compression_type::FLAC, compressed); block_decompressor::decompress(compression_type::FLAC, compressed.span());
EXPECT_EQ(data, decompressed); EXPECT_EQ(data, decompressed);
} }
@ -184,7 +185,7 @@ TEST_P(flac_param, combinations) {
EXPECT_LT(compressed.size(), data.size() / 2); EXPECT_LT(compressed.size(), data.size() / 2);
auto decompressed = auto decompressed =
block_decompressor::decompress(compression_type::FLAC, compressed); block_decompressor::decompress(compression_type::FLAC, compressed.span());
EXPECT_EQ(data, decompressed); EXPECT_EQ(data, decompressed);
} }

View File

@ -37,6 +37,7 @@
#include <range/v3/view/concat.hpp> #include <range/v3/view/concat.hpp>
#include <dwarfs/block_compressor.h> #include <dwarfs/block_compressor.h>
#include <dwarfs/vector_byte_buffer.h>
using namespace dwarfs; using namespace dwarfs;
@ -61,8 +62,7 @@ generate_random_data(std::mt19937_64& rng, size_t count,
} }
template <std::unsigned_integral ValueType> template <std::unsigned_integral ValueType>
std::vector<uint8_t> shared_byte_buffer make_test_data(int components, int pixels, int unused_lsb) {
make_test_data(int components, int pixels, int unused_lsb) {
std::mt19937_64 rng(42); std::mt19937_64 rng(42);
std::uniform_int_distribution<ValueType> any_value( std::uniform_int_distribution<ValueType> any_value(
0, std::numeric_limits<ValueType>::max()); 0, std::numeric_limits<ValueType>::max());
@ -93,11 +93,11 @@ make_test_data(int components, int pixels, int unused_lsb) {
} }
} }
std::vector<uint8_t> out; auto out = vector_byte_buffer::create();
out.resize(tmp.size() * sizeof(ValueType)); out.resize(tmp.size() * sizeof(ValueType));
std::memcpy(out.data(), tmp.data(), out.size()); std::memcpy(out.data(), tmp.data(), out.size());
return out; return out.share();
} }
struct data_params { struct data_params {
@ -151,8 +151,8 @@ TEST_P(ricepp_param, combinations) {
EXPECT_LT(compressed.size(), 7 * data.size() / 10); EXPECT_LT(compressed.size(), 7 * data.size() / 10);
auto decompressed = auto decompressed = block_decompressor::decompress(compression_type::RICEPP,
block_decompressor::decompress(compression_type::RICEPP, compressed); compressed.span());
ASSERT_EQ(data.size(), decompressed.size()); ASSERT_EQ(data.size(), decompressed.size());
EXPECT_EQ(data, decompressed); EXPECT_EQ(data, decompressed);

View File

@ -28,7 +28,6 @@
#include <dwarfs/writer/segmenter.h> #include <dwarfs/writer/segmenter.h>
#include <dwarfs/writer/writer_progress.h> #include <dwarfs/writer/writer_progress.h>
#include <dwarfs/writer/internal/block_data.h>
#include <dwarfs/writer/internal/block_manager.h> #include <dwarfs/writer/internal/block_manager.h>
#include <dwarfs/writer/internal/chunkable.h> #include <dwarfs/writer/internal/chunkable.h>
@ -145,13 +144,12 @@ void run_segmenter_test(unsigned iters, unsigned granularity,
dwarfs::writer::writer_progress prog; dwarfs::writer::writer_progress prog;
auto blkmgr = std::make_shared<dwarfs::writer::internal::block_manager>(); auto blkmgr = std::make_shared<dwarfs::writer::internal::block_manager>();
std::vector<std::shared_ptr<dwarfs::writer::internal::block_data>> written; std::vector<dwarfs::shared_byte_buffer> written;
dwarfs::writer::segmenter seg( dwarfs::writer::segmenter seg(
lgr, prog, blkmgr, cfg, cc, total_size, lgr, prog, blkmgr, cfg, cc, total_size,
[&written, [&written, blkmgr](dwarfs::shared_byte_buffer blk,
blkmgr](std::shared_ptr<dwarfs::writer::internal::block_data> blk, auto logical_block_num) {
auto logical_block_num) {
auto physical_block_num = written.size(); auto physical_block_num = written.size();
written.push_back(blk); written.push_back(blk);
blkmgr->set_written_block(logical_block_num, physical_block_num, 0); blkmgr->set_written_block(logical_block_num, physical_block_num, 0);
@ -167,7 +165,7 @@ void run_segmenter_test(unsigned iters, unsigned granularity,
size_t segmented [[maybe_unused]]{0}; size_t segmented [[maybe_unused]]{0};
for (auto const& blk : written) { for (auto const& blk : written) {
segmented += blk->size(); segmented += blk.size();
} }
// std::cerr << total_size << " -> " << segmented << fmt::format(" // std::cerr << total_size << " -> " << segmented << fmt::format("