mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-04 02:06:22 -04:00
refactor: replace block_data
with byte_buffer
This commit is contained in:
parent
a19145388c
commit
06f8728cc3
@ -61,12 +61,12 @@ class block_compressor {
|
||||
block_compressor(block_compressor&& bc) = default;
|
||||
block_compressor& operator=(block_compressor&& rhs) = default;
|
||||
|
||||
std::vector<uint8_t> compress(std::span<uint8_t const> data) const {
|
||||
shared_byte_buffer compress(shared_byte_buffer const& data) const {
|
||||
return impl_->compress(data, nullptr);
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::span<uint8_t const> data, std::string const& metadata) const {
|
||||
shared_byte_buffer
|
||||
compress(shared_byte_buffer const& data, std::string const& metadata) const {
|
||||
return impl_->compress(data, &metadata);
|
||||
}
|
||||
|
||||
@ -91,9 +91,8 @@ class block_compressor {
|
||||
|
||||
virtual std::unique_ptr<impl> clone() const = 0;
|
||||
|
||||
virtual std::vector<uint8_t>
|
||||
compress(std::span<uint8_t const> data,
|
||||
std::string const* metadata) const = 0;
|
||||
virtual shared_byte_buffer compress(shared_byte_buffer const& data,
|
||||
std::string const* metadata) const = 0;
|
||||
|
||||
virtual compression_type type() const = 0;
|
||||
virtual std::string describe() const = 0;
|
||||
@ -152,7 +151,7 @@ class compression_info {
|
||||
|
||||
virtual std::string_view name() const = 0;
|
||||
virtual std::string_view description() const = 0;
|
||||
virtual std::vector<std::string> const& options() const = 0;
|
||||
virtual std::vector<std::string> const& options() const = 0; // TODO: span?
|
||||
virtual std::set<std::string> library_dependencies() const = 0;
|
||||
};
|
||||
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <concepts>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
@ -45,15 +46,23 @@ class byte_buffer_interface {
|
||||
public:
|
||||
virtual ~byte_buffer_interface() = default;
|
||||
|
||||
virtual uint8_t const* data() const = 0;
|
||||
virtual size_t size() const = 0;
|
||||
virtual std::span<uint8_t const> span() const = 0;
|
||||
};
|
||||
|
||||
// Interface for byte buffers whose contents can be modified. It extends the
// read-only byte_buffer_interface with pure virtual mutating operations that
// every concrete mutable buffer has to implement.
class mutable_byte_buffer_interface : public byte_buffer_interface {
|
||||
public:
|
||||
// Writable pointer to the buffer's bytes.
virtual uint8_t* mutable_data() = 0;
|
||||
// Writable view of the buffer's bytes.
virtual std::span<uint8_t> mutable_span() = 0;
|
||||
// NOTE(review): presumably empties the buffer like std::vector::clear();
// confirm against the implementations.
virtual void clear() = 0;
|
||||
// NOTE(review): presumably a capacity hint in bytes like
// std::vector::reserve(); confirm against the implementations.
virtual void reserve(size_t size) = 0;
|
||||
// Sets the buffer's size to `size` bytes.
virtual void resize(size_t size) = 0;
|
||||
// NOTE(review): presumably asks the implementation to release unused
// capacity; confirm against the implementations.
virtual void shrink_to_fit() = 0;
|
||||
|
||||
// TODO: See if we can do without this. This will *only* be implemented
|
||||
// in the vector_byte_buffer, other implementations will throw.
|
||||
// Direct access to the backing std::vector (see the TODO above for caveats).
virtual std::vector<uint8_t>& raw_vector() = 0;
|
||||
};
|
||||
|
||||
class shared_byte_buffer {
|
||||
@ -63,14 +72,16 @@ class shared_byte_buffer {
|
||||
explicit shared_byte_buffer(std::shared_ptr<byte_buffer_interface const> bb)
|
||||
: bb_{std::move(bb)} {}
|
||||
|
||||
uint8_t const* data() const { return span().data(); }
|
||||
uint8_t const* data() const { return bb_->data(); }
|
||||
|
||||
size_t size() const { return span().size(); }
|
||||
size_t size() const { return bb_->size(); }
|
||||
|
||||
bool empty() const { return span().empty(); }
|
||||
bool empty() const { return bb_->size() == 0; }
|
||||
|
||||
std::span<uint8_t const> span() const { return bb_->span(); }
|
||||
|
||||
void swap(shared_byte_buffer& other) noexcept { std::swap(bb_, other.bb_); }
|
||||
|
||||
template <detail::byte_range T>
|
||||
friend bool operator==(shared_byte_buffer const& lhs, T const& rhs) {
|
||||
return detail::compare_spans(lhs.span(), {rhs.data(), rhs.size()}) ==
|
||||
@ -78,6 +89,7 @@ class shared_byte_buffer {
|
||||
}
|
||||
|
||||
template <detail::byte_range T>
|
||||
requires(!std::same_as<T, shared_byte_buffer>)
|
||||
friend bool operator==(T const& lhs, shared_byte_buffer const& rhs) {
|
||||
return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()) ==
|
||||
std::strong_ordering::equal;
|
||||
@ -90,6 +102,7 @@ class shared_byte_buffer {
|
||||
}
|
||||
|
||||
template <detail::byte_range T>
|
||||
requires(!std::same_as<T, shared_byte_buffer>)
|
||||
friend std::strong_ordering
|
||||
operator<=>(T const& lhs, shared_byte_buffer const& rhs) {
|
||||
return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span());
|
||||
@ -105,13 +118,13 @@ class mutable_byte_buffer {
|
||||
std::shared_ptr<mutable_byte_buffer_interface> bb)
|
||||
: bb_{std::move(bb)} {}
|
||||
|
||||
uint8_t const* data() const { return span().data(); }
|
||||
uint8_t const* data() const { return bb_->data(); }
|
||||
|
||||
uint8_t* data() { return span().data(); }
|
||||
uint8_t* data() { return bb_->mutable_data(); }
|
||||
|
||||
size_t size() const { return span().size(); }
|
||||
size_t size() const { return bb_->size(); }
|
||||
|
||||
bool empty() const { return span().empty(); }
|
||||
bool empty() const { return bb_->size() == 0; }
|
||||
|
||||
std::span<uint8_t const> span() const { return bb_->span(); }
|
||||
|
||||
@ -123,6 +136,12 @@ class mutable_byte_buffer {
|
||||
|
||||
void resize(size_t size) { bb_->resize(size); }
|
||||
|
||||
void shrink_to_fit() { bb_->shrink_to_fit(); }
|
||||
|
||||
std::vector<uint8_t>& raw_vector() { return bb_->raw_vector(); }
|
||||
|
||||
void swap(mutable_byte_buffer& other) noexcept { std::swap(bb_, other.bb_); }
|
||||
|
||||
template <detail::byte_range T>
|
||||
friend bool operator==(mutable_byte_buffer const& lhs, T const& rhs) {
|
||||
return detail::compare_spans(lhs.span(), {rhs.data(), rhs.size()}) ==
|
||||
@ -130,6 +149,7 @@ class mutable_byte_buffer {
|
||||
}
|
||||
|
||||
template <detail::byte_range T>
|
||||
requires(!std::same_as<T, mutable_byte_buffer>)
|
||||
friend bool operator==(T const& lhs, mutable_byte_buffer const& rhs) {
|
||||
return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span()) ==
|
||||
std::strong_ordering::equal;
|
||||
@ -142,6 +162,7 @@ class mutable_byte_buffer {
|
||||
}
|
||||
|
||||
template <detail::byte_range T>
|
||||
requires(!std::same_as<T, mutable_byte_buffer>)
|
||||
friend std::strong_ordering
|
||||
operator<=>(T const& lhs, mutable_byte_buffer const& rhs) {
|
||||
return detail::compare_spans({lhs.data(), lhs.size()}, rhs.span());
|
||||
|
@ -30,6 +30,7 @@
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <dwarfs/byte_buffer.h>
|
||||
#include <dwarfs/history_config.h>
|
||||
|
||||
namespace dwarfs {
|
||||
@ -50,7 +51,7 @@ class history {
|
||||
thrift::history::history const& get() const { return *history_; }
|
||||
void append(std::optional<std::vector<std::string>> args);
|
||||
size_t size() const;
|
||||
std::vector<uint8_t> serialize() const;
|
||||
shared_byte_buffer serialize() const;
|
||||
void dump(std::ostream& os) const;
|
||||
nlohmann::json as_json() const;
|
||||
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <dwarfs/byte_buffer.h>
|
||||
|
||||
namespace dwarfs {
|
||||
@ -28,6 +30,10 @@ namespace dwarfs {
|
||||
// Factory for mutable_byte_buffer objects. The class only exposes static
// create() overloads; it declares no data members of its own.
class vector_byte_buffer {
|
||||
public:
|
||||
// Creates a buffer with no initial contents.
static mutable_byte_buffer create();
|
||||
// Creates a buffer from a byte count.
// NOTE(review): presumably yields `size` value-initialized bytes; confirm
// against the definition in the .cpp file.
static mutable_byte_buffer create(size_t size);
|
||||
// Creates a buffer from the characters of `data`.
static mutable_byte_buffer create(std::string_view data);
|
||||
// Creates a buffer from the bytes viewed by `data`.
static mutable_byte_buffer create(std::span<uint8_t const> data);
|
||||
// Creates a buffer from an rvalue vector, so the caller gives up `data`.
static mutable_byte_buffer create(std::vector<uint8_t>&& data);
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
||||
|
@ -1,54 +0,0 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
namespace dwarfs::writer::internal {
|
||||
|
||||
// Owning container for a block of bytes, stored in a std::vector<uint8_t>.
// All members simply forward to the underlying vector.
class block_data {
|
||||
public:
|
||||
// Creates an empty block.
block_data() = default;
|
||||
// Takes over the contents of `vec` by moving from it.
explicit block_data(std::vector<uint8_t>&& vec)
|
||||
: vec_{std::move(vec)} {}
|
||||
// Copies the characters of `str` into the block.
explicit block_data(std::string_view str)
|
||||
: vec_{str.begin(), str.end()} {}
|
||||
|
||||
// Access to the underlying vector (read-only and mutable overloads).
std::vector<uint8_t> const& vec() const { return vec_; }
|
||||
std::vector<uint8_t>& vec() { return vec_; }
|
||||
|
||||
// Forwards to std::vector::reserve().
void reserve(size_t size) { vec_.reserve(size); }
|
||||
|
||||
// Pointer to the first stored byte (read-only and mutable overloads).
uint8_t const* data() const { return vec_.data(); }
|
||||
uint8_t* data() { return vec_.data(); }
|
||||
|
||||
// Number of bytes stored.
size_t size() const { return vec_.size(); }
|
||||
|
||||
// True if no bytes are stored.
bool empty() const { return vec_.empty(); }
|
||||
|
||||
private:
|
||||
// The bytes owned by this block.
std::vector<uint8_t> vec_;
|
||||
};
|
||||
|
||||
} // namespace dwarfs::writer::internal
|
@ -31,6 +31,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include <dwarfs/block_compressor.h>
|
||||
#include <dwarfs/byte_buffer.h>
|
||||
#include <dwarfs/compression_constraints.h>
|
||||
#include <dwarfs/fstypes.h>
|
||||
#include <dwarfs/writer/fragment_category.h>
|
||||
@ -45,8 +46,6 @@ class fs_section;
|
||||
|
||||
namespace writer::internal {
|
||||
|
||||
class block_data;
|
||||
|
||||
class filesystem_writer_detail {
|
||||
public:
|
||||
virtual ~filesystem_writer_detail() = default;
|
||||
@ -71,14 +70,13 @@ class filesystem_writer_detail {
|
||||
virtual void
|
||||
configure_rewrite(size_t filesystem_size, size_t block_count) = 0;
|
||||
virtual void copy_header(std::span<uint8_t const> header) = 0;
|
||||
virtual void
|
||||
write_block(fragment_category cat, std::shared_ptr<block_data>&& data,
|
||||
physical_block_cb_type physical_block_cb,
|
||||
std::optional<std::string> meta = std::nullopt) = 0;
|
||||
virtual void write_block(fragment_category cat, shared_byte_buffer data,
|
||||
physical_block_cb_type physical_block_cb,
|
||||
std::optional<std::string> meta = std::nullopt) = 0;
|
||||
virtual void finish_category(fragment_category cat) = 0;
|
||||
virtual void write_metadata_v2_schema(std::shared_ptr<block_data>&& data) = 0;
|
||||
virtual void write_metadata_v2(std::shared_ptr<block_data>&& data) = 0;
|
||||
virtual void write_history(std::shared_ptr<block_data>&& data) = 0;
|
||||
virtual void write_metadata_v2_schema(shared_byte_buffer data) = 0;
|
||||
virtual void write_metadata_v2(shared_byte_buffer data) = 0;
|
||||
virtual void write_history(shared_byte_buffer data) = 0;
|
||||
virtual void check_block_compression(
|
||||
compression_type compression, std::span<uint8_t const> data,
|
||||
std::optional<fragment_category::value_type> cat = std::nullopt) = 0;
|
||||
|
@ -23,7 +23,8 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <dwarfs/byte_buffer.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
@ -35,7 +36,7 @@ namespace writer::internal {
|
||||
|
||||
class metadata_freezer {
|
||||
public:
|
||||
static std::pair<std::vector<uint8_t>, std::vector<uint8_t>>
|
||||
static std::pair<shared_byte_buffer, shared_byte_buffer>
|
||||
freeze(thrift::metadata::metadata const& data);
|
||||
};
|
||||
|
||||
|
@ -26,6 +26,8 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <dwarfs/byte_buffer.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
struct compression_constraints;
|
||||
@ -38,7 +40,6 @@ class writer_progress;
|
||||
|
||||
namespace internal {
|
||||
|
||||
class block_data;
|
||||
class block_manager;
|
||||
class chunkable;
|
||||
|
||||
@ -55,8 +56,8 @@ class segmenter {
|
||||
unsigned block_size_bits{22};
|
||||
};
|
||||
|
||||
using block_ready_cb = std::function<void(
|
||||
std::shared_ptr<internal::block_data>, size_t logical_block_num)>;
|
||||
using block_ready_cb =
|
||||
std::function<void(shared_byte_buffer, size_t logical_block_num)>;
|
||||
|
||||
segmenter(logger& lgr, writer_progress& prog,
|
||||
std::shared_ptr<internal::block_manager> blkmgr, config const& cfg,
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <dwarfs/fstypes.h>
|
||||
#include <dwarfs/option_map.h>
|
||||
#include <dwarfs/varint.h>
|
||||
#include <dwarfs/vector_byte_buffer.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
@ -48,10 +49,9 @@ class brotli_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<brotli_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::span<uint8_t const> data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
std::vector<uint8_t> compressed;
|
||||
shared_byte_buffer compress(shared_byte_buffer const& data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
auto compressed = vector_byte_buffer::create(); // TODO: make configurable
|
||||
compressed.resize(varint::max_size +
|
||||
::BrotliEncoderMaxCompressedSize(data.size()));
|
||||
size_t size_size = varint::encode(data.size(), compressed.data());
|
||||
@ -66,7 +66,7 @@ class brotli_block_compressor final : public block_compressor::impl {
|
||||
throw bad_compression_ratio_error();
|
||||
}
|
||||
compressed.shrink_to_fit();
|
||||
return compressed;
|
||||
return compressed.share();
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::BROTLI; }
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include <dwarfs/option_map.h>
|
||||
#include <dwarfs/pcm_sample_transformer.h>
|
||||
#include <dwarfs/varint.h>
|
||||
#include <dwarfs/vector_byte_buffer.h>
|
||||
|
||||
#include <dwarfs/gen-cpp2/compression_types.h>
|
||||
|
||||
@ -53,7 +54,7 @@ constexpr size_t const kBlockSize{65536};
|
||||
|
||||
class dwarfs_flac_stream_encoder final : public FLAC::Encoder::Stream {
|
||||
public:
|
||||
explicit dwarfs_flac_stream_encoder(std::vector<uint8_t>& data)
|
||||
explicit dwarfs_flac_stream_encoder(mutable_byte_buffer& data)
|
||||
: data_{data}
|
||||
, pos_{data_.size()} {}
|
||||
|
||||
@ -90,7 +91,7 @@ class dwarfs_flac_stream_encoder final : public FLAC::Encoder::Stream {
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<uint8_t>& data_;
|
||||
mutable_byte_buffer& data_;
|
||||
size_t pos_;
|
||||
};
|
||||
|
||||
@ -208,8 +209,8 @@ class flac_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<flac_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(std::span<uint8_t const> data,
|
||||
std::string const* metadata) const override {
|
||||
shared_byte_buffer compress(shared_byte_buffer const& data,
|
||||
std::string const* metadata) const override {
|
||||
if (!metadata) {
|
||||
DWARFS_THROW(runtime_error,
|
||||
"internal error: flac compression requires metadata");
|
||||
@ -265,7 +266,7 @@ class flac_block_compressor final : public block_compressor::impl {
|
||||
pcm_pad = pcm_sample_padding::Msb;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compressed;
|
||||
auto compressed = vector_byte_buffer::create(); // TODO: make configurable
|
||||
|
||||
{
|
||||
using namespace ::apache::thrift;
|
||||
@ -286,7 +287,7 @@ class flac_block_compressor final : public block_compressor::impl {
|
||||
CompactSerializer::serialize(hdr, &hdrbuf);
|
||||
|
||||
compressed.resize(pos + hdrbuf.size());
|
||||
::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size());
|
||||
::memcpy(compressed.data() + pos, hdrbuf.data(), hdrbuf.size());
|
||||
}
|
||||
|
||||
dwarfs_flac_stream_encoder encoder(compressed);
|
||||
@ -341,7 +342,7 @@ class flac_block_compressor final : public block_compressor::impl {
|
||||
|
||||
compressed.shrink_to_fit();
|
||||
|
||||
return compressed;
|
||||
return compressed.share();
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::FLAC; }
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <dwarfs/error.h>
|
||||
#include <dwarfs/fstypes.h>
|
||||
#include <dwarfs/option_map.h>
|
||||
#include <dwarfs/vector_byte_buffer.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
@ -69,18 +70,18 @@ class lz4_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<lz4_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::span<uint8_t const> data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
std::vector<uint8_t> compressed(sizeof(uint32_t) +
|
||||
LZ4_compressBound(to<int>(data.size())));
|
||||
shared_byte_buffer compress(shared_byte_buffer const& data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
auto compressed = vector_byte_buffer::create(); // TODO: make configurable
|
||||
compressed.resize(sizeof(uint32_t) +
|
||||
LZ4_compressBound(to<int>(data.size())));
|
||||
// TODO: this should have been a varint; also, if we ever support
|
||||
// big-endian systems, we'll have to properly convert this
|
||||
uint32_t size = data.size();
|
||||
std::memcpy(compressed.data(), &size, sizeof(size));
|
||||
auto csize = Policy::compress(data.data(), &compressed[sizeof(uint32_t)],
|
||||
data.size(),
|
||||
compressed.size() - sizeof(uint32_t), level_);
|
||||
auto csize = Policy::compress(
|
||||
data.data(), compressed.data() + sizeof(uint32_t), data.size(),
|
||||
compressed.size() - sizeof(uint32_t), level_);
|
||||
if (csize == 0) {
|
||||
DWARFS_THROW(runtime_error, "error during compression");
|
||||
}
|
||||
@ -88,7 +89,7 @@ class lz4_block_compressor final : public block_compressor::impl {
|
||||
throw bad_compression_ratio_error();
|
||||
}
|
||||
compressed.resize(sizeof(uint32_t) + csize);
|
||||
return compressed;
|
||||
return compressed.share();
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::LZ4; }
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <dwarfs/option_map.h>
|
||||
#include <dwarfs/sorted_array_map.h>
|
||||
#include <dwarfs/types.h>
|
||||
#include <dwarfs/vector_byte_buffer.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
@ -111,8 +112,8 @@ class lzma_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<lzma_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(std::span<uint8_t const> data,
|
||||
std::string const* metadata) const override;
|
||||
shared_byte_buffer compress(shared_byte_buffer const& data,
|
||||
std::string const* metadata) const override;
|
||||
|
||||
compression_type type() const override { return compression_type::LZMA; }
|
||||
|
||||
@ -126,8 +127,8 @@ class lzma_block_compressor final : public block_compressor::impl {
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<uint8_t>
|
||||
compress(std::span<uint8_t const> data, lzma_filter const* filters) const;
|
||||
shared_byte_buffer
|
||||
compress(shared_byte_buffer const& data, lzma_filter const* filters) const;
|
||||
|
||||
static uint32_t get_preset(unsigned level, bool extreme) {
|
||||
uint32_t preset = level;
|
||||
@ -197,8 +198,8 @@ lzma_block_compressor::lzma_block_compressor(option_map& om) {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
lzma_block_compressor::compress(std::span<uint8_t const> data,
|
||||
shared_byte_buffer
|
||||
lzma_block_compressor::compress(shared_byte_buffer const& data,
|
||||
lzma_filter const* filters) const {
|
||||
lzma_stream s = LZMA_STREAM_INIT;
|
||||
|
||||
@ -210,7 +211,8 @@ lzma_block_compressor::compress(std::span<uint8_t const> data,
|
||||
|
||||
lzma_action action = LZMA_FINISH;
|
||||
|
||||
std::vector<uint8_t> compressed(data.size() - 1);
|
||||
auto compressed = vector_byte_buffer::create(); // TODO: make configurable
|
||||
compressed.resize(data.size() - 1);
|
||||
|
||||
s.next_in = data.data();
|
||||
s.avail_in = data.size();
|
||||
@ -234,21 +236,21 @@ lzma_block_compressor::compress(std::span<uint8_t const> data,
|
||||
lzma_error_string(ret)));
|
||||
}
|
||||
|
||||
return compressed;
|
||||
return compressed.share();
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
lzma_block_compressor::compress(std::span<uint8_t const> data,
|
||||
shared_byte_buffer
|
||||
lzma_block_compressor::compress(shared_byte_buffer const& data,
|
||||
std::string const* /*metadata*/) const {
|
||||
auto lzma_opts = opt_lzma_;
|
||||
std::array<lzma_filter, 3> filters{{{binary_vli_, nullptr},
|
||||
{LZMA_FILTER_LZMA2, &lzma_opts},
|
||||
{LZMA_VLI_UNKNOWN, nullptr}}};
|
||||
|
||||
std::vector<uint8_t> best = compress(data, &filters[1]);
|
||||
auto best = compress(data, &filters[1]);
|
||||
|
||||
if (filters[0].id != LZMA_VLI_UNKNOWN) {
|
||||
std::vector<uint8_t> compressed = compress(data, filters.data());
|
||||
auto compressed = compress(data, filters.data());
|
||||
|
||||
if (compressed.size() < best.size()) {
|
||||
best.swap(compressed);
|
||||
|
@ -41,11 +41,9 @@ class null_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<null_block_compressor>(*this);
|
||||
}
|
||||
|
||||
// TODO: we should not have to copy the data here...
|
||||
std::vector<uint8_t>
|
||||
compress(std::span<uint8_t const> data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
return std::vector<uint8_t>(data.begin(), data.end());
|
||||
// "Compression" for the NONE type: hands back the input buffer unchanged.
// Returning `data` by value copies only the shared_byte_buffer handle; the
// metadata argument is ignored.
shared_byte_buffer compress(shared_byte_buffer const& data,
|
||||
std::string const* /*metadata*/) const override {
|
||||
return data;
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::NONE; }
|
||||
|
@ -52,8 +52,8 @@ class ricepp_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<ricepp_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(std::span<uint8_t const> data,
|
||||
std::string const* metadata) const override {
|
||||
shared_byte_buffer compress(shared_byte_buffer const& data,
|
||||
std::string const* metadata) const override {
|
||||
if (!metadata) {
|
||||
DWARFS_THROW(runtime_error,
|
||||
"internal error: ricepp compression requires metadata");
|
||||
@ -88,8 +88,10 @@ class ricepp_block_compressor final : public block_compressor::impl {
|
||||
.unused_lsb_count = static_cast<unsigned>(unused_lsb_count),
|
||||
});
|
||||
|
||||
std::vector<uint8_t> compressed;
|
||||
auto compressed = vector_byte_buffer::create(); // TODO: make configurable
|
||||
|
||||
// TODO: see if we can resize just once...
|
||||
// TODO: maybe the mutable_byte_buffer interface can have .append()?
|
||||
{
|
||||
using namespace ::apache::thrift;
|
||||
|
||||
@ -111,7 +113,7 @@ class ricepp_block_compressor final : public block_compressor::impl {
|
||||
CompactSerializer::serialize(hdr, &hdrbuf);
|
||||
|
||||
compressed.resize(pos + hdrbuf.size());
|
||||
::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size());
|
||||
::memcpy(compressed.data() + pos, hdrbuf.data(), hdrbuf.size());
|
||||
}
|
||||
|
||||
std::span<pixel_type const> input{
|
||||
@ -121,13 +123,11 @@ class ricepp_block_compressor final : public block_compressor::impl {
|
||||
size_t header_size = compressed.size();
|
||||
compressed.resize(header_size + codec->worst_case_encoded_bytes(input));
|
||||
|
||||
std::span<uint8_t> buffer(compressed);
|
||||
|
||||
auto output = codec->encode(buffer.subspan(header_size), input);
|
||||
auto output = codec->encode(compressed.span().subspan(header_size), input);
|
||||
compressed.resize(header_size + output.size());
|
||||
compressed.shrink_to_fit();
|
||||
|
||||
return compressed;
|
||||
return compressed.share();
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::RICEPP; }
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <dwarfs/error.h>
|
||||
#include <dwarfs/fstypes.h>
|
||||
#include <dwarfs/option_map.h>
|
||||
#include <dwarfs/vector_byte_buffer.h>
|
||||
#include <dwarfs/zstd_context_manager.h>
|
||||
|
||||
#if ZSTD_VERSION_MAJOR > 1 || \
|
||||
@ -54,8 +55,8 @@ class zstd_block_compressor final : public block_compressor::impl {
|
||||
return std::make_unique<zstd_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(std::span<uint8_t const> data,
|
||||
std::string const* metadata) const override;
|
||||
shared_byte_buffer compress(shared_byte_buffer const& data,
|
||||
std::string const* metadata) const override;
|
||||
|
||||
compression_type type() const override { return compression_type::ZSTD; }
|
||||
|
||||
@ -87,10 +88,11 @@ class zstd_block_compressor final : public block_compressor::impl {
|
||||
int const level_;
|
||||
};
|
||||
|
||||
std::vector<uint8_t>
|
||||
zstd_block_compressor::compress(std::span<uint8_t const> data,
|
||||
shared_byte_buffer
|
||||
zstd_block_compressor::compress(shared_byte_buffer const& data,
|
||||
std::string const* /*metadata*/) const {
|
||||
std::vector<uint8_t> compressed(ZSTD_compressBound(data.size()));
|
||||
auto compressed = vector_byte_buffer::create(); // TODO: make configurable
|
||||
compressed.resize(ZSTD_compressBound(data.size()));
|
||||
auto ctx = ctxmgr_->make_context();
|
||||
auto size = ZSTD_compressCCtx(ctx.get(), compressed.data(), compressed.size(),
|
||||
data.data(), data.size(), level_);
|
||||
@ -103,7 +105,7 @@ zstd_block_compressor::compress(std::span<uint8_t const> data,
|
||||
}
|
||||
compressed.resize(size);
|
||||
compressed.shrink_to_fit();
|
||||
return compressed;
|
||||
return compressed.share();
|
||||
}
|
||||
|
||||
class zstd_block_decompressor final : public block_decompressor::impl {
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <dwarfs/config.h>
|
||||
#include <dwarfs/history.h>
|
||||
#include <dwarfs/library_dependencies.h>
|
||||
#include <dwarfs/vector_byte_buffer.h>
|
||||
#include <dwarfs/version.h>
|
||||
|
||||
#include <dwarfs/gen-cpp2/history_types.h>
|
||||
@ -80,10 +81,10 @@ void history::append(std::optional<std::vector<std::string>> args) {
|
||||
|
||||
size_t history::size() const { return history_->entries()->size(); }
|
||||
|
||||
std::vector<uint8_t> history::serialize() const {
|
||||
shared_byte_buffer history::serialize() const {
|
||||
std::string buf;
|
||||
::apache::thrift::CompactSerializer::serialize(*history_, &buf);
|
||||
return {buf.begin(), buf.end()};
|
||||
return vector_byte_buffer::create(buf).share();
|
||||
}
|
||||
|
||||
void history::dump(std::ostream& os) const {
|
||||
|
@ -32,6 +32,10 @@ class mapped_byte_buffer_impl : public byte_buffer_interface {
|
||||
: data_{data}
|
||||
, mm_{std::move(mm)} {}
|
||||
|
||||
// Number of bytes, as reported by data_.size().
size_t size() const override { return data_.size(); }
|
||||
|
||||
// Read-only pointer to the first byte, as reported by data_.data().
uint8_t const* data() const override { return data_.data(); }
|
||||
|
||||
// Read-only view built from data_'s pointer and size.
std::span<uint8_t const> span() const override {
|
||||
return {data_.data(), data_.size()};
|
||||
}
|
||||
|
@ -53,7 +53,6 @@
|
||||
#include <dwarfs/reader/internal/filesystem_parser.h>
|
||||
#include <dwarfs/reader/internal/inode_reader_v2.h>
|
||||
#include <dwarfs/reader/internal/metadata_v2.h>
|
||||
#include <dwarfs/writer/internal/block_data.h>
|
||||
#include <dwarfs/writer/internal/filesystem_writer_detail.h>
|
||||
|
||||
namespace dwarfs::reader {
|
||||
|
@ -78,6 +78,9 @@ class cached_block_ final : public cached_block {
|
||||
// This can be called from any thread
|
||||
// Returns the current value of range_end_, read via load().
size_t range_end() const override { return range_end_.load(); }
|
||||
|
||||
// TODO: The code relies on the fact that the data_ buffer is never
|
||||
// reallocated once block decompression has started. I would like to
|
||||
// somehow enforce that this cannot happen.
|
||||
// Returns a read-only pointer to the first byte of data_.
uint8_t const* data() const override { return data_.data(); }
|
||||
|
||||
void decompress_until(size_t end) override {
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include <dwarfs/writer/filesystem_writer.h>
|
||||
|
||||
#include <dwarfs/reader/internal/filesystem_parser.h>
|
||||
#include <dwarfs/writer/internal/block_data.h>
|
||||
#include <dwarfs/writer/internal/filesystem_writer_detail.h>
|
||||
|
||||
namespace dwarfs::utility {
|
||||
@ -38,7 +37,6 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
||||
dwarfs::writer::category_resolver const& cat_resolver,
|
||||
rewrite_options const& opts) {
|
||||
using dwarfs::writer::fragment_category;
|
||||
using dwarfs::writer::internal::block_data;
|
||||
|
||||
LOG_PROXY(debug_logger_policy, lgr);
|
||||
|
||||
@ -174,7 +172,7 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
||||
case section_type::HISTORY:
|
||||
if (opts.enable_history) {
|
||||
history hist{opts.history};
|
||||
hist.parse(fs.get_history().serialize());
|
||||
hist.parse(fs.get_history().serialize().span());
|
||||
hist.append(opts.command_line_arguments);
|
||||
|
||||
LOG_VERBOSE << "updating " << get_section_name(s->type()) << " ("
|
||||
@ -182,7 +180,7 @@ void rewrite_filesystem(logger& lgr, dwarfs::reader::filesystem_v2 const& fs,
|
||||
<< "), compressing using '"
|
||||
<< writer.get_compressor(s->type()).describe() << "'";
|
||||
|
||||
writer.write_history(std::make_shared<block_data>(hist.serialize()));
|
||||
writer.write_history(hist.serialize());
|
||||
} else {
|
||||
LOG_VERBOSE << "removing " << get_section_name(s->type());
|
||||
}
|
||||
|
@ -29,6 +29,22 @@ namespace {
|
||||
|
||||
class vector_byte_buffer_impl : public mutable_byte_buffer_interface {
|
||||
public:
|
||||
// Default construction; data_ has no initializer of its own, so the buffer
// starts out empty.
vector_byte_buffer_impl() = default;
|
||||
// Creates a buffer of `size` value-initialized (zero) bytes. Parentheses are
// used on purpose: they select std::vector's count constructor rather than
// an initializer list holding a single element.
explicit vector_byte_buffer_impl(size_t size)
|
||||
: data_(size) {}
|
||||
// Copies the characters of `data` into the buffer via the iterator-range
// constructor of std::vector.
explicit vector_byte_buffer_impl(std::string_view data)
|
||||
: data_{data.begin(), data.end()} {}
|
||||
// Copies the bytes viewed by `data` into the buffer via the iterator-range
// constructor of std::vector.
explicit vector_byte_buffer_impl(std::span<uint8_t const> data)
|
||||
: data_{data.begin(), data.end()} {}
|
||||
// Takes over the contents of `data` by moving it into data_ (no byte copy).
explicit vector_byte_buffer_impl(std::vector<uint8_t>&& data)
|
||||
: data_{std::move(data)} {}
|
||||
|
||||
// Number of bytes currently stored in data_.
size_t size() const override { return data_.size(); }
|
||||
|
||||
// Read-only pointer to the first byte of data_.
uint8_t const* data() const override { return data_.data(); }
|
||||
|
||||
// Writable pointer to the first byte of data_.
uint8_t* mutable_data() override { return data_.data(); }
|
||||
|
||||
// Read-only view covering all bytes currently stored in data_.
std::span<uint8_t const> span() const override {
|
||||
return {data_.data(), data_.size()};
|
||||
}
|
||||
@ -43,6 +59,10 @@ class vector_byte_buffer_impl : public mutable_byte_buffer_interface {
|
||||
|
||||
// Forwards to std::vector::resize(); may reallocate, which invalidates
// pointers and spans obtained earlier.
void resize(size_t size) override { data_.resize(size); }
|
||||
|
||||
// Forwards to std::vector::shrink_to_fit(), a non-binding request to drop
// unused capacity.
void shrink_to_fit() override { data_.shrink_to_fit(); }
|
||||
|
||||
// Exposes the backing vector itself by mutable reference.
std::vector<uint8_t>& raw_vector() override { return data_; }
|
||||
|
||||
private:
|
||||
std::vector<uint8_t> data_;
|
||||
};
|
||||
@ -53,4 +73,21 @@ mutable_byte_buffer vector_byte_buffer::create() {
|
||||
return mutable_byte_buffer{std::make_shared<vector_byte_buffer_impl>()};
|
||||
}
|
||||
|
||||
// Creates a mutable_byte_buffer around a new, shared vector_byte_buffer_impl
// constructed from `size`.
mutable_byte_buffer vector_byte_buffer::create(size_t size) {
|
||||
return mutable_byte_buffer{std::make_shared<vector_byte_buffer_impl>(size)};
|
||||
}
|
||||
|
||||
// Creates a mutable_byte_buffer around a new, shared vector_byte_buffer_impl
// constructed from the string view `data`.
mutable_byte_buffer vector_byte_buffer::create(std::string_view data) {
|
||||
return mutable_byte_buffer{std::make_shared<vector_byte_buffer_impl>(data)};
|
||||
}
|
||||
|
||||
// Creates a mutable_byte_buffer around a new, shared vector_byte_buffer_impl
// constructed from the byte span `data`.
mutable_byte_buffer vector_byte_buffer::create(std::span<uint8_t const> data) {
|
||||
return mutable_byte_buffer{std::make_shared<vector_byte_buffer_impl>(data)};
|
||||
}
|
||||
|
||||
// Creates a mutable_byte_buffer around a new, shared vector_byte_buffer_impl
// that receives `data` via std::move, so the caller's vector is handed over
// rather than copied.
mutable_byte_buffer vector_byte_buffer::create(std::vector<uint8_t>&& data) {
|
||||
return mutable_byte_buffer{
|
||||
std::make_shared<vector_byte_buffer_impl>(std::move(data))};
|
||||
}
|
||||
|
||||
} // namespace dwarfs
|
||||
|
@ -48,7 +48,6 @@
|
||||
|
||||
#include <dwarfs/internal/fs_section.h>
|
||||
#include <dwarfs/internal/worker_group.h>
|
||||
#include <dwarfs/writer/internal/block_data.h>
|
||||
#include <dwarfs/writer/internal/filesystem_writer_detail.h>
|
||||
#include <dwarfs/writer/internal/multi_queue_block_merger.h>
|
||||
#include <dwarfs/writer/internal/progress.h>
|
||||
@ -122,8 +121,7 @@ class compression_progress : public progress::context {
|
||||
class fsblock {
|
||||
public:
|
||||
fsblock(section_type type, block_compressor const& bc,
|
||||
std::shared_ptr<block_data>&& data,
|
||||
std::shared_ptr<compression_progress> pctx,
|
||||
shared_byte_buffer data, std::shared_ptr<compression_progress> pctx,
|
||||
folly::Function<void(size_t)> set_block_cb = nullptr);
|
||||
|
||||
fsblock(section_type type, compression_type compression,
|
||||
@ -198,12 +196,12 @@ class fsblock_merger_policy {
|
||||
class raw_fsblock : public fsblock::impl {
|
||||
public:
|
||||
raw_fsblock(section_type type, block_compressor const& bc,
|
||||
std::shared_ptr<block_data>&& data,
|
||||
shared_byte_buffer data,
|
||||
std::shared_ptr<compression_progress> pctx,
|
||||
folly::Function<void(size_t)> set_block_cb)
|
||||
: type_{type}
|
||||
, bc_{bc}
|
||||
, uncompressed_size_{data->size()}
|
||||
, uncompressed_size_{data.size()}
|
||||
, data_{std::move(data)}
|
||||
, comp_type_{bc_.type()}
|
||||
, pctx_{std::move(pctx)}
|
||||
@ -215,30 +213,30 @@ class raw_fsblock : public fsblock::impl {
|
||||
std::promise<void> prom;
|
||||
future_ = prom.get_future();
|
||||
|
||||
wg.add_job([this, prom = std::move(prom),
|
||||
meta = std::move(meta)]() mutable {
|
||||
try {
|
||||
std::shared_ptr<block_data> tmp;
|
||||
wg.add_job(
|
||||
[this, prom = std::move(prom), meta = std::move(meta)]() mutable {
|
||||
try {
|
||||
shared_byte_buffer tmp;
|
||||
|
||||
if (meta) {
|
||||
tmp = std::make_shared<block_data>(bc_.compress(data_->vec(), *meta));
|
||||
} else {
|
||||
tmp = std::make_shared<block_data>(bc_.compress(data_->vec()));
|
||||
}
|
||||
if (meta) {
|
||||
tmp = bc_.compress(data_, *meta);
|
||||
} else {
|
||||
tmp = bc_.compress(data_);
|
||||
}
|
||||
|
||||
pctx_->bytes_in += data_->vec().size();
|
||||
pctx_->bytes_out += tmp->vec().size();
|
||||
pctx_->bytes_in += data_.size();
|
||||
pctx_->bytes_out += tmp.size();
|
||||
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
data_.swap(tmp);
|
||||
}
|
||||
} catch (bad_compression_ratio_error const&) {
|
||||
comp_type_ = compression_type::NONE;
|
||||
}
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
data_.swap(tmp);
|
||||
}
|
||||
} catch (bad_compression_ratio_error const&) {
|
||||
comp_type_ = compression_type::NONE;
|
||||
}
|
||||
|
||||
prom.set_value();
|
||||
});
|
||||
prom.set_value();
|
||||
});
|
||||
}
|
||||
|
||||
// Blocks the caller until future_ becomes ready.
void wait_until_compressed() override { future_.wait(); }
|
||||
@ -249,13 +247,13 @@ class raw_fsblock : public fsblock::impl {
|
||||
|
||||
// Description string for this block, delegated to the compressor's describe().
std::string description() const override { return bc_.describe(); }
|
||||
|
||||
// Read-only view of the bytes currently held by data_.
// NOTE(review): unlike size(), this accessor does not take mx_ -- confirm
// that callers only use it once compression has finished.
std::span<uint8_t const> data() const override { return data_->vec(); }
|
||||
std::span<uint8_t const> data() const override { return data_.span(); }
|
||||
|
||||
// Returns the stored uncompressed_size_ value.
size_t uncompressed_size() const override { return uncompressed_size_; }
|
||||
|
||||
// Returns the current size of data_, read while holding mx_.
size_t size() const override {
|
||||
std::lock_guard lock(mx_);
|
||||
return data_->size();
|
||||
return data_.size();
|
||||
}
|
||||
|
||||
void set_block_no(uint32_t number) override {
|
||||
@ -291,7 +289,7 @@ class raw_fsblock : public fsblock::impl {
|
||||
block_compressor const& bc_;
|
||||
size_t const uncompressed_size_;
|
||||
mutable std::recursive_mutex mx_;
|
||||
std::shared_ptr<block_data> data_;
|
||||
shared_byte_buffer data_;
|
||||
std::future<void> future_;
|
||||
std::optional<uint32_t> number_;
|
||||
std::optional<section_header_v2> mutable header_;
|
||||
@ -382,7 +380,7 @@ class rewritten_fsblock : public fsblock::impl {
|
||||
wg.add_job(
|
||||
[this, prom = std::move(prom), meta = std::move(meta)]() mutable {
|
||||
try {
|
||||
std::vector<uint8_t> block;
|
||||
shared_byte_buffer block;
|
||||
|
||||
{
|
||||
// TODO: we don't have to do this for uncompressed blocks
|
||||
@ -398,9 +396,9 @@ class rewritten_fsblock : public fsblock::impl {
|
||||
|
||||
try {
|
||||
if (meta) {
|
||||
block = bc_.compress(buffer.span(), *meta);
|
||||
block = bc_.compress(buffer.share(), *meta);
|
||||
} else {
|
||||
block = bc_.compress(buffer.span());
|
||||
block = bc_.compress(buffer.share());
|
||||
}
|
||||
} catch (bad_compression_ratio_error const&) {
|
||||
comp_type_ = compression_type::NONE;
|
||||
@ -411,7 +409,7 @@ class rewritten_fsblock : public fsblock::impl {
|
||||
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
block_data_.swap(block);
|
||||
block_data_.emplace(std::move(block));
|
||||
}
|
||||
|
||||
prom.set_value();
|
||||
@ -429,13 +427,18 @@ class rewritten_fsblock : public fsblock::impl {
|
||||
|
||||
// Description string for this block, delegated to the compressor's describe().
std::string description() const override { return bc_.describe(); }
|
||||
|
||||
// Read-only view of the stored block_data_ bytes.
// In the optional-based form, block_data_.value() throws
// std::bad_optional_access if block_data_ has not been set yet.
// NOTE(review): mx_ is released when this function returns, while the
// returned span remains in use by the caller -- confirm block_data_ is not
// replaced afterwards.
std::span<uint8_t const> data() const override { return block_data_; }
|
||||
std::span<uint8_t const> data() const override {
|
||||
std::lock_guard lock(mx_);
|
||||
return block_data_.value().span();
|
||||
}
|
||||
|
||||
// Reports the size of the data_ span as this block's uncompressed size.
size_t uncompressed_size() const override { return data_.size(); }
|
||||
|
||||
// Returns the size of block_data_, read while holding mx_. In the
// optional-based form, 0 is returned while block_data_ is still unset.
size_t size() const override {
|
||||
std::lock_guard lock(mx_);
|
||||
return block_data_.size();
|
||||
// TODO: this should not be called when block_data_ is not set, figure
|
||||
// out who calls this
|
||||
return block_data_.has_value() ? block_data_->size() : 0;
|
||||
}
|
||||
|
||||
void set_block_no(uint32_t number) override {
|
||||
@ -467,7 +470,7 @@ class rewritten_fsblock : public fsblock::impl {
|
||||
block_compressor const& bc_;
|
||||
mutable std::recursive_mutex mx_;
|
||||
std::span<uint8_t const> data_;
|
||||
std::vector<uint8_t> block_data_;
|
||||
std::optional<shared_byte_buffer> block_data_;
|
||||
std::future<void> future_;
|
||||
std::optional<uint32_t> number_;
|
||||
std::optional<section_header_v2> mutable header_;
|
||||
@ -477,7 +480,7 @@ class rewritten_fsblock : public fsblock::impl {
|
||||
};
|
||||
|
||||
fsblock::fsblock(section_type type, block_compressor const& bc,
|
||||
std::shared_ptr<block_data>&& data,
|
||||
shared_byte_buffer data,
|
||||
std::shared_ptr<compression_progress> pctx,
|
||||
folly::Function<void(size_t)> set_block_cb)
|
||||
: impl_(std::make_unique<raw_fsblock>(type, bc, std::move(data),
|
||||
@ -573,13 +576,13 @@ class filesystem_writer_ final : public filesystem_writer_detail {
|
||||
size_t max_active_slots) override;
|
||||
void configure_rewrite(size_t filesystem_size, size_t block_count) override;
|
||||
void copy_header(std::span<uint8_t const> header) override;
|
||||
void write_block(fragment_category cat, std::shared_ptr<block_data>&& data,
|
||||
void write_block(fragment_category cat, shared_byte_buffer data,
|
||||
physical_block_cb_type physical_block_cb,
|
||||
std::optional<std::string> meta) override;
|
||||
void finish_category(fragment_category cat) override;
|
||||
void write_metadata_v2_schema(std::shared_ptr<block_data>&& data) override;
|
||||
void write_metadata_v2(std::shared_ptr<block_data>&& data) override;
|
||||
void write_history(std::shared_ptr<block_data>&& data) override;
|
||||
void write_metadata_v2_schema(shared_byte_buffer data) override;
|
||||
void write_metadata_v2(shared_byte_buffer data) override;
|
||||
void write_history(shared_byte_buffer data) override;
|
||||
void check_block_compression(
|
||||
compression_type compression, std::span<uint8_t const> data,
|
||||
std::optional<fragment_category::value_type> cat) override;
|
||||
@ -600,12 +603,11 @@ class filesystem_writer_ final : public filesystem_writer_detail {
|
||||
block_compressor const&
|
||||
compressor_for_category(fragment_category::value_type cat) const;
|
||||
void
|
||||
write_block_impl(fragment_category cat, std::shared_ptr<block_data>&& data,
|
||||
write_block_impl(fragment_category cat, shared_byte_buffer data,
|
||||
block_compressor const& bc, std::optional<std::string> meta,
|
||||
physical_block_cb_type physical_block_cb);
|
||||
void on_block_merged(block_holder_type holder);
|
||||
void
|
||||
write_section_impl(section_type type, std::shared_ptr<block_data>&& data);
|
||||
void write_section_impl(section_type type, shared_byte_buffer data);
|
||||
void write(fsblock const& fsb);
|
||||
void write(char const* data, size_t size);
|
||||
template <typename T>
|
||||
@ -779,9 +781,8 @@ filesystem_writer_<LoggerPolicy>::compressor_for_category(
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_block_impl(
|
||||
fragment_category cat, std::shared_ptr<block_data>&& data,
|
||||
block_compressor const& bc, std::optional<std::string> meta,
|
||||
physical_block_cb_type physical_block_cb) {
|
||||
fragment_category cat, shared_byte_buffer data, block_compressor const& bc,
|
||||
std::optional<std::string> meta, physical_block_cb_type physical_block_cb) {
|
||||
if (!merger_) {
|
||||
DWARFS_THROW(runtime_error, "filesystem_writer not configured");
|
||||
}
|
||||
@ -840,7 +841,7 @@ void filesystem_writer_<LoggerPolicy>::finish_category(fragment_category cat) {
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_section_impl(
|
||||
section_type type, std::shared_ptr<block_data>&& data) {
|
||||
section_type type, shared_byte_buffer data) {
|
||||
auto& bc = get_compressor(type, std::nullopt);
|
||||
|
||||
uint32_t number;
|
||||
@ -1071,7 +1072,7 @@ void filesystem_writer_<LoggerPolicy>::copy_header(
|
||||
|
||||
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_block(
|
||||
fragment_category cat, std::shared_ptr<block_data>&& data,
|
||||
fragment_category cat, shared_byte_buffer data,
|
||||
physical_block_cb_type physical_block_cb, std::optional<std::string> meta) {
|
||||
write_block_impl(cat, std::move(data), compressor_for_category(cat.value()),
|
||||
std::move(meta), std::move(physical_block_cb));
|
||||
@ -1079,19 +1080,18 @@ void filesystem_writer_<LoggerPolicy>::write_block(
|
||||
|
||||
// Hands `data` (moved) to write_section_impl, tagged as a METADATA_V2_SCHEMA
// section.
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_metadata_v2_schema(
|
||||
std::shared_ptr<block_data>&& data) {
|
||||
shared_byte_buffer data) {
|
||||
write_section_impl(section_type::METADATA_V2_SCHEMA, std::move(data));
|
||||
}
|
||||
|
||||
// Hands `data` (moved) to write_section_impl, tagged as a METADATA_V2 section.
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_metadata_v2(
|
||||
std::shared_ptr<block_data>&& data) {
|
||||
shared_byte_buffer data) {
|
||||
write_section_impl(section_type::METADATA_V2, std::move(data));
|
||||
}
|
||||
|
||||
// Hands `data` (moved) to write_section_impl, tagged as a HISTORY section.
template <typename LoggerPolicy>
|
||||
void filesystem_writer_<LoggerPolicy>::write_history(
|
||||
std::shared_ptr<block_data>&& data) {
|
||||
void filesystem_writer_<LoggerPolicy>::write_history(shared_byte_buffer data) {
|
||||
write_section_impl(section_type::HISTORY, std::move(data));
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,8 @@
|
||||
#include <thrift/lib/cpp2/frozen/FrozenUtil.h>
|
||||
#include <thrift/lib/cpp2/protocol/Serializer.h>
|
||||
|
||||
#include <dwarfs/vector_byte_buffer.h>
|
||||
|
||||
#include <dwarfs/writer/internal/metadata_freezer.h>
|
||||
|
||||
#include <dwarfs/gen-cpp2/metadata_layouts.h>
|
||||
@ -34,8 +36,7 @@ namespace dwarfs::writer::internal {
|
||||
namespace {
|
||||
|
||||
template <class T>
|
||||
std::pair<std::vector<uint8_t>, std::vector<uint8_t>>
|
||||
freeze_to_buffer(T const& x) {
|
||||
std::pair<shared_byte_buffer, shared_byte_buffer> freeze_to_buffer(T const& x) {
|
||||
using namespace ::apache::thrift::frozen;
|
||||
|
||||
Layout<T> layout;
|
||||
@ -44,24 +45,22 @@ freeze_to_buffer(T const& x) {
|
||||
std::string schema;
|
||||
serializeRootLayout(layout, schema);
|
||||
|
||||
size_t schema_size = schema.size();
|
||||
auto schema_begin = reinterpret_cast<uint8_t const*>(schema.data());
|
||||
std::vector<uint8_t> schema_buffer(schema_begin, schema_begin + schema_size);
|
||||
auto schema_buffer = vector_byte_buffer::create(schema);
|
||||
|
||||
std::vector<uint8_t> data_buffer;
|
||||
data_buffer.resize(content_size, 0);
|
||||
auto data_buffer = vector_byte_buffer::create(content_size);
|
||||
|
||||
folly::MutableByteRange content_range(data_buffer.data(), data_buffer.size());
|
||||
ByteRangeFreezer::freeze(layout, x, content_range);
|
||||
|
||||
data_buffer.resize(data_buffer.size() - content_range.size());
|
||||
data_buffer.shrink_to_fit();
|
||||
|
||||
return {schema_buffer, data_buffer};
|
||||
return {schema_buffer.share(), data_buffer.share()};
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Freezes `data` by delegating to freeze_to_buffer and returns its result:
// a pair whose first element is the schema buffer and whose second element
// is the frozen data buffer.
std::pair<std::vector<uint8_t>, std::vector<uint8_t>>
|
||||
std::pair<shared_byte_buffer, shared_byte_buffer>
|
||||
metadata_freezer::freeze(thrift::metadata::metadata const& data) {
|
||||
return freeze_to_buffer(data);
|
||||
}
|
||||
|
@ -63,7 +63,6 @@
|
||||
#include <dwarfs/internal/features.h>
|
||||
#include <dwarfs/internal/string_table.h>
|
||||
#include <dwarfs/internal/worker_group.h>
|
||||
#include <dwarfs/writer/internal/block_data.h>
|
||||
#include <dwarfs/writer/internal/block_manager.h>
|
||||
#include <dwarfs/writer/internal/entry.h>
|
||||
#include <dwarfs/writer/internal/file_scanner.h>
|
||||
@ -1107,13 +1106,13 @@ void scanner_<LoggerPolicy>::scan(
|
||||
|
||||
LOG_VERBOSE << "uncompressed metadata size: " << size_with_unit(data.size());
|
||||
|
||||
fsw.write_metadata_v2_schema(std::make_shared<block_data>(std::move(schema)));
|
||||
fsw.write_metadata_v2(std::make_shared<block_data>(std::move(data)));
|
||||
fsw.write_metadata_v2_schema(schema);
|
||||
fsw.write_metadata_v2(data);
|
||||
|
||||
if (options_.enable_history) {
|
||||
history hist(options_.history);
|
||||
hist.append(options_.command_line_arguments);
|
||||
fsw.write_history(std::make_shared<block_data>(hist.serialize()));
|
||||
fsw.write_history(hist.serialize());
|
||||
}
|
||||
|
||||
LOG_INFO << "waiting for compression to finish...";
|
||||
|
@ -46,10 +46,10 @@
|
||||
#include <dwarfs/error.h>
|
||||
#include <dwarfs/logger.h>
|
||||
#include <dwarfs/util.h>
|
||||
#include <dwarfs/vector_byte_buffer.h>
|
||||
#include <dwarfs/writer/segmenter.h>
|
||||
#include <dwarfs/writer/writer_progress.h>
|
||||
|
||||
#include <dwarfs/writer/internal/block_data.h>
|
||||
#include <dwarfs/writer/internal/block_manager.h>
|
||||
#include <dwarfs/writer/internal/chunkable.h>
|
||||
#include <dwarfs/writer/internal/cyclic_hash.h>
|
||||
@ -576,23 +576,23 @@ class active_block : private GranularityPolicy {
|
||||
, filter_(bloom_filter_size)
|
||||
, repseqmap_{repseqmap}
|
||||
, repeating_collisions_{repcoll}
|
||||
, data_{std::make_shared<block_data>()} {
|
||||
, data_{vector_byte_buffer::create()} {
|
||||
DWARFS_CHECK((window_step & window_step_mask_) == 0,
|
||||
"window step size not a power of two");
|
||||
data_->reserve(this->frames_to_bytes(capacity_in_frames_));
|
||||
data_.reserve(this->frames_to_bytes(capacity_in_frames_));
|
||||
}
|
||||
|
||||
// Returns the stored num_ value for this block.
DWARFS_FORCE_INLINE size_t num() const { return num_; }
|
||||
|
||||
// Current fill level of the block: the byte size of data_ converted to
// frames via the granularity policy's bytes_to_frames().
DWARFS_FORCE_INLINE size_t size_in_frames() const {
|
||||
return this->bytes_to_frames(data_->size());
|
||||
return this->bytes_to_frames(data_.size());
|
||||
}
|
||||
|
||||
// True once the block holds exactly capacity_in_frames_ frames.
DWARFS_FORCE_INLINE bool full() const {
|
||||
return size_in_frames() == capacity_in_frames_;
|
||||
}
|
||||
|
||||
// Returns the block's data_ handle by value.
// NOTE(review): the returned handle presumably shares storage with data_
// rather than deep-copying the bytes -- confirm against the handle type.
DWARFS_FORCE_INLINE std::shared_ptr<block_data> data() const { return data_; }
|
||||
DWARFS_FORCE_INLINE mutable_byte_buffer data() const { return data_; }
|
||||
|
||||
DWARFS_FORCE_INLINE void
|
||||
append_bytes(std::span<uint8_t const> data, bloom_filter& global_filter);
|
||||
@ -637,7 +637,7 @@ class active_block : private GranularityPolicy {
|
||||
fast_multimap<hash_t, offset_t, num_inline_offsets> offsets_;
|
||||
repeating_sequence_map_type const& repseqmap_;
|
||||
repeating_collisions_map_type& repeating_collisions_;
|
||||
std::shared_ptr<block_data> data_;
|
||||
mutable_byte_buffer data_;
|
||||
};
|
||||
|
||||
class segmenter_progress : public progress::context {
|
||||
@ -841,7 +841,7 @@ DWARFS_FORCE_INLINE bool
|
||||
active_block<LoggerPolicy, GranularityPolicy>::is_existing_repeating_sequence(
|
||||
hash_t hashval, size_t offset) {
|
||||
if (auto it = repseqmap_.find(hashval); it != repseqmap_.end()) [[unlikely]] {
|
||||
auto& raw = data_->vec();
|
||||
auto& raw = data_.raw_vector();
|
||||
auto winbeg = raw.begin() + frames_to_bytes(offset);
|
||||
auto winend = winbeg + frames_to_bytes(window_size_);
|
||||
auto byte = *winbeg;
|
||||
@ -881,7 +881,7 @@ active_block<LoggerPolicy, GranularityPolicy>::append_bytes(
|
||||
granular_span_adapter<uint8_t const, GranularityPolicy>>(data);
|
||||
|
||||
auto v = this->template create<
|
||||
granular_vector_adapter<uint8_t, GranularityPolicy>>(data_->vec());
|
||||
granular_vector_adapter<uint8_t, GranularityPolicy>>(data_.raw_vector());
|
||||
|
||||
auto offset = v.size();
|
||||
|
||||
@ -920,7 +920,7 @@ void segment_match<LoggerPolicy, GranularityPolicy>::verify_and_extend(
|
||||
size_t pos, size_t len, size_t begin, size_t end) {
|
||||
auto v = this->template create<
|
||||
granular_vector_adapter<uint8_t, GranularityPolicy>>(
|
||||
block_->data()->vec());
|
||||
block_->data().raw_vector());
|
||||
|
||||
// First, check if the regions actually match
|
||||
if (v.compare(offset_, data.subspan(pos, len)) == 0) {
|
||||
@ -1034,7 +1034,7 @@ DWARFS_FORCE_INLINE void
|
||||
segmenter_<LoggerPolicy, SegmentingPolicy>::block_ready() {
|
||||
auto& block = blocks_.back();
|
||||
block.finalize(stats_);
|
||||
block_ready_(block.data(), block.num());
|
||||
block_ready_(block.data().share(), block.num());
|
||||
++prog_.block_count;
|
||||
}
|
||||
|
||||
|
@ -60,7 +60,7 @@ std::vector<T> multiplex(std::vector<std::vector<T>> const& in) {
|
||||
}
|
||||
|
||||
template <typename T = int32_t>
|
||||
std::vector<uint8_t>
|
||||
shared_byte_buffer
|
||||
make_test_data(int channels, int samples, int bytes, int bits,
|
||||
pcm_sample_endianness end, pcm_sample_signedness sig,
|
||||
pcm_sample_padding pad) {
|
||||
@ -70,10 +70,11 @@ make_test_data(int channels, int samples, int bytes, int bits,
|
||||
make_sine<T>(bits, samples, 3.1 * ((599 * (c + 1)) % 256)));
|
||||
}
|
||||
auto muxed = multiplex(data);
|
||||
std::vector<uint8_t> out(bytes * channels * samples);
|
||||
auto out = vector_byte_buffer::create();
|
||||
out.resize(bytes * channels * samples);
|
||||
pcm_sample_transformer<T> xfm(end, sig, pad, bytes, bits);
|
||||
xfm.pack(out, muxed);
|
||||
return out;
|
||||
xfm.pack(out.span(), muxed);
|
||||
return out.share();
|
||||
}
|
||||
|
||||
struct data_params {
|
||||
@ -151,7 +152,7 @@ TEST(flac_compressor, basic) {
|
||||
EXPECT_LT(compressed.size(), data.size() / 2);
|
||||
|
||||
auto decompressed =
|
||||
block_decompressor::decompress(compression_type::FLAC, compressed);
|
||||
block_decompressor::decompress(compression_type::FLAC, compressed.span());
|
||||
|
||||
EXPECT_EQ(data, decompressed);
|
||||
}
|
||||
@ -184,7 +185,7 @@ TEST_P(flac_param, combinations) {
|
||||
EXPECT_LT(compressed.size(), data.size() / 2);
|
||||
|
||||
auto decompressed =
|
||||
block_decompressor::decompress(compression_type::FLAC, compressed);
|
||||
block_decompressor::decompress(compression_type::FLAC, compressed.span());
|
||||
|
||||
EXPECT_EQ(data, decompressed);
|
||||
}
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <range/v3/view/concat.hpp>
|
||||
|
||||
#include <dwarfs/block_compressor.h>
|
||||
#include <dwarfs/vector_byte_buffer.h>
|
||||
|
||||
using namespace dwarfs;
|
||||
|
||||
@ -61,8 +62,7 @@ generate_random_data(std::mt19937_64& rng, size_t count,
|
||||
}
|
||||
|
||||
template <std::unsigned_integral ValueType>
|
||||
std::vector<uint8_t>
|
||||
make_test_data(int components, int pixels, int unused_lsb) {
|
||||
shared_byte_buffer make_test_data(int components, int pixels, int unused_lsb) {
|
||||
std::mt19937_64 rng(42);
|
||||
std::uniform_int_distribution<ValueType> any_value(
|
||||
0, std::numeric_limits<ValueType>::max());
|
||||
@ -93,11 +93,11 @@ make_test_data(int components, int pixels, int unused_lsb) {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint8_t> out;
|
||||
auto out = vector_byte_buffer::create();
|
||||
out.resize(tmp.size() * sizeof(ValueType));
|
||||
std::memcpy(out.data(), tmp.data(), out.size());
|
||||
|
||||
return out;
|
||||
return out.share();
|
||||
}
|
||||
|
||||
struct data_params {
|
||||
@ -151,8 +151,8 @@ TEST_P(ricepp_param, combinations) {
|
||||
|
||||
EXPECT_LT(compressed.size(), 7 * data.size() / 10);
|
||||
|
||||
auto decompressed =
|
||||
block_decompressor::decompress(compression_type::RICEPP, compressed);
|
||||
auto decompressed = block_decompressor::decompress(compression_type::RICEPP,
|
||||
compressed.span());
|
||||
|
||||
ASSERT_EQ(data.size(), decompressed.size());
|
||||
EXPECT_EQ(data, decompressed);
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include <dwarfs/writer/segmenter.h>
|
||||
#include <dwarfs/writer/writer_progress.h>
|
||||
|
||||
#include <dwarfs/writer/internal/block_data.h>
|
||||
#include <dwarfs/writer/internal/block_manager.h>
|
||||
#include <dwarfs/writer/internal/chunkable.h>
|
||||
|
||||
@ -145,13 +144,12 @@ void run_segmenter_test(unsigned iters, unsigned granularity,
|
||||
dwarfs::writer::writer_progress prog;
|
||||
auto blkmgr = std::make_shared<dwarfs::writer::internal::block_manager>();
|
||||
|
||||
std::vector<std::shared_ptr<dwarfs::writer::internal::block_data>> written;
|
||||
std::vector<dwarfs::shared_byte_buffer> written;
|
||||
|
||||
dwarfs::writer::segmenter seg(
|
||||
lgr, prog, blkmgr, cfg, cc, total_size,
|
||||
[&written,
|
||||
blkmgr](std::shared_ptr<dwarfs::writer::internal::block_data> blk,
|
||||
auto logical_block_num) {
|
||||
[&written, blkmgr](dwarfs::shared_byte_buffer blk,
|
||||
auto logical_block_num) {
|
||||
auto physical_block_num = written.size();
|
||||
written.push_back(blk);
|
||||
blkmgr->set_written_block(logical_block_num, physical_block_num, 0);
|
||||
@ -167,7 +165,7 @@ void run_segmenter_test(unsigned iters, unsigned granularity,
|
||||
size_t segmented [[maybe_unused]]{0};
|
||||
|
||||
for (auto const& blk : written) {
|
||||
segmented += blk->size();
|
||||
segmented += blk.size();
|
||||
}
|
||||
|
||||
// std::cerr << total_size << " -> " << segmented << fmt::format("
|
||||
|
Loading…
x
Reference in New Issue
Block a user