dwarfs/test/ricepp_compressor_test.cpp
Marcus Holland-Moritz 9996748134 feat: replace vector_byte_buffer with malloc_byte_buffer
The latter doesn't always explicitly initialize the allocated memory,
which can help improve performance.
2025-04-09 21:42:32 +02:00

166 lines
4.9 KiB
C++

/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*
* SPDX-License-Identifier: GPL-3.0-only
*/
#include <algorithm>
#include <array>
#include <concepts>
#include <cstdint>
#include <random>
#include <gtest/gtest.h>
#include <fmt/format.h>
#include <folly/lang/Bits.h>
#include <nlohmann/json.hpp>
#include <range/v3/range/conversion.hpp>
#include <range/v3/view/concat.hpp>
#include <dwarfs/block_compressor.h>
#include <dwarfs/block_decompressor.h>
#include <dwarfs/malloc_byte_buffer.h>
using namespace dwarfs;
namespace {
template <std::unsigned_integral ValueType>
std::vector<ValueType>
generate_random_data(std::mt19937_64& rng, size_t count,
unsigned unused_lsb_count = 0, unsigned full_chance = 50) {
std::uniform_int_distribution<unsigned> dist(0, full_chance);
std::uniform_int_distribution<ValueType> noise(30000, 31000);
std::uniform_int_distribution<ValueType> full(
0, std::numeric_limits<ValueType>::max());
std::vector<ValueType> data(count);
ValueType mask = static_cast<ValueType>(std::numeric_limits<ValueType>::max()
<< unused_lsb_count);
std::generate(data.begin(), data.end(), [&]() {
auto v = dist(rng) == 0 ? full(rng) : noise(rng);
return folly::Endian::big<ValueType>(v & mask);
});
return data;
}
template <std::unsigned_integral ValueType>
shared_byte_buffer make_test_data(int components, int pixels, int unused_lsb) {
std::mt19937_64 rng(42);
std::uniform_int_distribution<ValueType> any_value(
0, std::numeric_limits<ValueType>::max());
std::vector<std::vector<ValueType>> data(components);
auto random_value = [&]() {
return folly::Endian::big<ValueType>(any_value(rng) << unused_lsb);
};
for (auto& d : data) {
auto d1 = generate_random_data<ValueType>(rng, pixels / 3, unused_lsb);
auto d2 = std::vector<ValueType>(pixels / 3, random_value());
auto d3 = generate_random_data<ValueType>(
rng, pixels - (d1.size() + d2.size()), unused_lsb, 0);
d = ranges::views::concat(d1, d2, d3) | ranges::to_vector;
}
if (data.size() < 1 || data.size() > 2) {
throw std::runtime_error("invalid number of components");
}
std::vector<ValueType> tmp;
tmp.resize(data.size() * data[0].size());
for (size_t i = 0; i < data[0].size(); ++i) {
for (size_t j = 0; j < data.size(); ++j) {
tmp[i * data.size() + j] = data[j][i];
}
}
auto out = malloc_byte_buffer::create();
out.resize(tmp.size() * sizeof(ValueType));
std::memcpy(out.data(), tmp.data(), out.size());
return out.share();
}
struct data_params {
data_params(int components, int pixels, int unused, int block = 0)
: num_components{components}
, num_pixels{pixels}
, unused_lsb{unused}
, block_size{block} {}
int num_components;
int num_pixels;
int unused_lsb;
int block_size;
};
std::ostream& operator<<(std::ostream& os, data_params const& p) {
os << "{comp=" << p.num_components << ", pix=" << p.num_pixels
<< ", lsb=" << p.unused_lsb << ", block=" << p.block_size << "}";
return os;
}
std::vector<data_params> const data_parameters{
// clang-format off
{ 1, 1000, 0, 16 },
{ 2, 1000, 2, 32 },
{ 1, 1000, 4, 64 },
{ 2, 3333, 6, 99 },
// clang-format on
};
} // namespace
class ricepp_param : public testing::TestWithParam<data_params> {};
TEST_P(ricepp_param, combinations) {
auto param = GetParam();
nlohmann::json meta{
{"endianness", "big"},
{"bytes_per_sample", 2},
{"unused_lsb_count", param.unused_lsb},
{"component_count", param.num_components},
};
auto const data = make_test_data<uint16_t>(
param.num_components, param.num_pixels, param.unused_lsb);
block_compressor comp(fmt::format("ricepp:block_size={}", param.block_size));
auto compressed = comp.compress(data, meta.dump());
EXPECT_LT(compressed.size(), 7 * data.size() / 10);
auto decompressed = block_decompressor::decompress(compression_type::RICEPP,
compressed.span());
ASSERT_EQ(data.size(), decompressed.size());
EXPECT_EQ(data, decompressed);
}
INSTANTIATE_TEST_SUITE_P(dwarfs, ricepp_param,
::testing::ValuesIn(data_parameters));