mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-08 03:49:44 -04:00
feat(ricepp): x86 CPU specific implementations
This commit is contained in:
parent
2f13a26bab
commit
8fc589b30b
@ -39,11 +39,45 @@ if(WIN32)
|
||||
add_compile_definitions(_WIN32_WINNT=0x0601 WINVER=0x0601)
|
||||
endif()
|
||||
|
||||
add_library(ricepp ricepp.cpp)
|
||||
add_library(ricepp_fallback OBJECT ricepp_cpuspecific.cpp)
|
||||
target_compile_definitions(ricepp_fallback PRIVATE RICEPP_CPU_VARIANT=fallback)
|
||||
list(APPEND RICEPP_LIBS_CPUSPECIFIC ricepp_fallback)
|
||||
|
||||
target_include_directories(ricepp PUBLIC include)
|
||||
if(NOT (WIN32 OR CMAKE_CXX_FLAGS MATCHES "-march="))
|
||||
CHECK_CXX_COMPILER_FLAG(-mbmi2 COMPILER_SUPPORTS_MBMI2)
|
||||
CHECK_CXX_COMPILER_FLAG(-mavx512vl COMPILER_SUPPORTS_MAVX512VL)
|
||||
CHECK_CXX_COMPILER_FLAG(-mavx512vbmi COMPILER_SUPPORTS_MAVX512VBMI)
|
||||
|
||||
if(COMPILER_SUPPORTS_MBMI2)
|
||||
add_library(ricepp_bmi2 OBJECT ricepp_cpuspecific.cpp)
|
||||
target_compile_options(ricepp_bmi2 PRIVATE -mbmi2)
|
||||
target_compile_definitions(ricepp_bmi2 PRIVATE RICEPP_CPU_VARIANT=has_bmi2)
|
||||
list(APPEND RICEPP_LIBS_CPUSPECIFIC ricepp_bmi2)
|
||||
list(APPEND RICEPP_CPU_SUPPORT RICEPP_CPU_BMI2)
|
||||
|
||||
if(COMPILER_SUPPORTS_MAVX512VL AND COMPILER_SUPPORTS_MAVX512VBMI)
|
||||
add_library(ricepp_bmi2_avx512 OBJECT ricepp_cpuspecific.cpp)
|
||||
target_compile_options(ricepp_bmi2_avx512 PRIVATE -mbmi2 -mavx512vl -mavx512vbmi)
|
||||
target_compile_definitions(ricepp_bmi2_avx512 PRIVATE RICEPP_CPU_VARIANT=has_bmi2_avx512)
|
||||
list(APPEND RICEPP_LIBS_CPUSPECIFIC ricepp_bmi2_avx512)
|
||||
list(APPEND RICEPP_CPU_SUPPORT RICEPP_CPU_BMI2_AVX512)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
foreach(target ${RICEPP_LIBS_CPUSPECIFIC})
|
||||
message(STATUS "[ricepp] adding CPU target: ${target}")
|
||||
target_include_directories(${target} PUBLIC include)
|
||||
target_link_libraries(${target} PUBLIC range-v3)
|
||||
target_compile_features(${target} PUBLIC cxx_std_20)
|
||||
list(APPEND RICEPP_OBJECTS_CPUSPECIFIC $<TARGET_OBJECTS:${target}>)
|
||||
endforeach()
|
||||
|
||||
add_library(ricepp ricepp.cpp ${RICEPP_OBJECTS_CPUSPECIFIC})
|
||||
target_link_libraries(ricepp PUBLIC range-v3)
|
||||
target_include_directories(ricepp PUBLIC include)
|
||||
target_compile_features(ricepp PUBLIC cxx_std_20)
|
||||
target_compile_definitions(ricepp PRIVATE ${RICEPP_CPU_SUPPORT})
|
||||
|
||||
# # TODO: remove/rework
|
||||
# add_executable(ricepp_demo ricepp_demo.cpp)
|
||||
|
@ -19,251 +19,60 @@
|
||||
* along with ricepp. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string_view>
|
||||
|
||||
#include <ricepp/bitstream_reader.h>
|
||||
#include <ricepp/bitstream_writer.h>
|
||||
#include <ricepp/byteswap.h>
|
||||
#include <ricepp/codec.h>
|
||||
#include <ricepp/ricepp.h>
|
||||
|
||||
#include "ricepp_cpuspecific.h"
|
||||
|
||||
namespace ricepp {
|
||||
|
||||
namespace {
|
||||
|
||||
template <std::unsigned_integral ValueType>
|
||||
class dynamic_pixel_traits {
|
||||
public:
|
||||
using value_type = ValueType;
|
||||
static constexpr size_t const kBitCount =
|
||||
std::numeric_limits<value_type>::digits;
|
||||
static constexpr value_type const kAllOnes =
|
||||
std::numeric_limits<value_type>::max();
|
||||
detail::cpu_variant get_cpu_variant_init() {
|
||||
#ifndef _WIN32
|
||||
#if defined(__has_builtin)
|
||||
#if __has_builtin(__builtin_cpu_supports)
|
||||
__builtin_cpu_init();
|
||||
|
||||
dynamic_pixel_traits(std::endian byteorder,
|
||||
unsigned unused_lsb_count) noexcept
|
||||
: unused_lsb_count_{unused_lsb_count}
|
||||
, byteorder_{byteorder}
|
||||
#ifndef NDEBUG
|
||||
, lsb_mask_{static_cast<value_type>(~(kAllOnes << unused_lsb_count))}
|
||||
, msb_mask_{static_cast<value_type>(~(kAllOnes >> unused_lsb_count))}
|
||||
bool const has_avx512vl = __builtin_cpu_supports("avx512vl");
|
||||
bool const has_avx512vbmi = __builtin_cpu_supports("avx512vbmi");
|
||||
bool const has_bmi2 = __builtin_cpu_supports("bmi2");
|
||||
|
||||
if (has_avx512vl && has_avx512vbmi && has_bmi2) {
|
||||
return detail::cpu_variant::has_bmi2_avx512;
|
||||
}
|
||||
|
||||
if (has_bmi2) {
|
||||
return detail::cpu_variant::has_bmi2;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
{
|
||||
assert(unused_lsb_count < kBitCount);
|
||||
}
|
||||
|
||||
[[nodiscard]] value_type read(value_type value) const noexcept {
|
||||
value_type tmp = byteswap(value, byteorder_);
|
||||
assert((tmp & lsb_mask_) == 0);
|
||||
return tmp >> unused_lsb_count_;
|
||||
}
|
||||
|
||||
[[nodiscard]] value_type write(value_type value) const noexcept {
|
||||
assert((value & msb_mask_) == 0);
|
||||
return byteswap(static_cast<value_type>(value << unused_lsb_count_),
|
||||
byteorder_);
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned const unused_lsb_count_;
|
||||
std::endian const byteorder_;
|
||||
#ifndef NDEBUG
|
||||
value_type const lsb_mask_;
|
||||
value_type const msb_mask_;
|
||||
#endif
|
||||
};
|
||||
|
||||
template <std::unsigned_integral ValueType, std::endian ByteOrder,
|
||||
unsigned UnusedLsbCount>
|
||||
class static_pixel_traits {
|
||||
public:
|
||||
using value_type = ValueType;
|
||||
static constexpr size_t const kBitCount =
|
||||
std::numeric_limits<value_type>::digits;
|
||||
static constexpr value_type const kAllOnes =
|
||||
std::numeric_limits<value_type>::max();
|
||||
static constexpr std::endian const kByteOrder = ByteOrder;
|
||||
static constexpr unsigned const kUnusedLsbCount = UnusedLsbCount;
|
||||
static constexpr value_type const kLsbMask =
|
||||
static_cast<value_type>(~(kAllOnes << kUnusedLsbCount));
|
||||
static constexpr value_type const kMsbMask =
|
||||
static_cast<value_type>(~(kAllOnes >> kUnusedLsbCount));
|
||||
static_assert(kUnusedLsbCount < kBitCount);
|
||||
|
||||
[[nodiscard]] static value_type read(value_type value) noexcept {
|
||||
value_type tmp = byteswap<kByteOrder>(value);
|
||||
assert((tmp & kLsbMask) == 0);
|
||||
return tmp >> kUnusedLsbCount;
|
||||
}
|
||||
|
||||
[[nodiscard]] static value_type write(value_type value) noexcept {
|
||||
assert((value & kMsbMask) == 0);
|
||||
return byteswap<kByteOrder>(
|
||||
static_cast<value_type>(value << kUnusedLsbCount));
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t MaxBlockSize, size_t ComponentStreamCount,
|
||||
typename PixelTraits>
|
||||
class codec_impl final
|
||||
: public codec_interface<typename PixelTraits::value_type>,
|
||||
public PixelTraits {
|
||||
public:
|
||||
using pixel_type = typename PixelTraits::value_type;
|
||||
using codec_type = codec<MaxBlockSize, ComponentStreamCount, PixelTraits>;
|
||||
|
||||
codec_impl(PixelTraits const& traits, size_t block_size)
|
||||
: PixelTraits{traits}
|
||||
, block_size_{block_size} {}
|
||||
|
||||
std::vector<uint8_t>
|
||||
encode(std::span<pixel_type const> input) const override {
|
||||
return encode_impl(input.data(), input.size());
|
||||
}
|
||||
|
||||
size_t worst_case_encoded_bytes(size_t pixel_count) const override {
|
||||
codec_type codec{block_size_, *this};
|
||||
return worst_case_encoded_bytes_impl(codec, pixel_count);
|
||||
}
|
||||
|
||||
size_t
|
||||
worst_case_encoded_bytes(std::span<pixel_type const> input) const override {
|
||||
return worst_case_encoded_bytes(input.size());
|
||||
}
|
||||
|
||||
std::span<uint8_t> encode(std::span<uint8_t> output,
|
||||
std::span<pixel_type const> input) const override {
|
||||
return encode_impl(output.data(), output.size(), input.data(),
|
||||
input.size());
|
||||
}
|
||||
|
||||
void decode(std::span<pixel_type> output,
|
||||
std::span<uint8_t const> input) const override {
|
||||
decode_impl(output.data(), output.size(), input.data(), input.size());
|
||||
}
|
||||
|
||||
private:
|
||||
size_t worst_case_encoded_bytes_impl(codec_type& codec, size_t size) const {
|
||||
return (codec.worst_case_bit_count(size) + 8 - 1) / 8;
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
encode_impl(pixel_type const* __restrict input, size_t size) const {
|
||||
return encode_impl(std::span<pixel_type const>{input, size});
|
||||
}
|
||||
|
||||
std::span<uint8_t>
|
||||
encode_impl(uint8_t* __restrict output, size_t output_size,
|
||||
pixel_type const* __restrict input, size_t input_size) const {
|
||||
return encode_impl(std::span<uint8_t>{output, output_size},
|
||||
std::span<pixel_type const>{input, input_size});
|
||||
}
|
||||
|
||||
void decode_impl(pixel_type* __restrict output, size_t output_size,
|
||||
uint8_t const* __restrict input, size_t input_size) const {
|
||||
return decode_impl(std::span<pixel_type>{output, output_size},
|
||||
std::span<uint8_t const>{input, input_size});
|
||||
}
|
||||
|
||||
std::vector<uint8_t> encode_impl(std::span<pixel_type const> input) const {
|
||||
std::vector<uint8_t> output;
|
||||
codec_type codec{block_size_, *this};
|
||||
output.resize(worst_case_encoded_bytes_impl(codec, input.size()));
|
||||
bitstream_writer writer{output.begin()};
|
||||
codec.encode(input, writer);
|
||||
output.resize(std::distance(output.begin(), writer.iterator()));
|
||||
return output;
|
||||
}
|
||||
|
||||
std::span<uint8_t> encode_impl(std::span<uint8_t> output,
|
||||
std::span<pixel_type const> input) const {
|
||||
codec_type codec{block_size_, *this};
|
||||
assert(output.size() >= worst_case_encoded_bytes_impl(codec, input.size()));
|
||||
bitstream_writer writer{output.begin()};
|
||||
codec.encode(input, writer);
|
||||
return std::span<uint8_t>{output.begin(), writer.iterator()};
|
||||
}
|
||||
|
||||
void decode_impl(std::span<pixel_type> output,
|
||||
std::span<uint8_t const> input) const {
|
||||
bitstream_reader reader{input.begin(), input.end()};
|
||||
codec_type codec{block_size_, *this};
|
||||
codec.decode(output, reader);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t const block_size_;
|
||||
};
|
||||
|
||||
template <size_t ComponentStreamCount, typename PixelTraits>
|
||||
std::unique_ptr<codec_interface<typename PixelTraits::value_type>>
|
||||
create_codec_(size_t block_size, PixelTraits const& traits) {
|
||||
if (block_size <= 512) {
|
||||
return std::make_unique<codec_impl<512, ComponentStreamCount, PixelTraits>>(
|
||||
traits, block_size);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
return detail::cpu_variant::fallback;
|
||||
}
|
||||
|
||||
template <typename PixelTraits>
|
||||
std::unique_ptr<codec_interface<typename PixelTraits::value_type>>
|
||||
create_codec_(size_t block_size, size_t component_stream_count,
|
||||
PixelTraits const& traits) {
|
||||
switch (component_stream_count) {
|
||||
case 1:
|
||||
return create_codec_<1, PixelTraits>(block_size, traits);
|
||||
|
||||
case 2:
|
||||
return create_codec_<2, PixelTraits>(block_size, traits);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
/**
 * Return the CPU variant to dispatch to.
 *
 * The value is obtained from get_cpu_variant_init() the first time this
 * function runs and is cached in a function-local static, so later calls
 * only hand back the stored result.
 */
detail::cpu_variant get_cpu_variant() {
  static auto const detected = get_cpu_variant_init();
  return detected;
}
|
||||
|
||||
template <std::unsigned_integral PixelValueType, std::endian ByteOrder,
|
||||
unsigned UnusedLsbCount>
|
||||
std::unique_ptr<codec_interface<PixelValueType>>
|
||||
create_codec_(size_t block_size, size_t component_stream_count) {
|
||||
using pixel_traits =
|
||||
static_pixel_traits<PixelValueType, ByteOrder, UnusedLsbCount>;
|
||||
|
||||
if (auto codec = create_codec_<pixel_traits>(
|
||||
block_size, component_stream_count, pixel_traits{})) {
|
||||
return codec;
|
||||
void show_cpu_variant(std::string_view variant) {
|
||||
if (std::getenv("RICEPP_SHOW_CPU_VARIANT")) {
|
||||
std::cerr << "ricepp: using " << variant << " CPU variant\n";
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <std::unsigned_integral PixelValueType>
|
||||
std::unique_ptr<codec_interface<PixelValueType>>
|
||||
create_codec_(codec_config const& config) {
|
||||
if (config.byteorder == std::endian::big) {
|
||||
switch (config.unused_lsb_count) {
|
||||
case 0:
|
||||
return create_codec_<PixelValueType, std::endian::big, 0>(
|
||||
config.block_size, config.component_stream_count);
|
||||
|
||||
case 2:
|
||||
return create_codec_<PixelValueType, std::endian::big, 2>(
|
||||
config.block_size, config.component_stream_count);
|
||||
|
||||
case 4:
|
||||
return create_codec_<PixelValueType, std::endian::big, 4>(
|
||||
config.block_size, config.component_stream_count);
|
||||
}
|
||||
}
|
||||
|
||||
using pixel_traits = dynamic_pixel_traits<PixelValueType>;
|
||||
|
||||
return create_codec_<pixel_traits>(
|
||||
config.block_size, config.component_stream_count,
|
||||
pixel_traits{config.byteorder, config.unused_lsb_count});
|
||||
void show_cpu_variant_once(std::string_view variant) {
|
||||
static auto const _ = [&variant]() {
|
||||
show_cpu_variant(variant);
|
||||
return true;
|
||||
}();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -271,11 +80,30 @@ create_codec_(codec_config const& config) {
|
||||
template <>
|
||||
std::unique_ptr<codec_interface<uint16_t>>
|
||||
create_codec<uint16_t>(codec_config const& config) {
|
||||
if (auto codec = create_codec_<uint16_t>(config)) {
|
||||
return codec;
|
||||
switch (get_cpu_variant()) {
|
||||
#ifdef RICEPP_CPU_BMI2_AVX512
|
||||
case detail::cpu_variant::has_bmi2_avx512:
|
||||
show_cpu_variant_once("BMI2+AVX512");
|
||||
return detail::create_codec_cpuspecific_<
|
||||
uint16_t, detail::cpu_variant::has_bmi2_avx512>(config);
|
||||
#endif
|
||||
|
||||
#ifdef RICEPP_CPU_BMI2
|
||||
case detail::cpu_variant::has_bmi2:
|
||||
show_cpu_variant_once("BMI2");
|
||||
return detail::create_codec_cpuspecific_<uint16_t,
|
||||
detail::cpu_variant::has_bmi2>(
|
||||
config);
|
||||
#endif
|
||||
|
||||
default:
|
||||
show_cpu_variant_once("fallback");
|
||||
return detail::create_codec_cpuspecific_<uint16_t,
|
||||
detail::cpu_variant::fallback>(
|
||||
config);
|
||||
}
|
||||
|
||||
throw std::runtime_error("Unsupported configuration");
|
||||
throw std::runtime_error("internal error: unknown CPU variant");
|
||||
}
|
||||
|
||||
} // namespace ricepp
|
||||
|
289
ricepp/ricepp_cpuspecific.cpp
Normal file
289
ricepp/ricepp_cpuspecific.cpp
Normal file
@ -0,0 +1,289 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of ricepp.
|
||||
*
|
||||
* ricepp is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* ricepp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with ricepp. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <ricepp/bitstream_reader.h>
|
||||
#include <ricepp/bitstream_writer.h>
|
||||
#include <ricepp/byteswap.h>
|
||||
#include <ricepp/codec.h>
|
||||
#include <ricepp/ricepp.h>
|
||||
|
||||
#include "ricepp_cpuspecific.h"
|
||||
|
||||
namespace ricepp {
|
||||
|
||||
namespace {
|
||||
|
||||
template <std::unsigned_integral ValueType>
|
||||
class dynamic_pixel_traits {
|
||||
public:
|
||||
using value_type = ValueType;
|
||||
static constexpr size_t const kBitCount =
|
||||
std::numeric_limits<value_type>::digits;
|
||||
static constexpr value_type const kAllOnes =
|
||||
std::numeric_limits<value_type>::max();
|
||||
|
||||
dynamic_pixel_traits(std::endian byteorder,
|
||||
unsigned unused_lsb_count) noexcept
|
||||
: unused_lsb_count_{unused_lsb_count}
|
||||
, byteorder_{byteorder}
|
||||
#ifndef NDEBUG
|
||||
, lsb_mask_{static_cast<value_type>(~(kAllOnes << unused_lsb_count))}
|
||||
, msb_mask_{static_cast<value_type>(~(kAllOnes >> unused_lsb_count))}
|
||||
#endif
|
||||
{
|
||||
assert(unused_lsb_count < kBitCount);
|
||||
}
|
||||
|
||||
[[nodiscard]] value_type read(value_type value) const noexcept {
|
||||
value_type tmp = byteswap(value, byteorder_);
|
||||
assert((tmp & lsb_mask_) == 0);
|
||||
return tmp >> unused_lsb_count_;
|
||||
}
|
||||
|
||||
[[nodiscard]] value_type write(value_type value) const noexcept {
|
||||
assert((value & msb_mask_) == 0);
|
||||
return byteswap(static_cast<value_type>(value << unused_lsb_count_),
|
||||
byteorder_);
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned const unused_lsb_count_;
|
||||
std::endian const byteorder_;
|
||||
#ifndef NDEBUG
|
||||
value_type const lsb_mask_;
|
||||
value_type const msb_mask_;
|
||||
#endif
|
||||
};
|
||||
|
||||
template <std::unsigned_integral ValueType, std::endian ByteOrder,
|
||||
unsigned UnusedLsbCount>
|
||||
class static_pixel_traits {
|
||||
public:
|
||||
using value_type = ValueType;
|
||||
static constexpr size_t const kBitCount =
|
||||
std::numeric_limits<value_type>::digits;
|
||||
static constexpr value_type const kAllOnes =
|
||||
std::numeric_limits<value_type>::max();
|
||||
static constexpr std::endian const kByteOrder = ByteOrder;
|
||||
static constexpr unsigned const kUnusedLsbCount = UnusedLsbCount;
|
||||
static constexpr value_type const kLsbMask =
|
||||
static_cast<value_type>(~(kAllOnes << kUnusedLsbCount));
|
||||
static constexpr value_type const kMsbMask =
|
||||
static_cast<value_type>(~(kAllOnes >> kUnusedLsbCount));
|
||||
static_assert(kUnusedLsbCount < kBitCount);
|
||||
|
||||
[[nodiscard]] static value_type read(value_type value) noexcept {
|
||||
value_type tmp = byteswap<kByteOrder>(value);
|
||||
assert((tmp & kLsbMask) == 0);
|
||||
return tmp >> kUnusedLsbCount;
|
||||
}
|
||||
|
||||
[[nodiscard]] static value_type write(value_type value) noexcept {
|
||||
assert((value & kMsbMask) == 0);
|
||||
return byteswap<kByteOrder>(
|
||||
static_cast<value_type>(value << kUnusedLsbCount));
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t MaxBlockSize, size_t ComponentStreamCount,
|
||||
typename PixelTraits>
|
||||
class codec_impl final
|
||||
: public codec_interface<typename PixelTraits::value_type>,
|
||||
public PixelTraits {
|
||||
public:
|
||||
using pixel_type = typename PixelTraits::value_type;
|
||||
using codec_type = codec<MaxBlockSize, ComponentStreamCount, PixelTraits>;
|
||||
|
||||
codec_impl(PixelTraits const& traits, size_t block_size)
|
||||
: PixelTraits{traits}
|
||||
, block_size_{block_size} {}
|
||||
|
||||
std::vector<uint8_t>
|
||||
encode(std::span<pixel_type const> input) const override {
|
||||
return encode_impl(input.data(), input.size());
|
||||
}
|
||||
|
||||
size_t worst_case_encoded_bytes(size_t pixel_count) const override {
|
||||
codec_type codec{block_size_, *this};
|
||||
return worst_case_encoded_bytes_impl(codec, pixel_count);
|
||||
}
|
||||
|
||||
size_t
|
||||
worst_case_encoded_bytes(std::span<pixel_type const> input) const override {
|
||||
return worst_case_encoded_bytes(input.size());
|
||||
}
|
||||
|
||||
std::span<uint8_t> encode(std::span<uint8_t> output,
|
||||
std::span<pixel_type const> input) const override {
|
||||
return encode_impl(output.data(), output.size(), input.data(),
|
||||
input.size());
|
||||
}
|
||||
|
||||
void decode(std::span<pixel_type> output,
|
||||
std::span<uint8_t const> input) const override {
|
||||
decode_impl(output.data(), output.size(), input.data(), input.size());
|
||||
}
|
||||
|
||||
private:
|
||||
size_t worst_case_encoded_bytes_impl(codec_type& codec, size_t size) const {
|
||||
return (codec.worst_case_bit_count(size) + 8 - 1) / 8;
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
encode_impl(pixel_type const* __restrict input, size_t size) const {
|
||||
return encode_impl(std::span<pixel_type const>{input, size});
|
||||
}
|
||||
|
||||
std::span<uint8_t>
|
||||
encode_impl(uint8_t* __restrict output, size_t output_size,
|
||||
pixel_type const* __restrict input, size_t input_size) const {
|
||||
return encode_impl(std::span<uint8_t>{output, output_size},
|
||||
std::span<pixel_type const>{input, input_size});
|
||||
}
|
||||
|
||||
void decode_impl(pixel_type* __restrict output, size_t output_size,
|
||||
uint8_t const* __restrict input, size_t input_size) const {
|
||||
return decode_impl(std::span<pixel_type>{output, output_size},
|
||||
std::span<uint8_t const>{input, input_size});
|
||||
}
|
||||
|
||||
std::vector<uint8_t> encode_impl(std::span<pixel_type const> input) const {
|
||||
std::vector<uint8_t> output;
|
||||
codec_type codec{block_size_, *this};
|
||||
output.resize(worst_case_encoded_bytes_impl(codec, input.size()));
|
||||
bitstream_writer writer{output.begin()};
|
||||
codec.encode(input, writer);
|
||||
output.resize(std::distance(output.begin(), writer.iterator()));
|
||||
return output;
|
||||
}
|
||||
|
||||
std::span<uint8_t> encode_impl(std::span<uint8_t> output,
|
||||
std::span<pixel_type const> input) const {
|
||||
codec_type codec{block_size_, *this};
|
||||
assert(output.size() >= worst_case_encoded_bytes_impl(codec, input.size()));
|
||||
bitstream_writer writer{output.begin()};
|
||||
codec.encode(input, writer);
|
||||
return std::span<uint8_t>{output.begin(), writer.iterator()};
|
||||
}
|
||||
|
||||
void decode_impl(std::span<pixel_type> output,
|
||||
std::span<uint8_t const> input) const {
|
||||
bitstream_reader reader{input.begin(), input.end()};
|
||||
codec_type codec{block_size_, *this};
|
||||
codec.decode(output, reader);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t const block_size_;
|
||||
};
|
||||
|
||||
template <size_t ComponentStreamCount, typename PixelTraits>
|
||||
std::unique_ptr<codec_interface<typename PixelTraits::value_type>>
|
||||
create_codec_(size_t block_size, PixelTraits const& traits) {
|
||||
if (block_size <= 512) {
|
||||
return std::make_unique<codec_impl<512, ComponentStreamCount, PixelTraits>>(
|
||||
traits, block_size);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename PixelTraits>
|
||||
std::unique_ptr<codec_interface<typename PixelTraits::value_type>>
|
||||
create_codec_(size_t block_size, size_t component_stream_count,
|
||||
PixelTraits const& traits) {
|
||||
switch (component_stream_count) {
|
||||
case 1:
|
||||
return create_codec_<1, PixelTraits>(block_size, traits);
|
||||
|
||||
case 2:
|
||||
return create_codec_<2, PixelTraits>(block_size, traits);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <std::unsigned_integral PixelValueType, std::endian ByteOrder,
|
||||
unsigned UnusedLsbCount>
|
||||
std::unique_ptr<codec_interface<PixelValueType>>
|
||||
create_codec_(size_t block_size, size_t component_stream_count) {
|
||||
using pixel_traits =
|
||||
static_pixel_traits<PixelValueType, ByteOrder, UnusedLsbCount>;
|
||||
|
||||
if (auto codec = create_codec_<pixel_traits>(
|
||||
block_size, component_stream_count, pixel_traits{})) {
|
||||
return codec;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <std::unsigned_integral PixelValueType>
|
||||
std::unique_ptr<codec_interface<PixelValueType>>
|
||||
create_codec_(codec_config const& config) {
|
||||
if (config.byteorder == std::endian::big) {
|
||||
switch (config.unused_lsb_count) {
|
||||
case 0:
|
||||
return create_codec_<PixelValueType, std::endian::big, 0>(
|
||||
config.block_size, config.component_stream_count);
|
||||
|
||||
case 2:
|
||||
return create_codec_<PixelValueType, std::endian::big, 2>(
|
||||
config.block_size, config.component_stream_count);
|
||||
|
||||
case 4:
|
||||
return create_codec_<PixelValueType, std::endian::big, 4>(
|
||||
config.block_size, config.component_stream_count);
|
||||
}
|
||||
}
|
||||
|
||||
using pixel_traits = dynamic_pixel_traits<PixelValueType>;
|
||||
|
||||
return create_codec_<pixel_traits>(
|
||||
config.block_size, config.component_stream_count,
|
||||
pixel_traits{config.byteorder, config.unused_lsb_count});
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace detail {
|
||||
|
||||
template <>
|
||||
std::unique_ptr<codec_interface<uint16_t>>
|
||||
create_codec_cpuspecific_<uint16_t, cpu_variant::RICEPP_CPU_VARIANT>(
|
||||
codec_config const& config) {
|
||||
if (auto codec = create_codec_<uint16_t>(config)) {
|
||||
return codec;
|
||||
}
|
||||
|
||||
throw std::runtime_error("Unsupported configuration");
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ricepp
|
46
ricepp/ricepp_cpuspecific.h
Normal file
46
ricepp/ricepp_cpuspecific.h
Normal file
@ -0,0 +1,46 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of ricepp.
|
||||
*
|
||||
* ricepp is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* ricepp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with ricepp. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <concepts>
|
||||
#include <memory>
|
||||
|
||||
#include <ricepp/codec_interface.h>
|
||||
|
||||
namespace ricepp {
|
||||
|
||||
struct codec_config;
|
||||
|
||||
namespace detail {
|
||||
|
||||
/**
 * Identifies which build of the CPU specific codec code to use.
 *
 * The enumerator names are also used as values of the `RICEPP_CPU_VARIANT`
 * compile definition, so they must stay in sync with the build files.
 */
enum class cpu_variant {
  fallback = 0,        ///< build without extra instruction set options
  has_bmi2 = 1,        ///< build for CPUs with BMI2
  has_bmi2_avx512 = 2, ///< build for CPUs with BMI2, AVX512VL and AVX512VBMI
};
|
||||
|
||||
template <std::unsigned_integral PixelT, cpu_variant CPU>
|
||||
std::unique_ptr<codec_interface<PixelT>>
|
||||
create_codec_cpuspecific_(codec_config const& config);
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ricepp
|
Loading…
x
Reference in New Issue
Block a user