mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-11 13:30:47 -04:00
chore(ricepp): force inlining, massively speeds up Windows version
This commit is contained in:
parent
3b7d6ed861
commit
cd1efac062
@ -30,6 +30,7 @@
|
|||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
#include <ricepp/byteswap.h>
|
#include <ricepp/byteswap.h>
|
||||||
|
#include <ricepp/detail/compiler.h>
|
||||||
|
|
||||||
namespace ricepp {
|
namespace ricepp {
|
||||||
|
|
||||||
@ -46,10 +47,10 @@ class bitstream_reader final {
|
|||||||
: beg_{std::move(beg)}
|
: beg_{std::move(beg)}
|
||||||
, end_{std::move(end)} {}
|
, end_{std::move(end)} {}
|
||||||
|
|
||||||
bool read_bit() { return read_bits_impl(1); }
|
RICEPP_FORCE_INLINE bool read_bit() { return read_bits_impl(1); }
|
||||||
|
|
||||||
template <std::unsigned_integral T>
|
template <std::unsigned_integral T>
|
||||||
T read_bits(size_t num_bits) {
|
RICEPP_FORCE_INLINE T read_bits(size_t num_bits) {
|
||||||
assert(num_bits <= std::numeric_limits<T>::digits);
|
assert(num_bits <= std::numeric_limits<T>::digits);
|
||||||
T bits = 0;
|
T bits = 0;
|
||||||
uint16_t pos = 0;
|
uint16_t pos = 0;
|
||||||
@ -68,7 +69,7 @@ class bitstream_reader final {
|
|||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t find_first_set() {
|
RICEPP_FORCE_INLINE size_t find_first_set() {
|
||||||
size_t zeros = 0;
|
size_t zeros = 0;
|
||||||
if (bit_pos_ != 0) {
|
if (bit_pos_ != 0) {
|
||||||
if (peek_bit()) [[likely]] {
|
if (peek_bit()) [[likely]] {
|
||||||
@ -103,24 +104,24 @@ class bitstream_reader final {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bits_type read_bits_impl(size_t num_bits) {
|
RICEPP_FORCE_INLINE bits_type read_bits_impl(size_t num_bits) {
|
||||||
auto bits = peek_bits(num_bits);
|
auto bits = peek_bits(num_bits);
|
||||||
skip_bits(num_bits);
|
skip_bits(num_bits);
|
||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
void skip_bits(size_t num_bits) {
|
RICEPP_FORCE_INLINE void skip_bits(size_t num_bits) {
|
||||||
assert(bit_pos_ + num_bits <= kBitsTypeBits);
|
assert(bit_pos_ + num_bits <= kBitsTypeBits);
|
||||||
bit_pos_ += num_bits;
|
bit_pos_ += num_bits;
|
||||||
bit_pos_ &= kBitsTypeBits - 1;
|
bit_pos_ &= kBitsTypeBits - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool peek_bit() {
|
RICEPP_FORCE_INLINE bool peek_bit() {
|
||||||
assert(bit_pos_ > 0 && bit_pos_ < kBitsTypeBits);
|
assert(bit_pos_ > 0 && bit_pos_ < kBitsTypeBits);
|
||||||
return (data_ >> bit_pos_) & 1;
|
return (data_ >> bit_pos_) & 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
bits_type peek_bits(size_t num_bits) {
|
RICEPP_FORCE_INLINE bits_type peek_bits(size_t num_bits) {
|
||||||
assert(bit_pos_ + num_bits <= kBitsTypeBits);
|
assert(bit_pos_ + num_bits <= kBitsTypeBits);
|
||||||
if (bit_pos_ == 0) [[unlikely]] {
|
if (bit_pos_ == 0) [[unlikely]] {
|
||||||
data_ = read_packet();
|
data_ = read_packet();
|
||||||
@ -138,14 +139,14 @@ class bitstream_reader final {
|
|||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
bits_type read_packet() {
|
RICEPP_FORCE_INLINE bits_type read_packet() {
|
||||||
if (beg_ == end_) [[unlikely]] {
|
if (beg_ == end_) [[unlikely]] {
|
||||||
throw std::out_of_range{"bitstream_reader::read_packet"};
|
throw std::out_of_range{"bitstream_reader::read_packet"};
|
||||||
}
|
}
|
||||||
return read_packet_nocheck();
|
return read_packet_nocheck();
|
||||||
}
|
}
|
||||||
|
|
||||||
bits_type read_packet_nocheck()
|
RICEPP_FORCE_INLINE bits_type read_packet_nocheck()
|
||||||
requires std::contiguous_iterator<iterator_type>
|
requires std::contiguous_iterator<iterator_type>
|
||||||
{
|
{
|
||||||
bits_type bits{};
|
bits_type bits{};
|
||||||
@ -160,7 +161,7 @@ class bitstream_reader final {
|
|||||||
return byteswap<std::endian::little>(bits);
|
return byteswap<std::endian::little>(bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
bits_type read_packet_nocheck()
|
RICEPP_FORCE_INLINE bits_type read_packet_nocheck()
|
||||||
requires(!std::contiguous_iterator<iterator_type>)
|
requires(!std::contiguous_iterator<iterator_type>)
|
||||||
{
|
{
|
||||||
bits_type bits{};
|
bits_type bits{};
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#include <ricepp/byteswap.h>
|
#include <ricepp/byteswap.h>
|
||||||
|
#include <ricepp/detail/compiler.h>
|
||||||
|
|
||||||
namespace ricepp {
|
namespace ricepp {
|
||||||
|
|
||||||
@ -57,12 +58,12 @@ class bitstream_writer final {
|
|||||||
bitstream_writer(OutputIt out)
|
bitstream_writer(OutputIt out)
|
||||||
: out_{out} {}
|
: out_{out} {}
|
||||||
|
|
||||||
void write_bit(bool bit) {
|
RICEPP_FORCE_INLINE void write_bit(bool bit) {
|
||||||
assert(bit_pos_ < kBitsTypeBits);
|
assert(bit_pos_ < kBitsTypeBits);
|
||||||
write_bits_impl(bit, 1);
|
write_bits_impl(bit, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_bit(bool bit, size_t repeat) {
|
RICEPP_FORCE_INLINE void write_bit(bool bit, size_t repeat) {
|
||||||
bits_type const bits = bit ? ~bits_type{} : bits_type{};
|
bits_type const bits = bit ? ~bits_type{} : bits_type{};
|
||||||
if (bit_pos_ != 0) [[likely]] {
|
if (bit_pos_ != 0) [[likely]] {
|
||||||
auto remaining_bits = kBitsTypeBits - bit_pos_;
|
auto remaining_bits = kBitsTypeBits - bit_pos_;
|
||||||
@ -81,7 +82,7 @@ class bitstream_writer final {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <std::unsigned_integral T>
|
template <std::unsigned_integral T>
|
||||||
void write_bits(T bits, size_t num_bits) {
|
RICEPP_FORCE_INLINE void write_bits(T bits, size_t num_bits) {
|
||||||
static constexpr size_t kArgBits{std::numeric_limits<T>::digits};
|
static constexpr size_t kArgBits{std::numeric_limits<T>::digits};
|
||||||
assert(bit_pos_ < kBitsTypeBits);
|
assert(bit_pos_ < kBitsTypeBits);
|
||||||
assert(num_bits <= kArgBits);
|
assert(num_bits <= kArgBits);
|
||||||
@ -108,7 +109,7 @@ class bitstream_writer final {
|
|||||||
iterator_type iterator() const { return out_; }
|
iterator_type iterator() const { return out_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void write_bits_impl(bits_type bits, size_t num_bits) {
|
RICEPP_FORCE_INLINE void write_bits_impl(bits_type bits, size_t num_bits) {
|
||||||
assert(bit_pos_ + num_bits <= kBitsTypeBits);
|
assert(bit_pos_ + num_bits <= kBitsTypeBits);
|
||||||
if (num_bits < kBitsTypeBits) {
|
if (num_bits < kBitsTypeBits) {
|
||||||
bits &= (static_cast<bits_type>(1) << num_bits) - 1;
|
bits &= (static_cast<bits_type>(1) << num_bits) - 1;
|
||||||
@ -122,7 +123,7 @@ class bitstream_writer final {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_packet(bits_type bits) {
|
RICEPP_FORCE_INLINE void write_packet(bits_type bits) {
|
||||||
size_t const to_copy =
|
size_t const to_copy =
|
||||||
bit_pos_ == 0 ? sizeof(bits_type) : (bit_pos_ + 7) / 8;
|
bit_pos_ == 0 ? sizeof(bits_type) : (bit_pos_ + 7) / 8;
|
||||||
bits = byteswap<std::endian::little>(bits);
|
bits = byteswap<std::endian::little>(bits);
|
||||||
|
@ -28,19 +28,22 @@
|
|||||||
|
|
||||||
#include <range/v3/algorithm/reverse.hpp>
|
#include <range/v3/algorithm/reverse.hpp>
|
||||||
|
|
||||||
|
#include <ricepp/detail/compiler.h>
|
||||||
|
|
||||||
namespace ricepp {
|
namespace ricepp {
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
template <std::unsigned_integral T>
|
template <std::unsigned_integral T>
|
||||||
[[nodiscard]] constexpr T byteswap_fallback(T value) noexcept {
|
[[nodiscard]] RICEPP_FORCE_INLINE constexpr T
|
||||||
|
byteswap_fallback(T value) noexcept {
|
||||||
auto value_repr = std::bit_cast<std::array<std::byte, sizeof(T)>>(value);
|
auto value_repr = std::bit_cast<std::array<std::byte, sizeof(T)>>(value);
|
||||||
ranges::reverse(value_repr);
|
ranges::reverse(value_repr);
|
||||||
return std::bit_cast<T>(value_repr);
|
return std::bit_cast<T>(value_repr);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <std::unsigned_integral T>
|
template <std::unsigned_integral T>
|
||||||
[[nodiscard]] constexpr T byteswap(T value) noexcept {
|
[[nodiscard]] RICEPP_FORCE_INLINE constexpr T byteswap(T value) noexcept {
|
||||||
#if __cpp_lib_byteswap >= 202110L
|
#if __cpp_lib_byteswap >= 202110L
|
||||||
return std::byteswap(value);
|
return std::byteswap(value);
|
||||||
#elif defined(__GNUC__) || defined(__clang__)
|
#elif defined(__GNUC__) || defined(__clang__)
|
||||||
@ -73,7 +76,8 @@ template <std::unsigned_integral T>
|
|||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
template <std::unsigned_integral T>
|
template <std::unsigned_integral T>
|
||||||
[[nodiscard]] T byteswap(T value, std::endian byteorder) noexcept {
|
[[nodiscard]] RICEPP_FORCE_INLINE T byteswap(T value,
|
||||||
|
std::endian byteorder) noexcept {
|
||||||
static_assert(std::endian::native == std::endian::little ||
|
static_assert(std::endian::native == std::endian::little ||
|
||||||
std::endian::native == std::endian::big);
|
std::endian::native == std::endian::big);
|
||||||
if constexpr (sizeof(T) > 1) {
|
if constexpr (sizeof(T) > 1) {
|
||||||
@ -85,7 +89,7 @@ template <std::unsigned_integral T>
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <std::endian byteorder, std::unsigned_integral T>
|
template <std::endian byteorder, std::unsigned_integral T>
|
||||||
[[nodiscard]] constexpr T byteswap(T value) noexcept {
|
[[nodiscard]] RICEPP_FORCE_INLINE constexpr T byteswap(T value) noexcept {
|
||||||
static_assert(std::endian::native == std::endian::little ||
|
static_assert(std::endian::native == std::endian::little ||
|
||||||
std::endian::native == std::endian::big);
|
std::endian::native == std::endian::big);
|
||||||
if constexpr (sizeof(T) > 1 && byteorder != std::endian::native) {
|
if constexpr (sizeof(T) > 1 && byteorder != std::endian::native) {
|
||||||
|
30
ricepp/include/ricepp/detail/compiler.h
Normal file
30
ricepp/include/ricepp/detail/compiler.h
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||||
|
/**
|
||||||
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||||
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||||
|
*
|
||||||
|
* This file is part of ricepp.
|
||||||
|
*
|
||||||
|
* ricepp is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* ricepp is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with ricepp. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
|
#define RICEPP_FORCE_INLINE inline __attribute__((__always_inline__))
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
#define RICEPP_FORCE_INLINE __forceinline
|
||||||
|
#else
|
||||||
|
#define RICEPP_FORCE_INLINE inline
|
||||||
|
#endif
|
@ -28,6 +28,7 @@
|
|||||||
#include <range/v3/range/concepts.hpp>
|
#include <range/v3/range/concepts.hpp>
|
||||||
|
|
||||||
#include <ricepp/bitstream_reader.h>
|
#include <ricepp/bitstream_reader.h>
|
||||||
|
#include <ricepp/detail/compiler.h>
|
||||||
|
|
||||||
namespace ricepp::detail {
|
namespace ricepp::detail {
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <ricepp/bitstream_writer.h>
|
#include <ricepp/bitstream_writer.h>
|
||||||
#include <ricepp/byteswap.h>
|
#include <ricepp/byteswap.h>
|
||||||
#include <ricepp/codec.h>
|
#include <ricepp/codec.h>
|
||||||
|
#include <ricepp/detail/compiler.h>
|
||||||
#include <ricepp/ricepp.h>
|
#include <ricepp/ricepp.h>
|
||||||
|
|
||||||
#include "ricepp_cpuspecific.h"
|
#include "ricepp_cpuspecific.h"
|
||||||
@ -57,13 +58,15 @@ class dynamic_pixel_traits {
|
|||||||
assert(unused_lsb_count < kBitCount);
|
assert(unused_lsb_count < kBitCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] value_type read(value_type value) const noexcept {
|
[[nodiscard]] RICEPP_FORCE_INLINE value_type
|
||||||
|
read(value_type value) const noexcept {
|
||||||
value_type tmp = byteswap(value, byteorder_);
|
value_type tmp = byteswap(value, byteorder_);
|
||||||
assert((tmp & lsb_mask_) == 0);
|
assert((tmp & lsb_mask_) == 0);
|
||||||
return tmp >> unused_lsb_count_;
|
return tmp >> unused_lsb_count_;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] value_type write(value_type value) const noexcept {
|
[[nodiscard]] RICEPP_FORCE_INLINE value_type
|
||||||
|
write(value_type value) const noexcept {
|
||||||
assert((value & msb_mask_) == 0);
|
assert((value & msb_mask_) == 0);
|
||||||
return byteswap(static_cast<value_type>(value << unused_lsb_count_),
|
return byteswap(static_cast<value_type>(value << unused_lsb_count_),
|
||||||
byteorder_);
|
byteorder_);
|
||||||
@ -95,13 +98,15 @@ class static_pixel_traits {
|
|||||||
static_cast<value_type>(~(kAllOnes >> kUnusedLsbCount));
|
static_cast<value_type>(~(kAllOnes >> kUnusedLsbCount));
|
||||||
static_assert(kUnusedLsbCount < kBitCount);
|
static_assert(kUnusedLsbCount < kBitCount);
|
||||||
|
|
||||||
[[nodiscard]] static value_type read(value_type value) noexcept {
|
[[nodiscard]] static RICEPP_FORCE_INLINE value_type
|
||||||
|
read(value_type value) noexcept {
|
||||||
value_type tmp = byteswap<kByteOrder>(value);
|
value_type tmp = byteswap<kByteOrder>(value);
|
||||||
assert((tmp & kLsbMask) == 0);
|
assert((tmp & kLsbMask) == 0);
|
||||||
return tmp >> kUnusedLsbCount;
|
return tmp >> kUnusedLsbCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] static value_type write(value_type value) noexcept {
|
[[nodiscard]] static RICEPP_FORCE_INLINE value_type
|
||||||
|
write(value_type value) noexcept {
|
||||||
assert((value & kMsbMask) == 0);
|
assert((value & kMsbMask) == 0);
|
||||||
return byteswap<kByteOrder>(
|
return byteswap<kByteOrder>(
|
||||||
static_cast<value_type>(value << kUnusedLsbCount));
|
static_cast<value_type>(value << kUnusedLsbCount));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user