diff --git a/ricepp/include/ricepp/bitstream_reader.h b/ricepp/include/ricepp/bitstream_reader.h index 709d1bf2..69eb7e25 100644 --- a/ricepp/include/ricepp/bitstream_reader.h +++ b/ricepp/include/ricepp/bitstream_reader.h @@ -54,7 +54,7 @@ class bitstream_reader final { assert(num_bits <= std::numeric_limits::digits); T bits = 0; uint16_t pos = 0; - if (num_bits > 0) { + if (num_bits > 0) [[likely]] { for (;;) { size_t const remain = kBitsTypeBits - bit_pos_; if (num_bits <= remain) { @@ -91,11 +91,11 @@ class bitstream_reader final { if (bits != bits_type{}) [[likely]] { size_t const ffs = std::countr_zero(bits); assert(ffs < kBitsTypeBits); - if (ffs + 1 == kBitsTypeBits) [[unlikely]] { - bit_pos_ = 0; - } else { + if (ffs + 1 != kBitsTypeBits) { data_ = bits; bit_pos_ = ffs + 1; + } else { + bit_pos_ = 0; } return zeros + ffs; } @@ -112,8 +112,7 @@ class bitstream_reader final { RICEPP_FORCE_INLINE void skip_bits(size_t num_bits) { assert(bit_pos_ + num_bits <= kBitsTypeBits); - bit_pos_ += num_bits; - bit_pos_ &= kBitsTypeBits - 1; + bit_pos_ = (bit_pos_ + num_bits) & (kBitsTypeBits - 1); } RICEPP_FORCE_INLINE bool peek_bit() { @@ -123,16 +122,17 @@ class bitstream_reader final { RICEPP_FORCE_INLINE bits_type peek_bits(size_t num_bits) { assert(bit_pos_ + num_bits <= kBitsTypeBits); - if (bit_pos_ == 0) [[unlikely]] { + auto const bp = bit_pos_; + if (bp == 0) { data_ = read_packet(); } // The remainder of this function is equivalent to: // - // return _bextr_u64(data_, bit_pos_, num_bits); + // return _bextr_u64(data_, bp, num_bits); // // However, in practice, at least clang generates code that is as fast // as the intrinsic, so we use the following code for portability. - bits_type bits = data_ >> bit_pos_; + bits_type bits = data_ >> bp; if (num_bits < kBitsTypeBits) [[likely]] { bits &= (static_cast(1) << num_bits) - 1; } diff --git a/ricepp/include/ricepp/bitstream_writer.h b/ricepp/include/ricepp/bitstream_writer.h index d096c45d..60f83241 100644 --- a/ricepp/include/ricepp/bitstream_writer.h +++ b/ricepp/include/ricepp/bitstream_writer.h @@ -76,7 +76,7 @@ class bitstream_writer final { write_packet(bits); repeat -= kBitsTypeBits; } - if (repeat > 0) { + if (repeat > 0) [[likely]] { write_bits_impl(bits, repeat); } } @@ -86,11 +86,17 @@ class bitstream_writer final { static constexpr size_t kArgBits{std::numeric_limits::digits}; assert(bit_pos_ < kBitsTypeBits); assert(num_bits <= kArgBits); - while (num_bits > 0) { - size_t const bits_to_write = std::min(num_bits, kBitsTypeBits - bit_pos_); - write_bits_impl(bits, bits_to_write); - bits >>= bits_to_write; - num_bits -= bits_to_write; + if (num_bits > 0) [[likely]] { + for (;;) { + size_t const bits_to_write = + std::min(num_bits, kBitsTypeBits - bit_pos_); + write_bits_impl(bits, bits_to_write); + bits >>= bits_to_write; + if (num_bits == bits_to_write) [[likely]] { + break; + } + num_bits -= bits_to_write; + } } } @@ -111,7 +117,7 @@ class bitstream_writer final { private: RICEPP_FORCE_INLINE void write_bits_impl(bits_type bits, size_t num_bits) { assert(bit_pos_ + num_bits <= kBitsTypeBits); - if (num_bits < kBitsTypeBits) { + if (num_bits < kBitsTypeBits) [[likely]] { bits &= (static_cast(1) << num_bits) - 1; } data_ |= bits << bit_pos_; diff --git a/ricepp/include/ricepp/detail/decode.h b/ricepp/include/ricepp/detail/decode.h index 278596ef..36f06d2c 100644 --- a/ricepp/include/ricepp/detail/decode.h +++ b/ricepp/include/ricepp/detail/decode.h @@ -50,22 +50,26 @@ void decode_block(V block, BitstreamReader& reader, PixelTraits const& traits, auto const fsp1 = reader.template read_bits(kFsBits); - if (fsp1 == 0) [[unlikely]] { - std::fill(block.begin(), block.end(), traits.write(last)); - } else if (fsp1 > kFsMax) [[unlikely]] { - for (auto& b : block) { - b = reader.template read_bits(kPixelBits); + if (fsp1 > 0) { + if (fsp1 <= kFsMax) { + auto const fs = fsp1 - 1; + for (auto& b : block) { + value_type diff = reader.find_first_set() << fs; + diff |= reader.template read_bits(fs); + last += static_cast>( + ((diff & 1) * value_type(-1)) ^ (diff >> 1)); + // last += static_cast>( + // (diff & 1) ? ~(diff >> 1) : (diff >> 1)); + b = traits.write(last); + } + } else { + for (auto& b : block) { + b = reader.template read_bits(kPixelBits); + } + last = traits.read(block.back()); } - last = traits.read(block.back()); } else { - auto const fs = fsp1 - 1; - for (auto& b : block) { - value_type diff = reader.find_first_set() << fs; - diff |= reader.template read_bits(fs); - last += static_cast>( - (diff & 1) ? ~(diff >> 1) : (diff >> 1)); - b = traits.write(last); - } + std::fill(block.begin(), block.end(), traits.write(last)); } last_value = last; diff --git a/ricepp/include/ricepp/detail/encode.h b/ricepp/include/ricepp/detail/encode.h index 8b255bad..630fa34b 100644 --- a/ricepp/include/ricepp/detail/encode.h +++ b/ricepp/include/ricepp/detail/encode.h @@ -117,23 +117,12 @@ void encode_block(V block, BitstreamWriter& writer, PixelTraits const& traits, last_value = last; - if (sum == 0) [[unlikely]] { - // All differences are zero, so just write a zero fs and we're done. - writer.write_bits(0U, kFsBits); - } else { + if (sum > 0) [[likely]] { // Find the best bit position to split the difference values. auto const [fs, bits_used] = compute_best_split(delta, block.size(), sum); - if (fs >= kFsMax || bits_used >= kPixelBits * block.size()) [[unlikely]] { - // Difference values are too large for entropy coding. Just plain copy - // the input pixel data. This is really unlikely, so reading the input - // pixels again is fine. - writer.write_bits(kFsMax + 1, kFsBits); - for (auto& b : block) { - writer.write_bits(b, kPixelBits); - } - } else { + if (fs < kFsMax && bits_used < kPixelBits * block.size()) [[likely]] { // Encode the difference values using Rice entropy coding. writer.write_bits(fs + 1, kFsBits); for (size_t i = 0; i < block.size(); ++i) { @@ -145,7 +134,18 @@ void encode_block(V block, BitstreamWriter& writer, PixelTraits const& traits, writer.write_bit(1); writer.write_bits(diff, fs); } + } else { + // Difference values are too large for entropy coding. Just plain copy + // the input pixel data. This is really unlikely, so reading the input + // pixels again is fine. + writer.write_bits(kFsMax + 1, kFsBits); + for (auto& b : block) { + writer.write_bits(b, kPixelBits); + } } + } else { + // All differences are zero, so just write a zero fs and we're done. + writer.write_bits(0U, kFsBits); } }