From e08faf2c0c1dc8cb473184ff47ca84f68e8f2a1e Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sun, 23 Jul 2023 15:47:27 +0200 Subject: [PATCH] Basic working FLAC compression --- CMakeLists.txt | 67 ++++- include/dwarfs/compression.h | 3 +- src/dwarfs/compression/flac.cpp | 484 ++++++++++++++++++++++++++++++++ test/flac_compressor_test.cpp | 202 +++++++++++++ thrift/compression.thrift | 39 +++ vcpkg.json | 1 + 6 files changed, 794 insertions(+), 2 deletions(-) create mode 100644 src/dwarfs/compression/flac.cpp create mode 100644 test/flac_compressor_test.cpp create mode 100644 thrift/compression.thrift diff --git a/CMakeLists.txt b/CMakeLists.txt index 405e291b..f3e57231 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,6 +214,7 @@ if(PKG_CONFIG_FOUND) pkg_check_modules(LIBBROTLIENC IMPORTED_TARGET libbrotlienc>=1.0.9) pkg_check_modules(LIBARCHIVE IMPORTED_TARGET libarchive>=3.6.0) pkg_check_modules(LIBMAGIC IMPORTED_TARGET libmagic>=5.38) + pkg_check_modules(FLAC IMPORTED_TARGET flac++>=1.4.2) pkg_check_modules(ZSTD IMPORTED_TARGET libzstd>=1.5.2) pkg_check_modules(XXHASH IMPORTED_TARGET libxxhash>=0.8.1) endif() @@ -421,6 +422,10 @@ if(LIBBROTLIDEC_FOUND AND LIBBROTLIENC_FOUND) list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/brotli.cpp) endif() +if(FLAC_FOUND) + list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/flac.cpp) +endif() + list( APPEND LIBDWARFS_CATEGORIZER_SRC @@ -447,7 +452,7 @@ target_compile_definitions( ) target_link_libraries(dwarfs_categorizer folly) -target_link_libraries(dwarfs_compression folly) +target_link_libraries(dwarfs_compression folly compression_thrift) target_link_libraries(dwarfs_tool dwarfs) if(STATIC_BUILD_DO_NOT_USE) @@ -576,6 +581,13 @@ if(WITH_TESTS) gtest_discover_tests(dwarfs_utils_test DISCOVERY_TIMEOUT 120) gtest_discover_tests(dwarfs_pcm_sample_transformer_test DISCOVERY_TIMEOUT 120) + if(FLAC_FOUND) + add_executable(dwarfs_flac_compressor_test test/flac_compressor_test.cpp) + 
target_link_libraries(dwarfs_flac_compressor_test gtest gtest_main) + list(APPEND BINARY_TARGETS dwarfs_flac_compressor_test) + gtest_discover_tests(dwarfs_flac_compressor_test DISCOVERY_TIMEOUT 120) + endif() + target_compile_definitions(dwarfs_compat_test PRIVATE TEST_DATA_DIR=\"${CMAKE_SOURCE_DIR}/test\") @@ -641,10 +653,32 @@ list( ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.h ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.tcc ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types_custom_protocol.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types_fwd.h ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visit_by_thrift_field_metadata.h ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visit_union.h ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visitation.h) +list( + APPEND + COMPRESSION_THRIFT_SRC + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_clients.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_constants.cpp + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_constants.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_data.cpp + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_data.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_for_each_field.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_handlers.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_metadata.cpp + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_metadata.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.cpp + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.tcc + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types_custom_protocol.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types_fwd.h + 
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_visit_by_thrift_field_metadata.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_visit_union.h + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_visitation.h) + add_custom_command( OUTPUT thrift/lib/thrift/_keep COMMAND ${CMAKE_COMMAND} -E make_directory thrift/lib/thrift @@ -689,6 +723,22 @@ add_custom_command( WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs ) +add_custom_command( + OUTPUT ${COMPRESSION_THRIFT_SRC} + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/thrift/compression.thrift + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/compression.thrift + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1 + -I ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift + -o ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs + --gen mstch_cpp2 + compression.thrift + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1 + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/_keep + ${CMAKE_CURRENT_SOURCE_DIR}/thrift/compression.thrift + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs +) + list( APPEND INCLUDE_DIRS @@ -745,11 +795,19 @@ add_library( ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.cpp ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_data.cpp) +add_library( + compression_thrift + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.cpp + ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_data.cpp) + set_property(TARGET metadata_thrift PROPERTY CXX_STANDARD 20) +set_property(TARGET compression_thrift PROPERTY CXX_STANDARD 20) target_include_directories(metadata_thrift PRIVATE ${INCLUDE_DIRS}) +target_include_directories(compression_thrift PRIVATE ${INCLUDE_DIRS}) target_link_libraries(metadata_thrift thrift_light) +target_link_libraries(compression_thrift thrift_light) foreach(tgt dwarfs dwarfs_compression dwarfs_categorizer dwarfs_tool ${BINARY_TARGETS} ${MAIN_TARGETS}) @@ -769,6 +827,7 @@ foreach(tgt dwarfs 
dwarfs_compression dwarfs_categorizer $<$:DWARFS_HAVE_LIBLZ4> $<$:DWARFS_HAVE_LIBLZMA> $<$,$>:DWARFS_HAVE_LIBBROTLI> + $<$:DWARFS_HAVE_FLAC> ) if(DWARFS_USE_EXCEPTION_TRACER) @@ -855,6 +914,10 @@ if(LIBLZMA_FOUND) target_link_libraries(dwarfs PkgConfig::LIBLZMA) endif() +if(FLAC_FOUND) + target_link_libraries(dwarfs PkgConfig::FLAC) +endif() + if(LIBBROTLIDEC_FOUND AND LIBBROTLIENC_FOUND) target_link_libraries(dwarfs PkgConfig::LIBBROTLIDEC PkgConfig::LIBBROTLIENC) endif() @@ -924,6 +987,7 @@ if(STATIC_BUILD_DO_NOT_USE) import_static_lib(static_libunwind "libunwind.a") import_static_lib(static_libarchive "libarchive.a") import_static_lib(static_libmagic "libmagic.a") + import_static_lib(static_libflac "libFLAC++.a") set_target_properties(static_libunwind PROPERTIES INTERFACE_LINK_LIBRARIES PkgConfig::LIBLZMA) @@ -935,6 +999,7 @@ if(STATIC_BUILD_DO_NOT_USE) static_libz) target_link_libraries(dwarfs_categorizer static_libmagic) + target_link_libraries(dwarfs_compression static_libflac) foreach(tgt ${BINARY_TARGETS}) if(PREFER_SYSTEM_LIBFMT) diff --git a/include/dwarfs/compression.h b/include/dwarfs/compression.h index 34c70e7b..9bb126bd 100644 --- a/include/dwarfs/compression.h +++ b/include/dwarfs/compression.h @@ -30,7 +30,8 @@ DWARFS_COMPRESSION_TYPE(ZSTD, 2) SEPARATOR \ DWARFS_COMPRESSION_TYPE(LZ4, 3) SEPARATOR \ DWARFS_COMPRESSION_TYPE(LZ4HC, 4) SEPARATOR \ - DWARFS_COMPRESSION_TYPE(BROTLI, 5) + DWARFS_COMPRESSION_TYPE(BROTLI, 5) SEPARATOR \ + DWARFS_COMPRESSION_TYPE(FLAC, 6) // clang-format on namespace dwarfs { diff --git a/src/dwarfs/compression/flac.cpp b/src/dwarfs/compression/flac.cpp new file mode 100644 index 00000000..6c04c0bf --- /dev/null +++ b/src/dwarfs/compression/flac.cpp @@ -0,0 +1,484 @@ +/* vim:set ts=2 sw=2 sts=2 et: */ +/** + * \author Marcus Holland-Moritz (github@mhxnet.de) + * \copyright Copyright (c) Marcus Holland-Moritz + * + * This file is part of dwarfs. 
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+
+#include <FLAC++/decoder.h>
+#include <FLAC++/encoder.h>
+
+#include <fmt/format.h>
+
+#include <folly/Varint.h>
+
+#include <thrift/lib/cpp2/protocol/Serializer.h>
+
+#include "dwarfs/block_compressor.h"
+#include "dwarfs/compression.h"
+#include "dwarfs/error.h"
+#include "dwarfs/option_map.h"
+#include "dwarfs/pcm_sample_transformer.h"
+
+#include "dwarfs/gen-cpp2/compression_types.h"
+
+namespace dwarfs {
+
+namespace {
+
+// Bit layout of `flac_block_header::flags`. The low two bits store
+// `bytes_per_sample - 1`, the upper bits describe the PCM sample format.
+constexpr uint8_t const kFlagBigEndian{0x80};
+constexpr uint8_t const kFlagSigned{0x40};
+constexpr uint8_t const kFlagLsbPadding{0x20};
+constexpr uint8_t const kBytesPerSampleMask{0x03};
+
+// Upper bound for the number of interleaved sample values that are handed
+// to the encoder in a single `process_interleaved()` call.
+constexpr size_t const kBlockSize{65536};
+
+/**
+ * FLAC stream encoder writing into a byte vector.
+ *
+ * The encoded stream is appended to whatever `data` already contains when
+ * the encoder is constructed. All stream offsets reported to and received
+ * from libFLAC are absolute offsets into `data`.
+ */
+class dwarfs_flac_stream_encoder : public FLAC::Encoder::Stream {
+ public:
+  explicit dwarfs_flac_stream_encoder(std::vector<uint8_t>& data)
+      : data_{data}
+      , pos_{data_.size()} {}
+
+  /**
+   * Read back previously written bytes from the current position.
+   *
+   * On entry `*bytes` is the maximum number of bytes to read, on return it
+   * is the number of bytes actually copied. The request is clamped to the
+   * data that is available so we never read past the end of the vector.
+   */
+  ::FLAC__StreamEncoderReadStatus
+  read_callback(FLAC__byte buffer[], size_t* bytes) override {
+    if (pos_ >= data_.size()) {
+      *bytes = 0;
+      return FLAC__STREAM_ENCODER_READ_STATUS_END_OF_STREAM;
+    }
+
+    *bytes = std::min(*bytes, data_.size() - pos_);
+
+    if (*bytes > 0) {
+      ::memcpy(buffer, data_.data() + pos_, *bytes);
+    }
+
+    pos_ += *bytes;
+
+    return FLAC__STREAM_ENCODER_READ_STATUS_CONTINUE;
+  }
+
+  /// Write `bytes` bytes at the current position, growing the vector if
+  /// necessary, and advance the position.
+  ::FLAC__StreamEncoderWriteStatus
+  write_callback(const FLAC__byte buffer[], size_t bytes, uint32_t,
+                 uint32_t) override {
+    size_t end = pos_ + bytes;
+    if (data_.size() < end) {
+      data_.resize(end);
+    }
+    ::memcpy(data_.data() + pos_, buffer, bytes);
+    pos_ += bytes;
+    return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
+  }
+
+  ::FLAC__StreamEncoderSeekStatus
+  seek_callback(FLAC__uint64 absolute_byte_offset) override {
+    pos_ = absolute_byte_offset;
+    return FLAC__STREAM_ENCODER_SEEK_STATUS_OK;
+  }
+
+  ::FLAC__StreamEncoderTellStatus
+  tell_callback(FLAC__uint64* absolute_byte_offset) override {
+    *absolute_byte_offset = pos_;
+    return FLAC__STREAM_ENCODER_TELL_STATUS_OK;
+  }
+
+ private:
+  std::vector<uint8_t>& data_;
+  size_t pos_;
+};
+
+/**
+ * FLAC stream decoder reading from a byte span and appending the decoded
+ * samples, packed back into their original PCM byte layout, to `target`.
+ *
+ * `target` and `header` are held by reference and must outlive the decoder.
+ */
+class dwarfs_flac_stream_decoder : public FLAC::Decoder::Stream {
+ public:
+  dwarfs_flac_stream_decoder(
+      std::vector<uint8_t>& target, std::span<uint8_t const> data,
+      thrift::compression::flac_block_header const& header)
+      : target_{target}
+      , data_{data}
+      , header_{header}
+      , bytes_per_sample_{(header_.flags().value() & kBytesPerSampleMask) + 1}
+      , xfm_{header_.flags().value() & kFlagBigEndian
+                 ? pcm_sample_endianness::Big
+                 : pcm_sample_endianness::Little,
+             header_.flags().value() & kFlagSigned
+                 ? pcm_sample_signedness::Signed
+                 : pcm_sample_signedness::Unsigned,
+             header_.flags().value() & kFlagLsbPadding
+                 ? pcm_sample_padding::Lsb
+                 : pcm_sample_padding::Msb,
+             bytes_per_sample_, header_.bits_per_sample().value()} {}
+
+  /// Hand at most `*bytes` bytes of the compressed stream to libFLAC.
+  ::FLAC__StreamDecoderReadStatus
+  read_callback(FLAC__byte buffer[], size_t* bytes) override {
+    if (pos_ >= data_.size()) {
+      return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM;
+    }
+
+    if (pos_ + *bytes > data_.size()) {
+      *bytes = data_.size() - pos_;
+    }
+
+    if (*bytes > 0) {
+      ::memcpy(buffer, data_.data() + pos_, *bytes);
+    }
+
+    pos_ += *bytes;
+
+    return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
+  }
+
+  /// Interleave the per-channel samples of one decoded frame and append
+  /// them to the target buffer in the original PCM byte layout.
+  ::FLAC__StreamDecoderWriteStatus
+  write_callback(const ::FLAC__Frame* frame,
+                 const FLAC__int32* const buffer[]) override {
+    auto samples = frame->header.blocksize;
+    auto channels = frame->header.channels;
+    tmp_.resize(channels * samples);
+    for (uint_fast32_t i = 0; i < samples; ++i) {
+      for (uint_fast32_t c = 0; c < channels; ++c) {
+        tmp_[i * channels + c] = buffer[c][i];
+      }
+    }
+
+    auto pos = target_.size();
+    size_t size = channels * samples * bytes_per_sample_;
+
+    target_.resize(pos + size);
+
+    xfm_.pack(std::span(&target_[pos], size), tmp_);
+
+    return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
+  }
+
+  // NOTE(review): this throws from within a libFLAC (C) call stack, which
+  // presumably relies on the library being built with unwind information;
+  // confirm for all supported platforms.
+  void error_callback(::FLAC__StreamDecoderErrorStatus status) override {
+    DWARFS_THROW(runtime_error,
+                 fmt::format("[FLAC] decoder error: {}",
+                             FLAC__StreamDecoderErrorStatusString[status]));
+  }
+
+  ::FLAC__StreamDecoderSeekStatus
+  seek_callback(FLAC__uint64 absolute_byte_offset) override {
+    if (absolute_byte_offset > data_.size()) {
+      return ::FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
+    }
+    pos_ = absolute_byte_offset;
+    return ::FLAC__STREAM_DECODER_SEEK_STATUS_OK;
+  }
+
+  ::FLAC__StreamDecoderTellStatus
+  tell_callback(FLAC__uint64* absolute_byte_offset) override {
+    *absolute_byte_offset = pos_;
+    return ::FLAC__STREAM_DECODER_TELL_STATUS_OK;
+  }
+
+  ::FLAC__StreamDecoderLengthStatus
+  length_callback(FLAC__uint64* stream_length) override {
+    *stream_length = data_.size();
+    return ::FLAC__STREAM_DECODER_LENGTH_STATUS_OK;
+  }
+
+  bool eof_callback() override { return pos_ >= data_.size(); }
+
+ private:
+  std::vector<uint8_t>& target_;
+  std::vector<FLAC__int32> tmp_;
+  std::span<uint8_t const> data_;
+  thrift::compression::flac_block_header const& header_;
+  int const bytes_per_sample_;
+  pcm_sample_transformer<FLAC__int32> xfm_;
+  size_t pos_{0};
+};
+
+/**
+ * Block compressor that encodes interleaved PCM data using FLAC.
+ *
+ * A compressed block consists of the varint-encoded uncompressed size,
+ * a compact-serialized `flac_block_header` and the FLAC stream itself.
+ */
+class flac_block_compressor final : public block_compressor::impl {
+ public:
+  flac_block_compressor(uint32_t level, bool exhaustive)
+      : level_{level}
+      , exhaustive_{exhaustive} {}
+
+  flac_block_compressor(const flac_block_compressor& rhs) = default;
+
+  std::unique_ptr<block_compressor::impl> clone() const override {
+    return std::make_unique<flac_block_compressor>(*this);
+  }
+
+  /**
+   * Compress a block of PCM data.
+   *
+   * \param data  Interleaved PCM samples.
+   * \param meta  Description of the PCM layout; must provide all keys
+   *              tested by check_metadata().
+   *
+   * \throws runtime_error  if the metadata is out of range, if the data
+   *                        size is not a multiple of the PCM frame size,
+   *                        or if the FLAC encoder reports an error.
+   * \throws bad_compression_ratio_error  if the result is not smaller
+   *                                      than the input.
+   */
+  std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
+                                folly::dynamic meta) const override {
+    auto endianness = meta["endianness"].asString();
+    auto signedness = meta["signedness"].asString();
+    auto padding = meta["padding"].asString();
+    auto num_channels = meta["number_of_channels"].asInt();
+    auto bits_per_sample = meta["bits_per_sample"].asInt();
+    auto bytes_per_sample = meta["bytes_per_sample"].asInt();
+
+    // The metadata is external input, so validate it unconditionally;
+    // an assertion would vanish in release builds and a channel count
+    // of zero would end up as a division by zero below.
+    if (bytes_per_sample < 1 || bytes_per_sample > 4 ||
+        bits_per_sample < 8 || bits_per_sample > 32 || num_channels < 1 ||
+        num_channels > static_cast<int64_t>(FLAC__MAX_CHANNELS)) {
+      DWARFS_THROW(
+          runtime_error,
+          fmt::format("[FLAC] invalid PCM metadata: {} channels, {} bits "
+                      "per sample, {} bytes per sample",
+                      num_channels, bits_per_sample, bytes_per_sample));
+    }
+
+    // size of one sample across all channels
+    size_t const bytes_per_frame = static_cast<size_t>(num_channels) *
+                                   static_cast<size_t>(bytes_per_sample);
+
+    if (data.size() % bytes_per_frame) {
+      DWARFS_THROW(
+          runtime_error,
+          fmt::format("unexpected PCM waveform configuration: {} bytes to "
+                      "compress, {} channels, {} bytes per sample",
+                      data.size(), num_channels, bytes_per_sample));
+    }
+
+    size_t num_samples = data.size() / bytes_per_frame;
+
+    pcm_sample_endianness pcm_end;
+    pcm_sample_signedness pcm_sig;
+    pcm_sample_padding pcm_pad;
+
+    uint8_t flags = static_cast<uint8_t>(bytes_per_sample - 1);
+
+    if (endianness == "big") {
+      flags |= kFlagBigEndian;
+      pcm_end = pcm_sample_endianness::Big;
+    } else {
+      pcm_end = pcm_sample_endianness::Little;
+    }
+
+    if (signedness == "signed") {
+      flags |= kFlagSigned;
+      pcm_sig = pcm_sample_signedness::Signed;
+    } else {
+      pcm_sig = pcm_sample_signedness::Unsigned;
+    }
+
+    if (padding == "lsb") {
+      flags |= kFlagLsbPadding;
+      pcm_pad = pcm_sample_padding::Lsb;
+    } else {
+      pcm_pad = pcm_sample_padding::Msb;
+    }
+
+    std::vector<uint8_t> compressed;
+
+    // Write the block prefix: uncompressed size followed by the header.
+    {
+      using namespace ::apache::thrift;
+
+      compressed.reserve(5 * data.size() / 8); // optimistic guess
+      compressed.resize(folly::kMaxVarintLength64);
+
+      size_t pos = 0;
+      pos += folly::encodeVarint(data.size(), compressed.data() + pos);
+      compressed.resize(pos);
+
+      thrift::compression::flac_block_header hdr;
+      hdr.num_channels() = num_channels;
+      hdr.bits_per_sample() = bits_per_sample;
+      hdr.flags() = flags;
+
+      std::string hdrbuf;
+      CompactSerializer::serialize(hdr, &hdrbuf);
+
+      compressed.resize(pos + hdrbuf.size());
+      ::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size());
+      pos += hdrbuf.size();
+    }
+
+    // The encoder appends the FLAC stream after the prefix.
+    dwarfs_flac_stream_encoder encoder(compressed);
+
+    encoder.set_streamable_subset(false);
+    encoder.set_channels(num_channels);
+    encoder.set_bits_per_sample(bits_per_sample);
+    encoder.set_sample_rate(48000); // TODO: see if a fixed rate makes sense
+    encoder.set_compression_level(level_);
+    encoder.set_do_exhaustive_model_search(exhaustive_);
+    encoder.set_total_samples_estimate(num_samples);
+
+    if (encoder.init() != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
+      DWARFS_THROW(
+          runtime_error,
+          fmt::format("[FLAC] init: {}", encoder.get_state().as_cstring()));
+    }
+
+    pcm_sample_transformer<FLAC__int32> xfm(pcm_end, pcm_sig, pcm_pad,
+                                            bytes_per_sample, bits_per_sample);
+
+    // The channel count is bounded above, so this is never zero.
+    size_t const samples_per_call =
+        kBlockSize / static_cast<size_t>(num_channels);
+    std::vector<FLAC__int32> buffer;
+    size_t input_pos = 0;
+
+    while (num_samples > 0) {
+      size_t n = std::min(num_samples, samples_per_call);
+      buffer.resize(n * num_channels);
+      xfm.unpack(buffer,
+                 std::span<uint8_t const>(data.data() + input_pos,
+                                          buffer.size() * bytes_per_sample));
+
+      if (!encoder.process_interleaved(buffer.data(), n)) {
+        DWARFS_THROW(
+            runtime_error,
+            fmt::format("[FLAC] failed to process interleaved samples: {}",
+                        encoder.get_state().as_cstring()));
+      }
+
+      input_pos += buffer.size() * bytes_per_sample;
+      num_samples -= n;
+    }
+
+    if (!encoder.finish()) {
+      DWARFS_THROW(runtime_error, "[FLAC] failed to finish encoder");
+    }
+
+    if (compressed.size() >= data.size()) {
+      throw bad_compression_ratio_error();
+    }
+
+    compressed.shrink_to_fit();
+
+    return compressed;
+  }
+
+  std::vector<uint8_t>
+  compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
+    return compress(data, std::move(meta));
+  }
+
+  compression_type type() const override { return compression_type::FLAC; }
+
+  std::string describe() const override {
+    return fmt::format("flac [level={}{}]", level_,
+                       exhaustive_ ? ", exhaustive" : "");
+  }
+
+  /// Returns true if `meta` provides all keys required by compress().
+  bool check_metadata(folly::dynamic meta) const override {
+    if (meta.empty()) {
+      return false;
+    }
+
+    return meta.count("endianness") > 0 && meta.count("signedness") > 0 &&
+           meta.count("padding") > 0 && meta.count("bytes_per_sample") > 0 &&
+           meta.count("bits_per_sample") > 0 &&
+           meta.count("number_of_channels") > 0;
+  }
+
+ private:
+  uint32_t const level_;
+  bool const exhaustive_;
+};
+
+/**
+ * Block decompressor for blocks written by flac_block_compressor.
+ *
+ * The block prefix (uncompressed size and header) is parsed in the
+ * constructor; the FLAC stream is decoded incrementally into `target`
+ * by decompress_frame().
+ */
+class flac_block_decompressor final : public block_decompressor::impl {
+ public:
+  flac_block_decompressor(const uint8_t* data, size_t size,
+                          std::vector<uint8_t>& target)
+      : flac_block_decompressor(folly::Range<uint8_t const*>(data, size),
+                                target) {}
+
+  // `data` is advanced past the size and the header by the member
+  // initializers, so the decoder only ever sees the FLAC stream itself.
+  flac_block_decompressor(folly::Range<uint8_t const*> data,
+                          std::vector<uint8_t>& target)
+      : decompressed_{target}
+      , uncompressed_size_{folly::decodeVarint(data)}
+      , header_{decode_header(data)}
+      , decoder_{std::make_unique<dwarfs_flac_stream_decoder>(
+            decompressed_, std::span<uint8_t const>(data.data(), data.size()),
+            header_)} {
+    decoder_->set_md5_checking(false);
+    decoder_->set_metadata_ignore_all();
+
+    if (auto status = decoder_->init();
+        status != FLAC__STREAM_DECODER_INIT_STATUS_OK) {
+      DWARFS_THROW(runtime_error,
+                   fmt::format("[FLAC] could not initialize decoder: {}",
+                               FLAC__StreamDecoderInitStatusString[status]));
+    }
+
+    try {
+      decompressed_.reserve(uncompressed_size_);
+    } catch (std::bad_alloc const&) {
+      DWARFS_THROW(
+          runtime_error,
+          fmt::format(
+              "[FLAC] could not reserve {} bytes for decompressed block",
+              uncompressed_size_));
+    }
+  }
+
+  compression_type type() const override { return compression_type::FLAC; }
+
+  /**
+   * Decode until at least `frame_size` more bytes (clamped to the
+   * announced uncompressed size) are available in the target buffer.
+   *
+   * \throws runtime_error  if the decoder fails, if the stream ends before
+   *                        the requested data has been decoded, or if more
+   *                        data is decoded than the block announced.
+   */
+  bool decompress_frame(size_t frame_size) override {
+    size_t pos = decompressed_.size();
+
+    if (pos + frame_size > uncompressed_size_) {
+      assert(uncompressed_size_ >= pos);
+      frame_size = uncompressed_size_ - pos;
+    }
+
+    size_t wanted = pos + frame_size;
+
+    assert(wanted <= uncompressed_size_);
+    assert(frame_size > 0);
+
+    while (decompressed_.size() < wanted) {
+      if (!decoder_->process_single()) {
+        DWARFS_THROW(runtime_error,
+                     fmt::format("[FLAC] failed to process frame: {}",
+                                 decoder_->get_state().as_cstring()));
+      }
+
+      // process_single() keeps succeeding once the end of the stream has
+      // been reached, so a truncated stream would otherwise loop forever.
+      if (decompressed_.size() < wanted &&
+          static_cast<::FLAC__StreamDecoderState>(decoder_->get_state()) ==
+              FLAC__STREAM_DECODER_END_OF_STREAM) {
+        DWARFS_THROW(
+            runtime_error,
+            fmt::format(
+                "[FLAC] unexpected end of stream: decoded {} of {} bytes",
+                decompressed_.size(), uncompressed_size_));
+      }
+    }
+
+    if (decompressed_.size() > uncompressed_size_) {
+      DWARFS_THROW(
+          runtime_error,
+          fmt::format("[FLAC] decoded {} bytes, but block size is {} bytes",
+                      decompressed_.size(), uncompressed_size_));
+    }
+
+    if (decompressed_.size() == uncompressed_size_) {
+      decoder_.reset();
+    }
+
+    return true;
+  }
+
+  size_t uncompressed_size() const override { return uncompressed_size_; }
+
+ private:
+  /// Deserialize the block header and advance `range` past it.
+  static thrift::compression::flac_block_header
+  decode_header(folly::Range<uint8_t const*>& range) {
+    using namespace ::apache::thrift;
+    thrift::compression::flac_block_header hdr;
+    auto size = CompactSerializer::deserialize(range, hdr);
+    range.advance(size);
+    return hdr;
+  }
+
+  std::vector<uint8_t>& decompressed_;
+
+  size_t const uncompressed_size_;
+  thrift::compression::flac_block_header const header_;
+  std::unique_ptr<dwarfs_flac_stream_decoder> decoder_;
+};
+
+/// Factory providing the "flac" compressor and decompressor.
+class flac_compression_factory : public compression_factory {
+ public:
+  flac_compression_factory()
+      : options_{
+            fmt::format("level=[0..8]"),
+            fmt::format("exhaustive"),
+        } {}
+
+  std::string_view name() const override { return "flac"; }
+
+  std::string_view description() const override { return "FLAC compression"; }
+
+  std::vector<std::string> const& options() const override { return options_; }
+
+  std::unique_ptr<block_compressor::impl>
+  make_compressor(option_map& om) const override {
+    return std::make_unique<flac_block_compressor>(
+        om.get<uint32_t>("level", 6), om.get<bool>("exhaustive", false));
+  }
+
+  std::unique_ptr<block_decompressor::impl>
+  make_decompressor(std::span<uint8_t const> data,
+                    std::vector<uint8_t>& target) const override {
+    return std::make_unique<flac_block_decompressor>(data.data(), data.size(),
+                                                     target);
+  }
+
+ private:
+  std::vector<std::string> const options_;
+};
+
+} // namespace
+
+REGISTER_COMPRESSION_FACTORY(compression_type::FLAC, flac_compression_factory)
+
+} // namespace dwarfs
diff --git a/test/flac_compressor_test.cpp b/test/flac_compressor_test.cpp
new file mode 100644
index 00000000..03dc967a
--- /dev/null
+++ b/test/flac_compressor_test.cpp
@@ -0,0 +1,202 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <cmath>
+#include <numbers>
+
+#include <gtest/gtest.h>
+
+#include "dwarfs/block_compressor.h"
+#include "dwarfs/pcm_sample_transformer.h"
+
+using namespace dwarfs;
+
+namespace {
+
+/**
+ * Generate `length` samples of a sine wave with the given period (in
+ * samples) that spans the full range of a signed `bits`-bit integer.
+ */
+template <typename T>
+std::vector<T> make_sine(int bits, size_t length, double period) {
+  std::vector<T> rv(length);
+  // Shift in 64 bits: `1 << bits` on a plain int is undefined for the
+  // 32-bit test case.
+  double amplitude = static_cast<double>(1ULL << bits) / 2;
+  for (size_t i = 0; i < length; ++i) {
+    rv[i] = static_cast<T>(
+        amplitude * std::sin(2 * std::numbers::pi * i / period) - 0.5);
+  }
+  return rv;
+}
+
+/// Interleave the samples of several equally sized channels.
+template <typename T>
+std::vector<T> multiplex(std::vector<std::vector<T>> const& in) {
+  auto samples = in.front().size();
+  auto channels = in.size();
+  std::vector<T> out(channels * samples);
+
+  for (size_t i = 0; i < samples; ++i) {
+    for (size_t c = 0; c < channels; ++c) {
+      out[i * channels + c] = in.at(c).at(i);
+    }
+  }
+
+  return out;
+}
+
+/**
+ * Build interleaved PCM test data made up of one sine wave per channel,
+ * each with a different period, packed into the requested sample layout.
+ */
+template <typename T>
+std::vector<uint8_t>
+make_test_data(int channels, int samples, int bytes, int bits,
+               pcm_sample_endianness end, pcm_sample_signedness sig,
+               pcm_sample_padding pad) {
+  std::vector<std::vector<T>> data;
+  for (int c = 0; c < channels; ++c) {
+    data.emplace_back(
+        make_sine<T>(bits, samples, 3.1 * ((599 * (c + 1)) % 256)));
+  }
+  auto muxed = multiplex(data);
+  std::vector<uint8_t> out(bytes * channels * samples);
+  pcm_sample_transformer<T> xfm(end, sig, pad, bytes, bits);
+  xfm.pack(out, muxed);
+  return out;
+}
+
+/// Shape of the PCM data used by one parameterized test case.
+struct data_params {
+  data_params(int channels, int samples, int bytes, int bits)
+      : num_channels{channels}
+      , num_samples{samples}
+      , bytes_per_sample{bytes}
+      , bits_per_sample{bits} {}
+
+  int num_channels;
+  int num_samples;
+  int bytes_per_sample;
+  int bits_per_sample;
+};
+
+std::ostream& operator<<(std::ostream& os, data_params const& p) {
+  os << "{channels=" << p.num_channels << ", samples=" << p.num_samples
+     << ", bytes=" << p.bytes_per_sample << ", bits=" << p.bits_per_sample
+     << "}";
+  return os;
+}
+
+std::vector<data_params> const data_parameters{
+    // clang-format off
+    { 1, 1000, 2, 16},
+    { 3, 1000, 1, 8},
+    { 1, 1000, 2, 12},
+    { 1, 100000, 3, 20},
+    { 8, 10000, 3, 20},
+    { 4, 10000, 4, 20},
+    { 4, 10000, 4, 24},
+    { 4, 10000, 3, 24},
+    { 7, 799999, 4, 32},
+    // clang-format on
+};
+
+} // namespace
+
+TEST(flac_compressor, sine) {
+  {
+    auto test = make_sine<int32_t>(8, 5, 4.0);
+    std::vector<int32_t> ref{0, 127, 0, -128, 0};
+    EXPECT_EQ(test, ref);
+  }
+  {
+    auto test = make_sine<int32_t>(5, 5, 4.0);
+    std::vector<int32_t> ref{0, 15, 0, -16, 0};
+    EXPECT_EQ(test, ref);
+  }
+  {
+    auto test = make_sine<int32_t>(16, 5, 4.0);
+    std::vector<int32_t> ref{0, 32767, 0, -32768, 0};
+    EXPECT_EQ(test, ref);
+  }
+  {
+    auto test = make_sine<int32_t>(12, 5, 4.0);
+    std::vector<int32_t> ref{0, 2047, 0, -2048, 0};
+    EXPECT_EQ(test, ref);
+  }
+}
+
+TEST(flac_compressor, basic) {
+  folly::dynamic meta = folly::dynamic::object;
+  meta.insert("endianness", "little");
+  meta.insert("signedness", "signed");
+  meta.insert("padding", "msb");
+  meta.insert("bytes_per_sample", 2);
+  meta.insert("bits_per_sample", 16);
+  meta.insert("number_of_channels", 2);
+
+  auto const data = make_test_data<int32_t>(
+      2, 1000, 2, 16, pcm_sample_endianness::Little,
+      pcm_sample_signedness::Signed, pcm_sample_padding::Msb);
+
+  block_compressor comp("flac");
+
+  auto compressed = comp.compress(data, std::move(meta));
+
+  EXPECT_LT(compressed.size(), data.size() / 2);
+
+  auto decompressed = block_decompressor::decompress(
+      compression_type::FLAC, compressed.data(), compressed.size());
+
+  EXPECT_EQ(data, decompressed);
+}
+
+class flac_param : public testing::TestWithParam<
+                       std::tuple<pcm_sample_endianness, pcm_sample_signedness,
+                                  pcm_sample_padding, data_params>> {};
+
+// Round-trip every combination of sample layout and data shape.
+TEST_P(flac_param, combinations) {
+  auto [end, sig, pad, param] = GetParam();
+
+  folly::dynamic meta = folly::dynamic::object;
+  meta.insert("endianness",
+              end == pcm_sample_endianness::Big ? "big" : "little");
+  meta.insert("signedness",
+              sig == pcm_sample_signedness::Signed ? "signed" : "unsigned");
+  meta.insert("padding", pad == pcm_sample_padding::Msb ? "msb" : "lsb");
+  meta.insert("bytes_per_sample", param.bytes_per_sample);
+  meta.insert("bits_per_sample", param.bits_per_sample);
+  meta.insert("number_of_channels", param.num_channels);
+
+  auto const data = make_test_data<int32_t>(
+      param.num_channels, param.num_samples, param.bytes_per_sample,
+      param.bits_per_sample, end, sig, pad);
+
+  block_compressor comp("flac");
+
+  auto compressed = comp.compress(data, std::move(meta));
+
+  EXPECT_LT(compressed.size(), data.size() / 2);
+
+  auto decompressed = block_decompressor::decompress(
+      compression_type::FLAC, compressed.data(), compressed.size());
+
+  EXPECT_EQ(data, decompressed);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    dwarfs, flac_param,
+    ::testing::Combine(::testing::Values(pcm_sample_endianness::Big,
+                                         pcm_sample_endianness::Little),
+                       ::testing::Values(pcm_sample_signedness::Signed,
+                                         pcm_sample_signedness::Unsigned),
+                       ::testing::Values(pcm_sample_padding::Lsb,
+                                         pcm_sample_padding::Msb),
+                       ::testing::ValuesIn(data_parameters)));
diff --git a/thrift/compression.thrift b/thrift/compression.thrift
new file mode 100644
index 00000000..1903e9c9
--- /dev/null
+++ b/thrift/compression.thrift
@@ -0,0 +1,39 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+include "thrift/annotation/cpp.thrift"
+
+namespace cpp2 dwarfs.thrift.compression
+
+// Aliases that request the named unsigned C++ type for the signed IDL
+// base types via the cpp.Type annotation.
+@cpp.Type{name = "uint8_t"}
+typedef byte UInt8
+@cpp.Type{name = "uint16_t"}
+typedef i16 UInt16
+@cpp.Type{name = "uint32_t"}
+typedef i32 UInt32
+@cpp.Type{name = "uint64_t"}
+typedef i64 UInt64
+
+// Header that src/dwarfs/compression/flac.cpp serializes in front of the
+// FLAC stream of each compressed block. It carries what the decompressor
+// needs to pack decoded samples back into the original PCM byte layout.
+struct flac_block_header {
+  // number of interleaved channels
+  1: UInt16 num_channels
+  // number of significant bits per sample
+  2: UInt8 bits_per_sample
+  // Sample format as used by flac.cpp: 0x80 = big endian, 0x40 = signed,
+  // 0x20 = LSB padding, low two bits (mask 0x03) = bytes per sample - 1.
+  3: UInt8 flags
+}
diff --git a/vcpkg.json b/vcpkg.json
index fca846b0..d7c26219 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -17,6 +17,7 @@
     "glog",
     "libarchive",
     "libevent",
+    "libflac",
     "libmagic",
     "openssl",
     "pkgconf",