From e08faf2c0c1dc8cb473184ff47ca84f68e8f2a1e Mon Sep 17 00:00:00 2001
From: Marcus Holland-Moritz <github@mhxnet.de>
Date: Sun, 23 Jul 2023 15:47:27 +0200
Subject: [PATCH] Basic working FLAC compression

---
 CMakeLists.txt                  |  67 ++++-
 include/dwarfs/compression.h    |   3 +-
 src/dwarfs/compression/flac.cpp | 484 ++++++++++++++++++++++++++++++++
 test/flac_compressor_test.cpp   | 202 +++++++++++++
 thrift/compression.thrift       |  39 +++
 vcpkg.json                      |   1 +
 6 files changed, 794 insertions(+), 2 deletions(-)
 create mode 100644 src/dwarfs/compression/flac.cpp
 create mode 100644 test/flac_compressor_test.cpp
 create mode 100644 thrift/compression.thrift

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 405e291b..f3e57231 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -214,6 +214,7 @@ if(PKG_CONFIG_FOUND)
   pkg_check_modules(LIBBROTLIENC IMPORTED_TARGET libbrotlienc>=1.0.9)
   pkg_check_modules(LIBARCHIVE IMPORTED_TARGET libarchive>=3.6.0)
   pkg_check_modules(LIBMAGIC IMPORTED_TARGET libmagic>=5.38)
+  pkg_check_modules(FLAC IMPORTED_TARGET flac++>=1.4.2)
   pkg_check_modules(ZSTD IMPORTED_TARGET libzstd>=1.5.2)
   pkg_check_modules(XXHASH IMPORTED_TARGET libxxhash>=0.8.1)
 endif()
@@ -421,6 +422,10 @@ if(LIBBROTLIDEC_FOUND AND LIBBROTLIENC_FOUND)
   list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/brotli.cpp)
 endif()
 
+if(FLAC_FOUND)
+  list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/flac.cpp)
+endif()
+
 list(
   APPEND
   LIBDWARFS_CATEGORIZER_SRC
@@ -447,7 +452,7 @@ target_compile_definitions(
 )
 
 target_link_libraries(dwarfs_categorizer folly)
-target_link_libraries(dwarfs_compression folly)
+target_link_libraries(dwarfs_compression folly compression_thrift)
 target_link_libraries(dwarfs_tool dwarfs)
 
 if(STATIC_BUILD_DO_NOT_USE)
@@ -576,6 +581,13 @@ if(WITH_TESTS)
   gtest_discover_tests(dwarfs_utils_test DISCOVERY_TIMEOUT 120)
   gtest_discover_tests(dwarfs_pcm_sample_transformer_test DISCOVERY_TIMEOUT 120)
 
+  if(FLAC_FOUND)
+    add_executable(dwarfs_flac_compressor_test test/flac_compressor_test.cpp)
+    target_link_libraries(dwarfs_flac_compressor_test gtest gtest_main)
+    list(APPEND BINARY_TARGETS dwarfs_flac_compressor_test)
+    gtest_discover_tests(dwarfs_flac_compressor_test DISCOVERY_TIMEOUT 120)
+  endif()
+
   target_compile_definitions(dwarfs_compat_test
                              PRIVATE TEST_DATA_DIR=\"${CMAKE_SOURCE_DIR}/test\")
 
@@ -641,10 +653,32 @@ list(
   ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.h
   ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.tcc
   ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types_custom_protocol.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types_fwd.h
   ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visit_by_thrift_field_metadata.h
   ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visit_union.h
   ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visitation.h)
 
+list(
+  APPEND
+  COMPRESSION_THRIFT_SRC
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_clients.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_constants.cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_constants.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_data.cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_data.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_for_each_field.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_handlers.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_metadata.cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_metadata.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.tcc
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types_custom_protocol.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types_fwd.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_visit_by_thrift_field_metadata.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_visit_union.h
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_visitation.h)
+
 add_custom_command(
   OUTPUT thrift/lib/thrift/_keep
   COMMAND ${CMAKE_COMMAND} -E make_directory thrift/lib/thrift
@@ -689,6 +723,22 @@ add_custom_command(
   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
 )
 
+add_custom_command( # generates the files listed in COMPRESSION_THRIFT_SRC
+  OUTPUT ${COMPRESSION_THRIFT_SRC}
+  COMMAND ${CMAKE_COMMAND} -E copy # stage the IDL in the working directory
+              ${CMAKE_CURRENT_SOURCE_DIR}/thrift/compression.thrift
+              ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/compression.thrift
+  COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1 # run on the staged copy
+              -I ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift
+              -o ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
+              --gen mstch_cpp2
+              compression.thrift
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1
+          ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/_keep
+          ${CMAKE_CURRENT_SOURCE_DIR}/thrift/compression.thrift
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
+)
+
 list(
   APPEND
   INCLUDE_DIRS
@@ -745,11 +795,19 @@ add_library(
   ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.cpp
   ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_data.cpp)
 
+add_library(
+  compression_thrift
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_data.cpp)
+
 set_property(TARGET metadata_thrift PROPERTY CXX_STANDARD 20)
+set_property(TARGET compression_thrift PROPERTY CXX_STANDARD 20)
 
 target_include_directories(metadata_thrift PRIVATE ${INCLUDE_DIRS})
+target_include_directories(compression_thrift PRIVATE ${INCLUDE_DIRS})
 
 target_link_libraries(metadata_thrift thrift_light)
+target_link_libraries(compression_thrift thrift_light)
 
 foreach(tgt dwarfs dwarfs_compression dwarfs_categorizer
             dwarfs_tool ${BINARY_TARGETS} ${MAIN_TARGETS})
@@ -769,6 +827,7 @@ foreach(tgt dwarfs dwarfs_compression dwarfs_categorizer
             $<$<BOOL:${LIBLZ4_FOUND}>:DWARFS_HAVE_LIBLZ4>
             $<$<BOOL:${LIBLZMA_FOUND}>:DWARFS_HAVE_LIBLZMA>
             $<$<AND:$<BOOL:${LIBBROTLIDEC_FOUND}>,$<BOOL:${LIBBROTLIENC_FOUND}>>:DWARFS_HAVE_LIBBROTLI>
+            $<$<BOOL:${FLAC_FOUND}>:DWARFS_HAVE_FLAC>
   )
 
   if(DWARFS_USE_EXCEPTION_TRACER)
@@ -855,6 +914,10 @@ if(LIBLZMA_FOUND)
   target_link_libraries(dwarfs PkgConfig::LIBLZMA)
 endif()
 
+if(FLAC_FOUND)
+  target_link_libraries(dwarfs PkgConfig::FLAC)
+endif()
+
 if(LIBBROTLIDEC_FOUND AND LIBBROTLIENC_FOUND)
   target_link_libraries(dwarfs PkgConfig::LIBBROTLIDEC PkgConfig::LIBBROTLIENC)
 endif()
@@ -924,6 +987,7 @@ if(STATIC_BUILD_DO_NOT_USE)
   import_static_lib(static_libunwind "libunwind.a")
   import_static_lib(static_libarchive "libarchive.a")
   import_static_lib(static_libmagic "libmagic.a")
+  import_static_lib(static_libflac "libFLAC++.a")
 
   set_target_properties(static_libunwind PROPERTIES INTERFACE_LINK_LIBRARIES
                                                     PkgConfig::LIBLZMA)
@@ -935,6 +999,7 @@ if(STATIC_BUILD_DO_NOT_USE)
                                                 static_libz)
 
   target_link_libraries(dwarfs_categorizer static_libmagic)
+  target_link_libraries(dwarfs_compression static_libflac)
 
   foreach(tgt ${BINARY_TARGETS})
     if(PREFER_SYSTEM_LIBFMT)
diff --git a/include/dwarfs/compression.h b/include/dwarfs/compression.h
index 34c70e7b..9bb126bd 100644
--- a/include/dwarfs/compression.h
+++ b/include/dwarfs/compression.h
@@ -30,7 +30,8 @@
   DWARFS_COMPRESSION_TYPE(ZSTD,   2) SEPARATOR                           \
   DWARFS_COMPRESSION_TYPE(LZ4,    3) SEPARATOR                           \
   DWARFS_COMPRESSION_TYPE(LZ4HC,  4) SEPARATOR                           \
-  DWARFS_COMPRESSION_TYPE(BROTLI, 5)
+  DWARFS_COMPRESSION_TYPE(BROTLI, 5) SEPARATOR                           \
+  DWARFS_COMPRESSION_TYPE(FLAC,   6)
 // clang-format on
 
 namespace dwarfs {
diff --git a/src/dwarfs/compression/flac.cpp b/src/dwarfs/compression/flac.cpp
new file mode 100644
index 00000000..6c04c0bf
--- /dev/null
+++ b/src/dwarfs/compression/flac.cpp
@@ -0,0 +1,484 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author     Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright  Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <cassert>
+#include <cstring>
+#include <span>
+
+#include <FLAC++/decoder.h>
+#include <FLAC++/encoder.h>
+
+#include <thrift/lib/cpp2/protocol/Serializer.h>
+
+#include <fmt/format.h>
+
+#include <folly/Varint.h>
+
+#include "dwarfs/block_compressor.h"
+#include "dwarfs/compression.h"
+#include "dwarfs/error.h"
+#include "dwarfs/option_map.h"
+#include "dwarfs/pcm_sample_transformer.h"
+
+#include "dwarfs/gen-cpp2/compression_types.h"
+
+namespace dwarfs {
+
+namespace {
+
+constexpr uint8_t const kFlagBigEndian{0x80};
+constexpr uint8_t const kFlagSigned{0x40};
+constexpr uint8_t const kFlagLsbPadding{0x20};
+constexpr uint8_t const kBytesPerSampleMask{0x03};
+constexpr size_t const kBlockSize{65536};
+
+class dwarfs_flac_stream_encoder : public FLAC::Encoder::Stream {
+ public:
+  dwarfs_flac_stream_encoder(std::vector<uint8_t>& data)
+      : data_{data}, pos_{data_.size()} {} // output starts at current end
+
+  ::FLAC__StreamEncoderReadStatus
+  read_callback(FLAC__byte buffer[], size_t* bytes) override {
+    if (pos_ >= data_.size()) {
+      return FLAC__STREAM_ENCODER_READ_STATUS_END_OF_STREAM;
+    }
+    *bytes = std::min(*bytes, data_.size() - pos_); // never read past the end
+    ::memcpy(buffer, data_.data() + pos_, *bytes);
+    pos_ += *bytes; // advance like a file read, so short reads can resume
+    return FLAC__STREAM_ENCODER_READ_STATUS_CONTINUE;
+  }
+
+  ::FLAC__StreamEncoderWriteStatus
+  write_callback(const FLAC__byte buffer[], size_t bytes, uint32_t,
+                 uint32_t) override {
+    data_.resize(std::max(data_.size(), pos_ + bytes)); // grow, never shrink
+    ::memcpy(data_.data() + pos_, buffer, bytes);
+    pos_ += bytes;
+    return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
+  }
+
+  ::FLAC__StreamEncoderSeekStatus
+  seek_callback(FLAC__uint64 absolute_byte_offset) override {
+    pos_ = absolute_byte_offset;
+    return FLAC__STREAM_ENCODER_SEEK_STATUS_OK;
+  }
+
+  ::FLAC__StreamEncoderTellStatus
+  tell_callback(FLAC__uint64* absolute_byte_offset) override {
+    *absolute_byte_offset = pos_;
+    return FLAC__STREAM_ENCODER_TELL_STATUS_OK;
+  }
+
+ private:
+  std::vector<uint8_t>& data_;
+  size_t pos_;
+};
+
+class dwarfs_flac_stream_decoder : public FLAC::Decoder::Stream {
+ public:
+  dwarfs_flac_stream_decoder(
+      std::vector<uint8_t>& target, std::span<uint8_t const> data,
+      thrift::compression::flac_block_header const& header)
+      : target_{target}
+      , data_{data}
+      , header_{header}
+      , bytes_per_sample_{(header_.flags().value() & kBytesPerSampleMask) + 1}
+      , xfm_{header_.flags().value() & kFlagBigEndian
+                 ? pcm_sample_endianness::Big
+                 : pcm_sample_endianness::Little,
+             header_.flags().value() & kFlagSigned
+                 ? pcm_sample_signedness::Signed
+                 : pcm_sample_signedness::Unsigned,
+             header_.flags().value() & kFlagLsbPadding
+                 ? pcm_sample_padding::Lsb
+                 : pcm_sample_padding::Msb,
+             bytes_per_sample_, header_.bits_per_sample().value()} {}
+
+  ::FLAC__StreamDecoderReadStatus
+  read_callback(FLAC__byte buffer[], size_t* bytes) override {
+    if (pos_ >= data_.size()) {
+      return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM;
+    }
+
+    if (pos_ + *bytes > data_.size()) {
+      *bytes = data_.size() - pos_;
+    }
+
+    if (*bytes > 0) {
+      ::memcpy(buffer, data_.data() + pos_, *bytes);
+    }
+
+    pos_ += *bytes;
+
+    return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
+  }
+
+  ::FLAC__StreamDecoderWriteStatus
+  write_callback(const ::FLAC__Frame* frame,
+                 const FLAC__int32* const buffer[]) override {
+    // Interleave the per-channel planes into tmp_, then pack the samples
+    // into bytes appended to target_.
+    auto const num_channels = frame->header.channels;
+    auto const num_samples = frame->header.blocksize;
+    tmp_.resize(num_channels * num_samples);
+    auto out = tmp_.begin();
+    for (uint_fast32_t s = 0; s < num_samples; ++s) {
+      for (uint_fast32_t ch = 0; ch < num_channels; ++ch) {
+        *out++ = buffer[ch][s];
+      }
+    }
+
+    size_t const num_bytes = num_channels * num_samples * bytes_per_sample_;
+    auto const offset = target_.size();
+    target_.resize(offset + num_bytes);
+    xfm_.pack(std::span<uint8_t>(&target_[offset], num_bytes), tmp_);
+    return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
+  }
+
+  void error_callback(::FLAC__StreamDecoderErrorStatus status) override {
+    DWARFS_THROW(runtime_error,
+                 fmt::format("[FLAC] decoder error: {}",
+                             FLAC__StreamDecoderErrorStatusString[status]));
+  }
+
+  ::FLAC__StreamDecoderSeekStatus
+  seek_callback(FLAC__uint64 absolute_byte_offset) override {
+    if (absolute_byte_offset > data_.size()) {
+      return ::FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
+    }
+    pos_ = absolute_byte_offset;
+    return ::FLAC__STREAM_DECODER_SEEK_STATUS_OK;
+  }
+
+  ::FLAC__StreamDecoderTellStatus
+  tell_callback(FLAC__uint64* absolute_byte_offset) override {
+    *absolute_byte_offset = pos_;
+    return ::FLAC__STREAM_DECODER_TELL_STATUS_OK;
+  }
+
+  ::FLAC__StreamDecoderLengthStatus
+  length_callback(FLAC__uint64* stream_length) override {
+    *stream_length = data_.size();
+    return ::FLAC__STREAM_DECODER_LENGTH_STATUS_OK;
+  }
+
+  bool eof_callback() override { return pos_ >= data_.size(); }
+
+ private:
+  std::vector<uint8_t>& target_;
+  std::vector<FLAC__int32> tmp_;
+  std::span<uint8_t const> data_;
+  thrift::compression::flac_block_header const& header_;
+  int const bytes_per_sample_;
+  pcm_sample_transformer<FLAC__int32> xfm_;
+  size_t pos_{0};
+};
+
+class flac_block_compressor final : public block_compressor::impl {
+ public:
+  flac_block_compressor(uint32_t level, bool exhaustive)
+      : level_{level}
+      , exhaustive_{exhaustive} {}
+
+  flac_block_compressor(const flac_block_compressor& rhs) = default;
+
+  std::unique_ptr<block_compressor::impl> clone() const override {
+    return std::make_unique<flac_block_compressor>(*this);
+  }
+
+  // Output layout: varint(uncompressed size), thrift block header, FLAC stream.
+  std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
+                                folly::dynamic meta) const override {
+    auto endianness = meta["endianness"].asString();
+    auto signedness = meta["signedness"].asString();
+    auto padding = meta["padding"].asString();
+    auto num_channels = meta["number_of_channels"].asInt();
+    auto bits_per_sample = meta["bits_per_sample"].asInt();
+    auto bytes_per_sample = meta["bytes_per_sample"].asInt();
+    // Checked at runtime (asserts vanish with NDEBUG); 0 would divide by zero.
+    if (bytes_per_sample < 1 || bytes_per_sample > 4 || bits_per_sample < 8 ||
+        bits_per_sample > 32 || num_channels < 1) {
+      DWARFS_THROW(runtime_error, "[FLAC] invalid PCM waveform metadata");
+    }
+    if (data.size() % (num_channels * bytes_per_sample)) {
+      DWARFS_THROW(
+          runtime_error,
+          fmt::format("unexpected PCM waveform configuration: {} bytes to "
+                      "compress, {} channels, {} bytes per sample",
+                      data.size(), num_channels, bytes_per_sample));
+    }
+
+    size_t num_samples = data.size() / (num_channels * bytes_per_sample);
+
+    pcm_sample_endianness pcm_end;
+    pcm_sample_signedness pcm_sig;
+    pcm_sample_padding pcm_pad;
+
+    uint8_t flags = bytes_per_sample - 1;
+
+    if (endianness == "big") {
+      flags |= kFlagBigEndian;
+      pcm_end = pcm_sample_endianness::Big;
+    } else {
+      pcm_end = pcm_sample_endianness::Little;
+    }
+
+    if (signedness == "signed") {
+      flags |= kFlagSigned;
+      pcm_sig = pcm_sample_signedness::Signed;
+    } else {
+      pcm_sig = pcm_sample_signedness::Unsigned;
+    }
+
+    if (padding == "lsb") {
+      flags |= kFlagLsbPadding;
+      pcm_pad = pcm_sample_padding::Lsb;
+    } else {
+      pcm_pad = pcm_sample_padding::Msb;
+    }
+
+    std::vector<uint8_t> compressed;
+    {
+      using namespace ::apache::thrift;
+
+      compressed.reserve(5 * data.size() / 8); // optimistic guess
+      compressed.resize(folly::kMaxVarintLength64);
+
+      size_t pos = 0;
+      pos += folly::encodeVarint(data.size(), compressed.data() + pos);
+      compressed.resize(pos);
+
+      thrift::compression::flac_block_header hdr;
+      hdr.num_channels() = num_channels;
+      hdr.bits_per_sample() = bits_per_sample;
+      hdr.flags() = flags;
+
+      std::string hdrbuf;
+      CompactSerializer::serialize(hdr, &hdrbuf);
+
+      compressed.resize(pos + hdrbuf.size());
+      ::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size());
+      pos += hdrbuf.size();
+    }
+
+    dwarfs_flac_stream_encoder encoder(compressed);
+
+    encoder.set_streamable_subset(false);
+    encoder.set_channels(num_channels);
+    encoder.set_bits_per_sample(bits_per_sample);
+    encoder.set_sample_rate(48000); // TODO: see if a fixed rate makes sense
+    encoder.set_compression_level(level_);
+    encoder.set_do_exhaustive_model_search(exhaustive_);
+    encoder.set_total_samples_estimate(num_samples);
+
+    if (encoder.init() != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
+      DWARFS_THROW(
+          runtime_error,
+          fmt::format("[FLAC] init: {}", encoder.get_state().as_cstring()));
+    }
+
+    pcm_sample_transformer<FLAC__int32> xfm(pcm_end, pcm_sig, pcm_pad,
+                                            bytes_per_sample, bits_per_sample);
+
+    const auto samples_per_call = kBlockSize / num_channels;
+    std::vector<FLAC__int32> buffer;
+    size_t input_pos = 0;
+
+    while (num_samples > 0) {
+      size_t n = std::min(num_samples, samples_per_call);
+      buffer.resize(n * num_channels);
+      xfm.unpack(buffer,
+                 std::span<uint8_t const>(data.data() + input_pos,
+                                          buffer.size() * bytes_per_sample));
+
+      if (!encoder.process_interleaved(buffer.data(), n)) {
+        DWARFS_THROW(
+            runtime_error,
+            fmt::format("[FLAC] failed to process interleaved samples: {}",
+                        encoder.get_state().as_cstring()));
+      }
+
+      input_pos += buffer.size() * bytes_per_sample;
+      num_samples -= n;
+    }
+
+    if (!encoder.finish()) {
+      DWARFS_THROW(runtime_error, "[FLAC] failed to finish encoder");
+    }
+
+    if (compressed.size() >= data.size()) {
+      throw bad_compression_ratio_error();
+    }
+
+    compressed.shrink_to_fit();
+
+    return compressed;
+  }
+
+  std::vector<uint8_t>
+  compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
+    return compress(data, std::move(meta));
+  }
+
+  compression_type type() const override { return compression_type::FLAC; }
+
+  std::string describe() const override {
+    return fmt::format("flac [level={}{}]", level_,
+                       exhaustive_ ? ", exhaustive" : "");
+  }
+
+  bool check_metadata(folly::dynamic meta) const override {
+    // Accept only metadata that carries every PCM property compress() reads.
+    bool ok = !meta.empty();
+
+    for (auto key : {"endianness", "signedness", "padding", "bytes_per_sample",
+                     "bits_per_sample", "number_of_channels"}) {
+      ok = ok && meta.count(key) > 0;
+    }
+    return ok;
+  }
+
+ private:
+  uint32_t const level_;
+  bool const exhaustive_;
+};
+
+class flac_block_decompressor final : public block_decompressor::impl {
+ public:
+  flac_block_decompressor(const uint8_t* data, size_t size,
+                          std::vector<uint8_t>& target)
+      : flac_block_decompressor(folly::Range<uint8_t const*>(data, size),
+                                target) {}
+
+  flac_block_decompressor(folly::Range<uint8_t const*> data,
+                          std::vector<uint8_t>& target)
+      : decompressed_{target}
+      , uncompressed_size_{folly::decodeVarint(data)}
+      , header_{decode_header(data)}
+      , decoder_{std::make_unique<dwarfs_flac_stream_decoder>(
+            decompressed_, std::span<uint8_t const>(data.data(), data.size()),
+            header_)} {
+    decoder_->set_md5_checking(false);
+    decoder_->set_metadata_ignore_all();
+
+    if (auto status = decoder_->init();
+        status != FLAC__STREAM_DECODER_INIT_STATUS_OK) {
+      DWARFS_THROW(runtime_error,
+                   fmt::format("[FLAC] could not initialize decoder: {}",
+                               FLAC__StreamDecoderInitStatusString[status]));
+    }
+
+    try {
+      decompressed_.reserve(uncompressed_size_);
+    } catch (std::bad_alloc const&) {
+      DWARFS_THROW(
+          runtime_error,
+          fmt::format(
+              "[FLAC] could not reserve {} bytes for decompressed block",
+              uncompressed_size_));
+    }
+  }
+
+  compression_type type() const override { return compression_type::FLAC; }
+
+  bool decompress_frame(size_t frame_size) override {
+    // Decodes until `frame_size` more bytes (clamped to the rest of the block)
+    // are in the target; throws on decoder failure or premature stream end.
+    size_t pos = decompressed_.size();
+    if (pos + frame_size > uncompressed_size_) {
+      assert(uncompressed_size_ >= pos);
+      frame_size = uncompressed_size_ - pos;
+    }
+    size_t wanted = pos + frame_size;
+    assert(wanted <= uncompressed_size_);
+    assert(frame_size > 0);
+    while (decompressed_.size() < wanted) {
+      // process_single() still returns true once the decoder has reached
+      // END_OF_STREAM/ABORTED; without this check a short stream spins forever.
+      bool const ok = decoder_->process_single();
+      ::FLAC__StreamDecoderState const st = decoder_->get_state();
+      if (!ok || (decompressed_.size() < wanted &&
+                  (st == FLAC__STREAM_DECODER_END_OF_STREAM ||
+                   st == FLAC__STREAM_DECODER_ABORTED))) {
+        DWARFS_THROW(runtime_error,
+                     fmt::format("[FLAC] failed to process frame: {}",
+                                 decoder_->get_state().as_cstring()));
+      }
+    }
+    if (decompressed_.size() == uncompressed_size_) { decoder_.reset(); }
+    return true;
+  }
+
+  size_t uncompressed_size() const override { return uncompressed_size_; }
+
+ private:
+  static thrift::compression::flac_block_header
+  decode_header(folly::Range<uint8_t const*>& range) {
+    using namespace ::apache::thrift;
+    thrift::compression::flac_block_header hdr;
+    auto size = CompactSerializer::deserialize(range, hdr);
+    range.advance(size);
+    return hdr;
+  }
+
+  std::vector<uint8_t>& decompressed_;
+  folly::Range<uint8_t const*> backup_data_;
+
+  size_t const uncompressed_size_;
+  thrift::compression::flac_block_header const header_;
+  std::unique_ptr<dwarfs_flac_stream_decoder> decoder_;
+};
+
+class flac_compression_factory : public compression_factory {
+ public:
+  flac_compression_factory()
+      : options_{
+            fmt::format("level=[0..8]"),
+            fmt::format("exhaustive"),
+        } {}
+
+  std::string_view name() const override { return "flac"; }
+
+  std::string_view description() const override { return "FLAC compression"; }
+
+  std::vector<std::string> const& options() const override { return options_; }
+
+  std::unique_ptr<block_compressor::impl>
+  make_compressor(option_map& om) const override {
+    return std::make_unique<flac_block_compressor>(
+        om.get<uint32_t>("level", 6), om.get<bool>("exhaustive", false));
+  }
+
+  std::unique_ptr<block_decompressor::impl>
+  make_decompressor(std::span<uint8_t const> data,
+                    std::vector<uint8_t>& target) const override {
+    return std::make_unique<flac_block_decompressor>(data.data(), data.size(),
+                                                     target);
+  }
+
+ private:
+  std::vector<std::string> const options_;
+};
+
+} // namespace
+
+REGISTER_COMPRESSION_FACTORY(compression_type::FLAC, flac_compression_factory)
+
+} // namespace dwarfs
diff --git a/test/flac_compressor_test.cpp b/test/flac_compressor_test.cpp
new file mode 100644
index 00000000..03dc967a
--- /dev/null
+++ b/test/flac_compressor_test.cpp
@@ -0,0 +1,202 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author     Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright  Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <cmath>
+#include <numbers>
+
+#include <gtest/gtest.h>
+
+#include "dwarfs/block_compressor.h"
+#include "dwarfs/pcm_sample_transformer.h"
+
+using namespace dwarfs;
+
+namespace {
+
+template <typename T> // returns `length` samples of a `bits`-bit sine wave
+std::vector<T> make_sine(int bits, size_t length, double period) {
+  std::vector<T> rv(length);
+  double amplitude = std::ldexp(1.0, bits - 1); // 2^(bits-1), no int shift UB
+  for (size_t i = 0; i < length; ++i) {
+    rv[i] = static_cast<T>(
+        amplitude * std::sin(2 * std::numbers::pi * i / period) - 0.5);
+  }
+  return rv;
+}
+
+// Interleaves the channels of `in`: one sample from each channel in turn.
+template <typename T>
+std::vector<T> multiplex(std::vector<std::vector<T>> const& in) {
+  auto const num_channels = in.size();
+  auto const num_samples = in.front().size(); // length of the first channel
+  std::vector<T> out;
+  out.reserve(num_channels * num_samples);
+  for (size_t s = 0; s < num_samples; ++s) {
+    for (auto const& channel : in) {
+      out.push_back(channel.at(s)); // at() throws if a channel is shorter
+    }
+  }
+  return out;
+}
+
+template <typename T = int32_t>
+std::vector<uint8_t>
+make_test_data(int channels, int samples, int bytes, int bits,
+               pcm_sample_endianness end, pcm_sample_signedness sig,
+               pcm_sample_padding pad) {
+  std::vector<std::vector<T>> data;
+  for (int c = 0; c < channels; ++c) {
+    data.emplace_back(
+        make_sine<T>(bits, samples, 3.1 * ((599 * (c + 1)) % 256)));
+  }
+  auto muxed = multiplex(data);
+  std::vector<uint8_t> out(bytes * channels * samples);
+  pcm_sample_transformer<T> xfm(end, sig, pad, bytes, bits);
+  xfm.pack(out, muxed);
+  return out;
+}
+
+struct data_params {
+  data_params(int channels, int samples, int bytes, int bits)
+      : num_channels{channels}
+      , num_samples{samples}
+      , bytes_per_sample{bytes}
+      , bits_per_sample{bits} {}
+
+  int num_channels;
+  int num_samples;
+  int bytes_per_sample;
+  int bits_per_sample;
+};
+
+std::ostream& operator<<(std::ostream& os, data_params const& p) {
+  os << "{channels=" << p.num_channels << ", samples=" << p.num_samples
+     << ", bytes=" << p.bytes_per_sample << ", bits=" << p.bits_per_sample
+     << "}";
+  return os;
+}
+
+std::vector<data_params> const data_parameters{
+    // clang-format off
+    { 1,   1000, 2, 16},
+    { 3,   1000, 1, 8},
+    { 1,   1000, 2, 12},
+    { 1, 100000, 3, 20},
+    { 8,  10000, 3, 20},
+    { 4,  10000, 4, 20},
+    { 4,  10000, 4, 24},
+    { 4,  10000, 3, 24},
+    { 7, 799999, 4, 32},
+    // clang-format on
+};
+
+} // namespace
+
+TEST(flac_compressor, sine) {
+  {
+    auto test = make_sine<int8_t>(8, 5, 4.0);
+    std::vector<int8_t> ref{0, 127, 0, -128, 0};
+    EXPECT_EQ(test, ref);
+  }
+  {
+    auto test = make_sine<int8_t>(5, 5, 4.0);
+    std::vector<int8_t> ref{0, 15, 0, -16, 0};
+    EXPECT_EQ(test, ref);
+  }
+  {
+    auto test = make_sine<int16_t>(16, 5, 4.0);
+    std::vector<int16_t> ref{0, 32767, 0, -32768, 0};
+    EXPECT_EQ(test, ref);
+  }
+  {
+    auto test = make_sine<int16_t>(12, 5, 4.0);
+    std::vector<int16_t> ref{0, 2047, 0, -2048, 0};
+    EXPECT_EQ(test, ref);
+  }
+}
+
+TEST(flac_compressor, basic) {
+  folly::dynamic meta = folly::dynamic::object;
+  meta.insert("endianness", "little");
+  meta.insert("signedness", "signed");
+  meta.insert("padding", "msb");
+  meta.insert("bytes_per_sample", 2);
+  meta.insert("bits_per_sample", 16);
+  meta.insert("number_of_channels", 2);
+
+  auto const data =
+      make_test_data(2, 1000, 2, 16, pcm_sample_endianness::Little,
+                     pcm_sample_signedness::Signed, pcm_sample_padding::Msb);
+
+  block_compressor comp("flac");
+
+  auto compressed = comp.compress(data, std::move(meta));
+
+  EXPECT_LT(compressed.size(), data.size() / 2);
+
+  auto decompressed = block_decompressor::decompress(
+      compression_type::FLAC, compressed.data(), compressed.size());
+
+  EXPECT_EQ(data, decompressed);
+}
+
+class flac_param : public testing::TestWithParam<
+                       std::tuple<pcm_sample_endianness, pcm_sample_signedness,
+                                  pcm_sample_padding, data_params>> {};
+
+TEST_P(flac_param, combinations) {
+  auto [end, sig, pad, param] = GetParam();
+
+  folly::dynamic meta = folly::dynamic::object;
+  meta.insert("endianness",
+              end == pcm_sample_endianness::Big ? "big" : "little");
+  meta.insert("signedness",
+              sig == pcm_sample_signedness::Signed ? "signed" : "unsigned");
+  meta.insert("padding", pad == pcm_sample_padding::Msb ? "msb" : "lsb");
+  meta.insert("bytes_per_sample", param.bytes_per_sample);
+  meta.insert("bits_per_sample", param.bits_per_sample);
+  meta.insert("number_of_channels", param.num_channels);
+
+  auto const data = make_test_data(param.num_channels, param.num_samples,
+                                   param.bytes_per_sample,
+                                   param.bits_per_sample, end, sig, pad);
+
+  block_compressor comp("flac");
+
+  auto compressed = comp.compress(data, std::move(meta));
+
+  EXPECT_LT(compressed.size(), data.size() / 2);
+
+  auto decompressed = block_decompressor::decompress(
+      compression_type::FLAC, compressed.data(), compressed.size());
+
+  EXPECT_EQ(data, decompressed);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    dwarfs, flac_param,
+    ::testing::Combine(::testing::Values(pcm_sample_endianness::Big,
+                                         pcm_sample_endianness::Little),
+                       ::testing::Values(pcm_sample_signedness::Signed,
+                                         pcm_sample_signedness::Unsigned),
+                       ::testing::Values(pcm_sample_padding::Lsb,
+                                         pcm_sample_padding::Msb),
+                       ::testing::ValuesIn(data_parameters)));
diff --git a/thrift/compression.thrift b/thrift/compression.thrift
new file mode 100644
index 00000000..1903e9c9
--- /dev/null
+++ b/thrift/compression.thrift
@@ -0,0 +1,39 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author     Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright  Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+include "thrift/annotation/cpp.thrift"
+
+namespace cpp2 dwarfs.thrift.compression
+
+@cpp.Type{name = "uint8_t"}
+typedef byte UInt8
+@cpp.Type{name = "uint16_t"}
+typedef i16 UInt16
+@cpp.Type{name = "uint32_t"}
+typedef i32 UInt32
+@cpp.Type{name = "uint64_t"}
+typedef i64 UInt64
+
+struct flac_block_header { // stored ahead of the FLAC stream in each block
+   1: UInt16 num_channels
+   2: UInt8 bits_per_sample
+   3: UInt8 flags // 0x80 big-endian, 0x40 signed, 0x20 LSB pad, low 2 bits = bytes/sample - 1
+}
diff --git a/vcpkg.json b/vcpkg.json
index fca846b0..d7c26219 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -17,6 +17,7 @@
     "glog",
     "libarchive",
     "libevent",
+    "libflac",
     "libmagic",
     "openssl",
     "pkgconf",