mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-13 14:27:30 -04:00
Basic working FLAC compression
This commit is contained in:
parent
4d5c039f12
commit
e08faf2c0c
@ -214,6 +214,7 @@ if(PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(LIBBROTLIENC IMPORTED_TARGET libbrotlienc>=1.0.9)
|
||||
pkg_check_modules(LIBARCHIVE IMPORTED_TARGET libarchive>=3.6.0)
|
||||
pkg_check_modules(LIBMAGIC IMPORTED_TARGET libmagic>=5.38)
|
||||
pkg_check_modules(FLAC IMPORTED_TARGET flac++>=1.4.2)
|
||||
pkg_check_modules(ZSTD IMPORTED_TARGET libzstd>=1.5.2)
|
||||
pkg_check_modules(XXHASH IMPORTED_TARGET libxxhash>=0.8.1)
|
||||
endif()
|
||||
@ -421,6 +422,10 @@ if(LIBBROTLIDEC_FOUND AND LIBBROTLIENC_FOUND)
|
||||
list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/brotli.cpp)
|
||||
endif()
|
||||
|
||||
if(FLAC_FOUND)
|
||||
list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/flac.cpp)
|
||||
endif()
|
||||
|
||||
list(
|
||||
APPEND
|
||||
LIBDWARFS_CATEGORIZER_SRC
|
||||
@ -447,7 +452,7 @@ target_compile_definitions(
|
||||
)
|
||||
|
||||
target_link_libraries(dwarfs_categorizer folly)
|
||||
target_link_libraries(dwarfs_compression folly)
|
||||
target_link_libraries(dwarfs_compression folly compression_thrift)
|
||||
target_link_libraries(dwarfs_tool dwarfs)
|
||||
|
||||
if(STATIC_BUILD_DO_NOT_USE)
|
||||
@ -576,6 +581,13 @@ if(WITH_TESTS)
|
||||
gtest_discover_tests(dwarfs_utils_test DISCOVERY_TIMEOUT 120)
|
||||
gtest_discover_tests(dwarfs_pcm_sample_transformer_test DISCOVERY_TIMEOUT 120)
|
||||
|
||||
if(FLAC_FOUND)
|
||||
add_executable(dwarfs_flac_compressor_test test/flac_compressor_test.cpp)
|
||||
target_link_libraries(dwarfs_flac_compressor_test gtest gtest_main)
|
||||
list(APPEND BINARY_TARGETS dwarfs_flac_compressor_test)
|
||||
gtest_discover_tests(dwarfs_flac_compressor_test DISCOVERY_TIMEOUT 120)
|
||||
endif()
|
||||
|
||||
target_compile_definitions(dwarfs_compat_test
|
||||
PRIVATE TEST_DATA_DIR=\"${CMAKE_SOURCE_DIR}/test\")
|
||||
|
||||
@ -641,10 +653,32 @@ list(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.tcc
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types_custom_protocol.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types_fwd.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visit_by_thrift_field_metadata.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visit_union.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_visitation.h)
|
||||
|
||||
list(
|
||||
APPEND
|
||||
COMPRESSION_THRIFT_SRC
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_clients.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_constants.cpp
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_constants.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_data.cpp
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_data.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_for_each_field.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_handlers.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_metadata.cpp
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_metadata.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.cpp
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.tcc
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types_custom_protocol.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types_fwd.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_visit_by_thrift_field_metadata.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_visit_union.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_visitation.h)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT thrift/lib/thrift/_keep
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory thrift/lib/thrift
|
||||
@ -689,6 +723,22 @@ add_custom_command(
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
|
||||
)
|
||||
|
||||
# Generate the C++ sources for thrift/compression.thrift.
#
# The .thrift file is first copied next to the generator's output directory,
# then thrift1 is run from that directory so the generated gen-cpp2/ files
# end up at the paths listed in COMPRESSION_THRIFT_SRC.
# VERBATIM keeps argument escaping identical across generators/platforms.
add_custom_command(
  OUTPUT ${COMPRESSION_THRIFT_SRC}
  COMMAND ${CMAKE_COMMAND} -E copy
          ${CMAKE_CURRENT_SOURCE_DIR}/thrift/compression.thrift
          ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/compression.thrift
  COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1
          -I ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift
          -o ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
          --gen mstch_cpp2
          compression.thrift
  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/bin/thrift1
          ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/_keep
          ${CMAKE_CURRENT_SOURCE_DIR}/thrift/compression.thrift
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs
  VERBATIM
)
|
||||
|
||||
list(
|
||||
APPEND
|
||||
INCLUDE_DIRS
|
||||
@ -745,11 +795,19 @@ add_library(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_types.cpp
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/metadata_data.cpp)
|
||||
|
||||
add_library(
|
||||
compression_thrift
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_types.cpp
|
||||
${CMAKE_CURRENT_BINARY_DIR}/thrift/dwarfs/gen-cpp2/compression_data.cpp)
|
||||
|
||||
set_property(TARGET metadata_thrift PROPERTY CXX_STANDARD 20)
|
||||
set_property(TARGET compression_thrift PROPERTY CXX_STANDARD 20)
|
||||
|
||||
target_include_directories(metadata_thrift PRIVATE ${INCLUDE_DIRS})
|
||||
target_include_directories(compression_thrift PRIVATE ${INCLUDE_DIRS})
|
||||
|
||||
target_link_libraries(metadata_thrift thrift_light)
|
||||
target_link_libraries(compression_thrift thrift_light)
|
||||
|
||||
foreach(tgt dwarfs dwarfs_compression dwarfs_categorizer
|
||||
dwarfs_tool ${BINARY_TARGETS} ${MAIN_TARGETS})
|
||||
@ -769,6 +827,7 @@ foreach(tgt dwarfs dwarfs_compression dwarfs_categorizer
|
||||
$<$<BOOL:${LIBLZ4_FOUND}>:DWARFS_HAVE_LIBLZ4>
|
||||
$<$<BOOL:${LIBLZMA_FOUND}>:DWARFS_HAVE_LIBLZMA>
|
||||
$<$<AND:$<BOOL:${LIBBROTLIDEC_FOUND}>,$<BOOL:${LIBBROTLIENC_FOUND}>>:DWARFS_HAVE_LIBBROTLI>
|
||||
$<$<BOOL:${FLAC_FOUND}>:DWARFS_HAVE_FLAC>
|
||||
)
|
||||
|
||||
if(DWARFS_USE_EXCEPTION_TRACER)
|
||||
@ -855,6 +914,10 @@ if(LIBLZMA_FOUND)
|
||||
target_link_libraries(dwarfs PkgConfig::LIBLZMA)
|
||||
endif()
|
||||
|
||||
if(FLAC_FOUND)
|
||||
target_link_libraries(dwarfs PkgConfig::FLAC)
|
||||
endif()
|
||||
|
||||
if(LIBBROTLIDEC_FOUND AND LIBBROTLIENC_FOUND)
|
||||
target_link_libraries(dwarfs PkgConfig::LIBBROTLIDEC PkgConfig::LIBBROTLIENC)
|
||||
endif()
|
||||
@ -924,6 +987,7 @@ if(STATIC_BUILD_DO_NOT_USE)
|
||||
import_static_lib(static_libunwind "libunwind.a")
|
||||
import_static_lib(static_libarchive "libarchive.a")
|
||||
import_static_lib(static_libmagic "libmagic.a")
|
||||
import_static_lib(static_libflac "libFLAC++.a")
|
||||
|
||||
set_target_properties(static_libunwind PROPERTIES INTERFACE_LINK_LIBRARIES
|
||||
PkgConfig::LIBLZMA)
|
||||
@ -935,6 +999,7 @@ if(STATIC_BUILD_DO_NOT_USE)
|
||||
static_libz)
|
||||
|
||||
target_link_libraries(dwarfs_categorizer static_libmagic)
|
||||
target_link_libraries(dwarfs_compression static_libflac)
|
||||
|
||||
foreach(tgt ${BINARY_TARGETS})
|
||||
if(PREFER_SYSTEM_LIBFMT)
|
||||
|
@ -30,7 +30,8 @@
|
||||
DWARFS_COMPRESSION_TYPE(ZSTD, 2) SEPARATOR \
|
||||
DWARFS_COMPRESSION_TYPE(LZ4, 3) SEPARATOR \
|
||||
DWARFS_COMPRESSION_TYPE(LZ4HC, 4) SEPARATOR \
|
||||
DWARFS_COMPRESSION_TYPE(BROTLI, 5)
|
||||
DWARFS_COMPRESSION_TYPE(BROTLI, 5) SEPARATOR \
|
||||
DWARFS_COMPRESSION_TYPE(FLAC, 6)
|
||||
// clang-format on
|
||||
|
||||
namespace dwarfs {
|
||||
|
484
src/dwarfs/compression/flac.cpp
Normal file
484
src/dwarfs/compression/flac.cpp
Normal file
@ -0,0 +1,484 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <span>
|
||||
|
||||
#include <FLAC++/decoder.h>
|
||||
#include <FLAC++/encoder.h>
|
||||
|
||||
#include <thrift/lib/cpp2/protocol/Serializer.h>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <folly/Varint.h>
|
||||
|
||||
#include "dwarfs/block_compressor.h"
|
||||
#include "dwarfs/compression.h"
|
||||
#include "dwarfs/error.h"
|
||||
#include "dwarfs/option_map.h"
|
||||
#include "dwarfs/pcm_sample_transformer.h"
|
||||
|
||||
#include "dwarfs/gen-cpp2/compression_types.h"
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
namespace {
|
||||
|
||||
// Bit layout of the one-byte `flags` field of the FLAC block header. The
// compressor sets these bits and the decoder tests them to configure its
// pcm_sample_transformer.

// Set when the PCM samples are big-endian (little-endian otherwise).
constexpr uint8_t const kFlagBigEndian{0x80};
// Set when the PCM samples are signed (unsigned otherwise).
constexpr uint8_t const kFlagSigned{0x40};
// Set when the sample padding is "lsb" (MSB padding otherwise).
constexpr uint8_t const kFlagLsbPadding{0x20};
// The low two bits store (bytes per sample - 1).
constexpr uint8_t const kBytesPerSampleMask{0x03};
// Upper bound on the number of interleaved sample values handed to the
// encoder per process_interleaved() call.
constexpr size_t const kBlockSize{65536};
|
||||
|
||||
class dwarfs_flac_stream_encoder : public FLAC::Encoder::Stream {
|
||||
public:
|
||||
dwarfs_flac_stream_encoder(std::vector<uint8_t>& data)
|
||||
: data_{data}
|
||||
, pos_{data_.size()} {}
|
||||
|
||||
::FLAC__StreamEncoderReadStatus
|
||||
read_callback(FLAC__byte buffer[], size_t* bytes) override {
|
||||
::memcpy(buffer, data_.data() + pos_, *bytes);
|
||||
return FLAC__STREAM_ENCODER_READ_STATUS_CONTINUE;
|
||||
}
|
||||
|
||||
::FLAC__StreamEncoderWriteStatus
|
||||
write_callback(const FLAC__byte buffer[], size_t bytes, uint32_t,
|
||||
uint32_t) override {
|
||||
size_t end = pos_ + bytes;
|
||||
if (data_.size() < end) {
|
||||
data_.resize(end);
|
||||
}
|
||||
::memcpy(data_.data() + pos_, buffer, bytes);
|
||||
pos_ += bytes;
|
||||
return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
|
||||
}
|
||||
|
||||
::FLAC__StreamEncoderSeekStatus
|
||||
seek_callback(FLAC__uint64 absolute_byte_offset) override {
|
||||
pos_ = absolute_byte_offset;
|
||||
return FLAC__STREAM_ENCODER_SEEK_STATUS_OK;
|
||||
}
|
||||
|
||||
::FLAC__StreamEncoderTellStatus
|
||||
tell_callback(FLAC__uint64* absolute_byte_offset) override {
|
||||
*absolute_byte_offset = pos_;
|
||||
return FLAC__STREAM_ENCODER_TELL_STATUS_OK;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<uint8_t>& data_;
|
||||
size_t pos_;
|
||||
};
|
||||
|
||||
class dwarfs_flac_stream_decoder : public FLAC::Decoder::Stream {
|
||||
public:
|
||||
dwarfs_flac_stream_decoder(
|
||||
std::vector<uint8_t>& target, std::span<uint8_t const> data,
|
||||
thrift::compression::flac_block_header const& header)
|
||||
: target_{target}
|
||||
, data_{data}
|
||||
, header_{header}
|
||||
, bytes_per_sample_{(header_.flags().value() & kBytesPerSampleMask) + 1}
|
||||
, xfm_{header_.flags().value() & kFlagBigEndian
|
||||
? pcm_sample_endianness::Big
|
||||
: pcm_sample_endianness::Little,
|
||||
header_.flags().value() & kFlagSigned
|
||||
? pcm_sample_signedness::Signed
|
||||
: pcm_sample_signedness::Unsigned,
|
||||
header_.flags().value() & kFlagLsbPadding
|
||||
? pcm_sample_padding::Lsb
|
||||
: pcm_sample_padding::Msb,
|
||||
bytes_per_sample_, header_.bits_per_sample().value()} {}
|
||||
|
||||
::FLAC__StreamDecoderReadStatus
|
||||
read_callback(FLAC__byte buffer[], size_t* bytes) override {
|
||||
if (pos_ >= data_.size()) {
|
||||
return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM;
|
||||
}
|
||||
|
||||
if (pos_ + *bytes > data_.size()) {
|
||||
*bytes = data_.size() - pos_;
|
||||
}
|
||||
|
||||
if (*bytes > 0) {
|
||||
::memcpy(buffer, data_.data() + pos_, *bytes);
|
||||
}
|
||||
|
||||
pos_ += *bytes;
|
||||
|
||||
return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
|
||||
}
|
||||
|
||||
::FLAC__StreamDecoderWriteStatus
|
||||
write_callback(const ::FLAC__Frame* frame,
|
||||
const FLAC__int32* const buffer[]) override {
|
||||
auto samples = frame->header.blocksize;
|
||||
auto channels = frame->header.channels;
|
||||
tmp_.resize(channels * samples);
|
||||
for (uint_fast32_t i = 0; i < samples; ++i) {
|
||||
for (uint_fast32_t c = 0; c < channels; ++c) {
|
||||
tmp_[i * channels + c] = buffer[c][i];
|
||||
}
|
||||
}
|
||||
|
||||
auto pos = target_.size();
|
||||
size_t size = channels * samples * bytes_per_sample_;
|
||||
|
||||
target_.resize(pos + size);
|
||||
|
||||
xfm_.pack(std::span<uint8_t>(&target_[pos], size), tmp_);
|
||||
|
||||
return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
|
||||
}
|
||||
|
||||
void error_callback(::FLAC__StreamDecoderErrorStatus status) override {
|
||||
DWARFS_THROW(runtime_error,
|
||||
fmt::format("[FLAC] decoder error: {}",
|
||||
FLAC__StreamDecoderErrorStatusString[status]));
|
||||
}
|
||||
|
||||
::FLAC__StreamDecoderSeekStatus
|
||||
seek_callback(FLAC__uint64 absolute_byte_offset) override {
|
||||
if (absolute_byte_offset > data_.size()) {
|
||||
return ::FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
|
||||
}
|
||||
pos_ = absolute_byte_offset;
|
||||
return ::FLAC__STREAM_DECODER_SEEK_STATUS_OK;
|
||||
}
|
||||
|
||||
::FLAC__StreamDecoderTellStatus
|
||||
tell_callback(FLAC__uint64* absolute_byte_offset) override {
|
||||
*absolute_byte_offset = pos_;
|
||||
return ::FLAC__STREAM_DECODER_TELL_STATUS_OK;
|
||||
}
|
||||
|
||||
::FLAC__StreamDecoderLengthStatus
|
||||
length_callback(FLAC__uint64* stream_length) override {
|
||||
*stream_length = data_.size();
|
||||
return ::FLAC__STREAM_DECODER_LENGTH_STATUS_OK;
|
||||
}
|
||||
|
||||
bool eof_callback() override { return pos_ >= data_.size(); }
|
||||
|
||||
private:
|
||||
std::vector<uint8_t>& target_;
|
||||
std::vector<FLAC__int32> tmp_;
|
||||
std::span<uint8_t const> data_;
|
||||
thrift::compression::flac_block_header const& header_;
|
||||
int const bytes_per_sample_;
|
||||
pcm_sample_transformer<FLAC__int32> xfm_;
|
||||
size_t pos_{0};
|
||||
};
|
||||
|
||||
class flac_block_compressor final : public block_compressor::impl {
|
||||
public:
|
||||
flac_block_compressor(uint32_t level, bool exhaustive)
|
||||
: level_{level}
|
||||
, exhaustive_{exhaustive} {}
|
||||
|
||||
flac_block_compressor(const flac_block_compressor& rhs) = default;
|
||||
|
||||
std::unique_ptr<block_compressor::impl> clone() const override {
|
||||
return std::make_unique<flac_block_compressor>(*this);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compress(const std::vector<uint8_t>& data,
|
||||
folly::dynamic meta) const override {
|
||||
auto endianness = meta["endianness"].asString();
|
||||
auto signedness = meta["signedness"].asString();
|
||||
auto padding = meta["padding"].asString();
|
||||
auto num_channels = meta["number_of_channels"].asInt();
|
||||
auto bits_per_sample = meta["bits_per_sample"].asInt();
|
||||
auto bytes_per_sample = meta["bytes_per_sample"].asInt();
|
||||
|
||||
assert(1 <= bytes_per_sample && bytes_per_sample <= 4);
|
||||
assert(8 <= bits_per_sample && bits_per_sample <= 32);
|
||||
assert(1 <= num_channels);
|
||||
|
||||
if (data.size() % (num_channels * bytes_per_sample)) {
|
||||
DWARFS_THROW(
|
||||
runtime_error,
|
||||
fmt::format("unexpected PCM waveform configuration: {} bytes to "
|
||||
"compress, {} channels, {} bytes per sample",
|
||||
data.size(), num_channels, bytes_per_sample));
|
||||
}
|
||||
|
||||
size_t num_samples = data.size() / (num_channels * bytes_per_sample);
|
||||
|
||||
pcm_sample_endianness pcm_end;
|
||||
pcm_sample_signedness pcm_sig;
|
||||
pcm_sample_padding pcm_pad;
|
||||
|
||||
uint8_t flags = bytes_per_sample - 1;
|
||||
|
||||
if (endianness == "big") {
|
||||
flags |= kFlagBigEndian;
|
||||
pcm_end = pcm_sample_endianness::Big;
|
||||
} else {
|
||||
pcm_end = pcm_sample_endianness::Little;
|
||||
}
|
||||
|
||||
if (signedness == "signed") {
|
||||
flags |= kFlagSigned;
|
||||
pcm_sig = pcm_sample_signedness::Signed;
|
||||
} else {
|
||||
pcm_sig = pcm_sample_signedness::Unsigned;
|
||||
}
|
||||
|
||||
if (padding == "lsb") {
|
||||
flags |= kFlagLsbPadding;
|
||||
pcm_pad = pcm_sample_padding::Lsb;
|
||||
} else {
|
||||
pcm_pad = pcm_sample_padding::Msb;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> compressed;
|
||||
|
||||
{
|
||||
using namespace ::apache::thrift;
|
||||
|
||||
compressed.reserve(5 * data.size() / 8); // optimistic guess
|
||||
compressed.resize(folly::kMaxVarintLength64);
|
||||
|
||||
size_t pos = 0;
|
||||
pos += folly::encodeVarint(data.size(), compressed.data() + pos);
|
||||
compressed.resize(pos);
|
||||
|
||||
thrift::compression::flac_block_header hdr;
|
||||
hdr.num_channels() = num_channels;
|
||||
hdr.bits_per_sample() = bits_per_sample;
|
||||
hdr.flags() = flags;
|
||||
|
||||
std::string hdrbuf;
|
||||
CompactSerializer::serialize(hdr, &hdrbuf);
|
||||
|
||||
compressed.resize(pos + hdrbuf.size());
|
||||
::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size());
|
||||
pos += hdrbuf.size();
|
||||
}
|
||||
|
||||
dwarfs_flac_stream_encoder encoder(compressed);
|
||||
|
||||
encoder.set_streamable_subset(false);
|
||||
encoder.set_channels(num_channels);
|
||||
encoder.set_bits_per_sample(bits_per_sample);
|
||||
encoder.set_sample_rate(48000); // TODO: see if a fixed rate makes sense
|
||||
encoder.set_compression_level(level_);
|
||||
encoder.set_do_exhaustive_model_search(exhaustive_);
|
||||
encoder.set_total_samples_estimate(num_samples);
|
||||
|
||||
if (encoder.init() != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
|
||||
DWARFS_THROW(
|
||||
runtime_error,
|
||||
fmt::format("[FLAC] init: {}", encoder.get_state().as_cstring()));
|
||||
}
|
||||
|
||||
pcm_sample_transformer<FLAC__int32> xfm(pcm_end, pcm_sig, pcm_pad,
|
||||
bytes_per_sample, bits_per_sample);
|
||||
|
||||
const auto samples_per_call = kBlockSize / num_channels;
|
||||
std::vector<FLAC__int32> buffer;
|
||||
size_t input_pos = 0;
|
||||
|
||||
while (num_samples > 0) {
|
||||
size_t n = std::min(num_samples, samples_per_call);
|
||||
buffer.resize(n * num_channels);
|
||||
xfm.unpack(buffer,
|
||||
std::span<uint8_t const>(data.data() + input_pos,
|
||||
buffer.size() * bytes_per_sample));
|
||||
|
||||
if (!encoder.process_interleaved(buffer.data(), n)) {
|
||||
DWARFS_THROW(
|
||||
runtime_error,
|
||||
fmt::format("[FLAC] failed to process interleaved samples: {}",
|
||||
encoder.get_state().as_cstring()));
|
||||
}
|
||||
|
||||
input_pos += buffer.size() * bytes_per_sample;
|
||||
num_samples -= n;
|
||||
}
|
||||
|
||||
if (!encoder.finish()) {
|
||||
DWARFS_THROW(runtime_error, "[FLAC] failed to finish encoder");
|
||||
}
|
||||
|
||||
if (compressed.size() >= data.size()) {
|
||||
throw bad_compression_ratio_error();
|
||||
}
|
||||
|
||||
compressed.shrink_to_fit();
|
||||
|
||||
return compressed;
|
||||
}
|
||||
|
||||
std::vector<uint8_t>
|
||||
compress(std::vector<uint8_t>&& data, folly::dynamic meta) const override {
|
||||
return compress(data, std::move(meta));
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::FLAC; }
|
||||
|
||||
std::string describe() const override {
|
||||
return fmt::format("flac [level={}{}]", level_,
|
||||
exhaustive_ ? ", exhaustive" : "");
|
||||
}
|
||||
|
||||
bool check_metadata(folly::dynamic meta) const override {
|
||||
if (meta.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return meta.count("endianness") > 0 && meta.count("signedness") > 0 &&
|
||||
meta.count("padding") > 0 && meta.count("bytes_per_sample") > 0 &&
|
||||
meta.count("bits_per_sample") > 0 &&
|
||||
meta.count("number_of_channels") > 0;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t const level_;
|
||||
bool const exhaustive_;
|
||||
};
|
||||
|
||||
class flac_block_decompressor final : public block_decompressor::impl {
|
||||
public:
|
||||
flac_block_decompressor(const uint8_t* data, size_t size,
|
||||
std::vector<uint8_t>& target)
|
||||
: flac_block_decompressor(folly::Range<uint8_t const*>(data, size),
|
||||
target) {}
|
||||
|
||||
flac_block_decompressor(folly::Range<uint8_t const*> data,
|
||||
std::vector<uint8_t>& target)
|
||||
: decompressed_{target}
|
||||
, uncompressed_size_{folly::decodeVarint(data)}
|
||||
, header_{decode_header(data)}
|
||||
, decoder_{std::make_unique<dwarfs_flac_stream_decoder>(
|
||||
decompressed_, std::span<uint8_t const>(data.data(), data.size()),
|
||||
header_)} {
|
||||
decoder_->set_md5_checking(false);
|
||||
decoder_->set_metadata_ignore_all();
|
||||
|
||||
if (auto status = decoder_->init();
|
||||
status != FLAC__STREAM_DECODER_INIT_STATUS_OK) {
|
||||
DWARFS_THROW(runtime_error,
|
||||
fmt::format("[FLAC] could not initialize decoder: {}",
|
||||
FLAC__StreamDecoderInitStatusString[status]));
|
||||
}
|
||||
|
||||
try {
|
||||
decompressed_.reserve(uncompressed_size_);
|
||||
} catch (std::bad_alloc const&) {
|
||||
DWARFS_THROW(
|
||||
runtime_error,
|
||||
fmt::format(
|
||||
"[FLAC] could not reserve {} bytes for decompressed block",
|
||||
uncompressed_size_));
|
||||
}
|
||||
}
|
||||
|
||||
compression_type type() const override { return compression_type::FLAC; }
|
||||
|
||||
bool decompress_frame(size_t frame_size) override {
|
||||
size_t pos = decompressed_.size();
|
||||
|
||||
if (pos + frame_size > uncompressed_size_) {
|
||||
assert(uncompressed_size_ >= pos);
|
||||
frame_size = uncompressed_size_ - pos;
|
||||
}
|
||||
|
||||
size_t wanted = pos + frame_size;
|
||||
|
||||
assert(wanted <= uncompressed_size_);
|
||||
assert(frame_size > 0);
|
||||
|
||||
while (decompressed_.size() < wanted) {
|
||||
if (!decoder_->process_single()) {
|
||||
DWARFS_THROW(runtime_error,
|
||||
fmt::format("[FLAC] failed to process frame: {}",
|
||||
decoder_->get_state().as_cstring()));
|
||||
}
|
||||
}
|
||||
|
||||
if (decompressed_.size() == uncompressed_size_) {
|
||||
decoder_.reset();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t uncompressed_size() const override { return uncompressed_size_; }
|
||||
|
||||
private:
|
||||
static thrift::compression::flac_block_header
|
||||
decode_header(folly::Range<uint8_t const*>& range) {
|
||||
using namespace ::apache::thrift;
|
||||
thrift::compression::flac_block_header hdr;
|
||||
auto size = CompactSerializer::deserialize(range, hdr);
|
||||
range.advance(size);
|
||||
return hdr;
|
||||
}
|
||||
|
||||
std::vector<uint8_t>& decompressed_;
|
||||
folly::Range<uint8_t const*> backup_data_;
|
||||
|
||||
size_t const uncompressed_size_;
|
||||
thrift::compression::flac_block_header const header_;
|
||||
std::unique_ptr<dwarfs_flac_stream_decoder> decoder_;
|
||||
};
|
||||
|
||||
class flac_compression_factory : public compression_factory {
|
||||
public:
|
||||
flac_compression_factory()
|
||||
: options_{
|
||||
fmt::format("level=[0..8]"),
|
||||
fmt::format("exhaustive"),
|
||||
} {}
|
||||
|
||||
std::string_view name() const override { return "flac"; }
|
||||
|
||||
std::string_view description() const override { return "FLAC compression"; }
|
||||
|
||||
std::vector<std::string> const& options() const override { return options_; }
|
||||
|
||||
std::unique_ptr<block_compressor::impl>
|
||||
make_compressor(option_map& om) const override {
|
||||
return std::make_unique<flac_block_compressor>(
|
||||
om.get<uint32_t>("level", 6), om.get<bool>("exhaustive", false));
|
||||
}
|
||||
|
||||
std::unique_ptr<block_decompressor::impl>
|
||||
make_decompressor(std::span<uint8_t const> data,
|
||||
std::vector<uint8_t>& target) const override {
|
||||
return std::make_unique<flac_block_decompressor>(data.data(), data.size(),
|
||||
target);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::string> const options_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
REGISTER_COMPRESSION_FACTORY(compression_type::FLAC, flac_compression_factory)
|
||||
|
||||
} // namespace dwarfs
|
202
test/flac_compressor_test.cpp
Normal file
202
test/flac_compressor_test.cpp
Normal file
@ -0,0 +1,202 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cmath>
|
||||
#include <numbers>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "dwarfs/block_compressor.h"
|
||||
#include "dwarfs/pcm_sample_transformer.h"
|
||||
|
||||
using namespace dwarfs;
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> make_sine(int bits, size_t length, double period) {
|
||||
std::vector<T> rv(length);
|
||||
double amplitude = (1 << bits) / 2;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
rv[i] = static_cast<T>(
|
||||
amplitude * std::sin(2 * std::numbers::pi * i / period) - 0.5);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
/**
 * Interleave per-channel sample vectors into a single vector, i.e.
 * out[i * channels + c] == in[c][i].
 *
 * The sample count is taken from the first channel; a channel that is
 * shorter than the first one makes `at()` throw std::out_of_range. An
 * empty input yields an empty result.
 */
template <typename T>
std::vector<T> multiplex(std::vector<std::vector<T>> const& in) {
  // front() on an empty vector is undefined behaviour
  if (in.empty()) {
    return {};
  }

  auto samples = in.front().size();
  auto channels = in.size();
  std::vector<T> out(channels * samples);

  for (size_t i = 0; i < samples; ++i) {
    for (size_t c = 0; c < channels; ++c) {
      out[i * channels + c] = in.at(c).at(i);
    }
  }

  return out;
}
|
||||
|
||||
template <typename T = int32_t>
|
||||
std::vector<uint8_t>
|
||||
make_test_data(int channels, int samples, int bytes, int bits,
|
||||
pcm_sample_endianness end, pcm_sample_signedness sig,
|
||||
pcm_sample_padding pad) {
|
||||
std::vector<std::vector<T>> data;
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
data.emplace_back(
|
||||
make_sine<T>(bits, samples, 3.1 * ((599 * (c + 1)) % 256)));
|
||||
}
|
||||
auto muxed = multiplex(data);
|
||||
std::vector<uint8_t> out(bytes * channels * samples);
|
||||
pcm_sample_transformer<T> xfm(end, sig, pad, bytes, bits);
|
||||
xfm.pack(out, muxed);
|
||||
return out;
|
||||
}
|
||||
|
||||
// One waveform configuration for the parameterized round-trip test.
struct data_params {
  data_params(int channels, int samples, int bytes, int bits)
      : num_channels(channels)
      , num_samples(samples)
      , bytes_per_sample(bytes)
      , bits_per_sample(bits) {}

  int num_channels;     // number of interleaved channels
  int num_samples;      // samples per channel
  int bytes_per_sample; // storage size of one sample
  int bits_per_sample;  // significant bits of one sample
};

// Stream a configuration in readable form, e.g. for test parameter output.
std::ostream& operator<<(std::ostream& os, data_params const& p) {
  os << "{channels=" << p.num_channels;
  os << ", samples=" << p.num_samples;
  os << ", bytes=" << p.bytes_per_sample;
  os << ", bits=" << p.bits_per_sample;
  return os << "}";
}
|
||||
|
||||
// Waveform configurations used to instantiate the parameterized round-trip
// test. Each entry is {channels, samples per channel, bytes per sample,
// bits per sample}; several entries use fewer significant bits than the
// sample's storage size provides.
std::vector<data_params> const data_parameters{
    // clang-format off
    { 1, 1000, 2, 16},
    { 3, 1000, 1, 8},
    { 1, 1000, 2, 12},
    { 1, 100000, 3, 20},
    { 8, 10000, 3, 20},
    { 4, 10000, 4, 20},
    { 4, 10000, 4, 24},
    { 4, 10000, 3, 24},
    { 7, 799999, 4, 32},
    // clang-format on
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Sanity check for the make_sine() helper: with a period of four samples
// the wave hits zero, the positive peak, zero and the negative peak, so
// the expected values are exactly the limits of the requested bit width.
TEST(flac_compressor, sine) {
  std::vector<int8_t> const expect_8bit{0, 127, 0, -128, 0};
  EXPECT_EQ(make_sine<int8_t>(8, 5, 4.0), expect_8bit);

  std::vector<int8_t> const expect_5bit{0, 15, 0, -16, 0};
  EXPECT_EQ(make_sine<int8_t>(5, 5, 4.0), expect_5bit);

  std::vector<int16_t> const expect_16bit{0, 32767, 0, -32768, 0};
  EXPECT_EQ(make_sine<int16_t>(16, 5, 4.0), expect_16bit);

  std::vector<int16_t> const expect_12bit{0, 2047, 0, -2048, 0};
  EXPECT_EQ(make_sine<int16_t>(12, 5, 4.0), expect_12bit);
}
|
||||
|
||||
// Round trip of a single fixed configuration: two channels of 16-bit,
// little-endian, signed samples must compress to less than half their size
// and decompress to the identical bytes.
TEST(flac_compressor, basic) {
  auto const data =
      make_test_data(2, 1000, 2, 16, pcm_sample_endianness::Little,
                     pcm_sample_signedness::Signed, pcm_sample_padding::Msb);

  // Describe the waveform layout for the compressor.
  folly::dynamic meta = folly::dynamic::object;
  meta["endianness"] = "little";
  meta["signedness"] = "signed";
  meta["padding"] = "msb";
  meta["bytes_per_sample"] = 2;
  meta["bits_per_sample"] = 16;
  meta["number_of_channels"] = 2;

  block_compressor comp("flac");

  auto const packed = comp.compress(data, std::move(meta));

  EXPECT_LT(packed.size(), data.size() / 2);

  auto const unpacked = block_decompressor::decompress(
      compression_type::FLAC, packed.data(), packed.size());

  EXPECT_EQ(data, unpacked);
}
|
||||
|
||||
class flac_param : public testing::TestWithParam<
|
||||
std::tuple<pcm_sample_endianness, pcm_sample_signedness,
|
||||
pcm_sample_padding, data_params>> {};
|
||||
|
||||
TEST_P(flac_param, combinations) {
|
||||
auto [end, sig, pad, param] = GetParam();
|
||||
|
||||
folly::dynamic meta = folly::dynamic::object;
|
||||
meta.insert("endianness",
|
||||
end == pcm_sample_endianness::Big ? "big" : "little");
|
||||
meta.insert("signedness",
|
||||
sig == pcm_sample_signedness::Signed ? "signed" : "unsigned");
|
||||
meta.insert("padding", pad == pcm_sample_padding::Msb ? "msb" : "lsb");
|
||||
meta.insert("bytes_per_sample", param.bytes_per_sample);
|
||||
meta.insert("bits_per_sample", param.bits_per_sample);
|
||||
meta.insert("number_of_channels", param.num_channels);
|
||||
|
||||
auto const data = make_test_data(param.num_channels, param.num_samples,
|
||||
param.bytes_per_sample,
|
||||
param.bits_per_sample, end, sig, pad);
|
||||
|
||||
block_compressor comp("flac");
|
||||
|
||||
auto compressed = comp.compress(data, std::move(meta));
|
||||
|
||||
EXPECT_LT(compressed.size(), data.size() / 2);
|
||||
|
||||
auto decompressed = block_decompressor::decompress(
|
||||
compression_type::FLAC, compressed.data(), compressed.size());
|
||||
|
||||
EXPECT_EQ(data, decompressed);
|
||||
}
|
||||
|
||||
// Run the `flac_param` tests for the full cross product of both
// endiannesses, both signednesses, both padding modes and every entry of
// `data_parameters`.
INSTANTIATE_TEST_SUITE_P(
    dwarfs, flac_param,
    ::testing::Combine(::testing::Values(pcm_sample_endianness::Big,
                                         pcm_sample_endianness::Little),
                       ::testing::Values(pcm_sample_signedness::Signed,
                                         pcm_sample_signedness::Unsigned),
                       ::testing::Values(pcm_sample_padding::Lsb,
                                         pcm_sample_padding::Msb),
                       ::testing::ValuesIn(data_parameters)));
|
39
thrift/compression.thrift
Normal file
39
thrift/compression.thrift
Normal file
@ -0,0 +1,39 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
include "thrift/annotation/cpp.thrift"
|
||||
|
||||
namespace cpp2 dwarfs.thrift.compression
|
||||
|
||||
@cpp.Type{name = "uint8_t"}
|
||||
typedef byte UInt8
|
||||
@cpp.Type{name = "uint16_t"}
|
||||
typedef i16 UInt16
|
||||
@cpp.Type{name = "uint32_t"}
|
||||
typedef i32 UInt32
|
||||
@cpp.Type{name = "uint64_t"}
|
||||
typedef i64 UInt64
|
||||
|
||||
// Header stored in front of the FLAC stream of each FLAC-compressed block.
// It is written by the compressor and read back by the decompressor in
// src/dwarfs/compression/flac.cpp to restore the original PCM byte layout.
struct flac_block_header {
  // number of interleaved channels in the PCM data
  1: UInt16 num_channels
  // bits per sample, as passed to the FLAC encoder
  2: UInt8 bits_per_sample
  // 0x80: big-endian samples, 0x40: signed samples, 0x20: LSB padding,
  // low two bits (0x03): bytes per sample minus one
  3: UInt8 flags
}
|
@ -17,6 +17,7 @@
|
||||
"glog",
|
||||
"libarchive",
|
||||
"libevent",
|
||||
"libflac",
|
||||
"libmagic",
|
||||
"openssl",
|
||||
"pkgconf",
|
||||
|
Loading…
x
Reference in New Issue
Block a user