diff --git a/CMakeLists.txt b/CMakeLists.txt index ec87483a..010f987d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,6 +171,8 @@ pkg_check_modules(FUSE IMPORTED_TARGET fuse>=2.9.9) pkg_check_modules(FUSE3 IMPORTED_TARGET fuse3>=3.10.5) pkg_check_modules(LIBLZ4 IMPORTED_TARGET liblz4>=1.9.3) pkg_check_modules(LIBLZMA IMPORTED_TARGET liblzma>=5.2.5) +pkg_check_modules(LIBBROTLIDEC IMPORTED_TARGET libbrotlidec>=1.0.9) +pkg_check_modules(LIBBROTLIENC IMPORTED_TARGET libbrotlienc>=1.0.9) pkg_check_modules(LIBARCHIVE IMPORTED_TARGET libarchive>=3.6.0) pkg_check_modules(ZSTD IMPORTED_TARGET libzstd>=1.5.2) pkg_check_modules(XXHASH IMPORTED_TARGET libxxhash>=0.8.1) @@ -343,6 +345,10 @@ if(LIBLZ4_FOUND) list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/lz4.cpp) endif() +if(LIBBROTLIDEC_FOUND AND LIBBROTLIENC_FOUND) + list(APPEND LIBDWARFS_COMPRESSION_SRC src/dwarfs/compression/brotli.cpp) +endif() + add_library(dwarfs ${LIBDWARFS_SRC}) add_library(dwarfs_compression ${LIBDWARFS_COMPRESSION_SRC}) @@ -577,6 +583,7 @@ foreach(tgt dwarfs dwarfs_compression ${BINARY_TARGETS}) $<$:DWARFS_USE_JEMALLOC> $<$:DWARFS_HAVE_LIBLZ4> $<$:DWARFS_HAVE_LIBLZMA> + $<$:DWARFS_HAVE_LIBBROTLI> $<$:DWARFS_HAVE_PYTHON>) if(DWARFS_USE_EXCEPTION_TRACER) @@ -644,7 +651,9 @@ target_link_libraries( fsst ${Boost_LIBRARIES} PkgConfig::LIBLZ4 - PkgConfig::LIBLZMA) + PkgConfig::LIBLZMA + PkgConfig::LIBBROTLIENC + PkgConfig::LIBBROTLIDEC) if(NOT STATIC_BUILD_DO_NOT_USE) target_link_libraries(dwarfs PkgConfig::LIBARCHIVE) diff --git a/include/dwarfs/compression.h b/include/dwarfs/compression.h index d8f9f9cf..34c70e7b 100644 --- a/include/dwarfs/compression.h +++ b/include/dwarfs/compression.h @@ -29,7 +29,8 @@ DWARFS_COMPRESSION_TYPE(LZMA, 1) SEPARATOR \ DWARFS_COMPRESSION_TYPE(ZSTD, 2) SEPARATOR \ DWARFS_COMPRESSION_TYPE(LZ4, 3) SEPARATOR \ - DWARFS_COMPRESSION_TYPE(LZ4HC, 4) + DWARFS_COMPRESSION_TYPE(LZ4HC, 4) SEPARATOR \ + DWARFS_COMPRESSION_TYPE(BROTLI, 5) // clang-format on 
namespace dwarfs {
diff --git a/src/dwarfs/compression/brotli.cpp b/src/dwarfs/compression/brotli.cpp
new file mode 100644
index 00000000..0709a6c7
--- /dev/null
+++ b/src/dwarfs/compression/brotli.cpp
@@ -0,0 +1,229 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+
+#include <fmt/format.h>
+
+#include <folly/Varint.h>
+
+#include "dwarfs/block_compressor.h"
+#include "dwarfs/error.h"
+#include "dwarfs/fstypes.h"
+#include "dwarfs/option_map.h"
+
+namespace dwarfs {
+
+namespace {
+
+/**
+ * Block compressor producing a varint-encoded uncompressed size followed by
+ * a single one-shot Brotli stream.
+ */
+class brotli_block_compressor final : public block_compressor::impl {
+ public:
+  brotli_block_compressor(uint32_t quality, uint32_t window_bits)
+      : quality_{quality}
+      , window_bits_{window_bits} {}
+
+  brotli_block_compressor(const brotli_block_compressor& rhs) = default;
+
+  std::unique_ptr<block_compressor::impl> clone() const override {
+    return std::make_unique<brotli_block_compressor>(*this);
+  }
+
+  /**
+   * Compress `data` in one shot.
+   *
+   * Raises runtime_error (via DWARFS_THROW) if the encoder fails, and
+   * bad_compression_ratio_error if the output (size prefix included) is
+   * not smaller than the input.
+   */
+  std::vector<uint8_t>
+  compress(const std::vector<uint8_t>& data) const override {
+    std::vector<uint8_t> compressed;
+    // Worst case: longest possible varint plus brotli's output bound.
+    compressed.resize(folly::kMaxVarintLength64 +
+                      ::BrotliEncoderMaxCompressedSize(data.size()));
+    size_t size_size = folly::encodeVarint(data.size(), compressed.data());
+    // In: room left after the prefix; out: bytes actually written.
+    size_t compressed_size = compressed.size() - size_size;
+    if (!::BrotliEncoderCompress(quality_, window_bits_, BROTLI_DEFAULT_MODE,
+                                 data.size(), data.data(), &compressed_size,
+                                 compressed.data() + size_size)) {
+      DWARFS_THROW(runtime_error, "brotli: error during compression");
+    }
+    compressed.resize(size_size + compressed_size);
+    if (compressed.size() >= data.size()) {
+      throw bad_compression_ratio_error();
+    }
+    compressed.shrink_to_fit();
+    return compressed;
+  }
+
+  std::vector<uint8_t> compress(std::vector<uint8_t>&& data) const override {
+    return compress(data);
+  }
+
+  compression_type type() const override { return compression_type::BROTLI; }
+
+ private:
+  uint32_t const quality_;
+  uint32_t const window_bits_;
+};
+
+/**
+ * Streaming decompressor for blocks laid out as a varint size prefix
+ * followed by a Brotli stream.
+ */
+class brotli_block_decompressor final : public block_decompressor::impl {
+ public:
+  brotli_block_decompressor(const uint8_t* data, size_t size,
+                            std::vector<uint8_t>& target)
+      : brotli_block_decompressor(folly::Range<uint8_t const*>(data, size),
+                                  target) {}
+
+  /**
+   * Decode the size prefix, create a large-window capable decoder and
+   * reserve room in `target` for the whole block.
+   *
+   * Raises runtime_error (via DWARFS_THROW) if the decoder cannot be
+   * created or configured, or if the reservation fails.
+   */
+  brotli_block_decompressor(folly::Range<uint8_t const*> data,
+                            std::vector<uint8_t>& target)
+      : decompressed_{target}
+      // decodeVarint() consumes the prefix from `data`; data_ and size_
+      // are declared after uncompressed_size_, so they see the remainder.
+      , uncompressed_size_{folly::decodeVarint(data)}
+      , data_{data.data()}
+      , size_{data.size()}
+      , decoder_{::BrotliDecoderCreateInstance(nullptr, nullptr, nullptr),
+                 &::BrotliDecoderDestroyInstance} {
+    if (!decoder_) {
+      DWARFS_THROW(runtime_error, "could not create brotli decoder");
+    }
+    if (!::BrotliDecoderSetParameter(decoder_.get(),
+                                     BROTLI_DECODER_PARAM_LARGE_WINDOW, 1)) {
+      DWARFS_THROW(runtime_error, "could not set brotli decoder parameter");
+    }
+    try {
+      decompressed_.reserve(uncompressed_size_);
+    } catch (std::bad_alloc const&) {
+      DWARFS_THROW(
+          runtime_error,
+          fmt::format("could not reserve {} bytes for decompressed block",
+                      uncompressed_size_));
+    }
+  }
+
+  compression_type type() const override { return compression_type::BROTLI; }
+
+  /**
+   * Decode at most `frame_size` further bytes onto the end of the target.
+   *
+   * Returns true once the decoder reports the stream as finished; raises
+   * runtime_error (via DWARFS_THROW) on a decoder error.
+   */
+  bool decompress_frame(size_t frame_size) override {
+    size_t pos = decompressed_.size();
+    decompressed_.resize(pos + frame_size);
+    // data() + pos, not &decompressed_[pos]: the latter indexes one past
+    // the end when frame_size is 0.
+    uint8_t* next_out = decompressed_.data() + pos;
+
+    auto res = ::BrotliDecoderDecompressStream(decoder_.get(), &size_, &data_,
+                                               &frame_size, &next_out, nullptr);
+
+    if (res == BROTLI_DECODER_RESULT_ERROR) {
+      DWARFS_THROW(runtime_error,
+                   fmt::format("brotli error: {}", brotli_error()));
+    }
+
+    // Trim the part of the frame the decoder left unwritten.
+    decompressed_.resize(std::distance(decompressed_.data(), next_out));
+
+    return res == BROTLI_DECODER_RESULT_SUCCESS;
+  }
+
+  size_t uncompressed_size() const override { return uncompressed_size_; }
+
+ private:
+  char const* brotli_error() const {
+    return ::BrotliDecoderErrorString(
+        ::BrotliDecoderGetErrorCode(decoder_.get()));
+  }
+
+  std::vector<uint8_t>& decompressed_;
+  const size_t uncompressed_size_;
+  uint8_t const* data_;
+  size_t size_;
+  std::unique_ptr<BrotliDecoderState, decltype(BrotliDecoderDestroyInstance)*>
+      decoder_;
+};
+
+/**
+ * Factory for the "brotli" compression; understands the `quality` and
+ * `lgwin` options.
+ */
+class brotli_compression_factory : public compression_factory {
+ public:
+  brotli_compression_factory()
+      : options_{
+            fmt::format("quality=[{}..{}]", BROTLI_MIN_QUALITY,
+                        BROTLI_MAX_QUALITY),
+            // Upper bound is the large-window limit, matching the
+            // BROTLI_DECODER_PARAM_LARGE_WINDOW setting on the decoder.
+            fmt::format("lgwin=[{}..{}]", BROTLI_MIN_WINDOW_BITS,
+                        BROTLI_LARGE_MAX_WINDOW_BITS),
+        } {}
+
+  std::string_view name() const override { return "brotli"; }
+
+  std::string_view description() const override { return "Brotli compression"; }
+
+  std::vector<std::string> const& options() const override { return options_; }
+
+  std::unique_ptr<block_compressor::impl>
+  make_compressor(option_map& om) const override {
+    return std::make_unique<brotli_block_compressor>(
+        om.get<uint32_t>("quality", BROTLI_DEFAULT_QUALITY),
+        om.get<uint32_t>("lgwin", BROTLI_DEFAULT_WINDOW));
+  }
+
+  std::unique_ptr<block_decompressor::impl>
+  make_decompressor(std::span<uint8_t const> data,
+                    std::vector<uint8_t>& target) const override {
+    return std::make_unique<brotli_block_decompressor>(data.data(), data.size(),
+                                                       target);
+  }
+
+ private:
+  std::vector<std::string> const options_;
+};
+
+} // namespace
+
+REGISTER_COMPRESSION_FACTORY(compression_type::BROTLI,
+                             brotli_compression_factory)
+
+} // namespace dwarfs
diff --git a/test/dwarfs.cpp b/test/dwarfs.cpp
index 459ef497..4cbd1d03 100644
--- a/test/dwarfs.cpp
+++ b/test/dwarfs.cpp
@@ -424,15 +424,20 @@ void basic_end_to_end_test(std::string const& compressor,
   EXPECT_GT(json.size(), 1000) << json;
 }
 
-std::vector<std::string> const compressions{"null",
+std::vector<std::string> const compressions{
+    "null",
 #ifdef DWARFS_HAVE_LIBLZ4
-                                            "lz4", "lz4hc:level=4",
+    "lz4",
+    "lz4hc:level=4",
 #endif
 #ifdef DWARFS_HAVE_LIBZSTD
-                                            "zstd:level=1",
+    "zstd:level=1",
 #endif
 #ifdef DWARFS_HAVE_LIBLZMA
-                                            "lzma:level=1"
+    "lzma:level=1",
+#endif
+#ifdef DWARFS_HAVE_LIBBROTLI
+    "brotli:quality=2",
 #endif
 };
 