From d47cabc5371db9c029d9711a3a268fec9a1dc424 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Mon, 12 May 2025 17:47:51 +0200 Subject: [PATCH] chore: sync fsst from upstream --- fsst/CMakeLists.txt | 2 +- fsst/fsst.h | 2 +- fsst/fsst_avx512.cpp | 10 ++++++++++ fsst/libfsst.cpp | 13 +++++++++---- fsst/libfsst.hpp | 4 ++++ fsst/libfsst12.cpp | 19 ++++++++++++++----- fsst/libfsst12.hpp | 4 ++++ 7 files changed, 43 insertions(+), 11 deletions(-) diff --git a/fsst/CMakeLists.txt b/fsst/CMakeLists.txt index e351d3e6..cfb89835 100644 --- a/fsst/CMakeLists.txt +++ b/fsst/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.5) project(fsst) diff --git a/fsst/fsst.h b/fsst/fsst.h index 34e019fe..71085d57 100644 --- a/fsst/fsst.h +++ b/fsst/fsst.h @@ -177,7 +177,7 @@ fsst_decompress( } } } - if (posOut+24 <= size) { // handle the possibly 3 last bytes without a loop + if (posOut+32 <= size) { // handle the possibly 3 last bytes without a loop if (posIn+2 <= lenIn) { strOut[posOut] = strIn[posIn+1]; if (strIn[posIn] != FSST_ESC) { diff --git a/fsst/fsst_avx512.cpp b/fsst/fsst_avx512.cpp index a2b7b5e5..150683d2 100644 --- a/fsst/fsst_avx512.cpp +++ b/fsst/fsst_avx512.cpp @@ -21,23 +21,31 @@ #include #ifdef _WIN32 +namespace libfsst { bool fsst_hasAVX512() { int info[4]; __cpuidex(info, 0x00000007, 0); return (info[1]>>16)&1; } +} // namespace libfsst #else #include +namespace libfsst { bool fsst_hasAVX512() { int info[4]; __cpuid_count(0x00000007, 0, info[0], info[1], info[2], info[3]); return (info[1]>>16)&1; } +} // namespace libfsst #endif #else +namespace libfsst { bool fsst_hasAVX512() { return false; } +} // namespace libfsst #endif +namespace libfsst { + // BULK COMPRESSION OF STRINGS // // In one call of this function, we can compress 512 strings, each of maximum length 511 bytes. @@ -138,3 +146,5 @@ size_t fsst_compressAVX512(SymbolTable &symbolTable, u8* codeBase, u8* symbolBas #endif return processed; } +} // namespace libfsst + diff --git a/fsst/libfsst.cpp b/fsst/libfsst.cpp index 5e8a5ea3..919cb9ce 100644 --- a/fsst/libfsst.cpp +++ b/fsst/libfsst.cpp @@ -17,6 +17,7 @@ // You can contact the authors via the FSST source repository : https://github.com/cwida/fsst #include "libfsst.hpp" +namespace libfsst { Symbol concat(Symbol a, Symbol b) { Symbol s; u32 length = a.length()+b.length(); @@ -25,12 +26,13 @@ Symbol concat(Symbol a, Symbol b) { s.val.num = (b.val.num << (8*a.length())) | a.val.num; return s; } +} // namespace libfsst namespace std { template <> -class hash { +class hash { public: - size_t operator()(const QSymbol& q) const { + size_t operator()(const libfsst::QSymbol& q) const { uint64_t k = q.symbol.val.num; const uint64_t m = 0xc6a4a7935bd1e995; const int r = 47; @@ -48,6 +50,7 @@ class hash { }; } +namespace libfsst { bool isEscapeCode(u16 pos) { return pos < FSST_CODE_BASE; } std::ostream& operator<<(std::ostream& out, const Symbol& s) { @@ -289,7 +292,7 @@ static inline size_t compressSIMD(SymbolTable &symbolTable, u8* symbolBase, size if (++batchPos == 512) break; } while(curOff < len[curLine]); - if ((batchPos == 512) || (outOff > (1<<19)) || (++curLine >= nlines)) { // cannot accumulate more? + if ((batchPos == 512) || (outOff > (1<<19)) || (++curLine >= nlines) || (((len[curLine])*2 + 7) > budget)) { // cannot accumulate more? if (batchPos-empty >= 32) { // if we have enough work, fire off fsst_compressAVX512 (32 is due to max 4x8 unrolling) // radix-sort jobs on length (longest string first) // -- this provides best load balancing and allows to skip empty jobs at the end @@ -615,7 +618,9 @@ inline size_t _compressAuto(Encoder *e, size_t nlines, const size_t lenIn[], con size_t compressAuto(Encoder *e, size_t nlines, const size_t lenIn[], const u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) { return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd); } +} // namespace libfsst +using namespace libfsst; // the main compression function (everything automatic) extern "C" size_t fsst_compress(fsst_encoder_t *encoder, size_t nlines, const size_t lenIn[], const u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[]) { // to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB) @@ -626,7 +631,7 @@ extern "C" size_t fsst_compress(fsst_encoder_t *encoder, size_t nlines, const si /* deallocate encoder */ extern "C" void fsst_destroy(fsst_encoder_t* encoder) { - Encoder *e = (Encoder*) encoder; + Encoder *e = (Encoder*) encoder; delete e; } diff --git a/fsst/libfsst.hpp b/fsst/libfsst.hpp index d5ffcb88..a29a4afb 100644 --- a/fsst/libfsst.hpp +++ b/fsst/libfsst.hpp @@ -37,10 +37,12 @@ using namespace std; #include "fsst.h" // the official FSST API -- also usable by C mortals /* unsigned integers */ +namespace libfsst { typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef uint64_t u64; +} // namespace libfsst #define FSST_ENDIAN_MARKER ((u64) 1) #define FSST_VERSION_20190218 20190218 @@ -57,6 +59,7 @@ typedef uint64_t u64; #define FSST_CODE_MAX (1UL< #include +namespace libfsst { Symbol concat(Symbol a, Symbol b) { Symbol s; u32 length = min(8, a.length()+b.length()); @@ -26,12 +27,14 @@ Symbol concat(Symbol a, Symbol b) { *(u64*) s.symbol = ((*(u64*) b.symbol) << (8*a.length())) | *(u64*) a.symbol; return s; } +} // namespace libfsst namespace std { template <> -class hash { +class hash { public: - size_t operator()(const Symbol& s) const { + size_t operator()(const libfsst::Symbol& s) const { + using namespace libfsst; uint64_t k = *(u64*) s.symbol; const uint64_t m = 0xc6a4a7935bd1e995; const int r = 47; @@ -49,6 +52,7 @@ class hash { }; } +namespace libfsst { std::ostream& operator<<(std::ostream& out, const Symbol& s) { for (u32 i=0; i &sample, ulong nlines, const ulong len[]) { assert(sampleLong > 0); return (sampleLong < FSST_SAMPLEMAXSZ)?sampleLong:FSST_SAMPLEMAXSZ-sampleLong; } +} // namespace libfsst +using namespace libfsst; extern "C" fsst_encoder_t* fsst_create(ulong n, const ulong lenIn[], const u8 *strIn[], int dummy) { vector sample; (void) dummy; @@ -307,14 +313,14 @@ extern "C" fsst_encoder_t* fsst_create(ulong n, const ulong lenIn[], const u8 *s /* create another encoder instance, necessary to do multi-threaded encoding using the same dictionary */ extern "C" fsst_encoder_t* fsst_duplicate(fsst_encoder_t *encoder) { - Encoder *e = new Encoder(); + Encoder *e = new Encoder(); e->symbolMap = ((Encoder*)encoder)->symbolMap; // it is a shared_ptr return (fsst_encoder_t*) e; } // export a dictionary in compact format. extern "C" u32 fsst_export(fsst_encoder_t *encoder, u8 *buf) { - Encoder *e = (Encoder*) encoder; + Encoder *e = (Encoder*) encoder; // In ->version there is a versionnr, but we hide also suffixLim/terminator/symbolCount there. // This is sufficient in principle to *reconstruct* a fsst_encoder_t from a fsst_decoder_t // (such functionality could be useful to append compressed data to an existing block). @@ -375,6 +381,7 @@ extern "C" u32 fsst_import(fsst_decoder_t *decoder, u8 *buf) { return pos; } +namespace libfsst { // runtime check for simd inline ulong _compressImpl(Encoder *e, ulong nlines, const ulong lenIn[], const u8 *strIn[], ulong size, u8 *output, ulong *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) { (void) noSuffixOpt; @@ -394,7 +401,9 @@ inline ulong _compressAuto(Encoder *e, ulong nlines, const ulong lenIn[], const ulong compressAuto(Encoder *e, ulong nlines, const ulong lenIn[], const u8 *strIn[], ulong size, u8 *output, ulong *lenOut, u8 *strOut[], int simd) { return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd); } +} // namespace libfsst +using namespace libfsst; // the main compression function (everything automatic) extern "C" ulong fsst_compress(fsst_encoder_t *encoder, ulong nlines, const ulong lenIn[], const u8 *strIn[], ulong size, u8 *output, ulong *lenOut, u8 *strOut[]) { // to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB) @@ -405,7 +414,7 @@ extern "C" ulong fsst_compress(fsst_encoder_t *encoder, ulong nlines, const ulon /* deallocate encoder */ extern "C" void fsst_destroy(fsst_encoder_t* encoder) { - Encoder *e = (Encoder*) encoder; + Encoder *e = (Encoder*) encoder; delete e; } diff --git a/fsst/libfsst12.hpp b/fsst/libfsst12.hpp index 0093a2e0..67d513be 100644 --- a/fsst/libfsst12.hpp +++ b/fsst/libfsst12.hpp @@ -36,6 +36,7 @@ using namespace std; #include "fsst12.h" // the official FSST API -- also usable by C mortals +namespace libfsst { /* workhorse type for string and buffer lengths: 64-bits on 64-bits platforms and 32-bits on 32-bits platforms */ typedef unsigned long ulong; @@ -44,6 +45,7 @@ typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef uint64_t u64; +} // namespace libfsst #define FSST_ENDIAN_MARKER ((u64) 1) #define FSST_VERSION_20190218 20190218 @@ -54,6 +56,7 @@ typedef uint64_t u64; #define FSST_CODE_MAX 4096 #define FSST_CODE_MASK ((u16) (FSST_CODE_MAX-1)) +namespace libfsst { inline uint64_t fsst_unaligned_load(u8 const* V) { uint64_t Ret; memcpy(&Ret, V, sizeof(uint64_t)); // compiler will generate efficient code (unaligned load, where possible) @@ -309,3 +312,4 @@ struct Encoder { // C++ fsst-compress function with some more control of how the compression happens (algorithm flavor, simd unroll degree) ulong compressImpl(Encoder *encoder, ulong n, ulong lenIn[], u8 *strIn[], ulong size, u8 * output, ulong *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd); ulong compressAuto(Encoder *encoder, ulong n, ulong lenIn[], u8 *strIn[], ulong size, u8 * output, ulong *lenOut, u8 *strOut[], int simd); +} // namespace libfsst