From d47cabc5371db9c029d9711a3a268fec9a1dc424 Mon Sep 17 00:00:00 2001
From: Marcus Holland-Moritz <github@mhxnet.de>
Date: Mon, 12 May 2025 17:47:51 +0200
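Subject: [PATCH] chore: sync fsst from upstream

Bring the bundled fsst sources in line with upstream:

- CMakeLists.txt: raise cmake_minimum_required from 3.0 to 3.5.
- fsst.h: fsst_decompress() now enters its loop-free tail handling only
  when posOut+32 <= size (previously posOut+24).
- fsst_avx512.cpp, libfsst.{cpp,hpp}, libfsst12.{cpp,hpp}: wrap the
  internal typedefs, declarations and definitions in namespace libfsst,
  qualify the std::hash specializations accordingly, and add
  `using namespace libfsst` ahead of the extern "C" entry points.
- libfsst.cpp: compressSIMD() also stops accumulating a batch when
  len[curLine]*2 + 7 exceeds budget.
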
---
 fsst/CMakeLists.txt  |  2 +-
 fsst/fsst.h          |  2 +-
 fsst/fsst_avx512.cpp | 10 ++++++++++
 fsst/libfsst.cpp     | 13 +++++++++----
 fsst/libfsst.hpp     |  4 ++++
 fsst/libfsst12.cpp   | 19 ++++++++++++++-----
 fsst/libfsst12.hpp   |  4 ++++
 7 files changed, 43 insertions(+), 11 deletions(-)
diff --git a/fsst/CMakeLists.txt b/fsst/CMakeLists.txt
index e351d3e6..cfb89835 100644
--- a/fsst/CMakeLists.txt
+++ b/fsst/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0)
+cmake_minimum_required(VERSION 3.5)
 
 project(fsst)
 
diff --git a/fsst/fsst.h b/fsst/fsst.h
index 34e019fe..71085d57 100644
--- a/fsst/fsst.h
+++ b/fsst/fsst.h
@@ -177,7 +177,7 @@ fsst_decompress(
          }
       }
    }
-   if (posOut+24 <= size) { // handle the possibly 3 last bytes without a loop
+   if (posOut+32 <= size) { // handle the possibly 3 last bytes without a loop
       if (posIn+2 <= lenIn) { 
 	 strOut[posOut] = strIn[posIn+1]; 
          if (strIn[posIn] != FSST_ESC) {
diff --git a/fsst/fsst_avx512.cpp b/fsst/fsst_avx512.cpp
index a2b7b5e5..150683d2 100644
--- a/fsst/fsst_avx512.cpp
+++ b/fsst/fsst_avx512.cpp
@@ -21,23 +21,31 @@
 #include <immintrin.h>
 
 #ifdef _WIN32
+namespace libfsst {
 bool fsst_hasAVX512() {
    int info[4];
    __cpuidex(info, 0x00000007, 0);
    return (info[1]>>16)&1;
 }
+}  // namespace libfsst
 #else
 #include <cpuid.h>
+namespace libfsst {
 bool fsst_hasAVX512() {
    int info[4];
     __cpuid_count(0x00000007, 0, info[0], info[1], info[2], info[3]);
    return (info[1]>>16)&1;
 }
+}  // namespace libfsst
 #endif
 #else
+namespace libfsst {
 bool fsst_hasAVX512() { return false; }
+}  // namespace libfsst
 #endif
 
+namespace libfsst {
+
 // BULK COMPRESSION OF STRINGS
 //
 // In one call of this function, we can compress 512 strings, each of maximum length 511 bytes.
@@ -138,3 +146,5 @@ size_t fsst_compressAVX512(SymbolTable &symbolTable, u8* codeBase, u8* symbolBas
 #endif
    return processed;
 }
+}  // namespace libfsst
+
diff --git a/fsst/libfsst.cpp b/fsst/libfsst.cpp
index 5e8a5ea3..919cb9ce 100644
--- a/fsst/libfsst.cpp
+++ b/fsst/libfsst.cpp
@@ -17,6 +17,7 @@
 // You can contact the authors via the FSST source repository : https://github.com/cwida/fsst
 #include "libfsst.hpp"
 
+namespace libfsst {
 Symbol concat(Symbol a, Symbol b) {
    Symbol s;
    u32 length = a.length()+b.length();
@@ -25,12 +26,13 @@ Symbol concat(Symbol a, Symbol b) {
    s.val.num = (b.val.num << (8*a.length())) | a.val.num;
    return s;
 }
+}  // namespace libfsst
 
 namespace std {
 template <>
-class hash<QSymbol> {
+class hash<libfsst::QSymbol> {
    public:
-   size_t operator()(const QSymbol& q) const {
+   size_t operator()(const libfsst::QSymbol& q) const {
       uint64_t k = q.symbol.val.num;
       const uint64_t m = 0xc6a4a7935bd1e995;
       const int r = 47;
@@ -48,6 +50,7 @@ class hash<QSymbol> {
 };
 }
 
+namespace libfsst {
 bool isEscapeCode(u16 pos) { return pos < FSST_CODE_BASE; }
 
 std::ostream& operator<<(std::ostream& out, const Symbol& s) {
@@ -289,7 +292,7 @@ static inline size_t compressSIMD(SymbolTable &symbolTable, u8* symbolBase, size
             if (++batchPos == 512) break;
          } while(curOff < len[curLine]);
    
-         if ((batchPos == 512) || (outOff > (1<<19)) || (++curLine >= nlines)) { // cannot accumulate more?
+         if ((batchPos == 512) || (outOff > (1<<19)) || (++curLine >= nlines) || (((len[curLine])*2 + 7) > budget)) { // cannot accumulate more?
             if (batchPos-empty >= 32) { // if we have enough work, fire off fsst_compressAVX512 (32 is due to max 4x8 unrolling)
                // radix-sort jobs on length (longest string first) 
                // -- this provides best load balancing and allows to skip empty jobs at the end
@@ -615,7 +618,9 @@ inline size_t _compressAuto(Encoder *e, size_t nlines, const size_t lenIn[], con
 size_t compressAuto(Encoder *e, size_t nlines, const size_t lenIn[], const u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) {
    return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
 }
+}  // namespace libfsst
 
+using namespace libfsst;
 // the main compression function (everything automatic)
 extern "C" size_t fsst_compress(fsst_encoder_t *encoder, size_t nlines, const size_t lenIn[], const u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[]) {
    // to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB)
@@ -626,7 +631,7 @@ extern "C" size_t fsst_compress(fsst_encoder_t *encoder, size_t nlines, const si
 
 /* deallocate encoder */
 extern "C" void fsst_destroy(fsst_encoder_t* encoder) {
-   Encoder *e = (Encoder*) encoder; 
+  Encoder *e = (Encoder*) encoder; 
    delete e;
 }
 
diff --git a/fsst/libfsst.hpp b/fsst/libfsst.hpp
index d5ffcb88..a29a4afb 100644
--- a/fsst/libfsst.hpp
+++ b/fsst/libfsst.hpp
@@ -37,10 +37,12 @@ using namespace std;
 #include "fsst.h" // the official FSST API -- also usable by C mortals
 
 /* unsigned integers */
+namespace libfsst {
 typedef uint8_t u8;
 typedef uint16_t u16;
 typedef uint32_t u32;
 typedef uint64_t u64;
+}  // namespace libfsst
 
 #define FSST_ENDIAN_MARKER ((u64) 1)
 #define FSST_VERSION_20190218 20190218
@@ -57,6 +59,7 @@ typedef uint64_t u64;
 #define FSST_CODE_MAX       (1UL<<FSST_CODE_BITS) /* all bits set: indicating a symbol that has not been assigned a code yet */
 #define FSST_CODE_MASK      (FSST_CODE_MAX-1UL)   /* all bits set: indicating a symbol that has not been assigned a code yet */
 
+namespace libfsst {
 inline uint64_t fsst_unaligned_load(u8 const* V) {
     uint64_t Ret;
     memcpy(&Ret, V, sizeof(uint64_t)); // compiler will generate efficient code (unaligned load, where possible)
@@ -449,3 +452,4 @@ fsst_compressAVX512(
 // C++ fsst-compress function with some more control of how the compression happens (algorithm flavor, simd unroll degree)
 size_t compressImpl(Encoder *encoder, size_t n, size_t lenIn[], u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd);
 size_t compressAuto(Encoder *encoder, size_t n, size_t lenIn[], u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], int simd);
+}  // namespace libfsst
diff --git a/fsst/libfsst12.cpp b/fsst/libfsst12.cpp
index fa684513..b41a3123 100644
--- a/fsst/libfsst12.cpp
+++ b/fsst/libfsst12.cpp
@@ -19,6 +19,7 @@
 #include <math.h>
 #include <string.h>
 
+namespace libfsst {
 Symbol concat(Symbol a, Symbol b) {
    Symbol s;
    u32 length = min(8, a.length()+b.length());
@@ -26,12 +27,14 @@ Symbol concat(Symbol a, Symbol b) {
    *(u64*) s.symbol = ((*(u64*) b.symbol) << (8*a.length())) | *(u64*) a.symbol;
    return s;
 }
+}  // namespace libfsst
 
 namespace std {
 template <>
-class hash<Symbol> {
+class hash<libfsst::Symbol> {
    public:
-   size_t operator()(const Symbol& s) const {
+   size_t operator()(const libfsst::Symbol& s) const {
+      using namespace libfsst;
       uint64_t k = *(u64*) s.symbol;
       const uint64_t m = 0xc6a4a7935bd1e995;
       const int r = 47;
@@ -49,6 +52,7 @@ class hash<Symbol> {
 };
 }
 
+namespace libfsst {
 std::ostream& operator<<(std::ostream& out, const Symbol& s) {
    for (u32 i=0; i<s.length(); i++)
       out << s.symbol[i];
@@ -295,7 +299,9 @@ long makeSample(vector<ulong> &sample, ulong nlines, const ulong len[]) {
    assert(sampleLong > 0);
    return (sampleLong < FSST_SAMPLEMAXSZ)?sampleLong:FSST_SAMPLEMAXSZ-sampleLong; 
 }
+}  // namespace libfsst
 
+using namespace libfsst; 
 extern "C" fsst_encoder_t* fsst_create(ulong n, const ulong lenIn[], const u8 *strIn[], int dummy) {
    vector<ulong> sample;
    (void) dummy;
@@ -307,14 +313,14 @@ extern "C" fsst_encoder_t* fsst_create(ulong n, const ulong lenIn[], const u8 *s
 
 /* create another encoder instance, necessary to do multi-threaded encoding using the same dictionary */
 extern "C" fsst_encoder_t* fsst_duplicate(fsst_encoder_t *encoder) {
-   Encoder *e = new Encoder();
+  Encoder *e = new Encoder();
    e->symbolMap = ((Encoder*)encoder)->symbolMap; // it is a shared_ptr
    return (fsst_encoder_t*) e;
 }
 
 // export a dictionary in compact format. 
 extern "C" u32 fsst_export(fsst_encoder_t *encoder, u8 *buf) {
-   Encoder *e = (Encoder*) encoder;
+  Encoder *e = (Encoder*) encoder;
    // In ->version there is a versionnr, but we hide also suffixLim/terminator/symbolCount there.
    // This is sufficient in principle to *reconstruct* a fsst_encoder_t from a fsst_decoder_t
    // (such functionality could be useful to append compressed data to an existing block).
@@ -375,6 +381,7 @@ extern "C" u32 fsst_import(fsst_decoder_t *decoder, u8 *buf) {
    return pos;
 }
 
+namespace libfsst {
 // runtime check for simd
 inline ulong _compressImpl(Encoder *e, ulong nlines, const ulong lenIn[], const u8 *strIn[], ulong size, u8 *output, ulong *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) {
    (void) noSuffixOpt;
@@ -394,7 +401,9 @@ inline ulong _compressAuto(Encoder *e, ulong nlines, const ulong lenIn[], const
 ulong compressAuto(Encoder *e, ulong nlines, const ulong lenIn[], const u8 *strIn[], ulong size, u8 *output, ulong *lenOut, u8 *strOut[], int simd) {
    return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
 }
+}  // namespace libfsst
 
+using namespace libfsst;
 // the main compression function (everything automatic)
 extern "C" ulong fsst_compress(fsst_encoder_t *encoder, ulong nlines, const ulong lenIn[], const u8 *strIn[], ulong size, u8 *output, ulong *lenOut, u8 *strOut[]) {
    // to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB)
@@ -405,7 +414,7 @@ extern "C" ulong fsst_compress(fsst_encoder_t *encoder, ulong nlines, const ulon
 
 /* deallocate encoder */
 extern "C" void fsst_destroy(fsst_encoder_t* encoder) {
-   Encoder *e = (Encoder*) encoder; 
+  Encoder *e = (Encoder*) encoder; 
    delete e;
 }
 
diff --git a/fsst/libfsst12.hpp b/fsst/libfsst12.hpp
index 0093a2e0..67d513be 100644
--- a/fsst/libfsst12.hpp
+++ b/fsst/libfsst12.hpp
@@ -36,6 +36,7 @@ using namespace std;
 
 #include "fsst12.h" // the official FSST API -- also usable by C mortals
 
+namespace libfsst {
 /* workhorse type for string and buffer lengths: 64-bits on 64-bits platforms and 32-bits on 32-bits platforms */
 typedef unsigned long ulong; 
 
@@ -44,6 +45,7 @@ typedef uint8_t u8;
 typedef uint16_t u16;
 typedef uint32_t u32;
 typedef uint64_t u64;
+} // namespace libfsst
 
 #define FSST_ENDIAN_MARKER ((u64) 1)
 #define FSST_VERSION_20190218 20190218
@@ -54,6 +56,7 @@ typedef uint64_t u64;
 #define FSST_CODE_MAX 4096
 #define FSST_CODE_MASK      ((u16) (FSST_CODE_MAX-1)) 
 
+namespace libfsst {
 inline uint64_t fsst_unaligned_load(u8 const* V) {
     uint64_t Ret;
     memcpy(&Ret, V, sizeof(uint64_t)); // compiler will generate efficient code (unaligned load, where possible)
@@ -309,3 +312,4 @@ struct Encoder {
 // C++ fsst-compress function with some more control of how the compression happens (algorithm flavor, simd unroll degree)
 ulong compressImpl(Encoder *encoder, ulong n, ulong lenIn[], u8 *strIn[], ulong size, u8 * output, ulong *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd);
 ulong compressAuto(Encoder *encoder, ulong n, ulong lenIn[], u8 *strIn[], ulong size, u8 * output, ulong *lenOut, u8 *strOut[], int simd);
+}  // namespace libfsst