From 183a16d953328a8d83f9bdfc241b27c8c619d36f Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Wed, 3 Aug 2022 18:59:47 +0200 Subject: [PATCH] fsst: deterministic symbol tables (needed to fix #91) This fixes what I believe is a bug in the fsst library that causes symbol tables to be non-deterministic. There's an open issue/PR for the library, so it's not yet clear if this fix is correct/optimal. --- fsst/libfsst.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fsst/libfsst.cpp b/fsst/libfsst.cpp index d35dad3e..069d9cca 100644 --- a/fsst/libfsst.cpp +++ b/fsst/libfsst.cpp @@ -111,6 +111,9 @@ SymbolTable *buildSymbolTable(Counters& counters, vector<u8*> line, size_t len[] while (true) { u8* old = cur; counters.count1Inc(pos1); + if (cur==end) { + break; + } // count single symbol (i.e. an option is not extending it) if (st->symbols[pos1].length() != 1) counters.count1Inc(*cur); @@ -130,8 +133,6 @@ SymbolTable *buildSymbolTable(Counters& counters, vector<u8*> line, size_t len[] pos2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK; cur += 1; } - } else if (cur==end) { - break; } else { assert(cur<end); pos2 = st->findLongestSymbol(cur, end);