Compute accelerated huffman codes, fix mistakes with uncompressed reading

2025-09-18 12:05:14 -04:00 · 2017-10-02 21:50:30 +11:00 · 2017-10-02 21:50:30 +11:00 · 8b6abe1b33
commit 8b6abe1b33
parent ad21af6c53
4 changed files with 72 additions and 43 deletions
--- a/src/Client/AABB.h
+++ b/src/Client/AABB.h
@ -14,7 +14,6 @@ typedef struct AABB_ {
 	Vector3 Max;
 } AABB;

-
 /* Returns the width of the given bounding box. */
 #define AABB_Width(bb)  (bb->Max.X - bb->Min.X)
 /* Returns the height of the given bounding box. */
@ -22,23 +21,16 @@ typedef struct AABB_ {
 /* Returns the length of the given bounding box. */
 #define AABB_Length(bb) (bb->Max.Z - bb->Min.Z)

-
 void AABB_FromCoords6(AABB* result, Real32 x1, Real32 y1, Real32 z1, Real32 x2, Real32 y2, Real32 z2);
-
 void AABB_FromCoords(AABB* result, Vector3* min, Vector3* max);
-
 void AABB_Make(AABB* result, Vector3* pos, Vector3* size);

-
 /* Returns a new AABB, with the min and max coordinates of the original AABB translated by the given vector. */
 void AABB_Offset(AABB* result, AABB* bb, Vector3* amount);
-
 /* Determines whether this AABB intersects the given AABB on any axes. */
 bool AABB_Intersects(AABB* bb, AABB* other);
-
 /* Determines whether this AABB entirely contains the AABB on all axes. */
 bool AABB_Contains(AABB* parent, AABB* child);
-
 /* Determines whether this AABB entirely contains the coordinates on all axes. */
 bool AABB_ContainsPoint(AABB* parent, Vector3* P);
 #endif
--- a/src/Client/Deflate.c
+++ b/src/Client/Deflate.c
@ -168,9 +168,11 @@ state->Bits |= (UInt32)(state->Input[state->NextIn]) << state->NumBits;\
 state->NextIn++;\
 state->NumBits += 8;\

-/* Gets bytes from the bit buffer */
-#define DEFLATE_CONSUME_BITS(state, bits, result)\
-result = state->Bits & ((1UL << (bits)) - 1UL);\
+/* Retrieves bits from the bit buffer */
+#define DEFLATE_PEEK_BITS(state, bits) (state->Bits & ((1UL << (bits)) - 1UL))
+
+/* Consumes/eats up bits from the bit buffer */
+#define DEFLATE_CONSUME_BITS(state, bits)\
 state->Bits >>= bits;\
 state->NumBits -= bits;

@ -187,8 +189,19 @@ while (state->NumBits < bitsCount) {\
 	DEFLATE_GET_BYTE(state);\
 }

+/* Peeks then consumes given bits. */
+#define DEFLATE_READ_BITS(state, bitsCount) DEFLATE_PEEK_BITS(state, bitsCount); DEFLATE_CONSUME_BITS(state, bitsCount);
+
 #define DEFLATE_NEXTBLOCK_STATE(state) state->State = state->LastBlock ? DeflateState_Done : DeflateState_Header;

+UInt32 Huffman_ReverseBits(UInt32 n, UInt8 bits) {
+	n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
+	n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
+	n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
+	n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
+	return n >> (16 - bits);
+}
+
 /* TODO: This needs to be massively optimised. */
 void Huffman_Build(HuffmanTable* table, UInt8* bitLens, Int32 count) {
 	Int32 i;
@ -203,36 +216,52 @@ void Huffman_Build(HuffmanTable* table, UInt8* bitLens, Int32 count) {
 	}
 	bl_count[0] = 0;
 	for (i = 1; i < DEFLATE_MAX_BITS; i++) {
-		if (bl_count[i] >(1 << i)) {
+		if (bl_count[i] > (1 << i)) {
 			ErrorHandler_Fail("Too many huffman codes for bit length");
 		}
 	}

 	Int32 code = 0, offset = 0;
-	Int32 next_code[DEFLATE_MAX_BITS];
 	UInt16 bl_offsets[DEFLATE_MAX_BITS];
 	for (i = 1; i < DEFLATE_MAX_BITS; i++) {
 		code = (code + bl_count[i - 1]) << 1;
-		next_code[i] = code;
 		bl_offsets[i] = (UInt16)offset;
 		table->FirstCodewords[i] = (UInt16)code;
 		table->FirstOffsets[i] = (UInt16)offset;

 		offset += bl_count[i];
 		if (bl_count[i]) {
-			table->EndCodewords[i] = code + (bl_count[i] - 1);			
+			table->EndCodewords[i] = code + (bl_count[i] - 1);
 		} else {
 			table->EndCodewords[i] = -1;
 		}
 	}

 	Int32 value = 0;
+	Platform_MemSet(table->Fast, UInt8_MaxValue, sizeof(table->Fast));
 	for (i = 0; i < count; i++, value++) {
 		Int32 len = bitLens[i];
-		if (len) {		
-			table->Values[bl_offsets[len]] = (UInt16)value;
-			bl_offsets[len]++;
+		if (len == 0) continue;
+		table->Values[bl_offsets[len]] = (UInt16)value;
+
+		/* Computes the accelerated lookup table values for this codeword
+		 * For example, assume len = 4 and codeword = 0100
+		 * - Shift it left to be 0100_00000
+		 * - Then, for all the indices from 0100_00000 to 0100_11111,
+		 *   - bit reverse index, as huffman codes are read backwards
+		 *   - set fast value to specify a 'value' value, and to skip 'len' bits
+		 */
+		if (len <= DEFLATE_ZFAST_BITS) {
+			Int16 packed = (Int16)((len << 9) | value), j;
+			Int32 codeword = table->FirstCodewords[len] + (bl_offsets[len] - table->FirstOffsets[len]);
+			codeword <<= (DEFLATE_ZFAST_BITS - len);
+
+			for (j = 0; j < 1 << (DEFLATE_ZFAST_BITS - len); j++, codeword++) {
+				Int32 index = Huffman_ReverseBits(codeword, DEFLATE_ZFAST_BITS);
+				table->Fast[index] = packed;
+			}
 		}
+		bl_offsets[len]++;
 	}
 }

@ -248,8 +277,7 @@ Int32 Huffman_Decode(DeflateState* state, HuffmanTable* table) {

 		codeword <<= 1;
 		codeword |= state->Bits & 1;
-		state->Bits >>= 1;
-		state->NumBits--;
+		DEFLATE_CONSUME_BITS(state, 1);

 		if (codeword >= table->FirstCodewords[i] && codeword <= table->EndCodewords[i]) {
 			Int32 offset = table->FirstOffsets[i] + (codeword - table->FirstCodewords[i]);
@ -277,8 +305,7 @@ bool Deflate_Step(DeflateState* state) {
 	switch (state->State) {
 	case DeflateState_Header: {
 		DEFLATE_ENSURE_BITS(state, 3);
-		UInt32 blockHeader;
-		DEFLATE_CONSUME_BITS(state, 3, blockHeader);
+		UInt32 blockHeader = DEFLATE_READ_BITS(state, 3);
 		state->LastBlock = blockHeader & 1;

 		switch (blockHeader >> 1) {
@ -319,9 +346,9 @@ bool Deflate_Step(DeflateState* state) {

 	case DeflateState_UncompressedHeader: {
 		DEFLATE_ENSURE_BITS(state, 32);
-		UInt32 len, nlen;
-		DEFLATE_CONSUME_BITS(state, 16, len);
-		DEFLATE_CONSUME_BITS(state, 16, nlen);
+		UInt32 len = DEFLATE_READ_BITS(state, 16);
+		UInt32 nlen = DEFLATE_READ_BITS(state, 16);
+
 		if (len != (nlen ^ 0xFFFFUL)) {
 			ErrorHandler_Fail("DEFLATE - Uncompressed block LEN check failed");
 		}
@ -330,15 +357,22 @@ bool Deflate_Step(DeflateState* state) {
 	} break;

 	case DeflateState_UncompressedData: {
-		if (state->AvailIn > 0 || state->AvailOut > 0) return false;
+		while (state->NumBits > 0 && state->AvailOut > 0 && state->Index > 0) {
+			*state->Output = DEFLATE_READ_BITS(state, 8);
+			state->AvailOut--;
+			state->Index--;
+		}
+
+		if (state->AvailIn == 0 || state->AvailOut == 0) return false;
 		UInt32 copyLen = min(state->AvailIn, state->AvailOut);
 		copyLen = min(copyLen, state->Index);
-
-		Platform_MemCpy(state->Output, state->Input, copyLen);
-		state->Output += copyLen;
-		state->AvailIn -= copyLen;
-		state->AvailOut -= copyLen;
-		state->Index -= copyLen;
+		if (copyLen > 0) {
+			Platform_MemCpy(state->Output, state->Input, copyLen);
+			state->Output += copyLen;
+			state->AvailIn -= copyLen;
+			state->AvailOut -= copyLen;
+			state->Index -= copyLen;
+		}

 		if (state->Index == 0) {
 			state->State = DEFLATE_NEXTBLOCK_STATE(state);
@ -347,9 +381,9 @@ bool Deflate_Step(DeflateState* state) {

 	case DeflateState_DynamicHeader: {
 		DEFLATE_ENSURE_BITS(state, 14);
-		DEFLATE_CONSUME_BITS(state, 5, state->NumLits);     state->NumLits += 257;
-		DEFLATE_CONSUME_BITS(state, 5, state->NumDists);    state->NumDists += 1;
-		DEFLATE_CONSUME_BITS(state, 4, state->NumCodeLens); state->NumCodeLens += 4;
+		state->NumLits     = DEFLATE_READ_BITS(state, 5); state->NumLits += 257;
+		state->NumDists    = DEFLATE_READ_BITS(state, 5); state->NumDists += 1;
+		state->NumCodeLens = DEFLATE_READ_BITS(state, 4); state->NumCodeLens += 4;
 		state->Index = 0;
 		state->State = DeflateState_DynamicCodeLens;
 	} break;
@ -361,7 +395,7 @@ bool Deflate_Step(DeflateState* state) {
 		while (state->Index < state->NumCodeLens) {
 			DEFLATE_ENSURE_BITS(state, 3);
 			i = order[state->Index];
-			DEFLATE_CONSUME_BITS(state, 3, state->Buffer[i]);
+			state->Buffer[i] = DEFLATE_READ_BITS(state, 3);
 			state->Index++;
 		}
 		for (i = state->NumCodeLens; i < DEFLATE_MAX_CODELENS; i++) {
@ -388,7 +422,7 @@ bool Deflate_Step(DeflateState* state) {
 				switch (bits) {
 				case 16:
 					DEFLATE_ENSURE_BITS(state, 2);
-					DEFLATE_CONSUME_BITS(state, 2, repeatCount);
+					repeatCount = DEFLATE_READ_BITS(state, 2);
 					if (state->Index == 0) {
 						ErrorHandler_Fail("DEFLATE - Tried to repeat invalid byte");
 					}
@ -397,13 +431,13 @@ bool Deflate_Step(DeflateState* state) {

 				case 17:
 					DEFLATE_ENSURE_BITS(state, 3);
-					DEFLATE_CONSUME_BITS(state, 3, repeatCount);
+					repeatCount = DEFLATE_READ_BITS(state, 3);
 					repeatCount += 3; repeatValue = 0;
 					break;

 				case 18:
 					DEFLATE_ENSURE_BITS(state, 7);
-					DEFLATE_CONSUME_BITS(state, 7, repeatCount);
+					repeatCount = DEFLATE_READ_BITS(state, 7);
 					repeatCount += 11; repeatValue = 0;
 					break;
 				}
--- a/src/Client/Deflate.h
+++ b/src/Client/Deflate.h
@ -41,12 +41,15 @@ void ZLibHeader_Read(Stream* s, ZLibHeader* header);
 #define DEFLATE_MAX_LITS 288
 #define DEFLATE_MAX_DISTS 32
 #define DEFLATE_MAX_BITS 16
+#define DEFLATE_ZFAST_BITS 9
+#define DEFLATE_ZFAST_MASK ((1 << DEFLATE_ZFAST_BITS) - 1)

 typedef struct HuffmanTable_ {
 	UInt16 FirstCodewords[DEFLATE_MAX_BITS]; /* Starting codeword for each bit length */
 	Int32 EndCodewords[DEFLATE_MAX_BITS];    /* End codeword for each bit length. -1 is ignored. */
 	UInt16 FirstOffsets[DEFLATE_MAX_BITS];   /* Base offset into Values for codewords of each bit length. */
 	UInt16 Values[DEFLATE_MAX_LITS];         /* Values/Symbols list */
+	Int16 Fast[1 << DEFLATE_ZFAST_BITS];     /* Fast lookup table for huffman codes */
 } HuffmanTable;

 typedef struct DeflateState_ {
--- a/src/Client/Stream.h
+++ b/src/Client/Stream.h
@ -17,10 +17,6 @@ typedef UInt32 (*Stream_Position)(struct Stream_* stream);

 /* Represents a stream that can be written to and/or read from. */
 typedef struct Stream_ {
-	/* Raw name buffer */
-	UInt8 NameBuffer[String_BufferSize(STREAM_NAME_LEN)];
-	/* The name of the stream. */
-	String Name;
 	/* Performs a read. Result is a ReturnCode, number of read bytes is output via pointer. */
 	Stream_Operation Read;
 	/* Performs a write. Result is a ReturnCode, number of written bytes is output via pointer. */
@ -35,6 +31,10 @@ typedef struct Stream_ {
 	Stream_Position Position;
 	/* General purpose metadata for the stream. */
 	void* Data;
+	/* Raw name buffer */
+	UInt8 NameBuffer[String_BufferSize(STREAM_NAME_LEN)];
+	/* The name of the stream. */
+	String Name;
 } Stream;

 /* Fully reads up to count bytes or fails. */