Speed up Adler-32 by doing modulo less often

2025-09-09 04:05:12 -04:00 · 2015-01-21 23:53:35 -06:00 · 2015-01-21 23:53:35 -06:00 · d46e893a0d
commit d46e893a0d
parent 9bdd60cffa
1 changed files with 40 additions and 3 deletions
--- a/src/adler32.c
+++ b/src/adler32.c
@@ -5,15 +5,52 @@
 */

 #include "adler32.h"
+#include "compiler.h"
+
+/*
+ * The Adler-32 divisor, or "base", value.
+ */
+#define DIVISOR 65521
+
+/*
+ * MAX_BYTES_PER_CHUNK is the most bytes that can be processed without the
+ * possibility of s2 overflowing when it is represented as an unsigned 32-bit
+ * integer.  This value was computed using the following Python script:
+ *
+ *	divisor = 65521
+ *	count = 0
+ *	s1 = divisor - 1
+ *	s2 = divisor - 1
+ *	while True:
+ *		s1 += 0xFF
+ *		s2 += s1
+ *		if s2 > 0xFFFFFFFF:
+ *			break
+ *		count += 1
+ *	print(count)
+ *
+ * Note that to get the correct worst-case value, we must assume that every byte
+ * has value 0xFF and that s1 and s2 started with the highest possible values
+ * modulo the divisor.
+ */
+#define MAX_BYTES_PER_CHUNK	5552

 u32
 adler32(const u8 *buffer, size_t size)
 {
 	u32 s1 = 1;
 	u32 s2 = 0;
-	for (size_t i = 0; i < size; i++) {
-		s1 = (s1 + buffer[i]) % 65521;
-		s2 = (s2 + s1) % 65521;
+	const u8 *p = buffer;
+	const u8 * const end = p + size;
+	while (p != end) {
+		const u8 *chunk_end = p + min(end - p,
+					      MAX_BYTES_PER_CHUNK);
+		do {
+			s1 += *p++;
+			s2 += s1;
+		} while (p != chunk_end);
+		s1 %= DIVISOR;	/* reduce once per chunk, not once per byte */
+		s2 %= DIVISOR;	/* chunk length bound keeps s2 within 32 bits */
 	}
 	return (s2 << 16) | s1;
 }