From d46e893a0d7081a0821f89625e0731b78035e1ea Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Wed, 21 Jan 2015 23:53:35 -0600
Subject: [PATCH] Speed up Adler-32 by doing modulo less often

---
 src/adler32.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/adler32.c b/src/adler32.c
index da5afc5..14c3ab3 100644
--- a/src/adler32.c
+++ b/src/adler32.c
@@ -5,15 +5,60 @@
  */
 
 #include "adler32.h"
+#include "compiler.h"
+
+/*
+ * The Adler-32 divisor, or "base", value.
+ */
+#define DIVISOR 65521
+
+/*
+ * MAX_BYTES_PER_CHUNK is the most bytes that can be processed without the
+ * possibility of s2 overflowing when it is represented as an unsigned 32-bit
+ * integer.  This value was computed using the following Python script:
+ *
+ *	divisor = 65521
+ *	count = 0
+ *	s1 = divisor - 1
+ *	s2 = divisor - 1
+ *	while True:
+ *		s1 += 0xFF
+ *		s2 += s1
+ *		if s2 > 0xFFFFFFFF:
+ *			break
+ *		count += 1
+ *	print(count)
+ *
+ * Note that to get the correct worst-case value, we must assume that every byte
+ * has value 0xFF and that s1 and s2 started with the highest possible values
+ * modulo the divisor.
+ */
+#define MAX_BYTES_PER_CHUNK 5552
 
+/*
+ * Compute the Adler-32 checksum of the 'size' bytes starting at 'buffer'.
+ *
+ * s1 and s2 are accumulated without reduction for up to MAX_BYTES_PER_CHUNK
+ * bytes at a time and are reduced modulo DIVISOR only at the end of each chunk.
+ * Returns s2 in the upper 16 bits and s1 in the lower 16 bits; for size == 0
+ * the loop is skipped and the result is 1.
+ */
 u32
 adler32(const u8 *buffer, size_t size)
 {
 	u32 s1 = 1;
 	u32 s2 = 0;
-	for (size_t i = 0; i < size; i++) {
-		s1 = (s1 + buffer[i]) % 65521;
-		s2 = (s2 + s1) % 65521;
+	const u8 *p = buffer;
+	const u8 * const end = p + size;
+	while (p != end) {
+		const u8 *chunk_end = p + min(end - p,
+					      MAX_BYTES_PER_CHUNK);
+		do {
+			s1 += *p++;
+			s2 += s1;
+		} while (p != chunk_end);
+		s1 %= DIVISOR;
+		s2 %= DIVISOR;
 	}
 	return (s2 << 16) | s1;
 }