mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-09 04:05:12 -04:00
Speed up Adler-32 by doing modulo less often
This commit is contained in:
parent
9bdd60cffa
commit
d46e893a0d
@ -5,15 +5,52 @@
|
||||
*/
|
||||
|
||||
#include "adler32.h"
|
||||
#include "compiler.h"
|
||||
|
||||
/*
|
||||
* The Adler-32 divisor, or "base", value.
|
||||
*/
|
||||
#define DIVISOR 65521
|
||||
|
||||
/*
|
||||
* MAX_BYTES_PER_CHUNK is the most bytes that can be processed without the
|
||||
* possibility of s2 overflowing when it is represented as an unsigned 32-bit
|
||||
* integer. This value was computed using the following Python script:
|
||||
*
|
||||
* divisor = 65521
|
||||
* count = 0
|
||||
* s1 = divisor - 1
|
||||
* s2 = divisor - 1
|
||||
* while True:
|
||||
*     s1 += 0xFF
|
||||
*     s2 += s1
|
||||
*     if s2 > 0xFFFFFFFF:
|
||||
*         break
|
||||
*     count += 1
|
||||
* print(count)
|
||||
*
|
||||
* Note that to get the correct worst-case value, we must assume that every byte
|
||||
* has value 0xFF and that s1 and s2 started with the highest possible values
|
||||
* modulo the divisor.
|
||||
*/
|
||||
#define MAX_BYTES_PER_CHUNK 5552
|
||||
|
||||
u32
|
||||
adler32(const u8 *buffer, size_t size)
|
||||
{
|
||||
u32 s1 = 1;
|
||||
u32 s2 = 0;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
s1 = (s1 + buffer[i]) % 65521;
|
||||
s2 = (s2 + s1) % 65521;
|
||||
const u8 *p = buffer;
|
||||
const u8 * const end = p + size;
|
||||
while (p != end) {
|
||||
const u8 *chunk_end = p + min(end - p,
|
||||
MAX_BYTES_PER_CHUNK);
|
||||
do {
|
||||
s1 += *p++;
|
||||
s2 += s1;
|
||||
} while (p != chunk_end);
|
||||
s1 %= 65521;
|
||||
s2 %= 65521;
|
||||
}
|
||||
return (s2 << 16) | s1;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user