initial commit

This commit is contained in:
Eric Biggers 2014-12-27 23:11:49 -06:00
commit 6a982b7ac7
33 changed files with 6260 additions and 0 deletions

83
CMakeLists.txt Normal file
View File

@ -0,0 +1,83 @@
# Build script for libdeflate.
#
# Builds a shared library (deflate) and a static library (deflatestatic) from
# the sources selected by the SUPPORT_* options below, optionally builds the
# example benchmark program, and installs the libraries and public header.
cmake_minimum_required(VERSION 2.6)
project(libdeflate C)

set(LIB_VERSION_MAJOR 0)
set(LIB_VERSION_MINOR 0)
set(LIB_VERSION_PATCH 0)
set(LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}")

# Default to an optimized build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# The language standard and default symbol visibility are required by every
# configuration, so they go into CMAKE_C_FLAGS.  Putting them only into the
# Release/Debug flag variables would silently drop them for RelWithDebInfo,
# MinSizeRel and any custom build type.
set(C_FLAGS "-std=c11 -fvisibility=hidden")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS}")
set(CMAKE_C_FLAGS_RELEASE "-O2 -DNDEBUG")
set(CMAKE_C_FLAGS_DEBUG "-O0 -g")

# The examples include <libdeflate.h>, which lives in the top-level directory.
include_directories(".")

# ---- Optional features: each one adds the source files it needs ----

option(SUPPORT_COMPRESSION "Support DEFLATE compression" ON)
if(SUPPORT_COMPRESSION)
  list(APPEND LIB_SOURCES src/deflate_compress.c)
endif()

option(SUPPORT_DECOMPRESSION "Support DEFLATE decompression" ON)
if(SUPPORT_DECOMPRESSION)
  list(APPEND LIB_SOURCES src/deflate_decompress.c)
endif()

option(SUPPORT_ZLIB "Support zlib wrapper format" ON)
if(SUPPORT_ZLIB)
  list(APPEND LIB_SOURCES src/adler32.c)
  if(SUPPORT_COMPRESSION)
    list(APPEND LIB_SOURCES src/zlib_compress.c)
  endif()
  if(SUPPORT_DECOMPRESSION)
    list(APPEND LIB_SOURCES src/zlib_decompress.c)
  endif()
endif()

option(SUPPORT_GZIP "Support gzip wrapper format" ON)
if(SUPPORT_GZIP)
  list(APPEND LIB_SOURCES src/crc32.c)
  if(SUPPORT_COMPRESSION)
    list(APPEND LIB_SOURCES src/gzip_compress.c)
  endif()
  if(SUPPORT_DECOMPRESSION)
    list(APPEND LIB_SOURCES src/gzip_decompress.c)
  endif()
endif()

# ---- Compile-time switches passed to the C sources as 0/1 macros ----

option(SUPPORT_NEAR_OPTIMAL_PARSING "Support near optimal parsing (high compression mode)" ON)
if(SUPPORT_NEAR_OPTIMAL_PARSING)
  add_definitions(-DSUPPORT_NEAR_OPTIMAL_PARSING=1)
else()
  add_definitions(-DSUPPORT_NEAR_OPTIMAL_PARSING=0)
endif()

option(UNSAFE_DECOMPRESSION "Assume that all compressed data is valid (faster but insecure)" OFF)
if(UNSAFE_DECOMPRESSION)
  add_definitions(-DUNSAFE_DECOMPRESSION=1)
else()
  add_definitions(-DUNSAFE_DECOMPRESSION=0)
endif()

# ---- Library targets ----

add_library(deflate SHARED ${LIB_SOURCES})
add_library(deflatestatic STATIC ${LIB_SOURCES})
set_target_properties(deflate PROPERTIES VERSION ${LIB_VERSION_STRING})
set_target_properties(deflate PROPERTIES SOVERSION ${LIB_VERSION_MAJOR})

# ---- Example programs ----

option(BUILD_EXAMPLES "Build the example programs" OFF)
if(BUILD_EXAMPLES)
  add_executable(benchmark examples/benchmark.c)
  target_link_libraries(benchmark deflate)
endif()

# ---- Installation ----
#
# Destinations are relative so that they are resolved against
# CMAKE_INSTALL_PREFIX at install time.  RUNTIME is needed on DLL platforms,
# where the shared library itself is a runtime artifact and only its import
# library is an ARCHIVE.
install(TARGETS deflate deflatestatic
  RUNTIME DESTINATION bin
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib)
install(FILES libdeflate.h DESTINATION include)

19
README Normal file
View File

@ -0,0 +1,19 @@
This is libdeflate, a free (public domain) library for fast, whole-buffer
DEFLATE compression and decompression.
The supported formats are:
- DEFLATE (raw)
- zlib (DEFLATE with zlib header and footer)
- gzip (DEFLATE with gzip header and footer)
libdeflate is heavily optimized. It is significantly faster than zlib, both for
compression and decompression. In addition, at compression levels 8 and above
it provides a compression ratio better than zlib's, while still being about the
same speed as zlib's level 9.
libdeflate has a simple API that is not zlib-compatible. You can create
compressors and decompressors, and use them to compress or decompress buffers.
There is not yet any support for streaming. See libdeflate.h for details.
libdeflate is public domain; the author claims no copyright on it.

210
examples/benchmark.c Normal file
View File

@ -0,0 +1,210 @@
/*
* benchmark.c - A compression testing and benchmark program.
*
* The author dedicates this file to the public domain.
* You can do whatever you want with this file.
*/
#include <libdeflate.h>
#ifdef __WIN32__
# include <windows.h>
#else
# define _FILE_OFFSET_BITS 64
# define O_BINARY 0
# define _POSIX_C_SOURCE 199309L
# include <time.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Return the current value of a monotonic clock, in nanoseconds.
 *
 * The same unit is used on every platform so that TIME_UNIT_PER_MS and the
 * MB/s computations in the callers (which assume nanoseconds) stay correct.
 */
static uint64_t
current_time(void)
{
/* Number of time units (nanoseconds) per millisecond. */
#define TIME_UNIT_PER_MS 1000000
#ifdef __WIN32__
	LARGE_INTEGER freq;
	LARGE_INTEGER count;

	/* The performance counter ticks at a machine-specific frequency, so
	 * the raw tick count must be scaled by it to obtain real time. */
	QueryPerformanceFrequency(&freq);
	QueryPerformanceCounter(&count);

	/* Convert whole seconds and the sub-second remainder separately so
	 * that the multiplication by 10^9 cannot overflow 64 bits. */
	return (uint64_t)(count.QuadPart / freq.QuadPart) * 1000000000ULL +
	       (uint64_t)(count.QuadPart % freq.QuadPart) * 1000000000ULL /
			(uint64_t)freq.QuadPart;
#else
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (1000000000ULL * ts.tv_sec) + ts.tv_nsec;
#endif
}
/*
 * Benchmark DEFLATE compression and decompression of the file open on 'fd'.
 *
 * The file is read in chunks of at most 'max_chunk_size' bytes.  Each chunk is
 * compressed from 'ubuf1' into 'cbuf'; if it compressed to fewer bytes than
 * its original size it is decompressed into 'ubuf2' and compared with the
 * original.  Totals and throughput are printed to stdout at the end.
 *
 * 'ubuf1' and 'ubuf2' must have room for 'max_chunk_size' bytes and 'cbuf'
 * for 'max_chunk_size - 1' bytes.
 *
 * Returns 0 on success, or 1 on a read error, a decompression failure, or a
 * round-trip mismatch (a message is printed to stderr).
 */
static int
do_benchmark(int fd, char *ubuf1, char *ubuf2,
	     char *cbuf, uint32_t max_chunk_size,
	     struct deflate_compressor *compressor,
	     struct deflate_decompressor *decompressor)
{
	uint64_t usize_total = 0;	/* uncompressed bytes read */
	uint64_t csize_total = 0;	/* bytes after compression */
	uint64_t dsize_total = 0;	/* uncompressed bytes actually decompressed */
	uint64_t compress_time_total = 0;
	uint64_t decompress_time_total = 0;

	for (;;) {
		char *p = ubuf1;
		ssize_t bytes_read;
		size_t usize;
		size_t csize;
		bool ok;
		uint64_t start_time;

		/* Read the next chunk of data.  read() may return short
		 * counts, so keep going until the chunk is full or EOF. */
		do {
			bytes_read = read(fd, p, ubuf1 + max_chunk_size - p);
			if (bytes_read < 0) {
				fprintf(stderr, "ERROR: Read error: %s\n",
					strerror(errno));
				return 1;
			}
			p += bytes_read;
		} while (bytes_read != 0 && p != ubuf1 + max_chunk_size);

		usize = p - ubuf1;

		if (usize == 0) /* End of file? */
			break;

		/* Compress the chunk of data.  Only 'usize - 1' bytes of
		 * output space are offered, so success implies the chunk
		 * actually got smaller. */
		usize_total += usize;
		start_time = current_time();
		csize = deflate_compress(compressor, ubuf1, usize,
					 cbuf, usize - 1);
		compress_time_total += current_time() - start_time;

		if (csize) {
			/* Successfully compressed the chunk of data. */
			csize_total += csize;

			/* Decompress the data we just compressed and compare
			 * the result with the original. */
			start_time = current_time();
			ok = deflate_decompress(decompressor, cbuf, csize,
						ubuf2, usize);
			decompress_time_total += current_time() - start_time;
			dsize_total += usize;

			if (!ok) {
				fprintf(stderr, "ERROR: Failed to "
					"decompress data\n");
				return 1;
			}

			if (memcmp(ubuf1, ubuf2, usize)) {
				fprintf(stderr, "ERROR: Data did not "
					"decompress to original\n");
				return 1;
			}
		} else {
			/* Chunk of data did not compress to less than its
			 * original size. */
			csize_total += usize;
		}
	}

	if (usize_total == 0) {
		printf("\tEmpty input.\n");
		return 0;
	}

	/* Avoid division by zero for inputs too small to time. */
	if (compress_time_total == 0)
		compress_time_total++;
	if (decompress_time_total == 0)
		decompress_time_total++;

	/* The fractional part is in thousandths of a percent and therefore
	 * must be zero-padded to three digits (45.007%, not 45.7%). */
	printf("\tCompressed %"PRIu64 " => %"PRIu64" bytes (%u.%03u%%)\n",
	       usize_total, csize_total,
	       (unsigned int)(csize_total * 100 / usize_total),
	       (unsigned int)(csize_total * 100000 / usize_total % 1000));
	/* Times are in nanoseconds: bytes * 1000 / ns == MB/s. */
	printf("\tCompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n",
	       compress_time_total / TIME_UNIT_PER_MS,
	       1000 * usize_total / compress_time_total);
	/* Only chunks that were really decompressed count towards the
	 * decompression throughput; incompressible chunks were skipped. */
	printf("\tDecompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n",
	       decompress_time_total / TIME_UNIT_PER_MS,
	       1000 * dsize_total / decompress_time_total);

	return 0;
}
/*
 * Entry point: benchmark.  Usage: benchmark FILE [CHUNK_SIZE [LEVEL]]
 *
 * FILE is compressed and decompressed in chunks of CHUNK_SIZE bytes (default
 * 32768) at compression level LEVEL (default 6).
 *
 * Exit status: 0 on success, 1 on a runtime error, 2 on a usage error.
 */
int
main(int argc, char **argv)
{
	const char *filename;
	uint32_t chunk_size = 32768;
	unsigned int compression_level = 6;
	char *ubuf1 = NULL;
	char *ubuf2 = NULL;
	char *cbuf = NULL;
	struct deflate_compressor *compressor = NULL;
	struct deflate_decompressor *decompressor = NULL;
	int fd = -1;
	int ret;

	/* Program name plus one to three arguments. */
	if (argc < 2 || argc > 4) {
		fprintf(stderr, "Usage: %s FILE [CHUNK_SIZE [LEVEL]]\n", argv[0]);
		ret = 2;
		goto out;
	}

	filename = argv[1];

	if (argc >= 3) {
		unsigned long n = strtoul(argv[2], NULL, 10);

		/* A chunk size of 0 (also what strtoul() yields for
		 * non-numeric input) would make 'chunk_size - 1' wrap around,
		 * and larger values do not fit the 32-bit chunk size. */
		if (n == 0 || n > UINT32_MAX) {
			fprintf(stderr, "ERROR: Invalid chunk size \"%s\"\n",
				argv[2]);
			ret = 2;
			goto out;
		}
		chunk_size = n;
	}

	if (argc >= 4)
		compression_level = strtoul(argv[3], NULL, 10);

	printf("DEFLATE compression with %"PRIu32" byte chunks (level %u)\n",
	       chunk_size, compression_level);

	compressor = deflate_alloc_compressor(compression_level);
	if (!compressor) {
		fprintf(stderr, "ERROR: Failed to create compressor\n");
		ret = 1;
		goto out;
	}

	decompressor = deflate_alloc_decompressor();
	if (!decompressor) {
		fprintf(stderr, "ERROR: Failed to create decompressor\n");
		ret = 1;
		goto out;
	}

	ubuf1 = malloc(chunk_size);
	ubuf2 = malloc(chunk_size);
	/* Only 'chunk_size - 1' bytes are ever used, but allocating the full
	 * size avoids a zero-byte malloc() (which may return NULL) when the
	 * chunk size is 1. */
	cbuf = malloc(chunk_size);
	if (!ubuf1 || !ubuf2 || !cbuf) {
		fprintf(stderr, "ERROR: Insufficient memory\n");
		ret = 1;
		goto out;
	}

	fd = open(filename, O_RDONLY | O_BINARY);
	if (fd < 0) {
		fprintf(stderr, "ERROR: Can't open \"%s\" for reading: %s\n",
			filename, strerror(errno));
		ret = 1;
		goto out;
	}

	ret = do_benchmark(fd, ubuf1, ubuf2, cbuf, chunk_size,
			   compressor, decompressor);
out:
	/* Release only what was actually acquired; this label is reached from
	 * every error path above. */
	if (fd >= 0)
		close(fd);
	free(cbuf);
	free(ubuf2);
	free(ubuf1);
	if (decompressor)
		deflate_free_decompressor(decompressor);
	if (compressor)
		deflate_free_compressor(compressor);
	return ret;
}

131
libdeflate.h Normal file
View File

@ -0,0 +1,131 @@
/*
* libdeflate.h
*
* Public header for the DEFLATE compression library.
*/
#ifndef LIBDEFLATE_H
#define LIBDEFLATE_H

/* System headers are included before the extern "C" block so that their
 * contents are never given C linkage by a C++ translation unit. */
#include <stdbool.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/* ========================================================================== */
/*                             Compression                                    */
/* ========================================================================== */

/* Opaque compressor state; create with deflate_alloc_compressor(). */
struct deflate_compressor;

/*
 * deflate_alloc_compressor() allocates a new DEFLATE compressor.
 * 'compression_level' is the compression level on a zlib-like scale (1 =
 * fastest, 6 = medium/default, 9 = slowest).  The return value is a pointer to
 * the new DEFLATE compressor, or NULL if out of memory.
 *
 * Note: the sliding window size is defined at compilation time (default 32768).
 */
extern struct deflate_compressor *
deflate_alloc_compressor(unsigned int compression_level);

/*
 * deflate_compress() performs DEFLATE compression on a buffer of data.  The
 * function attempts to compress 'in_nbytes' bytes of data located at 'in' and
 * write the results to 'out', which has space for 'out_nbytes_avail' bytes.
 * The return value is the compressed size in bytes, or 0 if the data could not
 * be compressed to 'out_nbytes_avail' bytes or fewer.
 */
extern size_t
deflate_compress(struct deflate_compressor *compressor,
		 const void *in, size_t in_nbytes,
		 void *out, size_t out_nbytes_avail);

/*
 * Like deflate_compress(), but store the data in the zlib wrapper format.
 */
extern size_t
zlib_compress(struct deflate_compressor *compressor,
	      const void *in, size_t in_nbytes,
	      void *out, size_t out_nbytes_avail);

/*
 * Like deflate_compress(), but store the data in the gzip wrapper format.
 * 'out_nbytes_avail' must also cover the gzip header and footer.
 */
extern size_t
gzip_compress(struct deflate_compressor *compressor,
	      const void *in, size_t in_nbytes,
	      void *out, size_t out_nbytes_avail);

/*
 * deflate_free_compressor() frees a DEFLATE compressor that was allocated with
 * deflate_alloc_compressor().
 */
extern void
deflate_free_compressor(struct deflate_compressor *compressor);

/* ========================================================================== */
/*                             Decompression                                  */
/* ========================================================================== */

/* Opaque decompressor state; create with deflate_alloc_decompressor(). */
struct deflate_decompressor;

/*
 * deflate_alloc_decompressor() allocates a new DEFLATE decompressor.  The
 * return value is a pointer to the new DEFLATE decompressor, or NULL if out of
 * memory.
 *
 * This function takes no parameters, and the returned decompressor is valid for
 * decompressing data that was compressed at any compression level and with any
 * sliding window size.
 */
extern struct deflate_decompressor *
deflate_alloc_decompressor(void);

/*
 * deflate_decompress() decompresses 'in_nbytes' bytes of DEFLATE-compressed
 * data at 'in' and writes the uncompressed data, which had original size
 * 'out_nbytes', to 'out'.  The return value is true if decompression was
 * successful, or false if the compressed data was invalid.
 *
 * To be clear: the uncompressed size must be known *exactly* and passed as
 * 'out_nbytes'.
 */
extern bool
deflate_decompress(struct deflate_decompressor *decompressor,
		   const void *in, size_t in_nbytes,
		   void *out, size_t out_nbytes);

/*
 * Like deflate_decompress(), but assumes the zlib wrapper format instead of raw
 * DEFLATE.
 */
extern bool
zlib_decompress(struct deflate_decompressor *decompressor,
		const void *in, size_t in_nbytes,
		void *out, size_t out_nbytes);

/*
 * Like deflate_decompress(), but assumes the gzip wrapper format instead of raw
 * DEFLATE.
 */
extern bool
gzip_decompress(struct deflate_decompressor *decompressor,
		const void *in, size_t in_nbytes,
		void *out, size_t out_nbytes);

/*
 * deflate_free_decompressor() frees a DEFLATE decompressor that was allocated
 * with deflate_alloc_decompressor().
 */
extern void
deflate_free_decompressor(struct deflate_decompressor *decompressor);

#ifdef __cplusplus
}
#endif

#endif /* LIBDEFLATE_H */

19
src/adler32.c Normal file
View File

@ -0,0 +1,19 @@
/*
* adler32.c
*
* Adler-32 checksum algorithm.
*/
#include "adler32.h"
/*
 * Compute the Adler-32 checksum of the 'size' bytes starting at 'buffer'.
 *
 * Two running sums are kept modulo 65521: the low sum starts at 1 and
 * accumulates the bytes, the high sum starts at 0 and accumulates the low
 * sum.  The checksum is the high sum in the upper 16 bits and the low sum in
 * the lower 16 bits.
 */
u32
adler32(const u8 *buffer, size_t size)
{
	u32 low_sum = 1;
	u32 high_sum = 0;

	for (; size != 0; size--, buffer++) {
		low_sum += *buffer;
		low_sum %= 65521;
		high_sum += low_sum;
		high_sum %= 65521;
	}

	return low_sum | (high_sum << 16);
}

12
src/adler32.h Normal file
View File

@ -0,0 +1,12 @@
/*
* adler32.h
*
* Adler-32 checksum algorithm.
*/
#pragma once
#include "types.h"
/* Return the Adler-32 checksum of the 'size' bytes starting at 'buffer'. */
extern u32
adler32(const u8 *buffer, size_t size);

80
src/bitops.h Normal file
View File

@ -0,0 +1,80 @@
/*
* bitops.h
*
* Inline functions for bit manipulation.
*/
#pragma once
#include "compiler.h"
#include "types.h"
/*
 * Find Last Set bit: return the zero-based index of the most significant set
 * bit of a 32-bit value.  Uses the compiler's primitive when one is provided;
 * the portable fallback returns 0 for an input of 0.
 */
static inline unsigned fls32(u32 v)
{
#ifdef compiler_fls32
	return compiler_fls32(v);
#else
	unsigned index;

	/* Each halving that leaves more than the lowest bit moves the
	 * answer up by one position. */
	for (index = 0; v > 1; v >>= 1)
		index++;
	return index;
#endif
}
/*
 * 64-bit variant of fls32(): zero-based index of the most significant set
 * bit.  The portable fallback returns 0 for an input of 0.
 */
static inline unsigned fls64(u64 v)
{
#ifdef compiler_fls64
	return compiler_fls64(v);
#else
	unsigned index;

	for (index = 0; v > 1; v >>= 1)
		index++;
	return index;
#endif
}
/*
 * Find Last Set bit of a machine word: dispatches to fls32() or fls64()
 * according to WORDSIZE, which must be 4 or 8 (checked at build time).
 */
static inline unsigned flsw(machine_word_t v)
{
	BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
	return (WORDSIZE == 4) ? fls32(v) : fls64(v);
}
/*
 * Find First Set bit: return the zero-based index of the least significant
 * set bit of a 32-bit value.  'v' must be nonzero: the portable fallback
 * below never terminates for an input of 0.
 */
static inline unsigned ffs32(u32 v)
{
#ifdef compiler_ffs32
	return compiler_ffs32(v);
#else
	unsigned index = 0;

	/* Shift trailing zero bits out until the lowest bit is set. */
	while ((v & 1) == 0) {
		index++;
		v >>= 1;
	}
	return index;
#endif
}
/*
 * 64-bit variant of ffs32(): zero-based index of the least significant set
 * bit.  'v' must be nonzero: the portable fallback never terminates for 0.
 */
static inline unsigned ffs64(u64 v)
{
#ifdef compiler_ffs64
	return compiler_ffs64(v);
#else
	unsigned index = 0;

	while ((v & 1) == 0) {
		index++;
		v >>= 1;
	}
	return index;
#endif
}
/*
 * Find First Set bit of a machine word: dispatches to ffs32() or ffs64()
 * according to WORDSIZE, which must be 4 or 8 (checked at build time).
 */
static inline unsigned ffsw(machine_word_t v)
{
	BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
	return (WORDSIZE == 4) ? ffs32(v) : ffs64(v);
}

279
src/bt_matchfinder.h Normal file
View File

@ -0,0 +1,279 @@
/*
* bt_matchfinder.h
*
* This is a Binary Tree (bt) based matchfinder.
*
* The data structure is a hash table where each hash bucket contains a binary
* tree of sequences, referenced by position. The sequences in the binary tree
* are ordered such that a left child is lexicographically lesser than its
* parent, and a right child is lexicographically greater than its parent.
*
* For each sequence (position) in the input, the first 3 bytes are hashed and
* the appropriate binary tree is re-rooted at that sequence (position).
* Since the sequences are inserted in order, each binary tree maintains the
* invariant that each child node has greater match offset than its parent.
*
* While inserting a sequence, we may search the binary tree for matches with
* that sequence. At each step, the length of the match is computed. The
* search ends when the sequences get too far away (outside of the sliding
* window), or when the binary tree ends (in the code this is the same check as
* "too far away"), or when 'max_search_depth' positions have been searched, or
* when a match of at least 'nice_len' bytes has been found.
*
* Notes:
*
* - Typically, we need to search more nodes to find a given match in a
* binary tree versus in a linked list. However, a binary tree has more
* overhead than a linked list: it needs to be kept sorted, and the inner
* search loop is more complicated. As a result, binary trees are best
* suited for compression modes where the potential matches are searched
* more thoroughly.
*
* - Since no attempt is made to keep the binary trees balanced, it's
* essential to have the 'max_search_depth' cutoff. Otherwise it could
* take quadratic time to run data through the matchfinder.
*/
#pragma once
#include "lz_extend.h"
#include "lz_hash3.h"
#include "matchfinder_common.h"
/*
 * Log2 of the number of hash buckets.  May be predefined by the includer;
 * otherwise it is 14 when the window order is below 14, and 15 otherwise.
 */
#ifndef BT_MATCHFINDER_HASH_ORDER
# if MATCHFINDER_WINDOW_ORDER < 14
# define BT_MATCHFINDER_HASH_ORDER 14
# else
# define BT_MATCHFINDER_HASH_ORDER 15
# endif
#endif
/* Number of entries in the hash table. */
#define BT_MATCHFINDER_HASH_LENGTH (1UL << BT_MATCHFINDER_HASH_ORDER)
/* Total number of pos_t entries: the hash table plus two child slots for each
 * window position. */
#define BT_MATCHFINDER_TOTAL_LENGTH \
(BT_MATCHFINDER_HASH_LENGTH + (2UL * MATCHFINDER_WINDOW_SIZE))
/*
 * Binary-tree matchfinder state.  The union gives two views of the same
 * storage: 'mf_data' is the whole array (used when every entry is rebased at
 * once), while 'hash_tab' and 'child_tab' name its two parts.
 */
struct bt_matchfinder {
union {
/* Flat view of all entries. */
pos_t mf_data[BT_MATCHFINDER_TOTAL_LENGTH];
struct {
/* Root position of the binary tree for each hash bucket. */
pos_t hash_tab[BT_MATCHFINDER_HASH_LENGTH];
/* Two child links per position: entry 2*slot holds the child with
 * lesser sequences, entry 2*slot + 1 the child with greater ones. */
pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
};
};
} _aligned_attribute(MATCHFINDER_ALIGNMENT);
/*
 * Prepare the matchfinder for a new input buffer by passing the hash table
 * (only; the child table is not touched here) to matchfinder_init().
 */
static inline void
bt_matchfinder_init(struct bt_matchfinder *mf)
{
matchfinder_init(mf->hash_tab, BT_MATCHFINDER_HASH_LENGTH);
}
#if MATCHFINDER_IS_SLIDING
/*
 * Slide the window: pass every entry of the matchfinder (hash table and child
 * table alike, via the flat 'mf_data' view) to matchfinder_rebase().  Only
 * available when the matchfinder is built in sliding mode.
 */
static inline void
bt_matchfinder_slide_window(struct bt_matchfinder *mf)
{
matchfinder_rebase(mf->mf_data, BT_MATCHFINDER_TOTAL_LENGTH);
}
#endif
/*
 * Find matches with the current sequence, and insert the current sequence
 * into the binary tree for its hash bucket as the new root.
 *
 * @mf
 *	The matchfinder structure.
 * @in_base
 *	Pointer to the next byte in the input buffer to process _at the last
 *	time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
 * @in_next
 *	Pointer to the next byte in the input buffer to process.  This is the
 *	pointer to the bytes being matched against.
 * @max_len
 *	Maximum match length to return.
 * @nice_len
 *	Stop searching if a match of at least this length is found.
 * @max_search_depth
 *	Limit on the number of potential matches to consider.
 * @prev_hash
 *	In/out.  On entry, the hash bucket to use for the sequence at @in_next;
 *	on return, the hash of the sequence at @in_next + 1, ready to be passed
 *	to the call for the next position.  It is not updated when the function
 *	returns early because @max_len is too small.
 * @matches
 *	Space to write the matches that are found.
 *
 * Returns the number of matches found, which may be anywhere from 0 to
 * (nice_len - 3 + 1), inclusively.  The matches are written to @matches in
 * order of strictly increasing length and strictly increasing offset.  The
 * minimum match length is assumed to be 3.
 */
static inline unsigned
bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
const u8 * const in_base,
const u8 * const in_next,
const unsigned max_len,
const unsigned nice_len,
const unsigned max_search_depth,
unsigned long *prev_hash,
struct lz_match * const restrict matches)
{
struct lz_match *lz_matchptr = matches;
unsigned depth_remaining = max_search_depth;
unsigned hash;
pos_t cur_match;
const u8 *matchptr;
unsigned best_len;
pos_t *pending_lt_ptr, *pending_gt_ptr;
unsigned best_lt_len, best_gt_len;
unsigned len;
pos_t *children;
/* One byte beyond the hashed bytes is needed because the hash computed
 * below is for the *next* position (in_next + 1). */
if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1))
return 0;
/* Use the bucket computed by the previous call, then compute and prefetch
 * the bucket for the next position. */
hash = *prev_hash;
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
prefetch(&mf->hash_tab[*prev_hash]);
/* The current position becomes the new root of the bucket's tree; the old
 * root is the first candidate match. */
cur_match = mf->hash_tab[hash];
mf->hash_tab[hash] = in_next - in_base;
/* Only matches strictly longer than 2 bytes are reported. */
best_len = 2;
/* Child links of the new root, still to be filled in: the "less than"
 * subtree and the "greater than" subtree. */
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
best_lt_len = 0;
best_gt_len = 0;
for (;;) {
/* Stop when the candidate is not within the window (which, per the file
 * header comment, also covers the end of the tree) or the search depth is
 * exhausted; both pending links are terminated. */
if (!matchfinder_match_in_window(cur_match,
in_base, in_next) ||
!depth_remaining--)
{
*pending_lt_ptr = MATCHFINDER_INITVAL;
*pending_gt_ptr = MATCHFINDER_INITVAL;
return lz_matchptr - matches;
}
matchptr = &in_base[cur_match];
/* Start comparing at the shorter of the two lengths already matched on
 * each side.
 * NOTE(review): presumably the sort order guarantees that many leading
 * bytes are equal - confirm. */
len = min(best_lt_len, best_gt_len);
children = &mf->child_tab[(unsigned long)
matchfinder_slot_for_match(cur_match) << 1];
if (matchptr[len] == in_next[len]) {
len = lz_extend(in_next, matchptr, len + 1, max_len);
if (len > best_len) {
/* New longest match: record it. */
best_len = len;
lz_matchptr->length = len;
lz_matchptr->offset = in_next - matchptr;
lz_matchptr++;
if (len >= nice_len) {
/* Good enough: the new root takes over the candidate's two
 * children directly, and the search ends. */
*pending_lt_ptr = children[0];
*pending_gt_ptr = children[1];
return lz_matchptr - matches;
}
}
}
/* NOTE(review): if lz_extend() returned max_len, the comparison below
 * reads the byte at index max_len; presumably callers keep nice_len <=
 * max_len so that case returns above - confirm. */
if (matchptr[len] < in_next[len]) {
/* Candidate sorts before the current sequence: link it into the
 * "less than" subtree and continue with its greater child. */
*pending_lt_ptr = cur_match;
pending_lt_ptr = &children[1];
cur_match = *pending_lt_ptr;
best_lt_len = len;
} else {
/* Candidate sorts after the current sequence: link it into the
 * "greater than" subtree and continue with its lesser child. */
*pending_gt_ptr = cur_match;
pending_gt_ptr = &children[0];
cur_match = *pending_gt_ptr;
best_gt_len = len;
}
}
}
/*
 * Advance the match-finder, but don't search for matches.  The current
 * sequence is still inserted into the binary tree for its hash bucket.
 *
 * @mf
 *	The matchfinder structure.
 * @in_base
 *	Pointer to the next byte in the input buffer to process _at the last
 *	time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
 * @in_next
 *	Pointer to the next byte in the input buffer to process.
 * @in_end
 *	Pointer to the end of the input buffer.
 * @nice_len
 *	Stop searching if a match of at least this length is found.
 * @max_search_depth
 *	Limit on the number of potential matches to consider.
 * @prev_hash
 *	In/out.  On entry, the hash bucket to use for the sequence at @in_next;
 *	on return, the hash of the sequence at @in_next + 1.  It is not updated
 *	when the function returns early because too few bytes remain.
 */
static inline void
bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
const u8 * const in_base,
const u8 * const in_next,
const u8 * const in_end,
const unsigned nice_len,
const unsigned max_search_depth,
unsigned long *prev_hash)
{
unsigned depth_remaining = max_search_depth;
unsigned hash;
pos_t cur_match;
const u8 *matchptr;
pos_t *pending_lt_ptr, *pending_gt_ptr;
unsigned best_lt_len, best_gt_len;
unsigned len;
pos_t *children;
/* One byte beyond the hashed bytes is needed because the hash computed
 * below is for the *next* position (in_next + 1). */
if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1))
return;
/* Use the bucket computed by the previous call, then compute and prefetch
 * the bucket for the next position. */
hash = *prev_hash;
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
prefetch(&mf->hash_tab[*prev_hash]);
/* The current position becomes the new root of the bucket's tree. */
cur_match = mf->hash_tab[hash];
mf->hash_tab[hash] = in_next - in_base;
/* Redundant: depth_remaining already has this value from its initializer. */
depth_remaining = max_search_depth;
/* Child links of the new root, still to be filled in. */
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
best_lt_len = 0;
best_gt_len = 0;
for (;;) {
/* Stop when the candidate is not within the window or the search depth
 * is exhausted; both pending links are terminated. */
if (!matchfinder_match_in_window(cur_match,
in_base, in_next) ||
!depth_remaining--)
{
*pending_lt_ptr = MATCHFINDER_INITVAL;
*pending_gt_ptr = MATCHFINDER_INITVAL;
return;
}
matchptr = &in_base[cur_match];
len = min(best_lt_len, best_gt_len);
children = &mf->child_tab[(unsigned long)
matchfinder_slot_for_match(cur_match) << 1];
if (matchptr[len] == in_next[len]) {
/* Extension is capped at nice_len here.
 * NOTE(review): nothing in this function checks nice_len against
 * in_end - in_next; presumably the caller guarantees it - confirm. */
len = lz_extend(in_next, matchptr, len + 1, nice_len);
if (len == nice_len) {
/* Long enough: the new root takes over the candidate's two
 * children directly, and the walk ends. */
*pending_lt_ptr = children[0];
*pending_gt_ptr = children[1];
return;
}
}
if (matchptr[len] < in_next[len]) {
/* Candidate sorts before the current sequence. */
*pending_lt_ptr = cur_match;
pending_lt_ptr = &children[1];
cur_match = *pending_lt_ptr;
best_lt_len = len;
} else {
/* Candidate sorts after the current sequence. */
*pending_gt_ptr = cur_match;
pending_gt_ptr = &children[0];
cur_match = *pending_gt_ptr;
best_gt_len = len;
}
}
}

52
src/compiler-gcc.h Normal file
View File

@ -0,0 +1,52 @@
/*
* compiler-gcc.h
*
* Compiler and platform-specific definitions for the GNU C compiler.
*/
#pragma once
/* Marker for functions exported from the library.  The build hides symbols by
 * default (-fvisibility=hidden), so exports must be marked explicitly. */
#ifdef __WIN32__
# define LIBEXPORT __declspec(dllexport)
#else
# define LIBEXPORT __attribute__((visibility("default")))
#endif
/* Branch-prediction hints; '!!' normalizes the expression to 0 or 1. */
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
/* Hint that the memory at 'addr' will be accessed soon. */
#define prefetch(addr) __builtin_prefetch(addr)
/* Redefines the 'inline' keyword: every function declared 'inline' after this
 * point also carries the always_inline attribute. */
#define inline inline __attribute__((always_inline))
#define _aligned_attribute(n) __attribute__((aligned(n)))
#define _packed_attribute __attribute__((packed))
/* Evaluates to nonzero when compiling for a big-endian target. */
#define CPU_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
/* Unaligned access rating: 3 on x86, 2 on ARM targets that advertise unaligned
 * support, 0 elsewhere.  compiler.h derives the UNALIGNED_ACCESS_IS_* macros
 * from this value. */
#if defined(__x86_64__) || defined(__i386__)
# define UNALIGNED_ACCESS_SPEED 3
#elif defined(__ARM_FEATURE_UNALIGNED) && (__ARM_FEATURE_UNALIGNED == 1)
# define UNALIGNED_ACCESS_SPEED 2
#else
# define UNALIGNED_ACCESS_SPEED 0
#endif
/* min()/max() copy each argument into a temporary first, so each argument is
 * evaluated exactly once (GNU statement expressions). */
#define min(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
(_a < _b) ? _a : _b; })
#define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
(_a > _b) ? _a : _b; })
/* Exchange the values of two lvalues of the same type. */
#define swap(a, b) ({ __typeof__(a) _a = a; (a) = (b); (b) = _a; })
/* Byte-swap builtins, enabled according to the GCC version checks below. */
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
# define compiler_bswap32 __builtin_bswap32
# define compiler_bswap64 __builtin_bswap64
#endif
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
# define compiler_bswap16 __builtin_bswap16
#endif
/* Bit-scan primitives built on count-leading/trailing-zeros.  The underlying
 * builtins have undefined results for an argument of 0. */
#define compiler_fls32(n) (31 - __builtin_clz(n))
#define compiler_fls64(n) (63 - __builtin_clzll(n))
#define compiler_ffs32(n) __builtin_ctz(n)
#define compiler_ffs64(n) __builtin_ctzll(n)

60
src/compiler.h Normal file
View File

@ -0,0 +1,60 @@
/*
* compiler.h
*
* Compiler and platform-specific definitions.
*/
#pragma once
/* Pull in the compiler-specific definitions; only GNU C compatible compilers
 * are recognized. */
#ifdef __GNUC__
# include "compiler-gcc.h"
#else
# warning "Unrecognized compiler. Please add a header file for your compiler."
#endif
/* ---- Optional definitions: harmless fallbacks when not provided ---- */
#ifndef LIBEXPORT
# define LIBEXPORT
#endif
/* Compile-time assertion: a true 'condition' yields a negative array size,
 * which fails to compile. */
#ifndef BUILD_BUG_ON
# define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#endif
#ifndef likely
# define likely(expr) (expr)
#endif
#ifndef unlikely
# define unlikely(expr) (expr)
#endif
#ifndef prefetch
# define prefetch(addr)
#endif
/* ---- Required definitions: there is no portable fallback ---- */
#ifndef _aligned_attribute
# error "missing required definition of _aligned_attribute"
#endif
#ifndef _packed_attribute
# error "missing required definition of _packed_attribute"
#endif
#ifndef CPU_IS_BIG_ENDIAN
# error "missing required endianness definition"
#endif
#define CPU_IS_LITTLE_ENDIAN (!CPU_IS_BIG_ENDIAN)
/* Unaligned access rating; 0 (not allowed) is assumed when unknown. */
#ifndef UNALIGNED_ACCESS_SPEED
# warning "assuming unaligned accesses are not allowed"
# define UNALIGNED_ACCESS_SPEED 0
#endif
#define UNALIGNED_ACCESS_IS_ALLOWED (UNALIGNED_ACCESS_SPEED >= 1)
#define UNALIGNED_ACCESS_IS_FAST (UNALIGNED_ACCESS_SPEED >= 2)
#define UNALIGNED_ACCESS_IS_VERY_FAST (UNALIGNED_ACCESS_SPEED >= 3)
/* min(), max() and swap() must be supplied as macros by the compiler header. */
#if !defined(min) || !defined(max) || !defined(swap)
# error "missing required definitions of min(), max(), and swap() macros"
#endif

73
src/crc32.c Normal file
View File

@ -0,0 +1,73 @@
/*
* crc32.c
*
* CRC-32 checksum algorithm.
*/
#include "crc32.h"
/*
 * Lookup table for the byte-at-a-time CRC-32 computation in crc32() below,
 * indexed by (low byte of the running CRC) XOR (input byte).  Entry 128 is
 * 0xedb88320, the bit-reflected CRC-32 polynomial.
 */
static const u32 crc_table[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
0x2d02ef8d,
};
u32
crc32(const u8 *buffer, size_t size)
{
u32 crc = ~0;
for (size_t i = 0; i < size; i++)
crc = crc_table[(u8)crc ^ buffer[i]] ^ (crc >> 8);
return ~crc;
}

12
src/crc32.h Normal file
View File

@ -0,0 +1,12 @@
/*
* crc32.h
*
* CRC-32 checksum algorithm.
*/
#pragma once
#include "types.h"
/* Return the CRC-32 checksum of the 'size' bytes starting at 'buffer'. */
extern u32
crc32(const u8 *buffer, size_t size);

2323
src/deflate_compress.c Normal file

File diff suppressed because it is too large Load Diff

9
src/deflate_compress.h Normal file
View File

@ -0,0 +1,9 @@
#pragma once
/* 'struct deflate_compressor' is private to deflate_compress.c, but zlib header
 * generation needs to be able to query the compression level. */
struct deflate_compressor;
/* Return the compression level of compressor 'c'.  The gzip wrapper uses it to
 * choose the XFL header flags. */
extern unsigned int
deflate_get_compression_level(struct deflate_compressor *c);

59
src/deflate_constants.h Normal file
View File

@ -0,0 +1,59 @@
/*
* deflate_constants.h
*
* Constants for the DEFLATE compression format.
*/
#pragma once
/* Valid block types */
#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0
#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1
#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2
/* Minimum and maximum supported match lengths (in bytes) */
#define DEFLATE_MIN_MATCH_LEN 3
#define DEFLATE_MAX_MATCH_LEN 258
/* Minimum and maximum supported match offsets (in bytes).
 * NOTE(review): RFC 1951 allows distances up to 32768; the maximum here is
 * one less - confirm whether that is a deliberate matchfinder limit. */
#define DEFLATE_MIN_MATCH_OFFSET 1
#define DEFLATE_MAX_MATCH_OFFSET 32767
#define DEFLATE_MAX_WINDOW_SIZE 32768
/* Number of symbols in each Huffman code. Note: for the literal/length
 * and offset codes, these are actually the maximum values; a given block
 * might use fewer symbols. */
#define DEFLATE_NUM_PRECODE_SYMS 19
#define DEFLATE_NUM_LITLEN_SYMS 288
#define DEFLATE_NUM_OFFSET_SYMS 32
/* Division of symbols in the literal/length code: 256 literals, then the
 * end-of-block symbol (256), then the length symbols (288 - 257 = 31). */
#define DEFLATE_NUM_LITERALS 256
#define DEFLATE_END_OF_BLOCK 256
#define DEFLATE_NUM_LEN_SYMS 31
/* Maximum codeword length, in bits, within each Huffman code */
#define DEFLATE_MAX_PRE_CODEWORD_LEN 7
#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15
#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15
/* Maximum possible overrun when decoding codeword lengths */
#define DEFLATE_MAX_LENS_OVERRUN 137
/*
 * Maximum number of extra bits that may be required to represent a match
 * length or offset.
 *
 * TODO: are we going to have full DEFLATE64 support? If so, up to 16
 * length bits must be supported.
 *
 * NOTE(review): RFC 1951 uses at most 13 extra bits for a distance; 14 looks
 * like the DEFLATE64 value - confirm before relying on it.
 */
#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5
#define DEFLATE_MAX_EXTRA_OFFSET_BITS 14
/* The maximum number of bits in which a match can be represented. This
 * is the absolute worst case, which assumes the longest possible Huffman
 * codewords and the maximum numbers of extra bits. */
#define DEFLATE_MAX_MATCH_BITS \
(DEFLATE_MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + \
DEFLATE_MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)

1455
src/deflate_decompress.c Normal file

File diff suppressed because it is too large Load Diff

75
src/endianness.h Normal file
View File

@ -0,0 +1,75 @@
/*
* endianness.h
*
* Inline functions for endianness conversion.
*/
#pragma once
#include "compiler.h"
#include "types.h"
/*
 * Reverse the byte order of a 16-bit value, using the compiler's primitive
 * when one is provided.
 */
static inline u16 bswap16(u16 n)
{
#ifdef compiler_bswap16
	return compiler_bswap16(n);
#else
	/* Swap the two bytes; the conversion to u16 drops the bits shifted
	 * above bit 15. */
	return (u16)((n >> 8) | (n << 8));
#endif
}
/*
 * Reverse the byte order of a 32-bit value, using the compiler's primitive
 * when one is provided.
 */
static inline u32 bswap32(u32 n)
{
#ifdef compiler_bswap32
	return compiler_bswap32(n);
#else
	u32 swapped = 0;
	unsigned i;

	/* Move the bytes across one at a time, lowest byte first, so the
	 * lowest input byte ends up as the highest output byte. */
	for (i = 0; i < 4; i++) {
		swapped = (swapped << 8) | (n & 0xFF);
		n >>= 8;
	}
	return swapped;
#endif
}
/*
 * Reverse the byte order of a 64-bit value, using the compiler's primitive
 * when one is provided.
 */
static inline u64 bswap64(u64 n)
{
#ifdef compiler_bswap64
	return compiler_bswap64(n);
#else
	u64 swapped = 0;
	unsigned i;

	/* Move the bytes across one at a time, lowest byte first, so the
	 * lowest input byte ends up as the highest output byte. */
	for (i = 0; i < 8; i++) {
		swapped = (swapped << 8) | (n & 0xFF);
		n >>= 8;
	}
	return swapped;
#endif
}
/*
 * Conversions between CPU byte order and an explicit little-endian (le) or
 * big-endian (be) byte order.  A conversion to or from the CPU's own byte
 * order is the identity; the other direction is a byte swap.
 */
#if CPU_IS_BIG_ENDIAN
/* Big-endian CPU: little-endian conversions swap, big-endian ones do not. */
# define cpu_to_le16(n) bswap16(n)
# define cpu_to_le32(n) bswap32(n)
# define cpu_to_le64(n) bswap64(n)
# define le16_to_cpu(n) bswap16(n)
# define le32_to_cpu(n) bswap32(n)
# define le64_to_cpu(n) bswap64(n)
# define cpu_to_be16(n) (n)
# define cpu_to_be32(n) (n)
# define cpu_to_be64(n) (n)
# define be16_to_cpu(n) (n)
# define be32_to_cpu(n) (n)
# define be64_to_cpu(n) (n)
#else
/* Little-endian CPU: big-endian conversions swap, little-endian ones do not. */
# define cpu_to_le16(n) (n)
# define cpu_to_le32(n) (n)
# define cpu_to_le64(n) (n)
# define le16_to_cpu(n) (n)
# define le32_to_cpu(n) (n)
# define le64_to_cpu(n) (n)
# define cpu_to_be16(n) bswap16(n)
# define cpu_to_be32(n) bswap32(n)
# define cpu_to_be64(n) bswap64(n)
# define be16_to_cpu(n) bswap16(n)
# define be32_to_cpu(n) bswap32(n)
# define be64_to_cpu(n) bswap64(n)
#endif

64
src/gzip_compress.c Normal file
View File

@ -0,0 +1,64 @@
/*
* gzip_compress.c
*
* Generate DEFLATE-compressed data in the gzip wrapper format.
*/
#include "libdeflate.h"
#include "crc32.h"
#include "deflate_compress.h"
#include "gzip_constants.h"
#include "unaligned.h"
/*
 * Compress @in_size bytes at @in into a gzip stream (RFC 1952) at @out.
 *
 * @c			Compressor passed through to deflate_compress().
 * @out_nbytes_avail	Space available at @out, in bytes.
 *
 * A minimal 10-byte header is written (no optional fields), followed by the
 * DEFLATE data and the 8-byte footer (CRC32, ISIZE).
 *
 * Returns the total number of bytes written, or 0 if @out_nbytes_avail does
 * not exceed the fixed gzip overhead or deflate_compress() returned 0.
 */
LIBEXPORT size_t
gzip_compress(struct deflate_compressor *c, const void *in, size_t in_size,
	      void *out, size_t out_nbytes_avail)
{
	u8 *out_next = out;
	unsigned compression_level;
	u8 xfl;
	size_t deflate_size;

	if (out_nbytes_avail <= GZIP_MIN_OVERHEAD)
		return 0;

	/* ID1 */
	*out_next++ = GZIP_ID1;
	/* ID2 */
	*out_next++ = GZIP_ID2;
	/* CM */
	*out_next++ = GZIP_CM_DEFLATE;
	/* FLG */
	*out_next++ = 0;
	/* MTIME.  RFC 1952 stores every multi-byte field least-significant
	 * byte first, so all such fields use the little-endian helpers. */
	put_unaligned_u32_le(GZIP_MTIME_UNAVAILABLE, out_next);
	out_next += 4;
	/* XFL: hint derived from the compression level. */
	xfl = 0;
	compression_level = deflate_get_compression_level(c);
	if (compression_level < 2)
		xfl |= GZIP_XFL_FASTEST_COMRESSION;
	else if (compression_level >= 8)
		xfl |= GZIP_XFL_SLOWEST_COMRESSION;
	*out_next++ = xfl;
	/* OS */
	*out_next++ = GZIP_OS_UNKNOWN;

	/* Compressed data; reserve room for the footer that follows it. */
	deflate_size = deflate_compress(c, in, in_size, out_next,
					out_nbytes_avail - GZIP_MIN_OVERHEAD);
	if (deflate_size == 0)
		return 0;
	out_next += deflate_size;

	/* CRC32 of the uncompressed data (little-endian). */
	put_unaligned_u32_le(crc32(in, in_size), out_next);
	out_next += 4;
	/* ISIZE: uncompressed size modulo 2^32 (little-endian). */
	put_unaligned_u32_le((u32)in_size, out_next);
	out_next += 4;

	return out_next - (u8 *)out;
}

47
src/gzip_constants.h Normal file
View File

@ -0,0 +1,47 @@
/*
* gzip_constants.h
*
* Constants for the gzip wrapper format.
*/
#pragma once
#include "compiler.h"
/* Size of the fixed part of the header, of the footer, and their sum. */
#define GZIP_MIN_HEADER_SIZE 10
#define GZIP_FOOTER_SIZE 8
#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
/* Magic bytes ID1 and ID2. */
#define GZIP_ID1 0x1F
#define GZIP_ID2 0x8B
/* CM (compression method) value for DEFLATE. */
#define GZIP_CM_DEFLATE 8
/* Bits of the FLG byte. */
#define GZIP_FTEXT 0x01
#define GZIP_FHCRC 0x02
#define GZIP_FEXTRA 0x04
#define GZIP_FNAME 0x08
#define GZIP_FCOMMENT 0x10
#define GZIP_FRESERVED 0xE0
#define GZIP_MTIME_UNAVAILABLE 0
/* Bits of the XFL byte.  NOTE: "COMRESSION" is misspelled, but these names
 * are referenced by other files, so they must be renamed everywhere at once. */
#define GZIP_XFL_SLOWEST_COMRESSION 0x02
#define GZIP_XFL_FASTEST_COMRESSION 0x04
#define GZIP_XFL_RESERVED 0xF9
/* Values of the OS byte. */
#define GZIP_OS_FAT 0
#define GZIP_OS_AMIGA 1
#define GZIP_OS_VMS 2
#define GZIP_OS_UNIX 3
#define GZIP_OS_VM_CMS 4
#define GZIP_OS_ATARI_TOS 5
#define GZIP_OS_HPFS 6
#define GZIP_OS_MACINTOSH 7
#define GZIP_OS_Z_SYSTEM 8
#define GZIP_OS_CP_M 9
#define GZIP_OS_TOPS_20 10
#define GZIP_OS_NTFS 11
#define GZIP_OS_QDOS 12
#define GZIP_OS_RISCOS 13
#define GZIP_OS_UNKNOWN 255

100
src/gzip_decompress.c Normal file
View File

@ -0,0 +1,100 @@
/*
* gzip_decompress.c
*
* Decompress DEFLATE-compressed data wrapped in the gzip format.
*/
#include "libdeflate.h"
#include "crc32.h"
#include "gzip_constants.h"
#include "unaligned.h"
/*
 * Decompress a gzip stream (RFC 1952) of @in_nbytes bytes at @in into @out.
 *
 * @d		Decompressor passed through to deflate_decompress().
 * @out_nbytes	Size passed to deflate_decompress() and checked against the
 *		ISIZE field, so it must equal the uncompressed size.
 *
 * Returns true only if the header is acceptable, deflate_decompress()
 * succeeds, and both the CRC32 and ISIZE footer fields match the output.
 * Optional header fields (extra, name, comment, header CRC) are skipped
 * without being interpreted.
 */
LIBEXPORT bool
gzip_decompress(struct deflate_decompressor *d,
		const void *in, size_t in_nbytes, void *out, size_t out_nbytes)
{
	const u8 *in_next = in;
	const u8 * const in_end = in_next + in_nbytes;
	u8 flg;

	/* Guarantees the fixed header and the footer can be read. */
	if (in_nbytes < GZIP_MIN_OVERHEAD)
		return false;

	/* ID1 */
	if (*in_next++ != GZIP_ID1)
		return false;
	/* ID2 */
	if (*in_next++ != GZIP_ID2)
		return false;
	/* CM */
	if (*in_next++ != GZIP_CM_DEFLATE)
		return false;
	flg = *in_next++;
	/* MTIME */
	in_next += 4;
	/* XFL */
	if (*in_next++ & GZIP_XFL_RESERVED)
		return false;
	/* OS */
	in_next += 1;

	if (flg & GZIP_FRESERVED)
		return false;

	/* Extra field.  XLEN, like every multi-byte gzip field, is stored
	 * least-significant byte first. */
	if (flg & GZIP_FEXTRA) {
		u16 xlen = get_unaligned_u16_le(in_next);
		in_next += 2;

		if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
			return false;

		in_next += xlen;
	}

	/* Original file name (zero terminated) */
	if (flg & GZIP_FNAME) {
		while (*in_next != 0 && ++in_next != in_end)
			;
		/* Step over the terminator if one was found. */
		if (in_next != in_end)
			in_next++;
		if (in_end - in_next < GZIP_FOOTER_SIZE)
			return false;
	}

	/* File comment (zero terminated) */
	if (flg & GZIP_FCOMMENT) {
		while (*in_next != 0 && ++in_next != in_end)
			;
		if (in_next != in_end)
			in_next++;
		if (in_end - in_next < GZIP_FOOTER_SIZE)
			return false;
	}

	/* CRC16 for gzip header (skipped, not verified) */
	if (flg & GZIP_FHCRC) {
		in_next += 2;
		if (in_end - in_next < GZIP_FOOTER_SIZE)
			return false;
	}

	/* Compressed data: everything between the header and the footer. */
	if (!deflate_decompress(d, in_next, in_end - GZIP_FOOTER_SIZE - in_next,
				out, out_nbytes))
		return false;

	in_next = in_end - GZIP_FOOTER_SIZE;

	/* CRC32 of the uncompressed data (little-endian). */
	if (crc32(out, out_nbytes) != get_unaligned_u32_le(in_next))
		return false;
	in_next += 4;

	/* ISIZE: uncompressed size modulo 2^32 (little-endian). */
	if ((u32)out_nbytes != get_unaligned_u32_le(in_next))
		return false;

	return true;
}

235
src/hc_matchfinder.h Normal file
View File

@ -0,0 +1,235 @@
/*
* hc_matchfinder.h
*
* This is a Hash Chain (hc) based matchfinder.
*
* The data structure is a hash table where each hash bucket contains a linked
* list of sequences, referenced by position.
*
* For each sequence (position) in the input, the first 3 bytes are hashed and
* that sequence (position) is prepended to the appropriate linked list in the
* hash table. Since the sequences are inserted in order, each list is always
* sorted by increasing match offset.
*
* At the same time as inserting a sequence, we may search the linked list for
* matches with that sequence. At each step, the length of the match is
* computed. The search ends when the sequences get too far away (outside of
* the sliding window), or when the list ends (in the code this is the same
* check as "too far away"), or when 'max_search_depth' positions have been
* searched, or when a match of at least 'nice_len' bytes has been found.
*/
#pragma once
#include "lz_extend.h"
#include "lz_hash3.h"
#include "matchfinder_common.h"
#include "unaligned.h"
/*
 * log2 of the number of hash buckets.  May be predefined by the includer;
 * otherwise it defaults to 14 for window orders below 14 and to 15 for
 * larger windows.
 */
#ifndef HC_MATCHFINDER_HASH_ORDER
# if MATCHFINDER_WINDOW_ORDER < 14
# define HC_MATCHFINDER_HASH_ORDER 14
# else
# define HC_MATCHFINDER_HASH_ORDER 15
# endif
#endif
/* Number of hash buckets. */
#define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
/* Total number of pos_t entries: hash buckets plus one link per window
 * position. */
#define HC_MATCHFINDER_TOTAL_LENGTH \
(HC_MATCHFINDER_HASH_LENGTH + MATCHFINDER_WINDOW_SIZE)
/*
 * Hash chain matchfinder state.
 *
 * 'hash_tab' holds the head position of each hash chain and 'next_tab' holds,
 * for each window slot, the next (older) position in the same chain.
 * 'mf_data' overlays both tables as one flat array, so that
 * hc_matchfinder_slide_window() can rebase every entry in a single call.
 */
struct hc_matchfinder {
union {
pos_t mf_data[HC_MATCHFINDER_TOTAL_LENGTH];
struct {
pos_t hash_tab[HC_MATCHFINDER_HASH_LENGTH];
pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
};
};
} _aligned_attribute(MATCHFINDER_ALIGNMENT);
static inline void
hc_matchfinder_init(struct hc_matchfinder *mf)
{
matchfinder_init(mf->hash_tab, HC_MATCHFINDER_HASH_LENGTH);
}
#if MATCHFINDER_IS_SLIDING
/*
 * Rebase every stored position (hash buckets and chain links alike) after
 * the window has slid.  Both tables are covered through the flat 'mf_data'
 * view of the structure.
 */
static inline void
hc_matchfinder_slide_window(struct hc_matchfinder *mf)
{
	pos_t * const all_entries = mf->mf_data;

	matchfinder_rebase(all_entries, HC_MATCHFINDER_TOTAL_LENGTH);
}
#endif
/*
 * Find the longest match longer than 'best_len'.
 *
 * The current position is always inserted into its hash chain first (unless
 * fewer than LZ_HASH_REQUIRED_NBYTES bytes are available), whether or not a
 * search then takes place.
 *
 * @mf
 *	The matchfinder structure.
 * @in_base
 *	Pointer to the next byte in the input buffer to process _at the last
 *	time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
 * @in_next
 *	Pointer to the next byte in the input buffer to process.  This is the
 *	pointer to the bytes being matched against.
 * @best_len
 *	Only matches strictly longer than this are reported.
 * @max_len
 *	Maximum match length to return.
 * @nice_len
 *	Stop searching if a match of at least this length is found.
 * @max_search_depth
 *	Limit on the number of potential matches to consider.
 * @offset_ret
 *	The match offset is returned here.  It is 0 when no match longer than
 *	'best_len' was found.
 *
 * Return the length of the match found, or 'best_len' if no match longer than
 * 'best_len' was found.
 */
static inline unsigned
hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
			     const u8 * const in_base,
			     const u8 * const in_next,
			     unsigned best_len,
			     const unsigned max_len,
			     const unsigned nice_len,
			     const unsigned max_search_depth,
			     unsigned *offset_ret)
{
	unsigned depth_remaining = max_search_depth;
	/* Start at 'in_next' so that the offset stored at 'out' is a
	 * well-defined 0 when no match is found, instead of reading an
	 * indeterminate pointer. */
	const u8 *best_matchptr = in_next;
	const u8 *matchptr;
	unsigned len;
	unsigned hash;
	pos_t cur_match;
	u32 first_3_bytes;

	/* Insert the current sequence into the appropriate hash chain. */
	if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
		goto out;
	first_3_bytes = load_u24_unaligned(in_next);
	hash = lz_hash3_u24(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
	cur_match = mf->hash_tab[hash];
	mf->next_tab[in_next - in_base] = cur_match;
	mf->hash_tab[hash] = in_next - in_base;

	/* Nothing longer than 'best_len' can exist. */
	if (unlikely(best_len >= max_len))
		goto out;

	/* Search the appropriate hash chain for matches. */

	if (!(matchfinder_match_in_window(cur_match, in_base, in_next)))
		goto out;

	if (best_len < 3) {
		for (;;) {
			/* No length 3 match found yet.
			 * Check the first 3 bytes. */
			matchptr = &in_base[cur_match];

			if (load_u24_unaligned(matchptr) == first_3_bytes)
				break;

			/* Not a match; keep trying. */
			cur_match = mf->next_tab[
					matchfinder_slot_for_match(cur_match)];
			if (!matchfinder_match_in_window(cur_match,
							 in_base, in_next))
				goto out;
			if (!--depth_remaining)
				goto out;
		}

		/* Found a length 3 match. */
		best_matchptr = matchptr;
		best_len = lz_extend(in_next, best_matchptr, 3, max_len);
		if (best_len >= nice_len)
			goto out;
		cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
		if (!matchfinder_match_in_window(cur_match, in_base, in_next))
			goto out;
		if (!--depth_remaining)
			goto out;
	}

	for (;;) {
		for (;;) {
			matchptr = &in_base[cur_match];

			/* Already found a length 3 match.  Try for a longer
			 * match.  With fast unaligned access, quickly reject
			 * candidates by comparing the 4 bytes ending at index
			 * 'best_len' and the first 4 bytes; otherwise compare
			 * only the byte at index 'best_len'. */
#if UNALIGNED_ACCESS_IS_FAST
			if ((load_u32_unaligned(matchptr + best_len - 3) ==
			     load_u32_unaligned(in_next + best_len - 3)) &&
			    (load_u32_unaligned(matchptr) ==
			     load_u32_unaligned(in_next)))
#else
			if (matchptr[best_len] == in_next[best_len])
#endif
				break;

			cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
			if (!matchfinder_match_in_window(cur_match, in_base, in_next))
				goto out;
			if (!--depth_remaining)
				goto out;
		}

		/* The first 4 bytes are already known to match on the fast
		 * path; otherwise nothing is known and extension starts at 0. */
		if (UNALIGNED_ACCESS_IS_FAST)
			len = 4;
		else
			len = 0;
		len = lz_extend(in_next, matchptr, len, max_len);
		if (len > best_len) {
			best_len = len;
			best_matchptr = matchptr;
			if (best_len >= nice_len)
				goto out;
		}
		cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
		if (!matchfinder_match_in_window(cur_match, in_base, in_next))
			goto out;
		if (!--depth_remaining)
			goto out;
	}
out:
	*offset_ret = in_next - best_matchptr;
	return best_len;
}
/*
 * Insert 'count' consecutive positions into the hash chains without
 * searching for matches.
 *
 * @mf
 *	The matchfinder structure.
 * @in_base
 *	Pointer to the next byte in the input buffer to process _at the last
 *	time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
 * @in_next
 *	Pointer to the first position to insert.
 * @in_end
 *	Pointer to the end of the input buffer.
 * @count
 *	Number of positions to insert; must be > 0.
 *
 * If the skipped range comes within LZ_HASH_REQUIRED_NBYTES of 'in_end',
 * nothing at all is inserted.
 */
static inline void
hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
			      const u8 *in_base,
			      const u8 *in_next,
			      const u8 *in_end,
			      unsigned count)
{
	if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
		return;

	for (;;) {
		const unsigned bucket = lz_hash3(in_next,
						 HC_MATCHFINDER_HASH_ORDER);

		/* Link this position in front of the bucket's current head. */
		mf->next_tab[in_next - in_base] = mf->hash_tab[bucket];
		mf->hash_tab[bucket] = in_next - in_base;
		in_next++;

		if (--count == 0)
			break;
	}
}

60
src/lz_extend.h Normal file
View File

@ -0,0 +1,60 @@
/*
* lz_extend.h
*
* Fast match extension for Lempel-Ziv matchfinding.
*/
#pragma once
#include "bitops.h"
#include "unaligned.h"
/*
 * Return the number of bytes at @matchptr that match the bytes at @strptr, up
 * to a maximum of @max_len.  Initially, @start_len bytes are matched.
 *
 * @start_len must not exceed @max_len: the remaining length is computed with
 * unsigned arithmetic.
 */
static inline unsigned
lz_extend(const u8 * const strptr, const u8 * const matchptr,
	  const unsigned start_len, const unsigned max_len)
{
	unsigned len = start_len;
	machine_word_t v_word;

	if (UNALIGNED_ACCESS_IS_FAST) {

		/* Unrolled fast path: compare four whole words without
		 * re-checking the length bound between them. */
		if (likely(max_len - len >= 4 * WORDSIZE)) {

		/* The definition must end on the 'len += WORDSIZE;' line; a
		 * trailing line continuation there would absorb the first
		 * invocation into the macro body. */
		#define COMPARE_WORD_STEP					\
			v_word = load_word_unaligned(&matchptr[len]) ^		\
				 load_word_unaligned(&strptr[len]);		\
			if (v_word != 0)					\
				goto word_differs;				\
			len += WORDSIZE;

			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
		#undef COMPARE_WORD_STEP
		}

		/* Compare a word at a time while a whole word still fits. */
		while (len + WORDSIZE <= max_len) {
			v_word = load_word_unaligned(&matchptr[len]) ^
				 load_word_unaligned(&strptr[len]);
			if (v_word != 0)
				goto word_differs;
			len += WORDSIZE;
		}
	}

	/* Byte-at-a-time tail (and the whole job without fast unaligned
	 * access). */
	while (len < max_len && matchptr[len] == strptr[len])
		len++;
	return len;

word_differs:
	/* The XOR is nonzero; the first differing byte in memory order is the
	 * lowest set byte on little endian and the highest on big endian. */
	if (CPU_IS_LITTLE_ENDIAN)
		len += (ffsw(v_word) >> 3);
	else
		len += (flsw(v_word) >> 3);
	return len;
}

49
src/lz_hash3.h Normal file
View File

@ -0,0 +1,49 @@
/*
* lz_hash3.h
*
* 3-byte hashing for Lempel-Ziv matchfinding.
*/
#pragma once
#include "unaligned.h"
/*
 * Reduce a natively loaded 32-bit word to the 24 bits that came from the
 * three lowest-addressed bytes: the low 24 bits on little endian, the high
 * 24 bits on big endian.
 */
static inline u32
loaded_u32_to_u24(u32 v)
{
	return CPU_IS_LITTLE_ENDIAN ? (v & 0xFFFFFF) : (v >> 8);
}
/*
 * Load the 3 bytes at 'p' as a 24-bit value.  With fast unaligned access a
 * 4-byte load is used, so 4 bytes must be readable at 'p'; otherwise the
 * value is assembled byte by byte, lowest address in the lowest bits.
 */
static inline u32
load_u24_unaligned(const u8 *p)
{
	if (!UNALIGNED_ACCESS_IS_FAST) {
		const u32 b0 = p[0];
		const u32 b1 = p[1];
		const u32 b2 = p[2];

		return (b0 << 0) | (b1 << 8) | (b2 << 16);
	}
	return loaded_u32_to_u24(load_u32_unaligned(p));
}
/*
 * Multiplicative hash of an already-loaded 24-bit sequence: multiply by a
 * fixed constant modulo 2^32 and keep the top 'num_bits' bits.
 */
static inline u32
lz_hash3_u24(u32 str, unsigned num_bits)
{
	const u32 product = str * 0x1E35A7BD;

	return product >> (32 - num_bits);
}
/*
 * Hash the 3-byte sequence starting at 'p' into 'num_bits' bits.
 *
 * LZ_HASH_REQUIRED_NBYTES bytes must be readable at 'p'.  That can be 4
 * rather than 3, because the sequence may be fetched with a single unaligned
 * 32-bit load.
 */
static inline u32
lz_hash3(const u8 *p, unsigned num_bits)
{
	const u32 seq = load_u24_unaligned(p);

	return lz_hash3_u24(seq, num_bits);
}
/* Number of bytes the hash function actually requires be available, due to the
* possibility of an unaligned load: 4 when load_u24_unaligned() uses a 32-bit
* load (UNALIGNED_ACCESS_IS_FAST), otherwise exactly 3. */
#define LZ_HASH_REQUIRED_NBYTES (UNALIGNED_ACCESS_IS_FAST ? 4 : 3)

64
src/matchfinder_avx2.h Normal file
View File

@ -0,0 +1,64 @@
/*
* matchfinder_avx2.h
*
* Matchfinding routines optimized for Intel AVX2 (Advanced Vector Extensions).
*/
#include <immintrin.h>
/*
 * Fill 'size' bytes at 'data' with MATCHFINDER_INITVAL using 256-bit stores,
 * four vectors per iteration.
 *
 * 'data' is stored to through __m256i pointers, so it must be suitably
 * aligned for that.
 *
 * Returns false, having written nothing, when the fast path does not apply:
 * 'size' is not a multiple of four vectors or pos_t is neither 2 nor 4
 * bytes.  The caller must then fall back to another method.
 */
static inline bool
matchfinder_init_avx2(pos_t *data, size_t size)
{
	__m256i v, *p;
	size_t n;

	/* Parentheses are required: '%' and '*' have equal precedence, so
	 * 'size % sizeof(__m256i) * 4' would only test for a multiple of one
	 * vector. */
	if (size % (sizeof(__m256i) * 4) != 0)
		return false;

	if (sizeof(pos_t) == 2)
		v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
	else if (sizeof(pos_t) == 4)
		v = _mm256_set1_epi32(MATCHFINDER_INITVAL);
	else
		return false;

	p = (__m256i *)data;
	/* Pre-tested loop, so a size of 0 stores nothing. */
	for (n = size / (sizeof(__m256i) * 4); n != 0; n--) {
		p[0] = v;
		p[1] = v;
		p[2] = v;
		p[3] = v;
		p += 4;
	}
	return true;
}
/*
 * Add -MATCHFINDER_WINDOW_SIZE to every entry in the 'size' bytes at 'data'
 * using 256-bit operations, four vectors per iteration.
 *
 * The addition is always a 16-bit signed saturating add.  For 4-byte pos_t
 * the addend is broadcast as a 32-bit value, so it acts on each entry's two
 * 16-bit halves separately.
 *
 * Returns false, having modified nothing, when the fast path does not apply:
 * 'size' is not a multiple of four vectors or pos_t is neither 2 nor 4
 * bytes.  The caller must then fall back to another method.
 */
static inline bool
matchfinder_rebase_avx2(pos_t *data, size_t size)
{
	__m256i v, *p;
	size_t n;

	/* Parentheses are required: '%' and '*' have equal precedence, so
	 * 'size % sizeof(__m256i) * 4' would only test for a multiple of one
	 * vector. */
	if (size % (sizeof(__m256i) * 4) != 0)
		return false;

	if (sizeof(pos_t) == 2)
		v = _mm256_set1_epi16((pos_t)-MATCHFINDER_WINDOW_SIZE);
	else if (sizeof(pos_t) == 4)
		v = _mm256_set1_epi32((pos_t)-MATCHFINDER_WINDOW_SIZE);
	else
		return false;

	p = (__m256i *)data;
	/* Pre-tested loop, so a size of 0 touches nothing. */
	for (n = size / (sizeof(__m256i) * 4); n != 0; n--) {
		/* PADDSW: Add Packed Signed Integers With Signed Saturation */
		p[0] = _mm256_adds_epi16(p[0], v);
		p[1] = _mm256_adds_epi16(p[1], v);
		p[2] = _mm256_adds_epi16(p[2], v);
		p[3] = _mm256_adds_epi16(p[3], v);
		p += 4;
	}
	return true;
}

163
src/matchfinder_common.h Normal file
View File

@ -0,0 +1,163 @@
/*
* matchfinder_common.h
*
* Common code for Lempel-Ziv matchfinding.
*/
#pragma once
#include "types.h"
#include <string.h>
/* The includer must configure the window: its order (log2 of its size) and
 * whether it slides. */
#ifndef MATCHFINDER_WINDOW_ORDER
# error "MATCHFINDER_WINDOW_ORDER must be defined!"
#endif
#ifndef MATCHFINDER_IS_SLIDING
# error "MATCHFINDER_IS_SLIDING must be defined!"
#endif
#define MATCHFINDER_WINDOW_SIZE ((size_t)1 << MATCHFINDER_WINDOW_ORDER)
/* Pull in pos_t, MATCHFINDER_INITVAL and the window helpers for the selected
 * window model. */
#if MATCHFINDER_IS_SLIDING
# include "matchfinder_sliding.h"
#else
# include "matchfinder_nonsliding.h"
#endif
/* Required alignment of matchfinder tables: 8 by default, raised to the
 * vector width when the AVX2 (32) or SSE2 (16) routines are compiled in. */
#define MATCHFINDER_ALIGNMENT 8
#ifdef __AVX2__
# include "matchfinder_avx2.h"
# if MATCHFINDER_ALIGNMENT < 32
# undef MATCHFINDER_ALIGNMENT
# define MATCHFINDER_ALIGNMENT 32
# endif
#endif
#ifdef __SSE2__
# include "matchfinder_sse2.h"
# if MATCHFINDER_ALIGNMENT < 16
# undef MATCHFINDER_ALIGNMENT
# define MATCHFINDER_ALIGNMENT 16
# endif
#endif
/*
* Representation of a match.
*
* Both fields use pos_t, so their width follows the configured window order.
*/
struct lz_match {
/* The number of bytes matched. */
pos_t length;
/* The offset back from the current position that was matched. */
pos_t offset;
};
static inline bool
matchfinder_memset_init_okay(void)
{
/* All bytes must match in order to use memset. */
const pos_t v = MATCHFINDER_INITVAL;
if (sizeof(pos_t) == 2)
return (u8)v == (u8)(v >> 8);
if (sizeof(pos_t) == 4)
return (u8)v == (u8)(v >> 8) &&
(u8)v == (u8)(v >> 16) &&
(u8)v == (u8)(v >> 24);
return false;
}
/*
 * Set all 'num_entries' entries of 'data' to MATCHFINDER_INITVAL.
 *
 * This is an optimized fill: the vectorized routines are tried first when
 * compiled in, then memset() if the initial value permits it, and finally a
 * plain element-by-element loop.
 *
 * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
 */
static inline void
matchfinder_init(pos_t *data, size_t num_entries)
{
	const size_t nbytes = num_entries * sizeof(data[0]);

#ifdef __AVX2__
	if (matchfinder_init_avx2(data, nbytes))
		return;
#endif

#ifdef __SSE2__
	if (matchfinder_init_sse2(data, nbytes))
		return;
#endif

	if (!matchfinder_memset_init_okay()) {
		/* The initial value has differing bytes: store it entry by
		 * entry. */
		pos_t * const end = data + num_entries;

		for (pos_t *entry = data; entry != end; entry++)
			*entry = MATCHFINDER_INITVAL;
		return;
	}

	memset(data, (u8)MATCHFINDER_INITVAL, nbytes);
}
#if MATCHFINDER_IS_SLIDING
/*
 * Slide the matchfinder by WINDOW_SIZE bytes.
 *
 * This must be called just after each WINDOW_SIZE bytes have been run through
 * the matchfinder.
 *
 * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
 * The effect is that all entries are updated to be relative to the current
 * position, rather than the position WINDOW_SIZE bytes prior.
 *
 * Underflow is detected and replaced with signed saturation.  This ensures that
 * once the sliding window has passed over a position, that position forever
 * remains out of bounds.
 *
 * The array passed in must contain all matchfinder data that is
 * position-relative.  Concretely, this will include the hash table as well as
 * the table of positions that is used to link together the sequences in each
 * hash bucket.  Note that in the latter table, the links are 1-ary in the case
 * of "hash chains", and 2-ary in the case of "binary trees".  In either case,
 * the links need to be rebased in the same way.
 *
 * @data	Array of position-relative entries to rebase in place.
 * @num_entries	Number of entries in @data.
 */
static inline void
matchfinder_rebase(pos_t *data, size_t num_entries)
{
	const size_t size = num_entries * sizeof(data[0]);

#ifdef __AVX2__
	if (matchfinder_rebase_avx2(data, size))
		return;
#endif

#ifdef __SSE2__
	if (matchfinder_rebase_sse2(data, size))
		return;
#endif

	if (MATCHFINDER_WINDOW_SIZE == 32768) {
		/* Branchless version for 32768 byte windows.  If the value was
		 * already negative, clear all bits except the sign bit; this
		 * changes the value to -32768.  Otherwise, set the sign bit;
		 * this is equivalent to subtracting 32768. */
		for (size_t i = 0; i < num_entries; i++) {
			u16 v = data[i];
			u16 sign_bit = v & 0x8000;
			v &= sign_bit - ((sign_bit >> 15) ^ 1);
			v |= 0x8000;
			data[i] = v;
		}
		return;
	}

	/* Generic version.  (pos_t)-MATCHFINDER_WINDOW_SIZE is the negated
	 * window size, so it must be ADDED to move a position back by one
	 * window; subtracting it would move the position forward instead. */
	for (size_t i = 0; i < num_entries; i++) {
		if (data[i] >= 0)
			data[i] += (pos_t)-MATCHFINDER_WINDOW_SIZE;
		else
			data[i] = (pos_t)-MATCHFINDER_WINDOW_SIZE;
	}
}
#endif /* MATCHFINDER_IS_SLIDING */

View File

@ -0,0 +1,47 @@
/*
* matchfinder_nonsliding.h
*
* Definitions for nonsliding window matchfinders.
*
* "Nonsliding window" means that any prior sequence can be matched.
*/
/* Unsigned position type: 16 bits for window orders up to 16, otherwise
 * 32 bits. */
#if MATCHFINDER_WINDOW_ORDER <= 16
typedef u16 pos_t;
#else
typedef u32 pos_t;
#endif
#if MATCHFINDER_WINDOW_ORDER != 16 && MATCHFINDER_WINDOW_ORDER != 32
/* Not all the bits of the position type are needed, so the sign bit can be
* reserved to mean "out of bounds". */
#define MATCHFINDER_INITVAL ((pos_t)-1)
/* A position is usable unless the top bit of pos_t, reserved as the "out of
 * bounds" marker, is set.  'in_base' and 'in_next' are unused here. */
static inline bool
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
{
	const pos_t out_of_bounds_bit = (pos_t)1 << (sizeof(pos_t) * 8 - 1);

	return (cur_match & out_of_bounds_bit) == 0;
}
#else
/* All bits of the position type are needed, so use 0 to mean "out of bounds".
* This prevents the beginning of the buffer from matching anything; however,
* this doesn't matter much. */
#define MATCHFINDER_INITVAL ((pos_t)0)
/* Position 0 doubles as the "out of bounds" marker; every other position is
 * usable.  'in_base' and 'in_next' are unused here. */
static inline bool
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
{
	if (cur_match == 0)
		return false;
	return true;
}
#endif
/* Without a sliding window, a position is used directly as its table slot. */
static inline pos_t
matchfinder_slot_for_match(pos_t cur_match)
{
	const pos_t slot = cur_match;

	return slot;
}

30
src/matchfinder_sliding.h Normal file
View File

@ -0,0 +1,30 @@
/*
* matchfinder_sliding.h
*
* Definitions for sliding window matchfinders.
*
* "Sliding window" means that only sequences beginning in the most recent
* MATCHFINDER_WINDOW_SIZE bytes can be matched.
*/
/* Signed position type: 16 bits for window orders up to 15, otherwise
 * 32 bits. */
#if MATCHFINDER_WINDOW_ORDER <= 15
typedef s16 pos_t;
#else
typedef s32 pos_t;
#endif
/* Initial value of every table entry: one full window behind 'in_base',
 * which matchfinder_match_in_window() rejects whenever 'in_next' is at or
 * after 'in_base'. */
#define MATCHFINDER_INITVAL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
/*
 * In the sliding window case, positions are stored relative to 'in_base'.
 * A position is in the window if it lies after the position exactly
 * MATCHFINDER_WINDOW_SIZE bytes before 'in_next'.
 */
static inline bool
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
{
	const pos_t window_start =
		(pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);

	return cur_match > window_start;
}
/* Map a position to its table slot by keeping only its low
 * MATCHFINDER_WINDOW_ORDER bits (the position modulo the window size). */
static inline pos_t
matchfinder_slot_for_match(pos_t cur_match)
{
	const size_t slot_mask = MATCHFINDER_WINDOW_SIZE - 1;

	return cur_match & slot_mask;
}

64
src/matchfinder_sse2.h Normal file
View File

@ -0,0 +1,64 @@
/*
* matchfinder_sse2.h
*
* Matchfinding routines optimized for Intel SSE2 (Streaming SIMD Extensions).
*/
#include <emmintrin.h>
/*
 * Fill 'size' bytes at 'data' with MATCHFINDER_INITVAL using 128-bit stores,
 * four vectors per iteration.
 *
 * 'data' is stored to through __m128i pointers, so it must be suitably
 * aligned for that.
 *
 * Returns false, having written nothing, when the fast path does not apply:
 * 'size' is not a multiple of four vectors or pos_t is neither 2 nor 4
 * bytes.  The caller must then fall back to another method.
 */
static inline bool
matchfinder_init_sse2(pos_t *data, size_t size)
{
	__m128i v, *p;
	size_t n;

	/* Parentheses are required: '%' and '*' have equal precedence, so
	 * 'size % sizeof(__m128i) * 4' would only test for a multiple of one
	 * vector. */
	if (size % (sizeof(__m128i) * 4) != 0)
		return false;

	if (sizeof(pos_t) == 2)
		v = _mm_set1_epi16(MATCHFINDER_INITVAL);
	else if (sizeof(pos_t) == 4)
		v = _mm_set1_epi32(MATCHFINDER_INITVAL);
	else
		return false;

	p = (__m128i *)data;
	/* Pre-tested loop, so a size of 0 stores nothing. */
	for (n = size / (sizeof(__m128i) * 4); n != 0; n--) {
		p[0] = v;
		p[1] = v;
		p[2] = v;
		p[3] = v;
		p += 4;
	}
	return true;
}
/*
 * Add -MATCHFINDER_WINDOW_SIZE to every entry in the 'size' bytes at 'data'
 * using 128-bit operations, four vectors per iteration.
 *
 * The addition is always a 16-bit signed saturating add.  For 4-byte pos_t
 * the addend is broadcast as a 32-bit value, so it acts on each entry's two
 * 16-bit halves separately.
 *
 * Returns false, having modified nothing, when the fast path does not apply:
 * 'size' is not a multiple of four vectors or pos_t is neither 2 nor 4
 * bytes.  The caller must then fall back to another method.
 */
static inline bool
matchfinder_rebase_sse2(pos_t *data, size_t size)
{
	__m128i v, *p;
	size_t n;

	/* Parentheses are required: '%' and '*' have equal precedence, so
	 * 'size % sizeof(__m128i) * 4' would only test for a multiple of one
	 * vector. */
	if (size % (sizeof(__m128i) * 4) != 0)
		return false;

	if (sizeof(pos_t) == 2)
		v = _mm_set1_epi16((pos_t)-MATCHFINDER_WINDOW_SIZE);
	else if (sizeof(pos_t) == 4)
		v = _mm_set1_epi32((pos_t)-MATCHFINDER_WINDOW_SIZE);
	else
		return false;

	p = (__m128i *)data;
	/* Pre-tested loop, so a size of 0 touches nothing. */
	for (n = size / (sizeof(__m128i) * 4); n != 0; n--) {
		/* PADDSW: Add Packed Signed Integers With Signed Saturation */
		p[0] = _mm_adds_epi16(p[0], v);
		p[1] = _mm_adds_epi16(p[1], v);
		p[2] = _mm_adds_epi16(p[2], v);
		p[3] = _mm_adds_epi16(p[3], v);
		p += 4;
	}
	return true;
}

38
src/types.h Normal file
View File

@ -0,0 +1,38 @@
/*
* types.h
*
* Definitions of fixed-width integers, 'bool', 'size_t', and 'machine_word_t'.
*/
#pragma once
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
/* Unsigned fixed-width integers. */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
/* Signed fixed-width integers. */
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;
/* Names for values held in little-endian (le) or big-endian (be) byte order.
 * These are plain aliases of the unsigned types, so they document intent
 * only; the compiler does not tell them apart. */
typedef uint16_t le16;
typedef uint32_t le32;
typedef uint64_t le64;
typedef uint16_t be16;
typedef uint32_t be32;
typedef uint64_t be64;
/*
* Type of a machine word. 'unsigned long' would be logical, but that is only
* 32 bits on x86_64 Windows. The same applies to 'uint_fast32_t'. So the best
* we can do without a bunch of #ifdefs appears to be 'size_t'.
*/
typedef size_t machine_word_t;
/* Size of a machine word, in bytes. */
#define WORDSIZE sizeof(machine_word_t)

216
src/unaligned.h Normal file
View File

@ -0,0 +1,216 @@
/*
* unaligned.h
*
* Inline functions for unaligned memory access.
*/
#pragma once
#include "compiler.h"
#include "endianness.h"
#include "types.h"
/*
 * For the given integer type, define:
 *   - struct <type>_unaligned, a packed wrapper holding one <type>;
 *   - load_<type>_unaligned(p), which reads a <type> from 'p';
 *   - store_<type>_unaligned(val, p), which writes 'val' to 'p'.
 *
 * The access goes through the packed struct, leaving it to the compiler to
 * emit a load/store that is valid for any alignment of 'p'.  Values are in
 * native byte order; no endianness conversion is done here.
 */
#define DEFINE_UNALIGNED_TYPE(type) \
struct type##_unaligned { \
type v; \
} _packed_attribute; \
\
static inline type \
load_##type##_unaligned(const void *p) \
{ \
return ((const struct type##_unaligned *)p)->v; \
} \
\
static inline void \
store_##type##_unaligned(type val, void *p) \
{ \
((struct type##_unaligned *)p)->v = val; \
}
DEFINE_UNALIGNED_TYPE(u16);
DEFINE_UNALIGNED_TYPE(u32);
DEFINE_UNALIGNED_TYPE(u64);
DEFINE_UNALIGNED_TYPE(machine_word_t);
/* Shorter names for the machine-word accessors. */
#define load_word_unaligned load_machine_word_t_unaligned
#define store_word_unaligned store_machine_word_t_unaligned
static inline u16
get_unaligned_u16_le(const void *p)
{
u16 v;
if (UNALIGNED_ACCESS_IS_FAST) {
v = le16_to_cpu(load_u16_unaligned(p));
} else {
const u8 *p8 = p;
v = 0;
v |= (u16)p8[0] << 0;
v |= (u16)p8[1] << 8;
}
return v;
}
static inline u32
get_unaligned_u32_le(const void *p)
{
u32 v;
if (UNALIGNED_ACCESS_IS_FAST) {
v = le32_to_cpu(load_u32_unaligned(p));
} else {
const u8 *p8 = p;
v = 0;
v |= (u32)p8[0] << 0;
v |= (u32)p8[1] << 8;
v |= (u32)p8[2] << 16;
v |= (u32)p8[3] << 24;
}
return v;
}
static inline u64
get_unaligned_u64_le(const void *p)
{
u64 v;
if (UNALIGNED_ACCESS_IS_FAST) {
v = le64_to_cpu(load_u64_unaligned(p));
} else {
const u8 *p8 = p;
v = 0;
v |= (u64)p8[0] << 0;
v |= (u64)p8[1] << 8;
v |= (u64)p8[2] << 16;
v |= (u64)p8[3] << 24;
v |= (u64)p8[4] << 32;
v |= (u64)p8[5] << 40;
v |= (u64)p8[6] << 48;
v |= (u64)p8[7] << 56;
}
return v;
}
/* Read a little-endian machine word (4 or 8 bytes) from a possibly unaligned
 * address. */
static inline machine_word_t
get_unaligned_word_le(const void *p)
{
	BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
	if (WORDSIZE != 4)
		return get_unaligned_u64_le(p);
	return get_unaligned_u32_le(p);
}
/* Write a 16-bit value in little-endian order to a possibly unaligned
 * address. */
static inline void
put_unaligned_u16_le(u16 v, void *p)
{
	if (!UNALIGNED_ACCESS_IS_FAST) {
		/* Emit the bytes one at a time, least significant first. */
		u8 *bytes = p;

		bytes[0] = (u8)(v >> 0);
		bytes[1] = (u8)(v >> 8);
		return;
	}
	store_u16_unaligned(cpu_to_le16(v), p);
}
/* Write a 32-bit value in little-endian order to a possibly unaligned
 * address. */
static inline void
put_unaligned_u32_le(u32 v, void *p)
{
	if (!UNALIGNED_ACCESS_IS_FAST) {
		/* Emit the bytes one at a time, least significant first. */
		u8 *bytes = p;

		bytes[0] = (u8)(v >> 0);
		bytes[1] = (u8)(v >> 8);
		bytes[2] = (u8)(v >> 16);
		bytes[3] = (u8)(v >> 24);
		return;
	}
	store_u32_unaligned(cpu_to_le32(v), p);
}
/* Write a 64-bit value in little-endian order to a possibly unaligned
 * address. */
static inline void
put_unaligned_u64_le(u64 v, void *p)
{
	if (!UNALIGNED_ACCESS_IS_FAST) {
		/* Emit the bytes one at a time, least significant first. */
		u8 *bytes = p;

		bytes[0] = (u8)(v >> 0);
		bytes[1] = (u8)(v >> 8);
		bytes[2] = (u8)(v >> 16);
		bytes[3] = (u8)(v >> 24);
		bytes[4] = (u8)(v >> 32);
		bytes[5] = (u8)(v >> 40);
		bytes[6] = (u8)(v >> 48);
		bytes[7] = (u8)(v >> 56);
		return;
	}
	store_u64_unaligned(cpu_to_le64(v), p);
}
/* Write a machine word (4 or 8 bytes) in little-endian order to a possibly
 * unaligned address. */
static inline void
put_unaligned_word_le(machine_word_t v, void *p)
{
	BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
	if (WORDSIZE != 4) {
		put_unaligned_u64_le(v, p);
		return;
	}
	put_unaligned_u32_le(v, p);
}
static inline u16
get_unaligned_u16_be(const void *p)
{
u16 v;
if (UNALIGNED_ACCESS_IS_FAST) {
v = be16_to_cpu(load_u16_unaligned(p));
} else {
const u8 *p8 = p;
v = 0;
v |= (u16)p8[0] << 8;
v |= (u16)p8[1] << 0;
}
return v;
}
static inline u32
get_unaligned_u32_be(const void *p)
{
u32 v;
if (UNALIGNED_ACCESS_IS_FAST) {
v = be32_to_cpu(load_u32_unaligned(p));
} else {
const u8 *p8 = p;
v = 0;
v |= (u32)p8[0] << 24;
v |= (u32)p8[1] << 16;
v |= (u32)p8[2] << 8;
v |= (u32)p8[3] << 0;
}
return v;
}
/* Write a 16-bit value in big-endian order to a possibly unaligned address. */
static inline void
put_unaligned_u16_be(u16 v, void *p)
{
	if (!UNALIGNED_ACCESS_IS_FAST) {
		/* Emit the bytes one at a time, most significant first. */
		u8 *bytes = p;

		bytes[0] = (u8)(v >> 8);
		bytes[1] = (u8)(v >> 0);
		return;
	}
	store_u16_unaligned(cpu_to_be16(v), p);
}
/* Write a 32-bit value in big-endian order to a possibly unaligned address. */
static inline void
put_unaligned_u32_be(u32 v, void *p)
{
	if (!UNALIGNED_ACCESS_IS_FAST) {
		/* Emit the bytes one at a time, most significant first. */
		u8 *bytes = p;

		bytes[0] = (u8)(v >> 24);
		bytes[1] = (u8)(v >> 16);
		bytes[2] = (u8)(v >> 8);
		bytes[3] = (u8)(v >> 0);
		return;
	}
	store_u32_unaligned(cpu_to_be32(v), p);
}

56
src/zlib_compress.c Normal file
View File

@ -0,0 +1,56 @@
/*
* zlib_compress.c
*
* Generate DEFLATE-compressed data in the zlib wrapper format.
*/
#include "libdeflate.h"
#include "adler32.h"
#include "deflate_compress.h"
#include "unaligned.h"
#include "zlib_constants.h"
/*
 * Compress @in_size bytes at @in into a zlib stream at @out: a 2-byte
 * header, the DEFLATE data, then the ADLER32 checksum of the uncompressed
 * data, with header and checksum stored big-endian.
 *
 * Returns the total number of bytes written, or 0 if @out_nbytes_avail does
 * not exceed the fixed zlib overhead or deflate_compress() returned 0.
 */
LIBEXPORT size_t
zlib_compress(struct deflate_compressor *c, const void *in, size_t in_size,
	      void *out, size_t out_nbytes_avail)
{
	u8 *dst = out;
	unsigned level;
	unsigned level_hint;
	size_t compressed_size;
	u16 hdr;

	if (out_nbytes_avail <= ZLIB_MIN_OVERHEAD)
		return 0;

	/* Map the compression level onto the header's 2-bit level hint. */
	level = deflate_get_compression_level(c);
	if (level >= 8)
		level_hint = ZLIB_SLOWEST_COMPRESSION;
	else if (level >= 6)
		level_hint = ZLIB_DEFAULT_COMPRESSION;
	else if (level >= 2)
		level_hint = ZLIB_FAST_COMPRESSION;
	else
		level_hint = ZLIB_FASTEST_COMPRESSION;

	/* 2 byte header: CMF and FLG.  The low 5 bits are then set so that
	 * the 16-bit header is a multiple of 31. */
	hdr = (ZLIB_CM_DEFLATE << 8) | (ZLIB_CINFO_32K_WINDOW << 12) |
	      (level_hint << 6);
	hdr |= 31 - (hdr % 31);
	put_unaligned_u16_be(hdr, dst);
	dst += 2;

	/* Compressed data; reserve room for the trailing checksum. */
	compressed_size = deflate_compress(c, in, in_size, dst,
					   out_nbytes_avail - ZLIB_MIN_OVERHEAD);
	if (compressed_size == 0)
		return 0;
	dst += compressed_size;

	/* ADLER32 */
	put_unaligned_u32_be(adler32(in, in_size), dst);
	dst += 4;

	return dst - (u8 *)out;
}

20
src/zlib_constants.h Normal file
View File

@ -0,0 +1,20 @@
/*
* zlib_constants.h
*
* Constants for the zlib wrapper format.
*/
#pragma once
/* Size of the header, of the ADLER32 footer, and their sum. */
#define ZLIB_MIN_HEADER_SIZE 2
#define ZLIB_FOOTER_SIZE 4
#define ZLIB_MIN_OVERHEAD (ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE)
/* CM (compression method) value for DEFLATE. */
#define ZLIB_CM_DEFLATE 8
/* CINFO value for a 32K window; also the largest CINFO accepted when
 * decompressing. */
#define ZLIB_CINFO_32K_WINDOW 7
/* Compression level hints stored in the header. */
#define ZLIB_FASTEST_COMPRESSION 0
#define ZLIB_FAST_COMPRESSION 1
#define ZLIB_DEFAULT_COMPRESSION 2
#define ZLIB_SLOWEST_COMPRESSION 3

56
src/zlib_decompress.c Normal file
View File

@ -0,0 +1,56 @@
/*
* zlib_decompress.c
*
* Decompress DEFLATE-compressed data wrapped in the zlib format.
*/
#include "libdeflate.h"
#include "adler32.h"
#include "unaligned.h"
#include "zlib_constants.h"
/*
 * Decompress a zlib stream of @in_nbytes bytes at @in into @out.
 *
 * @out_nbytes is passed to deflate_decompress() and is the number of output
 * bytes covered by the checksum.
 *
 * Returns true only if the 2-byte header is acceptable (valid check bits,
 * DEFLATE method, window size no larger than 32K, no preset dictionary),
 * deflate_decompress() succeeds, and the trailing big-endian ADLER32 matches
 * the output.
 */
LIBEXPORT bool
zlib_decompress(struct deflate_decompressor *d,
		const void *in, size_t in_nbytes, void *out, size_t out_nbytes)
{
	const u8 *in_next = in;
	const u8 * const in_end = in_next + in_nbytes;
	const u8 *footer;
	u16 hdr;

	if (in_nbytes < ZLIB_MIN_OVERHEAD)
		return false;

	/* 2 byte header: CMF and FLG */
	hdr = get_unaligned_u16_be(in_next);
	in_next += 2;

	if ((hdr % 31) != 0 ||				/* FCHECK */
	    ((hdr >> 8) & 0xF) != ZLIB_CM_DEFLATE ||	/* CM */
	    (hdr >> 12) > ZLIB_CINFO_32K_WINDOW ||	/* CINFO */
	    ((hdr >> 5) & 1))				/* FDICT */
		return false;

	/* Compressed data: everything between the header and the footer. */
	footer = in_end - ZLIB_FOOTER_SIZE;
	if (!deflate_decompress(d, in_next, footer - in_next, out, out_nbytes))
		return false;

	/* ADLER32 */
	return adler32(out, out_nbytes) == get_unaligned_u32_be(footer);
}