mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-12 05:48:25 -04:00
initial commit
This commit is contained in:
commit
6a982b7ac7
83
CMakeLists.txt
Normal file
83
CMakeLists.txt
Normal file
@ -0,0 +1,83 @@
|
||||
# CMake build script for libdeflate.
#
# Builds the shared library "deflate" and the static library "deflatestatic"
# from the sources selected by the SUPPORT_* options, and optionally the
# benchmark example program.

# 2.6 remains the minimum supported version.  The "...3.5" suffix is ignored by
# CMake releases older than 3.12, and tells newer releases (including
# CMake >= 4.0, which rejects policy versions below 3.5) which policy level this
# file is compatible with.
cmake_minimum_required(VERSION 2.6...3.5)
project(libdeflate C)

set(LIB_VERSION_MAJOR 0)
set(LIB_VERSION_MINOR 0)
set(LIB_VERSION_PATCH 0)
set(LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}")

# Default to an optimized build when the user did not choose a build type.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# The language standard and the hidden default symbol visibility are needed for
# every configuration, so they are appended to the configuration-independent
# flags.  This way RelWithDebInfo and MinSizeRel builds get them too.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11 -fvisibility=hidden")
set(CMAKE_C_FLAGS_RELEASE "-O2 -DNDEBUG")
set(CMAKE_C_FLAGS_DEBUG "-O0 -g")

# libdeflate.h lives in the top-level source directory.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")

# ---------------------------------------------------------------------------
# Feature selection: each option adds the sources implementing that feature.
# ---------------------------------------------------------------------------

option(SUPPORT_COMPRESSION "Support DEFLATE compression" ON)
if(SUPPORT_COMPRESSION)
  list(APPEND LIB_SOURCES src/deflate_compress.c)
endif()

option(SUPPORT_DECOMPRESSION "Support DEFLATE decompression" ON)
if(SUPPORT_DECOMPRESSION)
  list(APPEND LIB_SOURCES src/deflate_decompress.c)
endif()

option(SUPPORT_ZLIB "Support zlib wrapper format" ON)
if(SUPPORT_ZLIB)
  list(APPEND LIB_SOURCES src/adler32.c)
  if(SUPPORT_COMPRESSION)
    list(APPEND LIB_SOURCES src/zlib_compress.c)
  endif()
  if(SUPPORT_DECOMPRESSION)
    list(APPEND LIB_SOURCES src/zlib_decompress.c)
  endif()
endif()

option(SUPPORT_GZIP "Support gzip wrapper format" ON)
if(SUPPORT_GZIP)
  list(APPEND LIB_SOURCES src/crc32.c)
  if(SUPPORT_COMPRESSION)
    list(APPEND LIB_SOURCES src/gzip_compress.c)
  endif()
  if(SUPPORT_DECOMPRESSION)
    list(APPEND LIB_SOURCES src/gzip_decompress.c)
  endif()
endif()

# add_library() fails with an obscure message when given no sources, so report
# the real cause when every feature has been switched off.
if(NOT LIB_SOURCES)
  message(FATAL_ERROR
          "All SUPPORT_* options are OFF; there is nothing to build.")
endif()

# ---------------------------------------------------------------------------
# Compile-time tuning knobs, passed to the sources as 0/1 preprocessor macros.
# ---------------------------------------------------------------------------

option(SUPPORT_NEAR_OPTIMAL_PARSING "Support near optimal parsing (high compression mode)" ON)
if(SUPPORT_NEAR_OPTIMAL_PARSING)
  add_definitions(-DSUPPORT_NEAR_OPTIMAL_PARSING=1)
else()
  add_definitions(-DSUPPORT_NEAR_OPTIMAL_PARSING=0)
endif()

option(UNSAFE_DECOMPRESSION "Assume that all compressed data is valid (faster but insecure)" OFF)
if(UNSAFE_DECOMPRESSION)
  add_definitions(-DUNSAFE_DECOMPRESSION=1)
else()
  add_definitions(-DUNSAFE_DECOMPRESSION=0)
endif()

# ---------------------------------------------------------------------------
# Targets
# ---------------------------------------------------------------------------

add_library(deflate SHARED ${LIB_SOURCES})
add_library(deflatestatic STATIC ${LIB_SOURCES})

set_target_properties(deflate PROPERTIES
                      VERSION ${LIB_VERSION_STRING}
                      SOVERSION ${LIB_VERSION_MAJOR})

option(BUILD_EXAMPLES "Build the example programs" OFF)
if(BUILD_EXAMPLES)
  add_executable(benchmark examples/benchmark.c)
  # Keyword-less signature kept on purpose: PRIVATE/PUBLIC are not understood
  # by the oldest CMake versions this file still supports.
  target_link_libraries(benchmark deflate)
endif()

# ---------------------------------------------------------------------------
# Installation
# ---------------------------------------------------------------------------

# Destinations are relative, so they resolve against CMAKE_INSTALL_PREFIX and
# stay relocatable.  RUNTIME is required for the DLL on Windows, where a shared
# library is split into a runtime part and an import library (ARCHIVE).
install(TARGETS deflate deflatestatic
        RUNTIME DESTINATION bin
        LIBRARY DESTINATION lib
        ARCHIVE DESTINATION lib)

install(FILES libdeflate.h DESTINATION include)
|
19
README
Normal file
19
README
Normal file
@ -0,0 +1,19 @@
|
||||
This is libdeflate, a free (public domain) library for fast, whole-buffer
|
||||
DEFLATE compression and decompression.
|
||||
|
||||
The supported formats are:
|
||||
|
||||
- DEFLATE (raw)
|
||||
- zlib (DEFLATE with zlib header and footer)
|
||||
- gzip (DEFLATE with gzip header and footer)
|
||||
|
||||
libdeflate is heavily optimized. It is significantly faster than zlib, both for
|
||||
compression and decompression. In addition, at compression levels 8 and above
|
||||
it provides a compression ratio better than zlib's, while still being about the
|
||||
same speed as zlib's level 9.
|
||||
|
||||
libdeflate has a simple API that is not zlib-compatible. You can create
|
||||
compressors and decompressors, and use them to compress or decompress buffers.
|
||||
There is not yet any support for streaming. See libdeflate.h for details.
|
||||
|
||||
libdeflate is public domain; the author claims no copyright on it.
|
210
examples/benchmark.c
Normal file
210
examples/benchmark.c
Normal file
@ -0,0 +1,210 @@
|
||||
/*
|
||||
* benchmark.c - A compression testing and benchmark program.
|
||||
*
|
||||
* The author dedicates this file to the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#include <libdeflate.h>
|
||||
|
||||
#ifdef __WIN32__
|
||||
# include <windows.h>
|
||||
#else
|
||||
# define _FILE_OFFSET_BITS 64
|
||||
# define O_BINARY 0
|
||||
# define _POSIX_C_SOURCE 199309L
|
||||
# include <time.h>
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
 * Return the current reading of a monotonic clock, in nanoseconds.
 *
 * The absolute value is meaningless; only differences between two readings
 * are useful.  TIME_UNIT_PER_MS (the number of returned units per millisecond)
 * is defined here and used by do_benchmark() when printing its results.
 *
 * On Windows the performance counter ticks at a machine-dependent rate, so
 * the raw counter is scaled by QueryPerformanceFrequency() instead of
 * assuming a fixed tick length.
 */
static uint64_t
current_time(void)
{
#define TIME_UNIT_PER_MS 1000000
#ifdef __WIN32__
	LARGE_INTEGER frequency;
	LARGE_INTEGER counter;
	uint64_t ticks_per_sec;
	uint64_t ticks;

	QueryPerformanceFrequency(&frequency);
	QueryPerformanceCounter(&counter);
	ticks_per_sec = (uint64_t)frequency.QuadPart;
	ticks = (uint64_t)counter.QuadPart;

	/* Convert whole seconds and the sub-second remainder separately so
	 * that the multiplication by 10^9 cannot overflow 64 bits. */
	return (ticks / ticks_per_sec) * 1000000000ULL +
	       (ticks % ticks_per_sec) * 1000000000ULL / ticks_per_sec;
#else
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (1000000000ULL * ts.tv_sec) + ts.tv_nsec;
#endif
}
|
||||
|
||||
static int
|
||||
do_benchmark(int fd, char *ubuf1, char *ubuf2,
|
||||
char *cbuf, uint32_t max_chunk_size,
|
||||
struct deflate_compressor *compressor,
|
||||
struct deflate_decompressor *decompressor)
|
||||
{
|
||||
uint64_t usize_total = 0;
|
||||
uint64_t csize_total = 0;
|
||||
uint64_t compress_time_total = 0;
|
||||
uint64_t decompress_time_total = 0;
|
||||
|
||||
for (;;) {
|
||||
char *p = ubuf1;
|
||||
ssize_t bytes_read;
|
||||
size_t usize;
|
||||
size_t csize;
|
||||
bool ok;
|
||||
uint64_t start_time;
|
||||
|
||||
/* Read the next chunk of data. */
|
||||
do {
|
||||
bytes_read = read(fd, p, ubuf1 + max_chunk_size - p);
|
||||
if (bytes_read < 0) {
|
||||
fprintf(stderr, "ERROR: Read error: %s\n",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
p += bytes_read;
|
||||
} while (bytes_read != 0 && p != ubuf1 + max_chunk_size);
|
||||
|
||||
usize = p - ubuf1;
|
||||
|
||||
if (usize == 0) /* End of file? */
|
||||
break;
|
||||
|
||||
/* Compress the chunk of data. */
|
||||
usize_total += usize;
|
||||
start_time = current_time();
|
||||
csize = deflate_compress(compressor, ubuf1, usize,
|
||||
cbuf, usize - 1);
|
||||
compress_time_total += current_time() - start_time;
|
||||
|
||||
if (csize) {
|
||||
/* Successfully compressed the chunk of data. */
|
||||
csize_total += csize;
|
||||
|
||||
/* Decompress the data we just compressed and compare
|
||||
* the result with the original. */
|
||||
start_time = current_time();
|
||||
ok = deflate_decompress(decompressor, cbuf, csize,
|
||||
ubuf2, usize);
|
||||
decompress_time_total += current_time() - start_time;
|
||||
if (!ok) {
|
||||
fprintf(stderr, "ERROR: Failed to "
|
||||
"decompress data\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (memcmp(ubuf1, ubuf2, usize)) {
|
||||
fprintf(stderr, "ERROR: Data did not "
|
||||
"decompress to original\n");
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
/* Chunk of data did not compress to less than its
|
||||
* original size. */
|
||||
csize_total += usize;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (usize_total == 0) {
|
||||
printf("\tEmpty input.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (compress_time_total == 0)
|
||||
compress_time_total++;
|
||||
if (decompress_time_total == 0)
|
||||
decompress_time_total++;
|
||||
|
||||
printf("\tCompressed %"PRIu64 " => %"PRIu64" bytes (%u.%u%%)\n",
|
||||
usize_total, csize_total,
|
||||
(unsigned int)(csize_total * 100 / usize_total),
|
||||
(unsigned int)(csize_total * 100000 / usize_total % 1000));
|
||||
printf("\tCompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n",
|
||||
compress_time_total / TIME_UNIT_PER_MS,
|
||||
1000 * usize_total / compress_time_total);
|
||||
printf("\tDecompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n",
|
||||
decompress_time_total / TIME_UNIT_PER_MS,
|
||||
1000 * usize_total / decompress_time_total);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
const char *filename;
|
||||
uint32_t chunk_size = 32768;
|
||||
unsigned int compression_level = 6;
|
||||
char *ubuf1 = NULL;
|
||||
char *ubuf2 = NULL;
|
||||
char *cbuf = NULL;
|
||||
struct deflate_compressor *compressor = NULL;
|
||||
struct deflate_decompressor *decompressor = NULL;
|
||||
int fd = -1;
|
||||
int ret;
|
||||
|
||||
if (argc < 2 || argc > 5) {
|
||||
fprintf(stderr, "Usage: %s FILE [CHUNK_SIZE [LEVEL]]]\n", argv[0]);
|
||||
ret = 2;
|
||||
goto out;
|
||||
}
|
||||
|
||||
filename = argv[1];
|
||||
|
||||
if (argc >= 3)
|
||||
chunk_size = strtoul(argv[2], NULL, 10);
|
||||
|
||||
if (argc >= 4)
|
||||
compression_level = strtoul(argv[3], NULL, 10);
|
||||
|
||||
printf("DEFLATE compression with %"PRIu32" byte chunks (level %u)\n",
|
||||
chunk_size, compression_level);
|
||||
|
||||
compressor = deflate_alloc_compressor(compression_level);
|
||||
if (!compressor) {
|
||||
fprintf(stderr, "ERROR: Failed to create compressor\n");
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
decompressor = deflate_alloc_decompressor();
|
||||
if (!decompressor) {
|
||||
fprintf(stderr, "ERROR: Failed to create decompressor\n");
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ubuf1 = malloc(chunk_size);
|
||||
ubuf2 = malloc(chunk_size);
|
||||
cbuf = malloc(chunk_size - 1);
|
||||
|
||||
if (!ubuf1 || !ubuf2 || !cbuf) {
|
||||
fprintf(stderr, "ERROR: Insufficient memory\n");
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
fd = open(filename, O_RDONLY | O_BINARY);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "ERROR: Can't open \"%s\" for reading: %s\n",
|
||||
filename, strerror(errno));
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = do_benchmark(fd, ubuf1, ubuf2, cbuf, chunk_size,
|
||||
compressor, decompressor);
|
||||
out:
|
||||
close(fd);
|
||||
free(cbuf);
|
||||
free(ubuf2);
|
||||
free(ubuf1);
|
||||
deflate_free_decompressor(decompressor);
|
||||
deflate_free_compressor(compressor);
|
||||
return ret;
|
||||
}
|
131
libdeflate.h
Normal file
131
libdeflate.h
Normal file
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* libdeflate.h
|
||||
*
|
||||
* Public header for the DEFLATE compression library.
|
||||
*/
|
||||
|
||||
#ifndef LIBDEFLATE_H
|
||||
#define LIBDEFLATE_H
|
||||
|
||||
/* The standard headers are included before the extern "C" block is opened, so
 * that their contents are not placed inside a C linkage specification when
 * this header is used from C++. */
#include <stdbool.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif
|
||||
|
||||
/* ========================================================================== */
|
||||
/* Compression */
|
||||
/* ========================================================================== */
|
||||
|
||||
struct deflate_compressor;
|
||||
|
||||
/*
|
||||
* deflate_alloc_compressor() allocates a new DEFLATE compressor.
|
||||
* 'compression_level' is the compression level on a zlib-like scale (1 =
|
||||
* fastest, 6 = medium/default, 9 = slowest). The return value is a pointer to
|
||||
* the new DEFLATE compressor, or NULL if out of memory.
|
||||
*
|
||||
* Note: the sliding window size is defined at compilation time (default 32768).
|
||||
*/
|
||||
extern struct deflate_compressor *
|
||||
deflate_alloc_compressor(unsigned int compression_level);
|
||||
|
||||
/*
|
||||
* deflate_compress() performs DEFLATE compression on a buffer of data. The
|
||||
* function attempts to compress 'in_nbytes' bytes of data located at 'in' and
|
||||
* write the results to 'out', which has space for 'out_nbytes_avail' bytes.
|
||||
* The return value is the compressed size in bytes, or 0 if the data could not
|
||||
* be compressed to 'out_nbytes_avail' bytes or fewer.
|
||||
*/
|
||||
extern size_t
|
||||
deflate_compress(struct deflate_compressor *compressor,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail);
|
||||
|
||||
/*
|
||||
* Like deflate_compress(), but store the data in the zlib wrapper format.
|
||||
*/
|
||||
extern size_t
|
||||
zlib_compress(struct deflate_compressor *compressor,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail);
|
||||
|
||||
/*
|
||||
* Like deflate_compress(), but store the data in the gzip wrapper format.
|
||||
*/
|
||||
extern size_t
|
||||
gzip_compress(struct deflate_compressor *compressor,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail);
|
||||
|
||||
/*
|
||||
* deflate_free_compressor() frees a DEFLATE compressor that was allocated with
|
||||
* deflate_alloc_compressor().
|
||||
*/
|
||||
extern void
|
||||
deflate_free_compressor(struct deflate_compressor *compressor);
|
||||
|
||||
/* ========================================================================== */
|
||||
/* Decompression */
|
||||
/* ========================================================================== */
|
||||
|
||||
struct deflate_decompressor;
|
||||
|
||||
/*
|
||||
* deflate_alloc_decompressor() allocates a new DEFLATE decompressor. The
|
||||
* return value is a pointer to the new DEFLATE decompressor, or NULL if out of
|
||||
* memory.
|
||||
*
|
||||
* This function takes no parameters, and the returned decompressor is valid for
|
||||
* decompressing data that was compressed at any compression level and with any
|
||||
* sliding window size.
|
||||
*/
|
||||
extern struct deflate_decompressor *
|
||||
deflate_alloc_decompressor(void);
|
||||
|
||||
/*
|
||||
* deflate_decompress() decompresses 'in_nbytes' bytes of DEFLATE-compressed
|
||||
* data at 'in' and writes the uncompressed data, which had original size
|
||||
* 'out_nbytes', to 'out'. The return value is true if decompression was
|
||||
* successful, or false if the compressed data was invalid.
|
||||
*
|
||||
* To be clear: the uncompressed size must be known *exactly* and passed as
|
||||
* 'out_nbytes'.
|
||||
*/
|
||||
extern bool
|
||||
deflate_decompress(struct deflate_decompressor *decompressor,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes);
|
||||
|
||||
/*
|
||||
* Like deflate_decompress(), but assumes the zlib wrapper format instead of raw
|
||||
* DEFLATE.
|
||||
*/
|
||||
extern bool
|
||||
zlib_decompress(struct deflate_decompressor *decompressor,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes);
|
||||
|
||||
/*
|
||||
* Like deflate_decompress(), but assumes the gzip wrapper format instead of raw
|
||||
* DEFLATE.
|
||||
*/
|
||||
extern bool
|
||||
gzip_decompress(struct deflate_decompressor *decompressor,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes);
|
||||
|
||||
/*
|
||||
* deflate_free_decompressor() frees a DEFLATE decompressor that was allocated
|
||||
* with deflate_alloc_decompressor().
|
||||
*/
|
||||
extern void
|
||||
deflate_free_decompressor(struct deflate_decompressor *decompressor);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* LIBDEFLATE_H */
|
19
src/adler32.c
Normal file
19
src/adler32.c
Normal file
@ -0,0 +1,19 @@
|
||||
/*
|
||||
* adler32.c
|
||||
*
|
||||
* Adler-32 checksum algorithm.
|
||||
*/
|
||||
|
||||
#include "adler32.h"
|
||||
|
||||
u32
|
||||
adler32(const u8 *buffer, size_t size)
|
||||
{
|
||||
u32 s1 = 1;
|
||||
u32 s2 = 0;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
s1 = (s1 + buffer[i]) % 65521;
|
||||
s2 = (s2 + s1) % 65521;
|
||||
}
|
||||
return (s2 << 16) | s1;
|
||||
}
|
12
src/adler32.h
Normal file
12
src/adler32.h
Normal file
@ -0,0 +1,12 @@
|
||||
/*
|
||||
* adler32.h
|
||||
*
|
||||
* Adler-32 checksum algorithm.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
extern u32
|
||||
adler32(const u8 *buffer, size_t size);
|
80
src/bitops.h
Normal file
80
src/bitops.h
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* bitops.h
|
||||
*
|
||||
* Inline functions for bit manipulation.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "compiler.h"
|
||||
#include "types.h"
|
||||
|
||||
/* Find Last Set bit */
|
||||
|
||||
/*
 * Return the 0-based index of the most significant set bit of the 32-bit
 * value 'v'.  The compiler-provided implementation is used when available.
 * The portable fallback returns 0 for v == 0, but callers should not rely on
 * a particular result for zero.
 */
static inline unsigned fls32(u32 v)
{
#ifdef compiler_fls32
	return compiler_fls32(v);
#else
	unsigned index;

	/* Shift right until only the top bit is left, counting the shifts. */
	for (index = 0; v > 1; index++)
		v >>= 1;
	return index;
#endif
}
|
||||
|
||||
/*
 * 64-bit counterpart of fls32(): return the 0-based index of the most
 * significant set bit of 'v', using the compiler-provided implementation when
 * one is defined.
 */
static inline unsigned fls64(u64 v)
{
#ifdef compiler_fls64
	return compiler_fls64(v);
#else
	unsigned index;

	/* Shift right until only the top bit is left, counting the shifts. */
	for (index = 0; v > 1; index++)
		v >>= 1;
	return index;
#endif
}
|
||||
|
||||
/*
 * Find Last Set bit of a machine word: dispatches to fls32() or fls64()
 * depending on WORDSIZE, which must be 4 or 8 (checked at build time).
 */
static inline unsigned flsw(machine_word_t v)
{
	BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);

	return (WORDSIZE == 4) ? fls32(v) : fls64(v);
}
|
||||
|
||||
/* Find First Set bit */
|
||||
|
||||
/*
 * Return the 0-based index of the least significant set bit of the 32-bit
 * value 'v'.  'v' must be nonzero: the portable fallback below would never
 * terminate for v == 0.
 */
static inline unsigned ffs32(u32 v)
{
#ifdef compiler_ffs32
	return compiler_ffs32(v);
#else
	unsigned index = 0;

	/* Drop zero bits from the bottom until a set bit reaches bit 0. */
	while ((v & 1) == 0) {
		v >>= 1;
		index++;
	}
	return index;
#endif
}
|
||||
|
||||
/*
 * 64-bit counterpart of ffs32(): return the 0-based index of the least
 * significant set bit of 'v'.  'v' must be nonzero: the portable fallback
 * below would never terminate for v == 0.
 */
static inline unsigned ffs64(u64 v)
{
#ifdef compiler_ffs64
	return compiler_ffs64(v);
#else
	unsigned index = 0;

	/* Drop zero bits from the bottom until a set bit reaches bit 0. */
	while ((v & 1) == 0) {
		v >>= 1;
		index++;
	}
	return index;
#endif
}
|
||||
|
||||
/*
 * Find First Set bit of a machine word: dispatches to ffs32() or ffs64()
 * depending on WORDSIZE, which must be 4 or 8 (checked at build time).
 */
static inline unsigned ffsw(machine_word_t v)
{
	BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);

	return (WORDSIZE == 4) ? ffs32(v) : ffs64(v);
}
|
279
src/bt_matchfinder.h
Normal file
279
src/bt_matchfinder.h
Normal file
@ -0,0 +1,279 @@
|
||||
/*
|
||||
* bt_matchfinder.h
|
||||
*
|
||||
* This is a Binary Tree (bt) based matchfinder.
|
||||
*
|
||||
* The data structure is a hash table where each hash bucket contains a binary
|
||||
* tree of sequences, referenced by position. The sequences in the binary tree
|
||||
* are ordered such that a left child is lexicographically lesser than its
|
||||
* parent, and a right child is lexicographically greater than its parent.
|
||||
*
|
||||
* For each sequence (position) in the input, the first 3 bytes are hashed and
|
||||
* the appropriate binary tree is re-rooted at that sequence (position).
|
||||
* Since the sequences are inserted in order, each binary tree maintains the
|
||||
* invariant that each child node has greater match offset than its parent.
|
||||
*
|
||||
* While inserting a sequence, we may search the binary tree for matches with
|
||||
* that sequence. At each step, the length of the match is computed. The
|
||||
* search ends when the sequences get too far away (outside of the sliding
|
||||
* window), or when the binary tree ends (in the code this is the same check as
|
||||
* "too far away"), or when 'max_search_depth' positions have been searched, or
|
||||
* when a match of at least 'nice_len' bytes has been found.
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
* - Typically, we need to search more nodes to find a given match in a
|
||||
* binary tree versus in a linked list. However, a binary tree has more
|
||||
* overhead than a linked list: it needs to be kept sorted, and the inner
|
||||
* search loop is more complicated. As a result, binary trees are best
|
||||
* suited for compression modes where the potential matches are searched
|
||||
* more thoroughly.
|
||||
*
|
||||
* - Since no attempt is made to keep the binary trees balanced, it's
|
||||
* essential to have the 'max_search_depth' cutoff. Otherwise it could
|
||||
* take quadratic time to run data through the matchfinder.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lz_extend.h"
|
||||
#include "lz_hash3.h"
|
||||
#include "matchfinder_common.h"
|
||||
|
||||
#ifndef BT_MATCHFINDER_HASH_ORDER
|
||||
# if MATCHFINDER_WINDOW_ORDER < 14
|
||||
# define BT_MATCHFINDER_HASH_ORDER 14
|
||||
# else
|
||||
# define BT_MATCHFINDER_HASH_ORDER 15
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define BT_MATCHFINDER_HASH_LENGTH (1UL << BT_MATCHFINDER_HASH_ORDER)
|
||||
|
||||
#define BT_MATCHFINDER_TOTAL_LENGTH \
|
||||
(BT_MATCHFINDER_HASH_LENGTH + (2UL * MATCHFINDER_WINDOW_SIZE))
|
||||
|
||||
struct bt_matchfinder {
|
||||
union {
|
||||
pos_t mf_data[BT_MATCHFINDER_TOTAL_LENGTH];
|
||||
struct {
|
||||
pos_t hash_tab[BT_MATCHFINDER_HASH_LENGTH];
|
||||
pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
|
||||
};
|
||||
};
|
||||
} _aligned_attribute(MATCHFINDER_ALIGNMENT);
|
||||
|
||||
static inline void
|
||||
bt_matchfinder_init(struct bt_matchfinder *mf)
|
||||
{
|
||||
matchfinder_init(mf->hash_tab, BT_MATCHFINDER_HASH_LENGTH);
|
||||
}
|
||||
|
||||
#if MATCHFINDER_IS_SLIDING
/*
 * Slide the window: hand every entry of the matchfinder (hash table and
 * child table alike, through the flat mf_data view) to matchfinder_rebase().
 * Only available when sliding windows are enabled at compile time.
 */
static inline void
bt_matchfinder_slide_window(struct bt_matchfinder *mf)
{
	pos_t * const entries = mf->mf_data;

	matchfinder_rebase(entries, BT_MATCHFINDER_TOTAL_LENGTH);
}
#endif
|
||||
|
||||
/*
|
||||
* Find matches with the current sequence.
|
||||
*
|
||||
* @mf
|
||||
* The matchfinder structure.
|
||||
* @in_base
|
||||
* Pointer to the next byte in the input buffer to process _at the last
|
||||
* time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
|
||||
* @in_next
|
||||
* Pointer to the next byte in the input buffer to process. This is the
|
||||
* pointer to the bytes being matched against.
|
||||
* @max_len
|
||||
* Maximum match length to return.
|
||||
* @nice_len
|
||||
* Stop searching if a match of at least this length is found.
|
||||
* @max_search_depth
|
||||
* Limit on the number of potential matches to consider.
|
||||
* @prev_hash
|
||||
* TODO
|
||||
* @matches
|
||||
* Space to write the matches that are found.
|
||||
*
|
||||
* Returns the number of matches found, which may be anywhere from 0 to
|
||||
* (nice_len - 3 + 1), inclusively. The matches are written to @matches in
|
||||
* order of strictly increasing length and strictly increasing offset. The
|
||||
* minimum match length is assumed to be 3.
|
||||
*/
|
||||
static inline unsigned
|
||||
bt_matchfinder_get_matches(struct bt_matchfinder * const restrict mf,
|
||||
const u8 * const in_base,
|
||||
const u8 * const in_next,
|
||||
const unsigned max_len,
|
||||
const unsigned nice_len,
|
||||
const unsigned max_search_depth,
|
||||
unsigned long *prev_hash,
|
||||
struct lz_match * const restrict matches)
|
||||
{
|
||||
struct lz_match *lz_matchptr = matches;
|
||||
unsigned depth_remaining = max_search_depth;
|
||||
unsigned hash;
|
||||
pos_t cur_match;
|
||||
const u8 *matchptr;
|
||||
unsigned best_len;
|
||||
pos_t *pending_lt_ptr, *pending_gt_ptr;
|
||||
unsigned best_lt_len, best_gt_len;
|
||||
unsigned len;
|
||||
pos_t *children;
|
||||
|
||||
if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES + 1))
|
||||
return 0;
|
||||
|
||||
hash = *prev_hash;
|
||||
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
|
||||
prefetch(&mf->hash_tab[*prev_hash]);
|
||||
cur_match = mf->hash_tab[hash];
|
||||
mf->hash_tab[hash] = in_next - in_base;
|
||||
|
||||
best_len = 2;
|
||||
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
|
||||
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
|
||||
best_lt_len = 0;
|
||||
best_gt_len = 0;
|
||||
for (;;) {
|
||||
if (!matchfinder_match_in_window(cur_match,
|
||||
in_base, in_next) ||
|
||||
!depth_remaining--)
|
||||
{
|
||||
*pending_lt_ptr = MATCHFINDER_INITVAL;
|
||||
*pending_gt_ptr = MATCHFINDER_INITVAL;
|
||||
return lz_matchptr - matches;
|
||||
}
|
||||
|
||||
matchptr = &in_base[cur_match];
|
||||
len = min(best_lt_len, best_gt_len);
|
||||
|
||||
children = &mf->child_tab[(unsigned long)
|
||||
matchfinder_slot_for_match(cur_match) << 1];
|
||||
|
||||
if (matchptr[len] == in_next[len]) {
|
||||
|
||||
len = lz_extend(in_next, matchptr, len + 1, max_len);
|
||||
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
|
||||
lz_matchptr->length = len;
|
||||
lz_matchptr->offset = in_next - matchptr;
|
||||
lz_matchptr++;
|
||||
|
||||
if (len >= nice_len) {
|
||||
*pending_lt_ptr = children[0];
|
||||
*pending_gt_ptr = children[1];
|
||||
return lz_matchptr - matches;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (matchptr[len] < in_next[len]) {
|
||||
*pending_lt_ptr = cur_match;
|
||||
pending_lt_ptr = &children[1];
|
||||
cur_match = *pending_lt_ptr;
|
||||
best_lt_len = len;
|
||||
} else {
|
||||
*pending_gt_ptr = cur_match;
|
||||
pending_gt_ptr = &children[0];
|
||||
cur_match = *pending_gt_ptr;
|
||||
best_gt_len = len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the match-finder, but don't search for matches.
|
||||
*
|
||||
* @mf
|
||||
* The matchfinder structure.
|
||||
* @in_base
|
||||
* Pointer to the next byte in the input buffer to process _at the last
|
||||
* time bc_matchfinder_init() or bc_matchfinder_slide_window() was called_.
|
||||
* @in_next
|
||||
* Pointer to the next byte in the input buffer to process.
|
||||
* @in_end
|
||||
* Pointer to the end of the input buffer.
|
||||
* @nice_len
|
||||
* Stop searching if a match of at least this length is found.
|
||||
* @max_search_depth
|
||||
* Limit on the number of potential matches to consider.
|
||||
* @prev_hash
|
||||
* TODO
|
||||
*/
|
||||
static inline void
|
||||
bt_matchfinder_skip_position(struct bt_matchfinder * const restrict mf,
|
||||
const u8 * const in_base,
|
||||
const u8 * const in_next,
|
||||
const u8 * const in_end,
|
||||
const unsigned nice_len,
|
||||
const unsigned max_search_depth,
|
||||
unsigned long *prev_hash)
|
||||
{
|
||||
unsigned depth_remaining = max_search_depth;
|
||||
unsigned hash;
|
||||
pos_t cur_match;
|
||||
const u8 *matchptr;
|
||||
pos_t *pending_lt_ptr, *pending_gt_ptr;
|
||||
unsigned best_lt_len, best_gt_len;
|
||||
unsigned len;
|
||||
pos_t *children;
|
||||
|
||||
if (unlikely(in_end - in_next < LZ_HASH_REQUIRED_NBYTES + 1))
|
||||
return;
|
||||
|
||||
hash = *prev_hash;
|
||||
*prev_hash = lz_hash3(in_next + 1, BT_MATCHFINDER_HASH_ORDER);
|
||||
prefetch(&mf->hash_tab[*prev_hash]);
|
||||
cur_match = mf->hash_tab[hash];
|
||||
mf->hash_tab[hash] = in_next - in_base;
|
||||
|
||||
depth_remaining = max_search_depth;
|
||||
pending_lt_ptr = &mf->child_tab[(in_next - in_base) << 1];
|
||||
pending_gt_ptr = &mf->child_tab[((in_next - in_base) << 1) + 1];
|
||||
best_lt_len = 0;
|
||||
best_gt_len = 0;
|
||||
for (;;) {
|
||||
if (!matchfinder_match_in_window(cur_match,
|
||||
in_base, in_next) ||
|
||||
!depth_remaining--)
|
||||
{
|
||||
*pending_lt_ptr = MATCHFINDER_INITVAL;
|
||||
*pending_gt_ptr = MATCHFINDER_INITVAL;
|
||||
return;
|
||||
}
|
||||
|
||||
matchptr = &in_base[cur_match];
|
||||
len = min(best_lt_len, best_gt_len);
|
||||
|
||||
children = &mf->child_tab[(unsigned long)
|
||||
matchfinder_slot_for_match(cur_match) << 1];
|
||||
|
||||
if (matchptr[len] == in_next[len]) {
|
||||
len = lz_extend(in_next, matchptr, len + 1, nice_len);
|
||||
if (len == nice_len) {
|
||||
*pending_lt_ptr = children[0];
|
||||
*pending_gt_ptr = children[1];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (matchptr[len] < in_next[len]) {
|
||||
*pending_lt_ptr = cur_match;
|
||||
pending_lt_ptr = &children[1];
|
||||
cur_match = *pending_lt_ptr;
|
||||
best_lt_len = len;
|
||||
} else {
|
||||
*pending_gt_ptr = cur_match;
|
||||
pending_gt_ptr = &children[0];
|
||||
cur_match = *pending_gt_ptr;
|
||||
best_gt_len = len;
|
||||
}
|
||||
}
|
||||
}
|
52
src/compiler-gcc.h
Normal file
52
src/compiler-gcc.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* compiler-gcc.h
|
||||
*
|
||||
* Compiler and platform-specific definitions for the GNU C compiler.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Marks a symbol as exported from the library.  On non-Windows targets this
 * is a "default" visibility attribute. */
#ifdef __WIN32__
#  define LIBEXPORT __declspec(dllexport)
#else
#  define LIBEXPORT __attribute__((visibility("default")))
#endif

/* Branch prediction hints and cache prefetching. */
#define likely(expr)		__builtin_expect(!!(expr), 1)
#define unlikely(expr)		__builtin_expect(!!(expr), 0)
#define prefetch(addr)		__builtin_prefetch(addr)

/* Every function declared 'inline' is forced to be inlined. */
#define inline			inline __attribute__((always_inline))

/* Type attributes for alignment and packing. */
#define _aligned_attribute(n)	__attribute__((aligned(n)))
#define _packed_attribute	__attribute__((packed))

#define CPU_IS_BIG_ENDIAN	(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)

/* Rating of unaligned memory accesses on the target: 3 on x86, 2 on ARM
 * when the compiler reports unaligned access support, 0 otherwise. */
#if defined(__x86_64__) || defined(__i386__)
#  define UNALIGNED_ACCESS_SPEED 3
#elif defined(__ARM_FEATURE_UNALIGNED) && (__ARM_FEATURE_UNALIGNED == 1)
#  define UNALIGNED_ACCESS_SPEED 2
#else
#  define UNALIGNED_ACCESS_SPEED 0
#endif

/* min() and max() evaluate each argument exactly once. */
#define min(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
			(_a < _b) ? _a : _b; })

#define max(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
			(_a > _b) ? _a : _b; })

/* Exchange the values of two lvalues.  Every use of an argument is
 * parenthesized so that any lvalue expression can be passed safely. */
#define swap(a, b) ({ __typeof__(a) _a = (a); (a) = (b); (b) = _a; })

/* Byte swapping builtins, depending on the GCC version. */
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
#  define compiler_bswap32 __builtin_bswap32
#  define compiler_bswap64 __builtin_bswap64
#endif

#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
#  define compiler_bswap16 __builtin_bswap16
#endif

/* Bit scanning: index of the highest (fls) and lowest (ffs) set bit.  The
 * argument must be nonzero. */
#define compiler_fls32(n)	(31 - __builtin_clz(n))
#define compiler_fls64(n)	(63 - __builtin_clzll(n))
#define compiler_ffs32(n)	__builtin_ctz(n)
#define compiler_ffs64(n)	__builtin_ctzll(n)
|
60
src/compiler.h
Normal file
60
src/compiler.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* compiler.h
|
||||
*
|
||||
* Compiler and platform-specific definitions.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Pull in the compiler-specific definitions; only GNU C has a header. */
#if defined(__GNUC__)
#  include "compiler-gcc.h"
#else
#  warning "Unrecognized compiler. Please add a header file for your compiler."
#endif

/*
 * Optional definitions: harmless fallbacks are supplied for anything the
 * compiler-specific header did not provide.
 */

#if !defined(LIBEXPORT)
#  define LIBEXPORT
#endif

/* Fails to compile (negative array size) when 'condition' is true. */
#if !defined(BUILD_BUG_ON)
#  define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
#endif

#if !defined(likely)
#  define likely(expr) (expr)
#endif

#if !defined(unlikely)
#  define unlikely(expr) (expr)
#endif

#if !defined(prefetch)
#  define prefetch(addr)
#endif

/*
 * Required definitions: these have no portable fallback, so the build is
 * stopped when they are missing.
 */

#if !defined(_aligned_attribute)
#  error "missing required definition of _aligned_attribute"
#endif

#if !defined(_packed_attribute)
#  error "missing required definition of _packed_attribute"
#endif

#if !defined(CPU_IS_BIG_ENDIAN)
#  error "missing required endianness definition"
#endif

#define CPU_IS_LITTLE_ENDIAN (!CPU_IS_BIG_ENDIAN)

/* Without better knowledge, unaligned accesses are treated as forbidden. */
#if !defined(UNALIGNED_ACCESS_SPEED)
#  warning "assuming unaligned accesses are not allowed"
#  define UNALIGNED_ACCESS_SPEED 0
#endif

/* Predicates derived from the speed rating. */
#define UNALIGNED_ACCESS_IS_ALLOWED	(UNALIGNED_ACCESS_SPEED >= 1)
#define UNALIGNED_ACCESS_IS_FAST	(UNALIGNED_ACCESS_SPEED >= 2)
#define UNALIGNED_ACCESS_IS_VERY_FAST	(UNALIGNED_ACCESS_SPEED >= 3)

#if !(defined(min) && defined(max) && defined(swap))
#  error "missing required definitions of min(), max(), and swap() macros"
#endif
|
73
src/crc32.c
Normal file
73
src/crc32.c
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
* crc32.c
|
||||
*
|
||||
* CRC-32 checksum algorithm.
|
||||
*/
|
||||
|
||||
#include "crc32.h"
|
||||
|
||||
static const u32 crc_table[256] = {
|
||||
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
|
||||
0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
|
||||
0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
|
||||
0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
|
||||
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
|
||||
0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
|
||||
0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
|
||||
0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
|
||||
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
|
||||
0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
|
||||
0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
|
||||
0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
|
||||
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
|
||||
0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
|
||||
0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
|
||||
0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
|
||||
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
|
||||
0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
|
||||
0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
|
||||
0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
|
||||
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
|
||||
0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
|
||||
0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
|
||||
0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
|
||||
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
|
||||
0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
|
||||
0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
|
||||
0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
|
||||
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
|
||||
0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
|
||||
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
|
||||
0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
|
||||
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
|
||||
0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
|
||||
0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
|
||||
0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
|
||||
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
|
||||
0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
|
||||
0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
|
||||
0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
|
||||
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
|
||||
0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
|
||||
0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
|
||||
0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
|
||||
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
|
||||
0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
|
||||
0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
|
||||
0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
|
||||
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
|
||||
0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
|
||||
0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
|
||||
0x2d02ef8d,
|
||||
};
|
||||
|
||||
u32
|
||||
crc32(const u8 *buffer, size_t size)
|
||||
{
|
||||
u32 crc = ~0;
|
||||
|
||||
for (size_t i = 0; i < size; i++)
|
||||
crc = crc_table[(u8)crc ^ buffer[i]] ^ (crc >> 8);
|
||||
|
||||
return ~crc;
|
||||
}
|
12
src/crc32.h
Normal file
12
src/crc32.h
Normal file
@ -0,0 +1,12 @@
|
||||
/*
|
||||
* crc32.h
|
||||
*
|
||||
* CRC-32 checksum algorithm.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
extern u32
|
||||
crc32(const u8 *buffer, size_t size);
|
2323
src/deflate_compress.c
Normal file
2323
src/deflate_compress.c
Normal file
File diff suppressed because it is too large
Load Diff
9
src/deflate_compress.h
Normal file
9
src/deflate_compress.h
Normal file
@ -0,0 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
/* 'struct deflate_compressor' is private to deflate_compress.c, but zlib header
|
||||
* generation needs to be able to query the compression level. */
|
||||
|
||||
struct deflate_compressor;
|
||||
|
||||
extern unsigned int
|
||||
deflate_get_compression_level(struct deflate_compressor *c);
|
59
src/deflate_constants.h
Normal file
59
src/deflate_constants.h
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* deflate_constants.h
|
||||
*
|
||||
* Constants for the DEFLATE compression format.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Valid block types */
|
||||
#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0
|
||||
#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1
|
||||
#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2
|
||||
|
||||
/* Minimum and maximum supported match lengths (in bytes) */
|
||||
#define DEFLATE_MIN_MATCH_LEN 3
|
||||
#define DEFLATE_MAX_MATCH_LEN 258
|
||||
|
||||
/* Minimum and maximum supported match offsets (in bytes) */
|
||||
#define DEFLATE_MIN_MATCH_OFFSET 1
|
||||
#define DEFLATE_MAX_MATCH_OFFSET 32767
|
||||
|
||||
#define DEFLATE_MAX_WINDOW_SIZE 32768
|
||||
|
||||
/* Number of symbols in each Huffman code. Note: for the literal/length
|
||||
* and offset codes, these are actually the maximum values; a given block
|
||||
* might use fewer symbols. */
|
||||
#define DEFLATE_NUM_PRECODE_SYMS 19
|
||||
#define DEFLATE_NUM_LITLEN_SYMS 288
|
||||
#define DEFLATE_NUM_OFFSET_SYMS 32
|
||||
|
||||
/* Division of symbols in the literal/length code */
|
||||
#define DEFLATE_NUM_LITERALS 256
|
||||
#define DEFLATE_END_OF_BLOCK 256
|
||||
#define DEFLATE_NUM_LEN_SYMS 31
|
||||
|
||||
/* Maximum codeword length, in bits, within each Huffman code */
|
||||
#define DEFLATE_MAX_PRE_CODEWORD_LEN 7
|
||||
#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15
|
||||
#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15
|
||||
|
||||
/* Maximum possible overrun when decoding codeword lengths */
|
||||
#define DEFLATE_MAX_LENS_OVERRUN 137
|
||||
|
||||
/*
|
||||
* Maximum number of extra bits that may be required to represent a match
|
||||
* length or offset.
|
||||
*
|
||||
* TODO: are we going to have full DEFLATE64 support? If so, up to 16
|
||||
* length bits must be supported.
|
||||
*/
|
||||
#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5
|
||||
#define DEFLATE_MAX_EXTRA_OFFSET_BITS 14
|
||||
|
||||
/* The maximum number of bits in which a match can be represented. This
|
||||
* is the absolute worst case, which assumes the longest possible Huffman
|
||||
* codewords and the maximum numbers of extra bits. */
|
||||
#define DEFLATE_MAX_MATCH_BITS \
|
||||
(DEFLATE_MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + \
|
||||
DEFLATE_MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)
|
1455
src/deflate_decompress.c
Normal file
1455
src/deflate_decompress.c
Normal file
File diff suppressed because it is too large
Load Diff
75
src/endianness.h
Normal file
75
src/endianness.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* endianness.h
|
||||
*
|
||||
* Inline functions for endianness conversion.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "compiler.h"
|
||||
#include "types.h"
|
||||
|
||||
/*
 * Reverse the byte order of a 16-bit value.  The compiler-provided
 * implementation is used when 'compiler_bswap16' is defined; otherwise the two
 * bytes are exchanged with shifts.
 */
static inline u16 bswap16(u16 n)
{
#ifdef compiler_bswap16
	return compiler_bswap16(n);
#else
	/* The conversion to u16 discards the bits shifted above bit 15. */
	return (u16)((n >> 8) | (n << 8));
#endif
}
|
||||
|
||||
/*
 * Reverse the byte order of a 32-bit value.  The compiler-provided
 * implementation is used when 'compiler_bswap32' is defined; otherwise each
 * byte is masked out and moved to its mirrored position.
 */
static inline u32 bswap32(u32 n)
{
#ifdef compiler_bswap32
	return compiler_bswap32(n);
#else
	u32 swapped;

	swapped  = n << 24;			/* byte 0 -> byte 3 */
	swapped |= (n & 0xFF00) << 8;		/* byte 1 -> byte 2 */
	swapped |= (n & 0xFF0000) >> 8;		/* byte 2 -> byte 1 */
	swapped |= n >> 24;			/* byte 3 -> byte 0 */
	return swapped;
#endif
}
|
||||
|
||||
/*
 * Reverse the byte order of a 64-bit value.  The compiler-provided
 * implementation is used when 'compiler_bswap64' is defined; otherwise the
 * bytes are peeled off the low end of @n and pushed onto the low end of the
 * result, which reverses their order.
 */
static inline u64 bswap64(u64 n)
{
#ifdef compiler_bswap64
	return compiler_bswap64(n);
#else
	u64 swapped = 0;

	for (unsigned i = 0; i < 8; i++) {
		swapped = (swapped << 8) | (n & 0xFF);
		n >>= 8;
	}
	return swapped;
#endif
}
|
||||
|
||||
#if CPU_IS_BIG_ENDIAN
|
||||
# define cpu_to_le16(n) bswap16(n)
|
||||
# define cpu_to_le32(n) bswap32(n)
|
||||
# define cpu_to_le64(n) bswap64(n)
|
||||
# define le16_to_cpu(n) bswap16(n)
|
||||
# define le32_to_cpu(n) bswap32(n)
|
||||
# define le64_to_cpu(n) bswap64(n)
|
||||
# define cpu_to_be16(n) (n)
|
||||
# define cpu_to_be32(n) (n)
|
||||
# define cpu_to_be64(n) (n)
|
||||
# define be16_to_cpu(n) (n)
|
||||
# define be32_to_cpu(n) (n)
|
||||
# define be64_to_cpu(n) (n)
|
||||
#else
|
||||
# define cpu_to_le16(n) (n)
|
||||
# define cpu_to_le32(n) (n)
|
||||
# define cpu_to_le64(n) (n)
|
||||
# define le16_to_cpu(n) (n)
|
||||
# define le32_to_cpu(n) (n)
|
||||
# define le64_to_cpu(n) (n)
|
||||
# define cpu_to_be16(n) bswap16(n)
|
||||
# define cpu_to_be32(n) bswap32(n)
|
||||
# define cpu_to_be64(n) bswap64(n)
|
||||
# define be16_to_cpu(n) bswap16(n)
|
||||
# define be32_to_cpu(n) bswap32(n)
|
||||
# define be64_to_cpu(n) bswap64(n)
|
||||
#endif
|
64
src/gzip_compress.c
Normal file
64
src/gzip_compress.c
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* gzip_compress.c
|
||||
*
|
||||
* Generate DEFLATE-compressed data in the gzip wrapper format.
|
||||
*/
|
||||
|
||||
#include "libdeflate.h"
|
||||
|
||||
#include "crc32.h"
|
||||
#include "deflate_compress.h"
|
||||
#include "gzip_constants.h"
|
||||
#include "unaligned.h"
|
||||
|
||||
LIBEXPORT size_t
|
||||
gzip_compress(struct deflate_compressor *c, const void *in, size_t in_size,
|
||||
void *out, size_t out_nbytes_avail)
|
||||
{
|
||||
u8 *out_next = out;
|
||||
unsigned compression_level;
|
||||
u8 xfl;
|
||||
size_t deflate_size;
|
||||
|
||||
if (out_nbytes_avail <= GZIP_MIN_OVERHEAD)
|
||||
return 0;
|
||||
|
||||
/* ID1 */
|
||||
*out_next++ = GZIP_ID1;
|
||||
/* ID2 */
|
||||
*out_next++ = GZIP_ID2;
|
||||
/* CM */
|
||||
*out_next++ = GZIP_CM_DEFLATE;
|
||||
/* FLG */
|
||||
*out_next++ = 0;
|
||||
/* MTIME */
|
||||
put_unaligned_u32_be(GZIP_MTIME_UNAVAILABLE, out_next);
|
||||
out_next += 4;
|
||||
/* XFL */
|
||||
xfl = 0;
|
||||
compression_level = deflate_get_compression_level(c);
|
||||
if (compression_level < 2)
|
||||
xfl |= GZIP_XFL_FASTEST_COMRESSION;
|
||||
else if (compression_level >= 8)
|
||||
xfl |= GZIP_XFL_SLOWEST_COMRESSION;
|
||||
*out_next++ = xfl;
|
||||
/* OS */
|
||||
*out_next++ = GZIP_OS_UNKNOWN; /* OS */
|
||||
|
||||
/* Compressed data */
|
||||
deflate_size = deflate_compress(c, in, in_size, out_next,
|
||||
out_nbytes_avail - GZIP_MIN_OVERHEAD);
|
||||
if (deflate_size == 0)
|
||||
return 0;
|
||||
out_next += deflate_size;
|
||||
|
||||
/* CRC32 */
|
||||
put_unaligned_u32_be(crc32(in, in_size), out_next);
|
||||
out_next += 4;
|
||||
|
||||
/* ISIZE */
|
||||
put_unaligned_u32_be(in_size, out_next);
|
||||
out_next += 4;
|
||||
|
||||
return out_next - (u8 *)out;
|
||||
}
|
47
src/gzip_constants.h
Normal file
47
src/gzip_constants.h
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* gzip_constants.h
|
||||
*
|
||||
* Constants for the gzip wrapper format.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
#define GZIP_MIN_HEADER_SIZE 10
|
||||
#define GZIP_FOOTER_SIZE 8
|
||||
#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
|
||||
|
||||
#define GZIP_ID1 0x1F
|
||||
#define GZIP_ID2 0x8B
|
||||
|
||||
#define GZIP_CM_DEFLATE 8
|
||||
|
||||
#define GZIP_FTEXT 0x01
|
||||
#define GZIP_FHCRC 0x02
|
||||
#define GZIP_FEXTRA 0x04
|
||||
#define GZIP_FNAME 0x08
|
||||
#define GZIP_FCOMMENT 0x10
|
||||
#define GZIP_FRESERVED 0xE0
|
||||
|
||||
#define GZIP_MTIME_UNAVAILABLE 0
|
||||
|
||||
#define GZIP_XFL_SLOWEST_COMRESSION 0x02
|
||||
#define GZIP_XFL_FASTEST_COMRESSION 0x04
|
||||
#define GZIP_XFL_RESERVED 0xF9
|
||||
|
||||
#define GZIP_OS_FAT 0
|
||||
#define GZIP_OS_AMIGA 1
|
||||
#define GZIP_OS_VMS 2
|
||||
#define GZIP_OS_UNIX 3
|
||||
#define GZIP_OS_VM_CMS 4
|
||||
#define GZIP_OS_ATARI_TOS 5
|
||||
#define GZIP_OS_HPFS 6
|
||||
#define GZIP_OS_MACINTOSH 7
|
||||
#define GZIP_OS_Z_SYSTEM 8
|
||||
#define GZIP_OS_CP_M 9
|
||||
#define GZIP_OS_TOPS_20 10
|
||||
#define GZIP_OS_NTFS 11
|
||||
#define GZIP_OS_QDOS 12
|
||||
#define GZIP_OS_RISCOS 13
|
||||
#define GZIP_OS_UNKNOWN 255
|
100
src/gzip_decompress.c
Normal file
100
src/gzip_decompress.c
Normal file
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* gzip_decompress.c
|
||||
*
|
||||
* Decompress DEFLATE-compressed data wrapped in the gzip format.
|
||||
*/
|
||||
|
||||
#include "libdeflate.h"
|
||||
|
||||
#include "crc32.h"
|
||||
#include "gzip_constants.h"
|
||||
#include "unaligned.h"
|
||||
|
||||
LIBEXPORT bool
|
||||
gzip_decompress(struct deflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes, void *out, size_t out_nbytes)
|
||||
{
|
||||
const u8 *in_next = in;
|
||||
const u8 * const in_end = in_next + in_nbytes;
|
||||
u8 flg;
|
||||
|
||||
if (in_nbytes < GZIP_MIN_OVERHEAD)
|
||||
return false;
|
||||
|
||||
/* ID1 */
|
||||
if (*in_next++ != GZIP_ID1)
|
||||
return false;
|
||||
/* ID2 */
|
||||
if (*in_next++ != GZIP_ID2)
|
||||
return false;
|
||||
/* CM */
|
||||
if (*in_next++ != GZIP_CM_DEFLATE)
|
||||
return false;
|
||||
flg = *in_next++;
|
||||
/* MTIME */
|
||||
in_next += 4;
|
||||
/* XFL */
|
||||
if (*in_next++ & GZIP_XFL_RESERVED)
|
||||
return false;
|
||||
/* OS */
|
||||
in_next += 1;
|
||||
|
||||
if (flg & GZIP_FRESERVED)
|
||||
return false;
|
||||
|
||||
/* Extra field */
|
||||
if (flg & GZIP_FEXTRA) {
|
||||
u16 xlen = get_unaligned_u16_be(in_next);
|
||||
in_next += 2;
|
||||
|
||||
if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
|
||||
return false;
|
||||
|
||||
in_next += xlen;
|
||||
}
|
||||
|
||||
/* Original file name (zero terminated) */
|
||||
if (flg & GZIP_FNAME) {
|
||||
while (*in_next != 0 && ++in_next != in_end)
|
||||
;
|
||||
if (in_next != in_end)
|
||||
in_next++;
|
||||
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* File comment (zero terminated) */
|
||||
if (flg & GZIP_FCOMMENT) {
|
||||
while (*in_next != 0 && ++in_next != in_end)
|
||||
;
|
||||
if (in_next != in_end)
|
||||
in_next++;
|
||||
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* CRC16 for gzip header */
|
||||
if (flg & GZIP_FHCRC) {
|
||||
in_next += 2;
|
||||
if (in_end - in_next < GZIP_FOOTER_SIZE)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Compressed data */
|
||||
if (!deflate_decompress(d, in_next, in_end - GZIP_FOOTER_SIZE - in_next,
|
||||
out, out_nbytes))
|
||||
return false;
|
||||
|
||||
in_next = in_end - GZIP_FOOTER_SIZE;
|
||||
|
||||
/* CRC32 */
|
||||
if (crc32(out, out_nbytes) != get_unaligned_u32_be(in_next))
|
||||
return false;
|
||||
in_next += 4;
|
||||
|
||||
/* ISIZE */
|
||||
if ((u32)out_nbytes != get_unaligned_u32_be(in_next))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
235
src/hc_matchfinder.h
Normal file
235
src/hc_matchfinder.h
Normal file
@ -0,0 +1,235 @@
|
||||
/*
|
||||
* hc_matchfinder.h
|
||||
*
|
||||
* This is a Hash Chain (hc) based matchfinder.
|
||||
*
|
||||
* The data structure is a hash table where each hash bucket contains a linked
|
||||
* list of sequences, referenced by position.
|
||||
*
|
||||
* For each sequence (position) in the input, the first 3 bytes are hashed and
|
||||
* that sequence (position) is prepended to the appropriate linked list in the
|
||||
* hash table. Since the sequences are inserted in order, each list is always
|
||||
* sorted by increasing match offset.
|
||||
*
|
||||
* At the same time as inserting a sequence, we may search the linked list for
|
||||
* matches with that sequence. At each step, the length of the match is
|
||||
* computed. The search ends when the sequences get too far away (outside of
|
||||
* the sliding window), or when the list ends (in the code this is the same
|
||||
* check as "too far away"), or when 'max_search_depth' positions have been
|
||||
* searched, or when a match of at least 'nice_len' bytes has been found.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lz_extend.h"
|
||||
#include "lz_hash3.h"
|
||||
#include "matchfinder_common.h"
|
||||
#include "unaligned.h"
|
||||
|
||||
#ifndef HC_MATCHFINDER_HASH_ORDER
|
||||
# if MATCHFINDER_WINDOW_ORDER < 14
|
||||
# define HC_MATCHFINDER_HASH_ORDER 14
|
||||
# else
|
||||
# define HC_MATCHFINDER_HASH_ORDER 15
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define HC_MATCHFINDER_HASH_LENGTH (1UL << HC_MATCHFINDER_HASH_ORDER)
|
||||
|
||||
#define HC_MATCHFINDER_TOTAL_LENGTH \
|
||||
(HC_MATCHFINDER_HASH_LENGTH + MATCHFINDER_WINDOW_SIZE)
|
||||
|
||||
struct hc_matchfinder {
|
||||
union {
|
||||
pos_t mf_data[HC_MATCHFINDER_TOTAL_LENGTH];
|
||||
struct {
|
||||
pos_t hash_tab[HC_MATCHFINDER_HASH_LENGTH];
|
||||
pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
|
||||
};
|
||||
};
|
||||
} _aligned_attribute(MATCHFINDER_ALIGNMENT);
|
||||
|
||||
static inline void
|
||||
hc_matchfinder_init(struct hc_matchfinder *mf)
|
||||
{
|
||||
matchfinder_init(mf->hash_tab, HC_MATCHFINDER_HASH_LENGTH);
|
||||
}
|
||||
|
||||
#if MATCHFINDER_IS_SLIDING
/*
 * Rebase every stored position, in the hash table and in the chain links
 * alike, by handing the flat 'mf_data' view to matchfinder_rebase().
 */
static inline void
hc_matchfinder_slide_window(struct hc_matchfinder *mf)
{
	pos_t * const all_entries = mf->mf_data;

	matchfinder_rebase(all_entries, HC_MATCHFINDER_TOTAL_LENGTH);
}
#endif
|
||||
|
||||
/*
|
||||
* Find the longest match longer than 'best_len'.
|
||||
*
|
||||
* @mf
|
||||
* The matchfinder structure.
|
||||
* @in_base
|
||||
* Pointer to the next byte in the input buffer to process _at the last
|
||||
* time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
|
||||
* @in_next
|
||||
* Pointer to the next byte in the input buffer to process. This is the
|
||||
* pointer to the bytes being matched against.
|
||||
* @best_len
|
||||
* Require a match at least this long.
|
||||
* @max_len
|
||||
* Maximum match length to return.
|
||||
* @nice_len
|
||||
* Stop searching if a match of at least this length is found.
|
||||
* @max_search_depth
|
||||
* Limit on the number of potential matches to consider.
|
||||
* @offset_ret
|
||||
* The match offset is returned here.
|
||||
*
|
||||
* Return the length of the match found, or 'best_len' if no match longer than
|
||||
* 'best_len' was found.
|
||||
*/
|
||||
static inline unsigned
|
||||
hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
|
||||
const u8 * const in_base,
|
||||
const u8 * const in_next,
|
||||
unsigned best_len,
|
||||
const unsigned max_len,
|
||||
const unsigned nice_len,
|
||||
const unsigned max_search_depth,
|
||||
unsigned *offset_ret)
|
||||
{
|
||||
unsigned depth_remaining = max_search_depth;
|
||||
const u8 *best_matchptr = best_matchptr; /* uninitialized */
|
||||
const u8 *matchptr;
|
||||
unsigned len;
|
||||
unsigned hash;
|
||||
pos_t cur_match;
|
||||
u32 first_3_bytes;
|
||||
|
||||
/* Insert the current sequence into the appropriate hash chain. */
|
||||
if (unlikely(max_len < LZ_HASH_REQUIRED_NBYTES))
|
||||
goto out;
|
||||
first_3_bytes = load_u24_unaligned(in_next);
|
||||
hash = lz_hash3_u24(first_3_bytes, HC_MATCHFINDER_HASH_ORDER);
|
||||
cur_match = mf->hash_tab[hash];
|
||||
mf->next_tab[in_next - in_base] = cur_match;
|
||||
mf->hash_tab[hash] = in_next - in_base;
|
||||
|
||||
if (unlikely(best_len >= max_len))
|
||||
goto out;
|
||||
|
||||
/* Search the appropriate hash chain for matches. */
|
||||
|
||||
if (!(matchfinder_match_in_window(cur_match, in_base, in_next)))
|
||||
goto out;
|
||||
|
||||
if (best_len < 3) {
|
||||
for (;;) {
|
||||
/* No length 3 match found yet.
|
||||
* Check the first 3 bytes. */
|
||||
matchptr = &in_base[cur_match];
|
||||
|
||||
if (load_u24_unaligned(matchptr) == first_3_bytes)
|
||||
break;
|
||||
|
||||
/* Not a match; keep trying. */
|
||||
cur_match = mf->next_tab[
|
||||
matchfinder_slot_for_match(cur_match)];
|
||||
if (!matchfinder_match_in_window(cur_match,
|
||||
in_base, in_next))
|
||||
goto out;
|
||||
if (!--depth_remaining)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Found a length 3 match. */
|
||||
best_matchptr = matchptr;
|
||||
best_len = lz_extend(in_next, best_matchptr, 3, max_len);
|
||||
if (best_len >= nice_len)
|
||||
goto out;
|
||||
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
|
||||
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
|
||||
goto out;
|
||||
if (!--depth_remaining)
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
for (;;) {
|
||||
matchptr = &in_base[cur_match];
|
||||
|
||||
/* Already found a length 3 match. Try for a longer match;
|
||||
* start by checking the last 2 bytes and the first 4 bytes. */
|
||||
#if UNALIGNED_ACCESS_IS_FAST
|
||||
if ((load_u32_unaligned(matchptr + best_len - 3) ==
|
||||
load_u32_unaligned(in_next + best_len - 3)) &&
|
||||
(load_u32_unaligned(matchptr) ==
|
||||
load_u32_unaligned(in_next)))
|
||||
#else
|
||||
if (matchptr[best_len] == in_next[best_len])
|
||||
#endif
|
||||
break;
|
||||
|
||||
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
|
||||
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
|
||||
goto out;
|
||||
if (!--depth_remaining)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (UNALIGNED_ACCESS_IS_FAST)
|
||||
len = 4;
|
||||
else
|
||||
len = 0;
|
||||
len = lz_extend(in_next, matchptr, len, max_len);
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
best_matchptr = matchptr;
|
||||
if (best_len >= nice_len)
|
||||
goto out;
|
||||
}
|
||||
cur_match = mf->next_tab[matchfinder_slot_for_match(cur_match)];
|
||||
if (!matchfinder_match_in_window(cur_match, in_base, in_next))
|
||||
goto out;
|
||||
if (!--depth_remaining)
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
*offset_ret = in_next - best_matchptr;
|
||||
return best_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the match-finder, but don't search for matches.
|
||||
*
|
||||
* @mf
|
||||
* The matchfinder structure.
|
||||
* @in_base
|
||||
* Pointer to the next byte in the input buffer to process _at the last
|
||||
* time hc_matchfinder_init() or hc_matchfinder_slide_window() was called_.
|
||||
* @in_next
|
||||
* Pointer to the next byte in the input buffer to process.
|
||||
* @in_end
|
||||
* Pointer to the end of the input buffer.
|
||||
* @count
|
||||
* Number of bytes to skip; must be > 0.
|
||||
*/
|
||||
static inline void
|
||||
hc_matchfinder_skip_positions(struct hc_matchfinder * restrict mf,
|
||||
const u8 *in_base,
|
||||
const u8 *in_next,
|
||||
const u8 *in_end,
|
||||
unsigned count)
|
||||
{
|
||||
unsigned hash;
|
||||
|
||||
if (unlikely(in_next + count >= in_end - LZ_HASH_REQUIRED_NBYTES))
|
||||
return;
|
||||
|
||||
do {
|
||||
hash = lz_hash3(in_next, HC_MATCHFINDER_HASH_ORDER);
|
||||
mf->next_tab[in_next - in_base] = mf->hash_tab[hash];
|
||||
mf->hash_tab[hash] = in_next - in_base;
|
||||
in_next++;
|
||||
} while (--count);
|
||||
}
|
60
src/lz_extend.h
Normal file
60
src/lz_extend.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* lz_extend.h
|
||||
*
|
||||
* Fast match extension for Lempel-Ziv matchfinding.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bitops.h"
|
||||
#include "unaligned.h"
|
||||
|
||||
/*
|
||||
* Return the number of bytes at @matchptr that match the bytes at @strptr, up
|
||||
* to a maximum of @max_len. Initially, @start_len bytes are matched.
|
||||
*/
|
||||
static inline unsigned
|
||||
lz_extend(const u8 * const strptr, const u8 * const matchptr,
|
||||
const unsigned start_len, const unsigned max_len)
|
||||
{
|
||||
unsigned len = start_len;
|
||||
machine_word_t v_word;
|
||||
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
|
||||
if (likely(max_len - len >= 4 * WORDSIZE)) {
|
||||
|
||||
#define COMPARE_WORD_STEP \
|
||||
v_word = load_word_unaligned(&matchptr[len]) ^ \
|
||||
load_word_unaligned(&strptr[len]); \
|
||||
if (v_word != 0) \
|
||||
goto word_differs; \
|
||||
len += WORDSIZE; \
|
||||
|
||||
COMPARE_WORD_STEP
|
||||
COMPARE_WORD_STEP
|
||||
COMPARE_WORD_STEP
|
||||
COMPARE_WORD_STEP
|
||||
#undef COMPARE_WORD_STEP
|
||||
}
|
||||
|
||||
while (len + WORDSIZE <= max_len) {
|
||||
v_word = load_word_unaligned(&matchptr[len]) ^
|
||||
load_word_unaligned(&strptr[len]);
|
||||
if (v_word != 0)
|
||||
goto word_differs;
|
||||
len += WORDSIZE;
|
||||
}
|
||||
}
|
||||
|
||||
while (len < max_len && matchptr[len] == strptr[len])
|
||||
len++;
|
||||
return len;
|
||||
|
||||
word_differs:
|
||||
if (CPU_IS_LITTLE_ENDIAN)
|
||||
len += (ffsw(v_word) >> 3);
|
||||
else
|
||||
len += (flsw(v_word) >> 3);
|
||||
return len;
|
||||
}
|
49
src/lz_hash3.h
Normal file
49
src/lz_hash3.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* lz_hash3.h
|
||||
*
|
||||
* 3-byte hashing for Lempel-Ziv matchfinding.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "unaligned.h"
|
||||
|
||||
static inline u32
|
||||
loaded_u32_to_u24(u32 v)
|
||||
{
|
||||
if (CPU_IS_LITTLE_ENDIAN)
|
||||
return v & 0xFFFFFF;
|
||||
else
|
||||
return v >> 8;
|
||||
}
|
||||
|
||||
static inline u32
|
||||
load_u24_unaligned(const u8 *p)
|
||||
{
|
||||
if (UNALIGNED_ACCESS_IS_FAST)
|
||||
return loaded_u32_to_u24(load_u32_unaligned(p));
|
||||
else
|
||||
return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
|
||||
}
|
||||
|
||||
static inline u32
|
||||
lz_hash3_u24(u32 str, unsigned num_bits)
|
||||
{
|
||||
return (u32)(str * 0x1E35A7BD) >> (32 - num_bits);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hash the next 3-byte sequence in the window, producing a hash of length
|
||||
* 'num_bits' bits. At least LZ_HASH_REQUIRED_NBYTES must be available at 'p';
|
||||
* this might be 4 bytes rather than 3 because an unaligned load is faster on
|
||||
* some architectures.
|
||||
*/
|
||||
static inline u32
|
||||
lz_hash3(const u8 *p, unsigned num_bits)
|
||||
{
|
||||
return lz_hash3_u24(load_u24_unaligned(p), num_bits);
|
||||
}
|
||||
|
||||
/* Number of bytes the hash function actually requires be available, due to the
|
||||
* possibility of an unaligned load. */
|
||||
#define LZ_HASH_REQUIRED_NBYTES (UNALIGNED_ACCESS_IS_FAST ? 4 : 3)
|
64
src/matchfinder_avx2.h
Normal file
64
src/matchfinder_avx2.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* matchfinder_avx2.h
|
||||
*
|
||||
* Matchfinding routines optimized for Intel AVX2 (Advanced Vector Extensions).
|
||||
*/
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
static inline bool
|
||||
matchfinder_init_avx2(pos_t *data, size_t size)
|
||||
{
|
||||
__m256i v, *p;
|
||||
size_t n;
|
||||
|
||||
if (size % sizeof(__m256i) * 4)
|
||||
return false;
|
||||
|
||||
if (sizeof(pos_t) == 2)
|
||||
v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
|
||||
else if (sizeof(pos_t) == 4)
|
||||
v = _mm256_set1_epi32(MATCHFINDER_INITVAL);
|
||||
else
|
||||
return false;
|
||||
|
||||
p = (__m256i *)data;
|
||||
n = size / (sizeof(__m256i) * 4);
|
||||
do {
|
||||
p[0] = v;
|
||||
p[1] = v;
|
||||
p[2] = v;
|
||||
p[3] = v;
|
||||
p += 4;
|
||||
} while (--n);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
matchfinder_rebase_avx2(pos_t *data, size_t size)
|
||||
{
|
||||
__m256i v, *p;
|
||||
size_t n;
|
||||
|
||||
if ((size % sizeof(__m256i) * 4 != 0))
|
||||
return false;
|
||||
|
||||
if (sizeof(pos_t) == 2)
|
||||
v = _mm256_set1_epi16((pos_t)-MATCHFINDER_WINDOW_SIZE);
|
||||
else if (sizeof(pos_t) == 4)
|
||||
v = _mm256_set1_epi32((pos_t)-MATCHFINDER_WINDOW_SIZE);
|
||||
else
|
||||
return false;
|
||||
|
||||
p = (__m256i *)data;
|
||||
n = size / (sizeof(__m256i) * 4);
|
||||
do {
|
||||
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
||||
p[0] = _mm256_adds_epi16(p[0], v);
|
||||
p[1] = _mm256_adds_epi16(p[1], v);
|
||||
p[2] = _mm256_adds_epi16(p[2], v);
|
||||
p[3] = _mm256_adds_epi16(p[3], v);
|
||||
p += 4;
|
||||
} while (--n);
|
||||
return true;
|
||||
}
|
163
src/matchfinder_common.h
Normal file
163
src/matchfinder_common.h
Normal file
@ -0,0 +1,163 @@
|
||||
/*
|
||||
* matchfinder_common.h
|
||||
*
|
||||
* Common code for Lempel-Ziv matchfinding.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MATCHFINDER_WINDOW_ORDER
|
||||
# error "MATCHFINDER_WINDOW_ORDER must be defined!"
|
||||
#endif
|
||||
|
||||
#ifndef MATCHFINDER_IS_SLIDING
|
||||
# error "MATCHFINDER_IS_SLIDING must be defined!"
|
||||
#endif
|
||||
|
||||
#define MATCHFINDER_WINDOW_SIZE ((size_t)1 << MATCHFINDER_WINDOW_ORDER)
|
||||
|
||||
#if MATCHFINDER_IS_SLIDING
|
||||
# include "matchfinder_sliding.h"
|
||||
#else
|
||||
# include "matchfinder_nonsliding.h"
|
||||
#endif
|
||||
|
||||
#define MATCHFINDER_ALIGNMENT 8
|
||||
|
||||
#ifdef __AVX2__
|
||||
# include "matchfinder_avx2.h"
|
||||
# if MATCHFINDER_ALIGNMENT < 32
|
||||
# undef MATCHFINDER_ALIGNMENT
|
||||
# define MATCHFINDER_ALIGNMENT 32
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __SSE2__
|
||||
# include "matchfinder_sse2.h"
|
||||
# if MATCHFINDER_ALIGNMENT < 16
|
||||
# undef MATCHFINDER_ALIGNMENT
|
||||
# define MATCHFINDER_ALIGNMENT 16
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Representation of a match.
|
||||
*/
|
||||
struct lz_match {
|
||||
|
||||
/* The number of bytes matched. */
|
||||
pos_t length;
|
||||
|
||||
/* The offset back from the current position that was matched. */
|
||||
pos_t offset;
|
||||
};
|
||||
|
||||
static inline bool
|
||||
matchfinder_memset_init_okay(void)
|
||||
{
|
||||
/* All bytes must match in order to use memset. */
|
||||
const pos_t v = MATCHFINDER_INITVAL;
|
||||
if (sizeof(pos_t) == 2)
|
||||
return (u8)v == (u8)(v >> 8);
|
||||
if (sizeof(pos_t) == 4)
|
||||
return (u8)v == (u8)(v >> 8) &&
|
||||
(u8)v == (u8)(v >> 16) &&
|
||||
(u8)v == (u8)(v >> 24);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the hash table portion of the matchfinder.
|
||||
*
|
||||
* Essentially, this is an optimized memset().
|
||||
*
|
||||
* 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
|
||||
*/
|
||||
static inline void
|
||||
matchfinder_init(pos_t *data, size_t num_entries)
|
||||
{
|
||||
const size_t size = num_entries * sizeof(data[0]);
|
||||
|
||||
#ifdef __AVX2__
|
||||
if (matchfinder_init_avx2(data, size))
|
||||
return;
|
||||
#endif
|
||||
|
||||
#ifdef __SSE2__
|
||||
if (matchfinder_init_sse2(data, size))
|
||||
return;
|
||||
#endif
|
||||
|
||||
if (matchfinder_memset_init_okay()) {
|
||||
memset(data, (u8)MATCHFINDER_INITVAL, size);
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_entries; i++)
|
||||
data[i] = MATCHFINDER_INITVAL;
|
||||
}
|
||||
|
||||
#if MATCHFINDER_IS_SLIDING
/*
 * Slide the matchfinder by WINDOW_SIZE bytes.
 *
 * This must be called just after each WINDOW_SIZE bytes have been run through
 * the matchfinder.
 *
 * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
 * The effect is that all entries are updated to be relative to the current
 * position, rather than the position WINDOW_SIZE bytes prior.
 *
 * Underflow is detected and replaced with signed saturation.  This ensures that
 * once the sliding window has passed over a position, that position forever
 * remains out of bounds.
 *
 * The array passed in must contain all matchfinder data that is
 * position-relative.  Concretely, this will include the hash table as well as
 * the table of positions that is used to link together the sequences in each
 * hash bucket.  Note that in the latter table, the links are 1-ary in the case
 * of "hash chains", and 2-ary in the case of "binary trees".  In either case,
 * the links need to be rebased in the same way.
 *
 * The AVX2 and SSE2 routines are tried first when compiled in; the scalar
 * code below is the fallback.
 */
static inline void
matchfinder_rebase(pos_t *data, size_t num_entries)
{
	const size_t size = num_entries * sizeof(data[0]);

#ifdef __AVX2__
	if (matchfinder_rebase_avx2(data, size))
		return;
#endif

#ifdef __SSE2__
	if (matchfinder_rebase_sse2(data, size))
		return;
#endif

	if (MATCHFINDER_WINDOW_SIZE == 32768) {
		/* Branchless version for 32768 byte windows.  If the value was
		 * already negative, clear all bits except the sign bit; this
		 * changes the value to -32768.  Otherwise, set the sign bit;
		 * this is equivalent to subtracting 32768.  */
		for (size_t i = 0; i < num_entries; i++) {
			u16 v = data[i];
			u16 sign_bit = v & 0x8000;
			/* Mask is 0x8000 for negative values, all ones
			 * otherwise. */
			v &= sign_bit - ((sign_bit >> 15) ^ 1);
			v |= 0x8000;
			data[i] = v;
		}
		return;
	}

	for (size_t i = 0; i < num_entries; i++) {
		if (data[i] >= 0) {
			/* Still in range: move it back by one window.  The
			 * operand is already negative, so it must be added;
			 * subtracting it would move the position forward. */
			data[i] += (pos_t)-MATCHFINDER_WINDOW_SIZE;
		} else {
			/* Already out of bounds: saturate. */
			data[i] = (pos_t)-MATCHFINDER_WINDOW_SIZE;
		}
	}
}
#endif /* MATCHFINDER_IS_SLIDING */
|
47
src/matchfinder_nonsliding.h
Normal file
47
src/matchfinder_nonsliding.h
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* matchfinder_nonsliding.h
|
||||
*
|
||||
* Definitions for nonsliding window matchfinders.
|
||||
*
|
||||
* "Nonsliding window" means that any prior sequence can be matched.
|
||||
*/
|
||||
|
||||
#if MATCHFINDER_WINDOW_ORDER <= 16
|
||||
typedef u16 pos_t;
|
||||
#else
|
||||
typedef u32 pos_t;
|
||||
#endif
|
||||
|
||||
#if MATCHFINDER_WINDOW_ORDER != 16 && MATCHFINDER_WINDOW_ORDER != 32
|
||||
|
||||
/* Not all the bits of the position type are needed, so the sign bit can be
|
||||
* reserved to mean "out of bounds". */
|
||||
#define MATCHFINDER_INITVAL ((pos_t)-1)
|
||||
|
||||
static inline bool
|
||||
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
|
||||
{
|
||||
return !(cur_match & ((pos_t)1 << (sizeof(pos_t) * 8 - 1)));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* All bits of the position type are needed, so use 0 to mean "out of bounds".
|
||||
* This prevents the beginning of the buffer from matching anything; however,
|
||||
* this doesn't matter much. */
|
||||
|
||||
#define MATCHFINDER_INITVAL ((pos_t)0)
|
||||
|
||||
static inline bool
|
||||
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
|
||||
{
|
||||
return cur_match != 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline pos_t
|
||||
matchfinder_slot_for_match(pos_t cur_match)
|
||||
{
|
||||
return cur_match;
|
||||
}
|
30
src/matchfinder_sliding.h
Normal file
30
src/matchfinder_sliding.h
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
* matchfinder_sliding.h
|
||||
*
|
||||
* Definitions for sliding window matchfinders.
|
||||
*
|
||||
* "Sliding window" means that only sequences beginning in the most recent
|
||||
* MATCHFINDER_WINDOW_SIZE bytes can be matched.
|
||||
*/
|
||||
|
||||
#if MATCHFINDER_WINDOW_ORDER <= 15
|
||||
typedef s16 pos_t;
|
||||
#else
|
||||
typedef s32 pos_t;
|
||||
#endif
|
||||
|
||||
#define MATCHFINDER_INITVAL ((pos_t)-MATCHFINDER_WINDOW_SIZE)
|
||||
|
||||
/* In the sliding window case, positions are stored relative to 'in_base'. */
|
||||
|
||||
static inline bool
|
||||
matchfinder_match_in_window(pos_t cur_match, const u8 *in_base, const u8 *in_next)
|
||||
{
|
||||
return cur_match > (pos_t)((in_next - in_base) - MATCHFINDER_WINDOW_SIZE);
|
||||
}
|
||||
|
||||
static inline pos_t
|
||||
matchfinder_slot_for_match(pos_t cur_match)
|
||||
{
|
||||
return cur_match & (MATCHFINDER_WINDOW_SIZE - 1);
|
||||
}
|
64
src/matchfinder_sse2.h
Normal file
64
src/matchfinder_sse2.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* matchfinder_sse2.h
|
||||
*
|
||||
* Matchfinding routines optimized for Intel SSE2 (Streaming SIMD Extensions).
|
||||
*/
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
static inline bool
|
||||
matchfinder_init_sse2(pos_t *data, size_t size)
|
||||
{
|
||||
__m128i v, *p;
|
||||
size_t n;
|
||||
|
||||
if (size % sizeof(__m128i) * 4)
|
||||
return false;
|
||||
|
||||
if (sizeof(pos_t) == 2)
|
||||
v = _mm_set1_epi16(MATCHFINDER_INITVAL);
|
||||
else if (sizeof(pos_t) == 4)
|
||||
v = _mm_set1_epi32(MATCHFINDER_INITVAL);
|
||||
else
|
||||
return false;
|
||||
|
||||
p = (__m128i *)data;
|
||||
n = size / (sizeof(__m128i) * 4);
|
||||
do {
|
||||
p[0] = v;
|
||||
p[1] = v;
|
||||
p[2] = v;
|
||||
p[3] = v;
|
||||
p += 4;
|
||||
} while (--n);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
matchfinder_rebase_sse2(pos_t *data, size_t size)
|
||||
{
|
||||
__m128i v, *p;
|
||||
size_t n;
|
||||
|
||||
if ((size % sizeof(__m128i) * 4 != 0))
|
||||
return false;
|
||||
|
||||
if (sizeof(pos_t) == 2)
|
||||
v = _mm_set1_epi16((pos_t)-MATCHFINDER_WINDOW_SIZE);
|
||||
else if (sizeof(pos_t) == 4)
|
||||
v = _mm_set1_epi32((pos_t)-MATCHFINDER_WINDOW_SIZE);
|
||||
else
|
||||
return false;
|
||||
|
||||
p = (__m128i *)data;
|
||||
n = size / (sizeof(__m128i) * 4);
|
||||
do {
|
||||
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
||||
p[0] = _mm_adds_epi16(p[0], v);
|
||||
p[1] = _mm_adds_epi16(p[1], v);
|
||||
p[2] = _mm_adds_epi16(p[2], v);
|
||||
p[3] = _mm_adds_epi16(p[3], v);
|
||||
p += 4;
|
||||
} while (--n);
|
||||
return true;
|
||||
}
|
38
src/types.h
Normal file
38
src/types.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
* types.h
|
||||
*
|
||||
* Definitions of fixed-width integers, 'bool', 'size_t', and 'machine_word_t'.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* 8-bit integers */
typedef uint8_t u8;
typedef int8_t s8;

/* 16-bit integers */
typedef uint16_t u16;
typedef int16_t s16;

/* 32-bit integers */
typedef uint32_t u32;
typedef int32_t s32;

/* 64-bit integers */
typedef uint64_t u64;
typedef int64_t s64;

/*
 * Aliases of the unsigned types whose names record a byte order ("le" or
 * "be").  They are plain typedefs, so the compiler does not enforce the
 * distinction.
 */
typedef uint16_t le16;
typedef uint16_t be16;
typedef uint32_t le32;
typedef uint32_t be32;
typedef uint64_t le64;
typedef uint64_t be64;

/*
 * Type of a machine word.  'unsigned long' would be logical, but that is only
 * 32 bits on x86_64 Windows.  The same applies to 'uint_fast32_t'.  So the best
 * we can do without a bunch of #ifdefs appears to be 'size_t'.
 */
typedef size_t machine_word_t;

/* Size of a machine word, in bytes. */
#define WORDSIZE	sizeof(machine_word_t)
|
216
src/unaligned.h
Normal file
216
src/unaligned.h
Normal file
@ -0,0 +1,216 @@
|
||||
/*
|
||||
* unaligned.h
|
||||
*
|
||||
* Inline functions for unaligned memory access.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "compiler.h"
|
||||
#include "endianness.h"
|
||||
#include "types.h"
|
||||
|
||||
#define DEFINE_UNALIGNED_TYPE(type) \
|
||||
struct type##_unaligned { \
|
||||
type v; \
|
||||
} _packed_attribute; \
|
||||
\
|
||||
static inline type \
|
||||
load_##type##_unaligned(const void *p) \
|
||||
{ \
|
||||
return ((const struct type##_unaligned *)p)->v; \
|
||||
} \
|
||||
\
|
||||
static inline void \
|
||||
store_##type##_unaligned(type val, void *p) \
|
||||
{ \
|
||||
((struct type##_unaligned *)p)->v = val; \
|
||||
}
|
||||
|
||||
DEFINE_UNALIGNED_TYPE(u16);
|
||||
DEFINE_UNALIGNED_TYPE(u32);
|
||||
DEFINE_UNALIGNED_TYPE(u64);
|
||||
DEFINE_UNALIGNED_TYPE(machine_word_t);
|
||||
|
||||
#define load_word_unaligned load_machine_word_t_unaligned
|
||||
#define store_word_unaligned store_machine_word_t_unaligned
|
||||
|
||||
static inline u16
|
||||
get_unaligned_u16_le(const void *p)
|
||||
{
|
||||
u16 v;
|
||||
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
v = le16_to_cpu(load_u16_unaligned(p));
|
||||
} else {
|
||||
const u8 *p8 = p;
|
||||
v = 0;
|
||||
v |= (u16)p8[0] << 0;
|
||||
v |= (u16)p8[1] << 8;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline u32
|
||||
get_unaligned_u32_le(const void *p)
|
||||
{
|
||||
u32 v;
|
||||
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
v = le32_to_cpu(load_u32_unaligned(p));
|
||||
} else {
|
||||
const u8 *p8 = p;
|
||||
v = 0;
|
||||
v |= (u32)p8[0] << 0;
|
||||
v |= (u32)p8[1] << 8;
|
||||
v |= (u32)p8[2] << 16;
|
||||
v |= (u32)p8[3] << 24;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline u64
|
||||
get_unaligned_u64_le(const void *p)
|
||||
{
|
||||
u64 v;
|
||||
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
v = le64_to_cpu(load_u64_unaligned(p));
|
||||
} else {
|
||||
const u8 *p8 = p;
|
||||
v = 0;
|
||||
v |= (u64)p8[0] << 0;
|
||||
v |= (u64)p8[1] << 8;
|
||||
v |= (u64)p8[2] << 16;
|
||||
v |= (u64)p8[3] << 24;
|
||||
v |= (u64)p8[4] << 32;
|
||||
v |= (u64)p8[5] << 40;
|
||||
v |= (u64)p8[6] << 48;
|
||||
v |= (u64)p8[7] << 56;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline machine_word_t
|
||||
get_unaligned_word_le(const void *p)
|
||||
{
|
||||
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
|
||||
if (WORDSIZE == 4)
|
||||
return get_unaligned_u32_le(p);
|
||||
else
|
||||
return get_unaligned_u64_le(p);
|
||||
}
|
||||
|
||||
static inline void
|
||||
put_unaligned_u16_le(u16 v, void *p)
|
||||
{
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
store_u16_unaligned(cpu_to_le16(v), p);
|
||||
} else {
|
||||
u8 *p8 = p;
|
||||
p8[0] = (v >> 0) & 0xFF;
|
||||
p8[1] = (v >> 8) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
put_unaligned_u32_le(u32 v, void *p)
|
||||
{
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
store_u32_unaligned(cpu_to_le32(v), p);
|
||||
} else {
|
||||
u8 *p8 = p;
|
||||
p8[0] = (v >> 0) & 0xFF;
|
||||
p8[1] = (v >> 8) & 0xFF;
|
||||
p8[2] = (v >> 16) & 0xFF;
|
||||
p8[3] = (v >> 24) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
put_unaligned_u64_le(u64 v, void *p)
|
||||
{
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
store_u64_unaligned(cpu_to_le64(v), p);
|
||||
} else {
|
||||
u8 *p8 = p;
|
||||
p8[0] = (v >> 0) & 0xFF;
|
||||
p8[1] = (v >> 8) & 0xFF;
|
||||
p8[2] = (v >> 16) & 0xFF;
|
||||
p8[3] = (v >> 24) & 0xFF;
|
||||
p8[4] = (v >> 32) & 0xFF;
|
||||
p8[5] = (v >> 40) & 0xFF;
|
||||
p8[6] = (v >> 48) & 0xFF;
|
||||
p8[7] = (v >> 56) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
put_unaligned_word_le(machine_word_t v, void *p)
|
||||
{
|
||||
BUILD_BUG_ON(WORDSIZE != 4 && WORDSIZE != 8);
|
||||
if (WORDSIZE == 4)
|
||||
put_unaligned_u32_le(v, p);
|
||||
else
|
||||
put_unaligned_u64_le(v, p);
|
||||
}
|
||||
|
||||
static inline u16
|
||||
get_unaligned_u16_be(const void *p)
|
||||
{
|
||||
u16 v;
|
||||
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
v = be16_to_cpu(load_u16_unaligned(p));
|
||||
} else {
|
||||
const u8 *p8 = p;
|
||||
v = 0;
|
||||
v |= (u16)p8[0] << 8;
|
||||
v |= (u16)p8[1] << 0;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline u32
|
||||
get_unaligned_u32_be(const void *p)
|
||||
{
|
||||
u32 v;
|
||||
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
v = be32_to_cpu(load_u32_unaligned(p));
|
||||
} else {
|
||||
const u8 *p8 = p;
|
||||
v = 0;
|
||||
v |= (u32)p8[0] << 24;
|
||||
v |= (u32)p8[1] << 16;
|
||||
v |= (u32)p8[2] << 8;
|
||||
v |= (u32)p8[3] << 0;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline void
|
||||
put_unaligned_u16_be(u16 v, void *p)
|
||||
{
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
store_u16_unaligned(cpu_to_be16(v), p);
|
||||
} else {
|
||||
u8 *p8 = p;
|
||||
p8[0] = (v >> 8) & 0xFF;
|
||||
p8[1] = (v >> 0) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
put_unaligned_u32_be(u32 v, void *p)
|
||||
{
|
||||
if (UNALIGNED_ACCESS_IS_FAST) {
|
||||
store_u32_unaligned(cpu_to_be32(v), p);
|
||||
} else {
|
||||
u8 *p8 = p;
|
||||
p8[0] = (v >> 24) & 0xFF;
|
||||
p8[1] = (v >> 16) & 0xFF;
|
||||
p8[2] = (v >> 8) & 0xFF;
|
||||
p8[3] = (v >> 0) & 0xFF;
|
||||
}
|
||||
}
|
56
src/zlib_compress.c
Normal file
56
src/zlib_compress.c
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* zlib_compress.c
|
||||
*
|
||||
* Generate DEFLATE-compressed data in the zlib wrapper format.
|
||||
*/
|
||||
|
||||
#include "libdeflate.h"
|
||||
|
||||
#include "adler32.h"
|
||||
#include "deflate_compress.h"
|
||||
#include "unaligned.h"
|
||||
#include "zlib_constants.h"
|
||||
|
||||
LIBEXPORT size_t
|
||||
zlib_compress(struct deflate_compressor *c, const void *in, size_t in_size,
|
||||
void *out, size_t out_nbytes_avail)
|
||||
{
|
||||
u8 *out_next = out;
|
||||
u16 hdr;
|
||||
unsigned compression_level;
|
||||
unsigned level_hint;
|
||||
size_t deflate_size;
|
||||
|
||||
if (out_nbytes_avail <= ZLIB_MIN_OVERHEAD)
|
||||
return 0;
|
||||
|
||||
/* 2 byte header: CMF and FLG */
|
||||
hdr = (ZLIB_CM_DEFLATE << 8) | (ZLIB_CINFO_32K_WINDOW << 12);
|
||||
compression_level = deflate_get_compression_level(c);
|
||||
if (compression_level < 2)
|
||||
level_hint = ZLIB_FASTEST_COMPRESSION;
|
||||
else if (compression_level < 6)
|
||||
level_hint = ZLIB_FAST_COMPRESSION;
|
||||
else if (compression_level < 8)
|
||||
level_hint = ZLIB_DEFAULT_COMPRESSION;
|
||||
else
|
||||
level_hint = ZLIB_SLOWEST_COMPRESSION;
|
||||
hdr |= level_hint << 6;
|
||||
hdr |= 31 - (hdr % 31);
|
||||
|
||||
put_unaligned_u16_be(hdr, out_next);
|
||||
out_next += 2;
|
||||
|
||||
/* Compressed data */
|
||||
deflate_size = deflate_compress(c, in, in_size, out_next,
|
||||
out_nbytes_avail - ZLIB_MIN_OVERHEAD);
|
||||
if (deflate_size == 0)
|
||||
return 0;
|
||||
out_next += deflate_size;
|
||||
|
||||
/* ADLER32 */
|
||||
put_unaligned_u32_be(adler32(in, in_size), out_next);
|
||||
out_next += 4;
|
||||
|
||||
return out_next - (u8 *)out;
|
||||
}
|
20
src/zlib_constants.h
Normal file
20
src/zlib_constants.h
Normal file
@ -0,0 +1,20 @@
|
||||
/*
|
||||
* zlib_constants.h
|
||||
*
|
||||
* Constants for the zlib wrapper format.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Compression method field value identifying DEFLATE. */
#define ZLIB_CM_DEFLATE		8

/* Compression info field value for a 32K window. */
#define ZLIB_CINFO_32K_WINDOW	7

/* Compression level hints stored in the header. */
#define ZLIB_FASTEST_COMPRESSION	0
#define ZLIB_FAST_COMPRESSION		1
#define ZLIB_DEFAULT_COMPRESSION	2
#define ZLIB_SLOWEST_COMPRESSION	3

/* Sizes, in bytes, of the wrapper around the DEFLATE stream. */
#define ZLIB_MIN_HEADER_SIZE	2
#define ZLIB_FOOTER_SIZE	4
#define ZLIB_MIN_OVERHEAD	(ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE)
|
56
src/zlib_decompress.c
Normal file
56
src/zlib_decompress.c
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* zlib_decompress.c
|
||||
*
|
||||
* Decompress DEFLATE-compressed data wrapped in the zlib format.
|
||||
*/
|
||||
|
||||
#include "libdeflate.h"
|
||||
|
||||
#include "adler32.h"
|
||||
#include "unaligned.h"
|
||||
#include "zlib_constants.h"
|
||||
|
||||
LIBEXPORT bool
|
||||
zlib_decompress(struct deflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes, void *out, size_t out_nbytes)
|
||||
{
|
||||
const u8 *in_next = in;
|
||||
const u8 * const in_end = in_next + in_nbytes;
|
||||
u16 hdr;
|
||||
|
||||
if (in_nbytes < ZLIB_MIN_OVERHEAD)
|
||||
return false;
|
||||
|
||||
/* 2 byte header: CMF and FLG */
|
||||
hdr = get_unaligned_u16_be(in_next);
|
||||
in_next += 2;
|
||||
|
||||
/* FCHECK */
|
||||
if ((hdr % 31) != 0)
|
||||
return false;
|
||||
|
||||
/* CM */
|
||||
if (((hdr >> 8) & 0xF) != ZLIB_CM_DEFLATE)
|
||||
return false;
|
||||
|
||||
/* CINFO */
|
||||
if ((hdr >> 12) > ZLIB_CINFO_32K_WINDOW)
|
||||
return false;
|
||||
|
||||
/* FDICT */
|
||||
if ((hdr >> 5) & 1)
|
||||
return false;
|
||||
|
||||
/* Compressed data */
|
||||
if (!deflate_decompress(d, in_next, in_end - ZLIB_FOOTER_SIZE - in_next,
|
||||
out, out_nbytes))
|
||||
return false;
|
||||
|
||||
in_next = in_end - ZLIB_FOOTER_SIZE;
|
||||
|
||||
/* ADLER32 */
|
||||
if (adler32(out, out_nbytes) != get_unaligned_u32_be(in_next))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user