More updates

Eric Biggers 2015-11-21 16:13:57 -06:00
parent 3a19fbae6f
commit a60bc308c0
9 changed files with 88 additions and 85 deletions

View File

@@ -34,7 +34,7 @@ UNSAFE_DECOMPRESSION := no
 CC = gcc
 AR = ar
-override CFLAGS += -O2 -I. -std=c11
+override CFLAGS += -O2 -I. -std=c99 -fvisibility=hidden
 ifeq ($(SUPPORT_NEAR_OPTIMAL_PARSING),yes)
 override CFLAGS += -DSUPPORT_NEAR_OPTIMAL_PARSING=1
@@ -70,7 +70,7 @@ ifeq ($(SUPPORT_GZIP),yes)
 SRC += src/crc32.c
 endif
-override PIC_CFLAGS := $(CFLAGS) -fPIC -fvisibility=hidden
+override PIC_CFLAGS := $(CFLAGS) -fPIC
 OBJ := $(SRC:.c=.o)
 PIC_OBJ := $(SRC:.c=.pic.o)
@@ -88,7 +88,7 @@ libdeflate.a:$(OBJ)
 	$(AR) cr $@ $+
 benchmark:tools/benchmark.c libdeflate.a
-	$(CC) -o $@ $(CFLAGS) -L. -lz $+ libdeflate.a
+	$(CC) -o $@ $(CFLAGS) -L. $+ libdeflate.a -lz
 TARGETS :=
 ifeq ($(BUILD_STATIC_LIBRARY),yes)
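Note on the benchmark rule above: with a traditional single-pass linker, a library listed on the command line only resolves references that are already undefined at that point, so -lz has to come after the objects and libdeflate.a that call into zlib; listing it first can leave zlib's symbols unresolved.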

View File

@@ -51,10 +51,11 @@
 #include "lz_hash.h"
 #define BT_MATCHFINDER_HASH3_ORDER 15
+#define BT_MATCHFINDER_HASH3_WAYS 1
 #define BT_MATCHFINDER_HASH4_ORDER 16
 #define BT_MATCHFINDER_TOTAL_HASH_LENGTH \
-	((1UL << BT_MATCHFINDER_HASH3_ORDER) + \
+	((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
 	 (1UL << BT_MATCHFINDER_HASH4_ORDER))
 /* Representation of a match found by the bt_matchfinder */
@@ -70,7 +71,7 @@ struct lz_match {
 struct bt_matchfinder {
 	/* The hash table for finding length 3 matches */
-	mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER];
+	mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
 	/* The hash table which contains the roots of the binary trees for
 	 * finding length 4+ matches */
@@ -139,7 +140,12 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
 	u32 next_seq3;
 	u32 hash3;
 	u32 hash4;
+	STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
+		      BT_MATCHFINDER_HASH3_WAYS <= 2);
 	s32 cur_node;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+	s32 cur_node_2;
+#endif
 	const u8 *matchptr;
 	mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
 	u32 best_lt_len, best_gt_len;
@@ -157,14 +163,28 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
 	prefetchw(&mf->hash3_tab[next_hashes[0]]);
 	prefetchw(&mf->hash4_tab[next_hashes[1]]);
-	cur_node = mf->hash3_tab[hash3];
-	mf->hash3_tab[hash3] = cur_pos;
-	if (record_matches && cur_node > cutoff &&
-	    load_u24_unaligned(in_next) == load_u24_unaligned(&in_base[cur_node]))
-	{
-		lz_matchptr->length = 3;
-		lz_matchptr->offset = in_next - &in_base[cur_node];
-		lz_matchptr++;
+	cur_node = mf->hash3_tab[hash3][0];
+	mf->hash3_tab[hash3][0] = cur_pos;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+	cur_node_2 = mf->hash3_tab[hash3][1];
+	mf->hash3_tab[hash3][1] = cur_node;
+#endif
+	if (record_matches && cur_node > cutoff) {
+		u32 seq3 = load_u24_unaligned(in_next);
+		if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
+			lz_matchptr->length = 3;
+			lz_matchptr->offset = in_next - &in_base[cur_node];
+			lz_matchptr++;
+		}
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+		else if (cur_node_2 > cutoff &&
+			 seq3 == load_u24_unaligned(&in_base[cur_node_2]))
+		{
+			lz_matchptr->length = 3;
+			lz_matchptr->offset = in_next - &in_base[cur_node_2];
+			lz_matchptr++;
+		}
+#endif
 	}
 	cur_node = mf->hash4_tab[hash4];
@@ -250,22 +270,21 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
  * Must be <= @max_len.
  * @max_search_depth
  * Limit on the number of potential matches to consider. Must be >= 1.
- * @next_hash
- * Pointer to the hash code for the current sequence, which was computed
- * one position in advance so that the binary tree root could be
- * prefetched. This is an input/output parameter.
+ * @next_hashes
+ * The precomputed hash codes for the sequence beginning at @in_next.
+ * These will be used and then updated with the precomputed hashcodes for
+ * the sequence beginning at @in_next + 1.
  * @best_len_ret
  * If a match of length >= 4 was found, then the length of the longest such
- * match is written here; otherwise 2 is written here. (Note: this is
+ * match is written here; otherwise 3 is written here. (Note: this is
  * redundant with the 'struct lz_match' array, but this is easier for the
  * compiler to optimize when inlined and the caller immediately does a
  * check against 'best_len'.)
  * @lz_matchptr
  * An array in which this function will record the matches. The recorded
- * matches will be sorted by strictly increasing length and increasing
- * offset. The maximum number of matches that may be found is
- * 'MIN(nice_len, max_len) - 2 + 1', or one less if length 2 matches are
- * disabled.
+ * matches will be sorted by strictly increasing length and (non-strictly)
+ * increasing offset. The maximum number of matches that may be found is
+ * 'nice_len - 2'.
  *
  * The return value is a pointer to the next available slot in the @lz_matchptr
  * array. (If no matches were found, this will be the same as @lz_matchptr.)
@@ -296,28 +315,8 @@ bt_matchfinder_get_matches(struct bt_matchfinder *mf,
 /*
  * Advance the matchfinder, but don't record any matches.
  *
- * @mf
- * The matchfinder structure.
- * @in_base
- * Pointer to the next byte in the input buffer to process _at the last
- * time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
- * @cur_pos
- * The current position in the input buffer relative to @in_base.
- * @max_len
- * The maximum permissible match length at this position. Must be >=
- * BT_MATCHFINDER_REQUIRED_NBYTES.
- * @nice_len
- * Stop searching if a match of at least this length is found.
- * @max_search_depth
- * Limit on the number of potential matches to consider.
- * @next_hash
- * Pointer to the hash code for the current sequence, which was computed
- * one position in advance so that the binary tree root could be
- * prefetched. This is an input/output parameter.
- *
- * Note: this is very similar to bt_matchfinder_get_matches() because both
- * functions must do hashing and tree re-rooting. This version just doesn't
- * actually record any matches.
+ * This is very similar to bt_matchfinder_get_matches() because both functions
+ * must do hashing and tree re-rooting.
  */
 static forceinline void
 bt_matchfinder_skip_position(struct bt_matchfinder *mf,
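The hunks above make the length-3 hash table optionally 2-way: each bucket can remember the two most recent positions that hashed to the same value, and at most one length-3 match is reported, preferring the more recent (closer) candidate. Because the documentation guarantees strictly increasing match lengths, at most one match per length from 3 up to nice_len can be recorded, which is where the 'nice_len - 2' bound comes from. As a rough standalone illustration of the bucket handling only (the names hash3_tab, load_u24, probe_hash3 and the fixed-width types are assumptions made for this sketch, not libdeflate's interface):

/*
 * Standalone sketch of a 2-way bucket for length-3 matches.
 * Illustrative only; names and types are not libdeflate's API.
 */
#include <stdint.h>

#define HASH3_ORDER 15
#define HASH3_WAYS  2

static int32_t hash3_tab[1u << HASH3_ORDER][HASH3_WAYS];

/* Load 3 bytes as a 24-bit value; only consistency between the two loads
 * being compared matters, not the byte order chosen here. */
static uint32_t
load_u24(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16);
}

/*
 * Insert the current position into the bucket and return the offset of a
 * length-3 match if either way holds one, preferring the closer candidate.
 * 'cutoff' excludes positions that are too old (or the empty sentinel).
 * Returns 0 if neither way holds a usable match.
 */
static uint32_t
probe_hash3(const uint8_t *in_base, const uint8_t *in_next,
	    int32_t cur_pos, uint32_t hash3, int32_t cutoff)
{
	int32_t node   = hash3_tab[hash3][0];
	int32_t node2  = hash3_tab[hash3][1];
	uint32_t seq3  = load_u24(in_next);
	uint32_t offset = 0;

	if (node > cutoff && seq3 == load_u24(&in_base[node]))
		offset = (uint32_t)(in_next - &in_base[node]);
	else if (node2 > cutoff && seq3 == load_u24(&in_base[node2]))
		offset = (uint32_t)(in_next - &in_base[node2]);

	/* Age the bucket: the newest position goes into way 0, and the
	 * previous occupant of way 0 is demoted to way 1. */
	hash3_tab[hash3][1] = node;
	hash3_tab[hash3][0] = cur_pos;
	return offset;
}

With BT_MATCHFINDER_HASH3_WAYS left at 1, the second probe and the aging store are compiled out, which is why the header keeps the way count as a compile-time constant.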

View File

@@ -22,22 +22,22 @@
 # define LIBEXPORT
 #endif
-/* likely() - hint that the expression is usually true */
+/* likely(expr) - hint that the expression is usually true */
 #ifndef likely
 # define likely(expr) (expr)
 #endif
-/* unlikely() - hint that the expression is usually false */
+/* unlikely(expr) - hint that the expression is usually false */
 #ifndef unlikely
 # define unlikely(expr) (expr)
 #endif
-/* prefetchr() - prefetch into L1 cache for read */
+/* prefetchr(addr) - prefetch into L1 cache for read */
 #ifndef prefetchr
 # define prefetchr(addr)
 #endif
-/* prefetchw() - prefetch into L1 cache for write */
+/* prefetchw(addr) - prefetch into L1 cache for write */
 #ifndef prefetchw
 # define prefetchw(addr)
 #endif
@@ -47,35 +47,35 @@
 #ifndef _aligned_attribute
 #endif
-/* compiler_fls32() - efficiently find the index of the last (highest) set bit
+/* compiler_fls32(n) - efficiently find the index of the last (highest) set bit
  * in a nonzero 32-bit integer */
 #ifndef compiler_fls32
 #endif
-/* compiler_fls64() - efficiently find the index of the last (highest) set bit
+/* compiler_fls64(n) - efficiently find the index of the last (highest) set bit
  * in a nonzero 64-bit integer */
 #ifndef compiler_fls64
 #endif
-/* compiler_ffs32() - efficiently find the index of the first (lowest) set bit
+/* compiler_ffs32(n) - efficiently find the index of the first (lowest) set bit
  * in a nonzero 32-bit integer */
 #ifndef compiler_ffs32
 #endif
-/* compiler_ffs64() - efficiently find the index of the first (lowest) set bit
+/* compiler_ffs64(n) - efficiently find the index of the first (lowest) set bit
  * in a nonzero 64-bit integer */
 #ifndef compiler_ffs64
 #endif
-/* compiler_bswap16() - efficiently swap the bytes of a 16-bit integer. */
+/* compiler_bswap16(n) - efficiently swap the bytes of a 16-bit integer. */
 #ifndef compiler_bswap16
 #endif
-/* compiler_bswap32() - efficiently swap the bytes of a 32-bit integer */
+/* compiler_bswap32(n) - efficiently swap the bytes of a 32-bit integer */
 #ifndef compiler_bswap32
 #endif
-/* compiler_bswap64() - efficiently swap the bytes of a 64-bit integer */
+/* compiler_bswap64(n) - efficiently swap the bytes of a 64-bit integer */
 #ifndef compiler_bswap64
 #endif
@@ -99,16 +99,16 @@ static forceinline int CPU_IS_LITTLE_ENDIAN(void)
 #define CPU_IS_BIG_ENDIAN() (!CPU_IS_LITTLE_ENDIAN())
 /*
- * DEFINE_UNALIGNED_TYPE(type) - this should be a macro that, given an integer
- * type 'type', defines load_type_unaligned() and store_type_unaligned()
- * functions which load and store variables of type 'type' from/to unaligned
- * memory addresses. If not defined, a fallback is used.
+ * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
+ * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
+ * which load and store variables of type 'type' from/to unaligned memory
+ * addresses. If not defined, a fallback is used.
  */
 #ifndef DEFINE_UNALIGNED_TYPE
 /* Although memcpy() may seem inefficient, it *usually* gets optimized
- * appropriately by modern compilers. It's portable and is probably the best
- * fallback. */
+ * appropriately by modern compilers. It's portable and may be the best we can
+ * do for a fallback... */
 #include <string.h>
 #define DEFINE_UNALIGNED_TYPE(type) \
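For reference, a memcpy()-based fallback of the kind the comment describes could look roughly like this; the exact expansion shown is an assumption for this sketch, not necessarily the macro body used in the header:

/* Sketch of a memcpy()-based DEFINE_UNALIGNED_TYPE() fallback.
 * Only the memcpy() idea is taken from the header; the expansion is illustrative. */
#include <stdint.h>
#include <string.h>

typedef uint32_t u32;

#define DEFINE_UNALIGNED_TYPE(type)			\
static inline type					\
load_##type##_unaligned(const void *p)			\
{							\
	type v;						\
	memcpy(&v, p, sizeof(v));			\
	return v;					\
}							\
							\
static inline void					\
store_##type##_unaligned(type v, void *p)		\
{							\
	memcpy(p, &v, sizeof(v));			\
}

DEFINE_UNALIGNED_TYPE(u32) /* defines load_u32_unaligned() and store_u32_unaligned() */

Modern compilers typically recognize the fixed-size memcpy() and lower it to a single (possibly unaligned) load or store on targets that allow it.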

View File

@@ -188,7 +188,7 @@ crc32_slice4(u32 remainder, const u8 *buffer, size_t nbytes)
 	end32 = p + ((end - p) & ~3);
 	for (; p != end32; p += 4) {
-		u32 v = cpu_to_le32(*(const u32 *)p);
+		u32 v = le32_to_cpu(*(const u32 *)p);
 		remainder =
 			crc32_table[0x300 + (u8)((remainder ^ v) >> 0)] ^
 			crc32_table[0x200 + (u8)((remainder ^ v) >> 8)] ^
@@ -218,8 +218,8 @@ crc32_slice8(u32 remainder, const u8 *buffer, size_t nbytes)
 	end64 = p + ((end - p) & ~7);
 	for (; p != end64; p += 8) {
-		u32 v1 = cpu_to_le32(*(const u32 *)(p + 0));
-		u32 v2 = cpu_to_le32(*(const u32 *)(p + 4));
+		u32 v1 = le32_to_cpu(*(const u32 *)(p + 0));
+		u32 v2 = le32_to_cpu(*(const u32 *)(p + 4));
 		remainder =
			crc32_table[0x700 + (u8)((remainder ^ v1) >> 0)] ^
			crc32_table[0x600 + (u8)((remainder ^ v1) >> 8)] ^
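Both cpu_to_le32() and le32_to_cpu() are the identity on little-endian targets and a byte swap on big-endian ones, so this hunk does not change behaviour; it just uses the conversion in the semantically correct direction, since a little-endian word loaded from the input buffer is being converted to CPU byte order before indexing the tables.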

View File

@@ -193,7 +193,7 @@ typedef machine_word_t bitbuf_t;
  */
 #define FILL_BITS_WORDWISE() \
 ({ \
-	bitbuf |= load_leword_unaligned(in_next) << bitsleft; \
+	bitbuf |= get_unaligned_leword(in_next) << bitsleft; \
 	in_next += (BITBUF_NBITS - bitsleft) >> 3; \
 	bitsleft += (BITBUF_NBITS - bitsleft) & ~7; \
 })
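As a worked example of this refill, assume a 64-bit build where BITBUF_NBITS is 64 (an assumption about the surrounding definitions): with bitsleft = 5, the macro ORs in a full little-endian word shifted left by 5, advances in_next by (64 - 5) >> 3 = 7 bytes, and bumps bitsleft by (64 - 5) & ~7 = 56, to 61. The bit buffer is thus refilled to within 7 bits of full in one unaligned word load, with no per-byte loop or branches.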

View File

@@ -25,18 +25,18 @@
 #define GZIP_XFL_SLOWEST_COMRESSION 0x02
 #define GZIP_XFL_FASTEST_COMRESSION 0x04
 #define GZIP_OS_FAT 0
 #define GZIP_OS_AMIGA 1
 #define GZIP_OS_VMS 2
 #define GZIP_OS_UNIX 3
 #define GZIP_OS_VM_CMS 4
 #define GZIP_OS_ATARI_TOS 5
 #define GZIP_OS_HPFS 6
 #define GZIP_OS_MACINTOSH 7
 #define GZIP_OS_Z_SYSTEM 8
 #define GZIP_OS_CP_M 9
 #define GZIP_OS_TOPS_20 10
 #define GZIP_OS_NTFS 11
 #define GZIP_OS_QDOS 12
 #define GZIP_OS_RISCOS 13
 #define GZIP_OS_UNKNOWN 255

View File

@@ -66,7 +66,7 @@ gzip_decompress(struct deflate_decompressor *d,
 	/* File comment (zero terminated) */
 	if (flg & GZIP_FCOMMENT) {
-		while (*in_next++ != 0 && ++in_next != in_end)
+		while (*in_next++ != 0 && in_next != in_end)
 			;
 		if (in_end - in_next < GZIP_FOOTER_SIZE)
 			return false;
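The old loop advanced in_next twice per iteration (once in *in_next++ and again in ++in_next), so it inspected only every other byte of the comment field and its end-of-buffer check could step past in_end entirely; the fixed version advances one byte at a time and stops exactly at in_end. An equivalent bounds-checked way to skip a NUL-terminated field, shown only as an illustrative sketch (gzip_decompress() keeps the pointer-based loop):

#include <stdint.h>
#include <string.h>

/* Skip a NUL-terminated field; returns a pointer just past the terminator,
 * or NULL if no terminator occurs before 'end'.  Illustrative only. */
static const uint8_t *
skip_cstring(const uint8_t *p, const uint8_t *end)
{
	const uint8_t *nul = memchr(p, 0, (size_t)(end - p));

	return nul ? nul + 1 : NULL;
}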

View File

@@ -59,7 +59,7 @@ get_unaligned_be32(const u8 *p)
 		return be32_to_cpu(load_u32_unaligned(p));
 	else
 		return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
-		       ((u32)p[2] << 8)| ((u32)p[3] << 0);
+		       ((u32)p[2] << 8) | ((u32)p[3] << 0);
 }
 static forceinline u64
@@ -75,7 +75,7 @@ get_unaligned_le64(const u8 *p)
 }
 static forceinline machine_word_t
-load_leword_unaligned(const u8 *p)
+get_unaligned_leword(const u8 *p)
 {
 	STATIC_ASSERT(WORDSIZE == 4 || WORDSIZE == 8);
 	if (WORDSIZE == 4)

View File

@@ -1,5 +1,5 @@
 /*
- * util.h - useful types, macros, and compiler/platform-specific definitions
+ * util.h - useful types, macros, and compiler or platform-specific definitions
  */
 #pragma once
@@ -10,7 +10,7 @@
 #include "compiler.h"
-/* Definitions of fixed-width integers, 'bool', 'size_t', and 'machine_word_t' */
+/* Fixed-width integer types */
 typedef uint8_t u8;
 typedef uint16_t u16;
@@ -44,3 +44,7 @@ typedef size_t machine_word_t;
 /* MIN() - calculate the minimum of two variables. Arguments may be evaluted
  * multiple times. */
 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
+/* MAX() - calculate the maximum of two variables. Arguments may be evaluted
+ * multiple times. */
+#define MAX(a, b) ((a) >= (b) ? (a) : (b))