From a60bc308c0a8909b5c3043387f94f5abef66278f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 21 Nov 2015 16:13:57 -0600 Subject: [PATCH] More updates --- Makefile | 6 +-- src/bt_matchfinder.h | 81 ++++++++++++++++++++-------------------- src/compiler.h | 34 ++++++++--------- src/crc32.c | 6 +-- src/deflate_decompress.c | 2 +- src/gzip_constants.h | 30 +++++++-------- src/gzip_decompress.c | 2 +- src/unaligned.h | 4 +- src/util.h | 8 +++- 9 files changed, 88 insertions(+), 85 deletions(-) diff --git a/Makefile b/Makefile index 6ee5575..373716c 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ UNSAFE_DECOMPRESSION := no CC = gcc AR = ar -override CFLAGS += -O2 -I. -std=c11 +override CFLAGS += -O2 -I. -std=c99 -fvisibility=hidden ifeq ($(SUPPORT_NEAR_OPTIMAL_PARSING),yes) override CFLAGS += -DSUPPORT_NEAR_OPTIMAL_PARSING=1 @@ -70,7 +70,7 @@ ifeq ($(SUPPORT_GZIP),yes) SRC += src/crc32.c endif -override PIC_CFLAGS := $(CFLAGS) -fPIC -fvisibility=hidden +override PIC_CFLAGS := $(CFLAGS) -fPIC OBJ := $(SRC:.c=.o) PIC_OBJ := $(SRC:.c=.pic.o) @@ -88,7 +88,7 @@ libdeflate.a:$(OBJ) $(AR) cr $@ $+ benchmark:tools/benchmark.c libdeflate.a - $(CC) -o $@ $(CFLAGS) -L. -lz $+ libdeflate.a + $(CC) -o $@ $(CFLAGS) -L. 
$+ libdeflate.a -lz TARGETS := ifeq ($(BUILD_STATIC_LIBRARY),yes) diff --git a/src/bt_matchfinder.h b/src/bt_matchfinder.h index 6ca0221..2a1ab50 100644 --- a/src/bt_matchfinder.h +++ b/src/bt_matchfinder.h @@ -51,10 +51,11 @@ #include "lz_hash.h" #define BT_MATCHFINDER_HASH3_ORDER 15 +#define BT_MATCHFINDER_HASH3_WAYS 1 #define BT_MATCHFINDER_HASH4_ORDER 16 #define BT_MATCHFINDER_TOTAL_HASH_LENGTH \ - ((1UL << BT_MATCHFINDER_HASH3_ORDER) + \ + ((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \ (1UL << BT_MATCHFINDER_HASH4_ORDER)) /* Representation of a match found by the bt_matchfinder */ @@ -70,7 +71,7 @@ struct lz_match { struct bt_matchfinder { /* The hash table for finding length 3 matches */ - mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER]; + mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS]; /* The hash table which contains the roots of the binary trees for * finding length 4+ matches */ @@ -139,7 +140,12 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf, u32 next_seq3; u32 hash3; u32 hash4; + STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 && + BT_MATCHFINDER_HASH3_WAYS <= 2); s32 cur_node; +#if BT_MATCHFINDER_HASH3_WAYS >= 2 + s32 cur_node_2; +#endif const u8 *matchptr; mf_pos_t *pending_lt_ptr, *pending_gt_ptr; u32 best_lt_len, best_gt_len; @@ -157,14 +163,28 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf, prefetchw(&mf->hash3_tab[next_hashes[0]]); prefetchw(&mf->hash4_tab[next_hashes[1]]); - cur_node = mf->hash3_tab[hash3]; - mf->hash3_tab[hash3] = cur_pos; - if (record_matches && cur_node > cutoff && - load_u24_unaligned(in_next) == load_u24_unaligned(&in_base[cur_node])) - { - lz_matchptr->length = 3; - lz_matchptr->offset = in_next - &in_base[cur_node]; - lz_matchptr++; + cur_node = mf->hash3_tab[hash3][0]; + mf->hash3_tab[hash3][0] = cur_pos; +#if BT_MATCHFINDER_HASH3_WAYS >= 2 + cur_node_2 = mf->hash3_tab[hash3][1]; + mf->hash3_tab[hash3][1] 
= cur_node; +#endif + if (record_matches && cur_node > cutoff) { + u32 seq3 = load_u24_unaligned(in_next); + if (seq3 == load_u24_unaligned(&in_base[cur_node])) { + lz_matchptr->length = 3; + lz_matchptr->offset = in_next - &in_base[cur_node]; + lz_matchptr++; + } + #if BT_MATCHFINDER_HASH3_WAYS >= 2 + else if (cur_node_2 > cutoff && + seq3 == load_u24_unaligned(&in_base[cur_node_2])) + { + lz_matchptr->length = 3; + lz_matchptr->offset = in_next - &in_base[cur_node_2]; + lz_matchptr++; + } + #endif } cur_node = mf->hash4_tab[hash4]; @@ -250,22 +270,21 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf, * Must be <= @max_len. * @max_search_depth * Limit on the number of potential matches to consider. Must be >= 1. - * @next_hash - * Pointer to the hash code for the current sequence, which was computed - * one position in advance so that the binary tree root could be - * prefetched. This is an input/output parameter. + * @next_hashes + * The precomputed hash codes for the sequence beginning at @in_next. + * These will be used and then updated with the precomputed hashcodes for + * the sequence beginning at @in_next + 1. * @best_len_ret * If a match of length >= 4 was found, then the length of the longest such - * match is written here; otherwise 2 is written here. (Note: this is + * match is written here; otherwise 3 is written here. (Note: this is * redundant with the 'struct lz_match' array, but this is easier for the * compiler to optimize when inlined and the caller immediately does a * check against 'best_len'.) * @lz_matchptr * An array in which this function will record the matches. The recorded - * matches will be sorted by strictly increasing length and increasing - * offset. The maximum number of matches that may be found is - * 'MIN(nice_len, max_len) - 2 + 1', or one less if length 2 matches are - * disabled. + * matches will be sorted by strictly increasing length and (non-strictly) + * increasing offset. 
The maximum number of matches that may be found is + * 'nice_len - 2'. * * The return value is a pointer to the next available slot in the @lz_matchptr * array. (If no matches were found, this will be the same as @lz_matchptr.) @@ -296,28 +315,8 @@ bt_matchfinder_get_matches(struct bt_matchfinder *mf, /* * Advance the matchfinder, but don't record any matches. * - * @mf - * The matchfinder structure. - * @in_base - * Pointer to the next byte in the input buffer to process _at the last - * time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_. - * @cur_pos - * The current position in the input buffer relative to @in_base. - * @max_len - * The maximum permissible match length at this position. Must be >= - * BT_MATCHFINDER_REQUIRED_NBYTES. - * @nice_len - * Stop searching if a match of at least this length is found. - * @max_search_depth - * Limit on the number of potential matches to consider. - * @next_hash - * Pointer to the hash code for the current sequence, which was computed - * one position in advance so that the binary tree root could be - * prefetched. This is an input/output parameter. - * - * Note: this is very similar to bt_matchfinder_get_matches() because both - * functions must do hashing and tree re-rooting. This version just doesn't - * actually record any matches. + * This is very similar to bt_matchfinder_get_matches() because both functions + * must do hashing and tree re-rooting. 
*/ static forceinline void bt_matchfinder_skip_position(struct bt_matchfinder *mf, diff --git a/src/compiler.h b/src/compiler.h index e9cecef..8507571 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -22,22 +22,22 @@ # define LIBEXPORT #endif -/* likely() - hint that the expression is usually true */ +/* likely(expr) - hint that the expression is usually true */ #ifndef likely # define likely(expr) (expr) #endif -/* unlikely() - hint that the expression is usually false */ +/* unlikely(expr) - hint that the expression is usually false */ #ifndef unlikely # define unlikely(expr) (expr) #endif -/* prefetchr() - prefetch into L1 cache for read */ +/* prefetchr(addr) - prefetch into L1 cache for read */ #ifndef prefetchr # define prefetchr(addr) #endif -/* prefetchw() - prefetch into L1 cache for write */ +/* prefetchw(addr) - prefetch into L1 cache for write */ #ifndef prefetchw # define prefetchw(addr) #endif @@ -47,35 +47,35 @@ #ifndef _aligned_attribute #endif -/* compiler_fls32() - efficiently find the index of the last (highest) set bit +/* compiler_fls32(n) - efficiently find the index of the last (highest) set bit * in a nonzero 32-bit integer */ #ifndef compiler_fls32 #endif -/* compiler_fls64() - efficiently find the index of the last (highest) set bit +/* compiler_fls64(n) - efficiently find the index of the last (highest) set bit * in a nonzero 64-bit integer */ #ifndef compiler_fls64 #endif -/* compiler_ffs32() - efficiently find the index of the first (lowest) set bit +/* compiler_ffs32(n) - efficiently find the index of the first (lowest) set bit * in a nonzero 32-bit integer */ #ifndef compiler_ffs32 #endif -/* compiler_ffs64() - efficiently find the index of the first (lowest) set bit +/* compiler_ffs64(n) - efficiently find the index of the first (lowest) set bit * in a nonzero 64-bit integer */ #ifndef compiler_ffs64 #endif -/* compiler_bswap16() - efficiently swap the bytes of a 16-bit integer. 
*/ +/* compiler_bswap16(n) - efficiently swap the bytes of a 16-bit integer. */ #ifndef compiler_bswap16 #endif -/* compiler_bswap32() - efficiently swap the bytes of a 32-bit integer */ +/* compiler_bswap32(n) - efficiently swap the bytes of a 32-bit integer */ #ifndef compiler_bswap32 #endif -/* compiler_bswap64() - efficiently swap the bytes of a 64-bit integer */ +/* compiler_bswap64(n) - efficiently swap the bytes of a 64-bit integer */ #ifndef compiler_bswap64 #endif @@ -99,16 +99,16 @@ static forceinline int CPU_IS_LITTLE_ENDIAN(void) #define CPU_IS_BIG_ENDIAN() (!CPU_IS_LITTLE_ENDIAN()) /* - * DEFINE_UNALIGNED_TYPE(type) - this should be a macro that, given an integer - * type 'type', defines load_type_unaligned() and store_type_unaligned() - * functions which load and store variables of type 'type' from/to unaligned - * memory addresses. If not defined, a fallback is used. + * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type', + * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions + * which load and store variables of type 'type' from/to unaligned memory + * addresses. If not defined, a fallback is used. */ #ifndef DEFINE_UNALIGNED_TYPE /* Although memcpy() may seem inefficient, it *usually* gets optimized - * appropriately by modern compilers. It's portable and is probably the best - * fallback. */ + * appropriately by modern compilers. It's portable and may be the best we can + * do for a fallback... 
*/ #include #define DEFINE_UNALIGNED_TYPE(type) \ diff --git a/src/crc32.c b/src/crc32.c index bc578cc..0d76571 100644 --- a/src/crc32.c +++ b/src/crc32.c @@ -188,7 +188,7 @@ crc32_slice4(u32 remainder, const u8 *buffer, size_t nbytes) end32 = p + ((end - p) & ~3); for (; p != end32; p += 4) { - u32 v = cpu_to_le32(*(const u32 *)p); + u32 v = le32_to_cpu(*(const u32 *)p); remainder = crc32_table[0x300 + (u8)((remainder ^ v) >> 0)] ^ crc32_table[0x200 + (u8)((remainder ^ v) >> 8)] ^ @@ -218,8 +218,8 @@ crc32_slice8(u32 remainder, const u8 *buffer, size_t nbytes) end64 = p + ((end - p) & ~7); for (; p != end64; p += 8) { - u32 v1 = cpu_to_le32(*(const u32 *)(p + 0)); - u32 v2 = cpu_to_le32(*(const u32 *)(p + 4)); + u32 v1 = le32_to_cpu(*(const u32 *)(p + 0)); + u32 v2 = le32_to_cpu(*(const u32 *)(p + 4)); remainder = crc32_table[0x700 + (u8)((remainder ^ v1) >> 0)] ^ crc32_table[0x600 + (u8)((remainder ^ v1) >> 8)] ^ diff --git a/src/deflate_decompress.c b/src/deflate_decompress.c index cfe0cf1..0eb40c9 100644 --- a/src/deflate_decompress.c +++ b/src/deflate_decompress.c @@ -193,7 +193,7 @@ typedef machine_word_t bitbuf_t; */ #define FILL_BITS_WORDWISE() \ ({ \ - bitbuf |= load_leword_unaligned(in_next) << bitsleft; \ + bitbuf |= get_unaligned_leword(in_next) << bitsleft; \ in_next += (BITBUF_NBITS - bitsleft) >> 3; \ bitsleft += (BITBUF_NBITS - bitsleft) & ~7; \ }) diff --git a/src/gzip_constants.h b/src/gzip_constants.h index 6d430ab..61ea803 100644 --- a/src/gzip_constants.h +++ b/src/gzip_constants.h @@ -25,18 +25,18 @@ #define GZIP_XFL_SLOWEST_COMRESSION 0x02 #define GZIP_XFL_FASTEST_COMRESSION 0x04 -#define GZIP_OS_FAT 0 -#define GZIP_OS_AMIGA 1 -#define GZIP_OS_VMS 2 -#define GZIP_OS_UNIX 3 -#define GZIP_OS_VM_CMS 4 -#define GZIP_OS_ATARI_TOS 5 -#define GZIP_OS_HPFS 6 -#define GZIP_OS_MACINTOSH 7 -#define GZIP_OS_Z_SYSTEM 8 -#define GZIP_OS_CP_M 9 -#define GZIP_OS_TOPS_20 10 -#define GZIP_OS_NTFS 11 -#define GZIP_OS_QDOS 12 -#define GZIP_OS_RISCOS 13 -#define 
GZIP_OS_UNKNOWN 255 +#define GZIP_OS_FAT 0 +#define GZIP_OS_AMIGA 1 +#define GZIP_OS_VMS 2 +#define GZIP_OS_UNIX 3 +#define GZIP_OS_VM_CMS 4 +#define GZIP_OS_ATARI_TOS 5 +#define GZIP_OS_HPFS 6 +#define GZIP_OS_MACINTOSH 7 +#define GZIP_OS_Z_SYSTEM 8 +#define GZIP_OS_CP_M 9 +#define GZIP_OS_TOPS_20 10 +#define GZIP_OS_NTFS 11 +#define GZIP_OS_QDOS 12 +#define GZIP_OS_RISCOS 13 +#define GZIP_OS_UNKNOWN 255 diff --git a/src/gzip_decompress.c b/src/gzip_decompress.c index b9a0a88..b4c4d05 100644 --- a/src/gzip_decompress.c +++ b/src/gzip_decompress.c @@ -66,7 +66,7 @@ gzip_decompress(struct deflate_decompressor *d, /* File comment (zero terminated) */ if (flg & GZIP_FCOMMENT) { - while (*in_next++ != 0 && ++in_next != in_end) + while (*in_next++ != 0 && in_next != in_end) ; if (in_end - in_next < GZIP_FOOTER_SIZE) return false; diff --git a/src/unaligned.h b/src/unaligned.h index 7a70f12..a2494dc 100644 --- a/src/unaligned.h +++ b/src/unaligned.h @@ -59,7 +59,7 @@ get_unaligned_be32(const u8 *p) return be32_to_cpu(load_u32_unaligned(p)); else return ((u32)p[0] << 24) | ((u32)p[1] << 16) | - ((u32)p[2] << 8)| ((u32)p[3] << 0); + ((u32)p[2] << 8) | ((u32)p[3] << 0); } static forceinline u64 @@ -75,7 +75,7 @@ get_unaligned_le64(const u8 *p) } static forceinline machine_word_t -load_leword_unaligned(const u8 *p) +get_unaligned_leword(const u8 *p) { STATIC_ASSERT(WORDSIZE == 4 || WORDSIZE == 8); if (WORDSIZE == 4) diff --git a/src/util.h b/src/util.h index 9123165..8f12529 100644 --- a/src/util.h +++ b/src/util.h @@ -1,5 +1,5 @@ /* - * util.h - useful types, macros, and compiler/platform-specific definitions + * util.h - useful types, macros, and compiler or platform-specific definitions */ #pragma once @@ -10,7 +10,7 @@ #include "compiler.h" -/* Definitions of fixed-width integers, 'bool', 'size_t', and 'machine_word_t' */ +/* Fixed-width integer types */ typedef uint8_t u8; typedef uint16_t u16; @@ -44,3 +44,7 @@ typedef size_t machine_word_t; /* MIN() - calculate the 
minimum of two variables. Arguments may be evaluted * multiple times. */ #define MIN(a, b) ((a) <= (b) ? (a) : (b)) + +/* MAX() - calculate the maximum of two variables. Arguments may be evaluated * multiple times. */ +#define MAX(a, b) ((a) >= (b) ? (a) : (b))