mirror of https://github.com/cuberite/libdeflate.git
synced 2025-09-14 06:49:09 -04:00

Commit a60bc308c0 ("More updates"), parent 3a19fbae6f.

Changed files: Makefile (6 lines changed) and several other headers and sources; their hunks follow.
@@ -34,7 +34,7 @@ UNSAFE_DECOMPRESSION := no
 CC = gcc
 AR = ar
 
-override CFLAGS += -O2 -I. -std=c11
+override CFLAGS += -O2 -I. -std=c99 -fvisibility=hidden
 
 ifeq ($(SUPPORT_NEAR_OPTIMAL_PARSING),yes)
   override CFLAGS += -DSUPPORT_NEAR_OPTIMAL_PARSING=1
@@ -70,7 +70,7 @@ ifeq ($(SUPPORT_GZIP),yes)
   SRC += src/crc32.c
 endif
 
-override PIC_CFLAGS := $(CFLAGS) -fPIC -fvisibility=hidden
+override PIC_CFLAGS := $(CFLAGS) -fPIC
 
 OBJ := $(SRC:.c=.o)
 PIC_OBJ := $(SRC:.c=.pic.o)
@@ -88,7 +88,7 @@ libdeflate.a:$(OBJ)
 	$(AR) cr $@ $+
 
 benchmark:tools/benchmark.c libdeflate.a
-	$(CC) -o $@ $(CFLAGS) -L. -lz $+ libdeflate.a
+	$(CC) -o $@ $(CFLAGS) -L. $+ libdeflate.a -lz
 
 TARGETS :=
 ifeq ($(BUILD_STATIC_LIBRARY),yes)
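Note on the benchmark rule: with single-pass linkers such as GNU ld, a library must appear on the command line after the objects that reference it, so moving -lz to the end is the robust ordering. A minimal, hypothetical C program (zlib only, file name invented) showing the same effect:

/* link_order_demo.c - illustrates why -lz must follow the code that uses it.
 *
 *   cc -o demo link_order_demo.c -lz     # links fine
 *   cc -o demo -lz link_order_demo.c     # may fail: undefined reference to 'crc32'
 */
#include <stdio.h>
#include <zlib.h>

int main(void)
{
	const unsigned char data[] = "hello";
	unsigned long crc = crc32(0L, Z_NULL, 0);    /* initial CRC value */
	crc = crc32(crc, data, sizeof(data) - 1);    /* this call pulls in libz */
	printf("crc32 = %08lx\n", crc);
	return 0;
}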
@@ -51,10 +51,11 @@
 #include "lz_hash.h"
 
 #define BT_MATCHFINDER_HASH3_ORDER 15
+#define BT_MATCHFINDER_HASH3_WAYS 1
 #define BT_MATCHFINDER_HASH4_ORDER 16
 
 #define BT_MATCHFINDER_TOTAL_HASH_LENGTH \
-	((1UL << BT_MATCHFINDER_HASH3_ORDER) + \
+	((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
 	 (1UL << BT_MATCHFINDER_HASH4_ORDER))
 
 /* Representation of a match found by the bt_matchfinder */
@@ -70,7 +71,7 @@ struct lz_match {
 struct bt_matchfinder {
 
 	/* The hash table for finding length 3 matches */
-	mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER];
+	mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
 
 	/* The hash table which contains the roots of the binary trees for
 	 * finding length 4+ matches */
@@ -139,7 +140,12 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
 	u32 next_seq3;
 	u32 hash3;
 	u32 hash4;
+	STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
+		      BT_MATCHFINDER_HASH3_WAYS <= 2);
 	s32 cur_node;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+	s32 cur_node_2;
+#endif
 	const u8 *matchptr;
 	mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
 	u32 best_lt_len, best_gt_len;
@@ -157,15 +163,29 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
 	prefetchw(&mf->hash3_tab[next_hashes[0]]);
 	prefetchw(&mf->hash4_tab[next_hashes[1]]);
 
-	cur_node = mf->hash3_tab[hash3];
-	mf->hash3_tab[hash3] = cur_pos;
-	if (record_matches && cur_node > cutoff &&
-	    load_u24_unaligned(in_next) == load_u24_unaligned(&in_base[cur_node]))
-	{
+	cur_node = mf->hash3_tab[hash3][0];
+	mf->hash3_tab[hash3][0] = cur_pos;
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+	cur_node_2 = mf->hash3_tab[hash3][1];
+	mf->hash3_tab[hash3][1] = cur_node;
+#endif
+	if (record_matches && cur_node > cutoff) {
+		u32 seq3 = load_u24_unaligned(in_next);
+		if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
 		lz_matchptr->length = 3;
 		lz_matchptr->offset = in_next - &in_base[cur_node];
 		lz_matchptr++;
 	}
+#if BT_MATCHFINDER_HASH3_WAYS >= 2
+		else if (cur_node_2 > cutoff &&
+			 seq3 == load_u24_unaligned(&in_base[cur_node_2]))
+		{
+			lz_matchptr->length = 3;
+			lz_matchptr->offset = in_next - &in_base[cur_node_2];
+			lz_matchptr++;
+		}
+#endif
+	}
 
 	cur_node = mf->hash4_tab[hash4];
 	mf->hash4_tab[hash4] = cur_pos;
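Note for readers: after this change the hash3 table behaves like a tiny 2-way, most-recently-used bucket. The newest position is written to way 0 and the previous occupant is shifted into way 1, so two candidate length-3 matches can be checked per hash. A self-contained sketch of that insertion policy (the names and sizes here are illustrative, not the library's):

#include <stdint.h>
#include <stdio.h>

#define WAYS    2
#define BUCKETS 8          /* tiny table, just for illustration */

static int32_t tab[BUCKETS][WAYS];

/* Insert 'pos' for 'hash': way 0 always holds the most recent position,
 * way 1 the one before it; the oldest entry is evicted. */
static void insert(uint32_t hash, int32_t pos)
{
	tab[hash][1] = tab[hash][0];
	tab[hash][0] = pos;
}

int main(void)
{
	for (int b = 0; b < BUCKETS; b++)
		for (int w = 0; w < WAYS; w++)
			tab[b][w] = -1;    /* "empty" marker */

	insert(3, 100);
	insert(3, 250);
	/* Now tab[3][0] == 250 (newest) and tab[3][1] == 100 (previous),
	 * mirroring how advance_one_byte checks cur_node, then cur_node_2. */
	printf("%d %d\n", tab[3][0], tab[3][1]);
	return 0;
}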
@@ -250,22 +270,21 @@ bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
 *	Must be <= @max_len.
 * @max_search_depth
 *	Limit on the number of potential matches to consider.  Must be >= 1.
- * @next_hash
- *	Pointer to the hash code for the current sequence, which was computed
- *	one position in advance so that the binary tree root could be
- *	prefetched.  This is an input/output parameter.
+ * @next_hashes
+ *	The precomputed hash codes for the sequence beginning at @in_next.
+ *	These will be used and then updated with the precomputed hashcodes for
+ *	the sequence beginning at @in_next + 1.
 * @best_len_ret
 *	If a match of length >= 4 was found, then the length of the longest such
- *	match is written here; otherwise 2 is written here.  (Note: this is
+ *	match is written here; otherwise 3 is written here.  (Note: this is
 *	redundant with the 'struct lz_match' array, but this is easier for the
 *	compiler to optimize when inlined and the caller immediately does a
 *	check against 'best_len'.)
 * @lz_matchptr
 *	An array in which this function will record the matches.  The recorded
- *	matches will be sorted by strictly increasing length and increasing
- *	offset.  The maximum number of matches that may be found is
- *	'MIN(nice_len, max_len) - 2 + 1', or one less if length 2 matches are
- *	disabled.
+ *	matches will be sorted by strictly increasing length and (non-strictly)
+ *	increasing offset.  The maximum number of matches that may be found is
+ *	'nice_len - 2'.
 *
 * The return value is a pointer to the next available slot in the @lz_matchptr
 * array.  (If no matches were found, this will be the same as @lz_matchptr.)
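Given the ordering documented above (strictly increasing length, non-strictly increasing offset), a caller that only wants the single best match can simply take the last recorded element. A hedged sketch of such a consumer; 'struct lz_match' is redeclared here with simplified field types purely for illustration:

#include <stdio.h>

/* Simplified stand-in for the library's 'struct lz_match'. */
struct lz_match {
	unsigned length;
	unsigned offset;
};

/* Pick the longest recorded match, or NULL if none were found.  'end' plays
 * the role of the pointer the matchfinder returns: one past the last match. */
static const struct lz_match *
longest_match(const struct lz_match *matches, const struct lz_match *end)
{
	if (end == matches)
		return NULL;       /* no matches recorded */
	return end - 1;            /* array is sorted by increasing length */
}

int main(void)
{
	struct lz_match m[] = { {3, 10}, {5, 40}, {8, 40} };
	const struct lz_match *best = longest_match(m, m + 3);
	if (best)
		printf("best: len=%u off=%u\n", best->length, best->offset);
	return 0;
}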
@@ -296,28 +315,8 @@ bt_matchfinder_get_matches(struct bt_matchfinder *mf,
 /*
  * Advance the matchfinder, but don't record any matches.
  *
- * @mf
- *	The matchfinder structure.
- * @in_base
- *	Pointer to the next byte in the input buffer to process _at the last
- *	time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
- * @cur_pos
- *	The current position in the input buffer relative to @in_base.
- * @max_len
- *	The maximum permissible match length at this position.  Must be >=
- *	BT_MATCHFINDER_REQUIRED_NBYTES.
- * @nice_len
- *	Stop searching if a match of at least this length is found.
- * @max_search_depth
- *	Limit on the number of potential matches to consider.
- * @next_hash
- *	Pointer to the hash code for the current sequence, which was computed
- *	one position in advance so that the binary tree root could be
- *	prefetched.  This is an input/output parameter.
- *
- * Note: this is very similar to bt_matchfinder_get_matches() because both
- * functions must do hashing and tree re-rooting.  This version just doesn't
- * actually record any matches.
+ * This is very similar to bt_matchfinder_get_matches() because both functions
+ * must do hashing and tree re-rooting.
 */
 static forceinline void
 bt_matchfinder_skip_position(struct bt_matchfinder *mf,
@@ -22,22 +22,22 @@
 # define LIBEXPORT
 #endif
 
-/* likely() - hint that the expression is usually true */
+/* likely(expr) - hint that the expression is usually true */
 #ifndef likely
 # define likely(expr) (expr)
 #endif
 
-/* unlikely() - hint that the expression is usually false */
+/* unlikely(expr) - hint that the expression is usually false */
 #ifndef unlikely
 # define unlikely(expr) (expr)
 #endif
 
-/* prefetchr() - prefetch into L1 cache for read */
+/* prefetchr(addr) - prefetch into L1 cache for read */
 #ifndef prefetchr
 # define prefetchr(addr)
 #endif
 
-/* prefetchw() - prefetch into L1 cache for write */
+/* prefetchw(addr) - prefetch into L1 cache for write */
 #ifndef prefetchw
 # define prefetchw(addr)
 #endif
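These are the do-nothing fallbacks; a compiler-specific header would normally override them before this point. For reference, typical GCC/Clang definitions look roughly like this (a sketch, not code from this repository):

/* Typical GCC/Clang overrides for the generic fallbacks above. */
#if defined(__GNUC__) || defined(__clang__)
#  define likely(expr)    __builtin_expect(!!(expr), 1)
#  define unlikely(expr)  __builtin_expect(!!(expr), 0)
#  define prefetchr(addr) __builtin_prefetch((addr), 0)   /* 0 = read  */
#  define prefetchw(addr) __builtin_prefetch((addr), 1)   /* 1 = write */
#endif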
@@ -47,35 +47,35 @@
 #ifndef _aligned_attribute
 #endif
 
-/* compiler_fls32() - efficiently find the index of the last (highest) set bit
+/* compiler_fls32(n) - efficiently find the index of the last (highest) set bit
  * in a nonzero 32-bit integer */
 #ifndef compiler_fls32
 #endif
 
-/* compiler_fls64() - efficiently find the index of the last (highest) set bit
+/* compiler_fls64(n) - efficiently find the index of the last (highest) set bit
  * in a nonzero 64-bit integer */
 #ifndef compiler_fls64
 #endif
 
-/* compiler_ffs32() - efficiently find the index of the first (lowest) set bit
+/* compiler_ffs32(n) - efficiently find the index of the first (lowest) set bit
  * in a nonzero 32-bit integer */
 #ifndef compiler_ffs32
 #endif
 
-/* compiler_ffs64() - efficiently find the index of the first (lowest) set bit
+/* compiler_ffs64(n) - efficiently find the index of the first (lowest) set bit
  * in a nonzero 64-bit integer */
 #ifndef compiler_ffs64
 #endif
 
-/* compiler_bswap16() - efficiently swap the bytes of a 16-bit integer. */
+/* compiler_bswap16(n) - efficiently swap the bytes of a 16-bit integer. */
 #ifndef compiler_bswap16
 #endif
 
-/* compiler_bswap32() - efficiently swap the bytes of a 32-bit integer */
+/* compiler_bswap32(n) - efficiently swap the bytes of a 32-bit integer */
 #ifndef compiler_bswap32
 #endif
 
-/* compiler_bswap64() - efficiently swap the bytes of a 64-bit integer */
+/* compiler_bswap64(n) - efficiently swap the bytes of a 64-bit integer */
 #ifndef compiler_bswap64
 #endif
 
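Likewise, the compiler_fls/ffs/bswap hooks usually resolve to compiler builtins. A hedged sketch of the conventional 32-bit mappings (helper names here are invented for illustration):

#include <stdint.h>

/* Index (0-based) of the highest set bit in a nonzero 32-bit value. */
static inline unsigned fls32_sketch(uint32_t n)
{
	return 31 - (unsigned)__builtin_clz(n);
}

/* Index (0-based) of the lowest set bit in a nonzero 32-bit value. */
static inline unsigned ffs32_sketch(uint32_t n)
{
	return (unsigned)__builtin_ctz(n);
}

/* The byte-swap hooks map directly onto the corresponding builtins. */
static inline uint32_t bswap32_sketch(uint32_t n)
{
	return __builtin_bswap32(n);
}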
@@ -99,16 +99,16 @@ static forceinline int CPU_IS_LITTLE_ENDIAN(void)
 #define CPU_IS_BIG_ENDIAN() (!CPU_IS_LITTLE_ENDIAN())
 
 /*
- * DEFINE_UNALIGNED_TYPE(type) - this should be a macro that, given an integer
- * type 'type', defines load_type_unaligned() and store_type_unaligned()
- * functions which load and store variables of type 'type' from/to unaligned
- * memory addresses.  If not defined, a fallback is used.
+ * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
+ * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
+ * which load and store variables of type 'type' from/to unaligned memory
+ * addresses.  If not defined, a fallback is used.
 */
 #ifndef DEFINE_UNALIGNED_TYPE
 
 /* Although memcpy() may seem inefficient, it *usually* gets optimized
- * appropriately by modern compilers.  It's portable and is probably the best
- * fallback. */
+ * appropriately by modern compilers.  It's portable and may be the best we can
+ * do for a fallback... */
 #include <string.h>
 
 #define DEFINE_UNALIGNED_TYPE(type) \
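The memcpy() fallback mentioned above conventionally expands, per type, into a small load/store helper pair. A sketch for one fixed type, with invented helper names (the real macro token-pastes the type into the names):

#include <stdint.h>
#include <string.h>

/* Load a 32-bit value from a possibly unaligned address. */
static inline uint32_t load_u32_unaligned_sketch(const void *p)
{
	uint32_t v;
	memcpy(&v, p, sizeof(v));    /* compilers usually emit a single load */
	return v;
}

/* Store a 32-bit value to a possibly unaligned address. */
static inline void store_u32_unaligned_sketch(uint32_t v, void *p)
{
	memcpy(p, &v, sizeof(v));    /* likewise, usually a single store */
}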
@@ -188,7 +188,7 @@ crc32_slice4(u32 remainder, const u8 *buffer, size_t nbytes)
 
 	end32 = p + ((end - p) & ~3);
 	for (; p != end32; p += 4) {
-		u32 v = cpu_to_le32(*(const u32 *)p);
+		u32 v = le32_to_cpu(*(const u32 *)p);
 		remainder =
 			crc32_table[0x300 + (u8)((remainder ^ v) >> 0)] ^
 			crc32_table[0x200 + (u8)((remainder ^ v) >> 8)] ^
@@ -218,8 +218,8 @@ crc32_slice8(u32 remainder, const u8 *buffer, size_t nbytes)
 
 	end64 = p + ((end - p) & ~7);
 	for (; p != end64; p += 8) {
-		u32 v1 = cpu_to_le32(*(const u32 *)(p + 0));
-		u32 v2 = cpu_to_le32(*(const u32 *)(p + 4));
+		u32 v1 = le32_to_cpu(*(const u32 *)(p + 0));
+		u32 v2 = le32_to_cpu(*(const u32 *)(p + 4));
 		remainder =
 			crc32_table[0x700 + (u8)((remainder ^ v1) >> 0)] ^
 			crc32_table[0x600 + (u8)((remainder ^ v1) >> 8)] ^
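The cpu_to_le32() -> le32_to_cpu() change only matters on big-endian hosts: the 32-bit words read from the buffer are stored little-endian, so they must be converted to host order before being combined with the CRC remainder. A sketch of the intended conversion (the helper name is hypothetical):

#include <stdint.h>

/* Convert a 32-bit value read from little-endian storage to host order;
 * this is what le32_to_cpu() is expected to do.  cpu_to_le32() is the
 * inverse, and the two coincide only on little-endian machines. */
static inline uint32_t le32_to_cpu_sketch(uint32_t v)
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
	return __builtin_bswap32(v);    /* big-endian host: swap bytes */
#else
	return v;                       /* little-endian host: no-op   */
#endif
}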
@@ -193,7 +193,7 @@ typedef machine_word_t bitbuf_t;
 */
 #define FILL_BITS_WORDWISE() \
 ({ \
-	bitbuf |= load_leword_unaligned(in_next) << bitsleft; \
+	bitbuf |= get_unaligned_leword(in_next) << bitsleft; \
 	in_next += (BITBUF_NBITS - bitsleft) >> 3; \
 	bitsleft += (BITBUF_NBITS - bitsleft) & ~7; \
 })
@@ -66,7 +66,7 @@ gzip_decompress(struct deflate_decompressor *d,
 
 	/* File comment (zero terminated) */
 	if (flg & GZIP_FCOMMENT) {
-		while (*in_next++ != 0 && ++in_next != in_end)
+		while (*in_next++ != 0 && in_next != in_end)
 			;
 		if (in_end - in_next < GZIP_FOOTER_SIZE)
 			return false;
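The old condition advanced in_next twice per iteration (once via *in_next++ and again via ++in_next), so it skipped every other comment byte and could step past in_end. For reference, a standalone, bounds-checked way to skip a zero-terminated field looks like this (a sketch with invented names, rearranged from the library's one-liner):

#include <stdbool.h>

/* Skip a zero-terminated field starting at *pp, never reading past 'end'.
 * Returns false if the terminator was not found within the buffer. */
static bool skip_zero_terminated(const unsigned char **pp,
				 const unsigned char *end)
{
	const unsigned char *p = *pp;

	while (p != end && *p != 0)
		p++;
	if (p == end)
		return false;    /* unterminated field */
	*pp = p + 1;             /* step over the terminator */
	return true;
}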
@@ -59,7 +59,7 @@ get_unaligned_be32(const u8 *p)
 		return be32_to_cpu(load_u32_unaligned(p));
 	else
 		return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
-			((u32)p[2] << 8)| ((u32)p[3] << 0);
+			((u32)p[2] << 8) | ((u32)p[3] << 0);
 }
 
 static forceinline u64
@@ -75,7 +75,7 @@ get_unaligned_le64(const u8 *p)
 }
 
 static forceinline machine_word_t
-load_leword_unaligned(const u8 *p)
+get_unaligned_leword(const u8 *p)
 {
 	STATIC_ASSERT(WORDSIZE == 4 || WORDSIZE == 8);
 	if (WORDSIZE == 4)
@@ -1,5 +1,5 @@
 /*
- * util.h - useful types, macros, and compiler/platform-specific definitions
+ * util.h - useful types, macros, and compiler or platform-specific definitions
 */
 
 #pragma once
@@ -10,7 +10,7 @@
 
 #include "compiler.h"
 
-/* Definitions of fixed-width integers, 'bool', 'size_t', and 'machine_word_t' */
+/* Fixed-width integer types */
 
 typedef uint8_t u8;
 typedef uint16_t u16;
@@ -44,3 +44,7 @@ typedef size_t machine_word_t;
 /* MIN() - calculate the minimum of two variables.  Arguments may be evaluated
  * multiple times. */
 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+/* MAX() - calculate the maximum of two variables.  Arguments may be evaluated
+ * multiple times. */
+#define MAX(a, b) ((a) >= (b) ? (a) : (b))
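The "may be evaluated multiple times" caveat is easy to trip over, since each macro argument appears twice in the expansion. A small example:

#include <stdio.h>

#define MIN(a, b) ((a) <= (b) ? (a) : (b))

int main(void)
{
	int i = 0;
	/* Expands to ((i++) <= (5) ? (i++) : (5)): i is incremented twice
	 * when the first branch is taken, and the result is not 0. */
	int m = MIN(i++, 5);
	printf("m=%d i=%d\n", m, i);    /* prints m=1 i=2, not m=0 i=1 */
	return 0;
}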