compress.gzip: add compression flags (fix #14994) (#23370)

This commit is contained in:
kbkpbot 2025-01-05 20:18:53 +08:00 committed by GitHub
parent af1ef92917
commit 7040514a61
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 94 additions and 3 deletions

View File

@ -6,10 +6,64 @@ module gzip
import compress as compr
import hash.crc32
// CompressFlags
// TODO: These flags have no use now
@[flag]
pub enum CompressFlags {
// The low 12 bits will be overwritten by `compression_level`
compression_level_overwrite_flag01
compression_level_overwrite_flag02
compression_level_overwrite_flag03
compression_level_overwrite_flag04
compression_level_overwrite_flag05
compression_level_overwrite_flag06
compression_level_overwrite_flag07
compression_level_overwrite_flag08
compression_level_overwrite_flag09
compression_level_overwrite_flag10
compression_level_overwrite_flag11
compression_level_overwrite_flag12
// If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data.
write_zlib_header //= 0x01000
// Always compute the adler-32 of the input data (even when not writing zlib headers).
compute_adler32 //= 0x02000
// Set to use faster greedy parsing, instead of more efficient lazy parsing.
greedy_parsing_flag //= 0x04000
// Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
nondeterministic_parsing_flag //= 0x08000
// Only look for RLE matches (matches with a distance of 1)
rle_matches //= 0x10000
// Discards matches <= 5 chars if enabled.
filter_matches //= 0x20000
// Disable usage of optimized Huffman tables.
force_all_static_blocks //= 0x40000
// Only use raw (uncompressed) deflate blocks.
force_all_raw_blocks //= 0x80000
}
// CompressParams set compression_level for compression:
// 0: Huffman only;
// 1: Huffman+LZ (fastest/crap compression);
// 128: default_max_probes;
// 4095: Huffman+LZ (slowest/best compression)
@[params]
pub struct CompressParams {
pub:
compression_level int = 128 // 0~4095
flags CompressFlags
}
// compresses an array of bytes using gzip and returns the compressed bytes in a new array
// Example: compressed := gzip.compress(b)!
pub fn compress(data []u8) ![]u8 {
compressed := compr.compress(data, 0)!
// Example: compressed := gzip.compress(b, compression_level:4095)!
// Note: compression_level 0~4095
pub fn compress(data []u8, params CompressParams) ![]u8 {
if params.compression_level !in 0..4096 {
return error('compression level should in [0,4095]')
}
// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup.
flags := params.compression_level | (int(params.flags) & ~int(4095))
compressed := compr.compress(data, flags)!
// header
mut result := [
u8(0x1f), // magic numbers (1F 8B)
@ -40,12 +94,28 @@ pub fn compress(data []u8) ![]u8 {
return result
}
// DecompressFlags
// TODO: These flags have no use now
@[flag]
pub enum DecompressFlags {
// If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
parse_zlib_header
// If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
has_more_input
// If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
using_non_wrapping_output_buf
// Force adler-32 checksum computation of the decompressed bytes.
compute_adler32
}
// DecompressParams set flags for decompression:
@[params]
pub struct DecompressParams {
pub:
verify_header_checksum bool = true
verify_length bool = true
verify_checksum bool = true
flags DecompressFlags
}
pub const reserved_bits = 0b1110_0000

View File

@ -39,3 +39,24 @@ fn test_reading_gzip_files_compressed_with_different_options() {
assert content9 == content5
assert content5 == content1
}
fn test_compress_with_deferent_level() {
compressed := os.read_bytes(s('readme_level_9.gz'))!
content9 := gzip.decompress(compressed)!
// compression: Huffman only=0
compress_0 := gzip.compress(content9, compression_level: 0)!
decompress_0 := gzip.decompress(compress_0)!
// compression: default_max_probes=128
compress_128 := gzip.compress(content9)!
decompress_128 := gzip.decompress(compress_128)!
// compression: Huffman+LZ=4095(slowest/best compression)
compress_4095 := gzip.compress(content9, compression_level: 4095)!
decompress_4095 := gzip.decompress(compress_4095)!
assert content9 == decompress_0
assert content9 == decompress_128
assert content9 == decompress_4095
}