compress.gzip: add compression flags (fix #14994) (#23370)

2025-09-19 12:27:02 -04:00 · 2025-01-05 20:18:53 +08:00 · 2025-01-05 20:18:53 +08:00 · 7040514a61
commit 7040514a61
parent af1ef92917
2 changed files with 94 additions and 3 deletions
--- a/vlib/compress/gzip/gzip.v
+++ b/vlib/compress/gzip/gzip.v
@ -6,10 +6,64 @@ module gzip
 import compress as compr
 import hash.crc32

+// CompressFlags
+// TODO: These flags have no use now
+@[flag]
+pub enum CompressFlags {
+	// The low 12 bits will be overwritten by `compression_level`
+	compression_level_overwrite_flag01
+	compression_level_overwrite_flag02
+	compression_level_overwrite_flag03
+	compression_level_overwrite_flag04
+	compression_level_overwrite_flag05
+	compression_level_overwrite_flag06
+	compression_level_overwrite_flag07
+	compression_level_overwrite_flag08
+	compression_level_overwrite_flag09
+	compression_level_overwrite_flag10
+	compression_level_overwrite_flag11
+	compression_level_overwrite_flag12
+
+	// If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data.
+	write_zlib_header //= 0x01000
+	// Always compute the adler-32 of the input data (even when not writing zlib headers).
+	compute_adler32 //= 0x02000
+	// Set to use faster greedy parsing, instead of more efficient lazy parsing.
+	greedy_parsing_flag //= 0x04000
+	// Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
+	nondeterministic_parsing_flag //= 0x08000
+	// Only look for RLE matches (matches with a distance of 1)
+	rle_matches //= 0x10000
+	// Discards matches <= 5 chars if enabled.
+	filter_matches //= 0x20000
+	// Disable usage of optimized Huffman tables.
+	force_all_static_blocks //= 0x40000
+	// Only use raw (uncompressed) deflate blocks.
+	force_all_raw_blocks //= 0x80000
+}
+
+// CompressParams set compression_level for compression:
+// 0: Huffman only;
+// 1: Huffman+LZ (fastest/crap compression);
+// 128: default_max_probes;
+// 4095: Huffman+LZ (slowest/best compression)
+@[params]
+pub struct CompressParams {
+pub:
+	compression_level int = 128 // 0~4095
+	flags             CompressFlags
+}
+
 // compresses an array of bytes using gzip and returns the compressed bytes in a new array
-// Example: compressed := gzip.compress(b)!
-pub fn compress(data []u8) ![]u8 {
-	compressed := compr.compress(data, 0)!
+// Example: compressed := gzip.compress(b, compression_level:4095)!
+// Note: compression_level 0~4095
+pub fn compress(data []u8, params CompressParams) ![]u8 {
+	if params.compression_level !in 0..4096 {
+		return error('compression level should in [0,4095]')
+	}
+	// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup.
+	flags := params.compression_level | (int(params.flags) & ~int(4095))
+	compressed := compr.compress(data, flags)!
 	// header
 	mut result := [
 		u8(0x1f), // magic numbers (1F 8B)
@ -40,12 +94,28 @@ pub fn compress(data []u8) ![]u8 {
 	return result
 }

+// DecompressFlags
+// TODO: These flags have no use now
+@[flag]
+pub enum DecompressFlags {
+	// If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
+	parse_zlib_header
+	// If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
+	has_more_input
+	// If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
+	using_non_wrapping_output_buf
+	// Force adler-32 checksum computation of the decompressed bytes.
+	compute_adler32
+}
+
+// DecompressParams set flags for decompression:
@[params]
 pub struct DecompressParams {
 pub:
 	verify_header_checksum bool = true
 	verify_length          bool = true
 	verify_checksum        bool = true
+	flags                  DecompressFlags
 }

 pub const reserved_bits = 0b1110_0000
--- a/vlib/compress/gzip/read_gz_files_test.v
+++ b/vlib/compress/gzip/read_gz_files_test.v
@ -39,3 +39,24 @@ fn test_reading_gzip_files_compressed_with_different_options() {
 	assert content9 == content5
 	assert content5 == content1
 }
+
+fn test_compress_with_deferent_level() {
+	compressed := os.read_bytes(s('readme_level_9.gz'))!
+	content9 := gzip.decompress(compressed)!
+
+	// compression: Huffman only=0
+	compress_0 := gzip.compress(content9, compression_level: 0)!
+	decompress_0 := gzip.decompress(compress_0)!
+
+	// compression: default_max_probes=128
+	compress_128 := gzip.compress(content9)!
+	decompress_128 := gzip.decompress(compress_128)!
+
+	// compression: Huffman+LZ=4095(slowest/best compression)
+	compress_4095 := gzip.compress(content9, compression_level: 4095)!
+	decompress_4095 := gzip.decompress(compress_4095)!
+
+	assert content9 == decompress_0
+	assert content9 == decompress_128
+	assert content9 == decompress_4095
+}