compress,compress.gzip: add decompress_with_callback API (#24904)

This commit is contained in:
Jorge Mireles 2025-07-15 09:57:14 -06:00 committed by GitHub
parent 0d8cc4588f
commit 8605599aee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 81 additions and 1 deletions

View File

@ -51,3 +51,51 @@ pub fn decompress(data []u8, flags int) ![]u8 {
return ret
}
}
// ChunkCallback is the type of the function that receives each decompressed chunk
// (at most 32768 bytes per chunk).
// To request more chunks, the callback must return the length of the chunk it was given;
// any other return value stops the decompression.
// The `userdata` parameter is the value passed to decompress_with_callback, forwarded unchanged.
// It can be used to pass arbitrary state to the callback, without having to create a closure.
pub type ChunkCallback = fn (chunk []u8, userdata voidptr) int
// decompress_with_callback decompresses `data` according to `flags`, delivering the output
// piece by piece to the V callback `cb` (chunks of at most 32 kilobytes each) instead of
// building one big array. `userdata` is forwarded unchanged to every `cb` call.
// It returns the total number of decompressed bytes accepted by `cb`, or the error
// 'decompression error' when the underlying C routine reports failure.
// NOTE(review): stopping early from `cb` presumably makes the C routine report failure too,
// which would surface as the same error - confirm against the miniz sources.
// NB: this is a low level api, a high level implementation like zlib/gzip should be preferred.
pub fn decompress_with_callback(data []u8, cb ChunkCallback, userdata voidptr, flags int) !u64 {
	// Everything the C-side trampoline needs travels through this one struct,
	// which is handed to the C decompressor as its opaque userdata pointer.
	state := DecompressionCallBackData{
		data:     data.data
		size:     usize(data.len)
		userdata: userdata
		cb:       cb
	}
	succeeded := C.tinfl_decompress_mem_to_callback(state.data, &state.size, c_cb_for_decompress_mem,
		&state, flags) != 0
	if !succeeded {
		return error('decompression error')
	}
	// The trampoline accumulated the accepted chunk lengths while the C routine was running.
	return state.decompressed_size
}
// DecompressionCallBackData is the private state shared between decompress_with_callback
// and c_cb_for_decompress_mem. A pointer to it is passed through the C decompressor
// as the opaque userdata argument.
struct DecompressionCallBackData {
mut:
// data points to the compressed input bytes.
data voidptr
// size is the compressed input length in bytes; its address is handed to the C decompressor.
size usize
// decompressed_size is the running total of the chunk bytes accepted by `cb`.
decompressed_size u64
// userdata is the caller's opaque pointer, forwarded unchanged to `cb`.
userdata voidptr
// cb is the user's V callback, invoked once per decompressed chunk; it defaults to nil.
cb ChunkCallback = unsafe { nil }
}
// c_cb_for_decompress_mem is the trampoline that the C decompressor calls for every
// decompressed chunk. It recovers the DecompressionCallBackData from `pdcbd`, wraps the
// C buffer (`buf`, `len` bytes) as a V []u8 and forwards it to the user's callback.
// It returns 1 to continue when the user callback returned exactly `len`, and 0 to stop
// otherwise. Only accepted chunks are added to `decompressed_size`.
fn c_cb_for_decompress_mem(buf &char, len int, pdcbd voidptr) int {
	mut state := unsafe { &DecompressionCallBackData(pdcbd) }
	// The chunk is built directly over the C buffer, so callers that want to keep
	// the bytes are expected to clone them inside their callback.
	chunk := unsafe { voidptr(buf).vbytes(len) }
	accepted := state.cb(chunk, state.userdata)
	if accepted != len {
		return 0 // stop decompressing
	}
	state.decompressed_size += u64(len)
	return 1 // continue decompressing
}
// DecompressCallback is the signature of the output callback expected by
// C.tinfl_decompress_mem_to_callback; c_cb_for_decompress_mem is the function passed for it.
type DecompressCallback = fn (const_buffer voidptr, len int, userdata voidptr) int
// C.tinfl_decompress_mem_to_callback is the C decompression routine used by decompress_with_callback.
// It receives the input buffer, a pointer to the input size, the output callback, an opaque
// userdata pointer that is handed back to that callback, and the decompression flags.
// decompress_with_callback treats a return value of 0 as a decompression error.
fn C.tinfl_decompress_mem_to_callback(const_input_buffer voidptr, psize &usize, put_buf_cb DecompressCallback, userdata voidptr, flags int) int

View File

@ -203,7 +203,7 @@ pub fn validate(data []u8, params DecompressParams) !GzipHeader {
return header
}
// decompresses an array of bytes using zlib and returns the decompressed bytes in a new array
// decompress decompresses an array of bytes using zlib and returns the decompressed bytes in a new array
// Example: decompressed := gzip.decompress(b)!
pub fn decompress(data []u8, params DecompressParams) ![]u8 {
gzip_header := validate(data, params)!
@ -221,3 +221,20 @@ pub fn decompress(data []u8, params DecompressParams) ![]u8 {
}
return decompressed
}
// decompress_with_callback decompresses the given gzip `data` and calls `cb` with each chunk
// of decompressed bytes, instead of returning them all in one array.
// A chunk is usually 32 KB or less. Note: the chunk data received by `cb` must be cloned,
// if you need to keep it for later, instead of processing it right away.
// The callback should return the chunk length to continue decompressing, or 0 to abort early.
// `userdata` is forwarded unchanged to every `cb` call.
// It returns the total decompressed length. When `params.verify_length` is set, that total is
// checked against the length stored in the last 4 bytes of `data`, and an error is returned
// on mismatch. No checksum of the decompressed bytes is verified here.
// See also compress.ChunkCallback for more details.
pub fn decompress_with_callback(data []u8, cb compr.ChunkCallback, userdata voidptr, params DecompressParams) !int {
	header := validate(data, params)!
	end := data.len
	// The last 4 bytes of the input store the expected decompressed length, least significant byte first.
	b3 := u32(data[end - 1]) << 24
	b2 := u32(data[end - 2]) << 16
	b1 := u32(data[end - 3]) << 8
	b0 := u32(data[end - 4])
	expected_len := int(b3 | b2 | b1 | b0)
	// The compressed payload sits between the header and the final 8 bytes of the input.
	payload := data[header.length..end - 8]
	chunks_len := int(compr.decompress_with_callback(payload, cb, userdata, 0)!)
	if !params.verify_length || expected_len == chunks_len {
		return chunks_len
	}
	return error('Decompress error: expected length:${expected_len}, got:${chunks_len}')
}

View File

@ -132,3 +132,18 @@ fn test_gzip_with_invalid_flags() {
compressed[3] |= 0b1000_0000
assert_decompress_error(compressed, 'reserved flags are set, unsupported field detected')!
}
// test_gzip_decompress_callback compresses a repetitive 60_000 byte payload, decompresses it
// through the callback API, and checks that the reported total matches both the sum of the
// chunk lengths seen by the callback and the original length.
fn test_gzip_decompress_callback() {
	original := '321323'.repeat(10_000)
	packed := compress(original.bytes())!
	mut seen_bytes := 0
	// The counter is reached through the userdata pointer, so no closure is needed.
	mut counter := &seen_bytes
	total := decompress_with_callback(packed, fn (chunk []u8, counter &int) int {
		unsafe {
			*counter += chunk.len
		}
		return chunk.len
	}, counter)!
	assert total == seen_bytes
	assert total == original.len
}