// vlib/archive/tar/reader.v

module tar
import compress.gzip
import os
// read_tar_gz_file decompresses the local .tar.gz file at `path` and feeds
// every decompressed tar block to the given `reader` via an Untar instance.
pub fn read_tar_gz_file(path string, reader Reader) ! {
	compressed := os.read_bytes(path)!
	decompressed := gzip.decompress(compressed)!
	mut untar := Untar{
		reader: reader
	}
	untar.read_all_blocks(decompressed)!
}
// Read is used by Untar to call Reader implemented methods.
// The implementor can read the block's `get_block_number()` and `get_path()`
// and can set the field `stop_early` to true to suspend the reading.
pub struct Read {
mut:
// consecutive number of the block this read refers to
block_number int
// special type of the block, see BlockSpecial
special BlockSpecial
// number of valid bytes currently stored in prefix_buf
prefix_len int
// raw bytes of the tar header `prefix` field (no NUL terminator kept)
prefix_buf [131]u8
// whether a `/` separator is emitted between prefix and path in get_path()
separator bool
// number of valid bytes currently stored in path_buf
path_len int
// raw bytes of the tar header `name` field (no NUL terminator kept)
path_buf [100]u8
// when non-nil, get_path() returns long_path.get_path() instead of
// the prefix/path buffers
long_path &LongPath = unsafe { nil }
pub mut:
// set to true by the Reader implementor to suspend reading
stop_early bool
}
// set_short_path sets Read path with the tar block strings `prefix` and `path`.
// The block's `prefix` C string (offset 345, max 131 bytes) is usually empty;
// it is only non-empty when the filepath is longer than 100 bytes.
// The block's `path` C string (offset 0, max 100 bytes) holds the short name.
// Both are kept as raw bytes and only converted to a V string when needed,
// see `get_path()`.
fn (mut b Read) set_short_path(buffer [512]u8, separator_after_prefix bool) {
	// copy the prefix C string, stopping at the first NUL terminator
	b.prefix_len = 0
	for offset in 345 .. 345 + 131 {
		c := buffer[offset]
		if c == 0 {
			break
		}
		b.prefix_buf[b.prefix_len] = c
		b.prefix_len++
	}
	b.separator = separator_after_prefix
	// copy the path C string, again stopping at the first NUL terminator;
	// present for most blocks such as dirs and regular files
	b.path_len = 0
	for offset in 0 .. 100 {
		c := buffer[offset]
		if c == 0 {
			break
		}
		b.path_buf[b.path_len] = c
		b.path_len++
	}
}
// set_long_path sets Read path with the long path reference.
// The reference is stored without ownership; while it is non-nil,
// get_path() prefers it over the short prefix/path buffers.
fn (mut b Read) set_long_path(long_path &LongPath) {
b.long_path = unsafe { long_path }
}
// get_path returns the path of this read. The path is valid for blocks of types
// directory, file and file data.
pub fn (b Read) get_path() string {
	// a registered long path always takes precedence over the short buffers
	if b.long_path != unsafe { nil } {
		return b.long_path.get_path()
	}
	mut bytes := []u8{}
	if b.prefix_len > 0 {
		bytes << b.prefix_buf[0..b.prefix_len]
		if b.separator {
			bytes << `/`
		}
	}
	if b.path_len > 0 {
		bytes << b.path_buf[0..b.path_len]
	}
	return bytes.bytestr()
}
// get_block_number returns the consecutive number of this read,
// counted from the start of the archive.
pub fn (b Read) get_block_number() int {
return b.block_number
}
// get_special returns the special type of the Read, see BlockSpecial.
pub fn (b Read) get_special() BlockSpecial {
return b.special
}
// str returns a string representation with block number, path, special type and stop early.
pub fn (r Read) str() string {
	path := r.get_path()
	return '(block_number:${r.block_number} path:${path} special:${r.special} stop_early:${r.stop_early})'
}
// Reader is used by Untar to parse the blocks. Implement it to receive one
// callback per tar block as the archive is read.
pub interface Reader {
mut:
// dir_block is called when untar reads a block of type directory.
// Call `Read.get_path()` to get the full name of the directory.
// `size` field is zero for directories.
// The implementor can set Read's field `stop_early` to suspend the reader.
dir_block(mut read Read, size u64)
// file_block is called when untar reads a block of type filename.
// Call `Read.get_path()` to get the full name of the file.
// `size` is the expected file size in bytes to be read later.
// The implementor can set Read's field `stop_early` to suspend the reader.
file_block(mut read Read, size u64)
// data_block is called when untar reads a block of type filedata.
// Call `Read.get_path()` to get the full name of the file data belongs to.
// The `data` size is 512 bytes or less. `pending` indicates how many bytes are left to read.
// The implementor can inspect the data and use the pending value
// to set Read's field `stop_early` to suspend the reader.
data_block(mut read Read, data []u8, pending int)
// other_block is called when untar reads a block type other than directory,
// filename or filedata. `details` describes the block, e.g. `block device`
// or `FIFO`. (NOTE(review): the original comment also mentioned a
// `Read.get_header()` method — not visible in this file, confirm it exists.)
// The implementor can set Read's field `stop_early` to suspend the reader.
other_block(mut read Read, details string)
}
// DebugReader implements a Reader and prints rows for blocks read
// as directories, files, file data blocks and special blocks.
// It never sets `stop_early`, so it always reads archives to the end.
pub struct DebugReader implements Reader {
}
// new_debug_reader returns a heap-allocated DebugReader ready to be
// registered with an Untar.
pub fn new_debug_reader() &DebugReader {
	reader := &DebugReader{}
	return reader
}
// dir_block prints one row per directory block read.
fn (mut t DebugReader) dir_block(mut read Read, size u64) {
	number := read.get_block_number()
	path := read.get_path()
	println('DIR #${number} ${path}')
}
// file_block prints one row per file block read, including the expected size.
fn (mut t DebugReader) file_block(mut read Read, size u64) {
	number := read.get_block_number()
	path := read.get_path()
	println('FILE #${number} path:${path} size:${size}')
}
// data_block prints one row per file data block read, with the block size
// and how many bytes of the file are still pending.
fn (mut t DebugReader) data_block(mut read Read, data []u8, pending int) {
	number := read.get_block_number()
	path := read.get_path()
	println('DATA #${number} ${path} size:${data.len} pending:${pending}')
}
// other_block prints one row per special (non dir/file/data) block read.
fn (mut t DebugReader) other_block(mut read Read, details string) {
	number := read.get_block_number()
	println('OTHER #${number} special:${read.special} ${details}')
}
// ReadResult is returned by ReadResultFn
pub enum ReadResult {
// keep reading the next block
@continue
// the Reader implementor requested to suspend reading
stop_early
// the end of a file's data was reached
end_of_file
// the end of the whole archive was reached
end_archive
// an internal buffer would have been exceeded; reading aborted
overflow
}

// ReadResultFn consumes one 512-byte tar block and reports how to proceed.
type ReadResultFn = fn (block []u8) !ReadResult
// Decompressor pairs a gzip decompression strategy (all at once or by chunks)
// with an Untar that parses the resulting tar blocks.
@[heap]
pub struct Decompressor {
mut:
// the Untar whose registered Reader receives the parsed blocks
untar &Untar
}
// new_decompressor returns a Decompressor to decompress a tar.gz file.
// A given Untar with a registered Reader will read the blocks.
pub fn new_decompressor(untar &Untar) &Decompressor {
	decompressor := &Decompressor{
		untar: untar
	}
	return decompressor
}
// read_all decompresses the given `tar_gz` array with all the tar blocks.
// Then calls untar method `read_all` to read all the blocks at once.
// A read result is returned which can be of the type stop early or an error.
pub fn (mut d Decompressor) read_all(tar_gz []u8) !ReadResult {
	decompressed := gzip.decompress(tar_gz)!
	result := d.untar.read_all_blocks(decompressed)!
	return result
}
// read_chunks decompresses the given `tar_gz` array by chunks of
// 32768 bytes which can hold up to 64 tar blocks of 512 bytes each.
// Then calls untar method read_block with ChunksReader dispatcher.
// A read result is returned which can be of the type stop early or an error.
pub fn (mut d Decompressor) read_chunks(tar_gz []u8) !ReadResult {
mut reader := &ChunksReader{
read_block_fn: d.untar.read_single_block
}
// The callback's return value signals gzip how to proceed: the consumed
// chunk length asks for more data, 0 suspends decompression
// (NOTE(review): presumed contract of decompress_with_callback — confirm).
callback := fn (chunk []u8, mut reader ChunksReader) int {
result := reader.read_blocks(chunk)
if result == .continue {
return chunk.len // go for more
}
return 0 // suspend
}
gzip.decompress_with_callback(tar_gz, callback, reader) or {
// if the reader did not request a suspension this is a real gzip error;
// otherwise the suspension surfaced here and the reader result is returned
if reader.result == .continue {
return err
}
return reader.result
}
return reader.result
}
// ChunksReader has a reusable fixed buffer with maximum length of decompressed chunk
// of 32768 bytes plus a maximum previous pending tar block of 512 bytes.
struct ChunksReader {
mut:
// callback that consumes one complete 512-byte tar block
read_block_fn ReadResultFn = unsafe { nil }
// reusable buffer: one max chunk plus one max carried-over partial block
buffer [32768 + 512]u8
// number of chunks processed so far
chunks_counter int
pending int // position of the last not sent buffer byte
// last result reported by read_block_fn (or by read_blocks itself)
result ReadResult
}
// read_blocks receives a chunk like those of 32k from a gzip decompressor. The chunk is
// assumed to be a TAR archive section and is cut in 512 byte blocks that are sent to
// the untar reader one by one. The untar reader result informs this process to continue or
// stop early. This process can keep in the buffer the remaining bytes of an incomplete
// block, which will be sent to the untar reader prepended to the next chunk's cuts.
fn (mut d ChunksReader) read_blocks(chunk []u8) ReadResult {
	d.chunks_counter++
	total := d.pending + chunk.len
	if total > d.buffer.len {
		// cannot happen while chunks are <= 32768 bytes and pending < 512
		assert false, 'Should not occur buffer overflow ${total}'
		return .overflow
	}
	// append new chunk after previous incomplete block bytes not sent yet
	for i, ch in chunk {
		d.buffer[i + d.pending] = ch
	}
	d.pending += chunk.len
	mut cut := 0
	for {
		if cut + 512 > d.pending {
			// after sending all complete blocks move the remaining not sent bytes
			// to the start of the reused buffer to be prepended before next chunk.
			// Bug fix: destination index is i - cut; the previous `cut - 512`
			// wrote every remaining byte into a single slot and underflowed
			// to a negative index whenever cut == 0 (chunk shorter than 512).
			for i := cut; i < d.pending; i++ {
				d.buffer[i - cut] = d.buffer[i]
			}
			d.pending -= cut
			return .continue
		}
		// send a complete 512 byte block
		block := d.buffer[cut..cut + 512]
		cut += 512
		d.result = d.read_block_fn(block) or {
			// the untar reader failed: report the actual error instead of the
			// previous copy-pasted "buffer overflow" message
			assert false, 'read_block_fn failed: ${err}'
			return .overflow
		}
		match d.result {
			.continue {
				// try next cut or leave a remaining
			}
			else {
				break // untar error or stop_early
			}
		}
	}
	return d.result
}