// vlib/archive/tar/reader.v

module tar
import compress.gzip
import os
// read_tar_gz_file decompresses the local .tar.gz file at `path` and feeds
// every decompressed tar block to the given `reader` via an Untar instance.
pub fn read_tar_gz_file(path string, reader Reader) ! {
	compressed := os.read_bytes(path)!
	decompressed := gzip.decompress(compressed)!
	mut untar := Untar{
		reader: reader
	}
	untar.read_all_blocks(decompressed)!
}
// Read is used by Untar to call Reader implemented methods.
// The implementor can read the block's `get_block_number()` and `get_path()`
// and can set the field `stop_early` to true to suspend the reading.
pub struct Read {
mut:
// consecutive number of the block this read refers to
block_number int
// special type of the block, see BlockSpecial
special BlockSpecial
// number of valid bytes currently stored in prefix_buf
prefix_len int
// raw bytes of the tar header `prefix` field (no NUL terminator kept)
prefix_buf [131]u8
// whether a `/` separator is emitted between prefix and path in get_path()
separator bool
// number of valid bytes currently stored in path_buf
path_len int
// raw bytes of the tar header `name` field (no NUL terminator kept)
path_buf [100]u8
// when non-nil, get_path() returns long_path.get_path() instead of
// the prefix/path buffers
long_path &LongPath = unsafe { nil }
pub mut:
// set to true by the Reader implementor to suspend reading
stop_early bool
}
// set_short_path sets Read path with the tar block strings `prefix` and `path`.
// The block's `prefix` C string (offset 345, max 131 bytes) is usually empty;
// it is only non-empty when the filepath is longer than 100 bytes.
// The block's `path` C string (offset 0, max 100 bytes) holds the short name.
// Both are kept as raw bytes and only converted to a V string when needed,
// see `get_path()`.
fn (mut b Read) set_short_path(buffer [512]u8, separator_after_prefix bool) {
	// copy the prefix C string, stopping at the first NUL terminator
	b.prefix_len = 0
	for offset in 345 .. 345 + 131 {
		c := buffer[offset]
		if c == 0 {
			break
		}
		b.prefix_buf[b.prefix_len] = c
		b.prefix_len++
	}
	b.separator = separator_after_prefix
	// copy the path C string, again stopping at the first NUL terminator;
	// present for most blocks such as dirs and regular files
	b.path_len = 0
	for offset in 0 .. 100 {
		c := buffer[offset]
		if c == 0 {
			break
		}
		b.path_buf[b.path_len] = c
		b.path_len++
	}
}
// set_long_path sets Read path with the long path reference.
// The reference is stored without ownership; while it is non-nil,
// get_path() prefers it over the short prefix/path buffers.
fn (mut b Read) set_long_path(long_path &LongPath) {
b.long_path = unsafe { long_path }
}
// get_path returns the path of this read. The path is valid for blocks of types
// directory, file and file data.
pub fn (b Read) get_path() string {
	// a registered long path always takes precedence over the short buffers
	if b.long_path != unsafe { nil } {
		return b.long_path.get_path()
	}
	mut bytes := []u8{}
	if b.prefix_len > 0 {
		bytes << b.prefix_buf[0..b.prefix_len]
		if b.separator {
			bytes << `/`
		}
	}
	if b.path_len > 0 {
		bytes << b.path_buf[0..b.path_len]
	}
	return bytes.bytestr()
}
// get_block_number returns the consecutive number of this read,
// counted from the start of the archive.
pub fn (b Read) get_block_number() int {
return b.block_number
}
// get_special returns the special type of the Read, see BlockSpecial.
pub fn (b Read) get_special() BlockSpecial {
return b.special
}
// str returns a string representation with block number, path, special type and stop early.
pub fn (r Read) str() string {
	path := r.get_path()
	return '(block_number:${r.block_number} path:${path} special:${r.special} stop_early:${r.stop_early})'
}
// Reader is used by Untar to parse the blocks. Implement it to receive one
// callback per tar block as the archive is read.
pub interface Reader {
mut:
// dir_block is called when untar reads a block of type directory.
// Call `Read.get_path()` to get the full name of the directory.
// `size` field is zero for directories.
// The implementor can set Read's field `stop_early` to suspend the reader.
dir_block(mut read Read, size u64)
// file_block is called when untar reads a block of type filename.
// Call `Read.get_path()` to get the full name of the file.
// `size` is the expected file size in bytes to be read later.
// The implementor can set Read's field `stop_early` to suspend the reader.
file_block(mut read Read, size u64)
// data_block is called when untar reads a block of type filedata.
// Call `Read.get_path()` to get the full name of the file data belongs to.
// The `data` size is 512 bytes or less. `pending` indicates how many bytes are left to read.
// The implementor can inspect the data and use the pending value
// to set Read's field `stop_early` to suspend the reader.
data_block(mut read Read, data []u8, pending int)
// other_block is called when untar reads a block type other than directory,
// filename or filedata. `details` describes the block, e.g. `block device`
// or `FIFO`. (NOTE(review): the original comment also mentioned a
// `Read.get_header()` method — not visible in this file, confirm it exists.)
// The implementor can set Read's field `stop_early` to suspend the reader.
other_block(mut read Read, details string)
}
// DebugReader implements a Reader and prints rows for blocks read
// as directories, files, file data blocks and special blocks.
// It never sets `stop_early`, so it always reads archives to the end.
pub struct DebugReader implements Reader {
}
// new_debug_reader returns a heap-allocated DebugReader ready to be
// registered with an Untar.
pub fn new_debug_reader() &DebugReader {
	reader := &DebugReader{}
	return reader
}
// dir_block prints one row per directory block read.
fn (mut t DebugReader) dir_block(mut read Read, size u64) {
	number := read.get_block_number()
	path := read.get_path()
	println('DIR #${number} ${path}')
}
// file_block prints one row per file block read, including the expected size.
fn (mut t DebugReader) file_block(mut read Read, size u64) {
	number := read.get_block_number()
	path := read.get_path()
	println('FILE #${number} path:${path} size:${size}')
}
// data_block prints one row per file data block read, with the block size
// and how many bytes of the file are still pending.
fn (mut t DebugReader) data_block(mut read Read, data []u8, pending int) {
	number := read.get_block_number()
	path := read.get_path()
	println('DATA #${number} ${path} size:${data.len} pending:${pending}')
}
// other_block prints one row per special (non dir/file/data) block read.
fn (mut t DebugReader) other_block(mut read Read, details string) {
	number := read.get_block_number()
	println('OTHER #${number} special:${read.special} ${details}')
}
// ReadResult is returned by ReadResultFn
pub enum ReadResult {
// keep reading the next block
@continue
// the Reader implementor requested to suspend reading
stop_early
// the end of a file's data was reached
end_of_file
// the end of the whole archive was reached
end_archive
// an internal buffer would have been exceeded; reading aborted
overflow
}

// ReadResultFn consumes one 512-byte tar block and reports how to proceed.
type ReadResultFn = fn (block []u8) !ReadResult
// Decompressor pairs a gzip decompression strategy (all at once or by chunks)
// with an Untar that parses the resulting tar blocks.
@[heap]
pub struct Decompressor {
mut:
// the Untar whose registered Reader receives the parsed blocks
untar &Untar
}
// new_decompressor returns a Decompressor to decompress a tar.gz file.
// A given Untar with a registered Reader will read the blocks.
pub fn new_decompressor(untar &Untar) &Decompressor {
	decompressor := &Decompressor{
		untar: untar
	}
	return decompressor
}
// read_all decompresses the given `tar_gz` array with all the tar blocks.
// Then calls untar method `read_all` to read all the blocks at once.
// A read result is returned which can be of the type stop early or an error.
pub fn (mut d Decompressor) read_all(tar_gz []u8) !ReadResult {
	decompressed := gzip.decompress(tar_gz)!
	result := d.untar.read_all_blocks(decompressed)!
	return result
}
// read_chunks decompresses the given `tar_gz` array by chunks of
// 32768 bytes which can hold up to 64 tar blocks of 512 bytes each.
// Then calls untar method read_block with ChunksReader dispatcher.
// A read result is returned which can be of the type stop early or an error.
pub fn (mut d Decompressor) read_chunks(tar_gz []u8) !ReadResult {
mut reader := &ChunksReader{
read_block_fn: d.untar.read_single_block
}
// The callback's return value signals gzip how to proceed: the consumed
// chunk length asks for more data, 0 suspends decompression
// (NOTE(review): presumed contract of decompress_with_callback — confirm).
callback := fn (chunk []u8, mut reader ChunksReader) int {
result := reader.read_blocks(chunk)
if result == .continue {
return chunk.len // go for more
}
return 0 // suspend
}
gzip.decompress_with_callback(tar_gz, callback, reader) or {
// if the reader did not request a suspension this is a real gzip error;
// otherwise the suspension surfaced here and the reader result is returned
if reader.result == .continue {
return err
}
return reader.result
}
return reader.result
}
// ChunksReader has a reusable fixed buffer with maximum length of decompressed chunk
// of 32768 bytes plus a maximum previous pending tar block of 512 bytes.
struct ChunksReader {
mut:
// callback that consumes one complete 512-byte tar block
read_block_fn ReadResultFn = unsafe { nil }
// reusable buffer: one max chunk plus one max carried-over partial block
buffer [32768 + 512]u8
// number of chunks processed so far
chunks_counter int
pending int // position of the last not sent buffer byte
// last result reported by read_block_fn (or by read_blocks itself)
result ReadResult
}
// read_blocks receives a chunk like those of 32k from a gzip decompressor. The chunk is
// assumed to be a TAR archive section and is cut in 512 byte blocks that are sent to
// the untar reader one by one. The untar reader result informs this process to continue or
// stop early. This process can keep in the buffer the remaining bytes of an incomplete
// block, which will be sent to the untar reader prepended to the next chunk's cuts.
fn (mut d ChunksReader) read_blocks(chunk []u8) ReadResult {
	d.chunks_counter++
	total := d.pending + chunk.len
	if total > d.buffer.len {
		// cannot happen while chunks are <= 32768 bytes and pending < 512
		assert false, 'Should not occur buffer overflow ${total}'
		return .overflow
	}
	// append new chunk after previous incomplete block bytes not sent yet
	for i, ch in chunk {
		d.buffer[i + d.pending] = ch
	}
	d.pending += chunk.len
	mut cut := 0
	for {
		if cut + 512 > d.pending {
			// after sending all complete blocks move the remaining not sent bytes
			// to the start of the reused buffer to be prepended before next chunk.
			// Bug fix: destination index is i - cut; the previous `cut - 512`
			// wrote every remaining byte into a single slot and underflowed
			// to a negative index whenever cut == 0 (chunk shorter than 512).
			for i := cut; i < d.pending; i++ {
				d.buffer[i - cut] = d.buffer[i]
			}
			d.pending -= cut
			return .continue
		}
		// send a complete 512 byte block
		block := d.buffer[cut..cut + 512]
		cut += 512
		d.result = d.read_block_fn(block) or {
			// the untar reader failed: report the actual error instead of the
			// previous copy-pasted "buffer overflow" message
			assert false, 'read_block_fn failed: ${err}'
			return .overflow
		}
		match d.result {
			.continue {
				// try next cut or leave a remaining
			}
			else {
				break // untar error or stop_early
			}
		}
	}
	return d.result
}