From c60a869fb439237e47fb5d52eccad0cad320954b Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Tue, 20 Feb 2024 02:41:20 +0200 Subject: [PATCH] vlib: add `encoding.txtar` (port of Go's txtar module) (#20874) --- vlib/encoding/txtar/README.md | 62 +++++++++++++++++ vlib/encoding/txtar/pack_unpack.v | 54 +++++++++++++++ vlib/encoding/txtar/txtar.v | 94 +++++++++++++++++++++++++ vlib/encoding/txtar/txtar_test.v | 110 ++++++++++++++++++++++++++++++ 4 files changed, 320 insertions(+) create mode 100644 vlib/encoding/txtar/README.md create mode 100644 vlib/encoding/txtar/pack_unpack.v create mode 100644 vlib/encoding/txtar/txtar.v create mode 100644 vlib/encoding/txtar/txtar_test.v diff --git a/vlib/encoding/txtar/README.md b/vlib/encoding/txtar/README.md new file mode 100644 index 0000000000..2401dd1bf4 --- /dev/null +++ b/vlib/encoding/txtar/README.md @@ -0,0 +1,62 @@ +## Description +The purpose of the `encoding.txtar` module is best described in the original +[Go source](https://github.com/golang/go/blob/master/src/internal/txtar/archive.go): + + Package txtar implements a trivial text-based file archive format. + + The goals for the format are: + * be trivial enough to create and edit by hand. + * be able to store trees of text files describing go command test cases. + * diff nicely in git history and code reviews. + + Non-goals include: + * being a completely general archive format + * storing binary data + * storing file modes + * storing special files like symbolic links, and so on. + +## Txtar format spec +See the spec in the `txtar` Go package source code, linked above: + + * A txtar archive is zero or more comment lines and then a sequence of file entries. + * Each file entry begins with a file marker line of the form "-- FILENAME --" + and is followed by zero or more file content lines making up the file data. + * The comment or file content ends at the next file marker line. 
+ * The file marker line must begin with the three-byte sequence "-- " + and end with the three-byte sequence " --", but the enclosed + file name can be surrounded by additional white space, + all of which is stripped. + + * If the txtar file is missing a trailing newline on the final line, + parsers should consider a final newline to be present anyway. + + * There are no possible syntax errors in a txtar archive. + +## Example +```v +import os +import encoding.txtar + +a := txtar.parse('comment +line1 +line2 +-- file.txt -- +some content that will go into file.txt +some more content +-- a/b/c/file.v -- +import os +dump(os.args) +-- bcd/def/another.v -- +dump(2+2) +') +assert a.files.len == 3 +assert a.files[0].path == 'file.txt' +assert a.files[2].path == 'bcd/def/another.v' + +tfolder := os.join_path(os.temp_dir(), 'xyz') +txtar.unpack(a, tfolder)! +assert os.exists(os.join_path(tfolder, 'bcd/def/another.v')) +b := txtar.pack(tfolder, '')! +assert b.files.len == a.files.len +os.rmdir_all(tfolder)! +``` \ No newline at end of file diff --git a/vlib/encoding/txtar/pack_unpack.v b/vlib/encoding/txtar/pack_unpack.v new file mode 100644 index 0000000000..e2bb894e16 --- /dev/null +++ b/vlib/encoding/txtar/pack_unpack.v @@ -0,0 +1,54 @@ +module txtar + +import os + +// pack will create a txtar archive, given a path. +// When the path is a folder, it will walk over all files in that base folder, read their contents and create a File entry for each. +// When the path is a file, it will create an Archive, that contains just a single File entry, for that single file. +pub fn pack(path string, comment string) !Archive { + if !os.exists(path) { + return error('file or folder ${path} does not exist') + } + npath := path.replace(os.path_separator, '/') // use '/' as the separator, regardless of the OS + mut a := Archive{ + comment: comment + } + if os.is_file(npath) { + fname := os.file_name(npath) + fcontent := os.read_file(npath)! 
+ a.files << File{fname, fcontent} + return a + } + files := os.walk_ext(npath, '').map(it.replace(os.path_separator, '/')) + for f in files { + frelative := f.replace_once(npath, '').trim_left('/') // the path of the file, relative to the packed folder + fcontent := os.read_file(f)! + a.files << File{frelative, fcontent} + } + return a +} + +// unpack will extract *all files* in the archive `a`, into the base folder `path`. +// Note that all file paths will be appended to the base folder `path`, i.e. +// if you have a File with `path` field == 'abc/def/x.v', and base folder path == '/tmp', +// then the final path for that File, will be '/tmp/abc/def/x.v' +// Note that unpack will try to create any of the intermediate folders like +// /tmp, /tmp/abc, /tmp/abc/def, if they do not already exist. +pub fn unpack(a &Archive, path string) ! { + for f in a.files { + full_path := os.join_path(path, f.path) // NOTE(review): f.path is used as is; presumably a path with `..` in it can end up outside of `path` - confirm, before unpacking untrusted archives + folder := os.dir(full_path) + if !os.exists(folder) { + os.mkdir_all(folder)! + } + os.write_file(full_path, f.content)! + } +} + +// parse_file parses the given `file_path` as an archive. +// It will return an error, only if the `file_path` is not readable. +// See the README.md, or the test txtar_test.v, for a description of the format. +pub fn parse_file(file_path string) !Archive { + content := os.read_file(file_path)! + return parse(content) +} diff --git a/vlib/encoding/txtar/txtar.v b/vlib/encoding/txtar/txtar.v new file mode 100644 index 0000000000..20e162560f --- /dev/null +++ b/vlib/encoding/txtar/txtar.v @@ -0,0 +1,94 @@ +module txtar + +// Ported from https://cs.opensource.google/go/x/tools/+/master:txtar/archive.go +import strings + +// Archive is a collection of files +pub struct Archive { +pub mut: + comment string // the start of the archive; contains potentially multiple lines, before the files + files []File // a series of files +} + +// File is a single file in an Archive. Each one starts with a `-- FILENAME --` marker line. 
+pub struct File { +pub mut: + path string // 'abc/def.v' from the `-- abc/def.v --` header + content string // everything after that, till the next `-- name --` line. +} + +// str returns a string representation of the `a` Archive. +// It is suitable for storing in a text file. +// It is also in the same format, that txtar.parse/1 expects. +pub fn (a &Archive) str() string { + mut sb := strings.new_builder(a.comment.len + 200 * a.files.len) // initial capacity: the comment length + 200 bytes per file + sb.write_string(fix_nl(a.comment)) + for f in a.files { + sb.write_string('-- ${f.path} --\n') + sb.write_string(fix_nl(f.content)) + } + return sb.str() +} + +// parse parses the serialized form of an Archive. +// The returned Archive holds slices of data. +pub fn parse(content string) Archive { + mut a := Archive{} + comment, mut name, mut data := find_file_marker(content) + a.comment = comment + for name != '' { + mut f := File{name, ''} + f.content, name, data = find_file_marker(data) + a.files << f + } + return a +} + +const nlm = '\n-- ' // a newline, followed by the start of a marker +const mstart = '-- ' // the prefix that is_marker requires +const mend = ' --' // the suffix that is_marker requires + +// find_file_marker finds the next file marker in data, extracts the file name, +// and returns the data before the marker, the file name, and the data after the marker. +// If there is no next marker, find_file_marker returns fix_nl(data), '', ''. +fn find_file_marker(data string) (string, string, string) { + mut i := 0 + for i < data.len { + name, after := is_marker(data[i..]) + if name != '' { + return data[..i], name, after + } + j := data[i..].index(txtar.nlm) or { return fix_nl(data), '', '' } + i += j + 1 // positioned at start of new possible marker + } + return '', '', '' // only reached when data is empty +} + +// is_marker checks whether the data begins with a file marker line. +// If so, it returns the name from the line, and the data after the line. +// Otherwise it returns an empty name (and empty data). 
+fn is_marker(data string) (string, string) { + if !data.starts_with(txtar.mstart) { + return '', '' + } + mut ndata := data + mut after := '' + i := data.index_u8(`\n`) + if i >= 0 { // a newline was found: only the first line can be the marker, the rest goes into `after` + ndata, after = data[..i], data[i + 1..] + } + if !(ndata.ends_with(txtar.mend) && ndata.len >= txtar.mstart.len + txtar.mend.len) { // the length check rejects '-- --', where the prefix and the suffix would overlap + return '', '' + } + name := ndata[txtar.mstart.len..ndata.len - txtar.mend.len].trim_space() + return name, after +} + +// fix_nl returns the data, if it is empty, or if it ends in \n. +// Otherwise it returns the data, with a final \n added. +fn fix_nl(data string) string { + if data.len == 0 || data[data.len - 1] == `\n` { + return data + } + return '${data}\n' +} diff --git a/vlib/encoding/txtar/txtar_test.v b/vlib/encoding/txtar/txtar_test.v new file mode 100644 index 0000000000..4304bde3ed --- /dev/null +++ b/vlib/encoding/txtar/txtar_test.v @@ -0,0 +1,110 @@ +import os +import encoding.txtar + +// txtar implements a trivial text-based file archive format. +// Ported from https://cs.opensource.google/go/x/tools/+/master:txtar/archive.go +// It has some convenience additions (the txtar.pack/2 and txtar.unpack/2 functions). +// See also the README.md file in this folder. 
+ +fn test_parse_nothing() { + dump(@LOCATION) + content := '' + a := txtar.parse(content) + assert a.str() == content // parse, followed by str, gives back the input, even when it is empty + assert a.comment == '' + assert a.files.len == 0 +} + +fn test_parse_no_files() { + dump(@LOCATION) + content := 'some +comments +' + a := txtar.parse(content) + assert a.str() == content + assert a.comment != '' // without markers, the text ends up in the comment + assert a.files.len == 0 +} + +fn test_parse_no_comments() { + dump(@LOCATION) + content := '-- abc.xyz -- +line1 +-- another.txt -- +z line1 +' + a := txtar.parse(content) + assert a.str() == content + assert a.comment == '' // the input starts directly with a marker + assert a.files.len == 2 + assert a.files[0].path == 'abc.xyz' + assert a.files[0].content.split_into_lines() == ['line1'] + assert a.files[1].path == 'another.txt' + assert a.files[1].content.split_into_lines() == ['z line1'] +} + +const simple_archive_content = 'some + +comments on +several lines +-- abc.xyz -- +line1 +line2 +-- empty -- +-- folder2/another.txt -- +z line1 +z line2 +z line3 +-- folder3/final.txt -- +' + +fn test_parse() { + dump(@LOCATION) + a := txtar.parse(simple_archive_content) + assert a.str() == simple_archive_content + assert a.comment != '' + assert a.comment.split_into_lines().len == 4 // 'some', '', 'comments on', 'several lines' + assert a.comment.contains('\n\n') // the empty line inside the comment is kept + assert a.files.len == 4 + assert a.files[0].path == 'abc.xyz' + assert a.files[0].content.split_into_lines() == ['line1', 'line2'] + assert a.files[1].path == 'empty' + assert a.files[1].content == '' // its marker is followed directly by the next marker + assert a.files[2].path == 'folder2/another.txt' + assert a.files[2].content.split_into_lines() == ['z line1', 'z line2', 'z line3'] + assert a.files[3].path == 'folder3/final.txt' + assert a.files[3].content == '' // nothing follows the last marker + } + +fn test_parse_file() { + dump(@LOCATION) + fpath := os.join_path(os.temp_dir(), 'txtar.txt') + defer { + os.rm(fpath) or {} // a failure to remove the temporary file is ignored + } + os.write_file(fpath, simple_archive_content)! + a := txtar.parse_file(fpath)! 
+ assert a.comment != '' + assert a.files.len == 4 + assert a.str() == simple_archive_content +} + +fn test_unpack_to_folder_then_pack_same_folder() { + dump(@LOCATION) + folder := os.join_path(os.temp_dir(), 'txtar_folder') + defer { + os.rmdir_all(folder) or {} // a failure to remove the temporary folder is ignored + } + a := txtar.parse(simple_archive_content) + txtar.unpack(a, folder)! + assert os.is_file(os.join_path(folder, 'empty')) + assert os.is_file(os.join_path(folder, 'folder2/another.txt')) + assert os.is_file(os.join_path(folder, 'folder3/final.txt')) + b := txtar.pack(folder, 'abc')! + assert a.comment != b.comment // the comment of b comes from the second argument of pack, not from the folder + assert b.comment == 'abc' + assert b.files.len == a.files.len + ofiles := a.files.sorted(|x, y| x.path < y.path) // both lists are sorted by path, so the comparison does not depend on the order in which pack found the files + pfiles := b.files.sorted(|x, y| x.path < y.path) + assert ofiles == pfiles +}