From c60a869fb439237e47fb5d52eccad0cad320954b Mon Sep 17 00:00:00 2001
From: Delyan Angelov <delian66@gmail.com>
Date: Tue, 20 Feb 2024 02:41:20 +0200
Subject: [PATCH] vlib: add `encoding.txtar` (port of Go's txtar module)
 (#20874)

---
 vlib/encoding/txtar/README.md     |  62 +++++++++++++++++
 vlib/encoding/txtar/pack_unpack.v |  54 +++++++++++++++
 vlib/encoding/txtar/txtar.v       |  94 +++++++++++++++++++++++++
 vlib/encoding/txtar/txtar_test.v  | 110 ++++++++++++++++++++++++++++++
 4 files changed, 320 insertions(+)
 create mode 100644 vlib/encoding/txtar/README.md
 create mode 100644 vlib/encoding/txtar/pack_unpack.v
 create mode 100644 vlib/encoding/txtar/txtar.v
 create mode 100644 vlib/encoding/txtar/txtar_test.v

diff --git a/vlib/encoding/txtar/README.md b/vlib/encoding/txtar/README.md
new file mode 100644
index 0000000000..2401dd1bf4
--- /dev/null
+++ b/vlib/encoding/txtar/README.md
@@ -0,0 +1,62 @@
+## Description
+The purpose of the `encoding.txtar` module is best described in the original
+[Go source](https://github.com/golang/go/blob/master/src/internal/txtar/archive.go):
+
+    Package txtar implements a trivial text-based file archive format.
+
+    The goals for the format are:
+       *   be trivial enough to create and edit by hand.
+       *   be able to store trees of text files describing go command test cases.
+       *   diff nicely in git history and code reviews.
+
+    Non-goals include:
+       *   being a completely general archive format
+       *   storing binary data
+       *   storing file modes
+       *   storing special files like symbolic links, and so on.
+
+## Txtar format spec
+See the spec in the `txtar` Go package source code, linked above:
+
+       *   A txtar archive is zero or more comment lines and then a sequence of file entries.
+       *   Each file entry begins with a file marker line of the form "-- FILENAME --"
+           and is followed by zero or more file content lines making up the file data.
+       *   The comment or file content ends at the next file marker line.
+       *   The file marker line must begin with the three-byte sequence "-- "
+           and end with the three-byte sequence " --", but the enclosed
+           file name can be surrounded by additional white space,
+           all of which is stripped.
+
+       *   If the txtar file is missing a trailing newline on the final line,
+           parsers should consider a final newline to be present anyway.
+
+       *   There are no possible syntax errors in a txtar archive.
+
+## Example
+```v
+import os
+import encoding.txtar
+
+a := txtar.parse('comment
+line1
+line2
+-- file.txt --
+some content that will go into file.txt
+some more content
+-- a/b/c/file.v --
+import os
+dump(os.args)
+-- bcd/def/another.v --
+dump(2+2)
+')
+assert a.files.len == 3
+assert a.files[0].path == 'file.txt'
+assert a.files[2].path == 'bcd/def/another.v'
+
+tfolder := os.join_path(os.temp_dir(), 'xyz')
+txtar.unpack(a, tfolder)!
+assert os.exists(os.join_path(tfolder, 'bcd/def/another.v'))
+b := txtar.pack(tfolder, '')!
+assert b.files.len == a.files.len
+os.rmdir_all(tfolder)!
+```
\ No newline at end of file
diff --git a/vlib/encoding/txtar/pack_unpack.v b/vlib/encoding/txtar/pack_unpack.v
new file mode 100644
index 0000000000..e2bb894e16
--- /dev/null
+++ b/vlib/encoding/txtar/pack_unpack.v
@@ -0,0 +1,54 @@
+module txtar
+
+import os
+
+// pack creates a txtar Archive from the given `path`, and stores `comment` as the archive comment.
+// When the path is a folder, it walks over the files in that base folder, reads their contents, and adds a File entry for each, with a `/` separated path, relative to the folder.
+// When the path is a file, the Archive contains just a single File entry, named with the result of os.file_name for it. It returns an error, if `path` does not exist, or if reading a file fails.
+pub fn pack(path string, comment string) !Archive {
+	if !os.exists(path) {
+		return error('file or folder ${path} does not exist')
+	}
+	npath := path.replace(os.path_separator, '/') // use `/` as the only separator from here on
+	mut a := Archive{
+		comment: comment
+	}
+	if os.is_file(npath) {
+		fname := os.file_name(npath)
+		fcontent := os.read_file(npath)!
+		a.files << File{fname, fcontent}
+		return a
+	}
+	files := os.walk_ext(npath, '').map(it.replace(os.path_separator, '/'))
+	for f in files {
+		frelative := f.replace_once(npath, '').trim_left('/') // drop the base folder prefix, and any leading `/`
+		fcontent := os.read_file(f)!
+		a.files << File{frelative, fcontent}
+	}
+	return a
+}
+
+// unpack will extract *all files* in the archive `a`, into the base folder `path`.
+// Note that all file paths will be appended to the base folder `path`, i.e.
+// if you have a File with `path` field == 'abc/def/x.v', and base folder path == '/tmp',
+// then the final path for that File, will be '/tmp/abc/def/x.v' .
+// Note that unpack will create the folder of each extracted file (with all its parents),
+// if it does not already exist. It returns an error on the first failed mkdir or write.
+pub fn unpack(a &Archive, path string) ! {
+	for f in a.files {
+		full_path := os.join_path(path, f.path) // NOTE(review): f.path is used as is; confirm that `..` parts can not escape `path`
+		folder := os.dir(full_path)
+		if !os.exists(folder) {
+			os.mkdir_all(folder)!
+		}
+		os.write_file(full_path, f.content)!
+	}
+}
+
+// parse_file reads the whole file at `file_path`, then parses its content with `parse`.
+// It returns an error only if reading `file_path` fails; the parsing step has no error path.
+// See the README.md, or the test txtar_test.v, for a description of the format.
+pub fn parse_file(file_path string) !Archive {
+	content := os.read_file(file_path)!
+	return parse(content)
+}
diff --git a/vlib/encoding/txtar/txtar.v b/vlib/encoding/txtar/txtar.v
new file mode 100644
index 0000000000..20e162560f
--- /dev/null
+++ b/vlib/encoding/txtar/txtar.v
@@ -0,0 +1,94 @@
+module txtar
+
+// Ported from https://cs.opensource.google/go/x/tools/+/master:txtar/archive.go
+import strings
+
+// Archive is the in-memory form of a txtar archive: a leading comment, followed by files.
+pub struct Archive {
+pub mut:
+	comment string // the start of the archive; potentially multiple lines, located before the first file marker
+	files   []File // the file entries; `parse` appends them in the order in which they are found
+}
+
+// File is a single entry in an Archive. Each one starts with a `-- FILENAME --` marker line.
+pub struct File {
+pub mut:
+	path    string // 'abc/def.v' from the `-- abc/def.v --` marker line, without the surrounding white space
+	content string // everything after the marker line, up to the next marker line, or the end of the data
+}
+
+// str returns the serialized, textual form of the `a` Archive.
+// The comment comes first, then a `-- path --` marker line and the content, for each file.
+// A missing final newline is added to the comment, and to each content, through fix_nl.
+pub fn (a &Archive) str() string {
+	mut sb := strings.new_builder(a.comment.len + 200 * a.files.len) // just an initial size guess: 200 bytes per file
+	sb.write_string(fix_nl(a.comment))
+	for f in a.files {
+		sb.write_string('-- ${f.path} --\n')
+		sb.write_string(fix_nl(f.content))
+	}
+	return sb.str()
+}
+
+// parse parses the serialized form of an Archive.
+// It returns a plain Archive (not a result type), so it has no error path.
+pub fn parse(content string) Archive {
+	mut a := Archive{}
+	comment, mut name, mut data := find_file_marker(content) // everything before the first marker is the comment
+	a.comment = comment
+	for name != '' {
+		mut f := File{name, ''}
+		f.content, name, data = find_file_marker(data) // the content runs until the next marker; `name` is now the next file
+		a.files << f
+	}
+	return a
+}
+
+const nlm = '\n-- ' // a newline, followed directly by the start of a potential marker line
+const mstart = '-- ' // the prefix of a file marker line
+const mend = ' --' // the suffix of a file marker line
+
+// find_file_marker finds the next file marker in data, extracts the file name,
+// and returns the data before the marker, the file name, and the data after the marker.
+// If there is no next marker, find_file_marker returns fix_nl(data), '', ''.
+fn find_file_marker(data string) (string, string, string) {
+	mut i := 0
+	for i < data.len {
+		name, after := is_marker(data[i..])
+		if name != '' {
+			return data[..i], name, after
+		}
+		j := data[i..].index(txtar.nlm) or { return fix_nl(data), '', '' }
+		i += j + 1 // positioned at start of new possible marker
+	}
+	return '', '', '' // reached only for empty data; otherwise the loop always ends with one of the returns above
+}
+
+// is_marker checks whether the data begins with a file marker line.
+// If so, it returns the name from the line (space trimmed), and the data after the line.
+// Otherwise it returns name == ''. A marker line with a blank name, also yields name == ''.
+fn is_marker(data string) (string, string) {
+	if !data.starts_with(txtar.mstart) {
+		return '', ''
+	}
+	mut ndata := data
+	mut after := ''
+	i := data.index_u8(`\n`)
+	if i >= 0 {
+		ndata, after = data[..i], data[i + 1..] // the first line, and everything after its newline
+	}
+	if !(ndata.ends_with(txtar.mend) && ndata.len >= txtar.mstart.len + txtar.mend.len) { // the prefix and the suffix must not overlap
+		return '', ''
+	}
+	name := ndata[txtar.mstart.len..ndata.len - txtar.mend.len].trim_space()
+	return name, after
+}
+
+// fix_nl returns the data unchanged, if it is empty, or if it already ends in \n.
+// Otherwise it returns the data, with a final \n added.
+fn fix_nl(data string) string {
+	if data.len == 0 || data[data.len - 1] == `\n` {
+		return data
+	}
+	return '${data}\n'
+}
diff --git a/vlib/encoding/txtar/txtar_test.v b/vlib/encoding/txtar/txtar_test.v
new file mode 100644
index 0000000000..4304bde3ed
--- /dev/null
+++ b/vlib/encoding/txtar/txtar_test.v
@@ -0,0 +1,110 @@
+import os
+import encoding.txtar
+
+// txtar implements a trivial text-based file archive format.
+// Ported from https://cs.opensource.google/go/x/tools/+/master:txtar/archive.go
+// It has some convenience additions (the txtar.pack/2 and txtar.unpack/2 functions).
+// See also the README.md file in this folder.
+
+fn test_parse_nothing() {
+	dump(@LOCATION)
+	content := '' // the empty string is parsed as an archive without a comment, and without files
+	a := txtar.parse(content)
+	assert a.str() == content // parse followed by str, gives back the input
+	assert a.comment == ''
+	assert a.files.len == 0
+}
+
+fn test_parse_no_files() {
+	dump(@LOCATION)
+	content := 'some
+comments
+'
+	a := txtar.parse(content)
+	assert a.str() == content // parse followed by str, gives back the input
+	assert a.comment != '' // without any marker line, the text ends up in the comment
+	assert a.files.len == 0
+}
+
+fn test_parse_no_comments() {
+	dump(@LOCATION)
+	content := '-- abc.xyz --
+line1
+-- another.txt --
+z line1
+'
+	a := txtar.parse(content)
+	assert a.str() == content // parse followed by str, gives back the input
+	assert a.comment == '' // the input starts directly with a marker line
+	assert a.files.len == 2
+	assert a.files[0].path == 'abc.xyz'
+	assert a.files[0].content.split_into_lines() == ['line1']
+	assert a.files[1].path == 'another.txt'
+	assert a.files[1].content.split_into_lines() == ['z line1']
+}
+
+const simple_archive_content = 'some
+
+comments on
+several lines
+-- abc.xyz --
+line1
+line2
+-- empty --
+-- folder2/another.txt --
+z line1
+z line2
+z line3
+-- folder3/final.txt --
+' // a shared fixture: a 4 line comment, then 4 files, 2 of which have no content
+
+fn test_parse() {
+	dump(@LOCATION)
+	a := txtar.parse(simple_archive_content)
+	assert a.str() == simple_archive_content // parse followed by str, gives back the input
+	assert a.comment != ''
+	assert a.comment.split_into_lines().len == 4 // the empty comment line is counted too
+	assert a.comment.contains('\n\n') // the empty comment line is kept
+	assert a.files.len == 4
+	assert a.files[0].path == 'abc.xyz'
+	assert a.files[0].content.split_into_lines() == ['line1', 'line2']
+	assert a.files[1].path == 'empty'
+	assert a.files[1].content == '' // a marker, followed directly by another marker
+	assert a.files[2].path == 'folder2/another.txt'
+	assert a.files[2].content.split_into_lines() == ['z line1', 'z line2', 'z line3']
+	assert a.files[3].path == 'folder3/final.txt'
+	assert a.files[3].content == '' // a marker, that is the last line of the input
+}
+
+fn test_parse_file() {
+	dump(@LOCATION)
+	fpath := os.join_path(os.temp_dir(), 'txtar.txt')
+	defer {
+		os.rm(fpath) or {} // best effort cleanup; a failure here is ignored on purpose
+	}
+	os.write_file(fpath, simple_archive_content)!
+	a := txtar.parse_file(fpath)!
+	assert a.comment != ''
+	assert a.files.len == 4
+	assert a.str() == simple_archive_content // the archive read from disk serializes back to the fixture
+}
+
+fn test_unpack_to_folder_then_pack_same_folder() {
+	dump(@LOCATION)
+	folder := os.join_path(os.temp_dir(), 'txtar_folder')
+	defer {
+		os.rmdir_all(folder) or {} // best effort cleanup; a failure here is ignored on purpose
+	}
+	a := txtar.parse(simple_archive_content)
+	txtar.unpack(a, folder)!
+	assert os.is_file(os.join_path(folder, 'empty'))
+	assert os.is_file(os.join_path(folder, 'folder2/another.txt'))
+	assert os.is_file(os.join_path(folder, 'folder3/final.txt'))
+	b := txtar.pack(folder, 'abc')!
+	assert a.comment != b.comment // pack uses the comment that it is given, not the one from `a`
+	assert b.comment == 'abc'
+	assert b.files.len == a.files.len
+	ofiles := a.files.sorted(|x, y| x.path < y.path) // sort both by path, before comparing them
+	pfiles := b.files.sorted(|x, y| x.path < y.path)
+	assert ofiles == pfiles
+}