vlib: add encoding.txtar (port of Go's txtar module) (#20874)

This commit is contained in:
Delyan Angelov 2024-02-20 02:41:20 +02:00 committed by GitHub
parent efa98d9234
commit c60a869fb4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 320 additions and 0 deletions

View File

@ -0,0 +1,62 @@
## Description
The purpose of the `encoding.txtar` module is best described in the original
[Go source](https://github.com/golang/go/blob/master/src/internal/txtar/archive.go):
Package txtar implements a trivial text-based file archive format.
The goals for the format are:
* be trivial enough to create and edit by hand.
* be able to store trees of text files describing go command test cases.
* diff nicely in git history and code reviews.
Non-goals include:
* being a completely general archive format
* storing binary data
* storing file modes
* storing special files like symbolic links, and so on.
## Txtar format spec
See the spec in the `txtar` Go package source code, linked above:
* A txtar archive is zero or more comment lines and then a sequence of file entries.
* Each file entry begins with a file marker line of the form "-- FILENAME --"
and is followed by zero or more file content lines making up the file data.
* The comment or file content ends at the next file marker line.
* The file marker line must begin with the three-byte sequence "-- "
and end with the three-byte sequence " --", but the enclosed
file name can be surrounded by additional white space,
all of which is stripped.
* If the txtar file is missing a trailing newline on the final line,
parsers should consider a final newline to be present anyway.
* There are no possible syntax errors in a txtar archive.
## Example
```v
import os
import encoding.txtar
a := txtar.parse('comment
line1
line2
-- file.txt --
some content that will go into file.txt
some more content
-- a/b/c/file.v --
import os
dump(os.args)
-- bcd/def/another.v --
dump(2+2)
')
assert a.files.len == 3
assert a.files[0].path == 'file.txt'
assert a.files[2].path == 'bcd/def/another.v'
tfolder := os.join_path(os.temp_dir(), 'xyz')
txtar.unpack(a, tfolder)!
assert os.exists(os.join_path(tfolder, 'bcd/def/another.v'))
b := txtar.pack(tfolder, '')!
assert b.files.len == a.files.len
os.rmdir_all(tfolder)!
```

View File

@ -0,0 +1,54 @@
module txtar
import os
// pack builds a txtar Archive from the file system entry at `path`, and stores `comment` as the archive comment.
// If `path` is a single file, the archive will contain exactly one File entry, named after that file (without its folder).
// If `path` is a folder, every file found while walking it, becomes a File entry, whose path is relative to that folder.
// All stored paths use `/` as a separator, no matter what the OS path separator is.
// It returns an error, when `path` does not exist, or when one of the files can not be read.
pub fn pack(path string, comment string) !Archive {
	if !os.exists(path) {
		return error('file or folder ${path} does not exist')
	}
	base := path.replace(os.path_separator, '/')
	mut archive := Archive{
		comment: comment
	}
	if os.is_file(base) {
		single_name := os.file_name(base)
		single_content := os.read_file(base)!
		archive.files << File{single_name, single_content}
		return archive
	}
	for found in os.walk_ext(base, '') {
		// normalise the separators first, so that the base prefix can be cut off reliably
		entry := found.replace(os.path_separator, '/')
		relative := entry.replace_once(base, '').trim_left('/')
		content := os.read_file(entry)!
		archive.files << File{relative, content}
	}
	return archive
}
// unpack writes *every file* of the archive `a` to disk, below the base folder `path`.
// The destination of each File is the base folder, joined with the `path` field of that File, i.e.
// a File with `path` field == 'abc/def/x.v', unpacked with base folder '/tmp', is written to '/tmp/abc/def/x.v' .
// The folder that will contain a file, is created (together with its missing parents), if it does not exist yet.
// It returns an error, as soon as a folder can not be created, or a file can not be written.
pub fn unpack(a &Archive, path string) ! {
	for entry in a.files {
		target := os.join_path(path, entry.path)
		parent := os.dir(target)
		if !os.exists(parent) {
			os.mkdir_all(parent)!
		}
		os.write_file(target, entry.content)!
	}
}
// parse_file reads the file at `file_path`, and parses its content as a txtar archive.
// The only way for it to fail, is an error while reading `file_path`; the parsing step itself does not return errors.
// The format is described in README.md, and it is exercised in the test txtar_test.v .
pub fn parse_file(file_path string) !Archive {
	return parse(os.read_file(file_path)!)
}

View File

@ -0,0 +1,94 @@
module txtar
// Ported from https://cs.opensource.google/go/x/tools/+/master:txtar/archive.go
import strings
// Archive is a collection of files, preceded by an optional free-form comment.
// It is produced by txtar.parse/1, and it is turned back into text by its .str() method.
pub struct Archive {
pub mut:
comment string // the start of the archive; contains potentially multiple lines, before the first file marker line
files []File // a series of files, in the order of their marker lines
}
// File is a single file in an Archive. Each File starts with a `-- FILENAME --` marker line.
pub struct File {
pub mut:
path string // 'abc/def.v' from the `-- abc/def.v --` header; the white space around the name is trimmed by the parser
content string // everything after the header line, up to the next `-- name --` line (or the end of the archive)
}
// str serialises the Archive `a` to its textual txtar form.
// The result starts with the archive comment, followed by a `-- path --` marker line,
// and the content for each file. The comment, and each non empty content, that lacks
// a final newline, gets one added, so that the following marker starts on its own line.
// The result can be stored in a text file, and it can be read back with txtar.parse/1 .
pub fn (a &Archive) str() string {
	mut out := strings.new_builder(a.comment.len + 200 * a.files.len)
	out.write_string(fix_nl(a.comment))
	for entry in a.files {
		out.writeln('-- ${entry.path} --')
		out.write_string(fix_nl(entry.content))
	}
	return out.str()
}
// parse converts the serialised txtar text in `content` to an Archive.
// Everything before the first file marker line, becomes the archive comment.
// Each marker line after that, starts a new File, whose content runs till the next marker line.
// parse does not return errors; text without any markers, ends up entirely in the comment.
pub fn parse(content string) Archive {
	comment, mut name, mut rest := find_file_marker(content)
	mut archive := Archive{
		comment: comment
	}
	for name != '' {
		// `body` belongs to the file named `name`; `next_name` is the marker that ended it
		body, next_name, remaining := find_file_marker(rest)
		archive.files << File{name, body}
		name = next_name
		rest = remaining
	}
	return archive
}
// nlm is a newline, followed by the start of a marker; find_file_marker searches for it, to locate the next candidate marker line.
const nlm = '\n-- '
// mstart is the prefix, that a file marker line must begin with.
const mstart = '-- '
// mend is the suffix, that a file marker line must end with.
const mend = ' --'
// find_file_marker locates the next file marker line in `data`.
// It returns 3 values: the data before the marker, the file name from the marker, and the data after the marker line.
// If there is no marker in non empty `data`, it returns fix_nl(data), '', '' .
// For empty `data`, it returns '', '', '' .
fn find_file_marker(data string) (string, string, string) {
	mut pos := 0
	for pos < data.len {
		rest := data[pos..]
		name, after := is_marker(rest)
		if name != '' {
			return data[..pos], name, after
		}
		offset := rest.index(txtar.nlm) or { return fix_nl(data), '', '' }
		// skip just the newline, so that `pos` is at the start of the next possible marker
		pos += offset + 1
	}
	return '', '', ''
}
// is_marker checks, whether the first line of `data` is a file marker line, i.e. `-- NAME --` .
// For a marker, it returns the name (with the surrounding white space trimmed), and the data after that line.
// Otherwise, it returns '' for the name. A marker with a blank name, also produces '' for the name,
// so callers treat it as ordinary text.
fn is_marker(data string) (string, string) {
	if !data.starts_with(txtar.mstart) {
		return '', ''
	}
	nl := data.index_u8(`\n`)
	// without a newline, the whole remaining data is the candidate line, and nothing follows it
	line := if nl >= 0 { data[..nl] } else { data }
	after := if nl >= 0 { data[nl + 1..] } else { '' }
	// the length check ensures, that the prefix and the suffix do not overlap (a line like `-- --` is rejected)
	if line.len < txtar.mstart.len + txtar.mend.len || !line.ends_with(txtar.mend) {
		return '', ''
	}
	name := line[txtar.mstart.len..line.len - txtar.mend.len].trim_space()
	return name, after
}
// fix_nl makes sure, that non empty `data` ends with a newline.
// Empty data, and data that already ends in \n, is returned unchanged.
// Otherwise the result is data, with a final \n added.
fn fix_nl(data string) string {
	if data.len > 0 && data[data.len - 1] != `\n` {
		return '${data}\n'
	}
	return data
}

View File

@ -0,0 +1,110 @@
import os
import encoding.txtar
// txtar implements a trivial text-based file archive format,
// Ported from https://cs.opensource.google/go/x/tools/+/master:txtar/archive.go
// It has some convenience additions (the txtar.pack/2 and txtar.unpack/2 functions).
// See also the README.md file in this folder.
// test_parse_nothing checks, that an empty input produces an empty archive, that serialises back to an empty string.
fn test_parse_nothing() {
	dump(@LOCATION)
	empty := ''
	archive := txtar.parse(empty)
	assert archive.comment == ''
	assert archive.files.len == 0
	assert archive.str() == empty
}
// test_parse_no_files checks, that a text without file markers, is kept as the archive comment, and round trips unchanged.
fn test_parse_no_files() {
	dump(@LOCATION)
	text := 'some
comments
'
	archive := txtar.parse(text)
	assert archive.files.len == 0
	assert archive.comment != ''
	assert archive.str() == text
}
// test_parse_no_comments checks, that an archive which starts directly with a file marker, has an empty comment.
fn test_parse_no_comments() {
	dump(@LOCATION)
	text := '-- abc.xyz --
line1
-- another.txt --
z line1
'
	archive := txtar.parse(text)
	assert archive.str() == text
	assert archive.comment == ''
	assert archive.files.len == 2
	first := archive.files[0]
	assert first.path == 'abc.xyz'
	assert first.content.split_into_lines() == ['line1']
	second := archive.files[1]
	assert second.path == 'another.txt'
	assert second.content.split_into_lines() == ['z line1']
}
// simple_archive_content is the fixture, shared by the tests below.
// Its comment part is 4 lines long, and it contains an empty line on purpose
// (test_parse asserts both facts). It is followed by 4 file entries, 2 of which have no content.
const simple_archive_content = 'some

comments on
several lines
-- abc.xyz --
line1
line2
-- empty --
-- folder2/another.txt --
z line1
z line2
z line3
-- folder3/final.txt --
'
// test_parse parses the shared fixture, then checks the round trip, the comment, and every file entry.
fn test_parse() {
	dump(@LOCATION)
	archive := txtar.parse(simple_archive_content)
	assert archive.str() == simple_archive_content
	// the comment part
	assert archive.comment != ''
	assert archive.comment.split_into_lines().len == 4
	assert archive.comment.contains('\n\n')
	// the file entries, in the order of their markers
	assert archive.files.len == 4
	assert archive.files.map(it.path) == ['abc.xyz', 'empty', 'folder2/another.txt',
		'folder3/final.txt']
	assert archive.files[0].content.split_into_lines() == ['line1', 'line2']
	assert archive.files[1].content == ''
	assert archive.files[2].content.split_into_lines() == ['z line1', 'z line2', 'z line3']
	assert archive.files[3].content == ''
}
// test_parse_file stores the shared fixture in a temporary file, and checks that parse_file reads it back intact.
// The temporary file is removed at the end, even when an assertion fails before that.
fn test_parse_file() {
	dump(@LOCATION)
	archive_path := os.join_path(os.temp_dir(), 'txtar.txt')
	defer {
		os.rm(archive_path) or {}
	}
	os.write_file(archive_path, simple_archive_content)!
	archive := txtar.parse_file(archive_path)!
	assert archive.str() == simple_archive_content
	assert archive.comment != ''
	assert archive.files.len == 4
}
// test_unpack_to_folder_then_pack_same_folder unpacks the shared fixture to a temporary folder,
// packs that folder again, and checks that the same files come back.
// The files are compared after sorting by path, because the test does not rely on the order, in which pack finds them.
fn test_unpack_to_folder_then_pack_same_folder() {
	dump(@LOCATION)
	base := os.join_path(os.temp_dir(), 'txtar_folder')
	defer {
		os.rmdir_all(base) or {}
	}
	original := txtar.parse(simple_archive_content)
	txtar.unpack(original, base)!
	for relative in ['empty', 'folder2/another.txt', 'folder3/final.txt'] {
		assert os.is_file(os.join_path(base, relative))
	}
	repacked := txtar.pack(base, 'abc')!
	// the comment comes from the pack call, not from the unpacked archive
	assert original.comment != repacked.comment
	assert repacked.comment == 'abc'
	assert repacked.files.len == original.files.len
	original_sorted := original.files.sorted(|x, y| x.path < y.path)
	repacked_sorted := repacked.files.sorted(|x, y| x.path < y.path)
	assert original_sorted == repacked_sorted
}