mirror of
https://github.com/vlang/v.git
synced 2025-08-04 02:07:28 -04:00
io: add a string_reader
submodule (#20893)
This commit is contained in:
parent
a9c10428d2
commit
d8c4a84f71
@ -24,6 +24,7 @@ pub struct BufferedReaderConfig {
|
||||
// BufferedReadLineConfig are options that can be given to the read_line() function.
|
||||
@[params]
|
||||
pub struct BufferedReadLineConfig {
|
||||
pub:
|
||||
delim u8 = `\n` // line delimiter
|
||||
}
|
||||
|
||||
|
@ -38,13 +38,13 @@ fn test_read_all_huge() {
|
||||
assert res == '123'.repeat(100000).bytes()
|
||||
}
|
||||
|
||||
struct StringReader {
|
||||
struct StringReaderTest {
|
||||
text string
|
||||
mut:
|
||||
place int
|
||||
}
|
||||
|
||||
fn (mut s StringReader) read(mut buf []u8) !int {
|
||||
fn (mut s StringReaderTest) read(mut buf []u8) !int {
|
||||
if s.place >= s.text.len {
|
||||
return Eof{}
|
||||
}
|
||||
@ -55,9 +55,9 @@ fn (mut s StringReader) read(mut buf []u8) !int {
|
||||
|
||||
const newline_count = 100000
|
||||
|
||||
fn test_stringreader() {
|
||||
fn test_stringreadertest() {
|
||||
text := '12345\n'.repeat(io.newline_count)
|
||||
mut s := StringReader{
|
||||
mut s := StringReaderTest{
|
||||
text: text
|
||||
}
|
||||
mut r := new_buffered_reader(reader: s)
|
||||
@ -80,9 +80,9 @@ fn test_stringreader() {
|
||||
}
|
||||
}
|
||||
|
||||
fn test_stringreader2() {
|
||||
fn test_stringreadertest2() {
|
||||
text := '12345\r\n'.repeat(io.newline_count)
|
||||
mut s := StringReader{
|
||||
mut s := StringReaderTest{
|
||||
text: text
|
||||
}
|
||||
mut r := new_buffered_reader(reader: s)
|
||||
@ -107,7 +107,7 @@ fn test_stringreader2() {
|
||||
|
||||
fn test_leftover() {
|
||||
text := 'This is a test\r\nNice try!'
|
||||
mut s := StringReader{
|
||||
mut s := StringReaderTest{
|
||||
text: text
|
||||
}
|
||||
mut r := new_buffered_reader(reader: s)
|
||||
@ -129,7 +129,7 @@ fn test_leftover() {
|
||||
|
||||
fn test_totalread_read() {
|
||||
text := 'Some testing text'
|
||||
mut s := StringReader{
|
||||
mut s := StringReaderTest{
|
||||
text: text
|
||||
}
|
||||
mut r := new_buffered_reader(reader: s)
|
||||
@ -145,7 +145,7 @@ fn test_totalread_read() {
|
||||
|
||||
fn test_totalread_readline() {
|
||||
text := 'Some testing text\nmore_enters'
|
||||
mut s := StringReader{
|
||||
mut s := StringReaderTest{
|
||||
text: text
|
||||
}
|
||||
mut r := new_buffered_reader(reader: s)
|
||||
@ -164,7 +164,7 @@ fn test_totalread_readline() {
|
||||
|
||||
fn test_read_line_until_zero_terminated() {
|
||||
text := 'This is a test\0Nice try!\0'
|
||||
mut s := StringReader{
|
||||
mut s := StringReaderTest{
|
||||
text: text
|
||||
}
|
||||
mut r := new_buffered_reader(reader: s)
|
||||
|
292
vlib/io/string_reader/string_reader.v
Normal file
292
vlib/io/string_reader/string_reader.v
Normal file
@ -0,0 +1,292 @@
|
||||
module string_reader
|
||||
|
||||
import io
|
||||
import strings
|
||||
|
||||
@[params]
|
||||
pub struct StringReaderParams {
|
||||
// the reader interface
|
||||
reader ?io.Reader
|
||||
// initialize the builder with this source string
|
||||
source ?string
|
||||
// if no source is given the string builder is initialized with this size
|
||||
initial_size int
|
||||
}
|
||||
|
||||
// StringReader is able to read data from a Reader interface and/or source string to a dynamically
|
||||
// growing buffer using a string builder. Unlike the BufferedReader, StringReader will
|
||||
// keep the entire contents of the buffer in memory, allowing the incoming data to be reused
|
||||
// and read in an efficient matter. The StringReader will not set a maximum capacity to the string
|
||||
// builders buffer and could grow very large.
|
||||
pub struct StringReader {
|
||||
mut:
|
||||
reader ?io.Reader
|
||||
offset int // current offset in the buffer
|
||||
pub mut:
|
||||
end_of_stream bool // whether we reached the end of the upstream reader
|
||||
builder strings.Builder
|
||||
}
|
||||
|
||||
// new creates a new StringReader and sets the string builder size to `initial_size`.
|
||||
// If a source
|
||||
pub fn StringReader.new(params StringReaderParams) StringReader {
|
||||
mut r := StringReader{
|
||||
reader: params.reader
|
||||
}
|
||||
|
||||
if source := params.source {
|
||||
r.builder = strings.new_builder(source.len)
|
||||
r.builder.write_string(source)
|
||||
} else {
|
||||
r.builder = strings.new_builder(params.initial_size)
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// needs_fill returns whether the buffer needs refilling
|
||||
pub fn (r StringReader) needs_fill() bool {
|
||||
return r.offset >= r.builder.len
|
||||
}
|
||||
|
||||
// needs_fill_until returns whether the buffer needs refilling in order to read
|
||||
// `n` bytes
|
||||
pub fn (r StringReader) needs_fill_until(n int) bool {
|
||||
return r.offset + n >= r.builder.len
|
||||
}
|
||||
|
||||
// fill_bufer tries to read data into the buffer until either a 0 length read or if read_to_end_of_stream
|
||||
// is true then the end of the stream. It returns the number of bytes read
|
||||
pub fn (mut r StringReader) fill_buffer(read_till_end_of_stream bool) !int {
|
||||
if r.end_of_stream {
|
||||
return io.Eof{}
|
||||
}
|
||||
|
||||
mut reader := r.reader or { return error('reader is not set') }
|
||||
|
||||
start := r.builder.len
|
||||
mut end := start
|
||||
|
||||
// make sure there is enough room in the string builder
|
||||
unsafe { r.builder.grow_len(io.read_all_len) }
|
||||
defer {
|
||||
// shrink the length of the buffer to the total of bytes read
|
||||
r.builder.go_back(r.builder.len - end)
|
||||
}
|
||||
|
||||
for {
|
||||
read := reader.read(mut r.builder[start..]) or {
|
||||
r.end_of_stream = true
|
||||
break
|
||||
}
|
||||
end += read
|
||||
|
||||
if !read_till_end_of_stream && read == 0 {
|
||||
break
|
||||
} else if r.builder.len == end {
|
||||
unsafe { r.builder.grow_len(io.read_all_grow_len) }
|
||||
}
|
||||
}
|
||||
|
||||
if end == start {
|
||||
return io.Eof{}
|
||||
}
|
||||
|
||||
return end - start
|
||||
}
|
||||
|
||||
// fill_buffer_until tries read `n` amount of bytes from the reader into the buffer
|
||||
// and returns the actual number of bytes read
|
||||
pub fn (mut r StringReader) fill_buffer_until(n int) !int {
|
||||
if r.end_of_stream {
|
||||
return io.Eof{}
|
||||
}
|
||||
|
||||
mut reader := r.reader or { return error('reader is not set') }
|
||||
|
||||
start := r.builder.len
|
||||
// make sure there is enough room in the string builder
|
||||
if n > io.read_all_len {
|
||||
unsafe { r.builder.grow_len(io.read_all_len) }
|
||||
} else {
|
||||
unsafe { r.builder.grow_len(n) }
|
||||
}
|
||||
|
||||
mut end := start
|
||||
for {
|
||||
read := reader.read(mut r.builder[start..]) or {
|
||||
r.end_of_stream = true
|
||||
break
|
||||
}
|
||||
end += read
|
||||
|
||||
if read == 0 || end - start == n {
|
||||
break
|
||||
} else if r.builder.len == end {
|
||||
if n - end > io.read_all_grow_len {
|
||||
unsafe { r.builder.grow_len(io.read_all_grow_len) }
|
||||
} else {
|
||||
unsafe { r.builder.grow_len(n - end) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if end == start {
|
||||
return io.Eof{}
|
||||
}
|
||||
return end - start
|
||||
}
|
||||
|
||||
// read_all_bytes reads all bytes from a reader until either a 0 length read or if read_to_end_of_stream
|
||||
// is true then the end of the stream. It returns a copy of the read data
|
||||
pub fn (mut r StringReader) read_all_bytes(read_till_end_of_stream bool) ![]u8 {
|
||||
start := r.offset
|
||||
// ignore Eof error from fill buffer
|
||||
r.fill_buffer(read_till_end_of_stream) or {}
|
||||
r.offset = r.builder.len
|
||||
// check if there was still data in the buffer, but the reader has reached its end of stream
|
||||
if start == r.offset {
|
||||
return io.Eof{}
|
||||
}
|
||||
|
||||
return r.get_part(start, r.offset - start)!
|
||||
}
|
||||
|
||||
// read_all reads all bytes from a reader until either a 0 length read or if read_to_end_of_stream
|
||||
// is true then the end of the stream. It produces a string from the read data
|
||||
pub fn (mut r StringReader) read_all(read_till_end_of_stream bool) !string {
|
||||
buf := r.read_all_bytes(read_till_end_of_stream)!
|
||||
return unsafe { tos(buf.data, buf.len) }
|
||||
}
|
||||
|
||||
// read_bytes tries to read n amount of bytes from the reader
|
||||
pub fn (mut r StringReader) read_bytes(n int) ![]u8 {
|
||||
start := r.offset
|
||||
|
||||
if r.needs_fill_until(n) {
|
||||
actual_read := r.fill_buffer_until(n - (r.builder.len - r.offset))!
|
||||
r.offset += actual_read
|
||||
} else {
|
||||
r.offset += n
|
||||
}
|
||||
|
||||
return r.get_part(start, r.offset - start)!
|
||||
}
|
||||
|
||||
// read_bytes tries to read `n` amount of bytes from the reader and produces a string
|
||||
// from the read data
|
||||
pub fn (mut r StringReader) read_string(n int) !string {
|
||||
buf := r.read_bytes(n)!
|
||||
return unsafe { tos(buf.data, buf.len) }
|
||||
}
|
||||
|
||||
// read implements the Reader interface
|
||||
pub fn (mut r StringReader) read(mut buf []u8) !int {
|
||||
start := r.offset
|
||||
|
||||
read := r.fill_buffer_until(buf.len - start)!
|
||||
r.offset += read
|
||||
|
||||
copy(mut buf, r.builder[start..read])
|
||||
return r.builder.len - start
|
||||
}
|
||||
|
||||
// read_line attempts to read a line from the reader.
|
||||
// It will read until it finds the specified line delimiter
|
||||
// such as (\n, the default or \0) or the end of stream.
|
||||
@[direct_array_access]
|
||||
pub fn (mut r StringReader) read_line(config io.BufferedReadLineConfig) !string {
|
||||
if r.end_of_stream && r.needs_fill() {
|
||||
return io.Eof{}
|
||||
}
|
||||
|
||||
start := r.offset
|
||||
for {
|
||||
if r.needs_fill() {
|
||||
r.fill_buffer(false) or {
|
||||
// we are at the end of the stream
|
||||
if r.offset == start {
|
||||
return io.Eof{}
|
||||
}
|
||||
return r.get_string_part(start, r.offset - start)!
|
||||
}
|
||||
}
|
||||
// try to find a newline character
|
||||
mut i := r.offset
|
||||
for ; i < r.builder.len; i++ {
|
||||
c := r.builder[i]
|
||||
if c == config.delim {
|
||||
// great, we hit something
|
||||
// do some checking for whether we hit \r\n or just \n
|
||||
mut x := i
|
||||
if i != 0 && config.delim == `\n` && r.builder[i - 1] == `\r` {
|
||||
x--
|
||||
}
|
||||
r.offset = i + 1
|
||||
return r.get_string_part(start, x - start)!
|
||||
}
|
||||
}
|
||||
r.offset = i
|
||||
}
|
||||
|
||||
return io.Eof{}
|
||||
}
|
||||
|
||||
// write implements the Writer interface
|
||||
pub fn (mut r StringReader) write(buf []u8) !int {
|
||||
return r.builder.write(buf)!
|
||||
}
|
||||
|
||||
// get_data returns a copy of the buffer
|
||||
@[inline]
|
||||
pub fn (r StringReader) get_data() []u8 {
|
||||
unsafe {
|
||||
mut x := malloc_noscan(r.builder.len)
|
||||
vmemcpy(x, &u8(r.builder.data), r.builder.len)
|
||||
return x.vbytes(r.builder.len)
|
||||
}
|
||||
}
|
||||
|
||||
// get get_part returns a copy of a part of the buffer from `start` till `start` + `n`
|
||||
pub fn (r StringReader) get_part(start int, n int) ![]u8 {
|
||||
if start + n > r.builder.len {
|
||||
return io.Eof{}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
mut x := malloc_noscan(n)
|
||||
vmemcpy(x, &u8(r.builder.data) + start, n)
|
||||
return x.vbytes(n)
|
||||
}
|
||||
}
|
||||
|
||||
// get_string produces a string from all the bytes in the buffer
|
||||
@[inline]
|
||||
pub fn (r StringReader) get_string() string {
|
||||
return r.builder.spart(0, r.builder.len)
|
||||
}
|
||||
|
||||
// get_string_part produces a string from `start` till `start` + `n` of the buffer
|
||||
pub fn (r StringReader) get_string_part(start int, n int) !string {
|
||||
if start + n > r.builder.len {
|
||||
return io.Eof{}
|
||||
}
|
||||
|
||||
return r.builder.spart(start, n)
|
||||
}
|
||||
|
||||
// flush clears the stringbuilder and returns the resulting string and the stringreaders
|
||||
// offset is reset to 0
|
||||
pub fn (mut r StringReader) flush() string {
|
||||
r.offset = 0
|
||||
return r.builder.str()
|
||||
}
|
||||
|
||||
// free frees the memory block used for the string builders buffer,
|
||||
// a new string builder with size 0 is initialized and the stringreaders offset is reset to 0
|
||||
@[unsafe]
|
||||
pub fn (mut r StringReader) free() {
|
||||
unsafe { r.builder.free() }
|
||||
r.builder = strings.new_builder(0)
|
||||
r.offset = 0
|
||||
}
|
110
vlib/io/string_reader/string_reader_test.v
Normal file
110
vlib/io/string_reader/string_reader_test.v
Normal file
@ -0,0 +1,110 @@
|
||||
module string_reader
|
||||
|
||||
import io
|
||||
|
||||
struct Buf {
|
||||
pub:
|
||||
bytes []u8
|
||||
mut:
|
||||
i int
|
||||
}
|
||||
|
||||
fn (mut b Buf) read(mut buf []u8) !int {
|
||||
if !(b.i < b.bytes.len) {
|
||||
return io.Eof{}
|
||||
}
|
||||
n := copy(mut buf, b.bytes[b.i..])
|
||||
b.i += n
|
||||
return n
|
||||
}
|
||||
|
||||
fn test_read_all() {
|
||||
mut reader := StringReader.new()
|
||||
|
||||
if _ := reader.read_all(false) {
|
||||
assert false, 'should return io.Eof'
|
||||
} else {
|
||||
assert err is io.Eof
|
||||
}
|
||||
|
||||
contents := 'test string'
|
||||
buf := Buf{
|
||||
bytes: contents.bytes()
|
||||
}
|
||||
|
||||
reader = StringReader.new(reader: buf)
|
||||
data := reader.read_all(false)!
|
||||
|
||||
assert data == contents
|
||||
}
|
||||
|
||||
fn test_read_bytes() {
|
||||
mut reader := StringReader.new()
|
||||
|
||||
if _ := reader.read_bytes(4) {
|
||||
assert false, 'should return io.Eof'
|
||||
} else {
|
||||
assert err.msg() == 'reader is not set'
|
||||
}
|
||||
|
||||
buf := Buf{
|
||||
bytes: '12345678'.bytes()
|
||||
}
|
||||
|
||||
reader = StringReader.new(reader: buf)
|
||||
mut data := reader.read_bytes(4)!
|
||||
assert data == buf.bytes[..4]
|
||||
|
||||
data = reader.read_bytes(4)!
|
||||
assert data == buf.bytes[4..8]
|
||||
}
|
||||
|
||||
fn test_read_line() {
|
||||
mut reader := StringReader.new()
|
||||
|
||||
if _ := reader.read_line() {
|
||||
assert false, 'should return io.Eof'
|
||||
} else {
|
||||
assert err is io.Eof
|
||||
}
|
||||
|
||||
buf := Buf{
|
||||
bytes: 'first line\r\nsecond line\n'.bytes()
|
||||
}
|
||||
|
||||
reader = StringReader.new(reader: buf)
|
||||
|
||||
assert reader.read_line()! == 'first line'
|
||||
assert reader.read_line()! == 'second line'
|
||||
}
|
||||
|
||||
fn test_from_string() {
|
||||
mut reader := StringReader.new(source: 'test')
|
||||
assert reader.read_all(false)! == 'test'
|
||||
|
||||
if _ := reader.read_all(false) {
|
||||
assert false, 'should return Eof'
|
||||
} else {
|
||||
assert err is io.Eof
|
||||
}
|
||||
}
|
||||
|
||||
fn test_from_string_and_reader() {
|
||||
buf := Buf{
|
||||
bytes: 'buffer'.bytes()
|
||||
}
|
||||
|
||||
mut reader := StringReader.new(reader: buf, source: 'string')
|
||||
|
||||
assert reader.read_all(false)! == 'stringbuffer'
|
||||
}
|
||||
|
||||
fn test_flush() {
|
||||
mut reader := StringReader.new(source: 'flushed data')
|
||||
|
||||
str := reader.flush()
|
||||
assert str == 'flushed data'
|
||||
|
||||
assert reader.offset == 0
|
||||
assert reader.builder.len == 0
|
||||
}
|
@ -154,8 +154,9 @@ pub fn (mut b Builder) go_back(n int) {
|
||||
b.trim(b.len - n)
|
||||
}
|
||||
|
||||
// spart returns a part of the buffer as a string
|
||||
@[inline]
|
||||
fn (b &Builder) spart(start_pos int, n int) string {
|
||||
pub fn (b &Builder) spart(start_pos int, n int) string {
|
||||
unsafe {
|
||||
mut x := malloc_noscan(n + 1)
|
||||
vmemcpy(x, &u8(b.data) + start_pos, n)
|
||||
@ -253,6 +254,20 @@ pub fn (mut b Builder) ensure_cap(n int) {
|
||||
}
|
||||
}
|
||||
|
||||
// grow_len grows the length of the buffer by `n` bytes if necessary
|
||||
@[unsafe]
|
||||
pub fn (mut b Builder) grow_len(n int) {
|
||||
if n <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
new_len := b.len + n
|
||||
b.ensure_cap(new_len)
|
||||
unsafe {
|
||||
b.len = new_len
|
||||
}
|
||||
}
|
||||
|
||||
// free frees the memory block, used for the buffer.
|
||||
// Note: do not use the builder, after a call to free().
|
||||
@[unsafe]
|
||||
|
@ -166,3 +166,28 @@ fn test_write_decimal() {
|
||||
assert sb_i64_str(9223372036854775807) == '9223372036854775807'
|
||||
assert sb_i64_str(-9223372036854775807) == '-9223372036854775807'
|
||||
}
|
||||
|
||||
fn test_grow_len() {
|
||||
mut sb := strings.new_builder(10)
|
||||
assert sb.len == 0
|
||||
assert sb.cap == 10
|
||||
|
||||
sb.write_string('0123456789')
|
||||
assert sb.len == 10
|
||||
|
||||
unsafe { sb.grow_len(-5) }
|
||||
assert sb.len == 10
|
||||
assert sb.cap == 10
|
||||
|
||||
unsafe { sb.grow_len(10) }
|
||||
assert sb.len == 20
|
||||
assert sb.cap == 20
|
||||
|
||||
unsafe { sb.ensure_cap(35) }
|
||||
assert sb.len == 20
|
||||
assert sb.cap == 35
|
||||
|
||||
unsafe { sb.grow_len(5) }
|
||||
assert sb.len == 25
|
||||
assert sb.cap == 35
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user