diff --git a/vlib/io/buffered_reader.v b/vlib/io/buffered_reader.v index 5cd9c1e83d..968ab64319 100644 --- a/vlib/io/buffered_reader.v +++ b/vlib/io/buffered_reader.v @@ -24,6 +24,7 @@ pub struct BufferedReaderConfig { // BufferedReadLineConfig are options that can be given to the read_line() function. @[params] pub struct BufferedReadLineConfig { +pub: delim u8 = `\n` // line delimiter } diff --git a/vlib/io/reader_test.v b/vlib/io/reader_test.v index 14e4ec8711..74bcc0b58f 100644 --- a/vlib/io/reader_test.v +++ b/vlib/io/reader_test.v @@ -38,13 +38,13 @@ fn test_read_all_huge() { assert res == '123'.repeat(100000).bytes() } -struct StringReader { +struct StringReaderTest { text string mut: place int } -fn (mut s StringReader) read(mut buf []u8) !int { +fn (mut s StringReaderTest) read(mut buf []u8) !int { if s.place >= s.text.len { return Eof{} } @@ -55,9 +55,9 @@ fn (mut s StringReader) read(mut buf []u8) !int { const newline_count = 100000 -fn test_stringreader() { +fn test_stringreadertest() { text := '12345\n'.repeat(io.newline_count) - mut s := StringReader{ + mut s := StringReaderTest{ text: text } mut r := new_buffered_reader(reader: s) @@ -80,9 +80,9 @@ fn test_stringreader() { } } -fn test_stringreader2() { +fn test_stringreadertest2() { text := '12345\r\n'.repeat(io.newline_count) - mut s := StringReader{ + mut s := StringReaderTest{ text: text } mut r := new_buffered_reader(reader: s) @@ -107,7 +107,7 @@ fn test_stringreader2() { fn test_leftover() { text := 'This is a test\r\nNice try!' - mut s := StringReader{ + mut s := StringReaderTest{ text: text } mut r := new_buffered_reader(reader: s) @@ -129,7 +129,7 @@ fn test_leftover() { fn test_totalread_read() { text := 'Some testing text' - mut s := StringReader{ + mut s := StringReaderTest{ text: text } mut r := new_buffered_reader(reader: s) @@ -145,7 +145,7 @@ fn test_totalread_read() { fn test_totalread_readline() { text := 'Some testing text\nmore_enters' - mut s := StringReader{ + mut s := StringReaderTest{ text: text } mut r := new_buffered_reader(reader: s) @@ -164,7 +164,7 @@ fn test_totalread_readline() { fn test_read_line_until_zero_terminated() { text := 'This is a test\0Nice try!\0' - mut s := StringReader{ + mut s := StringReaderTest{ text: text } mut r := new_buffered_reader(reader: s) diff --git a/vlib/io/string_reader/string_reader.v b/vlib/io/string_reader/string_reader.v new file mode 100644 index 0000000000..a9fae873bd --- /dev/null +++ b/vlib/io/string_reader/string_reader.v @@ -0,0 +1,292 @@ +module string_reader + +import io +import strings + +@[params] +pub struct StringReaderParams { + // the reader interface + reader ?io.Reader + // initialize the builder with this source string + source ?string + // if no source is given the string builder is initialized with this size + initial_size int +} + +// StringReader is able to read data from a Reader interface and/or source string to a dynamically +// growing buffer using a string builder. Unlike the BufferedReader, StringReader will +// keep the entire contents of the buffer in memory, allowing the incoming data to be reused +// and read in an efficient matter. The StringReader will not set a maximum capacity to the string +// builders buffer and could grow very large. +pub struct StringReader { +mut: + reader ?io.Reader + offset int // current offset in the buffer +pub mut: + end_of_stream bool // whether we reached the end of the upstream reader + builder strings.Builder +} + +// new creates a new StringReader and sets the string builder size to `initial_size`. +// If a source +pub fn StringReader.new(params StringReaderParams) StringReader { + mut r := StringReader{ + reader: params.reader + } + + if source := params.source { + r.builder = strings.new_builder(source.len) + r.builder.write_string(source) + } else { + r.builder = strings.new_builder(params.initial_size) + } + + return r +} + +// needs_fill returns whether the buffer needs refilling +pub fn (r StringReader) needs_fill() bool { + return r.offset >= r.builder.len +} + +// needs_fill_until returns whether the buffer needs refilling in order to read +// `n` bytes +pub fn (r StringReader) needs_fill_until(n int) bool { + return r.offset + n >= r.builder.len +} + +// fill_bufer tries to read data into the buffer until either a 0 length read or if read_to_end_of_stream +// is true then the end of the stream. It returns the number of bytes read +pub fn (mut r StringReader) fill_buffer(read_till_end_of_stream bool) !int { + if r.end_of_stream { + return io.Eof{} + } + + mut reader := r.reader or { return error('reader is not set') } + + start := r.builder.len + mut end := start + + // make sure there is enough room in the string builder + unsafe { r.builder.grow_len(io.read_all_len) } + defer { + // shrink the length of the buffer to the total of bytes read + r.builder.go_back(r.builder.len - end) + } + + for { + read := reader.read(mut r.builder[start..]) or { + r.end_of_stream = true + break + } + end += read + + if !read_till_end_of_stream && read == 0 { + break + } else if r.builder.len == end { + unsafe { r.builder.grow_len(io.read_all_grow_len) } + } + } + + if end == start { + return io.Eof{} + } + + return end - start +} + +// fill_buffer_until tries read `n` amount of bytes from the reader into the buffer +// and returns the actual number of bytes read +pub fn (mut r StringReader) fill_buffer_until(n int) !int { + if r.end_of_stream { + return io.Eof{} + } + + mut reader := r.reader or { return error('reader is not set') } + + start := r.builder.len + // make sure there is enough room in the string builder + if n > io.read_all_len { + unsafe { r.builder.grow_len(io.read_all_len) } + } else { + unsafe { r.builder.grow_len(n) } + } + + mut end := start + for { + read := reader.read(mut r.builder[start..]) or { + r.end_of_stream = true + break + } + end += read + + if read == 0 || end - start == n { + break + } else if r.builder.len == end { + if n - end > io.read_all_grow_len { + unsafe { r.builder.grow_len(io.read_all_grow_len) } + } else { + unsafe { r.builder.grow_len(n - end) } + } + } + } + + if end == start { + return io.Eof{} + } + return end - start +} + +// read_all_bytes reads all bytes from a reader until either a 0 length read or if read_to_end_of_stream +// is true then the end of the stream. It returns a copy of the read data +pub fn (mut r StringReader) read_all_bytes(read_till_end_of_stream bool) ![]u8 { + start := r.offset + // ignore Eof error from fill buffer + r.fill_buffer(read_till_end_of_stream) or {} + r.offset = r.builder.len + // check if there was still data in the buffer, but the reader has reached its end of stream + if start == r.offset { + return io.Eof{} + } + + return r.get_part(start, r.offset - start)! +} + +// read_all reads all bytes from a reader until either a 0 length read or if read_to_end_of_stream +// is true then the end of the stream. It produces a string from the read data +pub fn (mut r StringReader) read_all(read_till_end_of_stream bool) !string { + buf := r.read_all_bytes(read_till_end_of_stream)! + return unsafe { tos(buf.data, buf.len) } +} + +// read_bytes tries to read n amount of bytes from the reader +pub fn (mut r StringReader) read_bytes(n int) ![]u8 { + start := r.offset + + if r.needs_fill_until(n) { + actual_read := r.fill_buffer_until(n - (r.builder.len - r.offset))! + r.offset += actual_read + } else { + r.offset += n + } + + return r.get_part(start, r.offset - start)! +} + +// read_bytes tries to read `n` amount of bytes from the reader and produces a string +// from the read data +pub fn (mut r StringReader) read_string(n int) !string { + buf := r.read_bytes(n)! + return unsafe { tos(buf.data, buf.len) } +} + +// read implements the Reader interface +pub fn (mut r StringReader) read(mut buf []u8) !int { + start := r.offset + + read := r.fill_buffer_until(buf.len - start)! + r.offset += read + + copy(mut buf, r.builder[start..read]) + return r.builder.len - start +} + +// read_line attempts to read a line from the reader. +// It will read until it finds the specified line delimiter +// such as (\n, the default or \0) or the end of stream. +@[direct_array_access] +pub fn (mut r StringReader) read_line(config io.BufferedReadLineConfig) !string { + if r.end_of_stream && r.needs_fill() { + return io.Eof{} + } + + start := r.offset + for { + if r.needs_fill() { + r.fill_buffer(false) or { + // we are at the end of the stream + if r.offset == start { + return io.Eof{} + } + return r.get_string_part(start, r.offset - start)! + } + } + // try to find a newline character + mut i := r.offset + for ; i < r.builder.len; i++ { + c := r.builder[i] + if c == config.delim { + // great, we hit something + // do some checking for whether we hit \r\n or just \n + mut x := i + if i != 0 && config.delim == `\n` && r.builder[i - 1] == `\r` { + x-- + } + r.offset = i + 1 + return r.get_string_part(start, x - start)! + } + } + r.offset = i + } + + return io.Eof{} +} + +// write implements the Writer interface +pub fn (mut r StringReader) write(buf []u8) !int { + return r.builder.write(buf)! +} + +// get_data returns a copy of the buffer +@[inline] +pub fn (r StringReader) get_data() []u8 { + unsafe { + mut x := malloc_noscan(r.builder.len) + vmemcpy(x, &u8(r.builder.data), r.builder.len) + return x.vbytes(r.builder.len) + } +} + +// get get_part returns a copy of a part of the buffer from `start` till `start` + `n` +pub fn (r StringReader) get_part(start int, n int) ![]u8 { + if start + n > r.builder.len { + return io.Eof{} + } + + unsafe { + mut x := malloc_noscan(n) + vmemcpy(x, &u8(r.builder.data) + start, n) + return x.vbytes(n) + } +} + +// get_string produces a string from all the bytes in the buffer +@[inline] +pub fn (r StringReader) get_string() string { + return r.builder.spart(0, r.builder.len) +} + +// get_string_part produces a string from `start` till `start` + `n` of the buffer +pub fn (r StringReader) get_string_part(start int, n int) !string { + if start + n > r.builder.len { + return io.Eof{} + } + + return r.builder.spart(start, n) +} + +// flush clears the stringbuilder and returns the resulting string and the stringreaders +// offset is reset to 0 +pub fn (mut r StringReader) flush() string { + r.offset = 0 + return r.builder.str() +} + +// free frees the memory block used for the string builders buffer, +// a new string builder with size 0 is initialized and the stringreaders offset is reset to 0 +@[unsafe] +pub fn (mut r StringReader) free() { + unsafe { r.builder.free() } + r.builder = strings.new_builder(0) + r.offset = 0 +} diff --git a/vlib/io/string_reader/string_reader_test.v b/vlib/io/string_reader/string_reader_test.v new file mode 100644 index 0000000000..6e6c2d9608 --- /dev/null +++ b/vlib/io/string_reader/string_reader_test.v @@ -0,0 +1,110 @@ +module string_reader + +import io + +struct Buf { +pub: + bytes []u8 +mut: + i int +} + +fn (mut b Buf) read(mut buf []u8) !int { + if !(b.i < b.bytes.len) { + return io.Eof{} + } + n := copy(mut buf, b.bytes[b.i..]) + b.i += n + return n +} + +fn test_read_all() { + mut reader := StringReader.new() + + if _ := reader.read_all(false) { + assert false, 'should return io.Eof' + } else { + assert err is io.Eof + } + + contents := 'test string' + buf := Buf{ + bytes: contents.bytes() + } + + reader = StringReader.new(reader: buf) + data := reader.read_all(false)! + + assert data == contents +} + +fn test_read_bytes() { + mut reader := StringReader.new() + + if _ := reader.read_bytes(4) { + assert false, 'should return io.Eof' + } else { + assert err.msg() == 'reader is not set' + } + + buf := Buf{ + bytes: '12345678'.bytes() + } + + reader = StringReader.new(reader: buf) + mut data := reader.read_bytes(4)! + assert data == buf.bytes[..4] + + data = reader.read_bytes(4)! + assert data == buf.bytes[4..8] +} + +fn test_read_line() { + mut reader := StringReader.new() + + if _ := reader.read_line() { + assert false, 'should return io.Eof' + } else { + assert err is io.Eof + } + + buf := Buf{ + bytes: 'first line\r\nsecond line\n'.bytes() + } + + reader = StringReader.new(reader: buf) + + assert reader.read_line()! == 'first line' + assert reader.read_line()! == 'second line' +} + +fn test_from_string() { + mut reader := StringReader.new(source: 'test') + assert reader.read_all(false)! == 'test' + + if _ := reader.read_all(false) { + assert false, 'should return Eof' + } else { + assert err is io.Eof + } +} + +fn test_from_string_and_reader() { + buf := Buf{ + bytes: 'buffer'.bytes() + } + + mut reader := StringReader.new(reader: buf, source: 'string') + + assert reader.read_all(false)! == 'stringbuffer' +} + +fn test_flush() { + mut reader := StringReader.new(source: 'flushed data') + + str := reader.flush() + assert str == 'flushed data' + + assert reader.offset == 0 + assert reader.builder.len == 0 +} diff --git a/vlib/strings/builder.c.v b/vlib/strings/builder.c.v index 9fc2bd7dcf..283aeed542 100644 --- a/vlib/strings/builder.c.v +++ b/vlib/strings/builder.c.v @@ -154,8 +154,9 @@ pub fn (mut b Builder) go_back(n int) { b.trim(b.len - n) } +// spart returns a part of the buffer as a string @[inline] -fn (b &Builder) spart(start_pos int, n int) string { +pub fn (b &Builder) spart(start_pos int, n int) string { unsafe { mut x := malloc_noscan(n + 1) vmemcpy(x, &u8(b.data) + start_pos, n) @@ -253,6 +254,20 @@ pub fn (mut b Builder) ensure_cap(n int) { } } +// grow_len grows the length of the buffer by `n` bytes if necessary +@[unsafe] +pub fn (mut b Builder) grow_len(n int) { + if n <= 0 { + return + } + + new_len := b.len + n + b.ensure_cap(new_len) + unsafe { + b.len = new_len + } +} + // free frees the memory block, used for the buffer. // Note: do not use the builder, after a call to free(). @[unsafe] diff --git a/vlib/strings/builder_test.v b/vlib/strings/builder_test.v index 0b70238c3a..59c4838af1 100644 --- a/vlib/strings/builder_test.v +++ b/vlib/strings/builder_test.v @@ -166,3 +166,28 @@ fn test_write_decimal() { assert sb_i64_str(9223372036854775807) == '9223372036854775807' assert sb_i64_str(-9223372036854775807) == '-9223372036854775807' } + +fn test_grow_len() { + mut sb := strings.new_builder(10) + assert sb.len == 0 + assert sb.cap == 10 + + sb.write_string('0123456789') + assert sb.len == 10 + + unsafe { sb.grow_len(-5) } + assert sb.len == 10 + assert sb.cap == 10 + + unsafe { sb.grow_len(10) } + assert sb.len == 20 + assert sb.cap == 20 + + unsafe { sb.ensure_cap(35) } + assert sb.len == 20 + assert sb.cap == 35 + + unsafe { sb.grow_len(5) } + assert sb.len == 25 + assert sb.cap == 35 +}