mirror of
https://github.com/vlang/v.git
synced 2025-09-10 07:47:20 -04:00
encoding.csv: add a sequential reader too (suitable for very large .csv files, it does not read everything at once) (#20140)
This commit is contained in:
parent
99d9473643
commit
cfcbcb416a
@ -1,6 +1,77 @@
|
|||||||
|
|
||||||
# CSV Reader
|
# CSV Reader
|
||||||
This module is a Random Access CSV file reader, it indexes the file before reading the data.
|
There are two CSV readers in this module:
|
||||||
|
|
||||||
|
* Random Access reader
|
||||||
|
* Sequential reader
|
||||||
|
|
||||||
|
# Sequential CSV reader
|
||||||
|
The sequential reader reads the file row by row, using only the memory needed for reading.
|
||||||
|
Here is a very simple example of usage:
|
||||||
|
|
||||||
|
```v
|
||||||
|
import encoding.csv
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
mut csvr := csv.csv_sequential_reader(file_path: 'big2.csv', end_line_len: csv.endline_crlf_len)!
|
||||||
|
for csvr.has_data() > 1 {
|
||||||
|
println(csvr.get_next_row()!)
|
||||||
|
}
|
||||||
|
csvr.dispose_csv_reader()
|
||||||
|
}
|
||||||
|
```
|
||||||
|
This is the simplest way to use it to read csv files in sequential mode,
|
||||||
|
with default configuration every cell is read as `string`.
|
||||||
|
The function `get_next_row()` is used to read a single row, and it returns an array of `string`.
|
||||||
|
|
||||||
|
## Reading from different sources `csv_sequential_reader`
|
||||||
|
The CSV Sequential Reader can read from files, and memory buffers.
|
||||||
|
|
||||||
|
### read from a file
|
||||||
|
```v ignore
|
||||||
|
csv.csv_sequential_reader(file_path:file_path)
|
||||||
|
```
|
||||||
|
### read from a memory buffer
|
||||||
|
```v ignore
|
||||||
|
csv.csv_sequential_reader(scr_buf:voidptr(buffer_ptr), scr_buf_len: buffer_len)
|
||||||
|
```
|
||||||
|
When you call `csv.csv_sequential_reader` a `SequentialReader` struct is initialized passing
|
||||||
|
a `SequentialReaderConfig` struct as a parameter.
|
||||||
|
Using these structs, it is possible to change the behavior of the CSV Reader.
|
||||||
|
|
||||||
|
## The `SequentialReaderConfig` struct
|
||||||
|
The config struct is as follows:
|
||||||
|
```v ignore
|
||||||
|
pub struct SequentialReaderConfig {
|
||||||
|
scr_buf voidptr // pointer to the buffer of data
|
||||||
|
scr_buf_len i64 // if > 0 use the RAM pointed by scr_buf as source of data
|
||||||
|
file_path string
|
||||||
|
start_index i64
|
||||||
|
end_index i64 = -1
|
||||||
|
mem_buf_size int = 1024 * 64 // default buffer size 64KByte
|
||||||
|
separator u8 = `,`
|
||||||
|
comment u8 = `#` // every line that start with the comment char is ignored
|
||||||
|
default_cell string = '*' // return this string if out of the csv boundaries
|
||||||
|
empty_cell string // return this string if empty cell
|
||||||
|
end_line_len int = endline_cr_len // size of the endline rune
|
||||||
|
quote u8 = `"` // double quote is the standard quote char
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|Field|Description|
|
||||||
|
|------------|--------------|
|
||||||
|
|`scr_buf`, `scr_buf_len`|If `scr_buf_len > 0` the reader will use the `scr_buf` pointer as the base address of the data to parse and `scr_buf_len` as the length of the buffer itself|
|
||||||
|
|`file_path`| if `scr_buf_len == 0` the reader will try to open the `file_path` file|
|
||||||
|
|`start_index`,`end_index`| **Internal usage for now**|
|
||||||
|
|`mem_buf_size`|memory allocated for the reading operations on the file, more memory more speed|
|
||||||
|
|`separator`|char used as cell separator in the CSV file, default is comma|
|
||||||
|
|`comment`|every line that starts with the comment char is ignored|
|
||||||
|
|`default_cell`|return this string if the query coordinates are out of the csv boundaries|
|
||||||
|
|`empty_cell`|return this string if the query coordinates are on an empty cell|
|
||||||
|
|`end_line_len`|size of the endline, `endline_cr_len=1`,`endline_crlf_len=2`|
|
||||||
|
|`quote`|quote char for the cells|
|
||||||
|
|
||||||
|
|
||||||
|
# Random Access CSV Reader
|
||||||
|
The Random Access CSV file reader indexes the file before reading the data.
|
||||||
This indexing operation permits access to every cell of the CSV file in random order.
|
This indexing operation permits access to every cell of the CSV file in random order.
|
||||||
Here is a very simple example of usage:
|
Here is a very simple example of usage:
|
||||||
|
|
||||||
@ -30,12 +101,12 @@ will give the following output:
|
|||||||
['0', '1', '2']
|
['0', '1', '2']
|
||||||
['3', '4', '5']
|
['3', '4', '5']
|
||||||
```
|
```
|
||||||
This is the simplest way to use it to read csv files, with default configuration
|
This is the simplest way to use it to read csv files in a random access mode,
|
||||||
every cell is read as `string`.
|
with default configuration every cell is read as `string`.
|
||||||
The function `get_row()` is used to read a single row, and it returns an array of `string`.
|
The function `get_row()` is used to read a single row, and it returns an array of `string`.
|
||||||
|
|
||||||
## Reading from different sources `csv_reader`
|
## Reading from different sources `csv_reader`
|
||||||
The CSV Reader can read from files, strings, memory buffers.
|
The CSV Random access Reader can read from files, strings, memory buffers.
|
||||||
### read from a file
|
### read from a file
|
||||||
```v ignore
|
```v ignore
|
||||||
csv.csv_reader(file_path:file_path)
|
csv.csv_reader(file_path:file_path)
|
||||||
@ -64,7 +135,7 @@ pub struct RandomAccessReaderConfig {
|
|||||||
end_index i64 = -1
|
end_index i64 = -1
|
||||||
mem_buf_size int = 1024 * 64 // default buffer size 64KByte
|
mem_buf_size int = 1024 * 64 // default buffer size 64KByte
|
||||||
separator u8 = `,`
|
separator u8 = `,`
|
||||||
comment u8 = `#` // every line that start with the quote char is ignored
|
comment u8 = `#` // every line that start with the comment char is ignored
|
||||||
default_cell string = '*' // return this string if out of the csv boundaries
|
default_cell string = '*' // return this string if out of the csv boundaries
|
||||||
empty_cell string // return this string if empty cell
|
empty_cell string // return this string if empty cell
|
||||||
end_line_len int = csv.endline_cr_len // size of the endline rune
|
end_line_len int = csv.endline_cr_len // size of the endline rune
|
||||||
@ -79,6 +150,7 @@ pub struct RandomAccessReaderConfig {
|
|||||||
|`start_index`,`end_index`| **Internal usage for now**|
|
|`start_index`,`end_index`| **Internal usage for now**|
|
||||||
|`mem_buf_size`|memory allocated for the reading operations on the file, more memory more speed|
|
|`mem_buf_size`|memory allocated for the reading operations on the file, more memory more speed|
|
||||||
|`separator`|char used as cell separator in the CSV file, default is comma|
|
|`separator`|char used as cell separator in the CSV file, default is comma|
|
||||||
|
|`comment`|every line that starts with the comment char is ignored|
|
||||||
|`default_cell`|return this string if the query coordinates are out of the csv boundaries|
|
|`default_cell`|return this string if the query coordinates are out of the csv boundaries|
|
||||||
|`empty_cell`|return this string if the query coordinates are on an empty cell|
|
|`empty_cell`|return this string if the query coordinates are on an empty cell|
|
||||||
|`end_line_len`|size of the endline, `endline_cr_len=1`,`endline_crlf_len=2`|
|
|`end_line_len`|size of the endline, `endline_cr_len=1`,`endline_crlf_len=2`|
|
||||||
|
@ -1,11 +1,10 @@
|
|||||||
/*
|
/*
|
||||||
csv reader 1.0 alpha
|
csv random access reader 1.0 alpha
|
||||||
|
|
||||||
Copyright (c) 2023 Dario Deledda. All rights reserved.
|
Copyright (c) 2023 Dario Deledda. All rights reserved.
|
||||||
Use of this source code is governed by an MIT license
|
Use of this source code is governed by an MIT license
|
||||||
that can be found in the LICENSE file.
|
that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
|
||||||
Known limitations:
|
Known limitations:
|
||||||
- no stream reading
|
- no stream reading
|
||||||
*/
|
*/
|
||||||
@ -404,7 +403,7 @@ pub fn (mut cr RandomAccessReader) get_cell(cfg GetCellConfig) !string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// create the string from the buffer
|
// create the string from the buffer
|
||||||
mut tmp_mem := malloc(isize(len + 1))
|
mut tmp_mem := malloc_noscan(isize(len + 1))
|
||||||
/*
|
/*
|
||||||
defer {
|
defer {
|
||||||
free(tmp_mem)
|
free(tmp_mem)
|
297
vlib/encoding/csv/csv_reader_sequential.v
Normal file
297
vlib/encoding/csv/csv_reader_sequential.v
Normal file
@ -0,0 +1,297 @@
|
|||||||
|
/*
|
||||||
|
csv sequential reader 1.0 alpha
|
||||||
|
|
||||||
|
Copyright (c) 2023 Dario Deledda. All rights reserved.
|
||||||
|
Use of this source code is governed by an MIT license
|
||||||
|
that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
Known limitations:
|
||||||
|
*/
|
||||||
|
module csv
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
// SequentialReaderConfig collects the optional parameters accepted by `csv_sequential_reader`.
@[params]
|
||||||
|
pub struct SequentialReaderConfig {
|
||||||
|
scr_buf voidptr // pointer to the buffer of data
|
||||||
|
scr_buf_len i64 // if > 0 use the RAM pointed by scr_buf as source of data
|
||||||
|
file_path string // path of the CSV file, used when no RAM buffer is supplied
|
||||||
|
start_index i64 // index of the first byte to read
|
||||||
|
end_index i64 = -1 // index where the reading stops, -1 means the end of the source
|
||||||
|
mem_buf_size int = 1024 * 64 // default buffer size 64KByte
|
||||||
|
separator u8 = `,` // char that separates the cells
|
||||||
|
comment u8 = `#` // every line that starts with the comment char is ignored
|
||||||
|
default_cell string = '*' // return this string if out of the csv boundaries
|
||||||
|
empty_cell string // return this string if empty cell
|
||||||
|
end_line_len int = endline_cr_len // size of the endline rune
|
||||||
|
quote u8 = `"` // double quote is the standard quote char
|
||||||
|
}
|
||||||
|
|
||||||
|
// SequentialReader holds the state of a sequential CSV reading session over a file or a RAM buffer.
pub struct SequentialReader {
|
||||||
|
pub mut:
|
||||||
|
index i64 // position recorded at creation: file offset after the initial seek, plus 3 if a BOM was found
|
||||||
|
|
||||||
|
f os.File // file handle, used when the source is a file
|
||||||
|
f_len i64 // length of the file in bytes
|
||||||
|
is_bom_present bool // true if the source starts with the UTF-8 BOM (EF BB BF)
|
||||||
|
|
||||||
|
start_index i64 // index of the next byte to parse, updated by get_next_row
|
||||||
|
end_index i64 = -1 // index where the parsing stops
|
||||||
|
|
||||||
|
end_line u8 = `\n` // char that closes a row
|
||||||
|
end_line_len int = endline_cr_len // size of the endline rune \n = 1, \r\n = 2
|
||||||
|
separator u8 = `,` // comma is the default separator
|
||||||
|
separator_len int = 1 // size of the separator rune
|
||||||
|
quote u8 = `"` // double quote is the standard quote char
|
||||||
|
|
||||||
|
comment u8 = `#` // every line that starts with the comment char is ignored
|
||||||
|
|
||||||
|
default_cell string = '*' // return this string if out of the csv boundaries
|
||||||
|
empty_cell string = '#' // return this if empty cell (csv_sequential_reader overwrites it with the config value)
|
||||||
|
// ram buffer
|
||||||
|
mem_buf_type u32 // buffer type 0=File,1=RAM
|
||||||
|
mem_buf voidptr // buffer used to load chars from file
|
||||||
|
mem_buf_size i64 // size of the buffer
|
||||||
|
mem_buf_start i64 = -1 // start index in the file of the read buffer
|
||||||
|
mem_buf_end i64 = -1 // end index in the file of the read buffer
|
||||||
|
|
||||||
|
ch_buf []u8 = []u8{cap: 1024} // bytes of the cell that is currently being parsed
|
||||||
|
// error management: these counters are reported in the parsing error messages
|
||||||
|
row_count i64
|
||||||
|
col_count i64
|
||||||
|
}
|
||||||
|
|
||||||
|
// csv_sequential_reader creates a sequential csv reader.
// The source is the RAM buffer `cfg.scr_buf` when it is not nil and
// `cfg.scr_buf_len > 0`, otherwise the file `cfg.file_path` when it is not empty.
// When neither source is supplied the reader is returned without a source.
// A UTF-8 BOM (EF BB BF) found at index 0 of the source is skipped.
// It returns an error if the file does not exist, if it cannot be opened,
// sought or read, or if `cfg.mem_buf_size` is not greater than 0.
pub fn csv_sequential_reader(cfg SequentialReaderConfig) !&SequentialReader {
	mut cr := &SequentialReader{}

	cr.start_index = cfg.start_index
	cr.end_index = cfg.end_index

	if cfg.scr_buf != 0 && cfg.scr_buf_len > 0 {
		// reading from a RAM buffer, the memory is owned by the caller
		cr.mem_buf_type = ram_csv // RAM buffer
		cr.mem_buf = cfg.scr_buf
		cr.mem_buf_size = cfg.scr_buf_len
		if cfg.end_index == -1 {
			cr.end_index = cfg.scr_buf_len
		}

		// check if BOM header is in the memory buffer.
		// The probe reads 3 bytes, so the buffer must hold at least 3 bytes,
		// and like the file source it is done only when reading from index 0.
		if cr.start_index == 0 && cfg.scr_buf_len >= 3 {
			unsafe {
				if *&u8(cr.mem_buf) == 0xEF && *(&u8(cr.mem_buf) + 1) == 0xBB
					&& *(&u8(cr.mem_buf) + 2) == 0xBF {
					cr.is_bom_present = true
					cr.index += 3 // skip the BOM
					cr.start_index += 3 // skip the BOM
				}
			}
		}
		// the whole source is already available in memory
		cr.mem_buf_start = 0
		cr.mem_buf_end = cr.mem_buf_size
	} else if cfg.file_path.len > 0 {
		// reading from a file
		if !os.exists(cfg.file_path) {
			return error('ERROR: file ${cfg.file_path} not found!')
		}
		if cfg.mem_buf_size <= 0 {
			return error('ERROR: mem_buf_size must be greater than 0!')
		}
		cr.mem_buf_type = file_csv // File buffer

		// open the file before allocating, a failed open must not allocate anything
		cr.f = os.open_file(cfg.file_path, 'rb')!

		// allocate the memory
		unsafe {
			cr.mem_buf = malloc(cfg.mem_buf_size)
			cr.mem_buf_size = cfg.mem_buf_size
		}

		// get the length of the file
		cr.f.seek(0, .end)!
		cr.f_len = cr.f.tell()!

		cr.f.seek(cfg.start_index, .start)!
		cr.index = cr.f.tell()!

		if cfg.end_index == -1 {
			cr.end_index = cr.f_len
		}

		// check if BOM header is in the file
		if cr.index == 0 {
			// use a local buffer for the probe, mem_buf can be smaller than the bytes read here
			mut bom := [4]u8{}
			if cr.f.read_into_ptr(unsafe { &bom[0] }, 4)! >= 3 {
				if bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF {
					cr.is_bom_present = true
					cr.index += 3 // skip the BOM
					cr.start_index += 3 // skip the BOM
				}
			}
			// restore the file position moved by the probe
			cr.f.seek(cfg.start_index, .start)!
		}
	}

	cr.default_cell = cfg.default_cell
	cr.empty_cell = cfg.empty_cell
	cr.end_line_len = cfg.end_line_len
	cr.separator = cfg.separator
	cr.comment = cfg.comment
	cr.quote = cfg.quote

	return cr
}
|
||||||
|
|
||||||
|
// dispose_csv_reader releases what the reader acquired for a file source:
// the file is closed if it is open and the read buffer is freed.
// Nothing is released for a RAM source.
pub fn (mut cr SequentialReader) dispose_csv_reader() {
	// only a file source owns resources
	if cr.mem_buf_type == ram_csv || cr.mem_buf_type != file_csv {
		return
	}

	// file close
	if cr.f.is_opened {
		cr.f.close()
	}

	// no buffer was allocated, or it was already released
	if cr.mem_buf_size <= 0 {
		return
	}

	// free the allocated memory and forget it
	unsafe { free(cr.mem_buf) }
	cr.mem_buf = unsafe { nil }
	cr.mem_buf_size = 0
}
|
||||||
|
|
||||||
|
// has_data returns how many bytes are left between the current reading
// position and the end of the readable range.
pub fn (mut cr SequentialReader) has_data() i64 {
	remaining := cr.end_index - cr.start_index
	return remaining
}
|
||||||
|
|
||||||
|
// fill_buffer loads into `mem_buf` the chunk of the file that starts at `index`
// and records in `mem_buf_start`/`mem_buf_end` the range of the file it covers.
// It does nothing when the source is a RAM buffer.
fn (mut cr SequentialReader) fill_buffer(index i64) ! {
	// for now do nothing if ram buffer
	if cr.mem_buf_type == ram_csv {
		return
	}

	cr.f.seek(index, .start)!
	// IMPORTANT: add 64 bit support in vlib!!
	loaded := cr.f.read_into_ptr(cr.mem_buf, int(cr.mem_buf_size))!
	cr.mem_buf_start = index
	cr.mem_buf_end = index + loaded
}
|
||||||
|
|
||||||
|
// SequentialReadingState lists the states of the parser used by `get_next_row`.
enum SequentialReadingState as u16 {
|
||||||
|
comment // inside a comment line, chars are ignored until the end of the line
|
||||||
|
quote // inside a quoted cell, waiting for the closing quote
|
||||||
|
after_quote // a quoted cell was closed, waiting for a separator or the end of the line
|
||||||
|
cell // reading a normal cell, this is the starting state of every row
|
||||||
|
newline // NOTE(review): not referenced by get_next_row in the visible code
|
||||||
|
}
|
||||||
|
|
||||||
|
// take_cell returns the bytes collected in `ch_buf` as a new string, or
// `empty_cell` when no byte was collected, then it clears the buffer.
fn (mut cr SequentialReader) take_cell() string {
	cell := if cr.ch_buf.len == 0 { cr.empty_cell } else { cr.ch_buf.bytestr() }
	cr.ch_buf.clear()
	return cell
}

// get_next_row get the next row from the CSV file as a string array.
// The parsing starts at `start_index` and stops at the first end of line that
// closes a not empty row, or at `end_index`; `start_index` is then updated, so
// the following call continues with the next row.
// Empty rows and the rows that start with the comment char are skipped.
// A row that reaches `end_index` without a final end of line keeps its last cell.
// The returned array is empty when no cell is found before `end_index`.
// It returns an error when an end of line is found inside a quoted cell, or
// when the read buffer cannot be filled.
// NOTE: a quote still open at `end_index` is not reported as an error.
pub fn (mut cr SequentialReader) get_next_row() ![]string {
	mut row_res := []string{}
	// clear the cell buffer
	cr.ch_buf.clear()
	mut i := cr.start_index
	mut state := SequentialReadingState.cell
	// true when the row was closed by an end of line
	mut row_closed := false

	p := &u8(cr.mem_buf)
	for i < cr.end_index {
		// load the chunk of the source that contains the index i
		if i < cr.mem_buf_start || i >= cr.mem_buf_end {
			cr.fill_buffer(i)!
		}
		unsafe {
			ch := *(p + i - cr.mem_buf_start)

			if state == .cell {
				if ch == cr.separator {
					row_res << cr.take_cell()
				} else if cr.ch_buf.len == 0 && ch == cr.comment && row_res.len == 0 {
					// the comment char is the first char of the row
					state = .comment
				} else if ch == cr.quote {
					// the chars found in the cell before the quote are discarded
					state = .quote
					cr.ch_buf.clear()
					cr.col_count++
					i++
					continue
				} else if ch == cr.end_line {
					cr.row_count++
					cr.col_count = 0

					// skip empty rows
					if !(row_res.len == 0 && cr.ch_buf.len < 1) {
						row_res << cr.take_cell()
						// with a 1 byte endline the index stays on the end of line char,
						// the next call consumes it as an empty row
						i += cr.end_line_len - 1
						row_closed = true
						break
					}
				} else if ch == `\r` && cr.end_line_len == 2 {
					// skip CR
				} else { // normal char inside a cell
					cr.ch_buf << ch
				}
			}

			// not an else: the char that opens a comment is examined here too
			if state == .comment {
				if ch == cr.end_line {
					state = .cell
				}
			}

			if state == .quote {
				if ch == cr.quote {
					row_res << cr.take_cell()
					state = .after_quote
					cr.col_count++
					i++
					continue
				} else if ch == cr.end_line {
					return error('ERROR: quote not closed at row ${cr.row_count} after column ${cr.col_count}!')
				} else { // normal char inside a quote inside a cell
					cr.ch_buf << ch
				}
			}

			if state == .after_quote {
				// the chars between the closing quote and the separator are ignored
				if ch == cr.separator {
					state = .cell
				} else if ch == cr.end_line {
					cr.row_count++
					cr.col_count = 0
					cr.ch_buf.clear()
					i += cr.end_line_len - 1
					row_closed = true
					break
				}
			}
		}
		cr.col_count++
		i++
	}

	// the source ended without a final end of line: store the pending cell
	// with the same rule used when an end of line closes the row
	if !row_closed && state == .cell && !(row_res.len == 0 && cr.ch_buf.len < 1) {
		row_res << cr.take_cell()
	}

	cr.start_index = i
	return row_res
}
|
@ -105,7 +105,68 @@ const txt3 = 'a,b,c,d\r\n0,1,2,3\r\n4,5,6,7\r\n'
|
|||||||
const txt4 = 'a,b,c,d\n0,1,2,3\n4,5,6,7\n'
|
const txt4 = 'a,b,c,d\n0,1,2,3\n4,5,6,7\n'
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
*
|
*
|
||||||
* Test Functions
|
* Test Sequential Functions
|
||||||
|
*
|
||||||
|
******************************************************************************/
|
||||||
|
// test_csv_sequential reads the test data with the sequential reader, first
// from RAM buffers and then from a temporary file that uses CRLF line endings
// and a small read buffer, so the buffer is refilled during the parsing.
fn test_csv_sequential() {
	mut csvr := csv.csv_sequential_reader(scr_buf: txt1.str, scr_buf_len: txt1.len)!
	mut data := [][]string{}
	for csvr.has_data() > 1 {
		data << csvr.get_next_row()!
	}
	csvr.dispose_csv_reader()
	assert data[0][0] == 'a', 'test_csv_sequential1 reading failed!'
	// there is a final empty row in txt1
	assert data[data.len - 2][0] == 'a', 'test_csv_sequential2 reading failed!'
	assert data[data.len - 2][1] == 'b,c,d', 'test_csv_sequential3 reading failed!'

	csvr = csv.csv_sequential_reader(scr_buf: txt2.str, scr_buf_len: txt2.len)!
	csvr.empty_cell = '####'
	data = [][]string{}
	for csvr.has_data() > 1 {
		data << csvr.get_next_row()!
	}
	csvr.dispose_csv_reader()
	assert data[data.len - 2][2] == '####', 'test_csv_sequential4 reading failed!'
	assert data[data.len - 2][5] == 'pippo', 'test_csv_sequential5 reading failed!'

	// create a temp file to test csv parsing from file
	file_path_str := os.join_path(os.temp_dir(), 'test_csv.csv')

	// test Windows configuration
	mut tmp_txt1 := txt1.replace('\n', '\r\n')

	mut f := os.open_file(file_path_str, 'wb')!
	unsafe {
		f.write_ptr(tmp_txt1.str, tmp_txt1.len)
	}
	f.close()

	csvr = csv.csv_sequential_reader(
		file_path:    file_path_str
		mem_buf_size: 64
		end_line_len: csv.endline_crlf_len
	)!
	data = [][]string{}
	for csvr.has_data() > 1 {
		data << csvr.get_next_row()!
	}
	csvr.dispose_csv_reader()

	// the labels differ from the RAM buffer checks, so a failure identifies its source
	assert data[0][0] == 'a', 'test_csv_sequential6 reading failed!'
	// there is a final empty row in txt1
	assert data[data.len - 2][0] == 'a', 'test_csv_sequential7 reading failed!'
	assert data[data.len - 2][1] == 'b,c,d', 'test_csv_sequential8 reading failed!'

	// remove the temp file
	os.rm(file_path_str)!
}
|
||||||
|
|
||||||
|
/******************************************************************************
|
||||||
|
*
|
||||||
|
* Test Random Access Functions
|
||||||
*
|
*
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
fn perform_test(mut csvr csv.RandomAccessReader) ! {
|
fn perform_test(mut csvr csv.RandomAccessReader) ! {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user