v / vlib / encoding / csv / csv_reader_test.v
473 lines · 406 sloc · 11.35 KB · e2e5cf8db56f3562c7baa735061690be936bdf3e
Raw
1/*
2csv reader 1.0 alpha
3
4Copyright (c) 2023 Dario Deledda. All rights reserved.
5Use of this source code is governed by an MIT license
6that can be found in the LICENSE file.
7
8This file contains tests
9
10Known limitations:
11*/
12import encoding.csv
13import strings
14import os
15import rand
16
17/******************************************************************************
18*
19* Test Data
20*
21******************************************************************************/
// dataset 1: leading blank lines and `#` comment lines, a header row, four
// 7-column data rows (plain, numeric and double-quoted cells) with a comment line
// in between, and a final 6-column row holding a quoted cell with embedded commas
// and a bare `#` cell. The expected parse result is in `target_data`.
const txt1 = '

#
# pippo
#
a,b,c,d,e,f,g
0,dario,.2,3.2e-2,4,"pero5",6
# first comment, test @# again
1,2,3,4,5,6,7
2,3,4,5,6,7,8
3,4,5,6,7,8,9

a,"b,c,d",0,#,3,"pippo"

# last comment
'
39
// target_header_list is the header description `perform_test` expects
// `build_header_dict` to produce for dataset 1: one item per column `a`..`g`,
// with its column index and its column type.
const target_header_list = [
	csv.HeaderItem{
		label:  'a'
		column: 0
		htype:  .int
	},
	csv.HeaderItem{
		label:  'b'
		column: 1
		htype:  .string
	},
	csv.HeaderItem{
		label:  'c'
		column: 2
		htype:  .f32
	},
	csv.HeaderItem{
		label:  'd'
		column: 3
		htype:  .f32
	},
	csv.HeaderItem{
		label:  'e'
		column: 4
		htype:  .int
	},
	csv.HeaderItem{
		label:  'f'
		column: 5
		htype:  .string
	},
	csv.HeaderItem{
		label:  'g'
		column: 6
		htype:  .int
	},
]
77
// target_data lists, in order, the rows `perform_test` expects `get_row` to return
// for dataset 1. The comment and blank lines of the dataset do not appear here, and
// quoted cells keep their double quotes.
const target_data = [
	['a', 'b', 'c', 'd', 'e', 'f', 'g'],
	['0', 'dario', '.2', '3.2e-2', '4', '"pero5"', '6'],
	['1', '2', '3', '4', '5', '6', '7'],
	['2', '3', '4', '5', '6', '7', '8'],
	['3', '4', '5', '6', '7', '8', '9'],
	['a', '"b,c,d"', '0', '#', '3', '"pippo"'], // 6 columns for test purpose
]
86
// dataset 2: same layout as dataset 1, except that the third cell of the last data
// row is empty (`,,`); used to check the `empty_cell` handling.
// NOTE(review): originally labelled "crlf string from windows" - confirm which line
// endings this literal is really stored with.
const txt2 = '

#
# pippo
#
a,b,c,d,e,f,g
0,dario,.2,3.2e-2,4,"pero5",6
# first comment, test @# again
1,2,3,4,5,6,7
2,3,4,5,6,7,8
3,4,5,6,7,8,9

a,"b,c,d",,#,3,"pippo"

# last comment
'
104
// dataset 3/4: the same 3-row, 4-column table written with two line endings:
// txt3 terminates every row with `\r\n`, txt4 with `\n`.
const txt3 = 'a,b,c,d\r\n0,1,2,3\r\n4,5,6,7\r\n'
const txt4 = 'a,b,c,d\n0,1,2,3\n4,5,6,7\n'
108/******************************************************************************
109*
110* Test Sequential Functions
111*
112******************************************************************************/
// test_csv_sequential exercises the sequential reader in three passes:
//   1. txt1 parsed from an in-memory buffer,
//   2. txt2 parsed from an in-memory buffer with a custom `empty_cell` marker,
//   3. txt1 rewritten with CRLF line endings and parsed from a temporary file.
// Every assertion carries its own numbered label so a failure identifies the pass.
fn test_csv_sequential() {
	// pass 1: txt1 from memory
	mut csvr := csv.csv_sequential_reader(scr_buf: txt1.str, scr_buf_len: txt1.len)!
	mut data := [][]string{}
	for csvr.has_data() > 1 {
		data << csvr.get_next_row()!
	}
	csvr.dispose_csv_reader()
	assert data[0][0] == 'a', 'test_csv_sequential1 reading failed!'
	// there is a final empty row in txt1
	assert data[data.len - 2][0] == 'a', 'test_csv_sequential2 reading failed!'
	assert data[data.len - 2][1] == 'b,c,d', 'test_csv_sequential3 reading failed!'

	// pass 2: txt2 from memory, empty cells are reported as '####'
	csvr = csv.csv_sequential_reader(scr_buf: txt2.str, scr_buf_len: txt2.len)!
	csvr.empty_cell = '####'
	data = [][]string{}
	for csvr.has_data() > 1 {
		data << csvr.get_next_row()!
	}
	csvr.dispose_csv_reader()
	assert data[data.len - 2][2] == '####', 'test_csv_sequential4 reading failed!'
	assert data[data.len - 2][5] == 'pippo', 'test_csv_sequential5 reading failed!'

	// pass 3: create a temp file to test csv parsing from file
	file_path_str := os.join_path(os.temp_dir(), 'test_csv.csv')
	// println("file_path_str: ${file_path_str}")

	// test Windows configuration: every LF becomes CRLF
	tmp_txt1 := txt1.replace('\n', '\r\n')

	mut f := os.open_file(file_path_str, 'wb')!
	unsafe {
		f.write_ptr(tmp_txt1.str, tmp_txt1.len)
	}
	// f.write_string(tmp_txt1)!
	f.close()

	// a 64 byte buffer is shorter than the file content
	csvr = csv.csv_sequential_reader(
		file_path:    file_path_str
		mem_buf_size: 64
		end_line_len: csv.endline_crlf_len
	)!
	data = [][]string{}
	for csvr.has_data() > 1 {
		data << csvr.get_next_row()!
	}
	csvr.dispose_csv_reader()

	assert data[0][0] == 'a', 'test_csv_sequential6 (file) reading failed!'
	// there is a final empty row in txt1
	assert data[data.len - 2][0] == 'a', 'test_csv_sequential7 (file) reading failed!'
	assert data[data.len - 2][1] == 'b,c,d', 'test_csv_sequential8 (file) reading failed!'

	// remove the temp file
	os.rm(file_path_str)!
}
168
169/******************************************************************************
170*
171* Test Random Access Functions
172*
173******************************************************************************/
// perform_test runs the shared random-access checks on a reader loaded with
// dataset 1: header detection against `target_header_list`, row retrieval against
// `target_data`, untyped and typed cell access, and the `quote_remove` flag.
// Any reader error is propagated to the caller.
fn perform_test(mut csvr csv.RandomAccessReader) ! {
	csvr.build_header_dict(csv.GetHeaderConf{})!

	// test the Header reader
	// println("csvr.header_list: ${csvr.header_list}")
	assert csvr.header_list == target_header_list, 'header_list not matched!'

	/*
	println("--------------------------------")
	for x in csvr.csv_map#[..5] {
		println(x.len)
		println(x)
	}
	println("--------------------------------")
	*/

	// test the data reading: fetch every row listed in the reader's row map
	mut data := [][]string{len: csvr.csv_map.len}
	for x in 0 .. csvr.csv_map.len {
		data[x] = csvr.get_row(x)!
		// if x % 10000 == 0 {
		//	println("#${x:-6d}")
		//}
	}

	/*
	// debug print
	println("---------------")
	for x in 0..csvr.csv_map.len {
		println(csvr.get_row(x)!)
	}
	*/

	// test if we have the same amount of data rows
	assert data.len == csvr.csv_map.len, 'data len not equal'

	// test the data retriever
	for row_count, row in target_data {
		// println("${data[row_count]} ${row}")
		assert data[row_count] == row, 'row ${row_count} does not match target_data'
	}

	// last cell of a row: it sits right before the line terminator (LF or CRLF)
	assert csvr.get_cell(x: 6, y: 4)! == '9', 'get_cell on the last cell of a row failed'

	// test the get cell behaviour
	assert csvr.get_cell(x: csvr.header_map['b'], y: 1)! == 'dario', 'get_cell failed 1'
	// row 5 has only 6 cells, so column `g` must yield the default cell
	assert csvr.get_cell(x: csvr.header_map['g'], y: 5)! == csvr.default_cell, 'get_cell out of data failed 2'
	assert csvr.get_cellt(x: 0, y: 1)! == csv.CellValue(0), 'get_cellt [int] failed'
	assert csvr.get_cellt(x: 1, y: 1)! == csv.CellValue('dario'), 'get_cellt [string] failed'
	assert csvr.get_cellt(x: 2, y: 1)! == csv.CellValue(f32(.2)), 'get_cellt [f32] failed'

	// test the filter quote flag
	csvr.quote_remove = true
	assert csvr.get_cell(x: 1, y: 5)! == 'b,c,d', 'get_cell filter quote flag failed'
}
230
// perform_test2 checks that an empty cell is reported as the reader's `empty_cell`
// value: column `c` of row 5 is empty in dataset 2.
fn perform_test2(mut csvr csv.RandomAccessReader) ! {
	csvr.build_header_dict(csv.GetHeaderConf{})!

	// look the column up by label, then read the cell that is empty in the source
	col_c := csvr.header_map['c']
	cell := csvr.get_cell(x: col_c, y: 5)!
	assert cell == csvr.empty_cell, 'get_cell empty_cell failed 2'
}
236
// perform_test3 checks the last cell of the last row (column `d`, row 2) of the
// small datasets 3/4, which differ only in their line endings.
fn perform_test3(mut csvr csv.RandomAccessReader) ! {
	csvr.build_header_dict(csv.GetHeaderConf{})!
	/*
	// debug print
	println("---------------")
	for x in 0..csvr.csv_map.len {
		println(csvr.get_row(x)!)
	}
	*/
	// raw string: the backslashes stay literal, so the failure message is printed
	// on a single line instead of containing real LF/CR characters
	assert csvr.get_cell(x: csvr.header_map['d'], y: 2)! == '7', r'test \n \r\n failed'
}
248
// test_csv_string drives the random-access reader through every input variant:
// dataset 1 from RAM, dataset 1 with CRLF endings from a temp file, dataset 2 from
// RAM (empty cell check), and datasets 3/4 from raw buffers (line ending checks).
fn test_csv_string() {
	// dataset 1 parsed straight from RAM
	mut reader := csv.csv_reader_from_string(txt1)!
	perform_test(mut reader)!
	reader.dispose_csv_reader()

	// dataset 1 converted to Windows (CRLF) line endings and stored in a temp file
	tmp_path := os.join_path(os.temp_dir(), 'test_csv.csv')
	crlf_txt := txt1.replace('\n', '\r\n')

	mut out := os.open_file(tmp_path, 'wb')!
	unsafe {
		out.write_ptr(crlf_txt.str, crlf_txt.len)
	}
	out.close()

	// parse the temp file through a small (32 byte) memory buffer
	reader = csv.csv_reader(
		file_path:    tmp_path
		mem_buf_size: 32
		end_line_len: csv.endline_crlf_len
	)!
	perform_test(mut reader)!
	reader.dispose_csv_reader()

	// the temp file is no longer needed
	os.rm(tmp_path)!

	// dataset 2: empty cell handling
	reader = csv.csv_reader_from_string(txt2)!
	perform_test2(mut reader)!
	reader.dispose_csv_reader()

	// dataset 3: `\r\n` line endings
	reader = csv.csv_reader(
		scr_buf:      txt3.str
		scr_buf_len:  txt3.len
		end_line_len: csv.endline_crlf_len
	)!
	perform_test3(mut reader)!
	reader.dispose_csv_reader()

	// dataset 4: single-character `\n` line endings
	reader = csv.csv_reader(
		scr_buf:      txt4.str
		scr_buf_len:  txt4.len
		end_line_len: csv.endline_cr_len
	)!
	perform_test3(mut reader)!
	reader.dispose_csv_reader()
}
300
// test_coherence writes a 1000 x 1000 grid of consecutive integers (0, 1, 2, ...)
// to a temp csv file, reads every cell back through the random-access reader and
// checks that the sum of the parsed values equals the sum of the written ones.
fn test_coherence() {
	tmp_path := os.join_path(os.temp_dir(), 'test_csv.csv')
	mut out := os.open_file(tmp_path, 'w')!

	// build the whole file content in memory, tracking the total as we go
	mut sb := strings.new_builder(64536)
	mut next := u64(0)
	mut expected := u64(0)
	for _ in 0 .. 1000 {
		for col in 0 .. 1000 {
			if col > 0 {
				sb.write_u8(`,`)
			}
			sb.write_string(next.str())
			expected += next
			next++
		}
		sb.write_string('\n')
	}
	out.write_string(sb.str())!
	out.close()
	// println('sum: ${expected}')

	// parse the temp file through a small (32 byte) memory buffer
	mut reader := csv.csv_reader(
		file_path:    tmp_path
		mem_buf_size: 32
		end_line_len: csv.endline_cr_len
	)!

	// add up every cell of every mapped row
	mut actual := u64(0)
	for row_index in 0 .. reader.csv_map.len {
		cells := reader.get_row(row_index)!
		for cell in cells {
			actual += u64(cell.int())
		}
	}
	// println('sum: ${actual}')

	reader.dispose_csv_reader()

	// remove the temp file
	os.rm(tmp_path)!

	assert expected == actual, 'csv coherence test failed'
}
347
// Debug code: entry point that runs only test_csv_string().
fn main() {
	test_csv_string()
}
352
353// Multithreaded tests
354
// create_csv writes a csv test file to `file_path`: one header line
// (`pippo,count,count1,pera,sempronio,float`) followed by `size` data rows.
// Each row holds a random int, the row index, the constant 3, two quoted strings
// built from the row index and a random f32; cells are separated by `, `.
// It returns the sum of the row indices written to the `count` column
// (0 + 1 + ... + size-1), or the error raised while opening/writing the file.
fn create_csv(file_path string, size int) !i64 {
	// create csv file for the test
	header := 'pippo,count,count1,pera,sempronio,float'

	mut f := os.open_file(file_path, 'w')!
	// close the handle on every exit path, including a failed write
	defer {
		f.close()
	}
	f.write_string(header + '\n')!
	mut count := i64(0)
	for i in 0 .. size {
		tmp := "${rand.int()}, ${i}, 3, \"txt1${i}\", \"txt2${i}\", ${f32(rand.u32()) / 1000.0}\n"
		f.write_string(tmp)!
		// if i % 1_000_000 == 0 {
		//	println(i)
		// }
		count += i
	}
	return count
}
373
// read_lines is the worker run by each thread of test_multithreading: for every
// column of `csvr.header_list` it reads the rows `start_row .. end_row` (end
// excluded) and stores each cell, converted according to the column type, in
// `data[column][row - 1]`. String cells are stored as read; int and f32 cells are
// trimmed and converted. A failing `get_cell` panics with a type-specific message.
// `id` identifies the worker and is not used by the code.
fn read_lines(id int, csvr csv.RandomAccessReader, mut data [][]csv.CellValue, start_row int, end_row int) {
	unsafe {
		for col, header in csvr.header_list {
			for row in start_row .. end_row {
				// the slot index is shifted by one relative to the csv row number
				match header.htype {
					.string {
						data[col][row - 1] = csvr.get_cell(x: col, y: row) or {
							panic('Str get_cell failed')
						}
					}
					.int {
						raw := csvr.get_cell(x: col, y: row) or { panic('Int get_cell failed') }
						data[col][row - 1] = raw.trim_space().int()
					}
					.f32 {
						raw := csvr.get_cell(x: col, y: row) or { panic('F32 get_cell failed') }
						data[col][row - 1] = raw.trim_space().f32()
					}
				}
			}
		}
	} // unsafe
}
411
// test_multithreading reads one csv file concurrently with several readers:
// it generates a file with `create_csv`, opens one reader per slice (the extra
// readers copy the configuration of the first one instead of rebuilding the row
// map), lets one thread per slice fill its own row range of a shared table through
// `read_lines`, and finally checks that the sum of the `count` column matches the
// value returned by `create_csv`.
fn test_multithreading() {
	file_path_str := os.join_path(os.temp_dir(), 'test_csv.csv')
	size := 10_000

	// create the test file; res_count is the expected sum of the `count` column
	res_count := create_csv(file_path_str, size)!

	slices := 2 // number of slices of the csv, one reader and one thread each
	mem_buf_size := 1024 * 1024 * 1

	mut csvr := []csv.RandomAccessReader{}

	// init first csv reader
	csvr << csv.csv_reader(file_path: file_path_str, mem_buf_size: mem_buf_size)!
	csvr[0].build_header_dict(csv.GetHeaderConf{})!

	// init other csv readers using the first reader configuration
	for _ in 1 .. slices {
		mut tmp_csvr := csv.csv_reader(
			file_path:      file_path_str
			mem_buf_size:   mem_buf_size
			create_map_csv: false
		)!
		tmp_csvr.copy_configuration(csvr[0])
		csvr << tmp_csvr
	}

	// read the data from the csv file: data[column][row - 1]
	mut data := [][]csv.CellValue{}

	n_rows := csvr[0].csv_map.len
	unsafe {
		data = [][]csv.CellValue{len: csvr[0].header_list.len, init: []csv.CellValue{len: n_rows}}
	}
	step := n_rows / slices

	mut threads := []thread{}
	mut start := 1 // row 0 is the header line
	for task_index in 0 .. slices {
		// the last slice always runs up to n_rows, so the remainder of the integer
		// division is never dropped
		end := if task_index == slices - 1 || (start + step) > n_rows {
			n_rows
		} else {
			start + step
		}
		threads << spawn read_lines(task_index, csvr[task_index], mut &data, start, end)
		start = end
	}
	threads.wait()

	// release the csv readers
	for mut item in csvr {
		item.dispose_csv_reader()
	}

	// check for the integer column sum; n_rows was captured before the readers
	// were disposed, so no disposed reader is queried here
	mut ck_count := i64(0)
	for i in 0 .. n_rows - 1 {
		ck_count += data[1][i] as int
	}

	assert ck_count == res_count, 'check on csv file failed!'

	// remove the temp file
	os.rm(file_path_str)!
}
474