v2 / vlib / encoding / csv / reader.v
216 lines · 201 sloc · 4.56 KB · 951d30405f894991c8277a4fa13c03dad1b69591
Raw
1// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module csv
5
// Once interfaces are further along, the idea is to have something similar to
// Go's io.Reader & bufio.Reader rather than reading the whole file into a string;
// this type would then satisfy that interface. I designed it this way so it can be easily adapted.
// CommentIsDelimiterError is the error returned by read_record when the
// reader's comment character is equal to its field delimiter.
struct CommentIsDelimiterError {
	Error
}

// msg returns the fixed, human readable description of the error.
fn (e CommentIsDelimiterError) msg() string {
	return 'encoding.csv: comment cannot be the same as delimiter'
}
16
// InvalidDelimiterError is the error returned by read_record when the
// configured delimiter is rejected by valid_delim, or when a quoted field
// at the start of a record is followed by something other than the delimiter.
struct InvalidDelimiterError {
	Error
}

// msg returns the fixed, human readable description of the error.
fn (e InvalidDelimiterError) msg() string {
	return 'encoding.csv: invalid delimiter'
}
24
// EndOfFileError is the error returned by read_line once the read position
// has reached the end of the reader's data.
struct EndOfFileError {
	Error
}

// msg returns the fixed, human readable description of the error.
fn (e EndOfFileError) msg() string {
	return 'encoding.csv: end of file'
}
32
// InvalidLineEndingError is the error returned by read_line when the very
// first line is requested and the data contains neither `\n` nor `\r`.
struct InvalidLineEndingError {
	Error
}

// msg returns the fixed, human readable description of the error.
fn (e InvalidLineEndingError) msg() string {
	return 'encoding.csv: could not find any valid line endings'
}
40
// Reader holds the complete CSV text together with the cursor state that
// read_line and read_record advance while walking through it.
struct Reader {
	// Header support is not implemented yet:
	// has_header bool
	// headings []string
	// data is the whole CSV input.
	data string
	// delimiter is the byte that separates fields within a record.
	delimiter u8
	// comment is the byte that marks a line as a comment when it is the
	// first byte of that line.
	comment u8
mut:
	// is_mac_pre_osx_le is switched on by read_line when the first line has
	// no `\n` but does contain a bare `\r`; from then on `\r` ends a line.
	is_mac_pre_osx_le bool
	// row_pos is the offset into data at which the next line starts.
	row_pos int
}
52
// ReaderConfig carries the optional settings accepted by new_reader.
// Every field has a default, so callers may omit the config entirely.
@[params]
pub struct ReaderConfig {
pub:
	// delimiter separates the fields of a record; a comma by default.
	delimiter u8 = `,`
	// comment introduces a comment line; a hash sign by default.
	comment u8 = `#`
}
59
// new_reader creates a Reader over the CSV text in `data`.
// The delimiter and the comment character are taken from `config`;
// when they are not supplied, ReaderConfig's defaults apply.
// The settings are stored as given; they are checked later, when a record is read.
pub fn new_reader(data string, config ReaderConfig) &Reader {
	reader := &Reader{
		data: data
		delimiter: config.delimiter
		comment: config.comment
	}
	return reader
}
69
// read returns the next row of the CSV data as an array holding
// one string per column. Any error raised while parsing the row
// (including the end-of-file error) is propagated to the caller.
pub fn (mut r Reader) read() ![]string {
	return r.read_record()!
}
76
77// Once we have multi dimensional array
78// pub fn (mut r Reader) read_all() ?[][]string {
79// mut records := []string{}
80// for {
81// record := r.read_record() or {
82// if err.error == err_eof.error {
83// return records
84// } else {
85// return err
86// }
87// }
88// records << record
89// }
90// return records
91// }
// read_line returns the next physical line of the data, without its line
// terminator, and moves row_pos just past that terminator.
//
// Lines normally end in `\n`; a `\r` directly in front of it is dropped so
// that Windows line endings are normalised. If the very first line contains
// no `\n` but a `\r` is found, the reader switches to `\r` as line terminator
// for the rest of the data.
//
// Errors:
// - EndOfFileError when row_pos is already at or past the end of the data.
// - InvalidLineEndingError when the first line is requested and neither
//   `\n` nor `\r` occurs in the data.
fn (mut r Reader) read_line() !string {
	start := r.row_pos
	// nothing left to read
	if start >= r.data.len {
		return &EndOfFileError{}
	}
	terminator := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
	mut end := r.data.index_after(terminator, start) or { -1 }
	if end == -1 {
		if start != 0 {
			// the final line of the data has no terminator
			end = r.data.len
		} else {
			// first line: fall back to pre OS X Mac line endings
			end = r.data.index_after('\r', start) or { -1 }
			if end == -1 {
				// the data has no usable line ending at all
				return &InvalidLineEndingError{}
			}
			r.is_mac_pre_osx_le = true
		}
	}
	r.row_pos = end + 1
	line := r.data[start..end]
	// strip the `\r` of a Windows `\r\n` pair
	if !r.is_mac_pre_osx_le && line.len >= 1 && line[line.len - 1] == `\r` {
		return line[..line.len - 1]
	}
	return line
}
122
// read_record parses the next record and returns its fields.
//
// Outside of a quoted field, empty lines and lines whose first byte is the
// comment character are skipped. A field that starts with `"` runs up to the
// matching closing quote; inside it `""` stands for a single `"`. When the
// closing quote is not on the current line, further lines are appended
// (joined with `\n`, comment-looking lines included verbatim) until it is found.
//
// Errors:
// - CommentIsDelimiterError when the delimiter equals the comment character.
// - InvalidDelimiterError when the delimiter is rejected by valid_delim, or
//   when the first field is quoted and its closing quote is followed by
//   something other than the delimiter.
// - whatever read_line returns (e.g. EndOfFileError) when another line is needed.
fn (mut r Reader) read_record() ![]string {
	// the configuration is checked on every call, before any input is consumed
	if r.delimiter == r.comment {
		return &CommentIsDelimiterError{}
	}
	if !valid_delim(r.delimiter) {
		return &InvalidDelimiterError{}
	}
	mut fetch_line := true // another physical line has to be pulled in
	mut in_open_quote := false // `pending` ends inside an unterminated quoted field
	mut pending := '' // part of the record that is not split into fields yet
	mut fields := []string{}
	mut last_delim := -1 // offset of the latest delimiter seen in an unquoted field
	for {
		if fetch_line {
			raw := r.read_line()!
			is_blank := raw.len <= 0
			is_comment := !is_blank && raw[0] == r.comment
			if in_open_quote {
				// inside a quoted field every line is kept, newline included
				pending += '\n'
				if !is_blank {
					pending += raw
				}
			} else if !is_blank && !is_comment {
				pending += raw
			}
			if is_blank || is_comment {
				continue
			}
			fetch_line = false
			in_open_quote = false
		}
		if pending.len == 0 || pending[0] != `"` {
			// unquoted field: it ends at the next delimiter or at the end of the text
			delim_at := pending.index(r.delimiter.ascii_str()) or {
				// last field of the record
				fields << pending[..pending.len]
				break
			}
			last_delim = delim_at
			fields << pending[..delim_at]
			pending = pending[delim_at + 1..]
			continue
		}
		// quoted field: look for the closing quote, stepping over `""` pairs
		mut close_at := -1
		mut saw_escaped_quote := false
		mut pos := 1
		for pos < pending.len {
			if pending[pos] != `"` {
				pos++
				continue
			}
			if pos == pending.len - 1 || pending[pos + 1] != `"` {
				close_at = pos
				break
			}
			saw_escaped_quote = true
			pos += 2
		}
		if close_at == -1 {
			// the field continues on the following line(s)
			fetch_line = true
			in_open_quote = true
			continue
		}
		// drop the opening quote; the closing quote now sits at `content_end`
		pending = pending[1..]
		content_end := close_at - 1
		is_last := content_end + 1 == pending.len
		if is_last || pending[content_end + 1] == r.delimiter {
			content := pending[..content_end]
			fields << if saw_escaped_quote { content.replace('""', '"') } else { content }
			if is_last {
				// last field of the record
				break
			}
			pending = if content_end + 2 == pending.len { '' } else { pending[content_end + 2..] }
			continue
		}
		// the closing quote is followed by something that is not the delimiter
		if last_delim < 0 && fields.len == 0 {
			return &InvalidDelimiterError{}
		}
	}
	return fields
}
213
// valid_delim reports whether `b` may be used as a field delimiter.
// The NUL byte, the double quote and the two line-ending bytes
// (`\r` and `\n`) are rejected; every other byte is accepted.
fn valid_delim(b u8) bool {
	is_reserved := b == 0 || b == `"` || b == `\r` || b == `\n`
	return !is_reserved
}
217