v / cmd / tools / vsqlite / csv.v
153 lines · 143 sloc · 3.52 KB · 1ad6961cbd24366a4efe56e6bb54f21665c65f7a
Raw
1module main
2
3import db.sqlite
4import os
5
// read_csv loads the comma-delimited file at `path`.
// The first record is returned as the header list and every following record as a data row.
// Any error reported by read_csv_sep is passed on to the caller.
pub fn read_csv(path string) !([]string, [][]string) {
	header, data_rows := read_csv_sep(path, `,`)!
	return header, data_rows
}
10
// read_tsv loads the tab-delimited file at `path`.
// The first record is returned as the header list and every following record as a data row.
// Any error reported by read_csv_sep is passed on to the caller.
pub fn read_tsv(path string) !([]string, [][]string) {
	header, data_rows := read_csv_sep(path, `\t`)!
	return header, data_rows
}
15
// read_csv_sep reads the delimited file at `path`, using `sep` as the field separator.
// It returns the first record as the headers and all remaining records as data rows.
// A leading UTF-8 byte-order mark is skipped so that it does not end up inside the
// first header name.
// Errors: propagates the error from os.read_file; returns 'file is empty' when the file
// has no content (or only a byte-order mark) and 'CSV file is empty' when parsing yields
// no records (for example a file made only of blank lines).
pub fn read_csv_sep(path string, sep u8) !([]string, [][]string) {
	mut content := os.read_file(path)!
	// EF BB BF is the UTF-8 encoding of U+FEFF; spreadsheet exports commonly prepend it.
	if content.len >= 3 && content[0] == 0xEF && content[1] == 0xBB && content[2] == 0xBF {
		content = content[3..]
	}
	if content.len == 0 {
		return error('file is empty')
	}
	records := parse_csv_records(content, sep)
	if records.len == 0 {
		return error('CSV file is empty')
	}
	return records[0], records[1..]
}
28
// write_csv stores `rows` in the file at `path` as comma-separated text.
// When `headers` is true and there is at least one row, the column names of the first
// row are written as the first line. Every field is passed through csv_escape, lines are
// joined with '\n', and the output always ends with a trailing '\n' (so an empty `rows`
// produces a file holding a single newline).
// Any error from os.write_file is propagated to the caller.
pub fn write_csv(path string, rows []sqlite.Row, headers bool) ! {
	mut out := []string{cap: rows.len + 1}
	if headers && rows.len > 0 {
		mut names := []string{cap: rows[0].names.len}
		for name in rows[0].names {
			names << csv_escape(name)
		}
		out << names.join(',')
	}
	for row in rows {
		mut cells := []string{cap: row.vals.len}
		for val in row.vals {
			cells << csv_escape(val)
		}
		out << cells.join(',')
	}
	text := out.join('\n') + '\n'
	os.write_file(path, text)!
}
40
// csv_escape prepares `s` for use as one field of a comma-separated record.
// It delegates to csv_escape_sep with ',' as the separator.
pub fn csv_escape(s string) string {
	comma := ','
	return csv_escape_sep(s, comma)
}
45
// csv_escape_sep returns `s` ready to be written as one field of a delimited record.
// The field is wrapped in double quotes, with every embedded double quote doubled, when
// it contains `sep`, a double quote, a line feed or a carriage return.
// Otherwise `s` is returned unchanged.
fn csv_escape_sep(s string, sep string) string {
	// A bare carriage return has to be quoted as well: parse_csv_records treats an
	// unquoted `\r` as a record terminator, so leaving it bare would split the field.
	needs_quotes := s.contains(sep) || s.contains('"') || s.contains('\n') || s.contains('\r')
	if !needs_quotes {
		return s
	}
	return '"' + s.replace('"', '""') + '"'
}
53
// parse_csv_line splits one comma-separated line into its fields.
// It delegates to parse_csv_line_sep with `,` as the separator byte.
pub fn parse_csv_line(line string) []string {
	comma := u8(`,`)
	return parse_csv_line_sep(line, comma)
}
58
// parse_csv_line_sep splits a single delimited line into fields, using `sep` as the
// separator byte.
// A double quote switches quoted mode on; inside quoted mode the separator is taken
// literally and two consecutive double quotes stand for one literal double quote.
// The quote characters themselves are not part of the returned fields.
// An unterminated quote is not reported: the rest of the line becomes part of the field.
// The result always holds at least one field (an empty line yields ['']).
pub fn parse_csv_line_sep(line string, sep u8) []string {
	mut fields := []string{}
	// Bytes of the current field are collected in a buffer and converted once per field,
	// which avoids reallocating a string for every input byte.
	mut field := []u8{}
	mut in_quotes := false
	mut i := 0
	for i < line.len {
		c := line[i]
		if in_quotes {
			if c != `"` {
				field << c
			} else if i + 1 < line.len && line[i + 1] == `"` {
				// Escaped quote: keep one `"` and skip the second one.
				field << c
				i++
			} else {
				in_quotes = false
			}
		} else if c == `"` {
			in_quotes = true
		} else if c == sep {
			fields << field.bytestr()
			field.clear()
		} else {
			field << c
		}
		i++
	}
	// The last field has no trailing separator, so it is flushed here.
	fields << field.bytestr()
	return fields
}
95
// parse_csv_records parses complete CSV/TSV text into a list of records, each record
// being a list of fields. `sep` is the field separator byte.
// Outside quotes, `\n`, `\r\n` and a lone `\r` each terminate a record. Inside quotes,
// separators and line breaks are taken literally and two consecutive double quotes stand
// for one literal double quote. The quote characters themselves are not returned.
// Records consisting of a single empty field (blank lines) are skipped.
// An unterminated quote is not reported: the remaining text becomes part of the field.
pub fn parse_csv_records(content string, sep u8) [][]string {
	mut records := [][]string{}
	mut record := []string{}
	// Bytes of the current field are collected in a buffer and converted once per field,
	// which avoids reallocating a string for every input byte.
	mut field := []u8{}
	mut in_quotes := false
	mut i := 0
	for i < content.len {
		c := content[i]
		if in_quotes {
			if c != `"` {
				field << c
			} else if i + 1 < content.len && content[i + 1] == `"` {
				// Escaped quote: keep one `"` and skip the second one.
				field << c
				i++
			} else {
				in_quotes = false
			}
		} else if c == `"` {
			in_quotes = true
		} else if c == sep {
			record << field.bytestr()
			field.clear()
		} else if c == `\r` || c == `\n` {
			// Consume the `\n` of a `\r\n` pair so it counts as one terminator.
			if c == `\r` && i + 1 < content.len && content[i + 1] == `\n` {
				i++
			}
			record << field.bytestr()
			field.clear()
			// record has at least one field here; drop it only if it is a blank line.
			if record.len > 1 || record[0] != '' {
				records << record
			}
			record = []string{}
		} else {
			field << c
		}
		i++
	}
	// Flush the final record when the text does not end with a line terminator.
	if field.len > 0 || record.len > 0 {
		record << field.bytestr()
		if record.len > 1 || record[0] != '' {
			records << record
		}
	}
	return records
}
154