| 1 | module main |
| 2 | |
| 3 | import db.sqlite |
| 4 | import os |
| 5 | |
// read_csv loads the comma-separated file at `path`.
// The first record is returned as the headers and every following record as a
// data row. Any error raised by read_csv_sep is propagated to the caller.
pub fn read_csv(path string) !([]string, [][]string) {
	headers, rows := read_csv_sep(path, `,`)!
	return headers, rows
}
| 10 | |
// read_tsv loads the tab-separated file at `path`.
// The first record is returned as the headers and every following record as a
// data row. Any error raised by read_csv_sep is propagated to the caller.
pub fn read_tsv(path string) !([]string, [][]string) {
	headers, rows := read_csv_sep(path, `\t`)!
	return headers, rows
}
| 15 | |
// read_csv_sep reads the delimited file at `path`, splitting fields on `sep`.
// It returns (headers, data_rows): the first parsed record and all records after it.
// Errors:
// - whatever os.read_file reports if the file cannot be read,
// - 'file is empty' when the file has no bytes,
// - 'CSV file is empty' when the content parses to zero records
//   (for example, a file made only of blank lines).
pub fn read_csv_sep(path string, sep u8) !([]string, [][]string) {
	text := os.read_file(path)!
	if text == '' {
		return error('file is empty')
	}
	parsed := parse_csv_records(text, sep)
	if parsed.len < 1 {
		return error('CSV file is empty')
	}
	header := parsed[0]
	body := parsed[1..]
	return header, body
}
| 28 | |
// write_csv writes `rows` to the file at `path` as comma-separated text.
// When `headers` is true and there is at least one row, the first line holds the
// column names taken from the first row. Every field goes through csv_escape.
// Lines are joined with '\n' and the output always ends with a trailing '\n',
// so an empty `rows` produces a file containing a single newline.
// An error from os.write_file is propagated to the caller.
pub fn write_csv(path string, rows []sqlite.Row, headers bool) ! {
	mut out := []string{cap: rows.len + 1}
	if headers && rows.len > 0 {
		header_cells := rows[0].names.map(csv_escape(it))
		out << header_cells.join(',')
	}
	for r in rows {
		cells := r.vals.map(csv_escape(it))
		out << cells.join(',')
	}
	body := out.join('\n') + '\n'
	os.write_file(path, body)!
}
| 40 | |
// csv_escape prepares `s` for use as one field of a comma-separated line.
// It delegates to csv_escape_sep with ',' as the separator.
pub fn csv_escape(s string) string {
	comma := ','
	return csv_escape_sep(s, comma)
}
| 45 | |
// csv_escape_sep returns `s` ready to be written as one delimited field.
// The field is wrapped in double quotes, with embedded double quotes doubled,
// when it contains `sep`, a double quote, or a line break (`\n` or `\r`).
// A bare `\r` has to be quoted too: parse_csv_records treats it as a record
// terminator, so an unquoted one would split the field on read-back.
// Any other value is returned unchanged.
fn csv_escape_sep(s string, sep string) string {
	needs_quoting := s.contains(sep) || s.contains('"') || s.contains('\n') || s.contains('\r')
	if !needs_quoting {
		return s
	}
	return '"' + s.replace('"', '""') + '"'
}
| 53 | |
// parse_csv_line splits one comma-separated line into its fields.
// It delegates to parse_csv_line_sep with `,` as the separator byte.
pub fn parse_csv_line(line string) []string {
	fields := parse_csv_line_sep(line, `,`)
	return fields
}
| 58 | |
// parse_csv_line_sep splits a single delimited line into fields, using the byte
// `sep` as the separator.
// A double quote toggles quoted mode and is not copied to the output. Inside
// quotes the separator is literal and `""` yields one `"`.
// The result always has at least one field: an empty line gives [''].
pub fn parse_csv_line_sep(line string, sep u8) []string {
	mut out := []string{}
	// Bytes of the field being built; turned into a string when the field ends.
	mut cur := []u8{}
	mut quoted := false
	mut pos := 0
	for pos < line.len {
		ch := line[pos]
		// Advance first, so line[pos] below is the byte after `ch`.
		pos++
		if quoted {
			if ch != `"` {
				cur << ch
			} else if pos < line.len && line[pos] == `"` {
				// Doubled quote inside a quoted field: keep one, skip the other.
				cur << ch
				pos++
			} else {
				quoted = false
			}
			continue
		}
		if ch == `"` {
			quoted = true
		} else if ch == sep {
			out << cur.bytestr()
			cur.clear()
		} else {
			cur << ch
		}
	}
	// The last field has no trailing separator, so flush it here.
	out << cur.bytestr()
	return out
}
| 95 | |
// parse_csv_records parses complete CSV/TSV `content` into a list of records,
// using the byte `sep` as the field separator.
//
// Rules:
// - A record ends at `\n`, `\r\n`, or a lone `\r`; the final record needs no
//   line ending.
// - A double quote toggles quoted mode and is not copied to the output. Inside
//   quotes, separators and line breaks are literal and `""` yields one `"`.
// - Blank lines are skipped. A line that holds only a quoted empty field (`""`)
//   is NOT blank: it is kept as a record with one empty field, so single-column
//   files do not lose rows with empty values.
// - An unterminated quote is tolerated: the rest of the input becomes part of
//   the last field.
pub fn parse_csv_records(content string, sep u8) [][]string {
	mut records := [][]string{}
	mut record := []string{}
	// Bytes of the field being built; converted to a string once per field
	// instead of concatenating a new string for every byte.
	mut buf := []u8{}
	mut in_quotes := false
	// True once the current record has contained a quote character; this is what
	// distinguishes a quoted empty field from a blank line.
	mut saw_quote := false
	mut i := 0
	for i < content.len {
		c := content[i]
		// Advance first, so content[i] below is the byte after `c`.
		i++
		if in_quotes {
			if c != `"` {
				buf << c
			} else if i < content.len && content[i] == `"` {
				// Doubled quote inside a quoted field: keep one, skip the other.
				buf << c
				i++
			} else {
				in_quotes = false
			}
			continue
		}
		if c == `"` {
			in_quotes = true
			saw_quote = true
		} else if c == sep {
			record << buf.bytestr()
			buf.clear()
		} else if c == `\r` || c == `\n` {
			// Treat CRLF as a single line ending.
			if c == `\r` && i < content.len && content[i] == `\n` {
				i++
			}
			record << buf.bytestr()
			buf.clear()
			// `record` has at least one field here; drop it only for a truly blank line.
			if record.len > 1 || record[0] != '' || saw_quote {
				records << record
			}
			record = []string{}
			saw_quote = false
		} else {
			buf << c
		}
	}
	// Flush a final record that was not terminated by a line ending.
	if buf.len > 0 || record.len > 0 || saw_quote {
		record << buf.bytestr()
		if record.len > 1 || record[0] != '' || saw_quote {
			records << record
		}
	}
	return records
}
| 154 | |