| 1 | module yaml |
| 2 | |
| 3 | import os |
| 4 | import x.json2 |
| 5 | |
| 6 | // Edge-case coverage for parse_text + scalar parsing + serialization. |
| 7 | // Complements yaml_test.v, which covers the happy path. Each fn targets one |
| 8 | // specific gap that would have let a previous regression in the module pass |
| 9 | // silently. |
| 10 | |
// Input that starts with the UTF-8 byte-order mark must still expose its
// first key under the plain name `name`.
fn test_parse_strips_utf8_bom() ! {
	bom := '\xef\xbb\xbf'
	parsed := parse_text(bom + 'name: app\nport: 8080\n')!
	assert parsed.value('name').string() == 'app'
	assert parsed.value('port').int() == 8080
}
| 17 | |
// CRLF-terminated and bare-CR-terminated input must both yield the same two
// integer entries.
fn test_parse_normalizes_crlf_and_cr() ! {
	inputs := ['a: 1\r\nb: 2\r\n', 'a: 1\rb: 2\r']
	for text in inputs {
		parsed := parse_text(text)!
		assert parsed.value('a').int() == 1
		assert parsed.value('b').int() == 2
	}
}
| 26 | |
// Empty and whitespace-only inputs must parse successfully with a Null root.
fn test_parse_empty_and_whitespace_only_documents() ! {
	// YAML 1.2 treats a document without content as the null node.
	blank_inputs := ['', ' ', '\n\n', ' \n \n']
	for text in blank_inputs {
		parsed := parse_text(text)!
		assert parsed.root is Null
	}
}
| 34 | |
// EmptyDecodeTarget is the struct that the empty-document decode test
// decodes into; both fields are expected to stay at their zero values.
struct EmptyDecodeTarget {
	name string
	port int
}
| 39 | |
// Decoding an empty document into a struct must succeed and leave every
// field at its zero value.
fn test_decode_empty_document_yields_default_struct() ! {
	// The YAML root of an empty config file is Null per spec, yet decoding it
	// is expected to produce a zero-initialized struct rather than an error.
	empty_inputs := ['', ' ', '\n\n']
	for text in empty_inputs {
		decoded := decode[EmptyDecodeTarget](text)!
		assert decoded.name == ''
		assert decoded.port == 0
	}
}
| 49 | |
// Every spelling of null (`~`, `null`, `Null`, `NULL`) as well as a key with
// no value at all must parse to the Null variant.
fn test_parse_null_variants() ! {
	doc := parse_text('
a: ~
b: null
c: Null
d: NULL
e:
')!
	for k in ['a', 'b', 'c', 'd', 'e'] {
		v := doc.value(k)
		// `typeof(v).name` names the declared type of `v`, not the variant it
		// currently holds, so it would print the same text for every failure.
		// `type_name()` reports the variant actually stored, which is what the
		// message is meant to show.
		assert v is Null, '${k} should be Null, got ${v.type_name()}'
	}
}
| 63 | |
// The YAML 1.1 boolean spellings (true/yes/on and false/no/off in several
// capitalizations) must all convert through `bool()` to the expected value.
fn test_parse_bool_yaml11_variants() ! {
	parsed := parse_text('
t1: true
t2: True
t3: TRUE
t4: yes
t5: YES
t6: on
t7: On
f1: false
f2: False
f3: no
f4: NO
f5: off
f6: Off
')!
	truthy_keys := ['t1', 't2', 't3', 't4', 't5', 't6', 't7']
	falsy_keys := ['f1', 'f2', 'f3', 'f4', 'f5', 'f6']
	for key in truthy_keys {
		assert parsed.value(key).bool(), '${key} should be true'
	}
	for key in falsy_keys {
		assert !parsed.value(key).bool(), '${key} should be false'
	}
}
| 87 | |
// Digit-group underscores, explicit signs and exponent notation must all be
// read as numbers.
fn test_parse_numeric_underscores_and_signs() ! {
	parsed := parse_text('
a: 1_000_000
b: -42
c: +17
d: 1.5e10
e: -1.0e-5
')!
	grouped := parsed.value('a').i64()
	assert grouped == 1_000_000
	negative := parsed.value('b').int()
	assert negative == -42
	explicit_plus := parsed.value('c').u64()
	assert explicit_plus == 17
	big_float := parsed.value('d').f64()
	assert big_float == 1.5e10
	small_float := parsed.value('e').f64()
	assert small_float == -1.0e-5
}
| 102 | |
// Escape sequences inside double-quoted scalars and the doubled-quote escape
// inside single-quoted scalars must be decoded.
fn test_parse_quoted_string_escapes() ! {
	parsed := parse_text('
a: "line1\\nline2"
b: "tab\\there"
c: "quote: \\""
d: "unicode: \\u00e9"
e: \'sing\'\'le\'
')!
	// Key -> decoded text; V maps iterate in insertion order, so the checks
	// run a, b, c, d, e.
	expected := {
		'a': 'line1\nline2'
		'b': 'tab\there'
		'c': 'quote: "'
		'd': 'unicode: é'
		'e': "sing'le"
	}
	for key, want in expected {
		assert parsed.value(key).string() == want
	}
}
| 117 | |
// A `#` inside a quoted scalar is data, while a `#` after a plain scalar
// starts a comment that is dropped from the value.
fn test_parse_comment_inside_quoted_string_is_preserved() ! {
	src := 'a: "value with # not a comment"\nb: real # comment trimmed\n'
	parsed := parse_text(src)!
	quoted := parsed.value('a').string()
	assert quoted == 'value with # not a comment'
	plain := parsed.value('b').string()
	assert plain == 'real'
}
| 123 | |
// Flow-style sequences and mappings nested inside each other must be
// reachable through dotted and indexed paths.
fn test_parse_nested_flow_style() ! {
	src := 'root: {a: [1, [2, 3], {b: c, d: [e, f]}], g: 4}\n'
	parsed := parse_text(src)!
	first_item := parsed.value('root.a[0]').int()
	assert first_item == 1
	inner_list := parsed.value('root.a[1]').array()
	assert inner_list.len == 2
	assert parsed.value('root.a[2].b').string() == 'c'
	assert parsed.value('root.a[2].d[1]').string() == 'f'
	assert parsed.value('root.g').int() == 4
}
| 132 | |
// Checks both block scalar styles: the `|` scalar must keep its line breaks
// (including the empty line), and the `>` scalar must join adjacent lines
// with a space while keeping the paragraph break.
// NOTE(review): the YAML inside this literal depends on leading whitespace
// that is not visible in this view; confirm the indentation against the file
// on disk before reformatting the string.
| 133 | fn test_parse_block_scalar_literal_and_folded() ! { |
| 134 | doc := parse_text(' |
| 135 | literal: | |
| 136 | line1 |
| 137 | line2 |
| 138 | |
| 139 | line4 |
| 140 | folded: > |
| 141 | hello |
| 142 | world |
| 143 | |
| 144 | |
| 145 | next paragraph |
| 146 | ')! |
| 147 | assert doc.value('literal').string() == 'line1\nline2\n\nline4\n' |
| 148 | assert doc.value('folded').string() == 'hello world\n\nnext paragraph\n' |
| 149 | } |
| 150 | |
// A tab used for indentation must be rejected with an error that names the
// problem and the line it occurred on.
fn test_parse_rejects_tabs_in_indentation() {
	_ := parse_text('a:\n\tb: 1\n') or {
		msg := err.msg()
		assert msg.contains('tabs are not supported')
		// The tab sits on line 2 of the input; the message has to say so for
		// the caller to be able to locate it.
		assert msg.contains('line 2'), 'error should report line number, got: ${msg}'
		return
	}
	assert false, 'tabs in indentation should error'
}
| 162 | |
// A mapping entry indented deeper than its sibling must be rejected with an
// "unexpected indentation" error.
fn test_parse_rejects_unexpected_indentation_in_mapping() {
	_ := parse_text('a: 1\n b: 2\n') or {
		assert err.msg().contains('unexpected indentation')
		return
	}
	assert false, 'over-indented mapping entry should error'
}
| 170 | |
// JSON text is accepted as YAML input and exposes the same path lookups.
fn test_parse_json_superset_path() ! {
	// Input shaped like JSON is expected to go through the json2 fast path
	// inside parse_text.
	json_src := '{"a": [1, 2, {"b": "c"}], "d": null}'
	parsed := parse_text(json_src)!
	assert parsed.value('a[0]').int() == 1
	assert parsed.value('a[2].b').string() == 'c'
	null_entry := parsed.value('d')
	assert null_entry is Null
}
| 178 | |
// Empty flow collections, alone or nested one level deep, must keep their
// element counts.
fn test_parse_empty_inline_collections() ! {
	parsed := parse_text('a: []\nb: {}\nc: [[]]\nd: [{}]\n')!
	empty_list := parsed.value('a').array()
	assert empty_list.len == 0
	empty_map := parsed.value('b').as_map()
	assert empty_map.len == 0
	list_of_list := parsed.value('c').array()
	assert list_of_list.len == 1
	list_of_map := parsed.value('d').array()
	assert list_of_map.len == 1
}
| 186 | |
// Builds a chain of 30 nested block mappings under `root` and checks that
// the leaf value at the bottom is still reachable.
fn test_parse_deeply_nested_structure() ! {
	depth := 30
	// Level i is indented by i + 1 spaces; the leaf sits one step deeper than
	// the last level.
	mut lines := ['root:']
	for level in 0 .. depth {
		lines << ' '.repeat(level + 1) + 'level${level}:'
	}
	lines << ' '.repeat(depth + 1) + 'leaf: 42'
	src := lines.join('\n') + '\n'

	parsed := parse_text(src)!
	mut cursor := parsed.value('root')
	for level in 0 .. depth {
		cursor = cursor.value('level${level}')
	}
	assert cursor.value('leaf').int() == 42
}
| 202 | |
// Parses a small document, serializes it with `to_yaml`, parses that output
// again and checks that a top-level scalar and two sequence entries still
// carry the original values.
// NOTE(review): the YAML inside `src` depends on leading whitespace that is
// not visible in this view; confirm the indentation against the file on disk
// before reformatting the string.
| 203 | fn test_to_yaml_roundtrip_preserves_structure() ! { |
| 204 | src := 'name: app |
| 205 | servers: |
| 206 | - host: a |
| 207 | port: 1 |
| 208 | - host: b |
| 209 | port: 2 |
| 210 | ' |
| 211 | doc := parse_text(src)! |
| 212 | yaml_text := doc.to_yaml() |
| 213 | doc2 := parse_text(yaml_text)! |
| 214 | assert doc2.value('name').string() == 'app' |
| 215 | assert doc2.value('servers[0].host').string() == 'a' |
| 216 | assert doc2.value('servers[1].port').int() == 2 |
| 217 | } |
| 218 | |
// Serializes the same Doc once to get a reference string, then 1000 more
// times, asserting that every call returns exactly that reference string.
// NOTE(review): the YAML inside the literal depends on leading whitespace
// that is not visible in this view; confirm the indentation against the file
// on disk before reformatting the string.
| 219 | fn test_to_yaml_is_stable_across_many_calls() ! { |
| 220 | // Anti-regression for a real crash that used to surface only after many |
| 221 | // repeated `to_yaml` calls on the same Doc (sumtype recursion through the |
| 222 | // json2.Any rebuild path under -prod -gc boehm). 1000 iterations are |
| 223 | // enough to flush the original failure mode without bloating CI runtime. |
| 224 | doc := parse_text(' |
| 225 | name: my-app |
| 226 | version: 1.2.3 |
| 227 | servers: |
| 228 | - host: a |
| 229 | port: 1 |
| 230 | - host: b |
| 231 | port: 2 |
| 232 | features: |
| 233 | enable_cache: true |
| 234 | enable_metrics: true |
| 235 | ')! |
| 236 | first := doc.to_yaml() |
| 237 | for _ in 0 .. 1000 { |
| 238 | assert doc.to_yaml() == first |
| 239 | } |
| 240 | } |
| 241 | |
// Non-ASCII strings must survive `to_json` intact.
fn test_to_json_emits_valid_json_for_unicode() ! {
	source_doc := parse_text('a: "café"\nb: "中文"\n')!
	json_text := source_doc.to_json()
	// The output is decoded again rather than matched against a substring:
	// that catches real corruption of the strings without depending on the
	// emitter's whitespace or key ordering.
	reparsed := json2.decode[json2.Any](json_text)!
	fields := reparsed as map[string]json2.Any
	latin := fields['a'] or { return error('missing key a in re-parsed output') }
	cjk := fields['b'] or { return error('missing key b in re-parsed output') }
	assert latin.str() == 'café'
	assert cjk.str() == '中文'
}
| 255 | |
// A tab and a double quote inside string values must come back unchanged
// after `to_json` output is decoded again.
fn test_to_json_escapes_special_chars() ! {
	source_doc := parse_text('a: "tab\there"\nb: "quote: \\""\n')!
	json_text := source_doc.to_json()
	reparsed := json2.decode[json2.Any](json_text)!
	fields := reparsed as map[string]json2.Any
	with_tab := fields['a'] or { return error('missing key a in re-parsed output') }
	with_quote := fields['b'] or { return error('missing key b in re-parsed output') }
	assert with_tab.str() == 'tab\there'
	assert with_quote.str() == 'quote: "'
}
| 266 | |
// Both a plain key and a key containing a dot must be emitted in double
// quotes by `to_yaml`.
fn test_to_yaml_quotes_keys_consistently() ! {
	parsed := parse_text('plain: 1\n"a.b": 2\n')!
	rendered := parsed.to_yaml()
	// Guards against a future switch to plain-style keys, which would
	// silently break round-tripping for keys that contain dots.
	for quoted_key in ['"plain":', '"a.b":'] {
		assert rendered.contains(quoted_key)
	}
}
| 276 | |
// Looking up a path that does not exist yields Null, whether the miss is at
// the top level, below a scalar, or below an existing leaf.
fn test_value_returns_null_for_missing_path() ! {
	parsed := parse_text('a: 1\nb:\n c: 2\n')!
	missing_paths := ['z', 'a.does.not.exist', 'b.c.d']
	for path in missing_paths {
		assert parsed.value(path) is Null
	}
}
| 283 | |
// `value_opt` must report an error for a key that is not present.
fn test_value_opt_errors_on_missing() ! {
	parsed := parse_text('a: 1\n')!
	// Reaching the `or` block is the expected outcome.
	_ := parsed.value_opt('z') or { return }
	assert false, 'expected error for missing key'
}
| 290 | |
// Indexing past the end of a sequence yields Null.
fn test_value_returns_null_on_array_out_of_bounds() ! {
	parsed := parse_text('a: [1, 2, 3]\n')!
	out_of_range := parsed.value('a[99]')
	assert out_of_range is Null
}
| 295 | |
// Directive lines before the `---` marker must not end up in the document.
fn test_parse_skips_yaml_directives() ! {
	// `%YAML`, `%TAG` and any other line starting with `%` are expected to be
	// consumed by the parser.
	src := '%YAML 1.2\n%TAG !e! tag:example.com,2000:app/\n---\nname: app\n'
	parsed := parse_text(src)!
	assert parsed.value('name').string() == 'app'
}
| 302 | |
// A value tagged with `&id` must be returned again wherever `*id` is used,
// both in a mapping and in a sequence.
fn test_parse_anchor_and_alias_resolution() ! {
	src := 'a: &x hello\nb: *x\nlist:\n - &y 42\n - *y\n'
	parsed := parse_text(src)!
	for key in ['a', 'b'] {
		assert parsed.value(key).string() == 'hello'
	}
	for path in ['list[0]', 'list[1]'] {
		assert parsed.value(path).int() == 42
	}
}
| 311 | |
// An alias that refers to no anchor parses successfully and resolves to Null.
fn test_parse_unknown_alias_returns_null() ! {
	parsed := parse_text('a: *missing\n')!
	unresolved := parsed.value('a')
	assert unresolved is Null
}
| 316 | |
// Writes a small document to a temp file named after the process id, parses
// it with `parse_file`, and removes the file when the test exits.
fn test_parse_file_happy_path() ! {
	file_name := 'yaml_pf_${os.getpid()}.yml'
	tmp_file := os.join_path(os.vtmp_dir(), file_name)
	defer {
		// Best-effort cleanup; a failed removal is ignored.
		os.rm(tmp_file) or {}
	}
	os.write_file(tmp_file, 'name: app\nport: 8080\n')!
	parsed := parse_file(tmp_file)!
	assert parsed.value('name').string() == 'app'
	assert parsed.value('port').int() == 8080
}
| 327 | |
// `parse_file` must return an error for a path that does not exist.
fn test_parse_file_returns_error_on_missing_path() {
	absent := os.join_path(os.vtmp_dir(), 'yaml_does_not_exist_${os.getpid()}.yml')
	// Reaching the `or` block is the expected outcome.
	_ := parse_file(absent) or { return }
	assert false, 'parse_file on missing path should error'
}
| 334 | |
// Flow sequences and flow mappings that wrap across several lines must be
// read as one collection.
fn test_parse_flow_collection_spanning_multiple_lines() ! {
	// The parser has to keep accumulating lines until the brackets balance.
	wrapped_list := parse_text('arr: [\n 1,\n 2,\n 3\n]\n')!
	items := wrapped_list.value('arr').array()
	assert items.len == 3
	assert wrapped_list.value('arr[2]').int() == 3

	wrapped_map := parse_text('obj: {\n a: 1,\n b: 2\n}\n')!
	assert wrapped_map.value('obj.a').int() == 1
	assert wrapped_map.value('obj.b').int() == 2
}
| 345 | |