Gitly


1 module yaml
2 
3 import strconv
4 import strings
5 
// Parser carries the mutable state of a single line-oriented YAML parse.
struct Parser {
mut:
    // lines is the input document, one entry per source line; `skip_ignorable`
    // may rewrite an entry to drop a leading `--- ` document marker.
    lines           []string
    // idx is the index into `lines` of the next line to be consumed.
    idx             int
    // anchors maps an anchor name (`&name`) to the value registered for it,
    // so that a later alias (`*name`) can be resolved.
    anchors         map[string]Any
    // directives_done flips to true once `skip_ignorable` reaches the first
    // non-ignorable line; after that, `%…` lines are no longer skipped.
    directives_done bool
}
13 
// parse is the parser entry point. It skips leading ignorable lines and then
// parses the first node at that line's own indentation. An input with nothing
// left to parse yields `null`. Indentation errors from `line_indent` propagate.
fn (mut p Parser) parse() !Any {
    p.skip_ignorable()
    if p.idx < p.lines.len {
        first_indent := p.line_indent(p.idx)!
        if first_indent >= 0 {
            return p.parse_node(first_indent)
        }
    }
    return null
}
25 
// parse_node parses the node that starts on the next non-ignorable line,
// provided that line is indented at least `indent` columns; otherwise (or at
// end of input) it returns `null` without consuming anything.
//
// Dispatch order: block sequence entry (`-`), block mapping entry (`key:`),
// stand-alone alias, block scalar, flow collection, quoted scalar, and finally
// a plain scalar that may continue over following lines. Any anchor found on
// the node is registered with the resulting value.
fn (mut p Parser) parse_node(indent int) !Any {
    p.skip_ignorable()
    if p.idx >= p.lines.len {
        return null
    }
    node_indent := p.line_indent(p.idx)!
    if node_indent < indent {
        return null
    }
    text := p.current_content()!
    is_seq_entry := text.starts_with('-') && (text.len == 1 || text[1] == ` `)
    if is_seq_entry {
        return p.parse_sequence(node_indent)
    }
    if split_mapping_entry(text).ok {
        return p.parse_mapping(node_indent)
    }
    deco := extract_decorators(text)
    // Sequences and mappings consume their own lines; every branch below
    // starts by consuming the current one.
    p.idx++
    if deco.alias != '' && deco.rest == '' {
        return p.resolve_alias(deco.alias)
    }
    body := deco.rest
    if is_block_scalar(body) {
        block := p.parse_block_scalar(node_indent, body)!
        return p.register_anchor(deco.anchor, Any(block))
    }
    if body.starts_with('[') || body.starts_with('{') {
        flow_text := p.collect_flow_continuation(body)!
        return p.register_anchor(deco.anchor, parse_flow_value(flow_text)!)
    }
    starts_quoted := body.len > 0 && (body[0] == `"` || body[0] == `'`)
    scalar_text := if starts_quoted {
        p.gather_quoted_continuation(body)!
    } else {
        p.gather_plain_continuation(body, node_indent)
    }
    return p.register_anchor(deco.anchor, parse_scalar(scalar_text)!)
}
64 
// gather_plain_continuation appends to the plain scalar `initial` every
// following line that still belongs to it and returns the folded text.
// A line directly after content is joined with a single space; a run of N
// blank (or comment-only) lines before the next content line is emitted as N
// `\n` characters. Scanning stops, without consuming the line, at a `---` /
// `...` marker, a line indented less than `base_indent`, a line whose indent
// cannot be computed, a sequence entry, or a mapping entry. Blank lines seen
// before the stop are consumed but contribute nothing.
fn (mut p Parser) gather_plain_continuation(initial string, base_indent int) string {
    mut out := strings.new_builder(initial.len * 2)
    out.write_string(initial.trim_space())
    mut pending_blank := 0
    for p.idx < p.lines.len {
        raw := p.lines[p.idx]
        text := if raw.trim_space() == '' { '' } else { strip_comments(raw).trim_space() }
        if text == '' {
            pending_blank++
            p.idx++
            continue
        }
        if text == '---' || text == '...' {
            break
        }
        cur_indent := p.line_indent(p.idx) or { break }
        if cur_indent < base_indent {
            break
        }
        starts_entry := text == '-' || text.starts_with('- ')
        if starts_entry || split_mapping_entry(text).ok {
            break
        }
        if pending_blank == 0 {
            out.write_u8(` `)
        } else {
            out.write_string('\n'.repeat(pending_blank))
            pending_blank = 0
        }
        out.write_string(text)
        p.idx++
    }
    return out.str()
}
113 
// parse_mapping reads consecutive `key: value` lines that sit exactly at
// `indent` and returns them as a map. A less-indented line (or end of input)
// ends the mapping; a deeper-indented line or a line that is not a mapping
// entry is reported as an error carrying its 1-based line number.
fn (mut p Parser) parse_mapping(indent int) !Any {
    mut entries := map[string]Any{}
    for p.idx < p.lines.len {
        p.skip_ignorable()
        if p.idx >= p.lines.len {
            break
        }
        here := p.line_indent(p.idx)!
        if here < indent {
            break
        }
        if here != indent {
            return error('yaml: unexpected indentation on line ${p.idx + 1}')
        }
        parsed := split_mapping_entry(p.current_content()!)
        if !parsed.ok {
            return error('yaml: expected a mapping entry on line ${p.idx + 1}')
        }
        // Consume the key line before parsing the value, which may span
        // further lines.
        p.idx++
        entries[parsed.key] = p.parse_mapping_value(parsed.rest, indent)!
    }
    return Any(entries)
}
138 
// next_starts_sequence skips ignorable lines and reports whether the next
// line's content is a block sequence entry (`-` alone or `- …`). The skip is
// the same one the enclosing mapping loop performs before reading its next
// entry, so calling this does not consume anything that loop would keep.
fn (mut p Parser) next_starts_sequence() bool {
    p.skip_ignorable()
    if p.idx >= p.lines.len {
        return false
    }
    content := p.current_content() or { return false }
    return content.starts_with('-') && (content.len == 1 || content[1] == ` `)
}

// parse_mapping_value parses the value part of a mapping entry whose key line
// has already been consumed. `rest_in` is the text after the `:` and `indent`
// is the indentation of the key.
//
// - A stand-alone alias resolves through the anchor table.
// - An empty value followed by a deeper-indented line becomes that nested node.
// - An empty value followed by a sequence entry at the SAME indentation as the
//   key becomes that sequence (YAML allows `key:\n- a\n- b`). Previously this
//   yielded `null` and the `- a` line was then rejected, or mis-read as a key,
//   by the caller.
// - Any other empty value is `null`.
// - Otherwise the value is a block scalar, flow collection, quoted scalar, or
//   single-line plain scalar.
//
// An anchor on the value is registered with the parsed result.
fn (mut p Parser) parse_mapping_value(rest_in string, indent int) !Any {
    d := extract_decorators(rest_in)
    if d.alias != '' && d.rest == '' {
        return p.resolve_alias(d.alias)
    }
    value := if d.rest == '' {
        next_indent := p.peek_next_indent()
        if next_indent > indent {
            p.parse_node(next_indent)!
        } else if next_indent == indent && p.next_starts_sequence() {
            // Sequence at the key's own indentation: it belongs to this key.
            p.parse_sequence(indent)!
        } else {
            null
        }
    } else if is_block_scalar(d.rest) {
        Any(p.parse_block_scalar(indent, d.rest)!)
    } else if d.rest.starts_with('[') || d.rest.starts_with('{') {
        full := p.collect_flow_continuation(d.rest)!
        parse_flow_value(full)!
    } else if d.rest.len > 0 && (d.rest[0] == `"` || d.rest[0] == `'`) {
        quoted := p.gather_quoted_continuation(d.rest)!
        parse_scalar(quoted)!
    } else {
        parse_scalar(d.rest)!
    }
    return p.register_anchor(d.anchor, value)
}
164 
// parse_sequence reads consecutive `- item` lines that sit exactly at `indent`
// and returns the items as an array. The sequence ends at end of input, at a
// less-indented line, or at a line at `indent` that is not a sequence entry.
// A deeper-indented line is an error carrying its 1-based line number.
fn (mut p Parser) parse_sequence(indent int) !Any {
    mut elems := []Any{}
    for p.idx < p.lines.len {
        p.skip_ignorable()
        if p.idx >= p.lines.len {
            break
        }
        here := p.line_indent(p.idx)!
        if here < indent {
            break
        }
        if here > indent {
            return error('yaml: unexpected indentation on line ${p.idx + 1}')
        }
        text := p.current_content()!
        is_entry := text.starts_with('-') && (text.len == 1 || text[1] == ` `)
        if !is_entry {
            break
        }
        tail := if text.len > 1 { text[1..].trim_space() } else { '' }
        // Consume the `-` line; the item parser may consume more.
        p.idx++
        elems << p.parse_sequence_item(tail, indent)!
    }
    return Any(elems)
}
189 
// parse_sequence_item parses one sequence element whose `-` line has already
// been consumed. `rest_in` is the text after the dash and `indent` is the
// indentation of the dash.
//
// An empty item takes the following deeper-indented node, or `null` when
// there is none. A `key: value` item opens a compact mapping whose further
// keys are expected at `indent + 2`. Block scalars, flow collections, quoted
// scalars and single-line plain scalars are handled as elsewhere. An anchor on
// the item is registered with the parsed result.
fn (mut p Parser) parse_sequence_item(rest_in string, indent int) !Any {
    deco := extract_decorators(rest_in)
    body := deco.rest
    if body == '' && deco.alias != '' {
        return p.resolve_alias(deco.alias)
    }
    value := if body == '' {
        below := p.peek_next_indent()
        if below > indent {
            p.parse_node(below)!
        } else {
            null
        }
    } else if is_block_scalar(body) {
        Any(p.parse_block_scalar(indent, body)!)
    } else if body.starts_with('[') || body.starts_with('{') {
        flow_text := p.collect_flow_continuation(body)!
        parse_flow_value(flow_text)!
    } else {
        first := split_mapping_entry(body)
        if first.ok {
            // Compact mapping: `- key: value` followed by sibling keys that
            // line up two columns right of the dash.
            mut compact := map[string]Any{}
            key_indent := indent + 2
            compact[first.key] = p.parse_mapping_value(first.rest, key_indent)!
            for p.idx < p.lines.len {
                p.skip_ignorable()
                if p.idx >= p.lines.len {
                    break
                }
                here := p.line_indent(p.idx)!
                if here <= indent {
                    break
                }
                if here != key_indent {
                    return error('yaml: unexpected indentation on line ${p.idx + 1}')
                }
                sibling := split_mapping_entry(p.current_content()!)
                if !sibling.ok {
                    break
                }
                p.idx++
                compact[sibling.key] = p.parse_mapping_value(sibling.rest, key_indent)!
            }
            Any(compact)
        } else if body.len > 0 && (body[0] == `"` || body[0] == `'`) {
            quoted_text := p.gather_quoted_continuation(body)!
            parse_scalar(quoted_text)!
        } else {
            parse_scalar(body)!
        }
    }
    return p.register_anchor(deco.anchor, value)
}
243 
// resolve_alias returns the value previously registered under anchor `name`,
// or `null` when no such anchor has been registered.
fn (p &Parser) resolve_alias(name string) Any {
    if resolved := p.anchors[name] {
        return resolved
    }
    return null
}
247 
// register_anchor stores `value` under `anchor` unless the anchor name is
// empty, and hands `value` back unchanged so callers can wrap their result in
// a single `return p.register_anchor(...)`.
fn (mut p Parser) register_anchor(anchor string, value Any) Any {
    if anchor == '' {
        return value
    }
    p.anchors[anchor] = value
    return value
}
257 
// parse_block_scalar reads the body of a literal (`|`) or folded (`>`) block
// scalar whose header line has already been consumed. `parent_indent` is the
// indentation the body must exceed; `header` supplies the style and the chomp
// indicator (`-` strip, `+` keep, none = clip). Explicit indentation digits in
// the header are not used: the body indentation is the smallest indent among
// the non-blank body lines.
//
// The body ends at the first non-blank line indented at or below
// `parent_indent`. Trailing blank lines are counted and handed to
// `apply_chomp`. A body with no content lines yields '' unless the chomp is
// `+`, in which case the blank lines are consumed and one `\n` per blank line
// (at least one) is returned.
fn (mut p Parser) parse_block_scalar(parent_indent int, header string) !string {
    style, chomp := parse_block_header(header)
    // Pass 1 (look-ahead only): find the body's indentation.
    mut body_indent := -1
    for probe := p.idx; probe < p.lines.len; probe++ {
        if p.lines[probe].trim_space() == '' {
            continue
        }
        probe_indent := p.line_indent(probe)!
        if probe_indent <= parent_indent {
            break
        }
        if body_indent == -1 || probe_indent < body_indent {
            body_indent = probe_indent
        }
    }
    if body_indent == -1 {
        if chomp != `+` {
            return ''
        }
        mut kept := 0
        for p.idx < p.lines.len && p.lines[p.idx].trim_space() == '' {
            kept++
            p.idx++
        }
        if kept == 0 {
            kept = 1
        }
        return '\n'.repeat(kept)
    }
    // Pass 2: consume the body, cutting `body_indent` columns off each line.
    mut content := []string{}
    for p.idx < p.lines.len {
        raw := p.lines[p.idx]
        if raw.trim_space() == '' {
            content << ''
        } else {
            if p.line_indent(p.idx)! <= parent_indent {
                break
            }
            piece := if raw.len > body_indent { raw[body_indent..] } else { '' }
            content << piece
        }
        p.idx++
    }
    // Separate trailing blank lines from the body proper.
    mut keep := content.len
    for keep > 0 && content[keep - 1] == '' {
        keep--
    }
    trailing := content.len - keep
    kept_lines := content[..keep]
    body := if style == `|` { kept_lines.join('\n') } else { fold_block_scalar(kept_lines) }
    return apply_chomp(body, chomp, trailing)
}
326 
// FlowBalance tracks, across one or more calls to `scan`, whether a flow
// collection is still open.
struct FlowBalance {
mut:
    // bracket is the number of `[` seen minus the number of `]` seen outside
    // quotes; it can go negative on stray closers.
    bracket   int
    // brace is the same counter for `{` / `}`.
    brace     int
    // in_single is true while the scan position is inside a '…' string.
    in_single bool
    // in_double is true while the scan position is inside a "…" string.
    in_double bool
    // escape is true when the previous byte inside a "…" string was an
    // unescaped backslash.
    escape    bool
}
335 
// collect_flow_continuation returns `initial` extended with as many following
// lines as are needed to close every `[` / `{` and every quoted string it
// opens. Each appended line is trimmed and joined with a single space.
// Comments are stripped from a line only when it does not start inside a
// quoted string, and blank lines outside quotes are dropped. If the input
// ends before everything is balanced an error is returned. When `initial` is
// already balanced it is returned unchanged and no line is consumed.
fn (mut p Parser) collect_flow_continuation(initial string) !string {
    mut state := FlowBalance{}
    state.scan(initial)
    if !state.unbalanced() {
        return initial
    }
    mut joined := strings.new_builder(initial.len * 2)
    joined.write_string(initial)
    for p.idx < p.lines.len && state.unbalanced() {
        raw := p.lines[p.idx]
        p.idx++
        inside_quote := state.in_single || state.in_double
        visible := if inside_quote { raw } else { strip_comments(raw) }
        piece := visible.trim_space()
        if piece == '' && !inside_quote {
            continue
        }
        joined.write_u8(` `)
        joined.write_string(piece)
        state.scan(piece)
    }
    if state.unbalanced() {
        return error('yaml: unterminated flow collection')
    }
    return joined.str()
}
366 
// unbalanced reports whether anything scanned so far is still open: a
// positive `[` or `{` count, or an unterminated single- or double-quoted
// string.
fn (b &FlowBalance) unbalanced() bool {
    open_collection := b.bracket > 0 || b.brace > 0
    open_quote := b.in_single || b.in_double
    return open_collection || open_quote
}
370 
// scan feeds the bytes of `s` through the balance tracker, updating the
// bracket / brace counters and the quote state. State carries over between
// calls, so a string or collection opened in one call can be closed in a
// later one. Inside "…" a backslash escapes the next byte; inside '…' a
// doubled `''` is a literal quote; brackets inside quotes are not counted.
fn (mut b FlowBalance) scan(s string) {
    mut pos := 0
    for pos < s.len {
        ch := s[pos]
        pos++
        if b.in_double {
            if b.escape {
                b.escape = false
            } else if ch == `\\` {
                b.escape = true
            } else if ch == `"` {
                b.in_double = false
            }
        } else if b.in_single {
            if ch == `'` {
                if pos < s.len && s[pos] == `'` {
                    // `''` is an escaped quote: step over its second half.
                    pos++
                } else {
                    b.in_single = false
                }
            }
        } else {
            match ch {
                `"` { b.in_double = true }
                `'` { b.in_single = true }
                `[` { b.bracket++ }
                `]` { b.bracket-- }
                `{` { b.brace++ }
                `}` { b.brace-- }
                else {}
            }
        }
    }
}
405 
// parse_block_header splits a block-scalar header such as `|`, `|-`, `|+` or
// `>2-` into its style byte (the first character) and its chomp indicator
// (the last `+` or `-` found after the first character, or 0 when there is
// none). An empty header is treated as a literal block with no chomp.
fn parse_block_header(s string) (u8, u8) {
    if s.len == 0 {
        return `|`, 0
    }
    mut chomp := u8(0)
    for c in s[1..] {
        if c == `+` || c == `-` {
            chomp = c
        }
    }
    return s[0], chomp
}
422 
// apply_chomp finishes a block scalar according to its chomping indicator
// (YAML 1.2 §8.1.1.2). `body` is the content with its trailing blank lines
// already removed and `stripped_trailing` is how many were removed.
//   `-` (strip): `body` as is, no final line break.
//   `+` (keep):  `body` plus its final line break (when non-empty), plus one
//                `\n` per removed trailing blank line.
//   otherwise (clip): `body` plus a single `\n`, or '' when `body` is empty.
fn apply_chomp(body string, chomp u8, stripped_trailing int) string {
    if chomp == `-` {
        return body
    }
    clipped := if body == '' { '' } else { body + '\n' }
    if chomp == `+` {
        return clipped + '\n'.repeat(stripped_trailing)
    }
    return clipped
}
444 
// is_ignorable_line reports whether `trimmed` (a line with comments and
// surrounding whitespace already removed) carries no content for the parser:
// an empty line, a `---` or `...` document marker, or a `%…` directive line
// seen while `directives_done` is still false. Once that flag is set, a line
// starting with `%` is ordinary content.
fn (p &Parser) is_ignorable_line(trimmed string) bool {
    if trimmed == '' || trimmed == '---' || trimmed == '...' {
        return true
    }
    return !p.directives_done && trimmed.starts_with('%')
}
453 
// skip_ignorable advances `p.idx` past every ignorable line and stops on the
// first line that carries content, marking directives as finished. If that
// line begins with a document marker followed by inline content (`--- foo` or
// `---\tfoo`), the first four bytes are cut off in place so the inline
// content is parsed as the body starting at column 0.
fn (mut p Parser) skip_ignorable() {
    for p.idx < p.lines.len {
        raw := p.lines[p.idx]
        if !p.is_ignorable_line(strip_comments(raw).trim_space()) {
            has_marker_prefix := raw.starts_with('--- ') || raw.starts_with('---\t')
            if has_marker_prefix && raw.len > 4 {
                p.lines[p.idx] = raw[4..]
            }
            p.directives_done = true
            return
        }
        p.idx++
    }
}
471 
// peek_next_indent returns the indentation of the next non-ignorable line at
// or after `p.idx` without consuming anything. It returns -1 when no such
// line exists or when that line's indentation cannot be computed.
fn (p &Parser) peek_next_indent() int {
    for at := p.idx; at < p.lines.len; at++ {
        text := strip_comments(p.lines[at]).trim_space()
        if !p.is_ignorable_line(text) {
            return p.line_indent(at) or { -1 }
        }
    }
    return -1
}
485 
// current_content returns the text of the line at `p.idx` with its leading
// indentation, any comment, and surrounding whitespace removed. A line that
// consists only of indentation yields ''. Indentation errors propagate.
fn (p &Parser) current_content() !string {
    raw := p.lines[p.idx]
    lead := p.line_indent(p.idx)!
    if lead < raw.len {
        return strip_comments(raw[lead..]).trim_space()
    }
    return ''
}
494 
// line_indent counts the leading spaces of line `index`. If the first
// character after those spaces is a tab, an error naming the 1-based line
// number is returned, since tabs are not accepted as indentation.
fn (p &Parser) line_indent(index int) !int {
    text := p.lines[index]
    mut width := 0
    for ch in text {
        if ch != ` ` {
            if ch == `\t` {
                return error('yaml: tabs are not supported for indentation on line ${index + 1}')
            }
            break
        }
        width++
    }
    return width
}
506 
// MappingEntry is the result of trying to split a line into `key: rest`.
struct MappingEntry {
    // key is the mapping key after `parse_key` has cleaned and unquoted it.
    key  string
    // rest is the trimmed text following the `:` separator ('' when none).
    rest string
    // ok is true only when the line was recognized as a mapping entry; the
    // zero value therefore means "not a mapping entry".
    ok   bool
}
512 
// split_mapping_entry looks for the first `:` in `content` that separates a
// mapping key from its value: one that lies outside quotes and outside any
// `[...]` / `{...}` nesting, and that is followed by whitespace or the end of
// the text. On success it returns the parsed key, the trimmed remainder and
// `ok: true`. An empty key, a key that `parse_key` rejects, or no qualifying
// colon yields the zero `MappingEntry` (`ok == false`).
fn split_mapping_entry(content string) MappingEntry {
    // 0 while outside quotes, otherwise the quote byte that is open.
    mut open_quote := u8(0)
    mut escaped := false
    mut square_depth := 0
    mut curly_depth := 0
    mut pos := 0
    for pos < content.len {
        ch := content[pos]
        if open_quote == `"` {
            if escaped {
                escaped = false
            } else if ch == `\\` {
                escaped = true
            } else if ch == `"` {
                open_quote = 0
            }
            pos++
            continue
        }
        if open_quote == `'` {
            if ch == `'` {
                if pos + 1 < content.len && content[pos + 1] == `'` {
                    // `''` is an escaped quote, not the terminator.
                    pos += 2
                    continue
                }
                open_quote = 0
            }
            pos++
            continue
        }
        if ch == `"` || ch == `'` {
            open_quote = ch
        } else if ch == `[` {
            square_depth++
        } else if ch == `]` {
            if square_depth > 0 {
                square_depth--
            }
        } else if ch == `{` {
            curly_depth++
        } else if ch == `}` {
            if curly_depth > 0 {
                curly_depth--
            }
        } else if ch == `:` && square_depth == 0 && curly_depth == 0
            && (pos + 1 == content.len || content[pos + 1].is_space()) {
            key_text := content[..pos].trim_space()
            if key_text == '' {
                return MappingEntry{}
            }
            key := parse_key(key_text) or { return MappingEntry{} }
            rest := if pos + 1 < content.len { content[pos + 1..].trim_space() } else { '' }
            return MappingEntry{
                key:  key
                rest: rest
                ok:   true
            }
        }
        pos++
    }
    return MappingEntry{}
}
592 
// strip_node_decorators returns `s` with every leading decorator that
// `extract_decorators` recognizes removed: anchors (`&id`), aliases (`*id`)
// and tags (`!Type`, `!!Type`), in any sequence. The anchor and alias names
// are discarded; callers that need them should call `extract_decorators`
// directly.
fn strip_node_decorators(s string) string {
    decorators := extract_decorators(s)
    return decorators.rest
}
604 
// Decorators carries the anchor / alias / remaining content split off a node.
struct Decorators {
    // anchor is the name following a leading `&`, or '' when there is none.
    anchor string
    // alias is the name following a leading `*`, or '' when there is none.
    alias  string
    // rest is the node text left after all leading decorators were removed.
    rest   string
}
611 
// extract_decorators removes leading anchors (`&name`), aliases (`*name`) and
// tags (`!tag`, `!!tag`) from `s` and returns the anchor name, the alias name
// and the text that remains. Decorators are peeled repeatedly; if several
// anchors or aliases appear, the last one of each kind wins. Tags are dropped
// without being reported. `&` or `*` directly followed by a space or tab is
// not treated as a decorator, and peeling stops once fewer than two bytes
// remain.
fn extract_decorators(s string) Decorators {
    mut anchor_name := ''
    mut alias_name := ''
    mut remaining := s.trim_left(' \t')
    for remaining.len >= 2 {
        lead := remaining[0]
        is_ref := lead == `&` || lead == `*`
        if is_ref && remaining[1] != ` ` && remaining[1] != `\t` {
            // The name runs up to the next space or tab.
            mut stop := 1
            for stop < remaining.len && remaining[stop] != ` ` && remaining[stop] != `\t` {
                stop++
            }
            if lead == `&` {
                anchor_name = remaining[1..stop]
            } else {
                alias_name = remaining[1..stop]
            }
            remaining = remaining[stop..].trim_left(' \t')
        } else if lead == `!` {
            // Skip `!` or `!!` and then the tag text itself.
            mut stop := if remaining[1] == `!` { 2 } else { 1 }
            for stop < remaining.len && remaining[stop] != ` ` && remaining[stop] != `\t` {
                stop++
            }
            remaining = remaining[stop..].trim_left(' \t')
        } else {
            break
        }
    }
    return Decorators{
        anchor: anchor_name
        alias:  alias_name
        rest:   remaining
    }
}
658 
// parse_key turns the raw text of a mapping key into the key string. Leading
// decorators are stripped first; if what remains is wrapped in a matching
// pair of `"` or `'` quotes it is unquoted via `parse_quoted_string` (whose
// errors propagate), otherwise it is returned with surrounding whitespace
// trimmed.
fn parse_key(src string) !string {
    bare := strip_node_decorators(src)
    if bare.len >= 2 {
        head := bare[0]
        tail := bare[bare.len - 1]
        if (head == `"` || head == `'`) && head == tail {
            return parse_quoted_string(bare)
        }
    }
    return bare.trim_space()
}
670 
// parse_scalar converts one scalar token into a typed value. After stripping
// decorators and whitespace:
//   - an empty token is the empty string;
//   - a token wrapped in matching quotes is unquoted;
//   - `~` and `null` give null; `true` / `yes` / `on` give true;
//     `false` / `no` / `off` give false (all case-insensitive);
//   - with `_` separators removed, an integer literal parses to a signed value
//     when it starts with `-` and to an unsigned value otherwise (a leading
//     `+` is dropped), and a float literal parses to f64;
//   - anything else is returned as a plain string.
fn parse_scalar(text string) !Any {
    token := strip_node_decorators(text).trim_space()
    if token.len == 0 {
        return Any('')
    }
    if token.len >= 2 {
        q := token[0]
        if (q == `"` || q == `'`) && token[token.len - 1] == q {
            return Any(parse_quoted_string(token)!)
        }
    }
    // No keyword is longer than five bytes, so longer tokens skip the
    // comparisons entirely.
    if token.len <= 5 {
        if token == '~' || equals_ascii_ci(token, 'null') {
            return null
        }
        is_true := equals_ascii_ci(token, 'true') || equals_ascii_ci(token, 'yes')
            || equals_ascii_ci(token, 'on')
        if is_true {
            return Any(true)
        }
        is_false := equals_ascii_ci(token, 'false') || equals_ascii_ci(token, 'no')
            || equals_ascii_ci(token, 'off')
        if is_false {
            return Any(false)
        }
    }
    digits := strip_underscores(token)
    if is_integer(digits) {
        if digits.starts_with('-') {
            return Any(digits.parse_int(0, 64)!)
        }
        magnitude := if digits.starts_with('+') { digits[1..] } else { digits }
        return Any(magnitude.parse_uint(0, 64)!)
    }
    if is_float(digits) {
        return Any(strconv.atof64(digits)!)
    }
    return Any(token)
}
720 
// parse_quoted_string removes the surrounding quotes from `src` and decodes
// its body. The body is line-folded first (`fold_quoted_inner`). For a
// single-quoted string the only transformation is `''` -> `'`. For a
// double-quoted string the escapes `\b \f \n \r \t \" \\ \/` and `\uXXXX`
// (four hex digits) are decoded. This is a subset of the YAML escape set;
// any other backslash sequence, a trailing lone backslash, or a truncated
// `\u` escape is reported as an error. Input shorter than two bytes is
// rejected.
fn parse_quoted_string(src string) !string {
    if src.len < 2 {
        return error('yaml: invalid quoted string')
    }
    quote := src[0]
    inner := fold_quoted_inner(src[1..src.len - 1])
    if quote == `'` {
        return if inner.contains("''") { inner.replace("''", "'") } else { inner }
    }
    // Without a backslash there is nothing to decode.
    if !inner.contains_u8(`\\`) {
        return inner
    }
    mut decoded := strings.new_builder(inner.len)
    mut pos := 0
    for pos < inner.len {
        ch := inner[pos]
        pos++
        if ch != `\\` {
            decoded.write_u8(ch)
            continue
        }
        if pos >= inner.len {
            return error('yaml: invalid escape sequence')
        }
        esc := inner[pos]
        pos++
        match esc {
            `"`, `\\`, `/` {
                decoded.write_u8(esc)
            }
            `b` {
                decoded.write_u8(`\b`)
            }
            `f` {
                decoded.write_u8(`\f`)
            }
            `n` {
                decoded.write_u8(`\n`)
            }
            `r` {
                decoded.write_u8(`\r`)
            }
            `t` {
                decoded.write_u8(`\t`)
            }
            `u` {
                // `pos` is now at the first of the four hex digits.
                if pos + 4 > inner.len {
                    return error('yaml: invalid unicode escape')
                }
                hex := inner[pos..pos + 4]
                code_point := rune(hex.parse_uint(16, 32)!)
                decoded.write_string(code_point.str())
                pos += 4
            }
            else {
                return error('yaml: unknown escape sequence \\${rune(esc).str()}')
            }
        }
    }
    return decoded.str()
}
796 
// fold_quoted_inner applies line folding to the body of a quoted scalar (the
// text between the quotes). A body without `\n` is returned untouched.
// Otherwise each line has the whitespace next to a line break trimmed, and:
//   - two content lines separated by one line break are joined with a space;
//   - content lines separated by N blank lines are joined with N `\n`;
//   - an empty first line contributes one leading space, an empty last line
//     one trailing space.
fn fold_quoted_inner(inner string) string {
    if !inner.contains_u8(`\n`) {
        return inner
    }
    raw_lines := inner.split('\n')
    last := raw_lines.len - 1
    mut parts := []string{cap: raw_lines.len}
    for idx, raw in raw_lines {
        // The text contains a line break, so there are at least two lines and
        // the first and last are distinct.
        piece := if idx == 0 {
            raw.trim_right(' \t')
        } else if idx == last {
            raw.trim_left(' \t')
        } else {
            raw.trim(' \t')
        }
        parts << piece
    }
    lead_empty := parts[0] == ''
    tail_empty := parts[last] == ''
    mut folded := strings.new_builder(inner.len)
    if lead_empty {
        folded.write_u8(` `)
    }
    first_idx := if lead_empty { 1 } else { 0 }
    stop_idx := if tail_empty { last } else { last + 1 }
    mut gap := 0
    mut emitted := false
    for part in parts[first_idx..stop_idx] {
        if part == '' {
            gap++
            continue
        }
        if emitted {
            if gap > 0 {
                folded.write_string('\n'.repeat(gap))
            } else {
                folded.write_u8(` `)
            }
        }
        gap = 0
        folded.write_string(part)
        emitted = true
    }
    if tail_empty {
        folded.write_u8(` `)
    }
    return folded.str()
}
854 
// quoted_terminated reports whether `s` is a complete quoted scalar: it must
// start with `quote`, and the first unescaped closing quote must be its very
// last byte. For `'` a doubled `''` is an escaped quote; for any other quote
// byte the string is scanned as double-quoted, where a backslash escapes the
// byte that follows it and `"` closes the string.
fn quoted_terminated(s string, quote u8) bool {
    if s.len < 2 || s[0] != quote {
        return false
    }
    last := s.len - 1
    mut pos := 1
    if quote == `'` {
        for pos <= last {
            if s[pos] != `'` {
                pos++
                continue
            }
            if pos < last && s[pos + 1] == `'` {
                pos += 2
                continue
            }
            return pos == last
        }
        return false
    }
    for pos <= last {
        ch := s[pos]
        if ch == `\\` {
            // Skip the backslash together with the byte it escapes.
            pos += 2
        } else if ch == `"` {
            return pos == last
        } else {
            pos++
        }
    }
    return false
}
896 
// quoted_close_index returns the byte index of the first unescaped closing
// quote in `s`, which must start with `quote`, or -1 when the string is still
// open. `''` inside single quotes and `\x` inside double quotes are skipped
// as escapes.
fn quoted_close_index(s string, quote u8) int {
    if s.len < 2 || s[0] != quote {
        return -1
    }
    mut i := 1
    for i < s.len {
        ch := s[i]
        if quote == `'` {
            if ch == `'` {
                if i + 1 < s.len && s[i + 1] == `'` {
                    i += 2
                    continue
                }
                return i
            }
        } else if ch == `\\` {
            i += 2
            continue
        } else if ch == `"` {
            return i
        }
        i++
    }
    return -1
}

// gather_quoted_continuation completes a quoted scalar that does not close on
// its first line by appending following lines (joined with `\n`) until the
// closing quote is found. The returned text runs from the opening to the
// closing quote and still contains the raw line breaks; `fold_quoted_inner`
// collapses them later. Text that is empty, unquoted, or already terminated
// is returned unchanged without consuming any line.
//
// Continuation lines are taken raw, so the final line may carry trailing
// whitespace or a ` # comment` after the closing quote. Those are dropped
// here. Previously they prevented the scalar from ever being recognized as
// terminated, and the parser consumed the rest of the input before failing.
// Any other text after the closing quote, or running out of input, is an
// error.
fn (mut p Parser) gather_quoted_continuation(initial string) !string {
    if initial == '' {
        return initial
    }
    quote := initial[0]
    if quote != `"` && quote != `'` {
        return initial
    }
    if quoted_terminated(initial, quote) {
        return initial
    }
    mut buf := initial.bytes()
    for p.idx < p.lines.len {
        buf << `\n`
        buf << p.lines[p.idx].bytes()
        p.idx++
        snap := buf.bytestr()
        close_at := quoted_close_index(snap, quote)
        if close_at < 0 {
            continue
        }
        tail := if close_at + 1 < snap.len { snap[close_at + 1..] } else { '' }
        if tail.trim_space() == '' {
            return snap[..close_at + 1]
        }
        // A comment must be separated from the scalar by whitespace.
        separated := tail[0] == ` ` || tail[0] == `\t`
        if separated && tail.trim_left(' \t').starts_with('#') {
            return snap[..close_at + 1]
        }
        // Unexpected text after the closing quote. The message is unchanged
        // from the original behavior for callers that match on it.
        return error('yaml: unterminated quoted string')
    }
    return error('yaml: unterminated quoted string')
}
923 
// strip_comments returns `line` without its trailing comment and without
// trailing spaces or tabs. A comment starts at the first `#` that is outside
// single quotes, outside double quotes (where a backslash escapes the next
// byte) and outside any `[...]` / `{...}` nesting. Lines with no such `#`
// are only right-trimmed.
fn strip_comments(line string) string {
    // Most lines have no `#` at all; skip the scan for them.
    if !line.contains_u8(`#`) {
        return line.trim_right(' \t')
    }
    // 0 while outside quotes, otherwise the quote byte that is open.
    mut open_quote := u8(0)
    mut escaped := false
    mut square_depth := 0
    mut curly_depth := 0
    mut pos := 0
    for pos < line.len {
        ch := line[pos]
        if open_quote == `"` {
            if escaped {
                escaped = false
            } else if ch == `\\` {
                escaped = true
            } else if ch == `"` {
                open_quote = 0
            }
            pos++
            continue
        }
        if open_quote == `'` {
            if ch == `'` {
                if pos + 1 < line.len && line[pos + 1] == `'` {
                    // `''` is an escaped quote, not the terminator.
                    pos += 2
                    continue
                }
                open_quote = 0
            }
            pos++
            continue
        }
        if ch == `"` || ch == `'` {
            open_quote = ch
        } else if ch == `[` {
            square_depth++
        } else if ch == `]` {
            if square_depth > 0 {
                square_depth--
            }
        } else if ch == `{` {
            curly_depth++
        } else if ch == `}` {
            if curly_depth > 0 {
                curly_depth--
            }
        } else if ch == `#` && square_depth == 0 && curly_depth == 0 {
            return line[..pos].trim_right(' \t')
        }
        pos++
    }
    return line.trim_right(' \t')
}
994 
// fold_block_scalar joins the content lines of a folded block scalar into a
// single string.
//
// Rules, as implemented here:
//   - consecutive non-empty lines are joined with a single space,
//   - a run of N empty lines between (or before) non-empty lines becomes N
//     `\n` characters in place of the space,
//   - a run of N trailing empty lines is emitted as N `\n` characters.
fn fold_block_scalar(lines []string) string {
    mut sb := strings.new_builder(64)
    mut blanks := 0
    mut seen_text := false
    for ln in lines {
        if ln.len == 0 {
            blanks++
            continue
        }
        if blanks > 0 {
            // Blank lines replace the joining space, before or after the
            // first text line alike.
            sb.write_string('\n'.repeat(blanks))
        } else if seen_text {
            sb.write_string(' ')
        }
        sb.write_string(ln)
        seen_text = true
        blanks = 0
    }
    if blanks > 0 {
        sb.write_string('\n'.repeat(blanks))
    }
    return sb.str()
}
1019 
// is_block_scalar reports whether `value` is a block scalar header: a leading
// `|` or `>` followed only by `+`, `-` or ASCII digits (so `|`, `>`, `|-`,
// `|+`, `>2`, `|3-` all qualify). Anything else after the indicator rules it
// out; e.g. `> something` is just a plain scalar that happens to start
// with `>`.
fn is_block_scalar(value string) bool {
    if value.len == 0 {
        return false
    }
    if value[0] !in [`|`, `>`] {
        return false
    }
    for c in value[1..] {
        is_indicator := c == `+` || c == `-` || (c >= `0` && c <= `9`)
        if !is_indicator {
            return false
        }
    }
    return true
}
1034 
// is_integer reports whether `value` parses as a 64-bit integer literal.
//
// A leading `-` makes the whole string go through `parse_int`. A leading `+`
// is dropped and the remainder goes through `parse_uint`, as does an unsigned
// value. Both parsers are called with base 0 and a bit size of 64. The empty
// string and a lone sign are rejected.
fn is_integer(value string) bool {
    if value.len == 0 {
        return false
    }
    sign := value[0]
    has_sign := sign == `+` || sign == `-`
    if has_sign && value.len == 1 {
        return false
    }
    if sign == `-` {
        value.parse_int(0, 64) or { return false }
        return true
    }
    // `+N` and plain `N` share the unsigned parser; only the `+` is dropped.
    magnitude := if has_sign { value[1..] } else { value }
    magnitude.parse_uint(0, 64) or { return false }
    return true
}
1053 
// is_float reports whether `value` should be read as a floating-point
// literal: it must contain at least one of `.`, `e` or `E`, and
// `strconv.atof64` must accept it. The empty string is never a float.
fn is_float(value string) bool {
    if value.len == 0 {
        return false
    }
    has_float_marker := value.contains('.') || value.contains('e') || value.contains('E')
    if !has_float_marker {
        return false
    }
    strconv.atof64(value) or { return false }
    return true
}
1064 
// equals_ascii_ci compares `s` against `lower_ref` ignoring the case of ASCII
// letters in `s`. Each byte of `s` in `A`..`Z` is folded to lower case and
// then compared byte-for-byte with `lower_ref`.
//
// `lower_ref` MUST already be lowercase ASCII: it is never folded, so any
// uppercase letter in it can never match. `parse_scalar` uses this to spot
// boolean / null keywords without allocating a lower-cased copy of every
// plain scalar in the document.
fn equals_ascii_ci(s string, lower_ref string) bool {
    if s.len != lower_ref.len {
        return false
    }
    for idx, raw in s {
        // Setting bit 0x20 maps ASCII `A`..`Z` onto `a`..`z`.
        folded := if raw >= `A` && raw <= `Z` { raw | 0x20 } else { raw }
        if folded != lower_ref[idx] {
            return false
        }
    }
    return true
}
1085 
// strip_underscores drops every `_` digit separator from a numeric literal.
// When `value` contains no `_` it is returned as-is, so the common case does
// not allocate.
fn strip_underscores(value string) string {
    if !value.contains_u8(`_`) {
        return value
    }
    return value.replace('_', '')
}
1101