v2 / vlib / toml / parser / parser.v
1770 lines · 1614 sloc · 56.58 KB · 58a3b6f56a43444feffe35dd64d72d104fbfb07c
Raw
1// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module parser
5
6import toml.ast
7import toml.checker
8import toml.decoder
9import toml.util
10import toml.token
11import toml.scanner
12
// Token kinds that count as formatting: in-line spacing plus line endings.
13pub const all_formatting = [token.Kind.whitespace, .tab, .cr, .nl]
// Token kinds that count as in-line spacing only (no line endings).
14pub const space_formatting = [token.Kind.whitespace, .tab]
// In-line spacing kinds plus the same token kinds that the key branch of
// `root_table` matches on (minus, bare, quoted, boolean, number, underscore).
15pub const keys_and_space_formatting = [token.Kind.whitespace, .tab, .minus, .bare, .quoted, .boolean,
16 .number, .underscore]
17
// all_formatting_kinds returns a fresh array holding every formatting token kind:
// whitespace, tab, carriage return and newline.
fn all_formatting_kinds() []token.Kind {
	kinds := [token.Kind.whitespace, token.Kind.tab, token.Kind.cr, token.Kind.nl]
	return kinds
}
21
// space_formatting_kinds returns a fresh array holding the in-line spacing
// token kinds: whitespace and tab.
fn space_formatting_kinds() []token.Kind {
	kinds := [token.Kind.whitespace, token.Kind.tab]
	return kinds
}
25
// keys_and_space_formatting_kinds returns a fresh array holding the in-line spacing
// token kinds followed by the token kinds that may be part of a key.
fn keys_and_space_formatting_kinds() []token.Kind {
	kinds := [token.Kind.whitespace, token.Kind.tab, token.Kind.minus, token.Kind.bare,
		token.Kind.quoted, token.Kind.boolean, token.Kind.number, token.Kind.underscore]
	return kinds
}
29
// DottedKey is a TOML "dotted" key (`a.b.c`) stored as its individual segments.
type DottedKey = []string

// str joins the segments of the dotted key with `.` and returns the result.
pub fn (dk DottedKey) str() string {
	joined := dk.join('.')
	return joined
}

// starts_with reports whether the leading segments of the dotted key are equal to
// every segment of `target`. A `target` longer than the key never matches.
fn (dk DottedKey) starts_with(target DottedKey) bool {
	if target.len > dk.len {
		return false
	}
	for idx, segment in target {
		if segment != dk[idx] {
			return false
		}
	}
	return true
}

// has reports whether any element of the array is equal to `target`.
fn (a []DottedKey) has(target DottedKey) bool {
	for idx := 0; idx < a.len; idx++ {
		if a[idx] == target {
			return true
		}
	}
	return false
}
59
60// Parser contains the necessary fields for keeping the state of the parsing process.
61pub struct Parser {
62pub:
// Configuration the parser was created with (see `Config`).
63 config Config
64mut:
// Token source; `peek` and `init` call `scan()` on it and `excerpt` uses it for error context.
65 scanner &scanner.Scanner = unsafe { nil }
// `next` shifts these along: prev_tok <- tok <- peek_tok <- first buffered token.
66 prev_tok token.Token
67 tok token.Token
68 peek_tok token.Token
69 tokens []token.Token // To be able to peek more than one token ahead.
// When set, the next loop iteration of `root_table`/`table_contents` does not call `next`.
70 skip_next bool
71 // The root map (map is called table in TOML world)
72 root_map map[string]ast.Value
// Key of the table that `root_table` currently assigns into; set when a `[table]` header is parsed.
73 root_map_key DottedKey
// Set to true by a deferred statement whenever `inline_table` returns.
74 value_is_immutable bool
// Keys consulted by `check_immutable`.
75 immutable []DottedKey
// Keys of `[table]` headers seen so far; appended to in `root_table`.
76 explicit_declared []DottedKey
// Keys consulted by `check_explicitly_declared_array_of_tables`.
77 explicit_declared_array_of_tables []DottedKey
// Parent keys of dotted key assignments (`a.b.c = 1` registers `a.b`); appended to in `root_table`.
78 implicit_declared []DottedKey
79 // Array of Tables state
80 last_aot DottedKey
81 last_aot_index int
82 // Root of the tree
83 ast_root &ast.Root = &ast.Root{}
84}
85
86// Config is used to configure a Parser instance.
87// `run_checks` is used to en- or disable running of the strict `checker.Checker` type checks.
88// `decode_values` is used to en- or disable decoding of values with the `decoder.Decoder`.
89pub struct Config {
90pub:
// Scanner that `new_parser` hands to the created `Parser`; nil unless provided.
91 scanner &scanner.Scanner = unsafe { nil }
// When true (default), `run_checker` validates the parsed values and comments.
92 run_checks bool = true
// When true (default), `run_decoder` decodes the parsed values.
93 decode_values bool = true
94}
95
// new_parser returns a new `Parser` value that uses `config` and reads
// its tokens from `config.scanner`.
pub fn new_parser(config Config) Parser {
	parser_ := Parser{
		scanner: config.scanner
		config:  config
	}
	return parser_
}
103
// init prepares the parser for a run: it resets the root map, buffers the
// first scanned token and forwards the parser once.
pub fn (mut p Parser) init() ! {
	p.root_map = map[string]ast.Value{}
	first_token := p.scanner.scan()!
	p.tokens << first_token
	p.next()!
}
110
// run_checker validates the parsed `ast.Value` nodes of the root map and the
// collected comments. It does nothing when `config.run_checks` is false.
fn (mut p Parser) run_checker() ! {
	if !p.config.run_checks {
		return
	}
	validator := checker.Checker{
		scanner: p.scanner
	}
	validator.check(p.root_map)!
	for cmt in p.ast_root.comments {
		validator.check_comment(cmt)!
	}
}
124
// run_decoder decodes the values of the parsed `ast.Value` nodes in the root map.
// It does nothing when `config.decode_values` is false.
fn (mut p Parser) run_decoder() ! {
	if !p.config.decode_values {
		return
	}
	value_decoder := decoder.Decoder{
		scanner: p.scanner
	}
	value_decoder.decode(mut p.root_map)!
}
135
136// parse starts parsing the input and returns the root
137// of the generated AST.
138pub fn (mut p Parser) parse() !&ast.Root {
// Buffer the first token and reset the root map.
139 p.init()!
// Consume all tokens, filling `p.root_map`.
140 p.root_table()!
// Both passes are no-ops when disabled in `p.config`; any error aborts the parse.
141 p.run_checker()!
142 p.run_decoder()!
// Attach the finished root map to the AST root before handing it out.
143 p.ast_root.table = p.root_map
144 return p.ast_root
145}
146
// next forwards the parser by one token: `tok` becomes `prev_tok`, `peek_tok`
// becomes `tok` and the first buffered token becomes the new `peek_tok`.
fn (mut p Parser) next() ! {
	p.prev_tok = p.tok
	p.tok = p.peek_tok
	buffer_was_empty := p.tokens.len == 0
	if buffer_was_empty {
		// Nothing buffered yet: scan one token so there is something to take.
		p.peek(1)!
	}
	p.peek_tok = p.tokens.first()
	p.tokens.delete(0)
	if !buffer_was_empty {
		// Make sure one token beyond the new `peek_tok` is buffered.
		p.peek(1)!
	}
}
161
// peek peeks forward `n` tokens without consuming any of them.
// `peek(0)` returns `peek_tok`, `peek(n)` for `n >= 1` returns the n-th buffered token.
// Missing tokens are scanned into the buffer first; scanning stops early when an
// `.eof` token has been scanned, in which case the last scanned token is returned.
// Negative values of `n` result in an error.
fn (mut p Parser) peek(n int) !token.Token {
	if n < 0 {
		return error(@MOD + '.' + @STRUCT + '.' + @FN + ' peeking backwards is not supported.')
	}
	if n == 0 {
		return p.peek_tok
	}
	// n >= 1
	if n <= p.tokens.len {
		return p.tokens[n - 1]
	}
	mut missing := n - p.tokens.len
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'buffering ${missing} tokens...')
	mut scanned := token.Token{}
	for scanned.kind != .eof && missing != 0 {
		scanned = p.scanner.scan()!
		p.tokens << scanned
		missing--
	}
	return scanned
}
187
// check forwards the parser to the next token if the `Kind` of the current
// token equals `check_token`. Otherwise it returns an error.
fn (mut p Parser) check(check_token token.Kind) ! {
	if p.tok.kind != check_token {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected token "${check_token}" but found "${p.tok.kind}" in this (excerpt): "...${p.excerpt()}..."')
	}
	p.next()!
}
198
// peek_for_correct_line_ending_or_fail peeks past any space formatting tokens
// and returns an error if the token found there is not one of [.cr, .nl, .hash, .eof].
// On failure the parser is forwarded one token before the error is built.
fn (mut p Parser) peek_for_correct_line_ending_or_fail() ! {
	after_spacing, _ := p.peek_over(1, space_formatting_kinds())!
	if after_spacing.kind in [.cr, .nl, .hash, .eof] {
		return
	}
	p.next()! // Forward to the peek_tok
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' unexpected EOL "${p.tok.kind}" "${p.tok.lit}" expected one of [.cr, .nl, .hash, .eof] at this (excerpt): "...${p.excerpt()}..."')
}
210
// check_one_of forwards the parser to the next token if the `Kind` of the
// current token is contained in `tokens`. Otherwise it returns an error.
fn (mut p Parser) check_one_of(tokens []token.Kind) ! {
	if p.tok.kind !in tokens {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected one of ${tokens} but found "${p.tok.kind}" in this (excerpt): "...${p.excerpt()}..."')
	}
	p.next()!
}
221
// ignore_while forwards the parser to the next token as long as the current
// token's `Kind` can be found in `tokens`. This is helpful for ignoring
// a stream of formatting tokens.
// Skipping stops silently if forwarding the parser fails.
fn (mut p Parser) ignore_while(tokens []token.Kind) {
	// Iterate instead of recursing once per skipped token, so that the stack depth
	// does not grow with the length of the run of ignored tokens.
	for p.tok.kind in tokens {
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignoring "${p.tok.kind}" ...')
		p.next() or { return }
	}
}
232
// ignore_while_peek forwards the parser for as long as the `Kind` of `peek_tok`
// is contained in `tokens`, which is useful for skipping a run of formatting tokens.
// Unlike `ignore_while` the comparison is made against `peek_tok`, which is
// sometimes necessary since not all parser calls forward using the `next()` call.
// Skipping stops silently if forwarding the parser fails.
fn (mut p Parser) ignore_while_peek(tokens []token.Kind) {
	for {
		if p.peek_tok.kind !in tokens {
			break
		}
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'eating "${p.tok.kind}" ...')
		p.next() or { return }
	}
}
244
// peek_over looks ahead, starting with `peek_tok` and continuing with the tokens
// at peek offset `i`, `i + 1`, ..., skipping every token whose `Kind` is contained
// in `tokens`. It returns the first token *not* found in `tokens` together with
// the peek offset that would be examined next.
fn (mut p Parser) peek_over(i int, tokens []token.Kind) !(token.Token, int) {
	mut candidate := p.peek_tok
	mut offset := i
	for {
		if candidate.kind !in tokens {
			break
		}
		candidate = p.peek(offset)!
		offset++
	}
	return candidate, offset
}
260
// is_at reports whether the `Kind` of the current token equals `expected_token`.
fn (mut p Parser) is_at(expected_token token.Kind) bool {
	at_expected := p.tok.kind == expected_token
	return at_expected
}
265
// expect returns an error if the `Kind` of the current token does not equal
// `expected_token`. The parser is not forwarded.
fn (mut p Parser) expect(expected_token token.Kind) ! {
	if p.tok.kind != expected_token {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected token "${expected_token}" but found "${p.tok.kind}" in this text "...${p.excerpt()}..."')
	}
}
275
// build_abs_dotted_key returns the absolute dotted key path of `key`: the current
// root map key followed by `key`. When no root map key is set, `key` is returned as-is.
fn (p &Parser) build_abs_dotted_key(key DottedKey) DottedKey {
	if p.root_map_key.len == 0 {
		return key
	}
	mut absolute := DottedKey([]string{})
	absolute << p.root_map_key
	absolute << key
	return absolute
}
286
287// todo_msvc_astring2dkey works around an MSVC compile error by converting
// a `[]string` into a `DottedKey` through a function return.
288// TODO: remove.
289fn todo_msvc_astring2dkey(s []string) DottedKey {
290 return s
291}
292
// check_immutable returns an error if `key` is registered in `p.immutable`.
fn (p &Parser) check_immutable(key DottedKey) ! {
	if !p.immutable.has(key) {
		return
	}
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' key `${key.str()}` is immutable. Unexpected mutation at "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
}
300
// check_explicitly_declared returns an error if `key` is registered in `p.explicit_declared`.
fn (p &Parser) check_explicitly_declared(key DottedKey) ! {
	if !p.explicit_declared.has(key) {
		return
	}
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' key `${key.str()}` is already explicitly declared. Unexpected redeclaration at "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
}
308
// check_explicitly_declared_array_of_tables returns an error if `key` is
// registered in `p.explicit_declared_array_of_tables`.
fn (p &Parser) check_explicitly_declared_array_of_tables(key DottedKey) ! {
	if !p.explicit_declared_array_of_tables.has(key) {
		return
	}
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' key `${key.str()}` is already an explicitly declared array of tables. Unexpected redeclaration at "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
}
317
// check_implicitly_declared returns an error if `key` is registered in `p.implicit_declared`.
fn (p &Parser) check_implicitly_declared(key DottedKey) ! {
	if !p.implicit_declared.has(key) {
		return
	}
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' key `${key.str()}` is already implicitly declared. Unexpected redeclaration at "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
}
325
// find_table returns a reference to the map addressed by the parser's current
// root map key (a "dotted" key such as `a.b.c`) inside the *root* table.
// With an empty root map key the root table itself is returned. Missing segments
// are allocated by `find_in_table`, since TOML documents may reference maps
// through multi-segment "dotted" keys.
// See also `find_in_table`.
pub fn (mut p Parser) find_table() !&map[string]ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'locating "${p.root_map_key}" in map ${ptr_str(p.root_map)}')
	mut root := unsafe { &p.root_map }
	if p.root_map_key.len > 0 {
		return p.find_in_table(mut root, p.root_map_key)
	}
	return root
}
341
// allocate_table allocates all tables of the "dotted" `key` (`a.b.c`) in the *root* table.
// An empty `key` allocates nothing.
pub fn (mut p Parser) allocate_table(key DottedKey) ! {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'allocating "${key}" in map ${ptr_str(p.root_map)}')
	mut root := unsafe { &p.root_map }
	if key.len > 0 {
		p.allocate_in_table(mut root, key)!
	}
}
352
// sub_table_key splits a dotted key (`a.b.c`) into its parent part (`a.b`) and
// its last segment (`c`), returned in that order, for use with the
// `find_sub_table` method.
pub fn (mut p Parser) sub_table_key(key DottedKey) (DottedKey, DottedKey) {
	leaf := [key.last()]
	parent := key[..key.len - 1]
	return parent, leaf
}
360
// find_sub_table returns a reference to the map addressed by the "dotted" `key`
// (`a.b.c`), resolved relative to the parser's current root map key, inside the
// *root* table. If the resulting absolute key is empty the root table itself is
// returned. Missing segments are allocated by `find_in_table`, since TOML
// documents may reference maps through multi-segment "dotted" keys.
// See also `find_in_table`.
pub fn (mut p Parser) find_sub_table(key DottedKey) !&map[string]ast.Value {
	abs_key := p.build_abs_dotted_key(key)
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'locating "${abs_key}" in map ${ptr_str(p.root_map)}')
	mut root := unsafe { &p.root_map }
	if abs_key.len > 0 {
		return p.find_in_table(mut root, abs_key)
	}
	return root
}
382
383// find_in_table returns a reference to a map if found in `table` given a "dotted" key (`a.b.c`).
384// If some segments of the key does not exist in the input map find_in_table will
385// allocate a new map for the segment. This behavior is needed because you can
386// reference maps by multiple keys "dotted" (separated by "." periods) in TOML documents.
387pub fn (mut p Parser) find_in_table(mut table map[string]ast.Value, key DottedKey) !&map[string]ast.Value {
388 // NOTE This code is the result of much trial and error.
389 // I'm still not quite sure *exactly* why it works. All I can leave here is a hope
390 // that this kind of minefield someday will be easier in V :)
391 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'locating "${key}" in map ${ptr_str(table)}')
// `t` is re-pointed to one nested map per key segment while walking down from `table`.
392 mut t := unsafe { &table }
393 unsafe {
394 for k in key {
395 if val := t[k] {
396 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'found key "${k}" in ${t.keys()}')
// The segment exists: descend only if it holds a map, any other value is an error.
397 if val is map[string]ast.Value {
398 t = &val
399 } else {
400 return error(@MOD + '.' + @STRUCT + '.' + @FN +
401 ' "${k}" in "${key}" is not a map but `${val.type_name()}`')
402 }
403 } else {
// The segment is missing: store an empty map under it and descend into that map.
404 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
405 'no key "${k}" in "${key}" found, allocating new map at key "${k}" in map ${ptr_str(t)}"')
406 t[k] = map[string]ast.Value{}
407 t = &(t[k] as map[string]ast.Value)
408 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'allocated new map ${ptr_str(t)}"')
409 }
410 }
411 }
412 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'returning map ${ptr_str(t)}"')
413 return t
414}
415
416// is_all_tables returns `true` if *all* entries in `dotted_key` (`a.b.c`) are tables (`map[string]ast.Value`), `false` otherwise.
417fn is_all_tables(table map[string]ast.Value, dotted_key DottedKey) bool {
// An empty key never counts as "all tables".
418 if dotted_key.len == 0 {
419 return false
420 }
421 unsafe {
// Walk down one nested map per segment; nothing is allocated along the way.
422 mut t := &table
423 for key in dotted_key {
424 if val := t[key] {
425 if val is map[string]ast.Value {
426 t = &val
427 } else {
// The segment exists but holds something other than a map.
428 return false
429 }
430 } else {
// The segment does not exist.
431 return false
432 }
433 }
434 }
435 return true
436}
437
// find_array_of_tables returns the array stored in the root table under the parser's
// last encountered "Array Of Tables" key (`p.last_aot`). If that key has more than
// one segment, only its first segment is used for the lookup.
// It returns an error if the key does not exist in the root table or if the
// value stored under it is not an array.
pub fn (mut p Parser) find_array_of_tables() ![]ast.Value {
	mut t := unsafe { &p.root_map }
	mut key := p.last_aot
	if key.len > 1 {
		// Only the left-most segment addresses the array in the root table.
		key = DottedKey([key[0]])
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'locating "${key}" in map ${ptr_str(t)}')
	unsafe {
		if val := t[key.str()] {
			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'found key "${key}" in ${t.keys()}')
			if val is []ast.Value {
				return val
			}
		}
	}
	// The message starts with a space so it does not run into the function name prefix.
	return error(@MOD + '.' + @STRUCT + '.' + @FN + ' no key `${key}` found in map ${ptr_str(t)}')
}
458
459// allocate_in_table allocates all tables in "dotted" `key` (`a.b.c`) in `table`.
460pub fn (mut p Parser) allocate_in_table(mut table map[string]ast.Value, key DottedKey) ! {
461 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'allocating "${key}" in map ${ptr_str(table)}')
// `t` is re-pointed to one nested map per key segment while walking down from `table`.
462 mut t := unsafe { &table }
463 unsafe {
464 for k in key {
465 if val := t[k] {
466 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'found key "${k}" in ${t.keys()}')
// The segment exists: descend only if it holds a map, any other value is an error.
467 if val is map[string]ast.Value {
468 t = &val
469 } else {
470 return error(@MOD + '.' + @STRUCT + '.' + @FN +
471 ' "${k}" in "${key}" is not a map (${val.type_name()})')
472 }
473 } else {
// The segment is missing: store an empty map under it and descend into that map.
474 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
475 'no key "${k}" in "${key}" found, allocating new map at key "${k}" in map ${ptr_str(t)}"')
476 t[k] = map[string]ast.Value{}
477 t = &(t[k] as map[string]ast.Value)
478 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'allocated new map ${ptr_str(t)}"')
479 }
480 }
481 }
482}
483
484// dotted_key returns a string of the next tokens parsed as
485// sub/nested/path keys (e.g. `a.b.c`). In TOML, this form of key is referred to as a "dotted" key.
486pub fn (mut p Parser) dotted_key() !DottedKey {
487 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing dotted key...')
488 mut dotted_key := DottedKey([]string{})
// First segment of the key.
489 key := p.key()!
// Skip spacing so that `peek_tok` is the token following the segment (allows `a . b`).
490 p.ignore_while_peek(space_formatting_kinds())
// NOTE(review): the first segment is stored via `key.str()` while later segments use
// `.text` — confirm both yield the same text for every key type.
491 dotted_key << key.str()
// Every further segment is announced by a `.period` in `peek_tok`.
492 for p.peek_tok.kind == .period {
493 p.next()! // .
494 p.check(.period)!
495 p.ignore_while(space_formatting_kinds())
496 next_key := p.key()!
497 dotted_key << next_key.text
498 p.ignore_while_peek(space_formatting_kinds())
499 }
// Forward once more; presumably this steps past the last key segment (`key()` is not shown here).
500 p.next()!
501 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
502 'parsed dotted key `${dotted_key}` now at "${p.tok.kind}" "${p.tok.lit}"')
503 return dotted_key
504}
505
506// root_table parses next tokens into the root map of `ast.Value`s.
507// The V `map` type is corresponding to a "table" in TOML.
508pub fn (mut p Parser) root_table() ! {
509 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing root table...')
510
511 for p.tok.kind != .eof {
// `skip_next` is set by the `[[table]]` branch below; in that case the parser is
// not forwarded at the start of this iteration.
512 if !p.skip_next {
513 p.next()!
514 } else {
515 p.skip_next = false
516 }
517
518 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
519 'parsing token "${p.tok.kind}" "${p.tok.lit}"')
520 match p.tok.kind {
521 .hash {
// Comments are collected on the AST root.
522 c := p.comment()
523 p.ast_root.comments << c
524 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "${c.text}"')
525 }
526 .whitespace, .tab, .nl, .cr {
527 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
528 'skipping formatting "${p.tok.kind}" "${p.tok.lit}"')
529 continue
530 }
// A token that can start a key: either `a.b.c = value` or `key = value`.
531 .bare, .boolean, .quoted, .number, .minus, .underscore {
532 // Peek forward as far as we can skipping over space formatting tokens.
533 peek_tok, _ := p.peek_over(1, keys_and_space_formatting_kinds())!
534
535 if peek_tok.kind == .period {
536 dotted_key, val := p.dotted_key_value()!
537
538 sub_table, key := p.sub_table_key(dotted_key)
539
// Reject the assignment if every segment of the dotted key already resolves to a table.
540 if is_all_tables(p.root_map, dotted_key) {
541 return error(@MOD + '.' + @STRUCT + '.' + @FN +
542 ' key `${dotted_key.str()}` is already declared. Unexpected redeclaration at "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
543 }
544
545 // NOTE these are *relatively* costly checks. In general - and by specification,
546 // TOML documents are expected to be "small" so this shouldn't be a problem. Famous last words.
547 for explicit_key in p.explicit_declared {
548 // Check for key re-defining:
549 // https://github.com/iarna/toml-spec-tests/blob/1880b1a/errors/inline-table-imutable-1.toml
550
551 if p.build_abs_dotted_key(sub_table) == explicit_key {
552 return error(@MOD + '.' + @STRUCT + '.' + @FN +
553 ' key `${sub_table}` has already been explicitly declared. Unexpected redeclaration at "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
554 }
// Single-segment explicit keys and the current table's own key are exempt
// from the injection check below.
555 if explicit_key.len == 1 || explicit_key == p.root_map_key {
556 continue
557 }
558 // Check for "table injection":
559 // https://github.com/toml-lang/toml-test/blob/576db85/tests/invalid/table/injection-1.toml
560 // https://github.com/toml-lang/toml-test/blob/576db85/tests/invalid/table/injection-2.toml
561 if p.build_abs_dotted_key(sub_table).starts_with(explicit_key) {
562 return error(@MOD + '.' + @STRUCT + '.' + @FN +
563 ' key `${dotted_key}` has already been explicitly declared. Unexpected redeclaration at "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
564 }
565 }
566
567 // Register implicit declaration
// Only the parent part of the key (everything but the last segment) is registered.
568 mut dotted_key_copy := dotted_key.clone()
569 dotted_key_copy.pop()
570 implicit_keys := todo_msvc_astring2dkey(dotted_key_copy)
571 mut abs_dotted_key := p.build_abs_dotted_key(implicit_keys)
572 if !p.implicit_declared.has(abs_dotted_key) {
573 p.implicit_declared << abs_dotted_key
574 }
575
576 t := p.find_sub_table(sub_table)!
577 unsafe {
578 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
579 'setting "${key}" = ${val} in table ${ptr_str(t)}')
580 t[key.str()] = val
581 }
582 } else {
583 p.ignore_while(space_formatting_kinds())
584 key, val := p.key_value()!
585
586 t := p.find_table()!
587 unsafe {
588 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
589 'setting "${key.str()}" = ${val} in table ${ptr_str(t)}')
590 key_str := key.str()
// A plain key may be assigned only once per table.
591 if _ := t[key_str] {
592 return error(@MOD + '.' + @STRUCT + '.' + @FN +
593 ' key "${key}" is already initialized with a value. At "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
594 }
595 t[key_str] = val
596 }
597 }
// After the value only spacing followed by a line ending, a comment or EOF is accepted.
598 p.peek_for_correct_line_ending_or_fail()!
599 }
// A table header: `[[table]]`, `[d.e.f]` or `[key]`.
600 .lsbr {
601 p.check(.lsbr)! // '[' bracket
602 mut peek_tok := p.peek_tok
603
604 // Disallow `[ [table]]`
605 if p.tok.kind in space_formatting_kinds() {
606 peek_tok, _ = p.peek_over(1, space_formatting_kinds())!
607 if peek_tok.kind == .lsbr {
608 return error(@MOD + '.' + @STRUCT + '.' + @FN +
609 ' unexpected "${p.tok.kind}" "${p.tok.lit}" at this (excerpt): "...${p.excerpt()}..."')
610 }
611 }
612
613 // Allow `[ d.e.f]`
614 p.ignore_while(space_formatting_kinds())
615
616 // Peek forward as far as we can skipping over space formatting tokens.
617 peek_tok, _ = p.peek_over(1, keys_and_space_formatting_kinds())!
618
619 if p.tok.kind == .lsbr {
620 // Parse `[[table]]`
621 unsafe {
622 p.array_of_tables(mut &p.root_map)!
623 }
624 p.skip_next = true // skip calling p.next() in coming iteration
625 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
626 'leaving double bracket at "${p.tok.kind}" "${p.tok.lit}". NEXT is "${p.peek_tok.kind} "${p.peek_tok.lit}"')
627 } else if peek_tok.kind == .period {
628 // Parse `[d.e.f]`
629 dotted_key := p.dotted_key()!
630
631 // So apparently TOML is a *very* key context sensitive language...
632 // [[table]] <- parsed previously
633 // ...
634 // [table.key] <- parser is here
635 //
636 // `table.key` now shape shifts into being a *double array of tables* key...
637 // ... but with a different set of rules - making it hard to reuse the code we already have for that ...
638 // See `testdata/array_of_tables_edge_case_<N>_test.toml` for the type of constructs parsed.
639 if p.last_aot.len == 1 && dotted_key.len > 1
640 && dotted_key[0] == p.last_aot.str() {
641 // Disallow re-declaring the key
642 p.check_explicitly_declared_array_of_tables(dotted_key)!
643 p.check(.rsbr)!
644 p.ignore_while(space_formatting_kinds())
645 arr := p.find_array_of_tables()!
// The new table is stored inside the array element at `p.last_aot_index`.
646 if val := arr[p.last_aot_index] {
647 if val is map[string]ast.Value {
648 mut m := map[string]ast.Value{}
649 p.table_contents(mut m)!
650 unsafe {
651 mut mut_val := &val
652 if dotted_key.len == 2 {
653 // [table.key]
654 mut_val[dotted_key[1].str()] = m
655 } else {
656 // [table.key.key.etc]
// Drop the leading array-of-tables segment before resolving the rest of the key.
657 mut dotted_key_copy := dotted_key.clone()
658 dotted_key_copy.delete(0)
659 new_key := todo_msvc_astring2dkey(dotted_key_copy)
660 sub_table, key := p.sub_table_key(new_key)
661 t := p.find_in_table(mut mut_val, sub_table)!
662 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
663 'setting "${key}" = ${val} in table ${ptr_str(t)}')
664 t[new_key.last().str()] = m
665 }
666 }
667 } else {
668 return error(@MOD + '.' + @STRUCT + '.' + @FN +
669 ' "${p.last_aot_index}" in array is not a map but `${typeof(val).name}`')
670 }
671 }
672 continue
673 }
674
675 // Disallow mutation of immutable values (inline tables)
676 if dotted_key.len > 1 {
677 for part in dotted_key {
678 dotted_part := DottedKey([part])
679 if p.explicit_declared.has(dotted_part) {
680 p.check_immutable(dotted_part)!
681 }
682 }
683 }
684
685 // Disallow re-defining
686 // This check also covers *implicit* table allocations from "dotted" keys, so no need for e.g: `p.check_implicitly_declared(dotted_key)!`
687 if is_all_tables(p.root_map, dotted_key) {
688 return error(@MOD + '.' + @STRUCT + '.' + @FN +
689 ' key `${dotted_key.str()}` is already declared. Unexpected redeclaration at "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
690 }
691 p.explicit_declared << dotted_key
692
693 p.ignore_while(space_formatting_kinds())
694 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
695 'setting root map key to `${dotted_key}` at "${p.tok.kind}" "${p.tok.lit}"')
// Following key/value pairs are assigned relative to this table.
696 p.root_map_key = dotted_key
697 p.allocate_table(p.root_map_key)!
698 p.expect(.rsbr)!
699 p.peek_for_correct_line_ending_or_fail()!
700 } else {
701 // Parse `[key]`
702 key := p.key()!
703 dotted_key := DottedKey([key.str()])
704
// The nested `or` blocks run only when the key is both implicitly and explicitly declared.
705 p.check_implicitly_declared(dotted_key) or {
706 p.check_explicitly_declared(dotted_key) or {
707 if p.root_map[key.str()] or { ast.Bool{} } is map[string]ast.Value {
708 // NOTE: Here we "undo" the implicit-explicit special case declaration for:
709 // https://github.com/toml-lang/toml-test/blob/576db852/tests/invalid/table/array-implicit.toml
710 // ... to make the following test pass:
711 // https://github.com/toml-lang/toml-test/blob/229ce2e/tests/valid/table/array-implicit-and-explicit-after.toml
712 p.undo_special_case_01(dotted_key)
713 }
714 }
715 }
716 // Disallow re-declaring the key
717 p.check_explicitly_declared(dotted_key)!
718 p.explicit_declared << dotted_key
719
720 // Allow [ key ]
721 p.ignore_while(space_formatting_kinds())
722
723 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
724 'setting root map key to `${dotted_key}` at "${p.tok.kind}" "${p.tok.lit}"')
// Following key/value pairs are assigned relative to this table.
725 p.root_map_key = dotted_key
726 p.allocate_table(p.root_map_key)!
727 p.next()!
728 p.ignore_while(space_formatting_kinds())
729 p.expect(.rsbr)!
730 p.peek_for_correct_line_ending_or_fail()!
731 }
732 }
733 .eof {
734 return
735 }
736 else {
737 return error(@MOD + '.' + @STRUCT + '.' + @FN +
738 ' could not parse "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
739 }
740 }
741 }
742}
743
// excerpt returns the scanner's excerpt of the text around the position of
// the current token (`Parser.tok.pos`).
fn (p &Parser) excerpt() string {
	pos := p.tok.pos
	return p.scanner.excerpt(pos, 10)
}
748
// table_contents parses next tokens into `tbl`, a map of `ast.Value`s.
// The V `map` type is corresponding to a "table" in TOML.
// Parsing stops when the end of the token stream is reached or when the peeked
// token is a `[` (`.lsbr`). Comments encountered on the way are collected in
// `p.ast_root.comments`.
// An error is returned for a key that is already set directly in `tbl`, for an
// unexpected token, or when something other than spacing, a line ending,
// a comment or EOF follows a key/value pair.
pub fn (mut p Parser) table_contents(mut tbl map[string]ast.Value) ! {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing table contents...')

	for p.tok.kind != .eof {
		// Return *before* consuming the `[`, leaving it for the caller.
		if p.peek_tok.kind == .lsbr {
			return
		}
		if !p.skip_next {
			p.next()!
		} else {
			p.skip_next = false
		}

		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
			'parsing token "${p.tok.kind}" "${p.tok.lit}"')
		match p.tok.kind {
			.hash {
				c := p.comment()
				p.ast_root.comments << c
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "${c.text}"')
			}
			.whitespace, .tab, .nl, .cr {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
					'skipping formatting "${p.tok.kind}" "${p.tok.lit}"')
				continue
			}
			// `.boolean` is accepted as the start of a key, exactly like in `root_table`,
			// so that keys such as `true = 1` also work inside these tables.
			.bare, .boolean, .quoted, .number, .minus, .underscore {
				// Peek forward as far as we can skipping over space formatting tokens.
				peek_tok, _ := p.peek_over(1, keys_and_space_formatting_kinds())!

				if peek_tok.kind == .period {
					// `a.b.c = value`: store the value in the (possibly new) sub table `a.b`.
					dotted_key, val := p.dotted_key_value()!

					sub_table, key := p.sub_table_key(dotted_key)

					t := p.find_in_table(mut tbl, sub_table)!
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
							'setting "${key}" = ${val} in table ${ptr_str(t)}')
						t[key.str()] = val
					}
				} else {
					// `key = value`: store the value directly in `tbl`.
					p.ignore_while(space_formatting_kinds())
					key, val := p.key_value()!

					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
							'setting "${key.str()}" = ${val} in table ${ptr_str(tbl)}')
						key_str := key.str()
						if _ := tbl[key_str] {
							return error(@MOD + '.' + @STRUCT + '.' + @FN +
								' key "${key}" is already initialized with a value. At "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
						}
						tbl[key_str] = val
					}
				}
				p.peek_for_correct_line_ending_or_fail()!
			}
			.eof {
				break
			}
			else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' could not parse "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
			}
		}
	}
}
819
// inline_table parses next tokens into `tbl`, a map of `ast.Value`s.
// The V map type is corresponding to a "table" in TOML.
// The function returns successfully once a `}` (`.rcbr`) token is reached.
// Whenever it returns, `p.value_is_immutable` is set to true.
// An error is returned for a missing or misplaced `,` separator, for a key that
// is already set in its target table, for an unexpected token, or when the end
// of the token stream is reached before the closing `}`.
pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ! {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing inline table into ${ptr_str(tbl)}...')
	defer { p.value_is_immutable = true }
	mut previous_token_was_value := false
	for p.tok.kind != .eof {
		p.next()!
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "${p.tok.kind}"')

		// After a key/value pair only `,` or the closing `}` may follow.
		if previous_token_was_value {
			p.ignore_while(space_formatting_kinds())
			if p.tok.kind != .rcbr {
				p.expect(.comma)!
			}
			previous_token_was_value = false
		}

		match p.tok.kind {
			.whitespace, .tab {
				/*
				if !p.scanner.config.tokenize_formatting {
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${p.tok.kind}" "${p.tok.lit}"')
					continue
				}*/
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
					'skipping formatting "${p.tok.kind}" "${p.tok.lit}"')
				continue
			}
			.comma {
				// Disallow `,,` and a trailing `,` directly before `}`.
				p.ignore_while_peek(space_formatting_kinds())
				if p.peek_tok.kind in [.comma, .rcbr] {
					p.next()! // Forward to the peek_tok
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' unexpected "${p.tok.kind}" "${p.tok.lit}" at this (excerpt): "...${p.excerpt()}..."')
				}
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
					'skipping comma table value separator "${p.tok.lit}"')
				continue
			}
			.rcbr {
				// '}' bracket
				return
			}
			// `.boolean` is accepted as the start of a key, exactly like in `root_table`,
			// so that keys such as `true = 1` also work inside inline tables.
			.bare, .boolean, .quoted, .number, .minus, .underscore {
				// Peek forward as far as we can skipping over space formatting tokens.
				peek_tok, _ := p.peek_over(1, space_formatting_kinds())!

				if peek_tok.kind == .period {
					dotted_key, val := p.dotted_key_value()!

					sub_table, key := p.sub_table_key(dotted_key)
					mut t := p.find_in_table(mut tbl, sub_table)!

					// Disallow mutation of immutable values (inline tables)
					if p.explicit_declared.has(dotted_key) {
						left_most := DottedKey([dotted_key[0]])
						if t.len > 0 {
							p.check_immutable(left_most)!
						}
					}
					key_str := key.str()
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
						'inserting @6 "${key_str}" = ${val} into ${ptr_str(t)}')
					unsafe {
						if _ := t[key_str] {
							return error(@MOD + '.' + @STRUCT + '.' + @FN +
								' key "${key_str}" is already initialized with a value. At "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
						}
						t[key_str] = val
					}
				} else {
					p.ignore_while(space_formatting_kinds())
					key, val := p.key_value()!
					key_str := key.str()
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
						'inserting @5 "${key_str}" = ${val} into ${ptr_str(tbl)}')
					if _ := tbl[key_str] {
						return error(@MOD + '.' + @STRUCT + '.' + @FN +
							' key "${key_str}" is already initialized with a value. At "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
					}
					tbl[key_str] = val
				}
				previous_token_was_value = true
			}
			else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' unexpected "${p.tok.kind}" "${p.tok.lit}" at this (excerpt): "...${p.excerpt()}..."')
			}
		}
	}
	// Make sure the inline-table actually use the return at .rcbr match branch.
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' unexpected end of inline-table "${p.tok.kind}" "${p.tok.lit}" at this (excerpt): "...${p.excerpt()}..."')
}
915
// array_of_tables handles a `[[key]]` header: the table contents that follow it
// are parsed and added as a new entry of the array stored at `key` in `table`.
// Headers whose key contains a period (`[[a.b]]`) are handed to `double_array_of_tables`.
// Afterwards `p.last_aot` and `p.last_aot_index` refer to the entry that was just added.
@[autofree_bug; manualfree]
pub fn (mut p Parser) array_of_tables(mut table map[string]ast.Value) ! {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'parsing array of tables "${p.tok.kind}" "${p.tok.lit}"')
	// NOTE this is starting to get ugly. TOML isn't simple at this point
	p.check(.lsbr)! // '[' bracket

	// Tolerate space formatting before the key, as in `[[ key]]`
	p.ignore_while(space_formatting_kinds())
	after_key_tok, _ := p.peek_over(1, space_formatting_kinds())!
	p.ignore_while(space_formatting_kinds())

	// A period right after the first key part means `[[key.key]]`
	if after_key_tok.kind == .period {
		p.double_array_of_tables(mut table)!
		return
	}

	key := p.key()!
	p.next()!

	// Tolerate space formatting after the key, as in `[[key ]]`
	p.ignore_while(space_formatting_kinds())

	p.check(.rsbr)!
	p.peek_for_correct_line_ending_or_fail()!
	p.expect(.rsbr)!

	p.ignore_while(all_formatting_kinds())

	aot_key := DottedKey([key.str()])
	aot_key_str := aot_key.str()

	// The key may not have been declared explicitly before
	p.check_explicitly_declared(aot_key)!
	unsafe {
		if existing := table[aot_key_str] {
			// Only an existing array can receive another entry
			if existing !is []ast.Value {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' table[${aot_key_str}] is not an array. (excerpt): "...${p.excerpt()}..."')
			}
			entries := &(table[aot_key_str] as []ast.Value)
			entries << p.array_of_tables_contents()!
			table[aot_key_str] = entries
		} else {
			table[aot_key_str] = p.array_of_tables_contents()!
		}
	}
	p.last_aot = aot_key

	// Remember the index of the entry that was just added
	unsafe {
		entries := &(table[p.last_aot.str()] as []ast.Value)
		p.last_aot_index = entries.len - 1
	}
}
973
// array_of_tables_contents parses the table contents found at the current position
// and returns them as the single element of a new `[]ast.Value`.
@[autofree_bug; manualfree]
pub fn (mut p Parser) array_of_tables_contents() ![]ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'parsing contents from "${p.tok.kind}" "${p.tok.lit}"')

	mut contents := map[string]ast.Value{}
	p.table_contents(mut contents)!

	// Wrap the parsed table so that callers can append it to an array of tables
	mut entries := []ast.Value{}
	entries << contents
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'parsed array of tables ${ast.Value(entries)}. leaving at "${p.tok.kind}" "${p.tok.lit}"')
	return entries
}
989
990 // double_array_of_tables parses next tokens into an array of tables of arrays of `ast.Value`s...
// It is called by `array_of_tables` when the header key contains a period (`[[a.b]]`).
// Only the first two parts of the dotted key are used here: `first` names the parent
// entry that is looked up in `table`, `last` names the array inside that parent which
// receives the parsed contents.
// An error is returned when `is_all_tables(p.root_map, dotted_key)` holds (reported as a
// redeclaration), when `first` does not match a non-empty `p.last_aot`, or when an
// existing entry does not have the type (table or array) that is needed.
991 @[autofree_bug; manualfree]
992 pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ! {
993 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
994 'parsing nested array of tables "${p.tok.kind}" "${p.tok.lit}"')
995
996 dotted_key := p.dotted_key()!
997 p.ignore_while(space_formatting_kinds())
998
// The header has to be closed by two `.rsbr` tokens
999 p.check(.rsbr)!
1000 p.expect(.rsbr)!
1001
1002 p.ignore_while(all_formatting_kinds())
1003
1004 p.check_explicitly_declared(dotted_key)!
1005 if is_all_tables(p.root_map, dotted_key) {
1006 return error(@MOD + '.' + @STRUCT + '.' + @FN +
1007 ' key `${dotted_key.str()}` is already declared. Unexpected redeclaration at "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
1008 }
1009
// Register the key once as an explicitly declared array of tables
1010 if !p.explicit_declared_array_of_tables.has(dotted_key) {
1011 p.explicit_declared_array_of_tables << dotted_key
1012 }
1013
1014 first := DottedKey([dotted_key[0]]) // The array that holds the entries
1015 last := DottedKey([dotted_key[1]]) // The key the parsed array data should be added to
1016
1017 mut t_arr := &[]ast.Value(unsafe { nil })
1018 mut t_map := ast.Value(ast.Null{})
1019
1020 unsafe {
// Two-part keys: handled right here when `first` is an existing table or does not exist yet.
// When `first` exists but is not a table, execution continues below.
1021 if dotted_key.len == 2 {
1022 if table_first := table[first.str()] {
1023 if table_first is map[string]ast.Value {
1024 mut t := &(table_first as map[string]ast.Value)
1025 if val := t[last.str()] {
1026 if val is []ast.Value {
1027 mut arr := &val
1028 arr << p.array_of_tables_contents()!
1029 t[last.str()] = arr
1030 } else {
1031 return error(@MOD + '.' + @STRUCT + '.' + @FN +
1032 ' t[${last.str()}] is not an array. (excerpt): "...${p.excerpt()}..."')
1033 }
1034 } else {
1035 t[last.str()] = p.array_of_tables_contents()!
1036 }
1037 p.last_aot.clear()
1038 p.last_aot_index = 0
1039 return
1040 }
1041 } else {
1042 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
1043 'implicit allocation of map for `${first}` in dotted key `${dotted_key}`.')
1044 mut t := &map[string]ast.Value{}
1045 p.implicit_declared << first
1046 // NOTE: We register this implicit allocation also as *explicit* to be able to catch a special case like:
1047 // https://github.com/toml-lang/toml-test/blob/576db852/tests/invalid/table/array-implicit.toml
1048 // See also: undo_special_case_01
1049 p.explicit_declared << first
1050 t[last.str()] = p.array_of_tables_contents()!
1051 table[first.str()] = ast.Value(t)
1052 p.last_aot.clear()
1053 p.last_aot_index = 0
1054 return
1055 }
1056 }
1057
1058 // NOTE this is starting to get EVEN uglier. TOML is not *at all* simple at this point...
1059 if first != p.last_aot {
1060 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, '${first} != ${p.last_aot}')
1061 // Implicit allocation
1062 if p.last_aot.len == 0 {
1063 p.last_aot = first
1064 mut nm := &p.root_map
1065 if first.str() in table.keys() {
1066 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
1067 'adding to existing table entry at `${first}`.')
1068 table_first := table[first.str()]
1069 if table_first !is map[string]ast.Value {
1070 return error(@MOD + '.' + @STRUCT + '.' + @FN +
1071 ' expected a table at "${first.str()}" but got "${table_first.type_name()}" instead. (excerpt): "...${p.excerpt()}..."')
1072 }
1073 nm = &(table_first as map[string]ast.Value)
1074 } else {
1075 util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
1076 'implicit allocation of map for `${first}` in dotted key `${dotted_key}`.')
1077 nm = &map[string]ast.Value{}
1078 p.implicit_declared << first
1079 // NOTE: We register this implicit allocation also as *explicit* to be able to catch a special case like:
1080 // https://github.com/toml-lang/toml-test/blob/576db852/tests/invalid/table/array-implicit.toml
1081 // See also: undo_special_case_01
1082 p.explicit_declared << first
1083 }
1084
// Start with an empty array at `last` and add the parsed contents to it
1085 nm[last.str()] = []ast.Value{}
1086 table[first.str()] = ast.Value(nm)
1087
1088 t_arr = &(nm[last.str()] as []ast.Value)
1089 t_arr << p.array_of_tables_contents()!
1090 return
1091 } else {
1092 return error(@MOD + '.' + @STRUCT + '.' + @FN +
1093 ' nested array of tables key "${first}" does not match "${p.last_aot}". (excerpt): "...${p.excerpt()}..."')
1094 }
1095 }
1096
// From here on `first` equals `p.last_aot`
1097 array_of_tables := table[p.last_aot.str()]
1098 if first == p.last_aot {
1099 if array_of_tables is map[string]ast.Value {
1100 // NOTE: Here we "undo" the implicit-explicit special case declaration for:
1101 // https://github.com/toml-lang/toml-test/blob/576db852/tests/invalid/table/array-implicit.toml
1102 // ... to make the following test pass:
1103 // https://github.com/toml-lang/toml-test/blob/229ce2e/tests/valid/array/open-parent-table.toml
1104 p.undo_special_case_01(dotted_key)
1105 p.next()!
1106 return
1107 }
1108 }
1109
1110 // Give a nicer error if the `as` cast below can not be done
1111 if array_of_tables !is []ast.Value {
1112 return error(@MOD + '.' + @STRUCT + '.' + @FN +
1113 ' nested array of tables "${p.last_aot}" expected an array but got "${table[p.last_aot.str()].type_name()}". Re-definition is not allowed. (excerpt): "...${p.excerpt()}..."')
1114 }
1115 t_arr = &(array_of_tables as []ast.Value)
// Use the entry at `p.last_aot_index` when the array has one, otherwise a new empty table
1116 t_map = ast.Value(map[string]ast.Value{})
1117 if p.last_aot_index < t_arr.len {
1118 t_map = t_arr[p.last_aot_index]
1119 }
1120
1121 if t_map !is map[string]ast.Value {
1122 return error(@MOD + '.' + @STRUCT + '.' + @FN +
1123 ' expected a table but got "${t_map.type_name()}". (excerpt): "...${p.excerpt()}..."')
1124 }
1125 mut t := &(t_map as map[string]ast.Value)
1126
1127 if val := t[last.str()] {
1128 if val is []ast.Value {
1129 mut arr := &val
1130 arr << p.double_array_of_tables_contents(dotted_key)!
1131 t[last.str()] = arr
1132 } else {
1133 return error(@MOD + '.' + @STRUCT + '.' + @FN +
1134 ' t[${last.str()}] is not an array. (excerpt): "...${p.excerpt()}..."')
1135 }
1136 } else {
1137 t[last.str()] = p.double_array_of_tables_contents(dotted_key)!
1138 }
// The table is appended to the array only when the array is still empty
1139 if t_arr.len == 0 {
1140 t_arr << t
1141 p.last_aot_index = t_arr.len - 1
1142 }
1143 }
1144}
1145
// double_array_of_tables_contents parses the key/value pairs and `[a.b.c]` sub-table
// headers that follow a nested array of tables header. Everything is collected into one
// table, which is returned as the single element of a new `[]ast.Value`.
// Parsing stops when the next header is another `[[`, or at a token kind that is not handled here.
@[autofree_bug; manualfree]
pub fn (mut p Parser) double_array_of_tables_contents(target_key DottedKey) ![]ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'parsing contents from "${p.tok.kind}" "${p.tok.lit}"')
	mut entry := map[string]ast.Value{}

	// Key of the sub-table that following key/value pairs are inserted into.
	// It is set when a `[a.b.c]` header is met.
	mut implicit_key := DottedKey([]string{})
	mut skipped := 0
	mut ahead := p.peek_tok

	for p.tok.kind != .eof {
		p.next()!
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "${p.tok.kind}"')
		p.ignore_while(all_formatting_kinds())

		// Look ahead, skipping over space formatting tokens.
		ahead, skipped = p.peek_over(1, space_formatting_kinds())!
		// Two `.lsbr` tokens in a row (`[[`) end the contents of this entry
		if ahead.kind == .lsbr {
			ahead, skipped = p.peek_over(skipped + 1, space_formatting_kinds())!
			if ahead.kind == .lsbr {
				mut early := []ast.Value{}
				early << entry
				return early
			}
		}

		match p.tok.kind {
			.bare, .quoted, .number, .minus, .underscore {
				// Look ahead, skipping over space formatting tokens.
				ahead, _ = p.peek_over(1, space_formatting_kinds())!

				if ahead.kind == .period {
					mut pair_key, pair_val := p.dotted_key_value()!

					if implicit_key.len > 0 {
						pair_key.insert(0, implicit_key)
					}
					parent_key, leaf_key := p.sub_table_key(pair_key)

					mut target := p.find_in_table(mut entry, parent_key)!
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
							'inserting @6 "${leaf_key}" = ${pair_val} into ${ptr_str(target)}')
						target[leaf_key.str()] = pair_val
					}
				} else {
					pair_key, pair_val := p.key_value()!

					mut target := unsafe { &entry }
					if implicit_key.len > 0 {
						target = p.find_in_table(mut entry, implicit_key)!
					}
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
							'inserting @7 "${pair_key}" = ${pair_val} into ${ptr_str(target)}')
						target[pair_key.str()] = pair_val
					}
				}
			}
			.lsbr {
				p.check(.lsbr)! // '[' bracket

				// Allow `[ d.e.f]`
				p.ignore_while(space_formatting_kinds())

				// Look ahead, skipping over space formatting tokens.
				ahead, _ = p.peek_over(1, space_formatting_kinds())!

				// Only headers with a dotted key are handled here
				if ahead.kind != .period {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' could not parse "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
				}

				// Parse `[d.e.f]`
				p.ignore_while(space_formatting_kinds())
				header_key := p.dotted_key()!
				implicit_key = unsafe { header_key }
				if header_key.len > 2 {
					// Keep only the parts after the first two
					implicit_key = header_key[2..]
				}
				p.ignore_while(space_formatting_kinds())
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
					'keys are: dotted `${header_key}`, target `${target_key}`, implicit `${implicit_key}` at "${p.tok.kind}" "${p.tok.lit}"')
				p.expect(.rsbr)!
				p.peek_for_correct_line_ending_or_fail()!
				p.explicit_declared << header_key
				continue
			}
			else {
				break
			}
		}
	}
	mut result := []ast.Value{}
	result << entry
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'parsed array of tables ${ast.Value(result)}. leaving at "${p.tok.kind}" "${p.tok.lit}"')
	return result
}
1248
// array parses a `[ ... ]` array at the current position and returns its elements.
// Elements have to be separated by commas; a comma directly before the closing bracket
// is accepted after a value. Comments met inside the array are stored in `p.ast_root.comments`.
pub fn (mut p Parser) array() ![]ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array...')
	mut values := []ast.Value{}
	p.expect(.lsbr)! // '[' bracket
	// True right after a value was parsed, i.e. when a separator has to follow
	mut needs_separator := false
	for p.tok.kind != .eof {
		p.next()!
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
			'parsing token "${p.tok.kind}" "${p.tok.lit}"')

		if needs_separator {
			p.ignore_while(all_formatting_kinds())
			if p.tok.kind !in [.rsbr, .hash] {
				p.expect(.comma)!
			}
			needs_separator = false
		} else if p.tok.kind == .comma {
			// A comma that does not follow a value may not be followed by the closing bracket
			p.ignore_while_peek(space_formatting_kinds())
			if p.peek_tok.kind == .rsbr {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' unexpected empty value in array "${p.tok.kind}" "${p.tok.lit}" at this (excerpt): "...${p.excerpt()}..."')
			}
		}
		p.ignore_while(all_formatting_kinds())
		match p.tok.kind {
			.rsbr {
				break
			}
			.boolean {
				values << ast.Value(p.boolean()!)
				needs_separator = true
			}
			.number {
				parsed := p.number_or_date()!
				values << parsed
				needs_separator = true
			}
			.quoted {
				values << ast.Value(p.quoted())
				needs_separator = true
			}
			.lsbr {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
					'parsing array in array "${p.tok.kind}" "${p.tok.lit}"')
				values << ast.Value(p.array()!)
				needs_separator = true
			}
			.lcbr {
				p.ignore_while(space_formatting_kinds())
				mut inline_tbl := map[string]ast.Value{}
				p.inline_table(mut inline_tbl)!
				values << ast.Value(inline_tbl)
				needs_separator = true
			}
			.comma {
				p.ignore_while_peek(space_formatting_kinds())
				// Trailing commas before array close is allowed
				// so `p.peek_tok.kind == .rsbr` is not treated as an error here.

				// Check for known errors:
				if p.peek_tok.kind == .comma || p.peek_tok.kind == .bare {
					p.next()! // Forward to the peek_tok
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' unexpected "${p.tok.kind}" "${p.tok.lit}" at this (excerpt): "...${p.excerpt()}..."')
				}
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
					'skipping comma table value separator "${p.tok.lit}"')
				continue
			}
			.hash {
				cmt := p.comment()
				p.ast_root.comments << cmt
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "${cmt.text}"')
			}
			.eof {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' could not parse array. Reached EOF "${p.tok.kind}" "${p.tok.lit}" ("${p.tok.lit}") in this (excerpt): "...${p.excerpt()}..."')
			}
			.bare {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' unexpected value "${p.tok.lit}". Array values should be quoted (with " or \') in this (excerpt): "...${p.excerpt()}..."')
			}
			else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' unexpected token "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
			}
		}
	}
	p.expect(.rsbr)! // ']' bracket
	$if debug {
		one_line := values.str().replace('\n', r'\n')
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
			'parsed array: ${one_line} . Currently @ token "${p.tok.kind}"')
	}
	return values
}
1348
// comment returns an `ast.Comment` built from the literal and position of the current token.
pub fn (mut p Parser) comment() ast.Comment {
	text := p.tok.lit
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed hash comment "#${text}"')
	return ast.Comment{
		text: text
		pos:  p.tok.pos()
	}
}
1357
// key parses the token(s) at the current position and returns them as an `ast.Key`.
// Keys are the token(s) appearing before an assignment operator (=).
// Accepted start tokens are `.bare`, `.underscore`, `.minus`, `.number`, `.quoted` and `.boolean`;
// any other token kind results in an error.
// For quoted keys, multiline strings are rejected and the key is checked when
// `p.config.run_checks` is set; escapes are decoded when `p.config.decode_values` is set.
pub fn (mut p Parser) key() !ast.Key {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing key from "${p.tok.lit}" ...')

	mut key := ast.Key(ast.Null{})
	if p.tok.kind == .number {
		if p.peek_tok.kind == .minus {
			// Keys like `1-2-3`: join the literals of all tokens (except space formatting)
			// up to the token that ends the key.
			mut lits := p.tok.lit
			pos := p.tok.pos()
			for p.peek_tok.kind != .assign && p.peek_tok.kind != .period && p.peek_tok.kind != .rsbr {
				p.next()!
				// None of the kinds that end the loop can show up once the input is
				// exhausted, so stop here instead of advancing over `.eof` again and again.
				if p.tok.kind == .eof {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' unexpected end of input while parsing key "${lits}" in this (excerpt): "...${p.excerpt()}..."')
				}
				if p.tok.kind !in space_formatting_kinds() {
					lits += p.tok.lit
				}
			}
			return ast.Key(ast.Bare{
				text: lits
				pos:  pos
			})
		}
		num := p.number()
		// Handles if key is `1key`
		if p.peek_tok.kind in [.bare, .underscore, .minus] {
			bare := p.bare()!
			return bare
		}
		key = ast.Key(num)
	} else {
		key = match p.tok.kind {
			.bare, .underscore, .minus {
				ast.Key(p.bare()!)
			}
			.boolean {
				ast.Key(p.boolean()!)
			}
			.quoted {
				ast.Key(p.quoted())
			}
			else {
				ast.Key(ast.Null{})
			}
		}
	}

	// NOTE kept for eased debugging
	// util.printdbg(@MOD +'.' + @STRUCT + '.' + @FN, 'parsed key "${p.tok.lit}"')
	// panic(@MOD + '.' + @STRUCT + '.' + @FN + ' could not parse ${p.tok.kind} ("${p.tok.lit}") token \n${p.tok}')
	// return ast.Key(ast.Bare{})

	// `ast.Null` is still in place when the current token can not start a key
	if key is ast.Null {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' key expected .bare, .underscore, .number, .quoted or .boolean but got "${p.tok.kind}"')
	}

	// A few small exceptions that can't easily be done via `checker` or `decoder` *after* the
	// main table has been built, since information like `is_multiline` is lost when using the key.text as a
	// V `map` key directly.
	mut decoded_key := key
	if key is ast.Quoted {
		if p.config.run_checks {
			quoted := key as ast.Quoted
			if quoted.is_multiline {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' multiline string as key is not allowed. (excerpt): "...${p.excerpt()}..."')
			}
			chckr := checker.Checker{
				scanner: p.scanner
			}
			chckr.check_quoted(quoted)!
		}
		if p.config.decode_values {
			mut quoted := key as ast.Quoted
			decoder.decode_quoted_escapes(mut quoted)!
			decoded_key = ast.Key(quoted)
		}
	}

	return decoded_key
}
1438
// key_value parses a `key = value` pair and returns it as an `ast.Key` and an `ast.Value`.
// The absolute form of the key (see `build_abs_dotted_key`) is recorded in `p.explicit_declared`.
// When `p.value_is_immutable` is set after the value has been parsed, the absolute key is also
// recorded in `p.immutable` and the flag is reset.
// see also `key()` and `value()`
pub fn (mut p Parser) key_value() !(ast.Key, ast.Value) {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing key value pair...')
	key := p.key()!
	dotted_key := DottedKey([key.str()])
	p.explicit_declared << p.build_abs_dotted_key(dotted_key)
	p.next()!
	p.ignore_while(space_formatting_kinds())
	p.check(.assign)! // Assignment operator
	p.ignore_while(space_formatting_kinds())
	value := p.value()!
	if p.value_is_immutable {
		// `p.immutable` holds absolute keys, so the lookup has to use the absolute key too.
		// Looking up the relative key could find an unrelated entry with the same name
		// and leave this key unmarked.
		abs_dotted_key := p.build_abs_dotted_key(dotted_key)
		if !p.immutable.has(abs_dotted_key) {
			p.immutable << abs_dotted_key // Mark the key we are assigning to as immutable
		}
		p.value_is_immutable = false
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed key value pair. `${key} = ${value}`')
	return key, value
}
1460
// dotted_key_value parses an `a.b.c = value` pair and returns it as a `DottedKey` and an `ast.Value`.
// The absolute form of the key (see `build_abs_dotted_key`) is recorded in `p.explicit_declared`.
// When `p.value_is_immutable` is set after the value has been parsed, the absolute key is also
// recorded in `p.immutable` and the flag is reset.
// see also `key()` and `value()`
pub fn (mut p Parser) dotted_key_value() !(DottedKey, ast.Value) {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing dotted key value pair...')
	p.ignore_while(space_formatting_kinds())
	dotted_key := p.dotted_key()!
	p.explicit_declared << p.build_abs_dotted_key(dotted_key)
	p.ignore_while(space_formatting_kinds())
	p.check(.assign)!
	p.ignore_while(space_formatting_kinds())
	value := p.value()!
	if p.value_is_immutable {
		// `p.immutable` holds absolute keys, so the lookup has to use the absolute key too.
		// Looking up the relative key could find an unrelated entry with the same name
		// and leave this key unmarked.
		abs_dotted_key := p.build_abs_dotted_key(dotted_key)
		if !p.immutable.has(abs_dotted_key) {
			p.immutable << abs_dotted_key // Mark the key we are assigning to as immutable
		}
		p.value_is_immutable = false
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'parsed dotted key value pair `${dotted_key} = ${value}`...')

	return dotted_key, value
}
1483
// value parses the token(s) at the current position and returns them as an `ast.Value`.
// Values are the token(s) appearing after an assignment operator (=).
// Numbers and dates, quoted strings, booleans, arrays and inline tables are handled;
// any other token kind results in an error.
pub fn (mut p Parser) value() !ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN,
		'parsing value from token "${p.tok.kind}" "${p.tok.lit}"...')
	// `ast.Null` marks a token kind that can not start a value
	parsed := match p.tok.kind {
		.number {
			p.number_or_date()!
		}
		.quoted {
			ast.Value(p.quoted())
		}
		.boolean {
			ast.Value(p.boolean()!)
		}
		.lsbr {
			ast.Value(p.array()!)
		}
		.lcbr {
			p.ignore_while(space_formatting_kinds())
			mut inline_tbl := map[string]ast.Value{}
			p.inline_table(mut inline_tbl)!
			ast.Value(inline_tbl)
		}
		else {
			ast.Value(ast.Null{})
		}
	}

	if parsed is ast.Null {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' value expected .boolean, .quoted, .lsbr, .lcbr or .number got "${p.tok.kind}" "${p.tok.lit}" in this (excerpt): "...${p.excerpt()}..."')
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed "${p.tok.kind}" as value ${parsed}')
	return parsed
}
1523
// number_or_date parses the current token(s) and returns an `ast.Value` holding
// one of [`ast.Date`, `ast.Time`, `ast.DateTime`, `ast.Number`].
// A date and/or time is parsed when the next token is `.minus` or `.colon`.
pub fn (mut p Parser) number_or_date() !ast.Value {
	// Anything that is not followed by `-` or `:` is a plain number
	if p.peek_tok.kind !in [.minus, .colon] {
		return ast.Value(p.number())
	}
	parsed := p.date_time()!
	return match parsed {
		ast.Date {
			ast.Value(parsed)
		}
		ast.Time {
			ast.Value(parsed)
		}
		ast.DateTime {
			ast.Value(parsed)
		}
	}
}
1544
// bare parses a bare key starting at the current token and returns it as an `ast.Bare`.
// The literals of the following `.bare`, `.minus` and `.underscore` tokens are joined until the
// next token is `.assign`, `.period`, `.rsbr` or space formatting. Any other token kind is an error.
pub fn (mut p Parser) bare() !ast.Bare {
	start_pos := p.tok.pos()
	mut text := p.tok.lit
	for {
		next_kind := p.peek_tok.kind
		if next_kind in [.assign, .period, .rsbr] || next_kind in space_formatting_kinds() {
			break
		}
		p.next()!
		if p.tok.kind !in [.bare, .minus, .underscore] {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' bare key expected .bare, .minus, or .underscore but got "${p.tok.kind}"')
		}
		text += p.tok.lit
	}
	return ast.Bare{
		text: text
		pos:  start_pos
	}
}
1564
// quoted returns an `ast.Quoted` built from the current token.
// The literal is expected to still contain its quotes (see the note in the body); they are
// removed here, and `quote` and `is_multiline` are derived from them.
pub fn (mut p Parser) quoted() ast.Quoted {
	// To get more info about the quote type and enable better checking,
	// the scanner is returning the literal *with* single- or double-quotes.
	raw := p.tok.lit
	quote := raw[0]
	// Three equal quote characters at the start of a literal long enough to hold both delimiters
	is_multiline := raw.len >= 6 && raw[1] == quote && raw[2] == quote
	delimiter_len := if is_multiline { 3 } else { 1 }
	mut text := raw[delimiter_len..raw.len - delimiter_len]
	// From https://toml.io/en/v1.0.0#string
	// "Multi-line literal strings [...] A newline immediately following the opening
	// delimiter will be trimmed. All other content between the delimiters
	// is interpreted as-is without modification."
	if is_multiline && text.len > 0 && text[0] == `\n` {
		text = text[1..]
	}
	return ast.Quoted{
		text:         text
		pos:          p.tok.pos()
		quote:        quote
		is_multiline: is_multiline
	}
}
1589
// boolean returns an `ast.Bool` built from the current token.
// An error is returned unless the literal of the token is exactly `true` or `false`.
pub fn (mut p Parser) boolean() !ast.Bool {
	text := p.tok.lit
	if text != 'true' && text != 'false' {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected literal to be either `true` or `false` got "${p.tok.kind}"')
	}
	return ast.Bool{
		text: text
		pos:  p.tok.pos()
	}
}
1601
// number returns an `ast.Number` holding the literal and position of the current token.
pub fn (mut p Parser) number() ast.Number {
	text := p.tok.lit
	return ast.Number{
		text: text
		pos:  p.tok.pos()
	}
}
1609
// date_time parses dates and time in RFC 3339 format.
// https://datatracker.ietf.org/doc/html/rfc3339
// The result is an `ast.Time` when the next token is `.colon`, and an `ast.Date` or
// `ast.DateTime` when the next token is `.minus`. In any other case an `ast.Date`
// with an empty text is returned.
pub fn (mut p Parser) date_time() !ast.DateTimeType {
	start_pos := p.tok.pos()

	// Time only
	if p.peek_tok.kind == .colon {
		time_only := p.time()!
		return time_only
	}
	if p.peek_tok.kind != .minus {
		return ast.Date{
			text: ''
			pos:  start_pos
		}
	}

	date_part := p.date()!
	mut text := ''
	text += date_part.text

	// Look for any THH:MM:SS or <space>HH:MM:SS
	peek_lit := p.peek_tok.lit
	has_t_separator := p.peek_tok.kind == .bare
		&& (peek_lit.starts_with('T') || peek_lit.starts_with('t'))
	if !has_t_separator && p.peek_tok.kind != .whitespace {
		// Nothing that could start a time follows, so this is a date
		return ast.Date{
			text: text
			pos:  start_pos
		}
	}

	p.next()! // Advance to token with Txx or whitespace special case
	if p.tok.lit.starts_with('T') || p.tok.lit.starts_with('t') {
		text += p.tok.lit[0].ascii_str() //'T' or 't'
	} else {
		following := p.peek(0)!
		if following.kind != .number {
			// return early as date for strings yyyy-mm-dd_X... (_ is space, X is not numeric)
			return ast.Date{
				text: text
				pos:  start_pos
			}
		}
		text += p.tok.lit
		p.next()!
	}
	time_part := p.time()!
	text += time_part.text

	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed date-time: "${text}"')
	return ast.DateTime{
		text: text
		pos:  start_pos
		date: date_part
		time: time_part
	}
}
1661
// date parses the token sequence number, minus, number, minus, number
// and returns it as an `ast.Date` holding the joined literals.
pub fn (mut p Parser) date() !ast.Date {
	start_pos := p.tok.pos()
	mut text := p.tok.lit

	// Each of the first four tokens is checked, then the literal of the token
	// that is current afterwards is added to the text.
	for kind in [token.Kind.number, .minus, .number, .minus] {
		p.check(kind)!
		text += p.tok.lit
	}
	p.expect(.number)!

	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed date: "${text}"')
	return ast.Date{
		text: text
		pos:  start_pos
	}
}
1684
// time parses a time at the current position and returns it as an `ast.Time`.
// The current token is either a number or a `.bare` token starting with `T`/`t`.
// Hours, minutes and seconds are required; a fraction (`.` + number) and an
// offset (`+`/`-` number `:` number, or `Z`/`z`) are optional.
pub fn (mut p Parser) time() !ast.Time {
	start_pos := p.tok.pos()
	mut text := p.tok.lit

	upper_t := text.starts_with('T')
	lower_t := text.starts_with('t')
	if p.is_at(.bare) && (upper_t || lower_t) {
		// Drop the `T`/`t` separator from the literal
		text = if upper_t { text.all_after('T') } else { text.all_after('t') }
		p.next()!
	} else {
		p.check(.number)!
	}

	// `:` number `:` — the literal of the current token is added before each check
	for kind in [token.Kind.colon, .number, .colon] {
		text += p.tok.lit
		p.check(kind)!
	}
	// NOTE: TOML v1.1.0 have optional seconds, they are required here
	text += p.tok.lit
	p.expect(.number)!

	// Optional milliseconds
	if p.peek_tok.kind == .period {
		p.next()!
		text += p.tok.lit // text += '.'
		p.check(.period)!
		text += p.tok.lit
		p.expect(.number)!
	}

	last_char := text[text.len - 1]
	if !last_char.is_digit() {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected a number as last occurrence in "${text}" got "${last_char.ascii_str()}"')
	}

	// Parse offset
	offset_kind := p.peek_tok.kind
	if offset_kind == .minus || offset_kind == .plus {
		p.next()!
		text += p.tok.lit // text += '-'
		p.check_one_of([.minus, .plus])!
		text += p.tok.lit
		p.check(.number)!
		text += p.tok.lit
		p.check(.colon)!
		text += p.tok.lit
		p.expect(.number)!
	} else if offset_kind == .bare && (p.peek_tok.lit == 'Z' || p.peek_tok.lit == 'z') {
		p.next()!
		text += p.tok.lit
		p.expect(.bare)!
	}

	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed time: "${text}"')
	return ast.Time{
		text: text
		pos:  start_pos
	}
}
1751
// undo_special_case_01 reverts an operation needed for a few special case / edge case tests to pass:
// when `dotted_key` is found in `p.explicit_declared` it is removed from there and `p.last_aot` is cleared.
// See:
// https://github.com/toml-lang/toml-test/blob/576db852/tests/invalid/table/array-implicit.toml
// https://github.com/toml-lang/toml-test/blob/229ce2e/tests/valid/table/array-implicit-and-explicit-after.toml
// https://github.com/toml-lang/toml-test/blob/229ce2e/tests/valid/array/open-parent-table.toml
pub fn (mut p Parser) undo_special_case_01(dotted_key DottedKey) {
	declared_at := p.explicit_declared.index(dotted_key)
	// A negative index means that the key was not found
	if declared_at < 0 {
		return
	}
	p.explicit_declared.delete(declared_at)
	p.last_aot.clear()
}
1764
// eof returns an `ast.EOF` located at the position of the current token.
pub fn (mut p Parser) eof() ast.EOF {
	current_pos := p.tok.pos()
	return ast.EOF{
		pos: current_pos
	}
}
1771