v2 / vlib / v / parser / asm.v
678 lines · 652 sloc · 16.97 KB · 8e35f4d9848f7ad35d857a187dddbfd2eca5e19d
Raw
1module parser
2
3import ast
4import v.pref
5
// allowed_lock_prefix_ins lists the instruction mnemonics that asm_stmt accepts
// after a `lock` prefix on i386/amd64 (an optional b/w/l/q size suffix is
// stripped before the lookup).
// Reference: https://www.felixcloutier.com/x86/lock
const allowed_lock_prefix_ins = [
	'add',
	'adc',
	'and',
	'btc',
	'btr',
	'bts',
	'cmpxchg',
	'cmpxchg8b',
	'cmpxchg16b',
	'dec',
	'inc',
	'neg',
	'not',
	'or',
	'sbb',
	'sub',
	'xor',
	'xadd',
	'xchg',
]
9
// asm_stmt parses an inline assembly statement of the form
// `asm [volatile|goto] <arch> { templates [; outputs [; inputs [; clobbers [; goto labels]]]] }`
// and returns the resulting ast.AsmStmt.
//
// `is_top_level` marks an asm block that appears outside of a function: the
// top-level statement hooks are invoked, wasm32 is rejected, and any extended
// section (introduced by `;`) is reported as an error.
//
// While the templates are parsed, p.scope is replaced by a detached scope that
// only contains the registers of the selected architecture; the previous scope
// is restored before the output/input operands are parsed.
fn (mut p Parser) asm_stmt(is_top_level bool) ast.AsmStmt {
	p.inside_asm = true
	p.inside_asm_template = true
	defer {
		p.inside_asm = false
		p.inside_asm_template = false
	}
	p.n_asm = 0
	if is_top_level {
		p.top_level_statement_start()
	}
	mut backup_scope := p.scope

	pos := p.tok.pos()

	p.check(.key_asm)
	mut arch := pref.arch_from_string(p.tok.lit) or { pref.Arch._auto }

	if is_top_level && arch == .wasm32 {
		p.error("wasm doesn't support toplevel assembly")
	}

	// an optional `volatile`/`goto` qualifier precedes the architecture name,
	// in which case the architecture is the token after the qualifier
	mut is_volatile := false
	mut is_goto := false
	if p.tok.kind == .key_volatile {
		arch = pref.arch_from_string(p.peek_tok.lit) or { pref.Arch._auto }
		is_volatile = true
		p.next()
	} else if p.tok.kind == .key_goto {
		arch = pref.arch_from_string(p.peek_tok.lit) or { pref.Arch._auto }
		is_goto = true
		p.next()
	}
	if arch == ._auto && !p.pref.is_fmt {
		if p.tok.lit == '' {
			p.error('missing assembly architecture. Try i386, amd64, arm64, or wasm.')
		}
		p.error('unknown assembly architecture')
	}
	if p.tok.kind != .name {
		p.error('must specify assembly architecture')
	} else {
		p.next()
	}

	p.check_for_impure_v(ast.pref_arch_to_table_language(arch), p.prev_tok.pos())

	p.check(.lcbr)
	p.scope = &ast.Scope{
		parent:               unsafe { nil } // you shouldn't be able to reference other variables in assembly blocks
		detached_from_parent: true
		start_pos:            p.tok.pos
		objects:              ast.all_registers(mut p.table, arch) //
	}

	mut local_labels := []string{}
	// riscv: https://github.com/jameslzhu/riscv-card/releases/download/latest/riscv-card.pdf
	// x86: https://www.felixcloutier.com/x86/
	// arm: https://developer.arm.com/documentation/dui0068/b/arm-instruction-reference
	mut templates := []ast.AsmTemplate{}
	// one iteration per template (instruction, directive, label or comment run)
	for p.tok.kind !in [.semicolon, .rcbr, .eof] {
		template_pos := p.tok.pos()
		mut name := ''
		mut comments := []ast.Comment{}
		// amd64 encoding prefixes such as `rex.w` become part of the template name
		if p.tok.kind == .name && arch == .amd64 && p.tok.lit in ['rex', 'vex', 'xop'] {
			name += p.tok.lit
			p.next()
			for p.tok.kind == .dot {
				p.next()
				name += '.' + p.tok.lit
				p.check(.name)
			}
			name += ' '
		}
		// a leading dot marks an assembler directive
		is_directive := p.tok.kind == .dot
		if is_directive {
			p.next()
		}
		if p.tok.kind in [.key_in, .key_lock, .key_orelse, .key_select, .key_return] { // `in`, `lock`, `or`, `select`, `return` are v keywords that are also x86/arm/riscv/wasm instructions.
			name += p.tok.kind.str()
			if p.tok.kind == .key_lock && arch in [.i386, .amd64] {
				p.next()

				// p.tok.lit can be empty (see the `p.tok.lit == ''` check above), so
				// guard the index; an empty literal then fails the lookup below and
				// is reported as an error instead of crashing the parser.
				has_suffix := p.tok.lit.len > 0
					&& p.tok.lit[p.tok.lit.len - 1] in [`b`, `w`, `l`, `q`]
				if !(p.tok.lit in allowed_lock_prefix_ins
					|| (has_suffix && p.tok.lit[0..p.tok.lit.len - 1] in allowed_lock_prefix_ins)) {
					p.error('The lock prefix cannot be used on this instruction')
				}
				name += ' '
				name += p.tok.lit
			}
			p.next()
		} else if p.tok.kind == .number {
			name += p.tok.lit
			p.next()
		} else if p.tok.kind == .comment {
			for p.tok.kind == .comment {
				comments << p.comment()
			}
		} else {
			name += p.tok.lit
			p.check(.name)
		}
		// dots are part of instructions for some riscv extensions and webassembly, arm64
		if arch in [.rv32, .rv64, .wasm32, .arm64, .loongarch64] {
			for p.tok.kind == .dot {
				name += '.'
				p.next()
				// wasm: i32.const
				if arch == .wasm32 && p.tok.kind == .key_const {
					name += 'const'
					p.next()
				} else {
					name += p.tok.lit
					p.check(.name)
				}
			}
		}
		mut is_label := false

		mut args := []ast.AsmArg{}
		// arguments are only parsed when they start on the same line as the mnemonic
		if p.tok.line_nr == p.prev_tok.line_nr {
			args_loop: for {
				// a line break ends the argument list
				if p.prev_tok.pos().line_nr < p.tok.pos().line_nr {
					break
				}
				// optional `segment:` override in front of an operand
				mut segment := ''
				if p.tok.kind == .name && p.peek_tok.kind == .colon {
					segment = p.tok.lit
					p.next()
					p.next()
				}
				match p.tok.kind {
					.name {
						args << p.reg_or_alias()
					}
					.string {
						// wasm: call 'wasi_unstable' 'proc_exit'
						args << p.tok.lit
						p.next()
					}
					.number {
						number_lit := p.parse_number_literal()
						match number_lit {
							ast.FloatLiteral {
								args << ast.FloatLiteral{
									...number_lit
								}
							}
							ast.IntegerLiteral {
								// integers after a directive are stored as displacements
								if is_directive {
									args << ast.AsmDisp{
										val: number_lit.val
										pos: number_lit.pos
									}
								} else {
									args << ast.IntegerLiteral{
										...number_lit
									}
								}
							}
							else {
								p.error('p.parse_number_literal() invalid output: `${number_lit}`')
							}
						}
					}
					.chartoken {
						args << ast.CharLiteral{
							val: p.tok.lit
							pos: p.tok.pos()
						}
						p.next()
					}
					.colon {
						// `name:` declares a local label
						is_label = true
						p.next()
						local_labels << name
						break
					}
					.lsbr {
						if arch == .wasm32 {
							p.error("wasm doesn't have addressing operands")
						}
						mut addressing := p.asm_addressing()
						addressing.segment = segment
						args << addressing
					}
					.rcbr {
						break
					}
					.semicolon {
						break
					}
					else {
						p.error('invalid token in assembly block')
					}
				}

				if p.tok.kind == .comma {
					p.next()
				} else {
					break
				}
			}
		}
		for p.tok.kind == .comment {
			comments << p.comment()
		}
		if is_directive && name in ['globl', 'global'] {
			for arg in args {
				// only plain names can be exported; an unchecked `as` cast would
				// panic on registers, numbers or memory operands
				if arg is ast.AsmAlias {
					p.global_labels << arg.name
				} else {
					p.error('`.${name}` directive expects label names as arguments')
				}
			}
		}
		templates << ast.AsmTemplate{
			name:         name
			args:         args
			comments:     comments
			is_label:     is_label
			is_directive: is_directive
			pos:          template_pos.extend(p.tok.pos())
		}
	}
	// keep the register scope around (it is stored in the result and reused for
	// the clobber list), but parse the operands in the surrounding scope
	mut scope := p.scope
	p.scope = backup_scope
	p.inside_asm_template = false
	mut output, mut input, mut clobbered, mut global_labels := []ast.AsmIO{}, []ast.AsmIO{}, []ast.AsmClobbered{}, []string{}
	if !is_top_level {
		if p.tok.kind == .semicolon {
			output = p.asm_ios(true)
			if p.tok.kind == .semicolon {
				input = p.asm_ios(false)
			}
			if p.tok.kind == .semicolon {
				// because p.reg_or_alias() requires the scope with registers to recognize registers.
				backup_scope = p.scope
				p.scope = scope
				p.next()
				for p.tok.kind == .name {
					reg := ast.AsmRegister{
						name: p.tok.lit
						typ:  0
						size: -1
					}
					p.next()

					mut comments := []ast.Comment{}
					for p.tok.kind == .comment {
						comments << p.comment()
					}
					clobbered << ast.AsmClobbered{
						reg:      reg
						comments: comments
					}

					if p.tok.kind in [.rcbr, .semicolon] {
						break
					}
				}

				// `asm goto` takes a final section listing the jump target labels
				if is_goto && p.tok.kind == .semicolon {
					p.next()
					for p.tok.kind == .name {
						global_labels << p.tok.lit
						p.next()
					}
				}
			}
		}
	} else if p.tok.kind == .semicolon {
		p.error('extended assembly is not allowed as a top level statement')
	}
	p.scope = backup_scope
	p.check(.rcbr)
	if is_top_level {
		p.top_level_statement_end()
	}
	scope.end_pos = p.prev_tok.pos

	return ast.AsmStmt{
		arch:          arch
		is_goto:       is_goto
		is_volatile:   is_volatile
		templates:     templates
		output:        output
		input:         input
		clobbered:     clobbered
		pos:           pos.extend(p.prev_tok.pos())
		is_basic:      is_top_level || output.len + input.len + clobbered.len == 0
		scope:         scope
		global_labels: global_labels
		local_labels:  local_labels
	}
}
306
// reg_or_alias consumes a name token and classifies it as an assembly operand:
// - a name found in the current scope must be a register and is returned as such;
// - `b<digits>` / `f<digits>` is returned as an ast.AsmDisp whose value is the
//   digits followed by the direction letter (e.g. `b1` becomes `1b`);
// - any other name is returned as an ast.AsmAlias.
fn (mut p Parser) reg_or_alias() ast.AsmArg {
	p.check(.name)
	if p.prev_tok.lit in p.scope.objects {
		x := unsafe { p.scope.objects[p.prev_tok.lit] }
		if x is ast.AsmRegister {
			return ast.AsmArg(x as ast.AsmRegister)
		} else {
			p.error('non-register ast.ScopeObject found in scope')
			return ast.AsmDisp{} // should not be reached
		}
	} else if p.prev_tok.lit.len >= 2 && p.prev_tok.lit[0] in [`b`, `f`]
		&& p.prev_tok.lit[1..].bytes().all(it.is_digit()) {
		// the guard checks the literal's own length, since the literal is the
		// string that gets indexed and sliced here
		return ast.AsmDisp{
			val: p.prev_tok.lit[1..] + p.prev_tok.lit[0].ascii_str()
			pos: p.prev_tok.pos()
		}
	} else {
		return ast.AsmAlias{
			name: p.prev_tok.lit
			pos:  p.prev_tok.pos()
		}
	}
}
329
330// fn (mut p Parser) asm_addressing() ast.AsmAddressing {
331// pos := p.tok.pos()
332// p.check(.lsbr)
333// unknown_addressing_mode := 'unknown addressing mode. supported ones are [displacement], [base], [base + displacement] [index ∗ scale + displacement], [base + index ∗ scale + displacement], [base + index + displacement] [rip + displacement]'
334// mut mode := ast.AddressingMode.invalid
335// if p.peek_tok.kind == .rsbr {
336// if p.tok.kind == .name {
337// mode = .base
338// } else if p.tok.kind == .number {
339// mode = .displacement
340// } else {
341// p.error(unknown_addressing_mode)
342// }
343// } else if p.peek_tok.kind == .mul {
344// mode = .index_times_scale_plus_displacement
345// } else if p.tok.lit == 'rip' {
346// mode = .rip_plus_displacement
347// } else if p.peek_tok3.kind == .mul {
348// mode = .base_plus_index_times_scale_plus_displacement
349// } else if p.peek_tok.kind == .plus && p.peek_tok3.kind == .rsbr {
350// mode = .base_plus_displacement
351// } else if p.peek_tok.kind == .plus && p.peek_tok3.kind == .plus {
352// mode = .base_plus_index_plus_displacement
353// } else {
354// p.error(unknown_addressing_mode)
355// }
356// mut displacement, mut base, mut index, mut scale := u32(0), ast.AsmArg{}, ast.AsmArg{}, -1
357
358// match mode {
359// .base {
360// base = p.reg_or_alias()
361// }
362// .displacement {
363// displacement = p.tok.lit.u32()
364// p.check(.number)
365// }
366// .base_plus_displacement {
367// base = p.reg_or_alias()
368// p.check(.plus)
369// displacement = p.tok.lit.u32()
370// p.check(.number)
371// }
372// .index_times_scale_plus_displacement {
373// index = p.reg_or_alias()
374// p.check(.mul)
375// scale = p.tok.lit.int()
376// p.check(.number)
377// p.check(.plus)
378// displacement = p.tok.lit.u32()
379// p.check(.number)
380// }
381// .base_plus_index_times_scale_plus_displacement {
382// base = p.reg_or_alias()
383// p.check(.plus)
384// index = p.reg_or_alias()
385// p.check(.mul)
386// scale = p.tok.lit.int()
387// p.check(.number)
388// p.check(.plus)
389// displacement = p.tok.lit.u32()
390// p.check(.number)
391// }
392// .rip_plus_displacement {
393// base = p.reg_or_alias()
394// p.check(.plus)
395// displacement = p.tok.lit.u32()
396// p.check(.number)
397// }
398// .base_plus_index_plus_displacement {
399// base = p.reg_or_alias()
400// p.check(.plus)
401// index = p.reg_or_alias()
402// p.check(.plus)
403// displacement = p.tok.lit.u32()
404// p.check(.number)
405// }
406// .invalid {} // there was already an error above
407// }
408
409// p.check(.rsbr)
410// return ast.AsmAddressing{
411// base: base
412// displacement: displacement
413// index: index
414// scale: scale
415// mode: mode
416// pos: pos.extend(p.prev_tok.pos())
417// }
418// }
419
// asm_addressing parses a bracketed memory operand and returns it together with
// the detected addressing mode. The supported forms are:
//   [displacement], [base], [base + displacement], [rip + displacement],
//   [base + index * scale + displacement], [base + index + displacement],
//   [index * scale + displacement]
// Only one token of lookahead (p.peek_tok) is used, so the mode is decided
// incrementally while tokens are consumed. Anything that does not match one of
// the forms above is reported once as an unknown addressing mode and an empty
// ast.AsmAddressing is returned.
fn (mut p Parser) asm_addressing() ast.AsmAddressing {
	pos := p.tok.pos()
	p.check(.lsbr)
	unknown_addressing_mode := 'unknown addressing mode. supported ones are [displacement], [base], [base + displacement], [index ∗ scale + displacement], [base + index ∗ scale + displacement], [base + index + displacement], [rip + displacement]'
	if p.peek_tok.kind == .rsbr { // [displacement] or [base]
		if p.tok.kind == .name {
			base := p.reg_or_alias()
			p.check(.rsbr)
			return ast.AsmAddressing{
				mode: .base
				base: base
				pos:  pos.extend(p.prev_tok.pos())
			}
		} else if p.tok.kind == .number {
			displacement := p.asm_addressing_displacement()
			p.check(.rsbr)
			return ast.AsmAddressing{
				mode:         .displacement
				displacement: displacement
				pos:          pos.extend(p.prev_tok.pos())
			}
		}
		// stop here, so that the same problem is not reported a second time below
		p.error(unknown_addressing_mode)
		return ast.AsmAddressing{}
	}
	if p.peek_tok.kind == .plus && p.tok.kind == .name { // [base + displacement], [base + index ∗ scale + displacement], [base + index + displacement] or [rip + displacement]
		if p.tok.lit == 'rip' {
			rip := p.reg_or_alias()
			p.next() // skip `+`

			displacement := p.asm_addressing_displacement()
			p.check(.rsbr)
			return ast.AsmAddressing{
				mode:         .rip_plus_displacement
				base:         rip
				displacement: displacement
				pos:          pos.extend(p.prev_tok.pos())
			}
		}
		base := p.reg_or_alias()
		p.next() // skip `+`
		if p.peek_tok.kind == .rsbr { // [base + displacement]
			if p.tok.kind != .number {
				p.error(unknown_addressing_mode)
				return ast.AsmAddressing{}
			}
			displacement := p.asm_addressing_displacement()
			p.check(.rsbr)
			return ast.AsmAddressing{
				mode:         .base_plus_displacement
				base:         base
				displacement: displacement
				pos:          pos.extend(p.prev_tok.pos())
			}
		}
		index := p.reg_or_alias()
		if p.tok.kind == .mul {
			p.next()
			scale := p.tok.lit.int()
			p.check(.number)
			p.check(.plus)
			displacement := p.asm_addressing_displacement()
			p.check(.rsbr)
			return ast.AsmAddressing{
				mode:         .base_plus_index_times_scale_plus_displacement
				base:         base
				index:        index
				scale:        scale
				displacement: displacement
				pos:          pos.extend(p.prev_tok.pos())
			}
		} else if p.tok.kind == .plus {
			p.next()
			displacement := p.asm_addressing_displacement()
			p.check(.rsbr)
			return ast.AsmAddressing{
				mode:         .base_plus_index_plus_displacement
				base:         base
				index:        index
				displacement: displacement
				pos:          pos.extend(p.prev_tok.pos())
			}
		}
		// `base + index` was already consumed; falling through to the
		// [index * scale + displacement] check below would re-interpret the
		// remaining tokens as a fresh operand and could accept malformed input
		p.error(unknown_addressing_mode)
		return ast.AsmAddressing{}
	}
	if p.peek_tok.kind == .mul { // [index ∗ scale + displacement]
		index := p.reg_or_alias()
		p.next() // skip `*`
		scale := p.tok.lit.int()
		p.check(.number)
		p.check(.plus)
		displacement := p.asm_addressing_displacement()
		p.check(.rsbr)
		return ast.AsmAddressing{
			mode:         .index_times_scale_plus_displacement
			index:        index
			scale:        scale
			displacement: displacement
			pos:          pos.extend(p.prev_tok.pos())
		}
	}
	p.error(unknown_addressing_mode)
	return ast.AsmAddressing{}
}

// asm_addressing_displacement parses the displacement part of a memory operand:
// a name is resolved through reg_or_alias, anything else must be a number token
// and is returned as an ast.AsmDisp holding the token's literal and position.
fn (mut p Parser) asm_addressing_displacement() ast.AsmArg {
	if p.tok.kind == .name {
		return p.reg_or_alias()
	}
	disp := ast.AsmArg(ast.AsmDisp{
		val: p.tok.lit
		pos: p.tok.pos()
	})
	p.check(.number)
	return disp
}
578
// asm_ios parses one operand section of an extended asm block, starting at the
// `;` that introduces it, and returns the operands found up to (but not
// including) the next `;` or the closing `}`.
//
// `output` selects the grammar of the constraint in front of each `(expr)`:
// output operands must start with `=` or `+` (optionally followed by `&` or
// `*`), input operands may start with `%` or `*` and may be a single digit.
// When the constraint is omitted, `+r` (output) or `r` (input) is used.
// An empty array is returned for an empty section and after any reported error.
fn (mut p Parser) asm_ios(output bool) []ast.AsmIO {
	p.check(.semicolon)
	if p.tok.kind in [.rcbr, .semicolon] {
		// nothing between this `;` and the next section / end of block
		return []
	}
	mut ios := []ast.AsmIO{}
	for {
		if p.tok.kind == .eof {
			p.error('reached eof in asm_ios')
			return []
		}
		io_pos := p.tok.pos()

		mut constraint := ''
		if p.tok.kind == .lpar {
			// the operand starts directly with `(`: fall back to the default constraint
			constraint = if output { '+r' } else { 'r' }
		} else {
			// modifiers, see https://gcc.gnu.org/onlinedocs/gcc/Modifiers.html
			if output {
				match p.tok.kind {
					.assign {
						constraint += '='
					}
					.plus {
						constraint += '+'
					}
					else {
						p.error_with_pos('Output constraint must starts with `=` or `+`',
							io_pos)
						return []
					}
				}
				p.next()
				if p.tok.kind in [.amp, .mul] {
					constraint += if p.tok.kind == .amp { '&' } else { '*' }
					p.next()
				}
			} else if p.tok.kind in [.mod, .mul] {
				constraint += if p.tok.kind == .mod { '%' } else { '*' }
				p.next()
			}
			if p.tok.kind == .at {
				// hack: `@ccl` is a single token .at, not .at + .name
				constraint += p.tok.lit
				p.next()
			} else if !output && p.tok.kind == .number {
				// matching (numbered) constraints, see
				// https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html
				if p.tok.lit.int() >= 10 {
					p.error_with_pos('The digit must be between 0 and 9 only', io_pos)
					return []
				}
				constraint += p.tok.lit
				p.check(.number)
			} else {
				constraint += p.tok.lit
				p.check(.name)
			}
		}
		// the operand itself has to be a parenthesised expression; unwrap it
		mut expr := p.expr(0)
		mut inner_expr := ast.Expr(ast.EmptyExpr{})
		if mut expr is ast.ParExpr {
			inner_expr = expr.expr
		} else {
			p.error('asm in/output must be enclosed in brackets')
			return []
		}
		expr = inner_expr
		// an explicit `as name` wins; otherwise a plain identifier is its own alias,
		// and any other expression gets no alias
		mut alias := ''
		if p.tok.kind == .key_as {
			p.next()
			alias = p.tok.lit
			p.check(.name)
		} else if mut expr is ast.Ident {
			alias = expr.name
		}
		mut comments := []ast.Comment{}
		for p.tok.kind == .comment {
			comments << p.comment()
		}

		ios << ast.AsmIO{
			alias:      alias
			constraint: constraint
			expr:       expr
			comments:   comments
			pos:        io_pos.extend(p.prev_tok.pos())
		}
		p.n_asm++
		if p.tok.kind in [.semicolon, .rcbr] {
			break
		}
	}
	return ios
}
679