v / vlib / v2 / gen / x64 / x64.v
3371 lines · 3161 sloc · 94.83 KB · ddb021b9866c3b4523b746fa2f4c16a594f8bd89
Raw
1// Copyright (c) 2026 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4
5module x64
6
7import v2.mir
8import v2.ssa
9import encoding.binary
10import math.bits
11
// Stack allocations of at least this many bytes on the Windows ABI are
// emitted one step of this size at a time, touching the stack after each step.
const x64_windows_stack_probe_page_size = 0x1000
13
// Gen carries the state of the x64 backend while it lowers one MIR module.
// Everything below the object writers is reset at the start of each function.
pub struct Gen {
	// Module being compiled.
	mod &mir.Module
mut:
	// Object writers; the constructors create all three.
	elf   &ElfObject
	macho &MachOObject
	coff  &CoffObject
	// Selected object format and calling convention.
	obj_format ObjectFormat
	abi        X64Abi

	// value id -> rbp-relative offset of the value's stack slot
	stack_map map[int]int
	// alloca value id -> rbp-relative offset of the allocated storage
	alloca_offsets map[int]int
	// Bytes subtracted from rsp in the prologue of the current function.
	stack_size int
	// Text offset at which the current function starts.
	curr_offset int

	// block id -> offset of the block relative to curr_offset
	block_offsets map[int]int
	// block id -> function-relative offsets of rel32 fields that must be
	// patched once the block's offset is known
	pending_labels map[int][]int

	// Register allocation
	// value id -> register number
	reg_map map[int]int
	// Registers pushed in the prologue as callee-saved.
	used_regs []int

	// Return-type information of the function currently being generated.
	cur_func_ret_type          int
	cur_func_abi_ret_indirect  bool
	cur_func_abi_ret_class     mir.AbiValueClass
	// rbp-relative slot that holds the hidden sret pointer (0 when unused).
	sret_save_offset int
}
40
// Interval describes a range [start, end] associated with one value id.
// NOTE(review): presumably a live interval consumed by the register
// allocator; the consumer is not visible in this part of the file.
struct Interval {
mut:
	// Value the interval belongs to.
	val_id int
	// First and last position covered by the interval.
	start int
	end   int
	// NOTE(review): likely set when a call occurs inside the interval - confirm.
	has_call bool
}
48
// ReferencedWindowsCoffGlobals is the result of scanning instruction operands
// for uses of module-defined globals on Windows/COFF targets.
struct ReferencedWindowsCoffGlobals {
	// True only when the whole module was scanned without hitting an
	// out-of-range id; when false, `indexes` must not be used for pruning.
	complete bool
	// Indices into the module's globals of non-external globals that are
	// used as an instruction operand.
	indexes map[int]bool
}
53
// X64MainArgGlobals lists the distinct symbol names of the argc/argv globals
// that reachable code refers to.
struct X64MainArgGlobals {
mut:
	// Names matching the g_main_argc pattern.
	argc []string
	// Names matching the g_main_argv pattern.
	argv []string
}
59
// Gen.new creates a generator for `mod` that targets ELF with the SysV ABI.
pub fn Gen.new(mod &mir.Module) &Gen {
	// Same field values as building the struct by hand: fresh object
	// writers, `.elf` format and `.sysv` ABI.
	return Gen.new_with_format_and_abi(mod, .elf, .sysv)
}
70
// Gen.new_with_format creates a generator for `mod` that writes `obj_format`
// objects and uses the SysV ABI.
pub fn Gen.new_with_format(mod &mir.Module, obj_format ObjectFormat) &Gen {
	default_abi := X64Abi.sysv
	return Gen.new_with_format_and_abi(mod, obj_format, default_abi)
}
74
// Gen.new_with_format_and_abi creates a generator for `mod` with an explicit
// object format and ABI. A fresh writer is created for every object format.
pub fn Gen.new_with_format_and_abi(mod &mir.Module, obj_format ObjectFormat, abi X64Abi) &Gen {
	gen := &Gen{
		mod:        mod
		elf:        ElfObject.new()
		macho:      MachOObject.new()
		coff:       CoffObject.new()
		obj_format: obj_format
		abi:        abi
	}
	return gen
}
85
// gen emits code for every function that has a body and then lays out the
// module's own globals in the data section.
pub fn (mut g Gen) gen() {
	for func in g.mod.funcs {
		// C externs and declarations without blocks have nothing to emit.
		if func.is_c_extern || func.blocks.len == 0 {
			continue
		}
		g.gen_func(func)
	}

	// Generate Globals in .data
	referenced := g.referenced_windows_coff_globals()
	for idx, global in g.mod.globals {
		if global.linkage == .external
			|| g.omit_unreferenced_windows_coff_global(idx, referenced) {
			continue
		}
		// Pad so that every global starts on an 8-byte boundary.
		for g.data_len() % 8 != 0 {
			g.add_data_byte(0)
		}
		g.add_symbol(global.name, u64(g.data_len()), false, .data)

		// Explicit initializer bytes are copied verbatim.
		if global.initial_data.len > 0 {
			g.add_data(global.initial_data)
			continue
		}

		// A type without a known size still gets 8 bytes.
		type_size := g.type_size(global.typ)
		byte_count := if type_size > 0 { type_size } else { 8 }
		if global.is_constant || g.scalar_global_initial_value_supported(global.typ) {
			// Encode the scalar initial value in little-endian order.
			mut encoded := []u8{len: byte_count}
			raw := u64(global.initial_value)
			if encoded.len >= 8 {
				binary.little_endian_put_u64(mut encoded, raw)
			} else {
				for i in 0 .. encoded.len {
					encoded[i] = u8(raw >> (i * 8))
				}
			}
			g.add_data(encoded)
		} else {
			// Everything else is zero-filled.
			for _ in 0 .. byte_count {
				g.add_data_byte(0)
			}
		}
	}
}
133
// referenced_windows_coff_globals collects the indices of non-external globals
// that appear as an instruction operand in any function with a body.
// The scan only runs for COFF objects using the Windows ABI. If any block,
// value, instruction or global id is out of range, an empty result with
// `complete == false` is returned.
fn (g Gen) referenced_windows_coff_globals() ReferencedWindowsCoffGlobals {
	incomplete := ReferencedWindowsCoffGlobals{}
	if !(g.obj_format == .coff && g.abi == .windows) {
		return incomplete
	}
	mut used := map[int]bool{}
	for func in g.mod.funcs {
		if func.is_c_extern || func.blocks.len == 0 {
			continue
		}
		for block_id in func.blocks {
			if !(block_id >= 0 && block_id < g.mod.blocks.len) {
				return incomplete
			}
			for val_id in g.mod.blocks[block_id].instrs {
				if !(val_id >= 0 && val_id < g.mod.values.len) {
					return incomplete
				}
				val := g.mod.values[val_id]
				is_valid_instr := val.kind == .instruction && val.index >= 0
					&& val.index < g.mod.instrs.len
				if !is_valid_instr {
					return incomplete
				}
				for operand_id in g.mod.instrs[val.index].operands {
					if !(operand_id >= 0 && operand_id < g.mod.values.len) {
						return incomplete
					}
					operand := g.mod.values[operand_id]
					if operand.kind != .global {
						continue
					}
					global_idx := operand.index
					if !(global_idx >= 0 && global_idx < g.mod.globals.len) {
						return incomplete
					}
					// External globals are never emitted, so they are not tracked.
					if g.mod.globals[global_idx].linkage != .external {
						used[global_idx] = true
					}
				}
			}
		}
	}
	return ReferencedWindowsCoffGlobals{
		complete: true
		indexes:  used
	}
}
180
// omit_unreferenced_windows_coff_global reports whether the global at `index`
// may be left out of the data section. That is only the case for Windows/COFF
// output, with a complete reference scan, when the global was not referenced.
fn (g Gen) omit_unreferenced_windows_coff_global(index int, referenced ReferencedWindowsCoffGlobals) bool {
	targets_windows_coff := g.obj_format == .coff && g.abi == .windows
	if !targets_windows_coff || !referenced.complete {
		return false
	}
	return !referenced.indexes[index]
}
190
// scalar_global_initial_value_supported reports whether `typ_id` names an
// integer or pointer type. Type id 0 and ids outside the type store are
// rejected.
fn (g Gen) scalar_global_initial_value_supported(typ_id ssa.TypeID) bool {
	if typ_id > 0 && typ_id < g.mod.type_store.types.len {
		kind := g.mod.type_store.types[typ_id].kind
		return kind == .int_t || kind == .ptr_t
	}
	return false
}
198
// x64_object_symbol_bare_name returns `name` without a single leading
// underscore; names that do not start with '_' are returned unchanged.
fn x64_object_symbol_bare_name(name string) string {
	return if name.starts_with('_') { name[1..] } else { name }
}
205
// x64_main_argc_global_name reports whether `name`, ignoring one leading
// underscore, is one of the known argc global symbols.
fn x64_main_argc_global_name(name string) bool {
	return x64_object_symbol_bare_name(name) in ['g_main_argc', 'builtin__g_main_argc']
}
210
// x64_main_argv_global_name reports whether `name`, ignoring one leading
// underscore, is one of the known argv global symbols.
fn x64_main_argv_global_name(name string) bool {
	return x64_object_symbol_bare_name(name) in ['g_main_argv', 'builtin__g_main_argv']
}
215
// x64_add_unique_global_name appends `name` to `names` unless it is already
// present.
fn x64_add_unique_global_name(mut names []string, name string) {
	if name in names {
		return
	}
	names << name
}
221
// x64_enqueue_reachable_func appends `name` to `queue` unless it is empty,
// already marked in `reachable`, or already queued.
fn x64_enqueue_reachable_func(mut queue []string, reachable map[string]bool, name string) {
	if name == '' || reachable[name] || name in queue {
		return
	}
	queue << name
}
227
// func_by_name returns the first function of the module whose name equals
// `name`, or `none` when there is no such function.
fn (g Gen) func_by_name(name string) ?mir.Function {
	for i in 0 .. g.mod.funcs.len {
		if g.mod.funcs[i].name == name {
			return g.mod.funcs[i]
		}
	}
	return none
}
236
// reachable_funcs_from_main returns the names of all functions defined in the
// module that are reachable from `main`. A function counts as referenced when
// it appears as a `func_ref` operand of any instruction, or as the callee
// (operand 0, kind `func_ref` or `unknown`) of a call-like instruction.
// Names that do not resolve to a function of this module are not included.
// Out-of-range block, value and instruction ids are skipped.
fn (g Gen) reachable_funcs_from_main() map[string]bool {
	// Resolve names through a one-time index instead of rescanning
	// g.mod.funcs for every visited name. The first definition wins, which
	// is the same function a front-to-back linear search would return.
	mut first_index := map[string]int{}
	for i, func in g.mod.funcs {
		if func.name !in first_index {
			first_index[func.name] = i
		}
	}

	mut reachable := map[string]bool{}
	// The queue is append-only and walked with a cursor. This avoids shifting
	// the array on every pop, and because visited names stay in the array,
	// names that resolve to no function are looked up only once.
	mut queue := ['main']
	for head := 0; head < queue.len; head++ {
		name := queue[head]
		if reachable[name] {
			continue
		}
		func_idx := first_index[name] or { continue }
		func := g.mod.funcs[func_idx]
		reachable[name] = true
		for blk_id in func.blocks {
			if blk_id < 0 || blk_id >= g.mod.blocks.len {
				continue
			}
			blk := g.mod.blocks[blk_id]
			for instr_id in blk.instrs {
				if instr_id < 0 || instr_id >= g.mod.values.len {
					continue
				}
				val := g.mod.values[instr_id]
				if val.kind != .instruction || val.index < 0 || val.index >= g.mod.instrs.len {
					continue
				}
				instr := g.mod.instrs[val.index]
				// Any operand that is a function reference keeps that function alive.
				for operand_id in instr.operands {
					if operand_id < 0 || operand_id >= g.mod.values.len {
						continue
					}
					operand := g.mod.values[operand_id]
					if operand.kind == .func_ref {
						x64_enqueue_reachable_func(mut queue, reachable, operand.name)
					}
				}
				// Call-like instructions may also name their callee through a
				// value of kind `unknown`.
				if instr.op !in [.call, .call_sret, .go_call, .spawn_call]
					|| instr.operands.len == 0 {
					continue
				}
				callee_id := instr.operands[0]
				if callee_id >= 0 && callee_id < g.mod.values.len {
					callee := g.mod.values[callee_id]
					if callee.kind in [.func_ref, .unknown] {
						x64_enqueue_reachable_func(mut queue, reachable, callee.name)
					}
				}
			}
		}
	}
	return reachable
}
287
// referenced_main_arg_globals scans the functions listed in `reachable` and
// returns the distinct names of global operands that match the argc or argv
// global name patterns. Out-of-range ids are skipped.
fn (g Gen) referenced_main_arg_globals(reachable map[string]bool) X64MainArgGlobals {
	mut found := X64MainArgGlobals{}
	for func in g.mod.funcs {
		if !reachable[func.name] {
			continue
		}
		for blk_id in func.blocks {
			if !(blk_id >= 0 && blk_id < g.mod.blocks.len) {
				continue
			}
			for instr_id in g.mod.blocks[blk_id].instrs {
				if !(instr_id >= 0 && instr_id < g.mod.values.len) {
					continue
				}
				val := g.mod.values[instr_id]
				is_valid_instr := val.kind == .instruction && val.index >= 0
					&& val.index < g.mod.instrs.len
				if !is_valid_instr {
					continue
				}
				for operand in g.mod.instrs[val.index].operands {
					if !(operand >= 0 && operand < g.mod.values.len) {
						continue
					}
					global_val := g.mod.values[operand]
					if global_val.kind != .global {
						continue
					}
					// argc is checked first; a name is recorded in at most one list.
					if x64_main_argc_global_name(global_val.name) {
						x64_add_unique_global_name(mut found.argc, global_val.name)
					} else if x64_main_argv_global_name(global_val.name) {
						x64_add_unique_global_name(mut found.argv, global_val.name)
					}
				}
			}
		}
	}
	return found
}
327
// has_defined_global reports whether the module contains a global called
// `name` whose linkage is not external.
fn (g Gen) has_defined_global(name string) bool {
	return g.mod.globals.any(it.name == name && it.linkage != .external)
}
336
// store_reg_to_global_symbol emits code that stores the low `size` bytes of
// `reg` to the global symbol `name`. r10 is used as scratch for the address.
fn (mut g Gen) store_reg_to_global_symbol(reg Reg, name string, size int) {
	symbol := g.add_undefined(name)
	// RIP-relative lea into r10: the 32-bit displacement is emitted as a
	// zero placeholder with a relocation recorded against the symbol.
	asm_lea_reg_rip(mut g, r10)
	g.add_rip_reloc(symbol)
	g.emit_u32(0)
	// Store through the address now held in r10.
	asm_store_mem_base_disp_reg_size(mut g, r10, 0, reg, size)
}
344
// maybe_store_sysv_hosted_main_args emits, for `main` on SysV ELF/Mach-O
// targets only, stores of rdi (4 bytes) and rsi (8 bytes) into every
// module-defined argc/argv global that reachable code refers to.
fn (mut g Gen) maybe_store_sysv_hosted_main_args(func mir.Function) {
	is_hosted_sysv_main := func.name == 'main' && g.abi == .sysv
		&& g.obj_format in [.elf, .macho]
	if !is_hosted_sysv_main {
		return
	}
	refs := g.referenced_main_arg_globals(g.reachable_funcs_from_main())
	for argc_name in refs.argc {
		if !g.has_defined_global(argc_name) {
			continue
		}
		g.store_reg_to_global_symbol(rdi, argc_name, 4)
	}
	for argv_name in refs.argv {
		if !g.has_defined_global(argv_name) {
			continue
		}
		g.store_reg_to_global_symbol(rsi, argv_name, 8)
	}
}
362
// gen_func emits one function. It resets the per-function state, runs
// register allocation, assigns rbp-relative stack slots, emits the prologue,
// moves incoming parameters to their homes and finally generates every block,
// patching branches that were waiting for a block's offset.
fn (mut g Gen) gen_func(func mir.Function) {
	// Per-function state.
	g.curr_offset = g.text_len()
	g.stack_map = map[int]int{}
	g.alloca_offsets = map[int]int{}
	g.block_offsets = map[int]int{}
	g.pending_labels = map[int][]int{}
	g.reg_map = map[int]int{}
	g.used_regs = []int{}
	g.cur_func_ret_type = func.typ
	g.cur_func_abi_ret_indirect = func.abi_ret_indirect
	g.cur_func_abi_ret_class = func.abi_ret_class
	g.sret_save_offset = 0

	g.allocate_registers(func)

	// Start after callee-saved pushes so locals do not overlap their rbp slots.
	mut frame_bytes := g.used_regs.len * 8

	// Hidden sret pointer slot (SysV: incoming in RDI, Windows: incoming in RCX)
	if func.abi_ret_indirect {
		sret_slot, sret_end := reserve_stack_bytes(frame_bytes, 8, 1)
		g.sret_save_offset = sret_slot
		frame_bytes = sret_end
	}

	// Home slot for every parameter: indirect, aggregate or wide parameters
	// with a known size get a 16-aligned slot of their own size, everything
	// else gets 8 bytes.
	for param_idx, pid in func.params {
		param_size := g.type_size(g.mod.values[pid].typ)
		passed_indirect := param_idx < func.abi_param_class.len
			&& func.abi_param_class[param_idx] == .indirect
		mut home_size := 8
		mut home_align := 1
		if (passed_indirect || g.value_is_aggregate(pid) || param_size > 8) && param_size > 0 {
			home_size = param_size
			home_align = 16
		}
		home, home_end := reserve_stack_bytes(frame_bytes, home_size, home_align)
		g.stack_map[pid] = home
		frame_bytes = home_end
	}

	// Slots for instruction results, alloca storage and operands that need
	// stack storage.
	for blk_id in func.blocks {
		for val_id in g.mod.blocks[blk_id].instrs {
			val := g.mod.values[val_id]
			if val.kind != .instruction {
				continue
			}
			instr := g.mod.instrs[val.index]

			if instr.op == .alloca {
				// The alloca result type is ptr(T), so the storage is sized
				// from the element type (possibly adjusted by its uses).
				pointee := g.mod.type_store.types[val.typ].elem_type
				storage_bytes := g.alloc_size_from_uses(val_id, g.type_size(pointee))
				alloca_slot, alloca_end := reserve_stack_bytes(frame_bytes, storage_bytes,
					16)
				g.alloca_offsets[val_id] = alloca_slot
				frame_bytes = alloca_end
			}

			has_own_storage := g.value_needs_stack_storage(val_id)
			if has_own_storage {
				result_bytes := g.stack_storage_size(val_id)
				result_slot, result_end := reserve_stack_bytes(frame_bytes, result_bytes,
					16)
				g.stack_map[val_id] = result_slot
				frame_bytes = result_end
			}

			for operand in instr.operands {
				if operand <= 0 || operand >= g.mod.values.len {
					continue
				}
				if !g.value_needs_stack_storage(operand) || operand in g.stack_map {
					continue
				}
				operand_bytes := g.stack_storage_size(operand)
				operand_slot, operand_end := reserve_stack_bytes(frame_bytes, operand_bytes,
					16)
				g.stack_map[operand] = operand_slot
				frame_bytes = operand_end
			}

			// Values with dedicated storage or a register need no spill slot.
			if has_own_storage || val_id in g.reg_map {
				continue
			}
			spill_slot, spill_end := reserve_stack_bytes(frame_bytes, 8, 1)
			g.stack_map[val_id] = spill_slot
			frame_bytes = spill_end
		}
	}

	// Frame size is a multiple of 16; an odd number of pushed registers adds
	// another 8 bytes (presumably to keep rsp 16-byte aligned).
	mut frame_size := (frame_bytes + 16) & ~0xF
	if g.used_regs.len % 2 == 1 {
		frame_size += 8
	}
	g.stack_size = frame_size

	g.add_symbol(func.name, u64(g.curr_offset), true, .text)

	// Prologue
	asm_endbr64(mut g)
	asm_push_rbp(mut g)
	asm_mov_rbp_rsp(mut g)
	g.maybe_store_sysv_hosted_main_args(func)

	// Push callee-saved regs
	for saved in g.used_regs {
		asm_push(mut g, Reg(saved))
	}

	g.emit_stack_allocation()

	int_regs := g.abi.int_arg_regs()
	// The hidden sret pointer occupies the first integer argument position.
	first_int_reg := if func.abi_ret_indirect { 1 } else { 0 }
	mut next_int_reg := first_int_reg
	mut next_sse_reg := 0
	sse_regs := g.abi.float_arg_regs()
	if func.abi_ret_indirect && g.sret_save_offset != 0 {
		asm_store_rbp_disp_reg(mut g, g.sret_save_offset, g.abi.sret_reg())
	}
	mut incoming_stack_off := 16
	for param_idx, pid in func.params {
		passed_indirect := param_idx < func.abi_param_class.len
			&& func.abi_param_class[param_idx] == .indirect
		param_size := g.type_size(g.mod.values[pid].typ)

		// Windows parameters are positional and handled separately.
		if g.abi == .windows {
			g.move_windows_param(pid, param_idx + first_int_reg, passed_indirect, param_size)
			continue
		}

		// SysV float scalars arrive in SSE registers.
		if g.value_is_float_type(pid) {
			g.ensure_float_abi_scalar(pid, 'parameter')
			if next_sse_reg >= sse_regs.len {
				g.unsupported_float_abi('stack parameter', pid)
			}
			asm_store_xmm_rbp_disp(mut g, sse_regs[next_sse_reg], g.stack_map[pid], param_size)
			next_sse_reg++
			continue
		}

		// Direct aggregates with an explicit ABI layout.
		if !passed_indirect && g.value_is_aggregate(pid) && param_idx < func.abi_param_layouts.len
			&& func.abi_param_layouts[param_idx].locs.len > 0 {
			layout := func.abi_param_layouts[param_idx]
			g.store_sysv_direct_aggregate_param(pid, layout)
			int_limit, sse_limit := sysv_layout_register_limits(layout)
			if next_int_reg < int_limit {
				next_int_reg = int_limit
			}
			if next_sse_reg < sse_limit {
				next_sse_reg = sse_limit
			}
			stack_limit := sysv_layout_stack_slot_limit(layout)
			if stack_limit > 0 {
				incoming_stack_off = 16 + stack_limit * 8
			}
			continue
		}

		// Direct aggregates of 9..16 bytes use two eightbyte chunks.
		param_chunks := if !passed_indirect && g.value_is_aggregate(pid) && param_size > 8
			&& param_size <= 16 {
			(param_size + 7) / 8
		} else {
			1
		}
		if next_int_reg + param_chunks <= int_regs.len {
			src := int_regs[next_int_reg]
			if passed_indirect {
				g.copy_indirect_param_from_reg(pid, src)
			} else if param_chunks > 1 {
				home := g.stack_map[pid]
				for chunk := 0; chunk < param_chunks; chunk++ {
					// The last chunk may be shorter than 8 bytes.
					chunk_size := if chunk == param_chunks - 1 {
						param_size - chunk * 8
					} else {
						8
					}
					g.store_reg_to_rbp_exact(Reg(int_regs[next_int_reg + chunk]), home + chunk * 8,
						chunk_size)
				}
			} else if g.value_needs_raw_abi_reg_bytes(pid, param_size) {
				g.store_reg_to_rbp_exact(Reg(src), g.stack_map[pid], param_size)
			} else if assigned := g.reg_map[pid] {
				asm_mov_reg_reg(mut g, Reg(assigned), Reg(src))
			} else {
				home := g.stack_map[pid]
				asm_store_rbp_disp_reg(mut g, home, Reg(src))
			}
			next_int_reg += param_chunks
		} else {
			// Stack parameters start at [rbp+16].
			if passed_indirect {
				// Load pointer from stack into RAX, then copy through it.
				asm_load_reg_rbp_disp(mut g, rax, incoming_stack_off)
				g.copy_indirect_param_from_reg(pid, int(rax))
			} else if param_chunks > 1 {
				g.copy_memory(int(rbp), g.stack_map[pid], int(rbp), incoming_stack_off,
					param_size)
			} else if g.value_needs_raw_abi_reg_bytes(pid, param_size) {
				asm_load_reg_rbp_disp(mut g, rax, incoming_stack_off)
				g.store_reg_to_rbp_exact(rax, g.stack_map[pid], param_size)
			} else if assigned := g.reg_map[pid] {
				asm_load_reg_rbp_disp(mut g, rax, incoming_stack_off)
				asm_mov_reg_reg(mut g, Reg(assigned), rax)
			} else {
				asm_load_reg_rbp_disp(mut g, rax, incoming_stack_off)
				home := g.stack_map[pid]
				asm_store_rbp_disp_reg(mut g, home, rax)
			}
			incoming_stack_off += g.param_stack_slots(passed_indirect, param_chunks, param_size) * 8
		}
	}

	// Emit the blocks in order.
	for blk_id in func.blocks {
		blk := g.mod.blocks[blk_id]
		block_start := g.text_len() - g.curr_offset
		g.block_offsets[blk_id] = block_start

		// Resolve forward branches that were waiting for this block: each
		// recorded site is a 4-byte rel32 field measured from its own end.
		if patch_sites := g.pending_labels[blk_id] {
			for site in patch_sites {
				g.write_u32(g.curr_offset + site, u32(block_start - (site + 4)))
			}
		}

		for val_id in blk.instrs {
			g.gen_instr(val_id)
		}
	}
}
588
// reserve_stack_bytes reserves a stack slot below rbp.
// `slot_offset` is the number of bytes already reserved below rbp. When
// `align` is greater than 1 that count is first rounded up to a multiple of
// `align`; then `size` bytes (8 when `size` is not positive) are added.
// Returns the rbp-relative offset of the new slot (the negated new total) and
// the new total of reserved bytes.
fn reserve_stack_bytes(slot_offset int, size int, align int) (int, int) {
	aligned_start := if align > 1 && slot_offset % align != 0 {
		((slot_offset + align - 1) / align) * align
	} else {
		slot_offset
	}
	slot_bytes := if size > 0 { size } else { 8 }
	reserved := aligned_start + slot_bytes
	return -reserved, reserved
}
599
// emit_stack_allocation reserves g.stack_size bytes of stack. On the Windows
// ABI, frames of at least one probe page are allocated with probing;
// everything else is a single rsp subtraction. Nothing is emitted for a
// non-positive size.
fn (mut g Gen) emit_stack_allocation() {
	if g.stack_size <= 0 {
		return
	}
	needs_probe := g.abi == .windows && g.stack_size >= x64_windows_stack_probe_page_size
	if needs_probe {
		g.emit_windows_stack_probe_allocation(g.stack_size)
	} else {
		g.emit_stack_sub(g.stack_size)
	}
}
610
// emit_stack_sub subtracts `size` from rsp, using the imm8 form for sizes up
// to 127 and the imm32 form otherwise. Non-positive sizes emit nothing.
fn (mut g Gen) emit_stack_sub(size int) {
	if size <= 0 {
		return
	}
	if size > 127 {
		asm_sub_rsp_imm32(mut g, u32(size))
		return
	}
	asm_sub_rsp_imm8(mut g, u8(size))
}
621
// emit_windows_stack_probe_allocation allocates `size` bytes of stack in
// steps of at most one probe page, emitting a test of the byte at [rsp]
// after every step, including the final partial one.
fn (mut g Gen) emit_windows_stack_probe_allocation(size int) {
	page := x64_windows_stack_probe_page_size
	mut left := size
	for left > page {
		g.emit_stack_sub(page)
		asm_test_byte_ptr_rsp_zero(mut g)
		left -= page
	}
	// Remainder of at most one page.
	g.emit_stack_sub(left)
	asm_test_byte_ptr_rsp_zero(mut g)
}
632
// move_windows_param moves one incoming Windows-ABI parameter to its home.
// `position` is the parameter's argument position: positions 0..3 arrive in
// registers, later ones are read from the caller's stack area via rbp.
// Floats are always stored to the parameter's stack slot; other values are
// copied through a pointer, stored with their exact size, moved into their
// allocated register, or stored to their stack slot, in that priority order.
fn (mut g Gen) move_windows_param(pid int, position int, is_indirect_param bool, param_size int) {
	if g.value_is_float_type(pid) {
		g.ensure_float_abi_scalar(pid, 'parameter')
		if position >= 4 {
			// Stack-passed float: bounce through xmm0.
			asm_load_xmm_mem_base_disp_size(mut g, 0, rbp, g.abi.stack_arg_offset(position),
				param_size)
			asm_store_xmm_rbp_disp(mut g, 0, g.stack_map[pid], param_size)
			return
		}
		asm_store_xmm_rbp_disp(mut g, g.abi.float_arg_reg_for_position(position), g.stack_map[pid],
			param_size)
		return
	}

	via_pointer := g.windows_value_passed_indirect(pid, is_indirect_param, param_size)
	g.ensure_windows_scalar_or_indirect_arg(pid, via_pointer, param_size)

	if position >= 4 {
		// Stack-passed: every path first loads the incoming slot into rax.
		incoming := g.abi.stack_arg_offset(position)
		if via_pointer {
			asm_load_reg_rbp_disp(mut g, rax, incoming)
			g.copy_indirect_param_from_reg(pid, int(rax))
		} else if g.value_needs_raw_abi_reg_bytes(pid, param_size) {
			asm_load_reg_rbp_disp(mut g, rax, incoming)
			g.store_reg_to_rbp_exact(rax, g.stack_map[pid], param_size)
		} else if assigned := g.reg_map[pid] {
			asm_load_reg_rbp_disp(mut g, rax, incoming)
			asm_mov_reg_reg(mut g, Reg(assigned), rax)
		} else {
			asm_load_reg_rbp_disp(mut g, rax, incoming)
			asm_store_rbp_disp_reg(mut g, g.stack_map[pid], rax)
		}
		return
	}

	// Register-passed.
	src := g.abi.int_arg_reg_for_position(position)
	if via_pointer {
		g.copy_indirect_param_from_reg(pid, src)
	} else if g.value_needs_raw_abi_reg_bytes(pid, param_size) {
		g.store_reg_to_rbp_exact(Reg(src), g.stack_map[pid], param_size)
	} else if assigned := g.reg_map[pid] {
		asm_mov_reg_reg(mut g, Reg(assigned), Reg(src))
	} else {
		asm_store_rbp_disp_reg(mut g, g.stack_map[pid], Reg(src))
	}
}
676
677fn (mut g Gen) gen_instr(val_id int) {
678 instr := g.mod.instrs[g.mod.values[val_id].index]
679 op := g.selected_opcode(instr)
680
681 // Temps: 0=RAX, 1=RCX
682
683 match op {
684 .add, .sub, .mul, .sdiv, .udiv, .srem, .urem, .and_, .or_, .xor, .shl, .ashr, .lshr, .eq,
685 .ne, .lt, .gt, .le, .ge, .ult, .ugt, .ule, .uge {
686 if op in [.eq, .ne, .lt, .gt, .le, .ge] && g.value_is_float_type(instr.operands[0]) {
687 g.emit_float_compare(op, instr.operands[0], instr.operands[1], val_id)
688 return
689 }
690 g.load_val_to_reg(0, instr.operands[0]) // RAX
691 g.load_val_to_reg(1, instr.operands[1]) // RCX
692
693 match op {
694 .add {
695 asm_add_rax_rcx(mut g)
696 }
697 .sub {
698 asm_sub_rax_rcx(mut g)
699 }
700 .mul {
701 asm_imul_rax_rcx(mut g)
702 }
703 .sdiv {
704 asm_cqo(mut g)
705 asm_idiv_rcx(mut g)
706 }
707 .udiv {
708 asm_xor_edx_edx(mut g)
709 asm_div_rcx(mut g)
710 }
711 .srem {
712 asm_cqo(mut g)
713 asm_idiv_rcx(mut g)
714 asm_mov_rax_rdx(mut g)
715 }
716 .urem {
717 asm_xor_edx_edx(mut g)
718 asm_div_rcx(mut g)
719 asm_mov_rax_rdx(mut g)
720 }
721 .and_ {
722 asm_and_rax_rcx(mut g)
723 }
724 .or_ {
725 asm_or_rax_rcx(mut g)
726 }
727 .xor {
728 asm_xor_rax_rcx(mut g)
729 }
730 .shl {
731 asm_shl_rax_cl(mut g)
732 }
733 .ashr {
734 asm_sar_rax_cl(mut g)
735 }
736 .lshr {
737 asm_shr_rax_cl(mut g)
738 }
739 .eq, .ne, .lt, .gt, .le, .ge, .ult, .ugt, .ule, .uge {
740 asm_cmp_rax_rcx(mut g)
741 cc := match op {
742 .eq { cc_e }
743 .ne { cc_ne }
744 .lt { cc_l }
745 .gt { cc_g }
746 .le { cc_le }
747 .ge { cc_ge }
748 .ult { cc_b }
749 .ugt { cc_a }
750 .ule { cc_be }
751 .uge { cc_ae }
752 else { cc_e }
753 }
754
755 asm_setcc_al_movzx(mut g, cc)
756 }
757 else {}
758 }
759
760 g.store_reg_to_val(0, val_id)
761 }
762 .store {
763 src_id := instr.operands[0]
764 dst_id := instr.operands[1]
765 src_typ := g.mod.values[src_id].typ
766 src_type_info := g.mod.type_store.types[src_typ]
767 src_size := g.type_size(src_typ)
768 if src_type_info.kind in [.struct_t, .array_t] || src_size > 8 {
769 g.load_struct_src_address_to_reg(int(r10), src_id, src_typ)
770 g.load_val_to_reg(int(r11), dst_id)
771 g.copy_memory(int(r11), 0, int(r10), 0, src_size)
772 } else {
773 store_size := g.scalar_store_size_for_pointer_destination(dst_id, src_size)
774 if g.value_is_float_type(src_id) && src_size in [4, 8] && store_size == src_size {
775 g.load_val_to_reg(1, dst_id) // Ptr -> RCX
776 g.load_float_val_to_xmm(0, src_id, src_size)
777 asm_store_xmm_mem_base_disp_size(mut g, 0, rcx, 0, src_size)
778 return
779 }
780 g.load_val_to_reg(0, src_id) // Val -> RAX
781 g.load_val_to_reg(1, dst_id) // Ptr -> RCX
782 asm_store_mem_base_disp_reg_size(mut g, rcx, 0, rax, store_size)
783 }
784 }
785 .load {
786 g.load_val_to_reg(1, instr.operands[0]) // Ptr -> RCX
787 load_size := g.type_size(instr.typ)
788 load_type_info := g.mod.type_store.types[instr.typ]
789 if load_type_info.kind in [.struct_t, .array_t] || load_size > 8 {
790 g.copy_memory(int(rbp), g.stack_map[val_id], int(rcx), 0, load_size)
791 } else {
792 g.load_typed_mem_to_reg(rax, rcx, 0, instr.typ, load_size)
793 g.store_reg_to_val(0, val_id)
794 }
795 }
796 .alloca {
797 off := g.alloca_offsets[val_id]
798 g.zero_large_fixed_array_alloca(val_id, off)
799 asm_lea_reg_rbp_disp(mut g, rax, off)
800 g.store_reg_to_val(0, val_id)
801 }
802 .heap_alloc {
803 alloc_size := g.heap_alloc_size(val_id)
804 cleanup := g.emit_windows_call_frame(0)
805 if g.abi == .windows {
806 asm_mov_reg_imm32(mut g, rcx, 1)
807 asm_mov_reg_imm64(mut g, rdx, u64(alloc_size))
808 } else {
809 asm_mov_reg_imm32(mut g, rdi, 1)
810 asm_mov_reg_imm64(mut g, rsi, u64(alloc_size))
811 asm_xor_eax_eax(mut g)
812 }
813 asm_call_rel32(mut g)
814 sym_idx := g.add_undefined('calloc')
815 g.add_call_reloc(sym_idx)
816 g.emit_u32(0)
817 g.cleanup_windows_call_frame(cleanup)
818 g.store_reg_to_val(0, val_id)
819 }
820 .get_element_ptr {
821 g.load_val_to_reg(0, instr.operands[0]) // Base -> RAX
822 offset := g.gep_const_offset(instr.operands[0], instr.operands[1], instr.typ)
823 if offset >= 0 {
824 if offset > 0 {
825 asm_mov_reg_imm64(mut g, rcx, u64(offset))
826 asm_add_rax_rcx(mut g)
827 }
828 } else {
829 g.load_val_to_reg(1, instr.operands[1]) // Index -> RCX
830 elem_size := g.gep_elem_size(instr.operands[0])
831 if elem_size == 8 {
832 asm_shl_rcx_3(mut g)
833 } else if elem_size > 1 {
834 asm_mov_reg_reg(mut g, rax, rcx)
835 asm_mov_reg_imm64(mut g, rcx, u64(elem_size))
836 asm_imul_rax_rcx(mut g)
837 asm_mov_reg_reg(mut g, rcx, rax)
838 g.load_val_to_reg(0, instr.operands[0])
839 }
840 asm_add_rax_rcx(mut g)
841 }
842 g.store_reg_to_val(0, val_id)
843 }
844 .call {
845 abi_regs := g.abi.int_arg_regs()
846 num_args := instr.operands.len - 1
847 stack_args := g.call_stack_arg_mask(instr, abi_regs.len, 0)
848 stack_slots := g.call_stack_slots(instr, stack_args)
849 cleanup := g.prepare_call_stack_args(instr, stack_args, stack_slots, 0)
850 if g.abi == .sysv && stack_slots > 0 {
851 if stack_slots % 2 == 1 {
852 asm_push(mut g, rax)
853 }
854 for arg_idx := num_args - 1; arg_idx >= 0; arg_idx-- {
855 if stack_args[arg_idx] {
856 g.push_call_stack_arg(instr.operands[arg_idx + 1], arg_idx, instr)
857 }
858 }
859 }
860
861 // Stack arguments were pushed above; this pass only loads register arguments.
862 sse_arg_idx := g.load_call_register_args(instr, abi_regs, stack_args, 0)
863 fn_val := g.mod.values[instr.operands[0]]
864 is_direct_symbol_call := fn_val.name != '' && fn_val.kind in [.unknown, .func_ref]
865 if !is_direct_symbol_call {
866 g.load_val_to_reg(int(r10), instr.operands[0])
867 }
868
869 // AL carries the number of SSE argument registers for variadic calls.
870 g.emit_sse_arg_count(sse_arg_idx)
871
872 if is_direct_symbol_call {
873 asm_call_rel32(mut g)
874 sym_idx := g.add_undefined(fn_val.name)
875 // Use R_X86_64_PLT32 (4) for function calls to support shared libraries (libc)
876 g.add_call_reloc(sym_idx)
877 g.emit_u32(0)
878 } else {
879 asm_call_r10(mut g)
880 }
881
882 // Clean up stack arguments
883 if g.abi == .windows {
884 g.cleanup_windows_call_frame(cleanup)
885 } else if stack_slots > 0 {
886 sysv_cleanup := (stack_slots + (stack_slots % 2)) * 8
887 if sysv_cleanup <= 127 {
888 asm_add_rsp_imm8(mut g, u8(sysv_cleanup))
889 } else {
890 asm_add_rsp_imm32(mut g, u32(sysv_cleanup))
891 }
892 }
893
894 if g.mod.type_store.types[g.mod.values[val_id].typ].kind != .void_t {
895 g.store_call_result(val_id, instr.abi_ret_class)
896 }
897 }
898 .call_sret {
899 abi_regs := if g.abi == .sysv { g.abi.int_arg_regs() } else { g.abi.sret_arg_regs() }
900 num_args := instr.operands.len - 1
901 arg_position_base := 1
902 stack_args := g.call_stack_arg_mask(instr, abi_regs.len, arg_position_base)
903 stack_slots := g.call_stack_slots(instr, stack_args)
904
905 if g.abi != .windows {
906 g.load_address_of_val_to_reg(int(g.abi.sret_reg()), val_id)
907 }
908
909 cleanup := g.prepare_call_stack_args(instr, stack_args, stack_slots, arg_position_base)
910 if g.abi == .sysv && stack_slots > 0 {
911 if stack_slots % 2 == 1 {
912 asm_push(mut g, rax)
913 }
914 for arg_idx := num_args - 1; arg_idx >= 0; arg_idx-- {
915 if stack_args[arg_idx] {
916 g.push_call_stack_arg(instr.operands[arg_idx + 1], arg_idx, instr)
917 }
918 }
919 }
920
921 // Stack arguments were pushed above; this pass only loads register arguments.
922 sse_arg_idx := g.load_call_register_args(instr, abi_regs, stack_args, arg_position_base)
923 if g.abi == .windows {
924 g.load_address_of_val_to_reg(int(g.abi.sret_reg()), val_id)
925 }
926
927 fn_val := g.mod.values[instr.operands[0]]
928 is_direct_symbol_call := fn_val.name != '' && fn_val.kind in [.unknown, .func_ref]
929 if !is_direct_symbol_call {
930 g.load_val_to_reg(int(r10), instr.operands[0])
931 }
932
933 // AL carries the number of SSE argument registers for variadic calls.
934 g.emit_sse_arg_count(sse_arg_idx)
935
936 if is_direct_symbol_call {
937 asm_call_rel32(mut g)
938 sym_idx := g.add_undefined(fn_val.name)
939 g.add_call_reloc(sym_idx)
940 g.emit_u32(0)
941 } else {
942 asm_call_r10(mut g)
943 }
944
945 // Clean up stack arguments
946 if g.abi == .windows {
947 g.cleanup_windows_call_frame(cleanup)
948 } else if stack_slots > 0 {
949 sysv_cleanup := (stack_slots + (stack_slots % 2)) * 8
950 if sysv_cleanup <= 127 {
951 asm_add_rsp_imm8(mut g, u8(sysv_cleanup))
952 } else {
953 asm_add_rsp_imm32(mut g, u32(sysv_cleanup))
954 }
955 }
956 }
957 .call_indirect {
958 // Indirect call through function pointer
959 // operands[0] is the function pointer, rest are arguments
960 abi_regs := g.abi.int_arg_regs()
961 num_args := instr.operands.len - 1
962 stack_args := g.call_stack_arg_mask(instr, abi_regs.len, 0)
963 stack_slots := g.call_stack_slots(instr, stack_args)
964 cleanup := g.prepare_call_stack_args(instr, stack_args, stack_slots, 0)
965 if g.abi == .sysv && stack_slots > 0 {
966 if stack_slots % 2 == 1 {
967 asm_push(mut g, rax)
968 }
969 for arg_idx := num_args - 1; arg_idx >= 0; arg_idx-- {
970 if stack_args[arg_idx] {
971 g.push_call_stack_arg(instr.operands[arg_idx + 1], arg_idx, instr)
972 }
973 }
974 }
975
976 // Stack arguments were pushed above; this pass only loads register arguments.
977 sse_arg_idx := g.load_call_register_args(instr, abi_regs, stack_args, 0)
978
979 // Load function pointer to r10 (caller-saved, not used for args)
980 g.load_val_to_reg(10, instr.operands[0])
981
982 // AL carries the number of SSE argument registers for variadic calls.
983 g.emit_sse_arg_count(sse_arg_idx)
984
985 // call *r10
986 asm_call_r10(mut g)
987
988 // Clean up stack arguments
989 if g.abi == .windows {
990 g.cleanup_windows_call_frame(cleanup)
991 } else if stack_slots > 0 {
992 sysv_cleanup := (stack_slots + (stack_slots % 2)) * 8
993 if sysv_cleanup <= 127 {
994 asm_add_rsp_imm8(mut g, u8(sysv_cleanup))
995 } else {
996 asm_add_rsp_imm32(mut g, u32(sysv_cleanup))
997 }
998 }
999
1000 if g.mod.type_store.types[g.mod.values[val_id].typ].kind != .void_t {
1001 g.store_call_result(val_id, instr.abi_ret_class)
1002 }
1003 }
1004 .ret {
1005 if g.cur_func_abi_ret_indirect {
1006 if g.sret_save_offset != 0 {
1007 asm_load_reg_rbp_disp(mut g, g.abi.sret_reg(), g.sret_save_offset)
1008 }
1009 if instr.operands.len > 0 {
1010 ret_val_id := instr.operands[0]
1011 ret_size := g.type_size(g.cur_func_ret_type)
1012 if ret_size > 0 {
1013 g.load_struct_src_address_to_reg(int(r10), ret_val_id, g.cur_func_ret_type)
1014 g.copy_memory(int(g.abi.sret_reg()), 0, int(r10), 0, ret_size)
1015 }
1016 }
1017 asm_mov_reg_reg(mut g, rax, g.abi.sret_reg())
1018 } else if instr.operands.len > 0 {
1019 ret_val_id := instr.operands[0]
1020 g.ensure_windows_direct_return_supported(ret_val_id, g.cur_func_abi_ret_class,
1021 'direct return')
1022 if g.value_is_float_type(ret_val_id) {
1023 g.ensure_float_abi_scalar(ret_val_id, 'return')
1024 g.load_float_val_to_xmm(0, ret_val_id,
1025 g.type_size(g.mod.values[ret_val_id].typ))
1026 } else if g.load_sysv_direct_aggregate_return(ret_val_id, g.cur_func_abi_ret_class) {
1027 } else if g.load_sysv_integer_pair_return(ret_val_id, g.cur_func_abi_ret_class) {
1028 } else {
1029 g.load_val_to_reg(0, ret_val_id)
1030 }
1031 }
1032 g.emit_epilogue()
1033 }
1034 .jmp {
1035 target_idx := g.mod.values[instr.operands[0]].index
1036 g.emit_jmp(target_idx)
1037 }
1038 .br {
1039 cond_id := instr.operands[0]
1040 true_blk := g.mod.values[instr.operands[1]].index
1041 false_blk := g.mod.values[instr.operands[2]].index
1042
1043 // Test condition register directly if register-allocated
1044 if reg := g.reg_map[cond_id] {
1045 asm_test_reg_reg(mut g, Reg(reg))
1046 } else {
1047 g.load_val_to_reg(0, cond_id)
1048 asm_test_rax_rax(mut g)
1049 }
1050
1051 // Emit je false_blk (jump if zero/false)
1052 asm_je_rel32(mut g)
1053 g.emit_rel32_to_block(false_blk)
1054 // Jump to true block (can't assume it's the next block)
1055 g.emit_jmp(true_blk)
1056 }
1057 .switch_ {
1058 g.load_val_to_reg(0, instr.operands[0]) // RAX
1059 for i := 2; i < instr.operands.len; i += 2 {
1060 g.load_val_to_reg(1, instr.operands[i])
1061 asm_cmp_rax_rcx(mut g)
1062 asm_je_rel32(mut g)
1063 target_idx := g.mod.values[instr.operands[i + 1]].index
1064 g.emit_rel32_to_block(target_idx)
1065 }
1066 def_idx := g.mod.values[instr.operands[1]].index
1067 g.emit_jmp(def_idx)
1068 }
1069 .assign {
1070 dest_id := instr.operands[0]
1071 src_id := instr.operands[1]
1072 dest_size := g.type_size(g.mod.values[dest_id].typ)
1073 if g.value_is_aggregate(dest_id) || dest_size > 8 {
1074 g.copy_value_bytes(dest_id, src_id, dest_size)
1075 } else {
1076 g.load_val_to_reg(0, src_id)
1077 g.store_reg_to_val(0, dest_id)
1078 }
1079 }
1080 .bitcast, .trunc, .zext, .sext {
1081 if instr.operands.len > 0 {
1082 src_typ := g.mod.values[instr.operands[0]].typ
1083 dst_typ := instr.typ
1084 src_info := g.mod.type_store.types[src_typ]
1085 dst_info := g.mod.type_store.types[dst_typ]
1086 src_size := g.type_size(src_typ)
1087 dst_size := g.type_size(dst_typ)
1088 if op in [.trunc, .zext] && src_info.kind == .float_t && dst_info.kind == .float_t {
1089 g.load_float_val_to_xmm(0, instr.operands[0], src_size)
1090 if op == .trunc && src_size == 8 && dst_size == 4 {
1091 asm_cvtsd2ss_xmm0_xmm0(mut g)
1092 } else if op == .zext && src_size == 4 && dst_size == 8 {
1093 asm_cvtss2sd_xmm0_xmm0(mut g)
1094 } else {
1095 g.unsupported_numeric_conversion(op, src_size, dst_size, val_id)
1096 }
1097 asm_store_xmm0_rbp_disp(mut g, g.stack_map[val_id], dst_size)
1098 } else if op in [.trunc, .zext, .sext] && src_info.kind == .int_t
1099 && dst_info.kind == .int_t {
1100 g.load_val_to_reg(0, instr.operands[0])
1101 if op == .trunc {
1102 g.normalize_integer_rax_for_type(dst_typ, op, val_id)
1103 } else if op == .zext {
1104 g.mask_rax_to_size(src_size, op, val_id)
1105 } else if op == .sext {
1106 if src_size == 1 {
1107 asm_movsx_rax_al(mut g)
1108 } else if src_size == 2 {
1109 asm_movsx_rax_ax(mut g)
1110 } else if src_size == 4 {
1111 asm_movsxd_rax_eax(mut g)
1112 } else if src_size != 8 {
1113 g.unsupported_numeric_conversion(op, src_size, dst_size, val_id)
1114 }
1115 }
1116 g.store_reg_to_val(0, val_id)
1117 } else if op == .bitcast {
1118 g.load_val_to_reg(0, instr.operands[0])
1119 g.store_reg_to_val(0, val_id)
1120 } else {
1121 g.unsupported_numeric_conversion(op, src_size, dst_size, val_id)
1122 }
1123 }
1124 }
1125 .sitofp, .uitofp {
1126 if instr.operands.len > 0 {
1127 g.load_val_to_reg(0, instr.operands[0])
1128 src_size := g.type_size(g.mod.values[instr.operands[0]].typ)
1129 if op == .uitofp {
1130 g.emit_unsigned_int_to_float(src_size, g.type_size(instr.typ), val_id)
1131 } else {
1132 g.emit_signed_int_to_float(g.type_size(instr.typ), op, val_id)
1133 }
1134 asm_store_xmm0_rbp_disp(mut g, g.stack_map[val_id], g.type_size(instr.typ))
1135 }
1136 }
1137 .fptosi, .fptoui {
1138 if instr.operands.len > 0 {
1139 src_size := g.type_size(g.mod.values[instr.operands[0]].typ)
1140 dst_size := g.type_size(instr.typ)
1141 g.load_float_val_to_xmm(0, instr.operands[0], src_size)
1142 if op == .fptoui {
1143 g.emit_float_to_unsigned_int(src_size, dst_size, val_id)
1144 } else {
1145 g.emit_float_to_signed_int(src_size, op, val_id)
1146 }
1147 g.store_reg_to_val(0, val_id)
1148 }
1149 }
1150 .fadd, .fsub, .fmul, .fdiv {
1151 result_size := g.type_size(instr.typ)
1152 g.load_float_val_to_xmm(0, instr.operands[0], result_size)
1153 g.load_float_val_to_xmm(1, instr.operands[1], result_size)
1154 opcode := match op {
1155 .fadd { u8(0x58) }
1156 .fsub { u8(0x5C) }
1157 .fmul { u8(0x59) }
1158 .fdiv { u8(0x5E) }
1159 else { u8(0x58) }
1160 }
1161
1162 asm_float_binop_xmm0_xmm1(mut g, opcode, result_size)
1163 asm_store_xmm0_rbp_disp(mut g, g.stack_map[val_id], result_size)
1164 }
1165 .inline_string_init {
1166 g.zero_value_bytes(val_id, g.stack_storage_size(val_id))
1167 for fi, field_id in instr.operands {
1168 field_typ := g.struct_field_type(instr.typ, fi, g.mod.values[field_id].typ)
1169 g.store_field_value(val_id, instr.typ, fi, field_id, g.type_size(field_typ))
1170 }
1171 }
1172 .struct_init {
1173 g.zero_value_bytes(val_id, g.type_size(instr.typ))
1174 for fi, field_id in instr.operands {
1175 field_typ := g.struct_field_type(instr.typ, fi, g.mod.values[field_id].typ)
1176 g.store_field_value(val_id, instr.typ, fi, field_id, g.type_size(field_typ))
1177 }
1178 }
1179 .insertvalue {
1180 tuple_id := instr.operands[0]
1181 elem_id := instr.operands[1]
1182 idx := g.const_int_operand(instr.operands[2])
1183 total_size := g.type_size(instr.typ)
1184 if !(g.mod.values[tuple_id].kind == .constant && g.mod.values[tuple_id].name == 'undef') {
1185 g.copy_value_bytes(val_id, tuple_id, total_size)
1186 } else {
1187 g.zero_value_bytes(val_id, total_size)
1188 }
1189 elem_typ := g.struct_field_type(instr.typ, idx, g.mod.values[elem_id].typ)
1190 g.store_field_value(val_id, instr.typ, idx, elem_id, g.type_size(elem_typ))
1191 }
1192 .extractvalue {
1193 tuple_id := instr.operands[0]
1194 idx := g.const_int_operand(instr.operands[1])
1195 field_off := g.struct_field_offset_bytes(g.mod.values[tuple_id].typ, idx)
1196 result_size := g.type_size(instr.typ)
1197 g.load_struct_src_address_to_reg(int(r10), tuple_id, g.mod.values[tuple_id].typ)
1198 if result_size > 8 || g.value_is_aggregate(val_id) {
1199 g.copy_memory(int(rbp), g.stack_map[val_id], int(r10), field_off, result_size)
1200 } else {
1201 g.load_typed_mem_to_reg(rax, r10, field_off, instr.typ, result_size)
1202 g.store_reg_to_val(0, val_id)
1203 }
1204 }
1205 .phi {
1206 // Phi nodes are eliminated by optimization (converted to assignments)
1207 // but the instructions remain in the block. We ignore them here.
1208 }
1209 .unreachable {
1210 // Emit UD2 instruction (undefined trap)
1211 asm_ud2(mut g)
1212 }
1213 else {
1214 x64_unsupported('op ${op} in value ${val_id}')
1215 }
1216 }
1217}
1218
// mask_rax_to_size emits code that keeps only the low `size` bytes of rax.
// Sizes 1, 2 and 4 AND rax with the matching all-ones mask, which is first
// placed in rcx; size 8 emits nothing. Any other size is reported through
// unsupported_numeric_conversion, with `op` and `val_id` used for the diagnostic.
fn (mut g Gen) mask_rax_to_size(size int, op ssa.OpCode, val_id int) {
	if size == 8 {
		// Full register width: there is nothing to clear.
		return
	}
	if size == 1 {
		asm_mov_reg_imm64(mut g, rcx, 0xff)
		asm_and_rax_rcx(mut g)
	} else if size == 2 {
		asm_mov_reg_imm64(mut g, rcx, 0xffff)
		asm_and_rax_rcx(mut g)
	} else if size == 4 {
		asm_mov_reg_imm64(mut g, rcx, 0xffffffff)
		asm_and_rax_rcx(mut g)
	} else {
		g.unsupported_numeric_conversion(op, size, size, val_id)
	}
}
1239
// emit_float_compare emits a floating point comparison of `lhs` against `rhs`
// and stores the result register (register 0) into `val_id`.
// Both operands must be 4 or 8 bytes wide and `rhs` must be a float value;
// otherwise the combination is reported as unsupported. If the widths differ,
// the 4-byte operand is widened so that the comparison runs at 8 bytes.
fn (mut g Gen) emit_float_compare(op ssa.OpCode, lhs int, rhs int, val_id int) {
	lhs_bytes := g.type_size(g.mod.values[lhs].typ)
	rhs_bytes := g.type_size(g.mod.values[rhs].typ)
	widths_ok := lhs_bytes in [4, 8] && rhs_bytes in [4, 8]
	if !widths_ok || !g.value_is_float_type(rhs) {
		g.unsupported_numeric_conversion(op, lhs_bytes, rhs_bytes, val_id)
	}
	g.load_float_val_to_xmm(0, lhs, lhs_bytes)
	g.load_float_val_to_xmm(1, rhs, rhs_bytes)
	// Compare at the wider of the two operand widths.
	cmp_bytes := if lhs_bytes == 8 || rhs_bytes == 8 { 8 } else { 4 }
	if cmp_bytes == 8 {
		if lhs_bytes == 4 {
			asm_cvtss2sd_xmm0_xmm0(mut g)
		}
		if rhs_bytes == 4 {
			asm_cvtss2sd_xmm1_xmm1(mut g)
		}
	}
	asm_ucomis_xmm0_xmm1(mut g, cmp_bytes)
	// For eq/lt/le the primary condition is ANDed with "not parity", and for ne
	// it is ORed with "parity" (presumably so unordered/NaN operands give the
	// expected answer — confirm against the UCOMISS/UCOMISD flag table).
	match op {
		.eq {
			asm_setcc_al_movzx(mut g, cc_e)
			asm_setcc_cl_movzx(mut g, cc_np)
			asm_and_rax_rcx(mut g)
		}
		.ne {
			asm_setcc_al_movzx(mut g, cc_ne)
			asm_setcc_cl_movzx(mut g, cc_p)
			asm_or_rax_rcx(mut g)
		}
		.lt {
			asm_setcc_al_movzx(mut g, cc_b)
			asm_setcc_cl_movzx(mut g, cc_np)
			asm_and_rax_rcx(mut g)
		}
		.le {
			asm_setcc_al_movzx(mut g, cc_be)
			asm_setcc_cl_movzx(mut g, cc_np)
			asm_and_rax_rcx(mut g)
		}
		.gt {
			asm_setcc_al_movzx(mut g, cc_a)
		}
		.ge {
			asm_setcc_al_movzx(mut g, cc_ae)
		}
		else {
			g.unsupported_numeric_conversion(op, cmp_bytes, cmp_bytes, val_id)
		}
	}

	g.store_reg_to_val(0, val_id)
}
1290
// emit_signed_int_to_float emits the signed integer -> float conversion from rax
// into xmm0, choosing the single (4-byte) or double (8-byte) form by `result_size`.
// Any other result size is reported as an unsupported conversion for `op`/`val_id`.
fn (mut g Gen) emit_signed_int_to_float(result_size int, op ssa.OpCode, val_id int) {
	match result_size {
		4 {
			asm_cvtsi2ss_xmm0_rax(mut g)
		}
		8 {
			asm_cvtsi2sd_xmm0_rax(mut g)
		}
		else {
			g.unsupported_numeric_conversion(op, 8, result_size, val_id)
		}
	}
}
1300
// emit_unsigned_int_to_float emits an unsigned integer (in rax) -> float (xmm0)
// conversion built on top of the signed conversion.
// Sources narrower than 8 bytes are masked to their width and converted as a
// signed value. 8-byte sources branch on the sign bit: non-negative values use
// the signed conversion directly, while values with the top bit set are halved
// (keeping the shifted-out low bit ORed back in), converted, and then doubled.
fn (mut g Gen) emit_unsigned_int_to_float(src_size int, result_size int, val_id int) {
	if src_size < 8 {
		g.mask_rax_to_size(src_size, .uitofp, val_id)
		g.emit_signed_int_to_float(result_size, .uitofp, val_id)
		return
	}
	if src_size != 8 {
		g.unsupported_numeric_conversion(.uitofp, src_size, result_size, val_id)
	}
	// Sign bit clear -> jump to the plain signed conversion below.
	asm_test_rax_rax(mut g)
	asm_jns_rel32(mut g)
	nonneg_fixup := g.text_len()
	g.emit_u32(0)
	// Top bit set: rax = (rax >> 1) | (rax & 1), convert, then xmm0 += xmm0.
	asm_mov_reg_reg(mut g, rcx, rax)
	asm_and_rcx_imm8(mut g, 1)
	asm_shr_rax_1(mut g)
	asm_or_rax_rcx(mut g)
	g.emit_signed_int_to_float(result_size, .uitofp, val_id)
	asm_add_float_xmm0_xmm0(mut g, result_size)
	asm_jmp_rel32(mut g)
	done_fixup := g.text_len()
	g.emit_u32(0)
	// Non-negative path starts here.
	g.patch_rel32(nonneg_fixup)
	g.emit_signed_int_to_float(result_size, .uitofp, val_id)
	g.patch_rel32(done_fixup)
}
1327
// emit_float_to_signed_int emits the truncating float (xmm0) -> signed integer
// (rax) conversion for a 4-byte or 8-byte source. Other source sizes are
// reported as an unsupported conversion for `op`/`val_id`.
fn (mut g Gen) emit_float_to_signed_int(src_size int, op ssa.OpCode, val_id int) {
	match src_size {
		4 {
			asm_cvttss2si_rax_xmm0(mut g)
		}
		8 {
			asm_cvttsd2si_rax_xmm0(mut g)
		}
		else {
			g.unsupported_numeric_conversion(op, src_size, 8, val_id)
		}
	}
}
1337
// emit_float_to_unsigned_int emits a float (xmm0) -> unsigned integer (rax)
// conversion built on top of the signed conversion.
// For destinations other than 8 bytes the signed conversion is used and the
// result is masked to `dst_size`. For 8-byte destinations the input is compared
// with 2^63: smaller inputs convert directly; inputs at or above 2^63 have 2^63
// subtracted first and the top bit is ORed back into the integer result.
fn (mut g Gen) emit_float_to_unsigned_int(src_size int, dst_size int, val_id int) {
	if dst_size != 8 {
		g.emit_float_to_signed_int(src_size, .fptoui, val_id)
		g.mask_rax_to_size(dst_size, .fptoui, val_id)
		return
	}
	g.load_fp_2p63_to_xmm1(src_size, val_id)
	asm_ucomis_xmm0_xmm1(mut g, src_size)
	asm_jae_rel32(mut g)
	large_fixup := g.text_len()
	g.emit_u32(0)
	// Below 2^63: the signed conversion is already correct.
	g.emit_float_to_signed_int(src_size, .fptoui, val_id)
	asm_jmp_rel32(mut g)
	done_fixup := g.text_len()
	g.emit_u32(0)
	// At or above 2^63: convert (x - 2^63) and set bit 63 afterwards.
	g.patch_rel32(large_fixup)
	asm_sub_float_xmm0_xmm1(mut g, src_size)
	g.emit_float_to_signed_int(src_size, .fptoui, val_id)
	asm_mov_reg_imm64(mut g, rcx, 0x8000000000000000)
	asm_or_rax_rcx(mut g)
	g.patch_rel32(done_fixup)
}
1360
// load_fp_2p63_to_xmm1 places the floating point constant 2^63 in read-only data
// and emits code that loads it into xmm1.
// `size` selects the encoding: 4 bytes (0x5f000000) or 8 bytes
// (0x43e0000000000000), written little-endian. Other sizes are reported as an
// unsupported .fptoui conversion. The address is formed in r10 through a
// RIP-relative lea whose displacement is filled in by a relocation.
fn (mut g Gen) load_fp_2p63_to_xmm1(size int, val_id int) {
	mut bytes := []u8{len: size}
	match size {
		4 {
			binary.little_endian_put_u32(mut bytes, 0x5f000000)
		}
		8 {
			binary.little_endian_put_u64(mut bytes, 0x43e0000000000000)
		}
		else {
			g.unsupported_numeric_conversion(.fptoui, size, 8, val_id)
		}
	}
	// Remember where the constant lands before appending it.
	str_offset := g.rodata_len()
	g.add_rodata(bytes)
	sym_idx := g.add_symbol('L_fp_${g.curr_offset}_${str_offset}', u64(str_offset), false,
		.rodata)
	asm_lea_reg_rip(mut g, r10)
	g.add_rip_reloc(sym_idx)
	g.emit_u32(0) // displacement placeholder for the relocation
	asm_load_xmm_mem_base_disp_size(mut g, 1, r10, 0, size)
}
1379
// patch_rel32 overwrites the 4-byte field at `patch_pos` with the distance from
// the end of that field to the current end of the emitted text.
fn (mut g Gen) patch_rel32(patch_pos int) {
	rel := g.text_len() - patch_pos - 4
	g.write_u32(patch_pos, u32(rel))
}
1385
// unsupported_numeric_conversion reports, via x64_unsupported, a numeric
// conversion the backend cannot emit. Sizes are given in bytes and are shown
// in bits in the message.
fn (g Gen) unsupported_numeric_conversion(op ssa.OpCode, src_size int, dst_size int, val_id int) {
	x64_unsupported('numeric conversion ${op} from ${src_size * 8}-bit to ${dst_size * 8}-bit in value ${val_id}')
}
1389
// const_int_operand returns the integer parsed from the name of value `val_id`.
// Ids that are not strictly inside 1..values.len-1 yield 0.
fn (g Gen) const_int_operand(val_id int) int {
	if val_id <= 0 || val_id >= g.mod.values.len {
		return 0
	}
	return int(g.mod.values[val_id].name.i64())
}
1396
// heap_alloc_size returns the byte size to allocate for value `val_id`.
// The starting minimum is the pointee size when the value has a valid pointer
// type with a positive-sized element, and 8 otherwise; alloc_size_from_uses
// may then raise it based on how the pointer is used.
fn (g Gen) heap_alloc_size(val_id int) int {
	typ_id := g.mod.values[val_id].typ
	mut floor := 8
	if typ_id > 0 && typ_id < g.mod.type_store.types.len {
		ptr_info := g.mod.type_store.types[typ_id]
		if ptr_info.kind == .ptr_t && ptr_info.elem_type > 0 {
			elem_bytes := g.type_size(ptr_info.elem_type)
			if elem_bytes > 0 {
				floor = elem_bytes
			}
		}
	}
	return g.alloc_size_from_uses(val_id, floor)
}
1409
// valid_x64_scalar_memory_size reports whether `size` is 1, 2, 4 or 8 bytes.
fn valid_x64_scalar_memory_size(size int) bool {
	return size == 1 || size == 2 || size == 4 || size == 8
}
1413
// x64_scalar_memory_size_or_default returns `size` unchanged when it is a valid
// scalar memory size (1, 2, 4 or 8) and falls back to 8 otherwise.
fn x64_scalar_memory_size_or_default(size int) int {
	if valid_x64_scalar_memory_size(size) {
		return size
	}
	return 8
}
1417
// scalar_store_size_for_pointer_destination picks the byte width for a scalar
// store through pointer value `ptr_id`.
// When the pointer has a valid pointer type whose element size is 1, 2, 4 or 8,
// that element size is returned. In every other case the result is
// `fallback_size` if that is a valid scalar size, else 8.
fn (g Gen) scalar_store_size_for_pointer_destination(ptr_id int, fallback_size int) int {
	fallback := x64_scalar_memory_size_or_default(fallback_size)
	if ptr_id <= 0 || ptr_id >= g.mod.values.len {
		return fallback
	}
	typ_id := g.mod.values[ptr_id].typ
	if typ_id <= 0 || typ_id >= g.mod.type_store.types.len {
		return fallback
	}
	ptr_info := g.mod.type_store.types[typ_id]
	if ptr_info.kind != .ptr_t || ptr_info.elem_type <= 0 {
		return fallback
	}
	elem_bytes := g.type_size(ptr_info.elem_type)
	return if valid_x64_scalar_memory_size(elem_bytes) { elem_bytes } else { fallback }
}
1436
// store_access_size returns how many bytes a store of `src_id` through `dst_id`
// touches. Struct/array sources and sources wider than 8 bytes use the source
// type size; everything else is sized from the destination pointer, with the
// source size as the fallback.
fn (g Gen) store_access_size(src_id int, dst_id int) int {
	typ_id := g.mod.values[src_id].typ
	info := g.mod.type_store.types[typ_id]
	bytes := g.type_size(typ_id)
	is_aggregate := info.kind in [.struct_t, .array_t]
	if is_aggregate || bytes > 8 {
		return bytes
	}
	return g.scalar_store_size_for_pointer_destination(dst_id, bytes)
}
1446
// alloc_size_from_uses returns the number of bytes the allocation behind
// `ptr_id` must provide, judged from its direct uses.
// The result starts at `min_size` (8 when `min_size` is not positive) and is
// raised by:
//   - stores whose destination is `ptr_id`, to the stored byte count;
//   - get_element_ptr instructions based on `ptr_id` with a constant index, to
//     the constant offset plus the size accessed through the derived pointer
//     (or one element size when no access through it is found).
fn (g Gen) alloc_size_from_uses(ptr_id int, min_size int) int {
	mut needed := if min_size > 0 { min_size } else { 8 }
	if ptr_id <= 0 || ptr_id >= g.mod.values.len {
		return needed
	}
	for user in g.mod.values[ptr_id].uses {
		if user <= 0 || user >= g.mod.values.len {
			continue
		}
		if g.mod.values[user].kind != .instruction {
			continue
		}
		ins := g.mod.instrs[g.mod.values[user].index]
		if ins.op == .store && ins.operands.len >= 2 && ins.operands[1] == ptr_id {
			stored := g.store_access_size(ins.operands[0], ins.operands[1])
			if stored > needed {
				needed = stored
			}
		}
		is_gep_on_ptr := ins.op == .get_element_ptr && ins.operands.len >= 2
			&& ins.operands[0] == ptr_id
		if !is_gep_on_ptr {
			continue
		}
		offset := g.gep_const_offset(ptr_id, ins.operands[1], ins.typ)
		if offset < 0 {
			// Negative result (e.g. non-constant index): nothing to learn here.
			continue
		}
		accessed := g.pointer_access_size(user)
		span := if accessed > 0 { accessed } else { g.gep_elem_size(ptr_id) }
		if offset + span > needed {
			needed = offset + span
		}
	}
	return needed
}
1479
// pointer_access_size returns the largest byte count read or written directly
// through pointer value `ptr_id`, considering only its store (as destination)
// and load (as address) uses. Returns 0 for an out-of-range id or when no such
// use exists.
fn (g Gen) pointer_access_size(ptr_id int) int {
	if ptr_id <= 0 || ptr_id >= g.mod.values.len {
		return 0
	}
	mut widest := 0
	for user in g.mod.values[ptr_id].uses {
		if user <= 0 || user >= g.mod.values.len {
			continue
		}
		if g.mod.values[user].kind != .instruction {
			continue
		}
		ins := g.mod.instrs[g.mod.values[user].index]
		mut touched := 0
		if ins.op == .store && ins.operands.len >= 2 && ins.operands[1] == ptr_id {
			touched = g.store_access_size(ins.operands[0], ins.operands[1])
		} else if ins.op == .load && ins.operands.len >= 1 && ins.operands[0] == ptr_id {
			touched = g.type_size(ins.typ)
		} else {
			continue
		}
		if touched > widest {
			widest = touched
		}
	}
	return widest
}
1505
// struct_field_type returns the type id of member `field_idx` of the aggregate
// type `struct_typ_id`: the declared field type for a struct with that field,
// or the element type for an array. `fallback` is returned when the type id is
// out of range, the field index is out of range, or the type is neither.
fn (g Gen) struct_field_type(struct_typ_id int, field_idx int, fallback int) int {
	if struct_typ_id <= 0 || struct_typ_id >= g.mod.type_store.types.len {
		return fallback
	}
	info := g.mod.type_store.types[struct_typ_id]
	match info.kind {
		.struct_t {
			if field_idx >= 0 && field_idx < info.fields.len {
				return info.fields[field_idx]
			}
		}
		.array_t {
			return info.elem_type
		}
		else {}
	}
	return fallback
}
1518
// struct_field_offset_bytes returns the byte offset of member `field_idx` inside
// the aggregate type `struct_typ_id`.
//   - array: field_idx * element size
//   - union: always 0
//   - struct: fields are laid out in order, each rounded up to its alignment
// For an out-of-range type id, a non-aggregate type, or a field index not found
// in the struct, the result is field_idx * 8.
fn (g Gen) struct_field_offset_bytes(struct_typ_id int, field_idx int) int {
	word_guess := field_idx * 8
	if struct_typ_id <= 0 || struct_typ_id >= g.mod.type_store.types.len {
		return word_guess
	}
	info := g.mod.type_store.types[struct_typ_id]
	if info.kind == .array_t {
		return field_idx * g.type_size(info.elem_type)
	}
	if info.kind != .struct_t {
		return word_guess
	}
	if info.is_union {
		return 0
	}
	mut cursor := 0
	for i := 0; i < info.fields.len; i++ {
		member_typ := info.fields[i]
		align := g.type_align(member_typ)
		if align > 1 && cursor % align != 0 {
			// Round up to the next multiple of `align`.
			cursor = (cursor + align - 1) & ~(align - 1)
		}
		if i == field_idx {
			return cursor
		}
		cursor += g.type_size(member_typ)
	}
	return word_guess
}
1546
// gep_const_offset returns the constant byte offset of a get_element_ptr with
// base `base_id` and index `idx_id`, or -1 when the index is not a constant
// value.
// When the base is not a usable pointer value the offset is index * 8. For a
// pointer to a struct the index selects a field, unless the result type is a
// pointer to that same struct type, in which case the index steps over whole
// structs. For a pointer to an array the index is scaled by the array element
// size; otherwise it is scaled by the pointee size.
fn (g Gen) gep_const_offset(base_id int, idx_id int, result_typ_id ssa.TypeID) int {
	if idx_id <= 0 || idx_id >= g.mod.values.len {
		return -1
	}
	if g.mod.values[idx_id].kind != .constant {
		return -1
	}
	idx := g.const_int_operand(idx_id)
	if base_id <= 0 || base_id >= g.mod.values.len {
		return idx * 8
	}
	base_typ_id := g.mod.values[base_id].typ
	if base_typ_id <= 0 || base_typ_id >= g.mod.type_store.types.len {
		return idx * 8
	}
	base_info := g.mod.type_store.types[base_typ_id]
	if base_info.kind != .ptr_t {
		return idx * 8
	}
	pointee := g.mod.type_store.types[base_info.elem_type]
	if pointee.kind == .array_t {
		return idx * g.type_size(pointee.elem_type)
	}
	if pointee.kind != .struct_t {
		return idx * g.type_size(base_info.elem_type)
	}
	if result_typ_id > 0 && result_typ_id < g.mod.type_store.types.len {
		result_info := g.mod.type_store.types[result_typ_id]
		if result_info.kind == .ptr_t && result_info.elem_type == base_info.elem_type {
			// Result still points at the struct type: pointer arithmetic.
			return idx * g.type_size(base_info.elem_type)
		}
	}
	return g.struct_field_offset_bytes(base_info.elem_type, idx)
}
1578
// gep_elem_size returns the element size in bytes used when indexing from
// pointer value `base_id`: the array element size for a pointer to an array,
// otherwise the pointee size. The result is 8 when the base is not a valid
// pointer value or the computed size is not positive.
fn (g Gen) gep_elem_size(base_id int) int {
	if base_id <= 0 || base_id >= g.mod.values.len {
		return 8
	}
	base_typ_id := g.mod.values[base_id].typ
	if base_typ_id <= 0 || base_typ_id >= g.mod.type_store.types.len {
		return 8
	}
	base_info := g.mod.type_store.types[base_typ_id]
	if base_info.kind != .ptr_t {
		return 8
	}
	pointee := g.mod.type_store.types[base_info.elem_type]
	bytes := if pointee.kind == .array_t {
		g.type_size(pointee.elem_type)
	} else {
		g.type_size(base_info.elem_type)
	}
	return if bytes > 0 { bytes } else { 8 }
}
1597
// zero_value_bytes emits code that writes `size` zero bytes over the stack slot
// of value `val_id` (addressed off rbp). Nothing is emitted when `size` is not
// positive.
fn (mut g Gen) zero_value_bytes(val_id int, size int) {
	if size > 0 {
		slot_off := g.stack_map[val_id]
		asm_xor_reg_reg(mut g, rax) // rax is the zero source for the stores
		g.store_repeated_zero(int(rbp), slot_off, size)
	}
}
1606
// zero_large_fixed_array_alloca emits code that zero-fills the storage at
// rbp + `off` when `val_id` is a pointer to a fixed array with more than 16
// elements and a positive byte size. In every other case it emits nothing.
fn (mut g Gen) zero_large_fixed_array_alloca(val_id int, off int) {
	if val_id <= 0 || val_id >= g.mod.values.len {
		return
	}
	ptr_typ_id := g.mod.values[val_id].typ
	if ptr_typ_id <= 0 || ptr_typ_id >= g.mod.type_store.types.len {
		return
	}
	ptr_info := g.mod.type_store.types[ptr_typ_id]
	if ptr_info.kind != .ptr_t {
		return
	}
	if ptr_info.elem_type <= 0 || ptr_info.elem_type >= g.mod.type_store.types.len {
		return
	}
	pointee := g.mod.type_store.types[ptr_info.elem_type]
	is_large_array := pointee.kind == .array_t && pointee.len > 16
	if !is_large_array {
		return
	}
	total_bytes := g.type_size(ptr_info.elem_type)
	if total_bytes > 0 {
		asm_xor_reg_reg(mut g, rax)
		g.store_repeated_zero(int(rbp), off, total_bytes)
	}
}
1631
// store_repeated_zero emits stores of rax covering `size` bytes starting at
// [base + off]: 8-byte stores while at least 8 bytes remain, then 4/2/1-byte
// stores for the tail. The caller is expected to have put the fill value
// (zero) in rax beforehand.
fn (mut g Gen) store_repeated_zero(base int, off int, size int) {
	mut written := 0
	for written < size {
		left := size - written
		width := if left >= 8 { 8 } else { raw_memory_chunk_size(left) }
		asm_store_mem_base_disp_reg_size(mut g, Reg(base), off + written, rax, width)
		written += width
	}
}
1644
// copy_value_bytes emits code that copies `size` bytes from the storage of
// `src_id` (its address is materialized in r10) into the stack slot of
// `dst_id`. Nothing is emitted when `size` is not positive.
fn (mut g Gen) copy_value_bytes(dst_id int, src_id int, size int) {
	if size > 0 {
		g.load_struct_src_address_to_reg(int(r10), src_id, g.mod.values[src_id].typ)
		g.copy_memory(int(rbp), g.stack_map[dst_id], int(r10), 0, size)
	}
}
1652
// copy_memory emits a load/store sequence that copies `size` bytes from
// [src_base + src_off] to [dst_base + dst_off], staging each piece in rax.
// 8-byte pieces are used while at least 8 bytes remain, then 4/2/1-byte pieces.
fn (mut g Gen) copy_memory(dst_base int, dst_off int, src_base int, src_off int, size int) {
	mut copied := 0
	for copied < size {
		left := size - copied
		width := if left >= 8 { 8 } else { raw_memory_chunk_size(left) }
		asm_load_reg_mem_base_disp_size(mut g, rax, Reg(src_base), src_off + copied, width)
		asm_store_mem_base_disp_reg_size(mut g, Reg(dst_base), dst_off + copied, rax, width)
		copied += width
	}
}
1667
// raw_memory_chunk_size returns the widest piece (4, 2 or 1 bytes) that fits in
// `size` remaining bytes. Sizes below 2, including non-positive ones, yield 1.
fn raw_memory_chunk_size(size int) int {
	return if size >= 4 {
		4
	} else if size >= 2 {
		2
	} else {
		1
	}
}
1677
// is_raw_abi_reg_size reports whether `size` is anything other than 1, 2, 4 or 8.
// NOTE(review): this is also true for 0, negative sizes and sizes above 8 —
// confirm against the callers that this is the intended set.
fn is_raw_abi_reg_size(size int) bool {
	return !(size == 1 || size == 2 || size == 4 || size == 8)
}
1681
// load_typed_mem_to_reg emits a `size`-byte load from [base + disp] into `reg`,
// using the signed load helper when `typ_id` is a signed integer type and the
// plain load helper for every other type.
fn (mut g Gen) load_typed_mem_to_reg(reg Reg, base Reg, disp int, typ_id int, size int) {
	info := g.mod.type_store.types[typ_id]
	is_signed_int := info.kind == .int_t && !info.is_unsigned
	if is_signed_int {
		asm_load_reg_mem_base_disp_size_signed(mut g, reg, base, disp, size)
	} else {
		asm_load_reg_mem_base_disp_size(mut g, reg, base, disp, size)
	}
}
1690
// load_raw_mem_to_reg emits code that loads exactly `size` bytes from
// [base + disp] into `reg`.
// Sizes 1, 2, 4 and 8 use a single load. Other sizes up to 8 are assembled in
// rax from 4/2/1-byte pieces that are loaded into r11, shifted to their byte
// position and ORed in; the result is then moved to `reg` if it is not rax.
// Sizes outside 1..8 are reported as unsupported.
fn (mut g Gen) load_raw_mem_to_reg(reg Reg, base Reg, disp int, size int) {
	if size in [1, 2, 4, 8] {
		asm_load_reg_mem_base_disp_size(mut g, reg, base, disp, size)
		return
	}
	if size <= 0 || size > 8 {
		x64_unsupported('raw memory size ${size}')
	}
	asm_xor_reg_reg(mut g, rax)
	mut loaded := 0
	for loaded < size {
		piece := raw_memory_chunk_size(size - loaded)
		asm_load_reg_mem_base_disp_size(mut g, r11, base, disp + loaded, piece)
		if loaded > 0 {
			// Move the piece up to its byte position before merging.
			asm_shl_r11_imm8(mut g, u8(loaded * 8))
		}
		asm_or_rax_r11(mut g)
		loaded += piece
	}
	if reg != rax {
		asm_mov_reg_reg(mut g, reg, rax)
	}
}
1714
// store_reg_to_rbp_exact emits code that writes exactly the low `size` bytes of
// `reg` to [rbp + off].
// Sizes 1, 2, 4 and 8 use a single store. Other sizes up to 8 copy the value to
// rax (if it is not already there) and write it out in 4/2/1-byte pieces,
// shifting rax right after each piece. Sizes outside 1..8 are reported as
// unsupported.
fn (mut g Gen) store_reg_to_rbp_exact(reg Reg, off int, size int) {
	if size in [1, 2, 4, 8] {
		asm_store_rbp_disp_reg_size(mut g, off, reg, size)
		return
	}
	if size <= 0 || size > 8 {
		x64_unsupported('raw register spill size ${size}')
	}
	if reg != rax {
		asm_mov_reg_reg(mut g, rax, reg)
	}
	mut stored := 0
	for stored < size {
		piece := raw_memory_chunk_size(size - stored)
		asm_store_rbp_disp_reg_size(mut g, off + stored, rax, piece)
		stored += piece
		if stored < size {
			// Drop the bytes just written so the next piece sits in the low bits.
			asm_shr_rax_imm8(mut g, u8(piece * 8))
		}
	}
}
1736
// store_field_value emits code that writes value `src_id` into member
// `field_idx` of the aggregate stored in the stack slot of `dst_id`.
//   - sources wider than 8 bytes or aggregate sources: zero-filled when the
//     source is a zero constant, otherwise copied byte-wise from its storage;
//   - 4/8-byte float sources: stored through xmm0;
//   - everything else: loaded into register 0 and stored with `size` bytes.
fn (mut g Gen) store_field_value(dst_id int, dst_typ int, field_idx int, src_id int, size int) {
	member_off := g.struct_field_offset_bytes(dst_typ, field_idx)
	slot_off := g.stack_map[dst_id] + member_off
	via_memory := size > 8 || g.value_is_aggregate(src_id)
	if via_memory {
		if g.value_is_zero_constant(src_id) {
			asm_xor_reg_reg(mut g, rax)
			g.store_repeated_zero(int(rbp), slot_off, size)
		} else {
			g.load_struct_src_address_to_reg(int(r10), src_id, g.mod.values[src_id].typ)
			g.copy_memory(int(rbp), slot_off, int(r10), 0, size)
		}
		return
	}
	if g.value_is_float_type(src_id) && size in [4, 8] {
		g.load_float_val_to_xmm(0, src_id, size)
		asm_store_xmm_rbp_disp(mut g, 0, slot_off, size)
		return
	}
	g.load_val_to_reg(0, src_id)
	asm_store_rbp_disp_reg_size(mut g, slot_off, rax, size)
}
1758
// emit_epilogue emits the function exit sequence: release the stack frame
// (if any), pop the registers listed in used_regs in reverse order, pop rbp
// and return.
fn (mut g Gen) emit_epilogue() {
	frame_bytes := g.stack_size
	if frame_bytes > 0 {
		if frame_bytes > 127 {
			asm_add_rsp_imm32(mut g, u32(frame_bytes))
		} else {
			// Fits the short 8-bit immediate form.
			asm_add_rsp_imm8(mut g, u8(frame_bytes))
		}
	}
	mut idx := g.used_regs.len
	for idx > 0 {
		idx--
		asm_pop(mut g, Reg(g.used_regs[idx]))
	}
	asm_pop_rbp(mut g)
	asm_ret(mut g)
}
1773
// selected_opcode returns the opcode to generate code for; currently this is
// simply the instruction's own `op`. The receiver is intentionally unused.
fn (g Gen) selected_opcode(instr mir.Instruction) ssa.OpCode {
	_ = g
	return instr.op
}
1778
// emit_jmp emits an unconditional rel32 jump to block `target_idx`: the jump
// opcode followed by the displacement produced by emit_rel32_to_block.
fn (mut g Gen) emit_jmp(target_idx int) {
	asm_jmp_rel32(mut g)
	g.emit_rel32_to_block(target_idx)
}
1783
// emit_rel32_to_block emits the 4-byte displacement of a branch to block
// `target_idx`. If the block's offset is already recorded, the displacement is
// computed relative to the end of the 4-byte field (positions are taken
// relative to curr_offset). Otherwise a pending label is recorded and a zero
// placeholder is emitted.
fn (mut g Gen) emit_rel32_to_block(target_idx int) {
	if block_off := g.block_offsets[target_idx] {
		after_field := g.text_len() - g.curr_offset + 4
		g.emit_u32(u32(block_off - after_field))
		return
	}
	g.record_pending_label(target_idx)
	g.emit_u32(0)
}
1794
// load_call_arg_to_reg emits code that places call argument `val_id`
// (argument number `arg_idx` of `instr`) into register `reg`.
//   - indirect arguments: the address of the value is loaded;
//   - values that need their raw bytes in the register: a zero constant clears
//     the register, anything else is loaded byte-exact from its storage via r10;
//   - all other values: the regular value load is used.
fn (mut g Gen) load_call_arg_to_reg(reg int, val_id int, arg_idx int, instr mir.Instruction) {
	if g.call_arg_is_indirect(val_id, arg_idx, instr) {
		g.load_address_of_val_to_reg(reg, val_id)
		return
	}
	arg_bytes := g.type_size(g.mod.values[val_id].typ)
	if !g.value_needs_raw_abi_reg_bytes(val_id, arg_bytes) {
		g.load_val_to_reg(reg, val_id)
		return
	}
	if g.value_is_zero_constant(val_id) {
		asm_xor_reg_reg(mut g, Reg(reg))
		return
	}
	g.load_struct_src_address_to_reg(int(r10), val_id, g.mod.values[val_id].typ)
	g.load_raw_mem_to_reg(Reg(reg), r10, 0, arg_bytes)
}
1813
// load_sysv_direct_aggregate_arg_to_regs emits code that loads the aggregate
// `val_id` into argument registers according to `layout`.
// The aggregate's address is put in r10 and each location is loaded from its
// offset: integer locations into the register taken from `abi_regs`, SSE
// locations into the matching float argument register. An SSE location followed
// by its SSEUP partner is loaded with one 128-bit load and consumes both
// entries. Locations of kind none/stack and locations past the end of the
// aggregate are skipped.
fn (mut g Gen) load_sysv_direct_aggregate_arg_to_regs(val_id int, layout mir.AbiValueLayout, abi_regs []int) {
	g.ensure_sysv_direct_aggregate_supported(val_id, layout.value_class, 'argument')
	total_bytes := g.type_size(g.mod.values[val_id].typ)
	g.load_struct_src_address_to_reg(int(r10), val_id, g.mod.values[val_id].typ)
	float_regs := g.abi.float_arg_regs()
	mut loc_idx := 0
	for loc_idx < layout.locs.len {
		loc := layout.locs[loc_idx]
		if loc.kind in [.none, .stack] {
			loc_idx++
			continue
		}
		piece_bytes := sysv_abi_chunk_size(total_bytes, loc.offset)
		if piece_bytes <= 0 {
			loc_idx++
			continue
		}
		match loc.kind {
			.int_reg {
				dst := sysv_checked_int_reg(abi_regs, loc.index, 'argument')
				g.load_raw_mem_to_reg(dst, r10, loc.offset, piece_bytes)
				loc_idx++
			}
			.sse_reg {
				if sysv_layout_has_sseup_pair(layout, loc_idx, total_bytes) {
					// SSE + SSEUP: one 16-byte load covers both entries.
					xmm := sysv_checked_sse_reg(float_regs, loc.index, 'argument')
					asm_load_xmm_mem_base_disp_128(mut g, xmm, r10, loc.offset)
					loc_idx += 2
				} else {
					if loc.class == .sseup {
						x64_unsupported('backend feature: SysV direct aggregate argument with unpaired SSEUP ABI location is not implemented yet')
					}
					sysv_checked_sse_chunk_size(piece_bytes, 'argument')
					xmm := sysv_checked_sse_reg(float_regs, loc.index, 'argument')
					asm_load_xmm_mem_base_disp_size(mut g, xmm, r10, loc.offset, piece_bytes)
					loc_idx++
				}
			}
			else {
				x64_unsupported('backend feature: SysV direct aggregate argument with unsupported ABI location is not implemented yet')
			}
		}
	}
}
1858
// prepare_call_stack_args reserves the outgoing call frame and stores every
// argument flagged in `stack_args` into its stack position. It only acts for
// the Windows ABI; for any other ABI it emits nothing and returns 0.
// Argument i is read from instr.operands[i + 1] and placed at position
// i + arg_position_base. Returns the number of bytes to release after the call.
fn (mut g Gen) prepare_call_stack_args(instr mir.Instruction, stack_args []bool, stack_slots int, arg_position_base int) int {
	if g.abi != .windows {
		return 0
	}
	cleanup := g.emit_windows_call_frame(stack_slots)
	for arg_idx, is_stack in stack_args {
		if !is_stack {
			continue
		}
		position := arg_idx + arg_position_base
		g.store_windows_call_stack_arg(instr.operands[arg_idx + 1], arg_idx, position,
			instr)
	}
	return cleanup
}
1872
// emit_windows_call_frame emits the rsp adjustment that reserves the outgoing
// call frame for `stack_slots` stack arguments and returns its size in bytes.
// For non-Windows ABIs nothing is emitted and 0 is returned.
fn (mut g Gen) emit_windows_call_frame(stack_slots int) int {
	if g.abi != .windows {
		return 0
	}
	frame_bytes := g.abi.call_frame_size(stack_slots)
	if frame_bytes > 127 {
		asm_sub_rsp_imm32(mut g, u32(frame_bytes))
	} else {
		// Fits the short 8-bit immediate form.
		asm_sub_rsp_imm8(mut g, u8(frame_bytes))
	}
	return frame_bytes
}
1885
// cleanup_windows_call_frame emits the rsp adjustment that releases `cleanup`
// bytes reserved for a call. A `cleanup` of 0 emits nothing.
fn (mut g Gen) cleanup_windows_call_frame(cleanup int) {
	if cleanup == 0 {
		return
	}
	if cleanup > 127 {
		asm_add_rsp_imm32(mut g, u32(cleanup))
	} else {
		asm_add_rsp_imm8(mut g, u8(cleanup))
	}
}
1896
// store_windows_call_stack_arg emits code that writes call argument `val_id`
// into the outgoing stack position `position` (addressed off rsp).
// The argument is first validated as a scalar or indirect argument. Float
// values are stored from xmm0 with their own size; all other values are loaded
// into register 0 and stored as a full 8-byte slot.
fn (mut g Gen) store_windows_call_stack_arg(val_id int, arg_idx int, position int, instr mir.Instruction) {
	by_reference := g.call_arg_is_indirect(val_id, arg_idx, instr)
	arg_bytes := g.type_size(g.mod.values[val_id].typ)
	g.ensure_windows_scalar_or_indirect_arg(val_id, by_reference, arg_bytes)
	slot_disp := g.abi.call_stack_arg_offset(position)
	if !g.value_is_float_type(val_id) {
		g.load_call_arg_to_reg(0, val_id, arg_idx, instr)
		asm_store_mem_base_disp_reg_size(mut g, rsp, slot_disp, rax, 8)
		return
	}
	g.ensure_float_abi_scalar(val_id, 'stack argument')
	g.load_float_val_to_xmm(0, val_id, arg_bytes)
	asm_store_xmm_mem_base_disp_size(mut g, 0, rsp, slot_disp, arg_bytes)
}
1911
// ensure_windows_scalar_or_indirect_arg rejects direct Windows arguments whose
// size is not 1, 2, 4 or 8 bytes. Indirect arguments are always accepted.
// Aggregates and scalars get different diagnostics.
fn (mut g Gen) ensure_windows_scalar_or_indirect_arg(val_id int, is_indirect bool, size int) {
	if is_indirect {
		return
	}
	if size in [1, 2, 4, 8] {
		return
	}
	if g.value_is_aggregate(val_id) {
		g.unsupported_windows_abi_arg(
			'direct aggregate argument larger than 8 bytes reached codegen; ' +
			'expected ABI lowering before codegen to pass it indirectly', val_id)
	} else {
		g.unsupported_windows_abi_arg(
			'scalar argument with unsupported storage width ${size} bytes reached codegen; ' +
			'expected ABI lowering before codegen', val_id)
	}
}
1927
// unsupported_windows_abi_arg reports, via x64_unsupported, that Windows
// argument lowering cannot handle value `val_id`, embedding `reason` in the
// message.
fn (g Gen) unsupported_windows_abi_arg(reason string, val_id int) {
	x64_unsupported('backend feature: Windows argument lowering for value ${val_id}: ${reason}; check ABI lowering')
}
1931
// ensure_sysv_direct_aggregate_supported validates the eightbyte classes of a
// directly passed SysV aggregate and reports the ones the backend cannot
// handle. It does nothing unless the ABI is SysV, the value class mode is
// direct, `val_id` is an aggregate and at least one class is present.
// `context` names the use site (e.g. argument, parameter) in diagnostics.
fn (g Gen) ensure_sysv_direct_aggregate_supported(val_id int, value_class mir.AbiValueClass, context string) {
	if g.abi != .sysv || value_class.mode != .direct {
		return
	}
	if !g.value_is_aggregate(val_id) || value_class.classes.len == 0 {
		return
	}
	for i := 0; i < value_class.classes.len; i++ {
		cls := value_class.classes[i]
		if cls in [.no_class, .integer, .sse] {
			continue
		}
		if cls == .sseup {
			// SSEUP is only valid directly after an SSE or another SSEUP class.
			if i == 0 || value_class.classes[i - 1] !in [.sse, .sseup] {
				x64_unsupported('backend feature: SysV direct aggregate ${context} with unpaired SSEUP eightbyte class is not implemented yet')
			}
			continue
		}
		x64_unsupported('backend feature: SysV direct aggregate ${context} with MEMORY eightbyte classes is not implemented yet')
	}
}
1951
// sysv_abi_chunk_size returns how many bytes of a `total_size`-byte value belong
// to the eightbyte starting at `offset`: 0 when the offset is at or past the
// end, otherwise the remaining byte count capped at 8.
fn sysv_abi_chunk_size(total_size int, offset int) int {
	if offset >= total_size {
		return 0
	}
	left := total_size - offset
	return if left >= 8 { 8 } else { left }
}
1962
// sysv_layout_register_limits returns (int_limit, sse_limit): one past the
// highest integer register index and one past the highest SSE register index
// referenced by `layout`. Only SSE locations of class .sse count towards the
// SSE limit. Both are 0 when no such location exists.
fn sysv_layout_register_limits(layout mir.AbiValueLayout) (int, int) {
	mut int_limit := 0
	mut sse_limit := 0
	for loc in layout.locs {
		upper := loc.index + 1
		if loc.kind == .int_reg {
			if upper > int_limit {
				int_limit = upper
			}
		} else if loc.kind == .sse_reg && loc.class == .sse {
			if upper > sse_limit {
				sse_limit = upper
			}
		}
	}
	return int_limit, sse_limit
}
1976
// sysv_layout_stack_slot_limit returns one past the highest stack slot index
// referenced by `layout`, or 0 when it has no stack locations.
fn sysv_layout_stack_slot_limit(layout mir.AbiValueLayout) int {
	mut limit := 0
	for loc in layout.locs {
		if loc.kind != .stack {
			continue
		}
		upper := loc.index + 1
		if upper > limit {
			limit = upper
		}
	}
	return limit
}
1986
// sysv_layout_uses_stack reports whether any location in `layout` is a stack
// location.
fn sysv_layout_uses_stack(layout mir.AbiValueLayout) bool {
	for i := 0; i < layout.locs.len; i++ {
		if layout.locs[i].kind == .stack {
			return true
		}
	}
	return false
}
1995
// sysv_checked_int_reg returns entry `index` of `regs` as a Reg, reporting an
// unsupported feature first when `index` is outside the list. `context` names
// the use site in the diagnostic.
fn sysv_checked_int_reg(regs []int, index int, context string) Reg {
	in_range := index >= 0 && index < regs.len
	if !in_range {
		x64_unsupported('backend feature: SysV direct aggregate ${context} needs INTEGER register ${index} outside available ABI registers')
	}
	return Reg(regs[index])
}
2002
// sysv_checked_sse_reg returns entry `index` of `regs`, reporting an unsupported
// feature first when `index` is outside the list. `context` names the use site
// in the diagnostic.
fn sysv_checked_sse_reg(regs []int, index int, context string) int {
	in_range := index >= 0 && index < regs.len
	if !in_range {
		x64_unsupported('backend feature: SysV direct aggregate ${context} needs SSE register ${index} outside available ABI registers')
	}
	return regs[index]
}
2009
// sysv_checked_sse_chunk_size reports an unsupported feature unless `size` is 4
// or 8 bytes. `context` names the use site in the diagnostic.
fn sysv_checked_sse_chunk_size(size int, context string) {
	if size == 4 || size == 8 {
		return
	}
	x64_unsupported('backend feature: SysV direct aggregate ${context} with ${size}-byte SSE eightbyte chunk is not implemented yet')
}
2015
// sysv_layout_has_sseup_pair reports whether location `loc_idx` of `layout` is
// an SSE location immediately followed by its SSEUP partner: same register
// index, offset exactly 8 bytes higher, and the value (`total_size` bytes)
// covering the full 16 bytes starting at the first location's offset.
fn sysv_layout_has_sseup_pair(layout mir.AbiValueLayout, loc_idx int, total_size int) bool {
	if loc_idx + 1 >= layout.locs.len {
		return false
	}
	loc := layout.locs[loc_idx]
	next := layout.locs[loc_idx + 1]
	if loc.kind != .sse_reg || loc.class != .sse {
		return false
	}
	if next.kind != .sse_reg || next.class != .sseup {
		return false
	}
	if next.index != loc.index || next.offset != loc.offset + 8 {
		return false
	}
	return total_size >= loc.offset + 16
}
2026
// sysv_class_has_sseup_pair reports whether eightbyte `class_idx` is `.sse`,
// the following eightbyte is `.sseup`, and `total_size` covers both (16 bytes
// starting at `class_idx * 8`).
fn sysv_class_has_sseup_pair(classes []mir.AbiEightbyteClass, class_idx int, total_size int) bool {
	if class_idx + 1 >= classes.len {
		return false
	}
	if classes[class_idx] != .sse || classes[class_idx + 1] != .sseup {
		return false
	}
	return total_size >= class_idx * 8 + 16
}
2031
// sysv_int_return_reg maps an INTEGER return slot to its register: slot 0 is
// `rax`, slot 1 is `rdx`. Any other slot is reported through
// `x64_unsupported`; `rax` is the value yielded after that report.
fn sysv_int_return_reg(index int, context string) Reg {
	if index != 0 && index != 1 {
		x64_unsupported('backend feature: SysV direct aggregate ${context} needs INTEGER return register ${index} outside available ABI registers')
	}
	return if index == 1 { rdx } else { rax }
}
2046
// sysv_sse_return_reg validates an SSE return slot and returns it unchanged.
// Only slots 0 and 1 are accepted; anything else is reported through
// `x64_unsupported`.
fn sysv_sse_return_reg(index int, context string) int {
	if index != 0 && index != 1 {
		x64_unsupported('backend feature: SysV direct aggregate ${context} needs SSE return register ${index} outside available ABI registers')
	}
	return index
}
2053
// store_sysv_direct_aggregate_param copies an incoming direct aggregate
// parameter `pid` into its stack slot, one ABI location of `layout` at a time.
// It does nothing unless the active ABI is SysV.
//
// Per location kind:
//  - `.int_reg`: the checked integer argument register is stored with the
//    exact chunk size.
//  - `.sse_reg`: an SSE/SSEUP pair is stored as one 128-bit write and consumes
//    two locations; otherwise a 4- or 8-byte chunk is stored from the checked
//    SSE register. An unpaired `.sseup` location is reported as unsupported.
//  - `.stack`: the chunk is copied from `[rbp + 16 + index * 8]`.
//  - `.none` locations and chunks of non-positive size are skipped.
fn (mut g Gen) store_sysv_direct_aggregate_param(pid int, layout mir.AbiValueLayout) {
	if g.abi != .sysv {
		return
	}
	g.ensure_sysv_direct_aggregate_supported(pid, layout.value_class, 'parameter')
	total_bytes := g.type_size(g.mod.values[pid].typ)
	slot_base := g.stack_map[pid]
	gp_regs := g.abi.int_arg_regs()
	xmm_regs := g.abi.float_arg_regs()
	mut cursor := 0
	for cursor < layout.locs.len {
		loc := layout.locs[cursor]
		if loc.kind == .none {
			cursor++
			continue
		}
		bytes := sysv_abi_chunk_size(total_bytes, loc.offset)
		if bytes <= 0 {
			cursor++
			continue
		}
		// rbp-relative destination of this chunk inside the parameter's slot.
		dst := slot_base + loc.offset
		match loc.kind {
			.int_reg {
				gp := sysv_checked_int_reg(gp_regs, loc.index, 'parameter')
				g.store_reg_to_rbp_exact(gp, dst, bytes)
				cursor++
			}
			.sse_reg {
				if sysv_layout_has_sseup_pair(layout, cursor, total_bytes) {
					// Both halves share one xmm register: single 128-bit store.
					xmm := sysv_checked_sse_reg(xmm_regs, loc.index, 'parameter')
					asm_store_xmm_mem_base_disp_128(mut g, xmm, rbp, dst)
					cursor += 2
				} else {
					if loc.class == .sseup {
						x64_unsupported('backend feature: SysV direct aggregate parameter with unpaired SSEUP ABI location is not implemented yet')
					}
					sysv_checked_sse_chunk_size(bytes, 'parameter')
					xmm := sysv_checked_sse_reg(xmm_regs, loc.index, 'parameter')
					asm_store_xmm_rbp_disp(mut g, xmm, dst, bytes)
					cursor++
				}
			}
			.stack {
				g.copy_memory(int(rbp), dst, int(rbp), 16 + loc.index * 8, bytes)
				cursor++
			}
			else {
				x64_unsupported('backend feature: SysV direct aggregate parameter with unsupported ABI location is not implemented yet')
			}
		}
	}
}
2107
// ensure_windows_direct_return_supported validates a directly returned value
// under the Windows ABI. It is a no-op for other ABIs, for indirect returns,
// and for an empty return class (size 0 with no classes).
//
// Otherwise the value's storage size must be 1, 2, 4 or 8 bytes unless the
// value is a float; aggregates and non-float scalars of any other size are
// reported through `x64_unsupported`. `context` names the call site in the
// message.
fn (g Gen) ensure_windows_direct_return_supported(val_id int, ret_class mir.AbiValueClass, context string) {
	if g.abi != .windows || ret_class.mode == .indirect {
		return
	}
	if ret_class.size == 0 && ret_class.classes.len == 0 {
		return
	}
	size := g.type_size(g.mod.values[val_id].typ)
	if size in [1, 2, 4, 8] {
		return
	}
	if g.value_is_aggregate(val_id) {
		x64_unsupported('backend feature: Windows ${context} lowering for value ${val_id}: ' +
			'aggregate return larger than 8 bytes reached direct return codegen; ' +
			'expected ABI lowering before codegen to use hidden sret pointer; check ABI lowering')
	} else if !g.value_is_float_type(val_id) {
		x64_unsupported('backend feature: Windows ${context} lowering for value ${val_id}: ' +
			'scalar return with unsupported storage width ${size} bytes reached codegen; ' +
			'expected ABI lowering before codegen; check ABI lowering')
	}
}
2130
// windows_value_passed_indirect reports whether a value is passed by
// reference: either it is already marked indirect, or the ABI is Windows and
// the value is an aggregate whose size is not 1, 2, 4 or 8 bytes.
fn (g Gen) windows_value_passed_indirect(val_id int, marked_indirect bool, size int) bool {
	if marked_indirect {
		return true
	}
	if g.abi != .windows {
		return false
	}
	if !g.value_is_aggregate(val_id) {
		return false
	}
	return size !in [1, 2, 4, 8]
}
2137
// call_arg_is_indirect reports whether call argument `arg_idx` (value
// `val_id`) is passed indirectly: either `instr.abi_arg_class` marks it
// `.indirect`, or `windows_value_passed_indirect` says so for its size.
fn (g Gen) call_arg_is_indirect(val_id int, arg_idx int, instr mir.Instruction) bool {
	mut marked := false
	if arg_idx >= 0 && arg_idx < instr.abi_arg_class.len {
		marked = instr.abi_arg_class[arg_idx] == .indirect
	}
	size := g.type_size(g.mod.values[val_id].typ)
	return g.windows_value_passed_indirect(val_id, marked, size)
}
2144
// load_call_register_args loads the register-passed arguments of call `instr`
// and returns the number of SSE argument registers in use afterwards.
//
// ABIs with positional argument registers are delegated to
// `load_windows_call_register_args`. Otherwise each argument (operands[1..])
// is handled in order:
//  - floats take the next float argument register; running out of them is
//    reported via `unsupported_float_abi`;
//  - arguments flagged in `stack_args` are skipped;
//  - direct aggregates that carry an ABI layout are loaded per that layout,
//    and both register cursors advance to the layout's limits;
//  - everything else takes the next integer register(s) from `abi_regs`,
//    starting at `arg_position_base`, when enough remain.
fn (mut g Gen) load_call_register_args(instr mir.Instruction, abi_regs []int, stack_args []bool, arg_position_base int) int {
	if g.abi.uses_positional_arg_regs() {
		return g.load_windows_call_register_args(instr, stack_args, arg_position_base)
	}

	xmm_regs := g.abi.float_arg_regs()
	mut next_gp := arg_position_base
	mut next_xmm := 0
	for op_idx in 1 .. instr.operands.len {
		arg_idx := op_idx - 1
		arg_id := instr.operands[op_idx]
		if g.value_is_float_type(arg_id) {
			if next_xmm >= xmm_regs.len {
				g.unsupported_float_abi('stack argument', arg_id)
			}
			g.load_float_call_arg_to_xmm(xmm_regs[next_xmm], arg_id)
			next_xmm++
			continue
		}
		if arg_idx < instr.abi_arg_classes.len {
			g.ensure_sysv_direct_aggregate_supported(arg_id, instr.abi_arg_classes[arg_idx],
				'argument')
		}
		if stack_args[arg_idx] {
			continue
		}
		if !g.call_arg_is_indirect(arg_id, arg_idx, instr) && g.value_is_aggregate(arg_id)
			&& arg_idx < instr.abi_arg_layouts.len && instr.abi_arg_layouts[arg_idx].locs.len > 0 {
			layout := instr.abi_arg_layouts[arg_idx]
			g.load_sysv_direct_aggregate_arg_to_regs(arg_id, layout, abi_regs)
			gp_limit, xmm_limit := sysv_layout_register_limits(layout)
			if gp_limit > next_gp {
				next_gp = gp_limit
			}
			if xmm_limit > next_xmm {
				next_xmm = xmm_limit
			}
			continue
		}
		chunks := g.call_arg_reg_chunks(arg_id, arg_idx, instr)
		if next_gp + chunks > abi_regs.len {
			// Not enough integer registers left for this argument.
			continue
		}
		if chunks > 1 {
			g.load_aggregate_arg_to_regs(arg_id, abi_regs[next_gp..next_gp + chunks],
				g.type_size(g.mod.values[arg_id].typ))
		} else {
			g.load_call_arg_to_reg(abi_regs[next_gp], arg_id, arg_idx, instr)
		}
		next_gp += chunks
	}
	return next_xmm
}
2198
// load_windows_call_register_args loads the positional register arguments of
// call `instr` and returns how many float arguments were loaded into xmm
// registers. Arguments flagged in `stack_args` are skipped.
//
// Arguments are loaded from right to left: some value loaders use RCX as
// scratch, so loading arg0 first could corrupt it before the call.
fn (mut g Gen) load_windows_call_register_args(instr mir.Instruction, stack_args []bool, arg_position_base int) int {
	mirror_floats := g.call_needs_windows_vararg_float_duplication(instr)
	mut xmm_loaded := 0
	for arg_idx := instr.operands.len - 2; arg_idx >= 0; arg_idx-- {
		if stack_args[arg_idx] {
			continue
		}
		arg_id := instr.operands[arg_idx + 1]
		position := arg_idx + arg_position_base
		if g.value_is_float_type(arg_id) {
			xmm := g.abi.float_arg_reg_for_position(position)
			if xmm == x64_no_arg_reg {
				g.unsupported_float_abi('stack argument', arg_id)
			}
			g.load_float_call_arg_to_xmm(xmm, arg_id)
			if mirror_floats {
				g.duplicate_windows_vararg_float_arg_to_gp(position, xmm)
			}
			xmm_loaded++
			continue
		}
		gp := g.abi.int_arg_reg_for_position(position)
		if gp == x64_no_arg_reg {
			continue
		}
		passed_by_ref := g.call_arg_is_indirect(arg_id, arg_idx, instr)
		arg_bytes := g.type_size(g.mod.values[arg_id].typ)
		g.ensure_windows_scalar_or_indirect_arg(arg_id, passed_by_ref, arg_bytes)
		g.load_call_arg_to_reg(gp, arg_id, arg_idx, instr)
	}
	return xmm_loaded
}
2234
// call_needs_windows_vararg_float_duplication reports whether float register
// arguments of this call must also be copied into integer registers. That is
// the case only under the Windows ABI when the callee (operand 0) is named
// `snprintf`, `_scprintf` or `_snprintf`.
fn (g Gen) call_needs_windows_vararg_float_duplication(instr mir.Instruction) bool {
	if g.abi != .windows {
		return false
	}
	if instr.operands.len == 0 {
		return false
	}
	callee := g.mod.values[instr.operands[0]]
	return match callee.name {
		'snprintf', '_scprintf', '_snprintf' { true }
		else { false }
	}
}
2242
// duplicate_windows_vararg_float_arg_to_gp copies `xmm` into the integer
// argument register for the same `position`. Positions 4 and above, and
// positions without an integer argument register, are ignored.
fn (mut g Gen) duplicate_windows_vararg_float_arg_to_gp(position int, xmm int) {
	if position >= 4 {
		return
	}
	gp := g.abi.int_arg_reg_for_position(position)
	if gp != x64_no_arg_reg {
		g.emit_movq_reg_xmm(Reg(gp), xmm)
	}
}
2253
// emit_movq_reg_xmm emits the byte sequence `66 REX 0F 7E modrm` that moves
// `src_xmm` into general-purpose register `dst`.
//
// The REX byte starts at 0x48; bit 4 is added when the xmm number is 8 or
// above, and bit 1 when the mapped destination register is 8 or above. The
// ModRM byte is 0xC0 with the low three bits of the xmm number in bits 3..5
// and the low three bits of the destination in bits 0..2.
fn (mut g Gen) emit_movq_reg_xmm(dst Reg, src_xmm int) {
	gp_hw := g.map_reg(int(dst))
	xmm_ext := if src_xmm >= 8 { u8(4) } else { u8(0) }
	gp_ext := if gp_hw >= 8 { u8(1) } else { u8(0) }
	modrm := u8(0xc0) | (u8(src_xmm & 7) << 3) | u8(gp_hw & 7)
	g.emit(0x66)
	g.emit(u8(0x48) | xmm_ext | gp_ext)
	g.emit(0x0f)
	g.emit(0x7e)
	g.emit(modrm)
}
2271
// load_float_call_arg_to_xmm checks that `val_id` is a float scalar the ABI
// code supports as an argument, then loads it into register `xmm` using the
// value's own type size.
fn (mut g Gen) load_float_call_arg_to_xmm(xmm int, val_id int) {
	g.ensure_float_abi_scalar(val_id, 'argument')
	width := g.type_size(g.mod.values[val_id].typ)
	g.load_float_val_to_xmm(xmm, val_id, width)
}
2276
// store_call_result stores the result of a call into `val_id`.
//
// Order of attempts:
//  1. float results are stored from xmm0 into the value's stack slot;
//  2. SysV direct aggregates (other than integer pairs) are stored by
//     `store_sysv_direct_aggregate_call_result`;
//  3. SysV integer pairs are stored by `store_sysv_integer_pair_call_result`;
//  4. anything else is normalized as an integer in rax and stored from
//     register 0.
fn (mut g Gen) store_call_result(val_id int, ret_class mir.AbiValueClass) {
	g.ensure_windows_direct_return_supported(val_id, ret_class, 'call result')
	if g.value_is_float_type(val_id) {
		g.ensure_float_abi_scalar(val_id, 'call result')
		slot := g.stack_map[val_id]
		width := g.type_size(g.mod.values[val_id].typ)
		asm_store_xmm0_rbp_disp(mut g, slot, width)
		return
	}
	if g.store_sysv_direct_aggregate_call_result(val_id, ret_class) {
		return
	}
	g.ensure_sysv_direct_aggregate_supported(val_id, ret_class, 'call result')
	if !g.store_sysv_integer_pair_call_result(val_id, ret_class) {
		g.normalize_integer_call_result(val_id)
		g.store_reg_to_val(0, val_id)
	}
}
2294
// normalize_integer_call_result normalizes rax for the type of `val_id`,
// passing `.sext` as the opcode used in any unsupported-conversion report.
fn (mut g Gen) normalize_integer_call_result(val_id int) {
	g.normalize_integer_rax_for_type(g.mod.values[val_id].typ, .sext, val_id)
}
2299
// normalize_integer_rax_for_type widens the integer held in rax according to
// type `typ_id`. Nothing is emitted for an out-of-range type id, a non-integer
// type, or an 8-byte integer.
//
//  - width 1: rax is ANDed with 1 (rcx is loaded with the constant 1 for it);
//  - unsigned: rax is masked to `size` bytes via `mask_rax_to_size`;
//  - signed: rax is sign-extended from 1, 2 or 4 bytes; other sizes are
//    reported via `unsupported_numeric_conversion` with `op` and `val_id`.
fn (mut g Gen) normalize_integer_rax_for_type(typ_id int, op ssa.OpCode, val_id int) {
	if typ_id <= 0 || typ_id >= g.mod.type_store.types.len {
		return
	}
	typ := g.mod.type_store.types[typ_id]
	if typ.kind != .int_t {
		return
	}
	size := g.type_size(typ_id)
	if size == 8 {
		return
	}
	if typ.width == 1 {
		asm_mov_reg_imm32(mut g, rcx, 1)
		asm_and_rax_rcx(mut g)
		return
	}
	if typ.is_unsigned {
		g.mask_rax_to_size(size, .zext, val_id)
		return
	}
	if size == 1 {
		asm_movsx_rax_al(mut g)
	} else if size == 2 {
		asm_movsx_rax_ax(mut g)
	} else if size == 4 {
		asm_movsxd_rax_eax(mut g)
	} else {
		g.unsupported_numeric_conversion(op, size, 8, val_id)
	}
}
2328
// is_sysv_integer_pair_return reports whether `ret_class` is a SysV direct
// return of more than 8 and at most 16 bytes made of exactly two `.integer`
// eightbytes.
fn (g Gen) is_sysv_integer_pair_return(ret_class mir.AbiValueClass) bool {
	if g.abi != .sysv || ret_class.mode != .direct {
		return false
	}
	if ret_class.size <= 8 || ret_class.size > 16 {
		return false
	}
	if ret_class.classes.len != 2 {
		return false
	}
	return ret_class.classes[0] == .integer && ret_class.classes[1] == .integer
}
2334
// is_sysv_direct_aggregate_return reports whether `val_id` is an aggregate
// returned directly under the SysV ABI with at least one eightbyte class.
fn (g Gen) is_sysv_direct_aggregate_return(val_id int, ret_class mir.AbiValueClass) bool {
	if g.abi != .sysv || ret_class.mode != .direct {
		return false
	}
	if !g.value_is_aggregate(val_id) {
		return false
	}
	return ret_class.classes.len > 0
}
2339
// store_sysv_direct_aggregate_call_result stores a directly returned SysV
// aggregate from the return registers into the stack slot of `val_id`.
// It returns false, emitting nothing, when the value is not a SysV direct
// aggregate return or when it is an integer pair (handled elsewhere).
//
// Eightbytes are processed in order. `.integer` eightbytes consume the next
// integer return register, `.sse` eightbytes the next SSE return register; an
// `.sse` eightbyte followed by `.sseup` is stored as one 128-bit write.
// Eightbytes with a non-positive chunk size and `.no_class` ones are skipped.
fn (mut g Gen) store_sysv_direct_aggregate_call_result(val_id int, ret_class mir.AbiValueClass) bool {
	handled_here := g.is_sysv_direct_aggregate_return(val_id, ret_class)
		&& !g.is_sysv_integer_pair_return(ret_class)
	if !handled_here {
		return false
	}
	g.ensure_sysv_direct_aggregate_supported(val_id, ret_class, 'call result')
	slot_base := g.stack_map[val_id]
	mut gp_used := 0
	mut xmm_used := 0
	mut eightbyte := 0
	for eightbyte < ret_class.classes.len {
		byte_off := eightbyte * 8
		bytes := sysv_abi_chunk_size(ret_class.size, byte_off)
		if bytes <= 0 {
			eightbyte++
			continue
		}
		match ret_class.classes[eightbyte] {
			.no_class {
				eightbyte++
			}
			.integer {
				gp := sysv_int_return_reg(gp_used, 'call result')
				g.store_reg_to_rbp_exact(gp, slot_base + byte_off, bytes)
				gp_used++
				eightbyte++
			}
			.sse {
				if sysv_class_has_sseup_pair(ret_class.classes, eightbyte, ret_class.size) {
					xmm := sysv_sse_return_reg(xmm_used, 'call result')
					asm_store_xmm_mem_base_disp_128(mut g, xmm, rbp, slot_base + byte_off)
					xmm_used++
					eightbyte += 2
				} else {
					sysv_checked_sse_chunk_size(bytes, 'call result')
					xmm := sysv_sse_return_reg(xmm_used, 'call result')
					asm_store_xmm_rbp_disp(mut g, xmm, slot_base + byte_off, bytes)
					xmm_used++
					eightbyte++
				}
			}
			.sseup {
				x64_unsupported('backend feature: SysV direct aggregate call result with unpaired SSEUP eightbyte class is not implemented yet')
			}
			else {
				g.ensure_sysv_direct_aggregate_supported(val_id, ret_class, 'call result')
				eightbyte++
			}
		}
	}
	return true
}
2393
// load_sysv_direct_aggregate_return loads a directly returned SysV aggregate
// from memory into the return registers. It returns false, emitting nothing,
// when the value is not a SysV direct aggregate return or when it is an
// integer pair (handled elsewhere).
//
// The source address is placed in r10 first. Eightbytes are then processed in
// order: `.integer` ones fill the next integer return register, `.sse` ones
// the next SSE return register, and an `.sse` eightbyte followed by `.sseup`
// is loaded as one 128-bit read. Eightbytes with a non-positive chunk size and
// `.no_class` ones are skipped.
fn (mut g Gen) load_sysv_direct_aggregate_return(ret_val_id int, ret_class mir.AbiValueClass) bool {
	handled_here := g.is_sysv_direct_aggregate_return(ret_val_id, ret_class)
		&& !g.is_sysv_integer_pair_return(ret_class)
	if !handled_here {
		return false
	}
	g.ensure_sysv_direct_aggregate_supported(ret_val_id, ret_class, 'return')
	g.load_struct_src_address_to_reg(int(r10), ret_val_id, g.cur_func_ret_type)
	mut gp_used := 0
	mut xmm_used := 0
	mut eightbyte := 0
	for eightbyte < ret_class.classes.len {
		byte_off := eightbyte * 8
		bytes := sysv_abi_chunk_size(ret_class.size, byte_off)
		if bytes <= 0 {
			eightbyte++
			continue
		}
		match ret_class.classes[eightbyte] {
			.no_class {
				eightbyte++
			}
			.integer {
				gp := sysv_int_return_reg(gp_used, 'return')
				g.load_raw_mem_to_reg(gp, r10, byte_off, bytes)
				gp_used++
				eightbyte++
			}
			.sse {
				if sysv_class_has_sseup_pair(ret_class.classes, eightbyte, ret_class.size) {
					xmm := sysv_sse_return_reg(xmm_used, 'return')
					asm_load_xmm_mem_base_disp_128(mut g, xmm, r10, byte_off)
					xmm_used++
					eightbyte += 2
				} else {
					sysv_checked_sse_chunk_size(bytes, 'return')
					xmm := sysv_sse_return_reg(xmm_used, 'return')
					asm_load_xmm_mem_base_disp_size(mut g, xmm, r10, byte_off, bytes)
					xmm_used++
					eightbyte++
				}
			}
			.sseup {
				x64_unsupported('backend feature: SysV direct aggregate return with unpaired SSEUP eightbyte class is not implemented yet')
			}
			else {
				g.ensure_sysv_direct_aggregate_supported(ret_val_id, ret_class, 'return')
				eightbyte++
			}
		}
	}
	return true
}
2447
// store_sysv_integer_pair_call_result stores a SysV integer-pair call result
// into the stack slot of `val_id`: rax goes to the first 8 bytes and rdx to
// the remaining `size - 8` bytes. Returns false, emitting nothing, when
// `ret_class` is not an integer pair.
fn (mut g Gen) store_sysv_integer_pair_call_result(val_id int, ret_class mir.AbiValueClass) bool {
	if !g.is_sysv_integer_pair_return(ret_class) {
		return false
	}
	slot_base := g.stack_map[val_id]
	g.store_reg_to_rbp_exact(rax, slot_base, 8)
	tail_bytes := ret_class.size - 8
	if tail_bytes > 0 {
		g.store_reg_to_rbp_exact(rdx, slot_base + 8, tail_bytes)
	}
	return true
}
2460
// load_sysv_integer_pair_return loads a SysV integer-pair return value into
// rax (first 8 bytes) and rdx (remaining `size - 8` bytes), reading through
// the source address placed in r10. Returns false, emitting nothing, when
// `ret_class` is not an integer pair.
//
// The load order differs by tail size: a full 8-byte tail loads rax before
// rdx, a shorter tail loads rdx before rax.
// NOTE(review): presumably the partial-width load may use rax as scratch —
// confirm against load_raw_mem_to_reg before changing this order.
fn (mut g Gen) load_sysv_integer_pair_return(ret_val_id int, ret_class mir.AbiValueClass) bool {
	if !g.is_sysv_integer_pair_return(ret_class) {
		return false
	}
	g.load_struct_src_address_to_reg(int(r10), ret_val_id, g.cur_func_ret_type)
	tail_bytes := ret_class.size - 8
	if tail_bytes <= 0 {
		g.load_raw_mem_to_reg(rax, r10, 0, 8)
		return true
	}
	if tail_bytes == 8 {
		g.load_raw_mem_to_reg(rax, r10, 0, 8)
		g.load_raw_mem_to_reg(rdx, r10, 8, tail_bytes)
	} else {
		g.load_raw_mem_to_reg(rdx, r10, 8, tail_bytes)
		g.load_raw_mem_to_reg(rax, r10, 0, 8)
	}
	return true
}
2478
// emit_sse_arg_count loads `count` into rax under the SysV ABI, clearing eax
// with an xor when the count is zero. Other ABIs emit nothing.
fn (mut g Gen) emit_sse_arg_count(count int) {
	if g.abi != .sysv {
		return
	}
	if count != 0 {
		asm_mov_reg_imm32(mut g, rax, u32(count))
		return
	}
	asm_xor_eax_eax(mut g)
}
2489
// param_stack_slots returns how many 8-byte stack slots a parameter occupies:
// 1 when passed indirectly, `reg_chunks` when it spans several register
// chunks, the size rounded up to whole slots when larger than 8 bytes, and 1
// otherwise.
fn (g Gen) param_stack_slots(is_indirect bool, reg_chunks int, size int) int {
	if is_indirect {
		return 1
	}
	if reg_chunks > 1 {
		return reg_chunks
	}
	return if size > 8 { (size + 7) / 8 } else { 1 }
}
2502
// call_arg_reg_chunks returns how many 8-byte register chunks a call argument
// needs. Only a directly passed aggregate larger than 8 and at most 16 bytes
// needs more than one (its size rounded up to whole chunks).
fn (g Gen) call_arg_reg_chunks(val_id int, arg_idx int, instr mir.Instruction) int {
	if g.call_arg_is_indirect(val_id, arg_idx, instr) {
		return 1
	}
	if !g.value_is_aggregate(val_id) {
		return 1
	}
	size := g.type_size(g.mod.values[val_id].typ)
	fits_two_regs := size > 8 && size <= 16
	return if fits_two_regs { (size + 7) / 8 } else { 1 }
}
2514
// call_arg_stack_slots returns how many 8-byte stack slots a call argument
// occupies: 1 when passed indirectly; the size rounded up to whole slots for
// aggregates and for values larger than 8 bytes; 1 otherwise.
fn (g Gen) call_arg_stack_slots(val_id int, arg_idx int, instr mir.Instruction) int {
	if g.call_arg_is_indirect(val_id, arg_idx, instr) {
		return 1
	}
	size := g.type_size(g.mod.values[val_id].typ)
	needs_rounding := g.value_is_aggregate(val_id) || size > 8
	return if needs_rounding { (size + 7) / 8 } else { 1 }
}
2526
// call_stack_arg_mask returns one flag per call argument (operands[1..]);
// a true flag means the argument is passed on the stack.
//
// Positional-register ABIs: an argument goes to the stack when its position
// (`arg_idx + arg_position_base`) is at or beyond the number of integer
// argument registers. In-register floats are validated as ABI scalars, and an
// in-register argument needing more than one register chunk is reported via
// `unsupported_windows_abi_arg`.
//
// Other ABIs: integer and SSE register cursors are simulated in argument
// order. Floats must fit in the float argument registers. Direct aggregates
// carrying an ABI layout are flagged when the layout uses the stack, and both
// cursors advance to the layout's limits. Remaining arguments are flagged when
// their register chunks no longer fit in `abi_reg_count`.
fn (g Gen) call_stack_arg_mask(instr mir.Instruction, abi_reg_count int, arg_position_base int) []bool {
	num_args := instr.operands.len - 1
	mut on_stack := []bool{len: num_args}
	if g.abi.uses_positional_arg_regs() {
		positional_regs := g.abi.int_arg_regs().len
		for arg_idx in 0 .. num_args {
			if arg_idx + arg_position_base >= positional_regs {
				on_stack[arg_idx] = true
				continue
			}
			arg_id := instr.operands[arg_idx + 1]
			if g.value_is_float_type(arg_id) {
				g.ensure_float_abi_scalar(arg_id, 'argument')
				continue
			}
			if g.call_arg_reg_chunks(arg_id, arg_idx, instr) > 1 {
				g.unsupported_windows_abi_arg('aggregate register splitting reached codegen; ' +
					'expected ABI lowering before codegen to pass it indirectly or as one legal slot',
					arg_id)
			}
		}
		return on_stack
	}

	xmm_regs := g.abi.float_arg_regs()
	mut next_gp := arg_position_base
	mut next_xmm := 0
	for arg_idx in 0 .. num_args {
		arg_id := instr.operands[arg_idx + 1]
		if g.value_is_float_type(arg_id) {
			g.ensure_float_abi_scalar(arg_id, 'argument')
			if next_xmm >= xmm_regs.len {
				g.unsupported_float_abi('stack argument', arg_id)
			}
			next_xmm++
			continue
		}
		if !g.call_arg_is_indirect(arg_id, arg_idx, instr) && g.value_is_aggregate(arg_id)
			&& arg_idx < instr.abi_arg_layouts.len && instr.abi_arg_layouts[arg_idx].locs.len > 0 {
			layout := instr.abi_arg_layouts[arg_idx]
			g.ensure_sysv_direct_aggregate_supported(arg_id, layout.value_class, 'argument')
			on_stack[arg_idx] = sysv_layout_uses_stack(layout)
			gp_limit, xmm_limit := sysv_layout_register_limits(layout)
			if gp_limit > next_gp {
				next_gp = gp_limit
			}
			if xmm_limit > next_xmm {
				next_xmm = xmm_limit
			}
			continue
		}
		if arg_idx < instr.abi_arg_classes.len {
			g.ensure_sysv_direct_aggregate_supported(arg_id, instr.abi_arg_classes[arg_idx],
				'argument')
		}
		chunks := g.call_arg_reg_chunks(arg_id, arg_idx, instr)
		if next_gp + chunks > abi_reg_count {
			on_stack[arg_idx] = true
		} else {
			next_gp += chunks
		}
	}
	return on_stack
}
2591
// call_stack_slots sums the stack slots of every argument flagged in
// `stack_args`, using `call_arg_stack_slots` for each.
fn (g Gen) call_stack_slots(instr mir.Instruction, stack_args []bool) int {
	mut total := 0
	for arg_idx in 0 .. stack_args.len {
		if !stack_args[arg_idx] {
			continue
		}
		total += g.call_arg_stack_slots(instr.operands[arg_idx + 1], arg_idx, instr)
	}
	return total
}
2601
// push_call_stack_arg pushes one call argument onto the stack.
//
// A directly passed value spanning several slots is read through r10 and
// pushed one 8-byte chunk at a time via rax, highest chunk first; the highest
// chunk is read with only the bytes that remain in the value. Every other
// argument is loaded into register 0 and pushed as a single slot.
fn (mut g Gen) push_call_stack_arg(val_id int, arg_idx int, instr mir.Instruction) {
	is_indirect := g.call_arg_is_indirect(val_id, arg_idx, instr)
	size := g.type_size(g.mod.values[val_id].typ)
	slots := g.call_arg_stack_slots(val_id, arg_idx, instr)
	if is_indirect || slots <= 1 {
		g.load_call_arg_to_reg(0, val_id, arg_idx, instr)
		asm_push(mut g, rax)
		return
	}
	g.load_struct_src_address_to_reg(int(r10), val_id, g.mod.values[val_id].typ)
	last := slots - 1
	mut chunk := last
	for chunk >= 0 {
		bytes := if chunk == last { size - chunk * 8 } else { 8 }
		g.load_raw_mem_to_reg(rax, r10, chunk * 8, bytes)
		asm_push(mut g, rax)
		chunk--
	}
}
2618
// load_aggregate_arg_to_regs loads an aggregate into `regs`, 8 bytes per
// register, reading through the source address placed in r10. The last
// register receives only the bytes left over from `size`.
fn (mut g Gen) load_aggregate_arg_to_regs(val_id int, regs []int, size int) {
	g.load_struct_src_address_to_reg(int(r10), val_id, g.mod.values[val_id].typ)
	last := regs.len - 1
	for chunk in 0 .. regs.len {
		byte_off := chunk * 8
		bytes := if chunk == last { size - byte_off } else { 8 }
		g.load_raw_mem_to_reg(Reg(regs[chunk]), r10, byte_off, bytes)
	}
}
2626
// load_struct_src_address_to_reg puts the address of the struct data for
// `val_id` into `reg`.
//
//  - string literals are materialized;
//  - a value whose type is a pointer to `expected_struct_typ` is loaded as a
//    value, since it already holds the address;
//  - for anything else the address of the value itself is taken.
fn (mut g Gen) load_struct_src_address_to_reg(reg int, val_id int, expected_struct_typ int) {
	val := g.mod.values[val_id]
	if val.kind == .string_literal {
		g.materialize_string_literal(reg, val_id)
		return
	}
	mut holds_struct_ptr := false
	if val.typ > 0 && val.typ < g.mod.type_store.types.len {
		typ := g.mod.type_store.types[val.typ]
		holds_struct_ptr = typ.kind == .ptr_t && typ.elem_type == expected_struct_typ
	}
	if holds_struct_ptr {
		g.load_val_to_reg(reg, val_id)
	} else {
		g.load_address_of_val_to_reg(reg, val_id)
	}
}
2642
// copy_indirect_param_from_reg copies an indirectly passed parameter from the
// address in `src_reg` into the parameter's own storage.
//
// When the parameter type has no positive size, the register itself is stored
// into the parameter's stack slot instead. Otherwise the source address is
// moved to r10, the destination address to r11, and the bytes are copied.
fn (mut g Gen) copy_indirect_param_from_reg(param_id int, src_reg int) {
	byte_count := g.type_size(g.mod.values[param_id].typ)
	if byte_count <= 0 {
		asm_store_rbp_disp_reg(mut g, g.stack_map[param_id], Reg(src_reg))
		return
	}
	if src_reg != int(r10) {
		asm_mov_reg_reg(mut g, r10, Reg(src_reg))
	}
	g.load_address_of_val_to_reg(int(r11), param_id)
	g.copy_memory(int(r11), 0, int(r10), 0, byte_count)
}
2657
// load_val_to_reg materializes value `val_id` in register `reg`.
//
// By value kind:
//  - `.constant` starting with `"`: the quoted text is unescaped, appended to
//    rodata with a trailing NUL, and its address is loaded RIP-relative;
//  - other `.constant`: parsed as i64 and loaded with xor (zero), a 32-bit
//    immediate (1..0x7FFFFFFF) or a 64-bit immediate;
//  - `.func_ref`: RIP-relative address of the named symbol;
//  - `.c_string_literal` / `.string_literal`: delegated to the matching
//    materialize helper;
//  - `.global`: RIP-relative address of the symbol, or a GOT load for Mach-O
//    globals with external linkage;
//  - anything else: an alloca with a recorded offset yields its rbp-relative
//    address; otherwise the value is copied from its allocated register or
//    loaded from its stack slot.
fn (mut g Gen) load_val_to_reg(reg int, val_id int) {
	val := g.mod.values[val_id]
	if val.kind == .constant {
		if val.name.starts_with('"') {
			// Drop exactly one delimiter from each end. `trim('"')` strips every
			// leading/trailing quote, so a literal ending in an escaped quote
			// (`"a\""`) would lose that quote and keep a dangling backslash.
			mut str_content := val.name[1..]
			if str_content.ends_with('"') {
				str_content = str_content[..str_content.len - 1]
			}
			// Handle escapes like arm64.v
			mut raw_bytes := []u8{}
			mut i := 0
			for i < str_content.len {
				if str_content[i] == `\\` && i + 1 < str_content.len {
					match str_content[i + 1] {
						`n` { raw_bytes << 10 }
						`t` { raw_bytes << 9 }
						`r` { raw_bytes << 13 }
						`\\` { raw_bytes << 92 }
						`"` { raw_bytes << 34 }
						`'` { raw_bytes << 39 }
						// Unknown escape: keep the escaped character itself.
						else { raw_bytes << str_content[i + 1] }
					}

					i += 2
				} else {
					raw_bytes << str_content[i]
					i++
				}
			}

			str_offset := g.rodata_len()
			g.add_rodata(raw_bytes)
			g.add_rodata_byte(0)
			sym_name := 'L_str_${g.curr_offset}_${str_offset}'
			sym_idx := g.add_symbol(sym_name, u64(str_offset), false, .rodata)

			// lea reg, [rip + disp]
			asm_lea_reg_rip(mut g, Reg(reg))
			g.add_rip_reloc(sym_idx)
			g.emit_u32(0)
		} else {
			int_val := val.name.i64()
			if int_val == 0 {
				asm_xor_reg_reg(mut g, Reg(reg))
			} else if int_val > 0 && int_val <= 0x7FFFFFFF {
				asm_mov_reg_imm32(mut g, Reg(reg), u32(int_val))
			} else {
				asm_mov_reg_imm64(mut g, Reg(reg), u64(int_val))
			}
		}
	} else if val.kind == .func_ref {
		sym_idx := g.add_undefined(val.name)
		asm_lea_reg_rip(mut g, Reg(reg))
		g.add_rip_reloc(sym_idx)
		g.emit_u32(0)
	} else if val.kind == .c_string_literal {
		g.materialize_c_string_literal(reg, val_id)
	} else if val.kind == .global {
		sym_idx := g.add_undefined(val.name)
		if g.obj_format == .macho && val.index >= 0 && val.index < g.mod.globals.len
			&& g.mod.globals[val.index].linkage == .external {
			asm_mov_reg_got_rip(mut g, Reg(reg))
			g.add_macho_got_load_reloc(sym_idx)
		} else {
			asm_lea_reg_rip(mut g, Reg(reg))
			g.add_rip_reloc(sym_idx)
		}
		g.emit_u32(0)
	} else if val.kind == .string_literal {
		g.materialize_string_literal(reg, val_id)
	} else {
		if val.kind == .instruction {
			instr := g.mod.instrs[val.index]
			if instr.op == .alloca {
				// An alloca value is the address of its slot, not the slot's contents.
				if off := g.alloca_offsets[val_id] {
					asm_lea_reg_rbp_disp(mut g, Reg(reg), off)
					return
				}
			}
		}
		if reg_idx := g.reg_map[val_id] {
			if reg_idx != reg {
				asm_mov_reg_reg(mut g, Reg(reg), Reg(reg_idx))
			}
		} else {
			offset := g.stack_map[val_id]
			asm_load_reg_rbp_disp(mut g, Reg(reg), offset)
		}
	}
}
2745
// materialize_c_string_literal decodes the C string literal stored in the
// value's name, appends the bytes plus a trailing NUL to rodata under a fresh
// `L_cstr_*` symbol, and loads that symbol's address into `reg` RIP-relative.
fn (mut g Gen) materialize_c_string_literal(reg int, val_id int) {
	decoded := decode_c_string_literal_bytes(g.mod.values[val_id].name)
	rodata_off := g.rodata_len()
	g.add_rodata(decoded)
	g.add_rodata_byte(0)
	label := 'L_cstr_${g.curr_offset}_${rodata_off}'
	sym_idx := g.add_symbol(label, u64(rodata_off), false, .rodata)
	asm_lea_reg_rip(mut g, Reg(reg))
	g.add_rip_reloc(sym_idx)
	// Displacement placeholder for the relocation recorded above.
	g.emit_u32(0)
}
2758
// x64_c_escape_hex_digit returns the value (0..15) of an ASCII hexadecimal
// digit in either letter case, or -1 when `c` is not a hex digit.
fn x64_c_escape_hex_digit(c u8) int {
	match c {
		`0`...`9` {
			return int(c - `0`)
		}
		`a`...`f` {
			return int(c - `a`) + 10
		}
		`A`...`F` {
			return int(c - `A`) + 10
		}
		else {
			return -1
		}
	}
}
2771
// x64_c_escape_is_octal_digit reports whether `c` is an ASCII digit `0`..`7`.
fn x64_c_escape_is_octal_digit(c u8) bool {
	if c < `0` {
		return false
	}
	return c <= `7`
}
2775
// decode_c_string_literal_bytes turns the escaped text of a C string literal
// into raw bytes.
//
// Recognized after a backslash:
//  - up to three octal digits, value truncated to 8 bits;
//  - `x` followed by any number of hex digits, value truncated to 8 bits
//    (`\x` with no digit yields a literal `x`);
//  - `a b f n t r v \ " ' ?` with their usual byte values;
//  - any other character, which is emitted as itself.
// A backslash that is the final byte is copied through unchanged.
fn decode_c_string_literal_bytes(raw string) []u8 {
	mut out := []u8{cap: raw.len}
	mut pos := 0
	for pos < raw.len {
		cur := raw[pos]
		if cur != `\\` || pos + 1 >= raw.len {
			out << cur
			pos++
			continue
		}
		esc := raw[pos + 1]
		if x64_c_escape_is_octal_digit(esc) {
			mut stop := pos + 1
			mut acc := u32(0)
			mut taken := 0
			for stop < raw.len && taken < 3 && x64_c_escape_is_octal_digit(raw[stop]) {
				acc = (acc * 8 + u32(raw[stop] - `0`)) & 0xff
				stop++
				taken++
			}
			out << u8(acc)
			pos = stop
			continue
		}
		if esc == `x` {
			first_digit := pos + 2
			mut stop := first_digit
			mut acc := u32(0)
			for stop < raw.len {
				nibble := x64_c_escape_hex_digit(raw[stop])
				if nibble < 0 {
					break
				}
				acc = ((acc << 4) + u32(nibble)) & 0xff
				stop++
			}
			if stop > first_digit {
				out << u8(acc)
				pos = stop
				continue
			}
			// No hex digit followed: fall through and emit `x` itself.
		}
		decoded := match esc {
			`a` { u8(7) }
			`b` { u8(8) }
			`f` { u8(12) }
			`n` { u8(10) }
			`t` { u8(9) }
			`r` { u8(13) }
			`v` { u8(11) }
			`\\` { u8(92) }
			`"` { u8(34) }
			`'` { u8(39) }
			`?` { u8(63) }
			else { esc }
		}
		out << decoded
		pos += 2
	}
	return out
}
2862
// store_reg_to_val writes register `reg` into the home of `val_id`: its
// allocated register when it has one (no move if that is already `reg`),
// otherwise its rbp-relative stack slot.
fn (mut g Gen) store_reg_to_val(reg int, val_id int) {
	home := g.reg_map[val_id] or {
		asm_store_rbp_disp_reg(mut g, g.stack_map[val_id], Reg(reg))
		return
	}
	if home != reg {
		asm_mov_reg_reg(mut g, Reg(home), Reg(reg))
	}
}
2873
// materialize_string_literal builds the three-field string struct for
// `val_id` in its stack slot and leaves the slot's address in `reg`.
//
// The literal's bytes plus a trailing NUL are appended to rodata under a
// fresh `L_str_*` symbol. Field 0 receives that symbol's address, field 1
// receives `val.index`, and field 2 receives 1. Each store uses the field's
// own type size, falling back to 8 bytes when the size is not positive.
// rax is used as scratch throughout.
fn (mut g Gen) materialize_string_literal(reg int, val_id int) {
	val := g.mod.values[val_id]
	rodata_off := g.rodata_len()
	g.add_rodata(val.name.bytes())
	g.add_rodata_byte(0)
	label := 'L_str_${g.curr_offset}_${rodata_off}'
	sym_idx := g.add_symbol(label, u64(rodata_off), false, .rodata)

	slot_off := g.stack_map[val_id]
	asm_lea_reg_rip(mut g, rax)
	g.add_rip_reloc(sym_idx)
	g.emit_u32(0)

	// Store width of each of the three fields, defaulting to 8 bytes.
	mut widths := []int{len: 3, init: 8}
	for field in 0 .. 3 {
		field_bytes := g.type_size(g.struct_field_type(val.typ, field, 0))
		if field_bytes > 0 {
			widths[field] = field_bytes
		}
	}

	asm_store_rbp_disp_reg_size(mut g, slot_off + g.struct_field_offset_bytes(val.typ, 0), rax,
		widths[0])
	asm_mov_reg_imm32(mut g, rax, u32(val.index))
	asm_store_rbp_disp_reg_size(mut g, slot_off + g.struct_field_offset_bytes(val.typ, 1), rax,
		widths[1])
	asm_mov_reg_imm32(mut g, rax, 1)
	asm_store_rbp_disp_reg_size(mut g, slot_off + g.struct_field_offset_bytes(val.typ, 2), rax,
		widths[2])

	fits_disp8 := slot_off >= -128 && slot_off <= 127
	if fits_disp8 {
		asm_lea_rax_rbp_disp8(mut g, i8(slot_off))
	} else {
		asm_lea_rax_rbp_disp32(mut g, slot_off)
	}
	if reg != 0 {
		asm_mov_reg_reg(mut g, Reg(reg), rax)
	}
}
2915
// load_float_val_to_xmm loads float value `val_id` from its stack slot into
// register `xmm`, reading `size` bytes.
//
// A constant is first written to that slot: its name is parsed as f32 when
// `size` is 4 and as f64 otherwise, and the bit pattern is stored via rax.
fn (mut g Gen) load_float_val_to_xmm(xmm int, val_id int, size int) {
	val := g.mod.values[val_id]
	slot := g.stack_map[val_id]
	if val.kind == .constant {
		if size == 4 {
			asm_mov_reg_imm32(mut g, rax, bits.f32_bits(val.name.f32()))
		} else {
			asm_mov_reg_imm64(mut g, rax, bits.f64_bits(val.name.f64()))
		}
		asm_store_rbp_disp_reg_size(mut g, slot, rax, size)
	}
	asm_load_xmm_rbp_disp(mut g, xmm, slot, size)
}
2928
// type_size returns the storage size of type `typ_id` in bytes.
//
//  - type id 0 is 0 bytes; any other id outside the type store is 8 bytes;
//  - void, label and metadata types are 0 bytes;
//  - pointers and function types are 8 bytes;
//  - ints and floats use their bit width rounded up to whole bytes, or 8 when
//    no positive width is set;
//  - arrays are `len * element size`;
//  - unions take the largest field size; structs lay fields out in order,
//    aligning each to its own alignment. Both are then rounded up to the
//    largest field alignment, and an empty result is reported as 8 bytes.
fn (g Gen) type_size(typ_id ssa.TypeID) int {
	if typ_id == 0 {
		return 0
	}
	if typ_id < 0 || typ_id >= g.mod.type_store.types.len {
		return 8
	}
	typ := g.mod.type_store.types[typ_id]
	match typ.kind {
		.void_t, .label_t, .metadata_t {
			return 0
		}
		.ptr_t, .func_t {
			return 8
		}
		.int_t, .float_t {
			if typ.width <= 0 {
				return 8
			}
			return (typ.width + 7) / 8
		}
		.array_t {
			return typ.len * g.type_size(typ.elem_type)
		}
		.struct_t {
			mut size := 0
			mut widest_align := 1
			for field_typ in typ.fields {
				field_align := g.type_align(field_typ)
				if field_align > widest_align {
					widest_align = field_align
				}
				field_size := g.type_size(field_typ)
				if typ.is_union {
					// Union fields overlap: only the largest one counts.
					if field_size > size {
						size = field_size
					}
				} else {
					// Pad up to this field's alignment before placing it.
					if field_align > 1 && size % field_align != 0 {
						size = (size + field_align - 1) & ~(field_align - 1)
					}
					size += field_size
				}
			}
			// Tail padding up to the widest field alignment.
			if widest_align > 1 && size % widest_align != 0 {
				size = (size + widest_align - 1) & ~(widest_align - 1)
			}
			return if size > 0 { size } else { 8 }
		}
	}
}
2997
// type_align returns the alignment of type `typ_id` in bytes (1, 2, 4 or 8).
//
// Arrays take the alignment of their element type and unions the largest
// alignment among their fields (1 when they have none). Every other type
// derives its alignment from its size: 8 for sizes of 8 or more, then 4, 2, 1.
//
// NOTE(review): non-union structs also take the size-derived path, so e.g. a
// struct of three 1-byte fields (size 3) reports alignment 2 rather than its
// largest field alignment — confirm this is intended.
fn (g Gen) type_align(typ_id ssa.TypeID) int {
	if typ_id > 0 && typ_id < g.mod.type_store.types.len {
		typ := g.mod.type_store.types[typ_id]
		match typ.kind {
			.array_t {
				return g.type_align(typ.elem_type)
			}
			.struct_t {
				if typ.is_union {
					mut widest := 1
					for field_typ in typ.fields {
						field_align := g.type_align(field_typ)
						if field_align > widest {
							widest = field_align
						}
					}
					return widest
				}
			}
			else {}
		}
	}
	size := g.type_size(typ_id)
	for candidate in [8, 4, 2] {
		if size >= candidate {
			return candidate
		}
	}
	return 1
}
3027
// load_address_of_val_to_reg puts the address of `val_id` into `reg`.
//
// String literals are materialized. A value with a non-zero stack offset gets
// its rbp-relative address computed in rax, then moved to `reg` when `reg` is
// not register 0. A zero offset is treated as "the value already holds a
// pointer" and the value itself is loaded instead.
fn (mut g Gen) load_address_of_val_to_reg(reg int, val_id int) {
	if val_id > 0 && val_id < g.mod.values.len && g.mod.values[val_id].kind == .string_literal {
		g.materialize_string_literal(reg, val_id)
		return
	}
	slot_off := g.stack_map[val_id]
	if slot_off == 0 {
		// Fallback: value already holds a pointer.
		g.load_val_to_reg(reg, val_id)
		return
	}
	fits_disp8 := slot_off >= -128 && slot_off <= 127
	if fits_disp8 {
		asm_lea_rax_rbp_disp8(mut g, i8(slot_off))
	} else {
		asm_lea_rax_rbp_disp32(mut g, slot_off)
	}
	if reg != 0 {
		asm_mov_reg_reg(mut g, Reg(reg), rax)
	}
}
3048
// map_reg converts register number `r` to the `u8` used when encoding
// instructions. The conversion is a plain cast.
fn (g Gen) map_reg(r int) u8 {
	hw := u8(r)
	return hw
}
3052
// record_pending_label appends the current text position, measured from
// `g.curr_offset`, to the list of pending references for block `blk`.
fn (mut g Gen) record_pending_label(blk int) {
	rel_pos := g.text_len() - g.curr_offset
	g.pending_labels[blk] << rel_pos
}
3057
3058// Register Allocation Logic
3059
// allocate_registers runs a linear-scan style register assignment over the
// values of `func`. It fills `g.reg_map` (value id -> register number) and
// `g.used_regs` (sorted list of the registers that were handed out).
//
// Only RBX, R12, R13, R14 and R15 are handed out. Values that are phi-related,
// need stack storage, or are parameters rejected by the checks below never
// receive a register; they simply get no `g.reg_map` entry. Nothing is
// allocated when the ABI is Windows.
fn (mut g Gen) allocate_registers(func mir.Function) {
	if g.abi == .windows {
		return
	}
	mut intervals := map[int]&Interval{}
	mut instr_idx := 0
	mut total_instrs := 0

	for blk_id in func.blocks {
		total_instrs += g.mod.blocks[blk_id].instrs.len
	}

	// Phi elimination lowers edge copies as `.assign dest, src` and leaves
	// placeholder `.bitcast` values for former phis. Keep these CFG-carried
	// values on the stack so branch order does not decide register lifetime.
	mut phi_related_vals := map[int]bool{}
	for blk_id in func.blocks {
		blk := g.mod.blocks[blk_id]
		for val_id in blk.instrs {
			val := g.mod.values[val_id]
			if val.kind != .instruction {
				continue
			}
			instr := g.mod.instrs[val.index]
			if instr.op == .assign {
				phi_related_vals[val_id] = true
				if instr.operands.len > 0 {
					phi_related_vals[instr.operands[0]] = true
				}
				if instr.operands.len > 1 {
					phi_related_vals[instr.operands[1]] = true
				}
			} else if instr.op == .bitcast && instr.operands.len == 0 {
				phi_related_vals[val_id] = true
			}
		}
	}

	// Track which values are alloca results - don't register allocate these
	// as they hold addresses that may be needed across the function
	mut alloca_vals := map[int]bool{}

	for i, pid in func.params {
		param_size := g.type_size(g.mod.values[pid].typ)
		if i < func.abi_param_class.len && func.abi_param_class[i] == .indirect {
			alloca_vals[pid] = true
			continue
		}
		if g.value_is_float_type(pid) {
			alloca_vals[pid] = true
			continue
		}
		if g.value_is_aggregate(pid) || param_size > 8
			|| g.value_needs_raw_abi_reg_bytes(pid, param_size) {
			alloca_vals[pid] = true
			continue
		}
		intervals[pid] = &Interval{
			val_id: pid
			start:  0
			// ABI lowering can hide original parameter uses inside selected call sequences.
			// Keep incoming parameters live across the function to avoid reusing their
			// callee-saved register while later calls still need the parameter value.
			end: total_instrs
		}
	}

	// Build the live intervals: a value starts at the position of its
	// definition and is extended to the last position that uses it as an operand.
	for blk_id in func.blocks {
		blk := g.mod.blocks[blk_id]
		for val_id in blk.instrs {
			val := g.mod.values[val_id]
			is_instr := val.kind == .instruction
			if is_instr || val.kind == .argument {
				if val_id !in intervals && !(is_instr && g.value_needs_stack_storage(val_id))
					&& val_id !in phi_related_vals {
					intervals[val_id] = &Interval{
						val_id: val_id
						start:  instr_idx
						end:    instr_idx
					}
				}
			}
			if !is_instr {
				// The sibling loops only read `g.mod.instrs[val.index]` for
				// instruction values; do the same here instead of indexing the
				// instruction table with an unrelated index. The position
				// counter still advances so numbering matches `total_instrs`.
				if g.value_needs_stack_storage(val_id) {
					alloca_vals[val_id] = true
				}
				instr_idx++
				continue
			}
			instr := g.mod.instrs[val.index]
			// Mark alloca results as non-register-allocatable
			if instr.op in [.alloca, .call_sret] || g.value_needs_stack_storage(val_id) {
				alloca_vals[val_id] = true
			}
			for op in instr.operands {
				if op in phi_related_vals {
					continue
				}
				if g.mod.values[op].kind in [.instruction, .argument] {
					if mut interval := intervals[op] {
						if instr_idx > interval.end {
							interval.end = instr_idx
						}
					}
				}
			}
			instr_idx++
		}
	}

	// A value used in a block other than the one defining it is kept live
	// until the end of the function.
	mut block_of_def := map[int]int{}
	for blk_id in func.blocks {
		blk := g.mod.blocks[blk_id]
		for val_id in blk.instrs {
			block_of_def[val_id] = blk_id
		}
	}
	for blk_id in func.blocks {
		blk := g.mod.blocks[blk_id]
		for val_id in blk.instrs {
			val := g.mod.values[val_id]
			if val.kind != .instruction {
				continue
			}
			instr := g.mod.instrs[val.index]
			for op in instr.operands {
				if op in phi_related_vals {
					continue
				}
				if g.mod.values[op].kind in [.instruction, .argument] {
					if def_blk := block_of_def[op] {
						if def_blk != blk_id {
							if mut interval := intervals[op] {
								interval.end = total_instrs
							}
						}
					}
				}
			}
		}
	}

	// Insertion sort by start position; entries with equal starts keep the
	// order in which the map yields them.
	mut sorted := []&Interval{}
	for _, i in intervals {
		sorted << i
		mut j := sorted.len - 1
		for j > 0 && sorted[j - 1].start > sorted[j].start {
			sorted[j - 1], sorted[j] = sorted[j], sorted[j - 1]
			j--
		}
	}

	mut active := []&Interval{}
	// Use callee-saved registers: RBX(3), R12(12), R13(13), R14(14), R15(15)
	regs := [3, 12, 13, 14, 15]

	for i in sorted {
		// Skip alloca results - they must stay on stack to preserve addresses
		if alloca_vals[i.val_id] {
			continue
		}
		// Retire intervals that ended before this one starts.
		for j := 0; j < active.len; j++ {
			if active[j].end < i.start {
				active.delete(j)
				j--
			}
		}
		if active.len < regs.len {
			mut used := []bool{len: 16, init: false}
			for a in active {
				used[g.reg_map[a.val_id]] = true
			}
			// Take the first register in `regs` that no active interval holds.
			for r in regs {
				if !used[r] {
					g.reg_map[i.val_id] = r
					active << i
					if r !in g.used_regs {
						g.used_regs << r
					}
					break
				}
			}
		}
	}
	g.used_regs.sort()
}
3238
// value_is_aggregate reports whether `val_id` names a value whose type is a
// struct or an array. Unknown value ids and unknown type ids yield false.
fn (g Gen) value_is_aggregate(val_id int) bool {
	if val_id > 0 && val_id < g.mod.values.len {
		typ_id := g.mod.values[val_id].typ
		if typ_id > 0 && typ_id < g.mod.type_store.types.len {
			kind := g.mod.type_store.types[typ_id].kind
			return kind == .struct_t || kind == .array_t
		}
	}
	return false
}
3250
// value_is_float_type reports whether `val_id` names a value whose type has
// kind `.float_t`. Unknown value ids and unknown type ids yield false.
fn (g Gen) value_is_float_type(val_id int) bool {
	value_known := val_id > 0 && val_id < g.mod.values.len
	if !value_known {
		return false
	}
	typ_id := g.mod.values[val_id].typ
	type_known := typ_id > 0 && typ_id < g.mod.type_store.types.len
	if !type_known {
		return false
	}
	return g.mod.type_store.types[typ_id].kind == .float_t
}
3261
// ensure_float_abi_scalar accepts values whose type size is 4 or 8 bytes.
// Any other size is reported through `unsupported_float_abi` with `context`.
fn (g Gen) ensure_float_abi_scalar(val_id int, context string) {
	byte_size := g.type_size(g.mod.values[val_id].typ)
	if byte_size != 4 && byte_size != 8 {
		g.unsupported_float_abi(context, val_id)
	}
}
3269
// unsupported_float_abi builds the "not implemented" message for a float value
// in the given `context` and hands it to `x64_unsupported`. The message names
// the bit width of the value's type and the value id.
fn (g Gen) unsupported_float_abi(context string, val_id int) {
	size := g.type_size(g.mod.values[val_id].typ)
	// The two stack contexts get a dedicated wording; everything else is
	// described as "float <context>".
	subject := match context {
		'stack parameter' { 'stack-passed float parameter' }
		'stack argument' { 'stack-passed float argument' }
		else { 'float ${context}' }
	}
	message := 'backend feature: ${subject} (${size * 8}-bit type in value ${val_id}) ' +
		'is not implemented for this ABI yet'
	x64_unsupported(message)
}
3282
// value_is_zero_constant reports whether `val_id` is a constant whose name is
// exactly the string '0'. Unknown value ids yield false.
fn (g Gen) value_is_zero_constant(val_id int) bool {
	if val_id > 0 && val_id < g.mod.values.len {
		candidate := g.mod.values[val_id]
		if candidate.kind != .constant {
			return false
		}
		return candidate.name == '0'
	}
	return false
}
3290
// value_needs_raw_abi_reg_bytes reports whether a value of `size` bytes
// (1 to 8 inclusive) is either an aggregate or has a size accepted by
// `is_raw_abi_reg_size`. Sizes outside 1..8 always yield false.
fn (g Gen) value_needs_raw_abi_reg_bytes(val_id int, size int) bool {
	if size <= 0 || size > 8 {
		return false
	}
	if g.value_is_aggregate(val_id) {
		return true
	}
	return is_raw_abi_reg_size(size)
}
3294
// stack_storage_size returns the number of bytes to reserve on the stack for
// `val_id`:
//   - unknown value ids get 8 bytes;
//   - string literals get at least 16 bytes;
//   - aggregates get their exact type size (8 when that size is not positive);
//   - everything else gets its type size, but never less than 8 bytes.
fn (g Gen) stack_storage_size(val_id int) int {
	if val_id <= 0 || val_id >= g.mod.values.len {
		return 8
	}
	v := g.mod.values[val_id]
	byte_size := g.type_size(v.typ)
	if v.kind == .string_literal && byte_size < 16 {
		return 16
	}
	if g.value_is_aggregate(val_id) {
		if byte_size > 0 {
			return byte_size
		}
		return 8
	}
	if byte_size > 8 {
		return byte_size
	}
	return 8
}
3309
// value_needs_stack_storage reports whether `val_id` must live in a stack slot.
// The checks run in this order:
//   1. unknown value id -> false
//   2. string literal or eliminated phi placeholder -> true
//   3. unknown type id -> false
//   4. bitcast instruction with an operand that touches a pointer -> false
//   5. float, struct or array type, or a type larger than 8 bytes -> true
//   6. non-instruction value -> false
//   7. otherwise true only for the aggregate-producing ops listed at the end
fn (g Gen) value_needs_stack_storage(val_id int) bool {
	if val_id <= 0 || val_id >= g.mod.values.len {
		return false
	}
	v := g.mod.values[val_id]
	if v.kind == .string_literal || g.value_is_eliminated_phi_placeholder(val_id) {
		return true
	}
	if v.typ <= 0 || v.typ >= g.mod.type_store.types.len {
		return false
	}
	is_instr := v.kind == .instruction
	if is_instr {
		cast := g.mod.instrs[v.index]
		if cast.op == .bitcast && cast.operands.len > 0
			&& g.bitcast_touches_pointer(cast.operands[0], v.typ) {
			return false
		}
	}
	kind := g.mod.type_store.types[v.typ].kind
	if kind == .float_t || kind == .struct_t || kind == .array_t {
		return true
	}
	if g.type_size(v.typ) > 8 {
		return true
	}
	if !is_instr {
		return false
	}
	return match g.mod.instrs[v.index].op {
		.call_sret, .inline_string_init, .struct_init, .insertvalue, .extractvalue { true }
		else { false }
	}
}
3344
// value_is_eliminated_phi_placeholder reports whether `val_id` is an
// instruction value that has at least one use and whose instruction is a
// `.bitcast` with no operands. Out-of-range ids and indexes yield false.
fn (g Gen) value_is_eliminated_phi_placeholder(val_id int) bool {
	if val_id <= 0 || val_id >= g.mod.values.len {
		return false
	}
	candidate := g.mod.values[val_id]
	if candidate.kind != .instruction {
		return false
	}
	if candidate.uses.len == 0 {
		return false
	}
	index_ok := candidate.index >= 0 && candidate.index < g.mod.instrs.len
	if !index_ok {
		return false
	}
	placeholder := g.mod.instrs[candidate.index]
	if placeholder.op != .bitcast {
		return false
	}
	return placeholder.operands.len == 0
}
3359
// bitcast_touches_pointer reports whether a bitcast involves a pointer on
// either side: true when `dst_typ` is a known `.ptr_t` type, or when the
// source value `src_id` is known and its type is a known `.ptr_t` type.
fn (g Gen) bitcast_touches_pointer(src_id int, dst_typ int) bool {
	// Destination side first.
	if dst_typ > 0 && dst_typ < g.mod.type_store.types.len {
		if g.mod.type_store.types[dst_typ].kind == .ptr_t {
			return true
		}
	}
	// Source side: needs a valid value id and a valid type id.
	if src_id <= 0 || src_id >= g.mod.values.len {
		return false
	}
	src_typ := g.mod.values[src_id].typ
	if src_typ <= 0 || src_typ >= g.mod.type_store.types.len {
		return false
	}
	return g.mod.type_store.types[src_typ].kind == .ptr_t
}
3372