v / vlib / v2 / gen / x64 / asm.v
1020 lines · 897 sloc · 17.63 KB · 81a5657604ec6da99c25e26546870c6888d6fdde
Raw
1// Copyright (c) 2026 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4
5module x64
6
7// x64 Instruction Encoding Helpers
8// These functions provide type-safe instruction encoding for the x64 backend.
9
// Reg is an integer alias used to name a general-purpose register in the
// signatures of the encoding helpers in this file.
type Reg = int

// General-purpose register identifiers, numbered 0-15 in x86-64 encoding order
// (rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8..r15). The helpers in this file
// pass these values through g.map_reg before using them as hardware register
// numbers, so the final encoding depends on that mapping.
const rax = Reg(0)
const rcx = Reg(1)
const rdx = Reg(2)
const rbx = Reg(3)
const rsp = Reg(4)
const rbp = Reg(5)
const rsi = Reg(6)
const rdi = Reg(7)
const r8 = Reg(8)
const r9 = Reg(9)
const r10 = Reg(10)
const r11 = Reg(11)
const r12 = Reg(12)
const r13 = Reg(13)
const r14 = Reg(14)
const r15 = Reg(15)
30
31// === Prologue/Epilogue ===
32
// asm_endbr64 emits `endbr64` (F3 0F 1E FA), the CET/IBT landing-pad marker.
fn asm_endbr64(mut g Gen) {
	for b in [u8(0xF3), 0x0F, 0x1E, 0xFA] {
		g.emit(b)
	}
}
40
// asm_push_rbp emits `push rbp` (one-byte opcode 50+rd with rd = 5).
fn asm_push_rbp(mut g Gen) {
	opcode := u8(0x50 | 5)
	g.emit(opcode)
}
45
// asm_mov_rbp_rsp emits `mov rbp, rsp`.
fn asm_mov_rbp_rsp(mut g Gen) {
	// 48 = REX.W, 89 = MOV r/m64, r64, E5 = ModRM(mod=11, reg=rsp, rm=rbp)
	for b in [u8(0x48), 0x89, 0xE5] {
		g.emit(b)
	}
}
52
// asm_pop_rbp emits `pop rbp` (one-byte opcode 58+rd with rd = 5).
fn asm_pop_rbp(mut g Gen) {
	opcode := u8(0x58 | 5)
	g.emit(opcode)
}
57
// asm_ret emits a near `ret` (C3).
fn asm_ret(mut g Gen) {
	near_ret := u8(0xC3)
	g.emit(near_ret)
}
62
63// === Push/Pop ===
64
// asm_push emits `push reg`. The register is first translated with g.map_reg;
// hardware registers 8 and above get a REX.B prefix (0x41) and only their low
// three bits are placed in the 50+rd opcode.
fn asm_push(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	if hw >= 8 {
		g.emit(0x41)
	}
	g.emit(0x50 | (hw & 7))
}
75
// asm_pop emits `pop reg`. The register is first translated with g.map_reg;
// hardware registers 8 and above get a REX.B prefix (0x41) and only their low
// three bits are placed in the 58+rd opcode.
fn asm_pop(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	if hw >= 8 {
		g.emit(0x41)
	}
	g.emit(0x58 | (hw & 7))
}
86
87// === Stack Arithmetic ===
88
// asm_sub_rsp_imm8 emits `sub rsp, imm8`.
// NOTE(review): the processor sign-extends imm8, so callers presumably pass
// values <= 0x7F - confirm at the call sites.
fn asm_sub_rsp_imm8(mut g Gen, imm u8) {
	// 48 = REX.W, 83 /5 ib = SUB r/m64, imm8, EC = ModRM(mod=11, /5, rm=rsp)
	for b in [u8(0x48), 0x83, 0xEC, imm] {
		g.emit(b)
	}
}
96
// asm_sub_rsp_imm32 emits `sub rsp, imm32`: the three fixed bytes 48 81 EC
// followed by the immediate written with g.emit_u32.
fn asm_sub_rsp_imm32(mut g Gen, imm u32) {
	for b in [u8(0x48), 0x81, 0xEC] {
		g.emit(b)
	}
	g.emit_u32(imm)
}
104
// asm_test_byte_ptr_rsp_zero emits `test byte ptr [rsp], 0`.
fn asm_test_byte_ptr_rsp_zero(mut g Gen) {
	// F6 /0 ib = TEST r/m8, imm8; 04 24 = ModRM + SIB selecting [rsp]; 00 = imm8
	for b in [u8(0xF6), 0x04, 0x24, 0x00] {
		g.emit(b)
	}
}
112
// asm_add_rsp_imm8 emits `add rsp, imm8`.
// NOTE(review): the processor sign-extends imm8, so callers presumably pass
// values <= 0x7F - confirm at the call sites.
fn asm_add_rsp_imm8(mut g Gen, imm u8) {
	// 48 = REX.W, 83 /0 ib = ADD r/m64, imm8, C4 = ModRM(mod=11, /0, rm=rsp)
	for b in [u8(0x48), 0x83, 0xC4, imm] {
		g.emit(b)
	}
}
120
// asm_add_rsp_imm32 emits `add rsp, imm32`: the three fixed bytes 48 81 C4
// followed by the immediate written with g.emit_u32.
fn asm_add_rsp_imm32(mut g Gen, imm u32) {
	for b in [u8(0x48), 0x81, 0xC4] {
		g.emit(b)
	}
	g.emit_u32(imm)
}
128
129// === Arithmetic ===
130
// asm_add_rax_rcx emits `add rax, rcx` (48 01 C8).
fn asm_add_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x01, 0xC8] {
		g.emit(b)
	}
}
137
// asm_sub_rax_rcx emits `sub rax, rcx` (48 29 C8).
fn asm_sub_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x29, 0xC8] {
		g.emit(b)
	}
}
144
// asm_imul_rax_rcx emits the two-operand `imul rax, rcx` (48 0F AF C1).
fn asm_imul_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x0F, 0xAF, 0xC1] {
		g.emit(b)
	}
}
152
// asm_cqo emits `cqo`, which sign-extends rax into rdx:rax.
fn asm_cqo(mut g Gen) {
	rex_w := u8(0x48)
	opcode := u8(0x99)
	g.emit(rex_w)
	g.emit(opcode)
}
158
// asm_idiv_rcx emits `idiv rcx`: signed rdx:rax / rcx, quotient in rax and
// remainder in rdx.
fn asm_idiv_rcx(mut g Gen) {
	// 48 = REX.W, F7 /7 = IDIV r/m64, F9 = ModRM(mod=11, /7, rm=rcx)
	for b in [u8(0x48), 0xF7, 0xF9] {
		g.emit(b)
	}
}
165
// asm_div_rcx emits `div rcx`: unsigned rdx:rax / rcx, quotient in rax and
// remainder in rdx.
fn asm_div_rcx(mut g Gen) {
	// 48 = REX.W, F7 /6 = DIV r/m64, F1 = ModRM(mod=11, /6, rm=rcx)
	for b in [u8(0x48), 0xF7, 0xF1] {
		g.emit(b)
	}
}
172
// asm_mov_rax_rdx emits `mov rax, rdx` (48 89 D0), e.g. to fetch the remainder
// left in rdx by a division.
fn asm_mov_rax_rdx(mut g Gen) {
	for b in [u8(0x48), 0x89, 0xD0] {
		g.emit(b)
	}
}
179
180// === Logical ===
181
// asm_and_rax_rcx emits `and rax, rcx` (48 21 C8).
fn asm_and_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x21, 0xC8] {
		g.emit(b)
	}
}
188
// asm_or_rax_rcx emits `or rax, rcx` (48 09 C8).
fn asm_or_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x09, 0xC8] {
		g.emit(b)
	}
}
195
// asm_or_rax_r11 emits `or rax, r11`.
fn asm_or_rax_r11(mut g Gen) {
	// 4C = REX.W + REX.R, 09 = OR r/m64, r64, D8 = ModRM(mod=11, reg=r11 low bits, rm=rax)
	for b in [u8(0x4C), 0x09, 0xD8] {
		g.emit(b)
	}
}
201
// asm_xor_rax_rcx emits `xor rax, rcx` (48 31 C8).
fn asm_xor_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x31, 0xC8] {
		g.emit(b)
	}
}
208
// asm_xor_eax_eax emits the two-byte `xor eax, eax`, which clears rax (and
// therefore AL, as needed before variadic calls).
fn asm_xor_eax_eax(mut g Gen) {
	opcode := u8(0x31)
	modrm := u8(0xC0)
	g.emit(opcode)
	g.emit(modrm)
}
214
// asm_xor_edx_edx emits the two-byte `xor edx, edx`, clearing rdx (used before
// an unsigned division).
fn asm_xor_edx_edx(mut g Gen) {
	opcode := u8(0x31)
	modrm := u8(0xD2)
	g.emit(opcode)
	g.emit(modrm)
}
220
// asm_xor_reg_reg emits a 32-bit `xor reg, reg` to clear a register. The
// register is translated with g.map_reg; hardware registers 8 and above get a
// 0x45 prefix (REX.R + REX.B) because the register appears in both ModRM fields.
fn asm_xor_reg_reg(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	if hw >= 8 {
		g.emit(0x45)
	}
	low := hw & 7
	g.emit(0x31)
	g.emit(0xC0 | (low << 3) | low)
}
233
// asm_and_rcx_imm8 emits `and rcx, imm8`.
fn asm_and_rcx_imm8(mut g Gen, imm u8) {
	// 48 = REX.W, 83 /4 ib = AND r/m64, imm8, E1 = ModRM(mod=11, /4, rm=rcx)
	for b in [u8(0x48), 0x83, 0xE1, imm] {
		g.emit(b)
	}
}
241
242// === Shifts ===
243
// asm_shl_rax_cl emits `shl rax, cl` (48 D3 E0).
fn asm_shl_rax_cl(mut g Gen) {
	for b in [u8(0x48), 0xD3, 0xE0] {
		g.emit(b)
	}
}
250
// asm_sar_rax_cl emits `sar rax, cl`, an arithmetic right shift (48 D3 F8).
fn asm_sar_rax_cl(mut g Gen) {
	for b in [u8(0x48), 0xD3, 0xF8] {
		g.emit(b)
	}
}
257
// asm_shr_rax_cl emits `shr rax, cl`, a logical right shift (48 D3 E8).
fn asm_shr_rax_cl(mut g Gen) {
	for b in [u8(0x48), 0xD3, 0xE8] {
		g.emit(b)
	}
}
264
// asm_shr_rax_1 emits `shr rax, 1` using the shift-by-one form (48 D1 E8).
fn asm_shr_rax_1(mut g Gen) {
	for b in [u8(0x48), 0xD1, 0xE8] {
		g.emit(b)
	}
}
271
// asm_shr_rax_imm8 emits `shr rax, imm8` (48 C1 E8 ib).
fn asm_shr_rax_imm8(mut g Gen, imm u8) {
	for b in [u8(0x48), 0xC1, 0xE8, imm] {
		g.emit(b)
	}
}
278
// asm_shl_rcx_3 emits `shl rcx, 3`, i.e. rcx *= 8 (used to scale a GEP index).
fn asm_shl_rcx_3(mut g Gen) {
	// 48 = REX.W, C1 /4 ib = SHL r/m64, imm8, E1 = ModRM(mod=11, /4, rm=rcx), 03 = count
	for b in [u8(0x48), 0xC1, 0xE1, 0x03] {
		g.emit(b)
	}
}
286
// asm_shl_r11_imm8 emits `shl r11, imm8`.
fn asm_shl_r11_imm8(mut g Gen, imm u8) {
	// 49 = REX.W + REX.B, C1 /4 ib = SHL r/m64, imm8, E3 = ModRM(mod=11, /4, rm=r11 low bits)
	for b in [u8(0x49), 0xC1, 0xE3, imm] {
		g.emit(b)
	}
}
293
294// === Compare ===
295
// asm_cmp_rax_rcx emits `cmp rax, rcx` (48 39 C8).
fn asm_cmp_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x39, 0xC8] {
		g.emit(b)
	}
}
302
// asm_test_rax_rax emits `test rax, rax` (48 85 C0).
fn asm_test_rax_rax(mut g Gen) {
	for b in [u8(0x48), 0x85, 0xC0] {
		g.emit(b)
	}
}
309
// asm_test_reg_reg emits a 64-bit `test reg, reg`. The register is translated
// with g.map_reg and placed in both ModRM fields, so hardware registers 8 and
// above need REX.R and REX.B on top of REX.W (0x48 | 5 = 0x4D).
fn asm_test_reg_reg(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	rex := if hw >= 8 { u8(0x4D) } else { u8(0x48) }
	low := hw & 7
	g.emit(rex)
	g.emit(0x85)
	g.emit(0xC0 | (low << 3) | low)
}
321
322// === Conditional Set ===
323
// Condition codes. Each value is the byte that asm_setcc_al_movzx and
// asm_setcc_cl_movzx emit right after the 0x0F escape, i.e. the second opcode
// byte of the corresponding SETcc instruction.
const cc_e = u8(0x94) // equal

const cc_ne = u8(0x95) // not equal

const cc_l = u8(0x9C) // less (signed)

const cc_g = u8(0x9F) // greater (signed)

const cc_le = u8(0x9E) // less or equal (signed)

const cc_ge = u8(0x9D) // greater or equal (signed)

const cc_b = u8(0x92) // below (unsigned less)

const cc_a = u8(0x97) // above (unsigned greater)

const cc_be = u8(0x96) // below or equal (unsigned)

const cc_ae = u8(0x93) // above or equal (unsigned)

const cc_p = u8(0x9A) // parity (unordered for ucomis)

const cc_np = u8(0x9B) // not parity (ordered for ucomis)
348
// asm_setcc_al_movzx emits `setcc al` followed by `movzx rax, al`, leaving the
// condition result as 0 or 1 in rax. `cc` is one of the cc_* opcode bytes.
fn asm_setcc_al_movzx(mut g Gen, cc u8) {
	// setcc al: 0F <cc> C0
	for b in [u8(0x0F), cc, 0xC0] {
		g.emit(b)
	}
	// movzx rax, al: 48 0F B6 C0
	for b in [u8(0x48), 0x0F, 0xB6, 0xC0] {
		g.emit(b)
	}
}
359
// asm_setcc_cl_movzx emits `setcc cl` followed by `movzx rcx, cl`, leaving the
// condition result as 0 or 1 in rcx. `cc` is one of the cc_* opcode bytes.
fn asm_setcc_cl_movzx(mut g Gen, cc u8) {
	// setcc cl: 0F <cc> C1
	for b in [u8(0x0F), cc, 0xC1] {
		g.emit(b)
	}
	// movzx rcx, cl: 48 0F B6 C9
	for b in [u8(0x48), 0x0F, 0xB6, 0xC9] {
		g.emit(b)
	}
}
369
// asm_ucomis_xmm0_xmm1 emits an unordered scalar compare of xmm0 with xmm1:
// `ucomisd` (66 0F 2E C1) when size is 8, otherwise `ucomiss` (0F 2E C1).
fn asm_ucomis_xmm0_xmm1(mut g Gen, size int) {
	is_double := size == 8
	if is_double {
		g.emit(0x66)
	}
	for b in [u8(0x0F), 0x2E, 0xC1] {
		g.emit(b)
	}
}
378
379// === Memory ===
380
// asm_mov_mem_rcx_rax emits `mov [rcx], rax` (48 89 01), a 64-bit store.
fn asm_mov_mem_rcx_rax(mut g Gen) {
	for b in [u8(0x48), 0x89, 0x01] {
		g.emit(b)
	}
}
387
// asm_mov_rax_mem_rcx emits `mov rax, [rcx]` (48 8B 01), a 64-bit load.
fn asm_mov_rax_mem_rcx(mut g Gen) {
	for b in [u8(0x48), 0x8B, 0x01] {
		g.emit(b)
	}
}
394
// asm_emit_modrm_base_disp emits the ModRM byte, an optional SIB byte and the
// displacement for a `[base + disp]` memory operand.
//   reg_bits - value for the ModRM reg field (already reduced to 3 bits by callers)
//   base_hw  - hardware number of the base register; only the low 3 bits are used here
//   disp     - signed displacement
// The shortest form is chosen: no displacement when disp is 0 (not possible
// when the low bits are 5, i.e. rbp/r13), disp8 when it fits in a signed byte,
// disp32 otherwise. Low bits 4 (rsp/r12) always require the SIB byte 0x24.
fn asm_emit_modrm_base_disp(mut g Gen, reg_bits u8, base_hw u8, disp int) {
	rm := base_hw & 7
	mod_bits := if disp == 0 && rm != 5 {
		u8(0x00)
	} else if disp >= -128 && disp <= 127 {
		u8(0x40)
	} else {
		u8(0x80)
	}
	g.emit(mod_bits | (reg_bits << 3) | rm)
	if rm == 4 {
		g.emit(0x24)
	}
	match mod_bits {
		0x40 {
			g.emit(u8(disp))
		}
		0x80 {
			g.emit_u32(u32(disp))
		}
		else {}
	}
}
417
// asm_mov_rax_mem_base_disp emits the 64-bit load `mov rax, [base + disp]`.
// The base register is translated with g.map_reg; REX.B is added for hardware
// registers 8 and above. The addressing bytes come from
// asm_emit_modrm_base_disp with a reg field of 0 (rax).
fn asm_mov_rax_mem_base_disp(mut g Gen, base Reg, disp int) {
	base_hw := g.map_reg(int(base))
	rex := if base_hw >= 8 { u8(0x49) } else { u8(0x48) }
	g.emit(rex)
	g.emit(0x8B)
	asm_emit_modrm_base_disp(mut g, 0, base_hw, disp)
}
450
// asm_load_reg_mem_base_disp_size emits a zero-extending load of `size` bytes
// from `[base + disp]` into `reg`:
//   8 -> mov r64, m64 (REX.W 8B)
//   4 -> mov r32, m32 (8B)
//   2 -> movzx r32, m16 (0F B7)
//   1 -> movzx r32, m8 (0F B6)
// Both registers are translated with g.map_reg. REX.R / REX.B are added for
// hardware registers 8 and above; for sizes below 8 a REX prefix is emitted
// only when one of those bits is needed. Any other size is reported through
// asm_unsupported_memory_size.
fn asm_load_reg_mem_base_disp_size(mut g Gen, reg Reg, base Reg, disp int, size int) {
	reg_hw := g.map_reg(int(reg))
	base_hw := g.map_reg(int(base))
	mut ext := u8(0)
	if reg_hw >= 8 {
		ext |= 4 // REX.R
	}
	if base_hw >= 8 {
		ext |= 1 // REX.B
	}
	match size {
		8 {
			g.emit(0x48 | ext)
			g.emit(0x8B)
		}
		4 {
			if ext != 0 {
				g.emit(0x40 | ext)
			}
			g.emit(0x8B)
		}
		1, 2 {
			if ext != 0 {
				g.emit(0x40 | ext)
			}
			g.emit(0x0F)
			g.emit(if size == 2 { u8(0xB7) } else { u8(0xB6) })
		}
		else {
			asm_unsupported_memory_size('load', size)
			return
		}
	}
	asm_emit_modrm_base_disp(mut g, reg_hw & 7, base_hw, disp)
}
500
// asm_load_reg_mem_base_disp_size_signed emits a sign-extending load of `size`
// bytes from `[base + disp]` into the 64-bit register `reg`:
//   8 -> delegated to asm_load_reg_mem_base_disp_size (nothing to extend)
//   4 -> movsxd r64, m32 (REX.W 63)
//   2 -> movsx r64, m16 (REX.W 0F BF)
//   1 -> movsx r64, m8 (REX.W 0F BE)
// Both registers are translated with g.map_reg; REX.R / REX.B are added for
// hardware registers 8 and above. An unsupported size is reported through
// asm_unsupported_memory_size before any byte is emitted, so no dangling REX
// prefix is left in the output.
fn asm_load_reg_mem_base_disp_size_signed(mut g Gen, reg Reg, base Reg, disp int, size int) {
	reg_hw := g.map_reg(int(reg))
	base_hw := g.map_reg(int(base))
	if size == 8 {
		asm_load_reg_mem_base_disp_size(mut g, reg, base, disp, size)
		return
	}
	// Validate first: the original emitted the REX byte before rejecting the size.
	if size != 4 && size != 2 && size != 1 {
		asm_unsupported_memory_size('signed load', size)
		return
	}
	mut rex := u8(0x48)
	if reg_hw >= 8 {
		rex |= 4 // REX.R
	}
	if base_hw >= 8 {
		rex |= 1 // REX.B
	}
	g.emit(rex)
	if size == 4 {
		g.emit(0x63)
	} else {
		g.emit(0x0F)
		g.emit(if size == 2 { u8(0xBF) } else { u8(0xBE) })
	}
	asm_emit_modrm_base_disp(mut g, reg_hw & 7, base_hw, disp)
}
529
// asm_mov_mem_base_disp_rax emits the 64-bit store `mov [base + disp], rax`.
// The base register is translated with g.map_reg; REX.B is added for hardware
// registers 8 and above. The addressing bytes come from
// asm_emit_modrm_base_disp with a reg field of 0 (rax).
fn asm_mov_mem_base_disp_rax(mut g Gen, base Reg, disp int) {
	base_hw := g.map_reg(int(base))
	rex := if base_hw >= 8 { u8(0x49) } else { u8(0x48) }
	g.emit(rex)
	g.emit(0x89)
	asm_emit_modrm_base_disp(mut g, 0, base_hw, disp)
}
562
// asm_store_mem_base_disp_reg_size emits a store of the low `size` bytes of
// `reg` to `[base + disp]`:
//   8 -> mov m64, r64 (REX.W 89)
//   4 -> mov m32, r32 (89)
//   2 -> mov m16, r16 (66 89)
//   1 -> mov m8, r8 (88)
// Both registers are translated with g.map_reg; REX.R / REX.B are added for
// hardware registers 8 and above. Any other size is reported through
// asm_unsupported_memory_size without emitting anything.
//
// For 1-byte stores, hardware registers 4..7 are only addressable as
// spl/bpl/sil/dil when a REX prefix is present; without one the same encoding
// selects ah/ch/dh/bh. A bare REX (0x40) is therefore forced in that case.
fn asm_store_mem_base_disp_reg_size(mut g Gen, base Reg, disp int, reg Reg, size int) {
	reg_hw := g.map_reg(int(reg))
	base_hw := g.map_reg(int(base))
	if size != 8 && size != 4 && size != 2 && size != 1 {
		asm_unsupported_memory_size('store', size)
		return
	}
	if size == 2 {
		// The operand-size override must precede the REX prefix.
		g.emit(0x66)
	}
	mut rex := u8(0)
	if size == 8 {
		rex |= 8 // REX.W
	}
	if reg_hw >= 8 {
		rex |= 4 // REX.R
	}
	if base_hw >= 8 {
		rex |= 1 // REX.B
	}
	needs_byte_rex := size == 1 && reg_hw >= 4
	if rex != 0 || needs_byte_rex {
		g.emit(0x40 | rex)
	}
	g.emit(if size == 1 { u8(0x88) } else { u8(0x89) })
	asm_emit_modrm_base_disp(mut g, reg_hw & 7, base_hw, disp)
}
599
// asm_unsupported_memory_size reports a memory access width that the load/store
// helpers cannot encode by forwarding a message built from `op` and `size` to
// x64_unsupported (defined outside this section).
fn asm_unsupported_memory_size(op string, size int) {
	x64_unsupported('${op} memory size ${size}')
}
603
// asm_lea_rax_rbp_disp8 emits `lea rax, [rbp + disp8]`.
fn asm_lea_rax_rbp_disp8(mut g Gen, disp i8) {
	// 48 = REX.W, 8D = LEA, 45 = ModRM(mod=01 disp8, reg=rax, rm=rbp)
	for b in [u8(0x48), 0x8D, 0x45] {
		g.emit(b)
	}
	g.emit(u8(disp))
}
611
// asm_lea_rax_rbp_disp32 emits `lea rax, [rbp + disp32]`.
fn asm_lea_rax_rbp_disp32(mut g Gen, disp i32) {
	// 48 = REX.W, 8D = LEA, 85 = ModRM(mod=10 disp32, reg=rax, rm=rbp)
	for b in [u8(0x48), 0x8D, 0x85] {
		g.emit(b)
	}
	g.emit_u32(u32(disp))
}
619
// asm_lea_reg_rbp_disp emits `lea reg, [rbp + disp]`. Both `reg` and rbp are
// translated with g.map_reg; REX.R / REX.B are added when the resulting
// hardware register is 8 or above. The displacement width is chosen by
// asm_emit_modrm_base_disp.
fn asm_lea_reg_rbp_disp(mut g Gen, reg Reg, disp int) {
	dst_hw := g.map_reg(int(reg))
	frame_hw := g.map_reg(int(rbp))
	mut rex := u8(0x48)
	rex |= if dst_hw >= 8 { u8(4) } else { u8(0) }
	rex |= if frame_hw >= 8 { u8(1) } else { u8(0) }
	g.emit(rex)
	g.emit(0x8D)
	asm_emit_modrm_base_disp(mut g, dst_hw & 7, frame_hw, disp)
}
635
// asm_lea_reg_rip emits the prefix, opcode and ModRM byte of
// `lea reg, [rip + disp32]` (used for globals/strings). The 32-bit
// displacement is NOT emitted here; the caller must append it.
fn asm_lea_reg_rip(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	rex := if hw >= 8 { u8(0x4C) } else { u8(0x48) }
	// mod=00, rm=101 selects RIP-relative addressing.
	modrm := 0x05 | ((hw & 7) << 3)
	g.emit(rex)
	g.emit(0x8D)
	g.emit(modrm)
}
647
// asm_mov_reg_got_rip emits the prefix, opcode and ModRM byte of
// `mov reg, qword ptr [rip + disp32]` (used for Mach-O GOTPCREL loads). The
// 32-bit displacement is NOT emitted here; the caller must append it.
fn asm_mov_reg_got_rip(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	rex := if hw >= 8 { u8(0x4C) } else { u8(0x48) }
	// mod=00, rm=101 selects RIP-relative addressing.
	modrm := 0x05 | ((hw & 7) << 3)
	g.emit(rex)
	g.emit(0x8B)
	g.emit(modrm)
}
659
660// === Move Register ===
661
// asm_mov_reg_reg emits the 64-bit `mov dst, src` using opcode 89
// (MOV r/m64, r64): the source goes in the ModRM reg field (REX.R when its
// hardware number is 8 or above) and the destination in the rm field (REX.B).
// Both registers are translated with g.map_reg first.
fn asm_mov_reg_reg(mut g Gen, dst Reg, src Reg) {
	to_hw := g.map_reg(int(dst))
	from_hw := g.map_reg(int(src))
	mut rex := u8(0x48)
	rex |= if from_hw >= 8 { u8(4) } else { u8(0) }
	rex |= if to_hw >= 8 { u8(1) } else { u8(0) }
	g.emit(rex)
	g.emit(0x89)
	g.emit(0xC0 | ((from_hw & 7) << 3) | (to_hw & 7))
}
677
678// === Move Immediate ===
679
// asm_mov_reg_imm32 emits `mov r32, imm32` (B8+rd id); writing the 32-bit
// register zero-extends into the full 64-bit register. The register is
// translated with g.map_reg and REX.B (0x41) is added for hardware registers 8
// and above.
fn asm_mov_reg_imm32(mut g Gen, reg Reg, imm u32) {
	hw := g.map_reg(int(reg))
	needs_rex_b := hw >= 8
	if needs_rex_b {
		g.emit(0x41)
	}
	opcode := 0xB8 | (hw & 7)
	g.emit(opcode)
	g.emit_u32(imm)
}
689
// asm_mov_reg_imm64 emits `movabs reg, imm64` (REX.W B8+rd io). The register is
// translated with g.map_reg and REX.B is added for hardware registers 8 and above.
fn asm_mov_reg_imm64(mut g Gen, reg Reg, imm u64) {
	hw := g.map_reg(int(reg))
	rex := if hw >= 8 { u8(0x49) } else { u8(0x48) }
	opcode := 0xB8 | (hw & 7)
	g.emit(rex)
	g.emit(opcode)
	g.emit_u64(imm)
}
701
702// === Load/Store with displacement ===
703
// asm_load_reg_rbp_disp emits the 64-bit load `mov reg, [rbp + disp]`. The
// destination is translated with g.map_reg (REX.R for hardware registers 8 and
// above); the base is hard-coded as rbp in the ModRM byte. A disp8 form is
// used when the displacement fits in a signed byte, disp32 otherwise.
fn asm_load_reg_rbp_disp(mut g Gen, reg Reg, disp int) {
	hw := g.map_reg(int(reg))
	rex := if hw >= 8 { u8(0x4C) } else { u8(0x48) }
	g.emit(rex)
	g.emit(0x8B)
	reg_field := (hw & 7) << 3
	if disp < -128 || disp > 127 {
		g.emit(0x85 | reg_field) // mod=10: disp32
		g.emit_u32(u32(disp))
		return
	}
	g.emit(0x45 | reg_field) // mod=01: disp8
	g.emit(u8(disp))
}
721
// asm_store_rbp_disp_reg emits the 64-bit store `mov [rbp + disp], reg`. The
// source is translated with g.map_reg (REX.R for hardware registers 8 and
// above); the base is hard-coded as rbp in the ModRM byte. A disp8 form is
// used when the displacement fits in a signed byte, disp32 otherwise.
fn asm_store_rbp_disp_reg(mut g Gen, disp int, reg Reg) {
	hw := g.map_reg(int(reg))
	rex := if hw >= 8 { u8(0x4C) } else { u8(0x48) }
	g.emit(rex)
	g.emit(0x89)
	reg_field := (hw & 7) << 3
	if disp < -128 || disp > 127 {
		g.emit(0x85 | reg_field) // mod=10: disp32
		g.emit_u32(u32(disp))
		return
	}
	g.emit(0x45 | reg_field) // mod=01: disp8
	g.emit(u8(disp))
}
739
// asm_store_rbp_disp_reg_size stores the low `size` bytes of `reg` to
// `[rbp + disp]` by delegating to asm_store_mem_base_disp_reg_size with rbp as
// the base register.
fn asm_store_rbp_disp_reg_size(mut g Gen, disp int, reg Reg, size int) {
	asm_store_mem_base_disp_reg_size(mut g, rbp, disp, reg, size)
}
743
// asm_cvtsi2ss_xmm0_rax emits `cvtsi2ss xmm0, rax` (F3 REX.W 0F 2A C0):
// convert the signed 64-bit integer in rax to a single-precision float in xmm0.
fn asm_cvtsi2ss_xmm0_rax(mut g Gen) {
	for b in [u8(0xF3), 0x48, 0x0F, 0x2A, 0xC0] {
		g.emit(b)
	}
}
751
// asm_cvtsi2sd_xmm0_rax emits `cvtsi2sd xmm0, rax` (F2 REX.W 0F 2A C0):
// convert the signed 64-bit integer in rax to a double-precision float in xmm0.
fn asm_cvtsi2sd_xmm0_rax(mut g Gen) {
	for b in [u8(0xF2), 0x48, 0x0F, 0x2A, 0xC0] {
		g.emit(b)
	}
}
759
// asm_cvttss2si_rax_xmm0 emits `cvttss2si rax, xmm0` (F3 REX.W 0F 2C C0):
// convert the single-precision float in xmm0 to a 64-bit integer in rax,
// truncating toward zero.
fn asm_cvttss2si_rax_xmm0(mut g Gen) {
	for b in [u8(0xF3), 0x48, 0x0F, 0x2C, 0xC0] {
		g.emit(b)
	}
}
767
// asm_cvttsd2si_rax_xmm0 emits `cvttsd2si rax, xmm0` (F2 REX.W 0F 2C C0):
// convert the double-precision float in xmm0 to a 64-bit integer in rax,
// truncating toward zero.
fn asm_cvttsd2si_rax_xmm0(mut g Gen) {
	for b in [u8(0xF2), 0x48, 0x0F, 0x2C, 0xC0] {
		g.emit(b)
	}
}
775
// asm_store_xmm0_rbp_disp stores xmm0 to `[rbp + disp]` by delegating to
// asm_store_xmm_rbp_disp with register index 0; `size` is forwarded unchanged.
fn asm_store_xmm0_rbp_disp(mut g Gen, disp int, size int) {
	asm_store_xmm_rbp_disp(mut g, 0, disp, size)
}
779
// asm_store_xmm_rbp_disp emits a scalar float store of register `xmm` to
// `[rbp + disp]`: `movss` (F3 0F 11) when size is 4, `movsd` (F2 0F 11) for any
// other size. xmm8 and above get a REX.R prefix (0x44), placed after the
// mandatory F3/F2 prefix. A disp8 form is used when the displacement fits in a
// signed byte, disp32 otherwise.
fn asm_store_xmm_rbp_disp(mut g Gen, xmm int, disp int, size int) {
	prefix := if size == 4 { u8(0xF3) } else { u8(0xF2) }
	g.emit(prefix)
	if xmm >= 8 {
		g.emit(0x44)
	}
	g.emit(0x0F)
	g.emit(0x11)
	reg_field := u8(xmm & 7) << 3
	if disp < -128 || disp > 127 {
		g.emit(0x85 | reg_field) // mod=10: disp32
		g.emit_u32(u32(disp))
		return
	}
	g.emit(0x45 | reg_field) // mod=01: disp8
	g.emit(u8(disp))
}
799
// asm_store_xmm_mem_base_disp_size emits a scalar float store of register
// `xmm` to `[base + disp]`: `movss` (F3 0F 11) when size is 4, `movsd`
// (F2 0F 11) for any other size. The base is translated with g.map_reg. A REX
// prefix is emitted only when needed (REX.R for xmm8+, REX.B for base 8+) and
// follows the mandatory F3/F2 prefix.
fn asm_store_xmm_mem_base_disp_size(mut g Gen, xmm int, base Reg, disp int, size int) {
	base_hw := g.map_reg(int(base))
	g.emit(if size == 4 { u8(0xF3) } else { u8(0xF2) })
	mut ext := u8(0)
	ext |= if xmm >= 8 { u8(4) } else { u8(0) }
	ext |= if base_hw >= 8 { u8(1) } else { u8(0) }
	if ext != 0 {
		g.emit(0x40 | ext)
	}
	for b in [u8(0x0F), 0x11] {
		g.emit(b)
	}
	asm_emit_modrm_base_disp(mut g, u8(xmm & 7), base_hw, disp)
}
821
// asm_store_xmm_mem_base_disp_128 emits an unaligned 128-bit store of register
// `xmm` to `[base + disp]` using `movdqu` (F3 0F 7F). The base is translated
// with g.map_reg. A REX prefix is emitted only when needed (REX.R for xmm8+,
// REX.B for base 8+) and follows the mandatory F3 prefix.
fn asm_store_xmm_mem_base_disp_128(mut g Gen, xmm int, base Reg, disp int) {
	base_hw := g.map_reg(int(base))
	g.emit(0xF3)
	mut ext := u8(0)
	ext |= if xmm >= 8 { u8(4) } else { u8(0) }
	ext |= if base_hw >= 8 { u8(1) } else { u8(0) }
	if ext != 0 {
		g.emit(0x40 | ext)
	}
	for b in [u8(0x0F), 0x7F] {
		g.emit(b)
	}
	asm_emit_modrm_base_disp(mut g, u8(xmm & 7), base_hw, disp)
}
839
// asm_load_xmm_rbp_disp emits a scalar float load from `[rbp + disp]` into
// register `xmm`: `movss` (F3 0F 10) when size is 4, `movsd` (F2 0F 10) for any
// other size. A disp8 form is used when the displacement fits in a signed
// byte, disp32 otherwise.
//
// xmm8 and above get a REX.R prefix (0x44) after the mandatory F3/F2 prefix,
// matching asm_store_xmm_rbp_disp. Without it only the low three bits of the
// register index would be encoded and the load would target xmm0..xmm7.
fn asm_load_xmm_rbp_disp(mut g Gen, xmm int, disp int, size int) {
	if size == 4 {
		g.emit(0xF3)
	} else {
		g.emit(0xF2)
	}
	if xmm >= 8 {
		g.emit(0x44) // REX.R selects xmm8..xmm15
	}
	g.emit(0x0F)
	g.emit(0x10)
	if disp >= -128 && disp <= 127 {
		g.emit(0x45 | (u8(xmm & 7) << 3)) // mod=01: disp8
		g.emit(u8(disp))
	} else {
		g.emit(0x85 | (u8(xmm & 7) << 3)) // mod=10: disp32
		g.emit_u32(u32(disp))
	}
}
856
// asm_load_xmm_mem_base_disp_size emits a scalar float load from
// `[base + disp]` into register `xmm`: `movss` (F3 0F 10) when size is 4,
// `movsd` (F2 0F 10) for any other size. The base is translated with
// g.map_reg. A REX prefix is emitted only when needed (REX.R for xmm8+, REX.B
// for base 8+) and follows the mandatory F3/F2 prefix.
fn asm_load_xmm_mem_base_disp_size(mut g Gen, xmm int, base Reg, disp int, size int) {
	base_hw := g.map_reg(int(base))
	g.emit(if size == 4 { u8(0xF3) } else { u8(0xF2) })
	mut ext := u8(0)
	ext |= if xmm >= 8 { u8(4) } else { u8(0) }
	ext |= if base_hw >= 8 { u8(1) } else { u8(0) }
	if ext != 0 {
		g.emit(0x40 | ext)
	}
	for b in [u8(0x0F), 0x10] {
		g.emit(b)
	}
	asm_emit_modrm_base_disp(mut g, u8(xmm & 7), base_hw, disp)
}
878
// asm_load_xmm_mem_base_disp_128 emits an unaligned 128-bit load from
// `[base + disp]` into register `xmm` using `movdqu` (F3 0F 6F). The base is
// translated with g.map_reg. A REX prefix is emitted only when needed (REX.R
// for xmm8+, REX.B for base 8+) and follows the mandatory F3 prefix.
fn asm_load_xmm_mem_base_disp_128(mut g Gen, xmm int, base Reg, disp int) {
	base_hw := g.map_reg(int(base))
	g.emit(0xF3)
	mut ext := u8(0)
	ext |= if xmm >= 8 { u8(4) } else { u8(0) }
	ext |= if base_hw >= 8 { u8(1) } else { u8(0) }
	if ext != 0 {
		g.emit(0x40 | ext)
	}
	for b in [u8(0x0F), 0x6F] {
		g.emit(b)
	}
	asm_emit_modrm_base_disp(mut g, u8(xmm & 7), base_hw, disp)
}
896
// asm_cvtss2sd_xmm0_xmm0 emits `cvtss2sd xmm0, xmm0` (F3 0F 5A C0), widening
// the single-precision value in xmm0 to double precision in place.
fn asm_cvtss2sd_xmm0_xmm0(mut g Gen) {
	for b in [u8(0xF3), 0x0F, 0x5A, 0xC0] {
		g.emit(b)
	}
}
903
// asm_cvtss2sd_xmm1_xmm1 emits `cvtss2sd xmm1, xmm1` (F3 0F 5A C9), widening
// the single-precision value in xmm1 to double precision in place.
fn asm_cvtss2sd_xmm1_xmm1(mut g Gen) {
	for b in [u8(0xF3), 0x0F, 0x5A, 0xC9] {
		g.emit(b)
	}
}
910
// asm_cvtsd2ss_xmm0_xmm0 emits `cvtsd2ss xmm0, xmm0` (F2 0F 5A C0), narrowing
// the double-precision value in xmm0 to single precision in place.
fn asm_cvtsd2ss_xmm0_xmm0(mut g Gen) {
	for b in [u8(0xF2), 0x0F, 0x5A, 0xC0] {
		g.emit(b)
	}
}
917
// asm_add_float_xmm0_xmm0 emits a scalar float add of xmm0 to itself:
// `addss xmm0, xmm0` (F3 0F 58 C0) when size is 4, `addsd xmm0, xmm0`
// (F2 0F 58 C0) for any other size.
fn asm_add_float_xmm0_xmm0(mut g Gen, size int) {
	prefix := if size == 4 { u8(0xF3) } else { u8(0xF2) }
	for b in [prefix, 0x0F, 0x58, 0xC0] {
		g.emit(b)
	}
}
928
// asm_sub_float_xmm0_xmm1 emits `subss xmm0, xmm1` when size is 4 and
// `subsd xmm0, xmm1` for any other size. It is the 0x5C (SUB) case of
// asm_float_binop_xmm0_xmm1, which produces the same prefix / 0F / opcode / C1
// byte sequence.
fn asm_sub_float_xmm0_xmm1(mut g Gen, size int) {
	asm_float_binop_xmm0_xmm1(mut g, 0x5C, size)
}
939
// asm_float_binop_xmm0_xmm1 emits a scalar SSE binary operation
// `op xmm0, xmm1`. `op` is the second opcode byte after the 0F escape. The
// mandatory prefix is F3 (single precision) when size is 4 and F2 (double
// precision) for any other size.
fn asm_float_binop_xmm0_xmm1(mut g Gen, op u8, size int) {
	prefix := if size == 4 { u8(0xF3) } else { u8(0xF2) }
	// C1 = ModRM(mod=11, reg=xmm0, rm=xmm1)
	for b in [prefix, 0x0F, op, 0xC1] {
		g.emit(b)
	}
}
950
// asm_movsxd_rax_eax emits `movsxd rax, eax` (48 63 C0), sign-extending the
// low 32 bits of rax to 64 bits.
fn asm_movsxd_rax_eax(mut g Gen) {
	for b in [u8(0x48), 0x63, 0xC0] {
		g.emit(b)
	}
}
956
// asm_movsx_rax_ax emits `movsx rax, ax` (48 0F BF C0), sign-extending the low
// 16 bits of rax to 64 bits.
fn asm_movsx_rax_ax(mut g Gen) {
	for b in [u8(0x48), 0x0F, 0xBF, 0xC0] {
		g.emit(b)
	}
}
963
// asm_movsx_rax_al emits `movsx rax, al` (48 0F BE C0), sign-extending the low
// 8 bits of rax to 64 bits.
fn asm_movsx_rax_al(mut g Gen) {
	for b in [u8(0x48), 0x0F, 0xBE, 0xC0] {
		g.emit(b)
	}
}
970
971// === Branches ===
972
// asm_jmp_rel32 emits only the opcode byte (E9) of `jmp rel32`; the 32-bit
// relative displacement is not written here and must be appended by the caller.
fn asm_jmp_rel32(mut g Gen) {
	opcode := u8(0xE9)
	g.emit(opcode)
}
977
// asm_je_rel32 emits only the two opcode bytes (0F 84) of `je rel32`; the
// 32-bit relative displacement must be appended by the caller.
fn asm_je_rel32(mut g Gen) {
	for b in [u8(0x0F), 0x84] {
		g.emit(b)
	}
}
983
// asm_jne_rel32 emits only the two opcode bytes (0F 85) of `jne rel32`; the
// 32-bit relative displacement must be appended by the caller.
fn asm_jne_rel32(mut g Gen) {
	for b in [u8(0x0F), 0x85] {
		g.emit(b)
	}
}
989
// asm_jns_rel32 emits only the two opcode bytes (0F 89) of `jns rel32` (jump if
// the sign flag is clear); the 32-bit relative displacement must be appended by
// the caller.
fn asm_jns_rel32(mut g Gen) {
	for b in [u8(0x0F), 0x89] {
		g.emit(b)
	}
}
994
// asm_jae_rel32 emits only the two opcode bytes (0F 83) of `jae rel32` (jump if
// above or equal, unsigned); the 32-bit relative displacement must be appended
// by the caller.
fn asm_jae_rel32(mut g Gen) {
	for b in [u8(0x0F), 0x83] {
		g.emit(b)
	}
}
999
1000// === Call ===
1001
// asm_call_rel32 emits only the opcode byte (E8) of `call rel32`; the 32-bit
// relative displacement is not written here and must be appended by the caller.
fn asm_call_rel32(mut g Gen) {
	opcode := u8(0xE8)
	g.emit(opcode)
}
1006
// asm_call_r10 emits `call r10`, an indirect call through r10.
fn asm_call_r10(mut g Gen) {
	// 41 = REX.B (extends rm to r10), FF /2 = CALL r/m64, D2 = ModRM(mod=11, /2, rm=010)
	for b in [u8(0x41), 0xFF, 0xD2] {
		g.emit(b)
	}
}
1013
1014// === Special ===
1015
// asm_ud2 emits `ud2` (0F 0B), the guaranteed-undefined instruction used as a trap.
fn asm_ud2(mut g Gen) {
	for b in [u8(0x0F), 0x0B] {
		g.emit(b)
	}
}
1021