| 1 | // Copyright (c) 2026 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | |
| 5 | module x64 |
| 6 | |
| 7 | // x64 Instruction Encoding Helpers |
| 8 | // These functions provide type-safe instruction encoding for the x64 backend. |
| 9 | |
// Reg identifies a general-purpose register by number.
// It is an alias of `int`; the helpers below convert it back with `int(reg)`
// and pass it through `g.map_reg` before placing it in an encoding.
type Reg = int
| 12 | |
// Register constants accepted by the helpers in this file.
// The values run 0..15 in the order rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8..r15.
// Helpers that take a `Reg` pass it through `g.map_reg` before encoding it.
// NOTE(review): the original header cites the System V AMD64 ABI; this ordering
// looks like the x86-64 hardware register numbering - confirm which is intended.
const rax = Reg(0)
const rcx = Reg(1)
const rdx = Reg(2)
const rbx = Reg(3)
const rsp = Reg(4)
const rbp = Reg(5)
const rsi = Reg(6)
const rdi = Reg(7)
const r8 = Reg(8)
const r9 = Reg(9)
const r10 = Reg(10)
const r11 = Reg(11)
const r12 = Reg(12)
const r13 = Reg(13)
const r14 = Reg(14)
const r15 = Reg(15)
| 30 | |
| 31 | // === Prologue/Epilogue === |
| 32 | |
// asm_endbr64 emits `endbr64` (F3 0F 1E FA), the CET/IBT branch-target marker.
fn asm_endbr64(mut g Gen) {
	for b in [u8(0xF3), 0x0F, 0x1E, 0xFA] {
		g.emit(b)
	}
}
| 40 | |
// asm_push_rbp emits the single-byte `push rbp` (0x55).
fn asm_push_rbp(mut g Gen) {
	opcode := u8(0x55)
	g.emit(opcode)
}
| 45 | |
// asm_mov_rbp_rsp emits `mov rbp, rsp` (48 89 E5).
fn asm_mov_rbp_rsp(mut g Gen) {
	for b in [u8(0x48), 0x89, 0xE5] {
		g.emit(b)
	}
}
| 52 | |
// asm_pop_rbp emits the single-byte `pop rbp` (0x5D).
fn asm_pop_rbp(mut g Gen) {
	opcode := u8(0x5D)
	g.emit(opcode)
}
| 57 | |
// asm_ret emits the near `ret` (0xC3).
fn asm_ret(mut g Gen) {
	opcode := u8(0xC3)
	g.emit(opcode)
}
| 62 | |
| 63 | // === Push/Pop === |
| 64 | |
// asm_push emits `push reg`.
// The register is mapped through `g.map_reg`; numbers 8 and above get a
// REX.B prefix (0x41) and only the low three bits go into the opcode byte.
fn asm_push(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	if hw >= 8 {
		g.emit(0x41)
	}
	g.emit(0x50 | (hw & 7))
}
| 75 | |
// asm_pop emits `pop reg`.
// The register is mapped through `g.map_reg`; numbers 8 and above get a
// REX.B prefix (0x41) and only the low three bits go into the opcode byte.
fn asm_pop(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	if hw >= 8 {
		g.emit(0x41)
	}
	g.emit(0x58 | (hw & 7))
}
| 86 | |
| 87 | // === Stack Arithmetic === |
| 88 | |
// asm_sub_rsp_imm8 emits `sub rsp, imm8` (48 83 EC ib).
fn asm_sub_rsp_imm8(mut g Gen, imm u8) {
	for b in [u8(0x48), 0x83, 0xEC, imm] {
		g.emit(b)
	}
}
| 96 | |
// asm_sub_rsp_imm32 emits `sub rsp, imm32` (48 81 EC id).
fn asm_sub_rsp_imm32(mut g Gen, imm u32) {
	for b in [u8(0x48), 0x81, 0xEC] {
		g.emit(b)
	}
	g.emit_u32(imm)
}
| 104 | |
// asm_test_byte_ptr_rsp_zero emits `test byte ptr [rsp], 0` (F6 04 24 00).
fn asm_test_byte_ptr_rsp_zero(mut g Gen) {
	for b in [u8(0xF6), 0x04, 0x24, 0x00] {
		g.emit(b)
	}
}
| 112 | |
// asm_add_rsp_imm8 emits `add rsp, imm8` (48 83 C4 ib).
fn asm_add_rsp_imm8(mut g Gen, imm u8) {
	for b in [u8(0x48), 0x83, 0xC4, imm] {
		g.emit(b)
	}
}
| 120 | |
// asm_add_rsp_imm32 emits `add rsp, imm32` (48 81 C4 id).
fn asm_add_rsp_imm32(mut g Gen, imm u32) {
	for b in [u8(0x48), 0x81, 0xC4] {
		g.emit(b)
	}
	g.emit_u32(imm)
}
| 128 | |
| 129 | // === Arithmetic === |
| 130 | |
// asm_add_rax_rcx emits `add rax, rcx` (48 01 C8).
fn asm_add_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x01, 0xC8] {
		g.emit(b)
	}
}
| 137 | |
// asm_sub_rax_rcx emits `sub rax, rcx` (48 29 C8).
fn asm_sub_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x29, 0xC8] {
		g.emit(b)
	}
}
| 144 | |
// asm_imul_rax_rcx emits `imul rax, rcx` (48 0F AF C1).
fn asm_imul_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x0F, 0xAF, 0xC1] {
		g.emit(b)
	}
}
| 152 | |
// asm_cqo emits `cqo` (48 99), which sign-extends rax into rdx:rax.
fn asm_cqo(mut g Gen) {
	rex_w := u8(0x48)
	opcode := u8(0x99)
	g.emit(rex_w)
	g.emit(opcode)
}
| 158 | |
// asm_idiv_rcx emits `idiv rcx` (48 F7 F9): signed rdx:rax / rcx,
// quotient in rax and remainder in rdx.
fn asm_idiv_rcx(mut g Gen) {
	for b in [u8(0x48), 0xF7, 0xF9] {
		g.emit(b)
	}
}
| 165 | |
// asm_div_rcx emits `div rcx` (48 F7 F1): unsigned rdx:rax / rcx,
// quotient in rax and remainder in rdx.
fn asm_div_rcx(mut g Gen) {
	for b in [u8(0x48), 0xF7, 0xF1] {
		g.emit(b)
	}
}
| 172 | |
// asm_mov_rax_rdx emits `mov rax, rdx` (48 89 D0), e.g. to fetch the
// remainder after a division.
fn asm_mov_rax_rdx(mut g Gen) {
	for b in [u8(0x48), 0x89, 0xD0] {
		g.emit(b)
	}
}
| 179 | |
| 180 | // === Logical === |
| 181 | |
// asm_and_rax_rcx emits `and rax, rcx` (48 21 C8).
fn asm_and_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x21, 0xC8] {
		g.emit(b)
	}
}
| 188 | |
// asm_or_rax_rcx emits `or rax, rcx` (48 09 C8).
fn asm_or_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x09, 0xC8] {
		g.emit(b)
	}
}
| 195 | |
// asm_or_rax_r11 emits `or rax, r11` (4C 09 D8); REX.R selects r11 as the source.
fn asm_or_rax_r11(mut g Gen) {
	for b in [u8(0x4C), 0x09, 0xD8] {
		g.emit(b)
	}
}
| 201 | |
// asm_xor_rax_rcx emits `xor rax, rcx` (48 31 C8).
fn asm_xor_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x31, 0xC8] {
		g.emit(b)
	}
}
| 208 | |
// asm_xor_eax_eax emits the two-byte `xor eax, eax` (31 C0).
// This zeroes rax, and therefore AL, which variadic calls read.
fn asm_xor_eax_eax(mut g Gen) {
	opcode := u8(0x31)
	modrm := u8(0xC0)
	g.emit(opcode)
	g.emit(modrm)
}
| 214 | |
// asm_xor_edx_edx emits `xor edx, edx` (31 D2), zeroing rdx ahead of an
// unsigned division.
fn asm_xor_edx_edx(mut g Gen) {
	opcode := u8(0x31)
	modrm := u8(0xD2)
	g.emit(opcode)
	g.emit(modrm)
}
| 220 | |
// asm_xor_reg_reg emits a 32-bit `xor reg, reg` to clear a register.
// Mapped register numbers 8 and above get REX.R|REX.B (0x45); the same low
// three bits fill both the reg and r/m fields of the ModRM byte.
fn asm_xor_reg_reg(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	low := hw & 7
	if hw >= 8 {
		g.emit(0x45)
	}
	g.emit(0x31)
	g.emit(0xC0 | (low << 3) | low)
}
| 233 | |
// asm_and_rcx_imm8 emits `and rcx, imm8` (48 83 E1 ib).
fn asm_and_rcx_imm8(mut g Gen, imm u8) {
	for b in [u8(0x48), 0x83, 0xE1, imm] {
		g.emit(b)
	}
}
| 241 | |
| 242 | // === Shifts === |
| 243 | |
// asm_shl_rax_cl emits `shl rax, cl` (48 D3 E0).
fn asm_shl_rax_cl(mut g Gen) {
	for b in [u8(0x48), 0xD3, 0xE0] {
		g.emit(b)
	}
}
| 250 | |
// asm_sar_rax_cl emits `sar rax, cl` (48 D3 F8), an arithmetic right shift.
fn asm_sar_rax_cl(mut g Gen) {
	for b in [u8(0x48), 0xD3, 0xF8] {
		g.emit(b)
	}
}
| 257 | |
// asm_shr_rax_cl emits `shr rax, cl` (48 D3 E8), a logical right shift.
fn asm_shr_rax_cl(mut g Gen) {
	for b in [u8(0x48), 0xD3, 0xE8] {
		g.emit(b)
	}
}
| 264 | |
// asm_shr_rax_1 emits `shr rax, 1` (48 D1 E8).
fn asm_shr_rax_1(mut g Gen) {
	for b in [u8(0x48), 0xD1, 0xE8] {
		g.emit(b)
	}
}
| 271 | |
// asm_shr_rax_imm8 emits `shr rax, imm8` (48 C1 E8 ib).
fn asm_shr_rax_imm8(mut g Gen, imm u8) {
	for b in [u8(0x48), 0xC1, 0xE8, imm] {
		g.emit(b)
	}
}
| 278 | |
// asm_shl_rcx_3 emits `shl rcx, 3` (48 C1 E1 03), i.e. rcx *= 8 (GEP index scaling).
fn asm_shl_rcx_3(mut g Gen) {
	for b in [u8(0x48), 0xC1, 0xE1, 0x03] {
		g.emit(b)
	}
}
| 286 | |
// asm_shl_r11_imm8 emits `shl r11, imm8` (49 C1 E3 ib); REX.B selects r11.
fn asm_shl_r11_imm8(mut g Gen, imm u8) {
	for b in [u8(0x49), 0xC1, 0xE3, imm] {
		g.emit(b)
	}
}
| 293 | |
| 294 | // === Compare === |
| 295 | |
// asm_cmp_rax_rcx emits `cmp rax, rcx` (48 39 C8).
fn asm_cmp_rax_rcx(mut g Gen) {
	for b in [u8(0x48), 0x39, 0xC8] {
		g.emit(b)
	}
}
| 302 | |
// asm_test_rax_rax emits `test rax, rax` (48 85 C0).
fn asm_test_rax_rax(mut g Gen) {
	for b in [u8(0x48), 0x85, 0xC0] {
		g.emit(b)
	}
}
| 309 | |
// asm_test_reg_reg emits a 64-bit `test reg, reg`.
// Mapped register numbers 8 and above use REX.W|R|B (0x4D), otherwise plain
// REX.W (0x48); the low three bits fill both ModRM register fields.
fn asm_test_reg_reg(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	low := hw & 7
	prefix := if hw >= 8 { u8(0x4D) } else { u8(0x48) }
	g.emit(prefix)
	g.emit(0x85)
	g.emit(0xC0 | (low << 3) | low)
}
| 321 | |
| 322 | // === Conditional Set === |
| 323 | |
// Condition codes for asm_setcc_al_movzx / asm_setcc_cl_movzx.
// Each value is the second opcode byte of the matching SETcc instruction;
// the helpers emit it immediately after the 0x0F escape byte.
const cc_e = u8(0x94) // equal

const cc_ne = u8(0x95) // not equal

const cc_l = u8(0x9C) // less (signed)

const cc_g = u8(0x9F) // greater (signed)

const cc_le = u8(0x9E) // less or equal (signed)

const cc_ge = u8(0x9D) // greater or equal (signed)

const cc_b = u8(0x92) // below (unsigned less)

const cc_a = u8(0x97) // above (unsigned greater)

const cc_be = u8(0x96) // below or equal (unsigned)

const cc_ae = u8(0x93) // above or equal (unsigned)

const cc_p = u8(0x9A) // parity (unordered for ucomis)

const cc_np = u8(0x9B) // not parity (ordered for ucomis)
| 348 | |
// asm_setcc_al_movzx emits `setcc al` (0F cc C0) followed by
// `movzx rax, al` (48 0F B6 C0), leaving 0 or 1 in rax.
// `cc` is the SETcc opcode byte, e.g. one of the cc_* constants.
fn asm_setcc_al_movzx(mut g Gen, cc u8) {
	setcc_al := [u8(0x0F), cc, 0xC0]
	movzx_rax_al := [u8(0x48), 0x0F, 0xB6, 0xC0]
	for b in setcc_al {
		g.emit(b)
	}
	for b in movzx_rax_al {
		g.emit(b)
	}
}
| 359 | |
// asm_setcc_cl_movzx emits `setcc cl` (0F cc C1) followed by
// `movzx rcx, cl` (48 0F B6 C9), leaving 0 or 1 in rcx.
// `cc` is the SETcc opcode byte, e.g. one of the cc_* constants.
fn asm_setcc_cl_movzx(mut g Gen, cc u8) {
	setcc_cl := [u8(0x0F), cc, 0xC1]
	movzx_rcx_cl := [u8(0x48), 0x0F, 0xB6, 0xC9]
	for b in setcc_cl {
		g.emit(b)
	}
	for b in movzx_rcx_cl {
		g.emit(b)
	}
}
| 369 | |
// asm_ucomis_xmm0_xmm1 emits an unordered scalar compare of xmm0 with xmm1.
// size == 8 adds the 0x66 prefix (`ucomisd`); any other size emits `ucomiss`.
fn asm_ucomis_xmm0_xmm1(mut g Gen, size int) {
	is_double := size == 8
	if is_double {
		g.emit(0x66)
	}
	for b in [u8(0x0F), 0x2E, 0xC1] {
		g.emit(b)
	}
}
| 378 | |
| 379 | // === Memory === |
| 380 | |
// asm_mov_mem_rcx_rax emits `mov [rcx], rax` (48 89 01).
fn asm_mov_mem_rcx_rax(mut g Gen) {
	for b in [u8(0x48), 0x89, 0x01] {
		g.emit(b)
	}
}
| 387 | |
// asm_mov_rax_mem_rcx emits `mov rax, [rcx]` (48 8B 01).
fn asm_mov_rax_mem_rcx(mut g Gen) {
	for b in [u8(0x48), 0x8B, 0x01] {
		g.emit(b)
	}
}
| 394 | |
// asm_emit_modrm_base_disp emits the ModRM byte, an optional SIB byte and the
// displacement for a `[base + disp]` memory operand.
//   reg_bits - value for the ModRM reg field (shifted left by 3 here)
//   base_hw  - hardware number of the base register; only the low 3 bits are encoded
//   disp     - displacement; omitted when 0 (unless the low bits are 5, i.e. rbp/r13),
//              one byte when it fits in -128..127, otherwise four bytes
// A SIB byte (0x24) follows the ModRM byte when the low bits are 4 (rsp/r12).
fn asm_emit_modrm_base_disp(mut g Gen, reg_bits u8, base_hw u8, disp int) {
	low := base_hw & 7
	reg_field := reg_bits << 3
	no_disp := disp == 0 && low != 5
	fits_disp8 := disp >= -128 && disp <= 127
	mut mod_bits := u8(0x80)
	if no_disp {
		mod_bits = 0x00
	} else if fits_disp8 {
		mod_bits = 0x40
	}
	g.emit(mod_bits | reg_field | low)
	if low == 4 {
		g.emit(0x24)
	}
	if no_disp {
		return
	}
	if fits_disp8 {
		g.emit(u8(disp))
	} else {
		g.emit_u32(u32(disp))
	}
}
| 417 | |
// asm_mov_rax_mem_base_disp emits `mov rax, [base + disp]`.
// The base is mapped through `g.map_reg`; REX.B is added for numbers 8 and above.
// The ModRM/SIB/displacement bytes come from asm_emit_modrm_base_disp with a
// reg field of 0 (rax).
fn asm_mov_rax_mem_base_disp(mut g Gen, base Reg, disp int) {
	base_num := g.map_reg(int(base))
	prefix := if base_num >= 8 { u8(0x49) } else { u8(0x48) }
	g.emit(prefix)
	g.emit(0x8B)
	asm_emit_modrm_base_disp(mut g, 0, base_num, disp)
}
| 450 | |
// asm_load_reg_mem_base_disp_size emits a zero-extending load of `size` bytes
// from `[base + disp]` into `reg`.
//   8 -> REX.W `mov r64, m64`
//   4 -> `mov r32, m32`
//   2 -> `movzx r32, m16` (0F B7)
//   1 -> `movzx r32, m8`  (0F B6)
// REX.R / REX.B are added when the mapped reg / base number is 8 or above.
// Any other size is reported through asm_unsupported_memory_size and nothing is emitted.
fn asm_load_reg_mem_base_disp_size(mut g Gen, reg Reg, base Reg, disp int, size int) {
	dst_num := g.map_reg(int(reg))
	base_num := g.map_reg(int(base))
	mut ext := u8(0)
	if dst_num >= 8 {
		ext |= 4 // REX.R
	}
	if base_num >= 8 {
		ext |= 1 // REX.B
	}
	match size {
		8 {
			g.emit(0x48 | ext)
			g.emit(0x8B)
		}
		4 {
			if ext != 0 {
				g.emit(0x40 | ext)
			}
			g.emit(0x8B)
		}
		2, 1 {
			if ext != 0 {
				g.emit(0x40 | ext)
			}
			g.emit(0x0F)
			g.emit(if size == 2 { u8(0xB7) } else { u8(0xB6) })
		}
		else {
			asm_unsupported_memory_size('load', size)
			return
		}
	}
	asm_emit_modrm_base_disp(mut g, dst_num & 7, base_num, disp)
}
| 500 | |
// asm_load_reg_mem_base_disp_size_signed emits a sign-extending load of `size`
// bytes from `[base + disp]` into the 64-bit `reg`.
//   8 -> delegates to asm_load_reg_mem_base_disp_size (no extension needed)
//   4 -> `movsxd r64, m32` (REX.W 63)
//   2 -> `movsx r64, m16`  (REX.W 0F BF)
//   1 -> `movsx r64, m8`   (REX.W 0F BE)
// For any other size the REX byte is still emitted before
// asm_unsupported_memory_size is called.
fn asm_load_reg_mem_base_disp_size_signed(mut g Gen, reg Reg, base Reg, disp int, size int) {
	dst_num := g.map_reg(int(reg))
	base_num := g.map_reg(int(base))
	if size == 8 {
		asm_load_reg_mem_base_disp_size(mut g, reg, base, disp, size)
		return
	}
	r_bit := if dst_num >= 8 { u8(4) } else { u8(0) }
	b_bit := if base_num >= 8 { u8(1) } else { u8(0) }
	g.emit(u8(0x48) | r_bit | b_bit)
	match size {
		4 {
			g.emit(0x63)
		}
		2 {
			g.emit(0x0F)
			g.emit(u8(0xBF))
		}
		1 {
			g.emit(0x0F)
			g.emit(u8(0xBE))
		}
		else {
			asm_unsupported_memory_size('signed load', size)
			return
		}
	}
	asm_emit_modrm_base_disp(mut g, dst_num & 7, base_num, disp)
}
| 529 | |
// asm_mov_mem_base_disp_rax emits `mov [base + disp], rax`.
// The base is mapped through `g.map_reg`; REX.B is added for numbers 8 and above.
// The ModRM/SIB/displacement bytes come from asm_emit_modrm_base_disp with a
// reg field of 0 (rax).
fn asm_mov_mem_base_disp_rax(mut g Gen, base Reg, disp int) {
	base_num := g.map_reg(int(base))
	prefix := if base_num >= 8 { u8(0x49) } else { u8(0x48) }
	g.emit(prefix)
	g.emit(0x89)
	asm_emit_modrm_base_disp(mut g, 0, base_num, disp)
}
| 562 | |
// asm_store_mem_base_disp_reg_size emits a store of the low `size` bytes of
// `reg` to `[base + disp]`.
//   8 -> REX.W `mov m64, r64` (89)
//   4 -> `mov m32, r32`       (89)
//   2 -> 0x66 prefix + `mov m16, r16` (89)
//   1 -> `mov m8, r8`         (88)
// REX.R / REX.B are added when the mapped reg / base number is 8 or above.
// Any other size is reported through asm_unsupported_memory_size and nothing is emitted.
fn asm_store_mem_base_disp_reg_size(mut g Gen, base Reg, disp int, reg Reg, size int) {
	reg_hw := g.map_reg(int(reg))
	base_hw := g.map_reg(int(base))
	if size != 8 && size != 4 && size != 2 && size != 1 {
		asm_unsupported_memory_size('store', size)
		return
	}
	if size == 2 {
		// The operand-size override must come before any REX prefix.
		g.emit(0x66)
	}
	mut rex := u8(0)
	if size == 8 {
		rex |= 8 // REX.W
	}
	if reg_hw >= 8 {
		rex |= 4 // REX.R
	}
	if base_hw >= 8 {
		rex |= 1 // REX.B
	}
	// Without a REX prefix, byte-register numbers 4-7 select AH/CH/DH/BH.
	// Emitting a bare REX (0x40) makes them SPL/BPL/SIL/DIL, so the low byte
	// of the requested register is stored.
	needs_byte_rex := size == 1 && reg_hw >= 4
	if rex != 0 || needs_byte_rex {
		g.emit(0x40 | rex)
	}
	g.emit(if size == 1 { u8(0x88) } else { u8(0x89) })
	asm_emit_modrm_base_disp(mut g, reg_hw & 7, base_hw, disp)
}
| 599 | |
// asm_unsupported_memory_size reports an operand size that the load/store
// helpers cannot encode, by forwarding a message to x64_unsupported.
// `op` names the operation (e.g. 'load', 'store'); `size` is the rejected size.
fn asm_unsupported_memory_size(op string, size int) {
	msg := '${op} memory size ${size}'
	x64_unsupported(msg)
}
| 603 | |
// asm_lea_rax_rbp_disp8 emits `lea rax, [rbp + disp8]` (48 8D 45 ib).
fn asm_lea_rax_rbp_disp8(mut g Gen, disp i8) {
	// ModRM 0x45: mod=01 (disp8), reg=rax, rm=rbp
	for b in [u8(0x48), 0x8D, 0x45] {
		g.emit(b)
	}
	g.emit(u8(disp))
}
| 611 | |
// asm_lea_rax_rbp_disp32 emits `lea rax, [rbp + disp32]` (48 8D 85 id).
fn asm_lea_rax_rbp_disp32(mut g Gen, disp i32) {
	// ModRM 0x85: mod=10 (disp32), reg=rax, rm=rbp
	for b in [u8(0x48), 0x8D, 0x85] {
		g.emit(b)
	}
	g.emit_u32(u32(disp))
}
| 619 | |
// asm_lea_reg_rbp_disp emits `lea reg, [rbp + disp]`.
// Both `reg` and `rbp` are mapped through `g.map_reg`; REX.R / REX.B are set
// when the respective mapped number is 8 or above. The displacement width is
// chosen by asm_emit_modrm_base_disp.
fn asm_lea_reg_rbp_disp(mut g Gen, reg Reg, disp int) {
	dst_num := g.map_reg(int(reg))
	frame_num := g.map_reg(int(rbp))
	r_bit := if dst_num >= 8 { u8(4) } else { u8(0) }
	b_bit := if frame_num >= 8 { u8(1) } else { u8(0) }
	g.emit(u8(0x48) | r_bit | b_bit)
	g.emit(0x8D)
	asm_emit_modrm_base_disp(mut g, dst_num & 7, frame_num, disp)
}
| 635 | |
// asm_lea_reg_rip emits the REX, opcode and ModRM bytes of
// `lea reg, [rip + disp32]` (used for globals/strings).
// The 32-bit displacement is not written by this function.
fn asm_lea_reg_rip(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	prefix := if hw >= 8 { u8(0x4C) } else { u8(0x48) }
	g.emit(prefix)
	g.emit(0x8D)
	// mod=00, rm=101 selects RIP-relative addressing
	g.emit(0x05 | ((hw & 7) << 3))
}
| 647 | |
// asm_mov_reg_got_rip emits the REX, opcode and ModRM bytes of
// `mov reg, qword ptr [rip + disp32]` (used for Mach-O GOTPCREL loads).
// The 32-bit displacement is not written by this function.
fn asm_mov_reg_got_rip(mut g Gen, reg Reg) {
	hw := g.map_reg(int(reg))
	prefix := if hw >= 8 { u8(0x4C) } else { u8(0x48) }
	g.emit(prefix)
	g.emit(0x8B)
	// mod=00, rm=101 selects RIP-relative addressing
	g.emit(0x05 | ((hw & 7) << 3))
}
| 659 | |
| 660 | // === Move Register === |
| 661 | |
// asm_mov_reg_reg emits the 64-bit `mov dst, src` using opcode 0x89
// (source in the ModRM reg field, destination in r/m).
// REX.R is set for a source numbered 8 or above, REX.B for such a destination.
fn asm_mov_reg_reg(mut g Gen, dst Reg, src Reg) {
	dst_num := g.map_reg(int(dst))
	src_num := g.map_reg(int(src))
	r_bit := if src_num >= 8 { u8(4) } else { u8(0) }
	b_bit := if dst_num >= 8 { u8(1) } else { u8(0) }
	g.emit(u8(0x48) | r_bit | b_bit)
	g.emit(0x89)
	g.emit(0xC0 | ((src_num & 7) << 3) | (dst_num & 7))
}
| 677 | |
| 678 | // === Move Immediate === |
| 679 | |
// asm_mov_reg_imm32 emits `mov r32, imm32` (B8+r id), which zero-extends
// into the full 64-bit register. REX.B (0x41) is added for registers
// numbered 8 or above.
fn asm_mov_reg_imm32(mut g Gen, reg Reg, imm u32) {
	hw := g.map_reg(int(reg))
	is_extended := hw >= 8
	if is_extended {
		g.emit(0x41)
	}
	opcode := 0xB8 | (hw & 7)
	g.emit(opcode)
	g.emit_u32(imm)
}
| 689 | |
// asm_mov_reg_imm64 emits `movabs reg, imm64` (REX.W B8+r io).
// REX.B is added for registers numbered 8 or above.
fn asm_mov_reg_imm64(mut g Gen, reg Reg, imm u64) {
	hw := g.map_reg(int(reg))
	prefix := if hw >= 8 { u8(0x49) } else { u8(0x48) }
	g.emit(prefix)
	g.emit(0xB8 | (hw & 7))
	g.emit_u64(imm)
}
| 701 | |
| 702 | // === Load/Store with displacement === |
| 703 | |
// asm_load_reg_rbp_disp emits the 64-bit load `mov reg, [rbp + disp]`.
// A one-byte displacement is used when `disp` fits in -128..127, otherwise
// a four-byte one. REX.R is added for registers numbered 8 or above.
fn asm_load_reg_rbp_disp(mut g Gen, reg Reg, disp int) {
	hw := g.map_reg(int(reg))
	prefix := if hw >= 8 { u8(0x4C) } else { u8(0x48) }
	g.emit(prefix)
	g.emit(0x8B)
	reg_field := (hw & 7) << 3
	if disp < -128 || disp > 127 {
		// ModRM mod=10: rbp base with disp32
		g.emit(0x85 | reg_field)
		g.emit_u32(u32(disp))
		return
	}
	// ModRM mod=01: rbp base with disp8
	g.emit(0x45 | reg_field)
	g.emit(u8(disp))
}
| 721 | |
// asm_store_rbp_disp_reg emits the 64-bit store `mov [rbp + disp], reg`.
// A one-byte displacement is used when `disp` fits in -128..127, otherwise
// a four-byte one. REX.R is added for registers numbered 8 or above.
fn asm_store_rbp_disp_reg(mut g Gen, disp int, reg Reg) {
	hw := g.map_reg(int(reg))
	prefix := if hw >= 8 { u8(0x4C) } else { u8(0x48) }
	g.emit(prefix)
	g.emit(0x89)
	reg_field := (hw & 7) << 3
	if disp < -128 || disp > 127 {
		// ModRM mod=10: rbp base with disp32
		g.emit(0x85 | reg_field)
		g.emit_u32(u32(disp))
		return
	}
	// ModRM mod=01: rbp base with disp8
	g.emit(0x45 | reg_field)
	g.emit(u8(disp))
}
| 739 | |
// asm_store_rbp_disp_reg_size stores the low `size` bytes of `reg` to
// `[rbp + disp]` by forwarding to asm_store_mem_base_disp_reg_size with rbp
// as the base register.
fn asm_store_rbp_disp_reg_size(mut g Gen, disp int, reg Reg, size int) {
	frame_base := rbp
	asm_store_mem_base_disp_reg_size(mut g, frame_base, disp, reg, size)
}
| 743 | |
// asm_cvtsi2ss_xmm0_rax emits `cvtsi2ss xmm0, rax` (F3 48 0F 2A C0):
// signed 64-bit integer in rax to single-precision float in xmm0.
fn asm_cvtsi2ss_xmm0_rax(mut g Gen) {
	for b in [u8(0xF3), 0x48, 0x0F, 0x2A, 0xC0] {
		g.emit(b)
	}
}
| 751 | |
// asm_cvtsi2sd_xmm0_rax emits `cvtsi2sd xmm0, rax` (F2 48 0F 2A C0):
// signed 64-bit integer in rax to double-precision float in xmm0.
fn asm_cvtsi2sd_xmm0_rax(mut g Gen) {
	for b in [u8(0xF2), 0x48, 0x0F, 0x2A, 0xC0] {
		g.emit(b)
	}
}
| 759 | |
// asm_cvttss2si_rax_xmm0 emits `cvttss2si rax, xmm0` (F3 48 0F 2C C0):
// truncating conversion of the single-precision float in xmm0 to a 64-bit integer in rax.
fn asm_cvttss2si_rax_xmm0(mut g Gen) {
	for b in [u8(0xF3), 0x48, 0x0F, 0x2C, 0xC0] {
		g.emit(b)
	}
}
| 767 | |
// asm_cvttsd2si_rax_xmm0 emits `cvttsd2si rax, xmm0` (F2 48 0F 2C C0):
// truncating conversion of the double-precision float in xmm0 to a 64-bit integer in rax.
fn asm_cvttsd2si_rax_xmm0(mut g Gen) {
	for b in [u8(0xF2), 0x48, 0x0F, 0x2C, 0xC0] {
		g.emit(b)
	}
}
| 775 | |
// asm_store_xmm0_rbp_disp stores xmm0 to `[rbp + disp]` by forwarding to
// asm_store_xmm_rbp_disp with register index 0.
fn asm_store_xmm0_rbp_disp(mut g Gen, disp int, size int) {
	xmm0 := 0
	asm_store_xmm_rbp_disp(mut g, xmm0, disp, size)
}
| 779 | |
// asm_store_xmm_rbp_disp emits a scalar float store of `xmm` to `[rbp + disp]`.
// size == 4 selects `movss` (F3 0F 11); any other size selects `movsd` (F2 0F 11).
// REX.R (0x44) is added for xmm8 and above. The displacement is one byte when
// it fits in -128..127, otherwise four bytes.
fn asm_store_xmm_rbp_disp(mut g Gen, xmm int, disp int, size int) {
	g.emit(if size == 4 { u8(0xF3) } else { u8(0xF2) })
	if xmm >= 8 {
		g.emit(0x44)
	}
	g.emit(0x0F)
	g.emit(0x11)
	reg_field := u8(xmm & 7) << 3
	if disp < -128 || disp > 127 {
		g.emit(0x85 | reg_field)
		g.emit_u32(u32(disp))
		return
	}
	g.emit(0x45 | reg_field)
	g.emit(u8(disp))
}
| 799 | |
// asm_store_xmm_mem_base_disp_size emits a scalar float store of `xmm` to
// `[base + disp]`.
// size == 4 selects `movss` (F3 0F 11); any other size selects `movsd` (F2 0F 11).
// A REX prefix is emitted only when xmm >= 8 (REX.R) or the mapped base is
// numbered 8 or above (REX.B).
fn asm_store_xmm_mem_base_disp_size(mut g Gen, xmm int, base Reg, disp int, size int) {
	base_num := g.map_reg(int(base))
	g.emit(if size == 4 { u8(0xF3) } else { u8(0xF2) })
	r_bit := if xmm >= 8 { u8(4) } else { u8(0) }
	b_bit := if base_num >= 8 { u8(1) } else { u8(0) }
	ext := r_bit | b_bit
	if ext != 0 {
		g.emit(0x40 | ext)
	}
	g.emit(0x0F)
	g.emit(0x11)
	asm_emit_modrm_base_disp(mut g, u8(xmm & 7), base_num, disp)
}
| 821 | |
// asm_store_xmm_mem_base_disp_128 emits an unaligned 128-bit store of `xmm`
// to `[base + disp]` using `movdqu` (F3 0F 7F).
// A REX prefix is emitted only when xmm >= 8 (REX.R) or the mapped base is
// numbered 8 or above (REX.B).
fn asm_store_xmm_mem_base_disp_128(mut g Gen, xmm int, base Reg, disp int) {
	base_num := g.map_reg(int(base))
	g.emit(0xF3)
	r_bit := if xmm >= 8 { u8(4) } else { u8(0) }
	b_bit := if base_num >= 8 { u8(1) } else { u8(0) }
	ext := r_bit | b_bit
	if ext != 0 {
		g.emit(0x40 | ext)
	}
	g.emit(0x0F)
	g.emit(0x7F)
	asm_emit_modrm_base_disp(mut g, u8(xmm & 7), base_num, disp)
}
| 839 | |
// asm_load_xmm_rbp_disp emits a scalar float load from `[rbp + disp]` into `xmm`.
// size == 4 selects `movss` (F3 0F 10); any other size selects `movsd` (F2 0F 10).
// The displacement is one byte when it fits in -128..127, otherwise four bytes.
fn asm_load_xmm_rbp_disp(mut g Gen, xmm int, disp int, size int) {
	if size == 4 {
		g.emit(0xF3)
	} else {
		g.emit(0xF2)
	}
	// xmm8-xmm15 need REX.R; without it the load would target xmm0-xmm7.
	// This mirrors the prefix emitted by asm_store_xmm_rbp_disp.
	if xmm >= 8 {
		g.emit(0x44)
	}
	g.emit(0x0F)
	g.emit(0x10)
	if disp >= -128 && disp <= 127 {
		g.emit(0x45 | (u8(xmm & 7) << 3)) // ModRM 01 = disp8
		g.emit(u8(disp))
	} else {
		g.emit(0x85 | (u8(xmm & 7) << 3)) // ModRM 10 = disp32
		g.emit_u32(u32(disp))
	}
}
| 856 | |
// asm_load_xmm_mem_base_disp_size emits a scalar float load from
// `[base + disp]` into `xmm`.
// size == 4 selects `movss` (F3 0F 10); any other size selects `movsd` (F2 0F 10).
// A REX prefix is emitted only when xmm >= 8 (REX.R) or the mapped base is
// numbered 8 or above (REX.B).
fn asm_load_xmm_mem_base_disp_size(mut g Gen, xmm int, base Reg, disp int, size int) {
	base_num := g.map_reg(int(base))
	g.emit(if size == 4 { u8(0xF3) } else { u8(0xF2) })
	r_bit := if xmm >= 8 { u8(4) } else { u8(0) }
	b_bit := if base_num >= 8 { u8(1) } else { u8(0) }
	ext := r_bit | b_bit
	if ext != 0 {
		g.emit(0x40 | ext)
	}
	g.emit(0x0F)
	g.emit(0x10)
	asm_emit_modrm_base_disp(mut g, u8(xmm & 7), base_num, disp)
}
| 878 | |
// asm_load_xmm_mem_base_disp_128 emits an unaligned 128-bit load from
// `[base + disp]` into `xmm` using `movdqu` (F3 0F 6F).
// A REX prefix is emitted only when xmm >= 8 (REX.R) or the mapped base is
// numbered 8 or above (REX.B).
fn asm_load_xmm_mem_base_disp_128(mut g Gen, xmm int, base Reg, disp int) {
	base_num := g.map_reg(int(base))
	g.emit(0xF3)
	r_bit := if xmm >= 8 { u8(4) } else { u8(0) }
	b_bit := if base_num >= 8 { u8(1) } else { u8(0) }
	ext := r_bit | b_bit
	if ext != 0 {
		g.emit(0x40 | ext)
	}
	g.emit(0x0F)
	g.emit(0x6F)
	asm_emit_modrm_base_disp(mut g, u8(xmm & 7), base_num, disp)
}
| 896 | |
// asm_cvtss2sd_xmm0_xmm0 emits `cvtss2sd xmm0, xmm0` (F3 0F 5A C0):
// widens the single-precision value in xmm0 to double precision.
fn asm_cvtss2sd_xmm0_xmm0(mut g Gen) {
	for b in [u8(0xF3), 0x0F, 0x5A, 0xC0] {
		g.emit(b)
	}
}
| 903 | |
// asm_cvtss2sd_xmm1_xmm1 emits `cvtss2sd xmm1, xmm1` (F3 0F 5A C9):
// widens the single-precision value in xmm1 to double precision.
fn asm_cvtss2sd_xmm1_xmm1(mut g Gen) {
	for b in [u8(0xF3), 0x0F, 0x5A, 0xC9] {
		g.emit(b)
	}
}
| 910 | |
// asm_cvtsd2ss_xmm0_xmm0 emits `cvtsd2ss xmm0, xmm0` (F2 0F 5A C0):
// narrows the double-precision value in xmm0 to single precision.
fn asm_cvtsd2ss_xmm0_xmm0(mut g Gen) {
	for b in [u8(0xF2), 0x0F, 0x5A, 0xC0] {
		g.emit(b)
	}
}
| 917 | |
// asm_add_float_xmm0_xmm0 emits a scalar float add with ModRM 0xC0, i.e.
// both operands are xmm0 (`addss xmm0, xmm0` for size == 4, otherwise
// `addsd xmm0, xmm0`).
// NOTE(review): the sibling helpers use xmm1 as the second operand (ModRM 0xC1);
// confirm that adding xmm0 to itself is what callers expect here.
fn asm_add_float_xmm0_xmm0(mut g Gen, size int) {
	prefix := if size == 4 { u8(0xF3) } else { u8(0xF2) }
	for b in [prefix, 0x0F, 0x58, 0xC0] {
		g.emit(b)
	}
}
| 928 | |
// asm_sub_float_xmm0_xmm1 emits `subss xmm0, xmm1` (size == 4) or
// `subsd xmm0, xmm1` (any other size): prefix, 0F 5C C1.
fn asm_sub_float_xmm0_xmm1(mut g Gen, size int) {
	prefix := if size == 4 { u8(0xF3) } else { u8(0xF2) }
	for b in [prefix, 0x0F, 0x5C, 0xC1] {
		g.emit(b)
	}
}
| 939 | |
// asm_float_binop_xmm0_xmm1 emits a scalar SSE binary operation on xmm0, xmm1.
// `op` is the opcode byte that follows 0x0F; size == 4 selects the
// single-precision prefix (F3), any other size the double-precision one (F2).
fn asm_float_binop_xmm0_xmm1(mut g Gen, op u8, size int) {
	prefix := if size == 4 { u8(0xF3) } else { u8(0xF2) }
	for b in [prefix, 0x0F, op, 0xC1] {
		g.emit(b)
	}
}
| 950 | |
// asm_movsxd_rax_eax emits `movsxd rax, eax` (48 63 C0): sign-extend eax into rax.
fn asm_movsxd_rax_eax(mut g Gen) {
	for b in [u8(0x48), 0x63, 0xC0] {
		g.emit(b)
	}
}
| 956 | |
// asm_movsx_rax_ax emits `movsx rax, ax` (48 0F BF C0): sign-extend ax into rax.
fn asm_movsx_rax_ax(mut g Gen) {
	for b in [u8(0x48), 0x0F, 0xBF, 0xC0] {
		g.emit(b)
	}
}
| 963 | |
// asm_movsx_rax_al emits `movsx rax, al` (48 0F BE C0): sign-extend al into rax.
fn asm_movsx_rax_al(mut g Gen) {
	for b in [u8(0x48), 0x0F, 0xBE, 0xC0] {
		g.emit(b)
	}
}
| 970 | |
| 971 | // === Branches === |
| 972 | |
// asm_jmp_rel32 emits the opcode byte of `jmp rel32` (0xE9).
// The 32-bit displacement is not written by this function.
fn asm_jmp_rel32(mut g Gen) {
	opcode := u8(0xE9)
	g.emit(opcode)
}
| 977 | |
// asm_je_rel32 emits the opcode bytes of `je rel32` (0F 84).
// The 32-bit displacement is not written by this function.
fn asm_je_rel32(mut g Gen) {
	for b in [u8(0x0F), 0x84] {
		g.emit(b)
	}
}
| 983 | |
// asm_jne_rel32 emits the opcode bytes of `jne rel32` (0F 85).
// The 32-bit displacement is not written by this function.
fn asm_jne_rel32(mut g Gen) {
	for b in [u8(0x0F), 0x85] {
		g.emit(b)
	}
}
| 989 | |
// asm_jns_rel32 emits the opcode bytes of `jns rel32` (0F 89, jump if sign flag clear).
// The 32-bit displacement is not written by this function.
fn asm_jns_rel32(mut g Gen) {
	for b in [u8(0x0F), 0x89] {
		g.emit(b)
	}
}
| 994 | |
// asm_jae_rel32 emits the opcode bytes of `jae rel32` (0F 83, unsigned above-or-equal).
// The 32-bit displacement is not written by this function.
fn asm_jae_rel32(mut g Gen) {
	for b in [u8(0x0F), 0x83] {
		g.emit(b)
	}
}
| 999 | |
| 1000 | // === Call === |
| 1001 | |
// asm_call_rel32 emits the opcode byte of `call rel32` (0xE8).
// The 32-bit displacement is not written by this function.
fn asm_call_rel32(mut g Gen) {
	opcode := u8(0xE8)
	g.emit(opcode)
}
| 1006 | |
// asm_call_r10 emits the indirect call `call r10` (41 FF D2):
// REX.B to reach r10, opcode FF with /2 in the ModRM byte.
fn asm_call_r10(mut g Gen) {
	for b in [u8(0x41), 0xFF, 0xD2] {
		g.emit(b)
	}
}
| 1013 | |
| 1014 | // === Special === |
| 1015 | |
// asm_ud2 emits `ud2` (0F 0B), the guaranteed-undefined instruction used as a trap.
fn asm_ud2(mut g Gen) {
	for b in [u8(0x0F), 0x0B] {
		g.emit(b)
	}
}
| 1021 | |