| 1 | // Copyright (c) 2026 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | |
| 5 | module optimize |
| 6 | |
| 7 | import v2.ssa |
| 8 | |
| 9 | // --- Mem2Reg (Promote Allocas) --- |
| 10 | // All data structures use flat arrays indexed by value_id or block_id |
| 11 | // to avoid map operations which are extremely slow in the ARM64 backend. |
| 12 | struct Mem2RegCtx { |
| 13 | mut: |
| 14 | // Indexed by alloc_id (value_id): list of block_ids where this alloc is stored to |
| 15 | defs [][]int |
| 16 | // Indexed by alloc_id (value_id): list of block_ids where this alloc is loaded from |
| 17 | uses [][]int |
| 18 | // Indexed by block_id: list of alloc_ids that need phi nodes in this block |
| 19 | phi_placements [][]int |
| 20 | // Indexed by block_id: parallel to phi_placements, stores the phi value_ids |
| 21 | // phi_vals[blk_id][i] is the phi val_id for phi_placements[blk_id][i] |
| 22 | phi_vals [][]int |
| 23 | // Indexed by alloc_id (value_id): stack of current SSA values during renaming |
| 24 | stacks [][]int |
| 25 | // Track which alloc_ids have been initialized (for cleanup) |
| 26 | is_promotable []bool |
| 27 | // Track which block_ids have phi placements (for cleanup and iteration) |
| 28 | phi_blocks []int |
| 29 | // Deferred phi operands: flat list of [instr_idx, val, bvid, instr_idx, val, bvid, ...]. |
| 30 | // Applied after rename to avoid m.instrs[idx].operands << x which is broken |
| 31 | // in ARM64 self-hosted binaries (chained array-element-field-append bug). |
| 32 | deferred_phi_ops []int |
| 33 | } |
| 34 | |
| 35 | // Helper: append to an inner array of a [][]int with proper write-back. |
| 36 | // Direct `arr[idx] << val` is broken in ARM64 self-hosted binaries |
| 37 | // (chained array-element append copies the struct instead of modifying in-place). |
| 38 | fn array2d_append(mut arr [][]int, idx int, val int) { |
| 39 | mut inner := arr[idx] |
| 40 | inner << val |
| 41 | arr[idx] = inner |
| 42 | } |
| 43 | |
| 44 | fn promote_memory_to_register(mut m ssa.Module, dom DomInfo, cfg &CfgData) { |
| 45 | n_values := m.values.len |
| 46 | n_blocks := m.blocks.len |
| 47 | |
| 48 | // Pre-allocate flat arrays ONCE outside the function loop. |
| 49 | mut ctx := Mem2RegCtx{ |
| 50 | defs: [][]int{len: n_values} |
| 51 | uses: [][]int{len: n_values} |
| 52 | phi_placements: [][]int{len: n_blocks} |
| 53 | phi_vals: [][]int{len: n_blocks} |
| 54 | stacks: [][]int{len: n_values} |
| 55 | is_promotable: []bool{len: n_values} |
| 56 | phi_blocks: []int{} |
| 57 | } |
| 58 | |
| 59 | // Pre-allocate stack_counts array (for rename_recursive). |
| 60 | mut stack_counts := []int{len: n_values} |
| 61 | for sci in 0 .. n_values { |
| 62 | stack_counts[sci] = -1 |
| 63 | } |
| 64 | |
| 65 | // Dominance frontier: indexed by block_id |
| 66 | mut df := [][]int{len: n_blocks} |
| 67 | // Track which blocks have DF entries for cleanup |
| 68 | mut df_blocks := []int{} |
| 69 | |
| 70 | // Per-alloc visited/has_phi bitsets (reused across allocs within a function) |
| 71 | mut visited := []bool{len: n_blocks} |
| 72 | mut has_phi := []bool{len: n_blocks} |
| 73 | |
| 74 | mut total_allocas := 0 |
| 75 | mut total_promoted := 0 |
| 76 | mut total_not_promoted := 0 |
| 77 | mut total_blocks_visited := 0 |
| 78 | mut total_blocks_skipped := 0 |
| 79 | mut total_blocks_in_funcs := 0 |
| 80 | |
| 81 | for fi in 0 .. m.funcs.len { |
| 82 | // 1. Analyze Allocas |
| 83 | mut promotable := []int{} |
| 84 | func := m.funcs[fi] |
| 85 | n_func_blocks := func.blocks.len |
| 86 | for fbi in 0 .. n_func_blocks { |
| 87 | blk_id := func.blocks[fbi] |
| 88 | blk := m.blocks[blk_id] |
| 89 | n_blk_instrs := blk.instrs.len |
| 90 | for ii in 0 .. n_blk_instrs { |
| 91 | val_id := blk.instrs[ii] |
| 92 | val := m.values[val_id] |
| 93 | instr := m.instrs[val.index] |
| 94 | if instr.op == .alloca { |
| 95 | total_allocas += 1 |
| 96 | if is_promotable(m, val_id) { |
| 97 | promotable << val_id |
| 98 | ctx.is_promotable[val_id] = true |
| 99 | total_promoted += 1 |
| 100 | } else { |
| 101 | total_not_promoted += 1 |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | if instr.op == .store { |
| 106 | ptr := instr.operands[1] |
| 107 | // Avoid duplicate def entries for same block |
| 108 | if ptr < n_values && blk_id !in ctx.defs[ptr] { |
| 109 | array2d_append(mut ctx.defs, ptr, blk_id) |
| 110 | } |
| 111 | } else if instr.op == .load { |
| 112 | ptr := instr.operands[0] |
| 113 | // Avoid duplicate use entries for same block |
| 114 | if ptr < n_values && blk_id !in ctx.uses[ptr] { |
| 115 | array2d_append(mut ctx.uses, ptr, blk_id) |
| 116 | } |
| 117 | } |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | // 2. Compute Dominance Frontier (flat array version) |
| 122 | compute_dominance_frontier_flat(m, fi, &dom, cfg, mut df, mut df_blocks) |
| 123 | |
| 124 | // 3. Insert Phis (Dominance Frontier) |
| 125 | n_promotable := promotable.len |
| 126 | for pri in 0 .. n_promotable { |
| 127 | alloc_id := promotable[pri] |
| 128 | mut worklist := ctx.defs[alloc_id].clone() |
| 129 | // Track blocks we mark visited/has_phi for cleanup |
| 130 | mut visited_list := []int{} |
| 131 | mut has_phi_list := []int{} |
| 132 | |
| 133 | for worklist.len > 0 { |
| 134 | b := worklist.pop() |
| 135 | if b < 0 || b >= n_blocks { |
| 136 | continue |
| 137 | } |
| 138 | n_df_b := df[b].len |
| 139 | for di in 0 .. n_df_b { |
| 140 | d := df[b][di] |
| 141 | if !has_phi[d] { |
| 142 | array2d_append(mut ctx.phi_placements, d, alloc_id) |
| 143 | if d !in ctx.phi_blocks { |
| 144 | ctx.phi_blocks << d |
| 145 | } |
| 146 | has_phi[d] = true |
| 147 | has_phi_list << d |
| 148 | if !visited[d] { |
| 149 | visited[d] = true |
| 150 | visited_list << d |
| 151 | worklist << d |
| 152 | } |
| 153 | } |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | // Reset visited/has_phi for this alloc |
| 158 | for vli in 0 .. visited_list.len { |
| 159 | visited[visited_list[vli]] = false |
| 160 | } |
| 161 | for hli in 0 .. has_phi_list.len { |
| 162 | has_phi[has_phi_list[hli]] = false |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | // Insert Phis |
| 167 | n_phi_blocks := ctx.phi_blocks.len |
| 168 | for pbi in 0 .. n_phi_blocks { |
| 169 | blk_id := ctx.phi_blocks[pbi] |
| 170 | n_phi_allocs := ctx.phi_placements[blk_id].len |
| 171 | for pai in 0 .. n_phi_allocs { |
| 172 | alloc_id := ctx.phi_placements[blk_id][pai] |
| 173 | alloc_val := m.values[alloc_id] |
| 174 | alloc_typ := m.type_store.types[alloc_val.typ] |
| 175 | typ := alloc_typ.elem_type |
| 176 | phi_val := m.add_instr_front(.phi, blk_id, typ, []) |
| 177 | // Avoid m.values[phi_val].name = ... -- chained field assign broken in ARM64 self-hosted |
| 178 | mut pv := m.values[phi_val] |
| 179 | pv.name = '${alloc_val.name}.phi_${blk_id}' |
| 180 | m.values[phi_val] = pv |
| 181 | // Store phi_val_id in parallel array for direct lookup (no string matching) |
| 182 | array2d_append(mut ctx.phi_vals, blk_id, phi_val) |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | // 4. Rename Variables |
| 187 | func2 := m.funcs[fi] |
| 188 | if func2.blocks.len > 0 { |
| 189 | entry := func2.blocks[0] |
| 190 | total_blocks_in_funcs += func2.blocks.len |
| 191 | bv, bs := |
| 192 | rename_recursive(mut m, entry, mut ctx, promotable, mut stack_counts, &dom, cfg) |
| 193 | total_blocks_visited += bv |
| 194 | total_blocks_skipped += bs |
| 195 | } |
| 196 | |
| 197 | // 4b. Apply deferred phi operands. |
| 198 | // Builds complete operand lists for each phi and assigns them all at once. |
| 199 | // This avoids m.instrs[idx].operands << x which is broken in v3-compiled binaries |
| 200 | // (chained array-element-field-append copies the struct instead of modifying in-place). |
| 201 | if ctx.deferred_phi_ops.len > 0 { |
| 202 | n_deferred := ctx.deferred_phi_ops.len |
| 203 | n_phi_blocks3 := ctx.phi_blocks.len |
| 204 | mut n_applied := 0 |
| 205 | mut n_verified := 0 |
| 206 | mut n_mismatch := 0 |
| 207 | for pbi3 in 0 .. n_phi_blocks3 { |
| 208 | pb_id := ctx.phi_blocks[pbi3] |
| 209 | n_phi_allocs3 := ctx.phi_vals[pb_id].len |
| 210 | for pai3 in 0 .. n_phi_allocs3 { |
| 211 | phi_vid := ctx.phi_vals[pb_id][pai3] |
| 212 | phi_v := m.values[phi_vid] |
| 213 | phi_instr_idx := phi_v.index |
| 214 | // Collect all operands for this phi from deferred list |
| 215 | mut ops := []int{} |
| 216 | mut dj := 0 |
| 217 | for dj + 2 < n_deferred { |
| 218 | if ctx.deferred_phi_ops[dj] == phi_instr_idx { |
| 219 | ops << ctx.deferred_phi_ops[dj + 1] |
| 220 | ops << ctx.deferred_phi_ops[dj + 2] |
| 221 | } |
| 222 | dj += 3 |
| 223 | } |
| 224 | if ops.len > 0 { |
| 225 | // Avoid m.instrs[X].operands = ops -- chained field assign broken in ARM64 self-hosted |
| 226 | mut phi_instr := m.instrs[phi_instr_idx] |
| 227 | phi_instr.operands = ops |
| 228 | m.instrs[phi_instr_idx] = phi_instr |
| 229 | n_applied += 1 |
| 230 | // Verify the assignment was actually written |
| 231 | actual_len := m.instrs[phi_instr_idx].operands.len |
| 232 | if actual_len != ops.len { |
| 233 | n_mismatch += 1 |
| 234 | if n_mismatch <= 5 { |
| 235 | eprintln(' DEFERRED MISMATCH: phi_vid=${phi_vid} instr=${phi_instr_idx} wanted=${ops.len} got=${actual_len}') |
| 236 | } |
| 237 | } else { |
| 238 | n_verified += 1 |
| 239 | // Also verify first operand value |
| 240 | if ops.len >= 2 { |
| 241 | actual0 := m.instrs[phi_instr_idx].operands[0] |
| 242 | actual1 := m.instrs[phi_instr_idx].operands[1] |
| 243 | if actual0 != ops[0] || actual1 != ops[1] { |
| 244 | n_mismatch += 1 |
| 245 | if n_mismatch <= 5 { |
| 246 | eprintln(' DEFERRED VALUE MISMATCH: phi_vid=${phi_vid} instr=${phi_instr_idx} wanted[0]=${ops[0]},${ops[1]} got=${actual0},${actual1}') |
| 247 | } |
| 248 | } |
| 249 | } |
| 250 | } |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | if n_mismatch > 0 { |
| 255 | eprintln(' DEFERRED phi ops: applied=${n_applied} verified=${n_verified} MISMATCH=${n_mismatch}') |
| 256 | } |
| 257 | ctx.deferred_phi_ops = [] |
| 258 | } |
| 259 | // Update uses for phi operands (deferred from step 3) |
| 260 | n_phi_blocks4 := ctx.phi_blocks.len |
| 261 | for pbi4 in 0 .. n_phi_blocks4 { |
| 262 | pb_id := ctx.phi_blocks[pbi4] |
| 263 | n_phi_allocs4 := ctx.phi_vals[pb_id].len |
| 264 | for pai4 in 0 .. n_phi_allocs4 { |
| 265 | phi_vid := ctx.phi_vals[pb_id][pai4] |
| 266 | phi_v := m.values[phi_vid] |
| 267 | phi_instr_idx := phi_v.index |
| 268 | instr := m.instrs[phi_instr_idx] |
| 269 | for oi := 0; oi < instr.operands.len; oi += 2 { |
| 270 | val_op := instr.operands[oi] |
| 271 | if val_op < m.values.len && phi_vid !in m.values[val_op].uses { |
| 272 | // Read whole struct, modify, write back (chained broken in ARM64) |
| 273 | mut vop := m.values[val_op] |
| 274 | vop.uses << phi_vid |
| 275 | m.values[val_op] = vop |
| 276 | } |
| 277 | } |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | // 5. Cleanup for next function: reset only entries we touched |
| 282 | for cli in 0 .. promotable.len { |
| 283 | alloc_id := promotable[cli] |
| 284 | ctx.defs[alloc_id] = [] |
| 285 | ctx.uses[alloc_id] = [] |
| 286 | ctx.stacks[alloc_id] = [] |
| 287 | ctx.is_promotable[alloc_id] = false |
| 288 | } |
| 289 | n_phi_blocks2 := ctx.phi_blocks.len |
| 290 | for cli2 in 0 .. n_phi_blocks2 { |
| 291 | ctx.phi_placements[ctx.phi_blocks[cli2]] = [] |
| 292 | ctx.phi_vals[ctx.phi_blocks[cli2]] = [] |
| 293 | } |
| 294 | ctx.phi_blocks = [] |
| 295 | n_df_blocks := df_blocks.len |
| 296 | for cli3 in 0 .. n_df_blocks { |
| 297 | df[df_blocks[cli3]] = [] |
| 298 | } |
| 299 | df_blocks = [] |
| 300 | } |
| 301 | eprintln(' mem2reg stats: allocas=${total_allocas} promoted=${total_promoted} not_promoted=${total_not_promoted} blocks_total=${total_blocks_in_funcs} blocks_visited=${total_blocks_visited} blocks_skipped=${total_blocks_skipped}') |
| 302 | } |
| 303 | |
| 304 | fn is_promotable(m &ssa.Module, alloc_id int) bool { |
| 305 | // Keep array-backed slots in memory. Promoting pointer-to-array allocas can |
| 306 | // lose correct addressing semantics for fixed-array literals/indexing. |
| 307 | if alloc_id > 0 && alloc_id < m.values.len { |
| 308 | alloc_val := m.values[alloc_id] |
| 309 | alloc_typ_id := alloc_val.typ |
| 310 | if alloc_typ_id > 0 && alloc_typ_id < m.type_store.types.len { |
| 311 | alloc_typ := m.type_store.types[alloc_typ_id] |
| 312 | if alloc_typ.kind == .ptr_t { |
| 313 | elem_typ_id := alloc_typ.elem_type |
| 314 | if elem_typ_id > 0 && elem_typ_id < m.type_store.types.len { |
| 315 | elem_typ := m.type_store.types[elem_typ_id] |
| 316 | if elem_typ.kind == .ptr_t { |
| 317 | return false |
| 318 | } |
| 319 | if elem_typ.kind == .array_t { |
| 320 | return false |
| 321 | } |
| 322 | } |
| 323 | } |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | alloc_val2 := m.values[alloc_id] |
| 328 | uses := alloc_val2.uses |
| 329 | n_uses := uses.len |
| 330 | for ui in 0 .. n_uses { |
| 331 | u := uses[ui] |
| 332 | if u >= m.values.len { |
| 333 | continue |
| 334 | } |
| 335 | user := m.values[u] |
| 336 | if user.kind != .instruction { |
| 337 | return false |
| 338 | } |
| 339 | instr := m.instrs[user.index] |
| 340 | match instr.op { |
| 341 | .load { |
| 342 | if instr.operands.len == 0 || instr.operands[0] != alloc_id { |
| 343 | return false |
| 344 | } |
| 345 | } |
| 346 | .store { |
| 347 | // Only safe if used as pointer (index 1) |
| 348 | if instr.operands.len < 2 || instr.operands[1] != alloc_id { |
| 349 | return false |
| 350 | } |
| 351 | } |
| 352 | else { |
| 353 | // Escape (GEP, Call, Phi, etc.) |
| 354 | return false |
| 355 | } |
| 356 | } |
| 357 | } |
| 358 | return true |
| 359 | } |
| 360 | |
| 361 | fn compute_dominance_frontier_flat(m &ssa.Module, func_idx int, dom &DomInfo, cfg &CfgData, mut df [][]int, mut df_blocks []int) { |
| 362 | func := m.funcs[func_idx] |
| 363 | n_func_blocks := func.blocks.len |
| 364 | for bi in 0 .. n_func_blocks { |
| 365 | blk_id := func.blocks[bi] |
| 366 | if blk_id < 0 || blk_id >= m.blocks.len { |
| 367 | continue |
| 368 | } |
| 369 | num_preds := cfg.preds[blk_id].len |
| 370 | if num_preds >= 2 { |
| 371 | for pi in 0 .. num_preds { |
| 372 | mut runner := cfg.preds[blk_id][pi] |
| 373 | idom := dom.idom[blk_id] |
| 374 | // Safety check: idom != -1 |
| 375 | for runner != -1 && runner != idom { |
| 376 | if runner < 0 || runner >= m.blocks.len { |
| 377 | break |
| 378 | } |
| 379 | // Avoid duplicate entries in dominance frontier |
| 380 | if blk_id !in df[runner] { |
| 381 | array2d_append(mut df, runner, blk_id) |
| 382 | if runner !in df_blocks { |
| 383 | df_blocks << runner |
| 384 | } |
| 385 | } |
| 386 | if runner == dom.idom[runner] { |
| 387 | break |
| 388 | } |
| 389 | runner = dom.idom[runner] |
| 390 | } |
| 391 | } |
| 392 | } |
| 393 | } |
| 394 | } |
| 395 | |
| 396 | // RenameFrame holds the state for one level of the iterative dom tree walk. |
| 397 | struct RenameFrame { |
| 398 | mut: |
| 399 | blk_id int |
| 400 | child_idx int // next child to process in dom_tree |
| 401 | pushed_allocs []int // alloc_ids pushed in this block (for cleanup) |
| 402 | processed bool // whether steps 1-3 have been run for this block |
| 403 | } |
| 404 | |
| 405 | fn rename_iterative(mut m ssa.Module, root_blk int, mut ctx Mem2RegCtx, _promotable []int, mut _stack_counts []int, dom &DomInfo, cfg &CfgData) (int, int) { |
| 406 | // Pre-build block_val_ids[] by scanning values for basic_block kind. |
| 407 | // This avoids m.blocks[blk_id].val_id which produces wrong results |
| 408 | // in ARM64-compiled binaries (large struct field access bug). |
| 409 | n_blks := m.blocks.len |
| 410 | mut block_val_ids := []int{len: n_blks} |
| 411 | n_vals := m.values.len |
| 412 | mut n_bb_found := 0 |
| 413 | mut n_bb_valid := 0 |
| 414 | for vi in 0 .. n_vals { |
| 415 | if m.values[vi].kind == .basic_block { |
| 416 | bid := m.values[vi].index |
| 417 | n_bb_found += 1 |
| 418 | if bid >= 0 && bid < n_blks { |
| 419 | block_val_ids[bid] = vi |
| 420 | n_bb_valid += 1 |
| 421 | } |
| 422 | } |
| 423 | } |
| 424 | // Verify: also check using m.blocks[bid].val_id approach and compare |
| 425 | mut n_match := 0 |
| 426 | mut n_mismatch := 0 |
| 427 | for bid in 0 .. n_blks { |
| 428 | bval := m.blocks[bid].val_id |
| 429 | if block_val_ids[bid] != bval { |
| 430 | n_mismatch += 1 |
| 431 | if n_mismatch <= 3 { |
| 432 | eprintln(' mem2reg block_val_ids MISMATCH: bid=${bid} scan=${block_val_ids[bid]} blocks=${bval}') |
| 433 | } |
| 434 | } else { |
| 435 | n_match += 1 |
| 436 | } |
| 437 | } |
| 438 | if n_mismatch > 0 { |
| 439 | eprintln(' mem2reg block_val_ids: bb_found=${n_bb_found} valid=${n_bb_valid} match=${n_match} mismatch=${n_mismatch}') |
| 440 | } |
| 441 | |
| 442 | mut work := []RenameFrame{} |
| 443 | work << RenameFrame{ |
| 444 | blk_id: root_blk |
| 445 | } |
| 446 | mut visited := []bool{len: m.blocks.len} |
| 447 | mut blocks_visited := 0 |
| 448 | mut blocks_skipped := 0 |
| 449 | |
| 450 | for work.len > 0 { |
| 451 | fi := work.len - 1 |
| 452 | blk_id := work[fi].blk_id |
| 453 | |
| 454 | if !work[fi].processed { |
| 455 | // Cycle detection: skip blocks already visited in this traversal |
| 456 | if blk_id >= 0 && blk_id < visited.len && visited[blk_id] { |
| 457 | blocks_skipped += 1 |
| 458 | work.pop() |
| 459 | continue |
| 460 | } |
| 461 | if blk_id >= 0 && blk_id < visited.len { |
| 462 | visited[blk_id] = true |
| 463 | } |
| 464 | blocks_visited += 1 |
| 465 | // Steps 1-3: process this block (runs once per block) |
| 466 | // Avoid work[fi].processed = true — chained field assignment broken in ARM64 self-hosted. |
| 467 | mut frame2 := work[fi] |
| 468 | frame2.processed = true |
| 469 | work[fi] = frame2 |
| 470 | |
| 471 | // 1. Push Phis to stack (use phi_vals[] for direct lookup, no string matching) |
| 472 | if blk_id < ctx.phi_placements.len && ctx.phi_placements[blk_id].len > 0 { |
| 473 | n_phi_allocs := ctx.phi_placements[blk_id].len |
| 474 | for pai in 0 .. n_phi_allocs { |
| 475 | alloc_id := ctx.phi_placements[blk_id][pai] |
| 476 | // Direct lookup: phi_vals[blk_id][pai] is the phi for this alloc |
| 477 | if blk_id < ctx.phi_vals.len && pai < ctx.phi_vals[blk_id].len { |
| 478 | phi_val_id := ctx.phi_vals[blk_id][pai] |
| 479 | // Avoid ctx.stacks[X] << Y — chained append broken in ARM64 self-hosted. |
| 480 | mut new_stack := ctx.stacks[alloc_id].clone() |
| 481 | new_stack << phi_val_id |
| 482 | ctx.stacks[alloc_id] = new_stack |
| 483 | // Avoid work[fi].pushed_allocs = X — chained field assignment broken in ARM64. |
| 484 | mut wf := work[fi] |
| 485 | wf.pushed_allocs << alloc_id |
| 486 | work[fi] = wf |
| 487 | } |
| 488 | } |
| 489 | } |
| 490 | |
| 491 | // 2. Process Instructions |
| 492 | mut instrs_to_nop := []int{} |
| 493 | |
| 494 | blk2 := m.blocks[blk_id] |
| 495 | n_instrs2 := blk2.instrs.len |
| 496 | for ii in 0 .. n_instrs2 { |
| 497 | val_id := blk2.instrs[ii] |
| 498 | val := m.values[val_id] |
| 499 | instr := m.instrs[val.index] |
| 500 | if instr.op == .store { |
| 501 | ptr := instr.operands[1] |
| 502 | if ptr < ctx.is_promotable.len && ctx.is_promotable[ptr] { |
| 503 | // Avoid ctx.stacks[X] << Y — chained append broken in ARM64 self-hosted. |
| 504 | mut new_stack := ctx.stacks[ptr].clone() |
| 505 | new_stack << instr.operands[0] |
| 506 | ctx.stacks[ptr] = new_stack |
| 507 | // Avoid work[fi].pushed_allocs = X — chained field assignment broken in ARM64. |
| 508 | mut wf := work[fi] |
| 509 | wf.pushed_allocs << ptr |
| 510 | work[fi] = wf |
| 511 | instrs_to_nop << val_id |
| 512 | } |
| 513 | } else if instr.op == .load { |
| 514 | ptr := instr.operands[0] |
| 515 | if ptr < ctx.is_promotable.len && ctx.is_promotable[ptr] { |
| 516 | stack := ctx.stacks[ptr] |
| 517 | mut repl := 0 |
| 518 | if stack.len > 0 { |
| 519 | repl = stack.last() |
| 520 | } else { |
| 521 | // Undef - reading uninitialized memory |
| 522 | res_type := val.typ |
| 523 | repl = m.get_or_add_const(res_type, 'undef') |
| 524 | } |
| 525 | m.replace_uses(val_id, repl) |
| 526 | instrs_to_nop << val_id |
| 527 | } |
| 528 | } else if instr.op == .alloca { |
| 529 | if val_id < ctx.is_promotable.len && ctx.is_promotable[val_id] { |
| 530 | instrs_to_nop << val_id |
| 531 | } |
| 532 | } |
| 533 | } |
| 534 | |
| 535 | for nopi in 0 .. instrs_to_nop.len { |
| 536 | vid := instrs_to_nop[nopi] |
| 537 | vid_val := m.values[vid] |
| 538 | // Avoid m.instrs[X].op/operands = ... -- chained field assign broken in ARM64 self-hosted |
| 539 | mut nop_instr := m.instrs[vid_val.index] |
| 540 | nop_instr.op = .bitcast |
| 541 | nop_instr.operands = [] |
| 542 | m.instrs[vid_val.index] = nop_instr |
| 543 | } |
| 544 | |
| 545 | // 3. Update Successor Phi Operands |
| 546 | n_succs := cfg.succs[blk_id].len |
| 547 | for si in 0 .. n_succs { |
| 548 | succ_id := cfg.succs[blk_id][si] |
| 549 | if succ_id < ctx.phi_placements.len && ctx.phi_placements[succ_id].len > 0 { |
| 550 | n_succ_phi_allocs := ctx.phi_placements[succ_id].len |
| 551 | for spai in 0 .. n_succ_phi_allocs { |
| 552 | alloc_id := ctx.phi_placements[succ_id][spai] |
| 553 | // Direct lookup: phi_vals[succ_id][spai] is the phi for this alloc |
| 554 | if succ_id < ctx.phi_vals.len && spai < ctx.phi_vals[succ_id].len { |
| 555 | vid := ctx.phi_vals[succ_id][spai] |
| 556 | phi_v := m.values[vid] |
| 557 | mut phi_val := if ctx.stacks[alloc_id].len > 0 { |
| 558 | ctx.stacks[alloc_id].last() |
| 559 | } else { |
| 560 | // Undef - reading uninitialized memory |
| 561 | alloc_v := m.values[alloc_id] |
| 562 | alloc_typ := m.type_store.types[alloc_v.typ] |
| 563 | m.get_or_add_const(alloc_typ.elem_type, 'undef') |
| 564 | } |
| 565 | bvid := block_val_ids[blk_id] |
| 566 | // Defer phi operand append to avoid m.instrs[idx].operands << x |
| 567 | // which is broken in ARM64 self-hosted binaries. |
| 568 | ctx.deferred_phi_ops << phi_v.index |
| 569 | ctx.deferred_phi_ops << phi_val |
| 570 | ctx.deferred_phi_ops << bvid |
| 571 | // FIX: Update uses so DCE doesn't remove the value |
| 572 | // Avoid m.values[X].uses << Y — chained append broken in ARM64 self-hosted. |
| 573 | if phi_val < m.values.len { |
| 574 | if vid !in m.values[phi_val].uses { |
| 575 | // Read whole struct, modify, write back (chained broken in ARM64) |
| 576 | mut vv := m.values[phi_val] |
| 577 | vv.uses << vid |
| 578 | m.values[phi_val] = vv |
| 579 | } |
| 580 | } |
| 581 | } |
| 582 | } |
| 583 | } |
| 584 | } |
| 585 | } |
| 586 | |
| 587 | // 4. Push next unvisited dom child |
| 588 | n_children := dom.dom_tree[blk_id].len |
| 589 | child_idx := work[fi].child_idx |
| 590 | if child_idx < n_children { |
| 591 | child := dom.dom_tree[blk_id][child_idx] |
| 592 | // Avoid work[fi].child_idx++ — chained field increment broken in ARM64 self-hosted. |
| 593 | mut frame := work[fi] |
| 594 | frame.child_idx++ |
| 595 | work[fi] = frame |
| 596 | work << RenameFrame{ |
| 597 | blk_id: child |
| 598 | } |
| 599 | } else { |
| 600 | // 5. All children processed - pop stacks (cleanup) |
| 601 | pushed := work[fi].pushed_allocs.clone() |
| 602 | work.pop() |
| 603 | for i := pushed.len - 1; i >= 0; i-- { |
| 604 | // Avoid ctx.stacks[X].pop() — chained method broken in ARM64 self-hosted. |
| 605 | mut s := ctx.stacks[pushed[i]].clone() |
| 606 | s.pop() |
| 607 | ctx.stacks[pushed[i]] = s |
| 608 | } |
| 609 | } |
| 610 | } |
| 611 | return blocks_visited, blocks_skipped |
| 612 | } |
| 613 | |
| 614 | fn rename_recursive(mut m ssa.Module, blk_id int, mut ctx Mem2RegCtx, promotable []int, mut stack_counts []int, dom &DomInfo, cfg &CfgData) (int, int) { |
| 615 | return rename_iterative(mut m, blk_id, mut ctx, promotable, mut stack_counts, dom, cfg) |
| 616 | } |
| 617 | |