| 1 | // Copyright (c) 2026 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | |
| 5 | module optimize |
| 6 | |
| 7 | import v2.ssa |
| 8 | |
| 9 | // --- Simplify Phi Nodes --- |
| 10 | // Remove trivial phi nodes where all operands are the same or self-referential. |
| 11 | // A phi is trivial if all its non-self operands resolve to the same value. |
| 12 | // This reduces unnecessary instructions before phi elimination. |
| 13 | fn simplify_phi_nodes(mut m ssa.Module) bool { |
| 14 | mut any_changed := false |
| 15 | mut changed := true |
| 16 | for changed { |
| 17 | changed = false |
| 18 | for fi in 0 .. m.funcs.len { |
| 19 | func := m.funcs[fi] |
| 20 | n_func_blocks := func.blocks.len |
| 21 | for fbi in 0 .. n_func_blocks { |
| 22 | blk_id := func.blocks[fbi] |
| 23 | blk := m.blocks[blk_id] |
| 24 | n_blk_instrs := blk.instrs.len |
| 25 | for ii in 0 .. n_blk_instrs { |
| 26 | val_id := blk.instrs[ii] |
| 27 | val := m.values[val_id] |
| 28 | if val.kind != .instruction { |
| 29 | continue |
| 30 | } |
| 31 | instr := m.instrs[val.index] |
| 32 | if instr.op != .phi { |
| 33 | continue |
| 34 | } |
| 35 | |
| 36 | // Dead phi removal: phi with no uses can be removed |
| 37 | if val.uses.len == 0 { |
| 38 | // Remove uses from this phi's operands |
| 39 | for i := 0; i < instr.operands.len; i += 2 { |
| 40 | op_val := instr.operands[i] |
| 41 | if op_val != val_id { // Don't try to remove self-reference |
| 42 | remove_phi_use(mut m, op_val, val_id) |
| 43 | } |
| 44 | } |
| 45 | // Mark phi as dead |
| 46 | // Avoid m.instrs[X].field = ... -- chained field assign broken in ARM64 self-hosted |
| 47 | mut dead_phi := m.instrs[val.index] |
| 48 | dead_phi.op = .bitcast |
| 49 | dead_phi.operands = [] |
| 50 | m.instrs[val.index] = dead_phi |
| 51 | changed = true |
| 52 | any_changed = true |
| 53 | continue |
| 54 | } |
| 55 | |
| 56 | // Check if phi is trivial (all non-self operands are the same) |
| 57 | mut unique_val := -1 |
| 58 | mut is_trivial := true |
| 59 | |
| 60 | for i := 0; i < instr.operands.len; i += 2 { |
| 61 | op_val := instr.operands[i] |
| 62 | // Skip self-references (phi refers to itself) |
| 63 | if op_val == val_id { |
| 64 | continue |
| 65 | } |
| 66 | if unique_val == -1 { |
| 67 | unique_val = op_val |
| 68 | } else if unique_val != op_val { |
| 69 | is_trivial = false |
| 70 | break |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | // If trivial and we found a unique value, replace all uses |
| 75 | if is_trivial && unique_val != -1 { |
| 76 | // Replace all uses of this phi with the unique value |
| 77 | m.replace_uses(val_id, unique_val) |
| 78 | // Mark phi as dead (will be cleaned up by DCE or ignored) |
| 79 | // Avoid m.instrs[X].field = ... -- chained field assign broken in ARM64 self-hosted |
| 80 | mut triv_phi := m.instrs[val.index] |
| 81 | triv_phi.op = .bitcast |
| 82 | triv_phi.operands = [] |
| 83 | m.instrs[val.index] = triv_phi |
| 84 | changed = true |
| 85 | any_changed = true |
| 86 | } |
| 87 | } |
| 88 | } |
| 89 | } |
| 90 | } |
| 91 | return any_changed |
| 92 | } |
| 93 | |
| 94 | // Helper to remove a use from a value's uses list (for dead phi removal) |
| 95 | fn remove_phi_use(mut m ssa.Module, val_id int, user_id int) { |
| 96 | if val_id >= m.values.len { |
| 97 | return |
| 98 | } |
| 99 | mut val := &m.values[val_id] |
| 100 | for i := val.uses.len - 1; i >= 0; i-- { |
| 101 | if val.uses[i] == user_id { |
| 102 | val.uses.delete(i) |
| 103 | } |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | // get_block_val_id returns the value ID for a block, using a pre-built lookup table. |
| 108 | // This avoids m.blocks[bid].val_id which can return wrong values for the large |
| 109 | // BasicBlock struct (168 bytes) in ARM64-compiled binaries. |
| 110 | fn get_block_val_id(_m &ssa.Module, bid int, block_val_ids []int) int { |
| 111 | if bid >= 0 && bid < block_val_ids.len { |
| 112 | return block_val_ids[bid] |
| 113 | } |
| 114 | return 0 |
| 115 | } |
| 116 | |
| 117 | // build_block_val_ids scans values for basic_block kind and builds block_id → val_id mapping. |
| 118 | fn build_block_val_ids(m &ssa.Module) []int { |
| 119 | n_blks := m.blocks.len |
| 120 | mut block_val_ids := []int{len: n_blks} |
| 121 | n_vals := m.values.len |
| 122 | for vi in 0 .. n_vals { |
| 123 | if m.values[vi].kind == .basic_block { |
| 124 | bid := m.values[vi].index |
| 125 | if bid >= 0 && bid < n_blks { |
| 126 | block_val_ids[bid] = vi |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | return block_val_ids |
| 131 | } |
| 132 | |
| 133 | // --- Critical Edge Splitting --- |
| 134 | fn split_critical_edges(mut m ssa.Module) { |
| 135 | mut cfg := build_cfg(mut m) |
| 136 | |
| 137 | for fi in 0 .. m.funcs.len { |
| 138 | mut new_blocks := []ssa.BlockID{} |
| 139 | |
| 140 | // Collect edges to split (can't modify while iterating) |
| 141 | mut edges_to_split := [][]ssa.BlockID{} // [pred_id, succ_id] |
| 142 | |
| 143 | // Find all critical edges |
| 144 | func := m.funcs[fi] |
| 145 | func_blocks_len := func.blocks.len |
| 146 | for bi in 0 .. func_blocks_len { |
| 147 | blk_id := func.blocks[bi] |
| 148 | if blk_id < 0 || blk_id >= m.blocks.len { |
| 149 | continue |
| 150 | } |
| 151 | n_blk_succs := cfg.succs[blk_id].len |
| 152 | if n_blk_succs > 1 { |
| 153 | for si in 0 .. n_blk_succs { |
| 154 | succ_id := cfg.succs[blk_id][si] |
| 155 | if succ_id < 0 || succ_id >= m.blocks.len { |
| 156 | continue |
| 157 | } |
| 158 | if cfg.preds[succ_id].len > 1 { |
| 159 | edges_to_split << [blk_id, succ_id] |
| 160 | } |
| 161 | } |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | // Split each critical edge |
| 166 | n_edges := edges_to_split.len |
| 167 | for ei in 0 .. n_edges { |
| 168 | pred_id := edges_to_split[ei][0] |
| 169 | succ_id := edges_to_split[ei][1] |
| 170 | |
| 171 | if pred_id < 0 || pred_id >= m.blocks.len || succ_id < 0 || succ_id >= m.blocks.len { |
| 172 | continue |
| 173 | } |
| 174 | |
| 175 | // Create new intermediate block |
| 176 | func2 := m.funcs[fi] |
| 177 | split_blk := m.add_block(func2.id, 'split_${pred_id}_${succ_id}') |
| 178 | new_blocks << split_blk |
| 179 | |
| 180 | // Re-build block_val_ids after adding the split block |
| 181 | // (new blocks get new val_ids that need to be in the table) |
| 182 | block_val_ids := build_block_val_ids(m) |
| 183 | |
| 184 | succ_val := get_block_val_id(m, succ_id, block_val_ids) |
| 185 | // Add unconditional jump from split block to original successor |
| 186 | m.add_instr(.jmp, split_blk, 0, [succ_val]) |
| 187 | |
| 188 | // Update predecessor's terminator to jump to split block instead of successor |
| 189 | pred_blk := m.blocks[pred_id] |
| 190 | if pred_blk.instrs.len > 0 { |
| 191 | term_val_id := pred_blk.instrs[pred_blk.instrs.len - 1] |
| 192 | if term_val_id >= 0 && term_val_id < m.values.len { |
| 193 | term_val := m.values[term_val_id] |
| 194 | mut term := &m.instrs[term_val.index] |
| 195 | |
| 196 | old_succ_val := succ_val |
| 197 | new_succ_val := get_block_val_id(m, split_blk, block_val_ids) |
| 198 | |
| 199 | // Replace ALL occurrences (handles switch with duplicate targets) |
| 200 | for i in 0 .. term.operands.len { |
| 201 | if term.operands[i] == old_succ_val { |
| 202 | term.operands[i] = new_succ_val |
| 203 | } |
| 204 | } |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | // Update phi nodes in successor to reference split block instead of pred |
| 209 | succ_blk2 := m.blocks[succ_id] |
| 210 | n_succ_instrs := succ_blk2.instrs.len |
| 211 | for vi in 0 .. n_succ_instrs { |
| 212 | val_id := succ_blk2.instrs[vi] |
| 213 | if val_id < 0 || val_id >= m.values.len { |
| 214 | continue |
| 215 | } |
| 216 | val := m.values[val_id] |
| 217 | if val.kind != .instruction { |
| 218 | continue |
| 219 | } |
| 220 | idx := val.index |
| 221 | if idx < 0 || idx >= m.instrs.len { |
| 222 | continue |
| 223 | } |
| 224 | mut instr := &m.instrs[idx] |
| 225 | if instr.op == .phi { |
| 226 | old_pred_val := get_block_val_id(m, pred_id, block_val_ids) |
| 227 | new_pred_val := get_block_val_id(m, split_blk, block_val_ids) |
| 228 | // Replace all occurrences (defensive - handles edge cases) |
| 229 | for i := 1; i < instr.operands.len; i += 2 { |
| 230 | if instr.operands[i] == old_pred_val { |
| 231 | instr.operands[i] = new_pred_val |
| 232 | } |
| 233 | } |
| 234 | } |
| 235 | } |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | // Rebuild CFG after splitting |
| 240 | cfg = build_cfg(mut m) |
| 241 | _ = cfg |
| 242 | } |
| 243 | |
| 244 | fn eliminate_phi_nodes(mut m ssa.Module) { |
| 245 | // First split critical edges to ensure correct copy placement |
| 246 | split_critical_edges(mut m) |
| 247 | |
| 248 | n_blocks := m.blocks.len |
| 249 | n_funcs := m.funcs.len |
| 250 | |
| 251 | // Build reverse map: val_id → block_id for block-kind values. |
| 252 | // Scan values array for basic_block kind instead of using m.blocks[bid].val_id |
| 253 | // which returns wrong results in ARM64-compiled binaries (large struct field access bug). |
| 254 | mut val_to_block := []int{len: m.values.len, init: -1} |
| 255 | n_vals := m.values.len |
| 256 | for vi in 0 .. n_vals { |
| 257 | if m.values[vi].kind == .basic_block { |
| 258 | bid := m.values[vi].index |
| 259 | if bid >= 0 && bid < n_blocks { |
| 260 | val_to_block[vi] = bid |
| 261 | } |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | // Use flat parallel arrays indexed by block_id instead of [][]ParallelCopy. |
| 266 | // pred_copy_dests[blk_id] and pred_copy_srcs[blk_id] store the dest/src pairs. |
| 267 | mut pred_copy_dests := [][]int{len: n_blocks} |
| 268 | mut pred_copy_srcs := [][]int{len: n_blocks} |
| 269 | mut pred_copy_blocks := []int{} |
| 270 | |
| 271 | // Pre-allocate shared src_ref_count array for resolve_parallel_copies_flat. |
| 272 | // Sized to m.values.len + headroom for temps. Reused across all calls. |
| 273 | mut src_ref_count := []int{len: m.values.len + 1024} |
| 274 | mut touched_ids := []int{cap: 256} |
| 275 | |
| 276 | for fi in 0 .. n_funcs { |
| 277 | func := m.funcs[fi] |
| 278 | func_blocks_len2 := func.blocks.len |
| 279 | for bi2 in 0 .. func_blocks_len2 { |
| 280 | blk_id := func.blocks[bi2] |
| 281 | if blk_id < 0 || blk_id >= m.blocks.len { |
| 282 | continue |
| 283 | } |
| 284 | blk := m.blocks[blk_id] |
| 285 | n_instrs := blk.instrs.len |
| 286 | for ii in 0 .. n_instrs { |
| 287 | val_id := blk.instrs[ii] |
| 288 | if val_id < 0 || val_id >= m.values.len { |
| 289 | continue |
| 290 | } |
| 291 | val := m.values[val_id] |
| 292 | if val.kind != .instruction { |
| 293 | continue |
| 294 | } |
| 295 | idx := val.index |
| 296 | if idx < 0 || idx >= m.instrs.len { |
| 297 | continue |
| 298 | } |
| 299 | instr := m.instrs[idx] |
| 300 | if instr.op == .phi { |
| 301 | is_do_loop := func.name == 'do_loop' |
| 302 | if is_do_loop { |
| 303 | eprintln(' do_loop phi: val=${val_id} blk=${blk_id} nops=${instr.operands.len}') |
| 304 | } |
| 305 | // Phi operands: [val0, blk0, val1, blk1, ...] |
| 306 | for i := 0; i < instr.operands.len; i += 2 { |
| 307 | if i + 1 >= instr.operands.len { |
| 308 | break |
| 309 | } |
| 310 | val_in := instr.operands[i] |
| 311 | blk_val := instr.operands[i + 1] |
| 312 | if blk_val < 0 || blk_val >= m.values.len { |
| 313 | if is_do_loop { |
| 314 | eprintln(' op[${i}]: val_in=${val_in} blk_val=${blk_val} SKIP(oob)') |
| 315 | } |
| 316 | continue |
| 317 | } |
| 318 | // Use val_to_block reverse map instead of blk_v.index |
| 319 | pred_blk_idx := val_to_block[blk_val] |
| 320 | if is_do_loop { |
| 321 | blk_kind := m.values[blk_val].kind |
| 322 | blk_index := m.values[blk_val].index |
| 323 | eprintln(' op[${i}]: val_in=${val_in} blk_val=${blk_val} kind=${blk_kind} index=${blk_index} pred_blk_idx=${pred_blk_idx}') |
| 324 | } |
| 325 | |
| 326 | if pred_blk_idx >= 0 && pred_blk_idx < n_blocks { |
| 327 | if pred_copy_dests[pred_blk_idx].len == 0 { |
| 328 | pred_copy_blocks << pred_blk_idx |
| 329 | } |
| 330 | mut pcd := pred_copy_dests[pred_blk_idx] |
| 331 | pcd << val_id |
| 332 | pred_copy_dests[pred_blk_idx] = pcd |
| 333 | mut pcs := pred_copy_srcs[pred_blk_idx] |
| 334 | pcs << val_in |
| 335 | pred_copy_srcs[pred_blk_idx] = pcs |
| 336 | } |
| 337 | } |
| 338 | } |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | // For each predecessor that has copies, resolve with Briggs algorithm |
| 343 | for pki in 0 .. pred_copy_blocks.len { |
| 344 | pred_blk := pred_copy_blocks[pki] |
| 345 | resolve_parallel_copies_flat(mut m, pred_blk, pred_copy_dests[pred_blk], |
| 346 | pred_copy_srcs[pred_blk], mut src_ref_count, mut touched_ids) |
| 347 | } |
| 348 | |
| 349 | // Cleanup: clear pred_copies for blocks we touched |
| 350 | for pki2 in 0 .. pred_copy_blocks.len { |
| 351 | pred_copy_dests[pred_copy_blocks[pki2]] = [] |
| 352 | pred_copy_srcs[pred_copy_blocks[pki2]] = [] |
| 353 | } |
| 354 | pred_copy_blocks.clear() |
| 355 | |
| 356 | // Remove phi instructions (mark as nop/bitcast with no operands) |
| 357 | func2 := m.funcs[fi] |
| 358 | func_blocks_len3 := func2.blocks.len |
| 359 | for bi3 in 0 .. func_blocks_len3 { |
| 360 | blk_id := func2.blocks[bi3] |
| 361 | if blk_id < 0 || blk_id >= m.blocks.len { |
| 362 | continue |
| 363 | } |
| 364 | blk := m.blocks[blk_id] |
| 365 | n_instrs3 := blk.instrs.len |
| 366 | for ii3 in 0 .. n_instrs3 { |
| 367 | val_id := blk.instrs[ii3] |
| 368 | if val_id < 0 || val_id >= m.values.len { |
| 369 | continue |
| 370 | } |
| 371 | val := m.values[val_id] |
| 372 | if val.kind != .instruction { |
| 373 | continue |
| 374 | } |
| 375 | idx := val.index |
| 376 | if idx < 0 || idx >= m.instrs.len { |
| 377 | continue |
| 378 | } |
| 379 | if m.instrs[idx].op == .phi { |
| 380 | // Avoid m.instrs[X].field = ... -- chained field assign broken in ARM64 self-hosted |
| 381 | mut cleanup_instr := m.instrs[idx] |
| 382 | cleanup_instr.op = .bitcast |
| 383 | cleanup_instr.operands = [] |
| 384 | m.instrs[idx] = cleanup_instr |
| 385 | } |
| 386 | } |
| 387 | } |
| 388 | } |
| 389 | } |
| 390 | |
| 391 | // resolve_parallel_copies_flat sequences a set of parallel copies (dest[i] = src[i]) |
| 392 | // using the Briggs algorithm with worklist-based scheduling. |
| 393 | // A copy dest←src is "ready" when no other pending copy reads from dest. |
| 394 | // Uses caller-provided flat array src_ref_count[] indexed by value ID (no maps). |
| 395 | // touched_ids tracks which entries were modified so we can zero only those on cleanup. |
| 396 | fn resolve_parallel_copies_flat(mut m ssa.Module, blk_id int, dests []int, srcs []int, mut src_ref_count []int, mut touched_ids []int) { |
| 397 | if dests.len == 0 { |
| 398 | return |
| 399 | } |
| 400 | |
| 401 | n := dests.len |
| 402 | |
| 403 | // Build pending copies, filtering self-copies |
| 404 | mut p_dest := []int{cap: n} |
| 405 | mut p_src := []int{cap: n} |
| 406 | for ci in 0 .. n { |
| 407 | if dests[ci] != srcs[ci] { |
| 408 | p_dest << dests[ci] |
| 409 | p_src << srcs[ci] |
| 410 | } |
| 411 | } |
| 412 | |
| 413 | np := p_dest.len |
| 414 | if np == 0 { |
| 415 | return |
| 416 | } |
| 417 | |
| 418 | // Ensure src_ref_count is large enough for all value IDs we'll encounter. |
| 419 | // Check current max needed against array length. |
| 420 | mut need_len := m.values.len + np + 4 |
| 421 | for i in 0 .. np { |
| 422 | if p_dest[i] >= need_len { |
| 423 | need_len = p_dest[i] + np + 4 |
| 424 | } |
| 425 | if p_src[i] >= need_len { |
| 426 | need_len = p_src[i] + np + 4 |
| 427 | } |
| 428 | } |
| 429 | if need_len > src_ref_count.len { |
| 430 | // Grow array |
| 431 | for _ in 0 .. need_len - src_ref_count.len { |
| 432 | src_ref_count << 0 |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | // src_ref_count[val] = how many pending copies use val as source |
| 437 | // (array is pre-zeroed or cleaned from previous call via touched_ids) |
| 438 | touched_ids.clear() |
| 439 | for i in 0 .. np { |
| 440 | s := p_src[i] |
| 441 | if src_ref_count[s] == 0 { |
| 442 | touched_ids << s |
| 443 | } |
| 444 | src_ref_count[s] = src_ref_count[s] + 1 |
| 445 | } |
| 446 | // Also track dests we'll read from src_ref_count |
| 447 | for i in 0 .. np { |
| 448 | d := p_dest[i] |
| 449 | if src_ref_count[d] == 0 { |
| 450 | // dest not in touched_ids yet but we read it; track for safety |
| 451 | // (it's already 0, but if cycle-breaking modifies it we need to clean it) |
| 452 | } |
| 453 | } |
| 454 | |
| 455 | // Use int instead of bool for alive - ARM64 codegen may mishandle []bool{init: true} |
| 456 | // alive[i] = 1 if copy i is still pending, 0 if done |
| 457 | mut alive := []int{len: np} |
| 458 | for ai in 0 .. np { |
| 459 | alive[ai] = 1 |
| 460 | } |
| 461 | |
| 462 | // Worklist: use manual stack pointer instead of .delete() which may be buggy in ARM64 |
| 463 | mut worklist := []int{len: np + np + 4} |
| 464 | mut wl_top := 0 // stack pointer: worklist[0..wl_top] are valid entries |
| 465 | for i in 0 .. np { |
| 466 | d := p_dest[i] |
| 467 | if src_ref_count[d] == 0 { |
| 468 | worklist[wl_top] = i |
| 469 | wl_top += 1 |
| 470 | } |
| 471 | } |
| 472 | |
| 473 | // Sequenced output copies |
| 474 | mut s_dest := []int{cap: np + 4} |
| 475 | mut s_src := []int{cap: np + 4} |
| 476 | |
| 477 | mut remaining := np |
| 478 | for remaining > 0 { |
| 479 | if wl_top > 0 { |
| 480 | // Pop a ready copy from the worklist |
| 481 | wl_top -= 1 |
| 482 | idx := worklist[wl_top] |
| 483 | if idx < 0 || idx >= np || alive[idx] == 0 { |
| 484 | continue |
| 485 | } |
| 486 | alive[idx] = 0 |
| 487 | remaining -= 1 |
| 488 | |
| 489 | d := p_dest[idx] |
| 490 | s := p_src[idx] |
| 491 | s_dest << d |
| 492 | s_src << s |
| 493 | |
| 494 | // Decrement ref count for the source |
| 495 | src_ref_count[s] = src_ref_count[s] - 1 |
| 496 | if src_ref_count[s] == 0 { |
| 497 | // s is no longer used as source; any pending copy with dest==s is now ready |
| 498 | for j in 0 .. np { |
| 499 | if alive[j] == 1 && p_dest[j] == s { |
| 500 | // Grow worklist if needed |
| 501 | if wl_top >= worklist.len { |
| 502 | worklist << 0 |
| 503 | } |
| 504 | worklist[wl_top] = j |
| 505 | wl_top += 1 |
| 506 | } |
| 507 | } |
| 508 | } |
| 509 | continue |
| 510 | } |
| 511 | |
| 512 | // Worklist empty but copies remain → cycle. Break it with a temp. |
| 513 | mut ci := -1 |
| 514 | for k in 0 .. np { |
| 515 | if alive[k] == 1 { |
| 516 | ci = k |
| 517 | break |
| 518 | } |
| 519 | } |
| 520 | if ci < 0 { |
| 521 | break |
| 522 | } |
| 523 | |
| 524 | cycle_src := p_src[ci] |
| 525 | mut typ := 0 |
| 526 | if cycle_src >= 0 && cycle_src < m.values.len { |
| 527 | typ = m.values[cycle_src].typ |
| 528 | } |
| 529 | if typ == 0 && p_dest[ci] >= 0 && p_dest[ci] < m.values.len { |
| 530 | typ = m.values[p_dest[ci]].typ |
| 531 | } |
| 532 | temp := insert_temp_in_block(mut m, blk_id, cycle_src, typ) |
| 533 | |
| 534 | // Grow src_ref_count if temp exceeds array bounds |
| 535 | if temp >= src_ref_count.len { |
| 536 | for _ in 0 .. temp + 1 - src_ref_count.len { |
| 537 | src_ref_count << 0 |
| 538 | } |
| 539 | } |
| 540 | |
| 541 | // Replace all references to cycle_src with temp in pending copies |
| 542 | src_ref_count[temp] = 0 |
| 543 | touched_ids << temp |
| 544 | for i in 0 .. np { |
| 545 | if alive[i] == 1 && p_src[i] == cycle_src { |
| 546 | p_src[i] = temp |
| 547 | src_ref_count[temp] = src_ref_count[temp] + 1 |
| 548 | } |
| 549 | } |
| 550 | src_ref_count[cycle_src] = 0 |
| 551 | |
| 552 | // Check if any copy whose dest was cycle_src is now ready |
| 553 | for j in 0 .. np { |
| 554 | if alive[j] == 1 && p_dest[j] == cycle_src { |
| 555 | if wl_top >= worklist.len { |
| 556 | worklist << 0 |
| 557 | } |
| 558 | worklist[wl_top] = j |
| 559 | wl_top += 1 |
| 560 | } |
| 561 | } |
| 562 | } |
| 563 | |
| 564 | // Clean up src_ref_count: zero only the entries we touched |
| 565 | for ti in 0 .. touched_ids.len { |
| 566 | tid := touched_ids[ti] |
| 567 | if tid < src_ref_count.len { |
| 568 | src_ref_count[tid] = 0 |
| 569 | } |
| 570 | } |
| 571 | |
| 572 | // Emit the sequenced copies |
| 573 | for si in 0 .. s_dest.len { |
| 574 | insert_copy_in_block(mut m, blk_id, s_dest[si], s_src[si]) |
| 575 | } |
| 576 | } |
| 577 | |
| 578 | fn insert_temp_in_block(mut m ssa.Module, blk_id int, src int, typ int) int { |
| 579 | m.instrs << ssa.Instruction{ |
| 580 | op: .bitcast |
| 581 | block: blk_id |
| 582 | typ: typ |
| 583 | operands: [ssa.ValueID(src)] |
| 584 | } |
| 585 | temp_id := m.add_value_node(.instruction, typ, 'phi_tmp_${m.values.len}', m.instrs.len - 1) |
| 586 | |
| 587 | // Read whole struct, modify, write back (chained field assign broken in ARM64) |
| 588 | if src < m.values.len && temp_id !in m.values[src].uses { |
| 589 | mut sv := m.values[src] |
| 590 | sv.uses << temp_id |
| 591 | m.values[src] = sv |
| 592 | } |
| 593 | |
| 594 | // Insert before terminator — build new instrs array |
| 595 | mut blk := m.blocks[blk_id] |
| 596 | mut insert_idx := blk.instrs.len |
| 597 | if insert_idx > 0 { |
| 598 | last_val_id := blk.instrs[blk.instrs.len - 1] |
| 599 | last_val := m.values[last_val_id] |
| 600 | last_instr := m.instrs[last_val.index] |
| 601 | if last_instr.op in [.ret, .br, .jmp, .switch_, .unreachable] { |
| 602 | insert_idx = blk.instrs.len - 1 |
| 603 | } |
| 604 | } |
| 605 | mut new_instrs := []int{cap: blk.instrs.len + 1} |
| 606 | for ii in 0 .. insert_idx { |
| 607 | new_instrs << blk.instrs[ii] |
| 608 | } |
| 609 | new_instrs << temp_id |
| 610 | for ii in insert_idx .. blk.instrs.len { |
| 611 | new_instrs << blk.instrs[ii] |
| 612 | } |
| 613 | blk.instrs = new_instrs |
| 614 | m.blocks[blk_id] = blk |
| 615 | return temp_id |
| 616 | } |
| 617 | |
| 618 | fn insert_copy_in_block(mut m ssa.Module, blk_id int, dest int, src int) { |
| 619 | dest_val := m.values[dest] |
| 620 | typ := dest_val.typ |
| 621 | m.instrs << ssa.Instruction{ |
| 622 | op: .assign |
| 623 | block: blk_id |
| 624 | typ: typ |
| 625 | operands: [ssa.ValueID(dest), src] |
| 626 | } |
| 627 | val_id := m.add_value_node(.instruction, typ, 'copy', m.instrs.len - 1) |
| 628 | // Read whole struct, modify, write back (chained field assign broken in ARM64) |
| 629 | if dest < m.values.len && val_id !in m.values[dest].uses { |
| 630 | mut dv := m.values[dest] |
| 631 | dv.uses << val_id |
| 632 | m.values[dest] = dv |
| 633 | } |
| 634 | if src < m.values.len && val_id !in m.values[src].uses { |
| 635 | mut sv := m.values[src] |
| 636 | sv.uses << val_id |
| 637 | m.values[src] = sv |
| 638 | } |
| 639 | |
| 640 | // Insert before terminator — build new instrs array |
| 641 | mut blk := m.blocks[blk_id] |
| 642 | mut insert_idx := blk.instrs.len |
| 643 | if insert_idx > 0 { |
| 644 | last_val_id := blk.instrs[blk.instrs.len - 1] |
| 645 | last_val := m.values[last_val_id] |
| 646 | last_instr := m.instrs[last_val.index] |
| 647 | if last_instr.op in [.ret, .br, .jmp, .switch_, .unreachable] { |
| 648 | insert_idx = blk.instrs.len - 1 |
| 649 | } |
| 650 | } |
| 651 | mut new_instrs := []int{cap: blk.instrs.len + 1} |
| 652 | for ii in 0 .. insert_idx { |
| 653 | new_instrs << blk.instrs[ii] |
| 654 | } |
| 655 | new_instrs << val_id |
| 656 | for ii in insert_idx .. blk.instrs.len { |
| 657 | new_instrs << blk.instrs[ii] |
| 658 | } |
| 659 | blk.instrs = new_instrs |
| 660 | m.blocks[blk_id] = blk |
| 661 | } |
| 662 | |