v / vlib / v2 / ssa / module.v
702 lines · 643 sloc · 19.17 KB · a7153322629091f3f2d79f0bf318d0fb2c13c425
Raw
1// Copyright (c) 2026 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4
5module ssa
6
7import v2.types
8
9pub struct TargetData {
10pub:
11 ptr_size int
12 endian_little bool
13}
14
15@[heap]
16pub struct Module {
17pub mut:
18 name string
19 target TargetData
20 type_store TypeStore
21 // For parallel SSA workers: shared reference to main module's type_store.
22 // When non-nil, type operations use this instead of the local type_store.
23 shared_type_store &TypeStore = unsafe { nil }
24
25 // Type checker environment (optional, for backends that need rich type info)
26 env &types.Environment = unsafe { nil }
27
28 // Arenas
29 values []Value
30 instrs []Instruction
31 blocks []BasicBlock
32 funcs []Function
33 globals []GlobalVar
34
35 // C struct names: TypeID -> C struct name (e.g., 114 -> "stat" for struct stat)
36 // Used by the C gen to emit `typedef struct <name> Struct_N;` instead of
37 // generating a custom struct definition that would have the wrong memory layout.
38 c_struct_names map[int]string
39 // C structs marked with @[typedef] – already a C typedef, not a struct tag.
40 c_typedef_structs map[int]bool
41
42 // Constant cache: (type, name) -> ValueID for deduplication
43 const_cache map[string]ValueID
44}
45
46pub fn Module.new(name string) &Module {
47 mut m := &Module{
48 name: name
49 type_store: TypeStore.new()
50 }
51 // Pre-allocate arenas to avoid repeated reallocation during SSA building.
52 // A typical hello.v compilation needs ~158K values, ~134K instrs, ~10K blocks, ~1.8K funcs.
53 // Pre-allocating avoids ARM64 backend issues with array growth reallocation.
54 m.values = []Value{cap: 262144}
55 m.instrs = []Instruction{cap: 262144}
56 m.blocks = []BasicBlock{cap: 16384}
57 m.funcs = []Function{cap: 4096}
58 m.globals = []GlobalVar{cap: 2048}
59 // Reserve ID 0 to represent "null" or "invalid", avoiding collisions
60 // with map lookups returning 0.
61 m.values << Value{
62 kind: .unknown
63 id: 0
64 }
65 return m
66}
67
68// release_outer_arenas_after_mir_lower releases SSA arena buffers that MIR has
69// copied by value. Nested slices (value uses, instruction operands, block edges,
70// function params/blocks) are intentionally not freed because MIR shallow-shares
71// them after lowering.
72pub fn (mut m Module) release_outer_arenas_after_mir_lower() {
73 unsafe {
74 m.values.free()
75 m.instrs.free()
76 m.blocks.free()
77 m.funcs.free()
78 }
79 m.values = []Value{}
80 m.instrs = []Instruction{}
81 m.blocks = []BasicBlock{}
82 m.funcs = []Function{}
83}
84
85pub fn (mut m Module) new_function(name string, ret TypeID, params []TypeID) int {
86 // Check if function already exists (avoid duplicates from multiple files)
87 for i, f in m.funcs {
88 if f.name == name {
89 return i
90 }
91 }
92 id := m.funcs.len
93 m.funcs << Function{
94 id: id
95 name: name
96 typ: ret
97 }
98 return id
99}
100
101pub fn (mut m Module) add_block(func_id int, name string) BlockID {
102 id := m.blocks.len
103 // FIX: Sanitize block names for C labels (replace . with _)
104 safe_name := name.replace('.', '_')
105 unique_name := '${safe_name}_${id}'
106
107 // Store 'id' (index in blocks arena) in the Value
108 val_id := m.add_value_node(.basic_block, 0, unique_name, id)
109
110 m.blocks << BasicBlock{
111 id: id
112 val_id: val_id
113 name: unique_name
114 parent: func_id
115 }
116 // Avoid m.funcs[func_id].blocks << id -- chained broken in ARM64 self-hosted
117 mut f := m.funcs[func_id]
118 f.blocks << id
119 f.is_prototype = false
120 m.funcs[func_id] = f
121 return id
122}
123
124// Updated to accept 'index'
125pub fn (mut m Module) add_value_node(kind ValueKind, typ TypeID, name string, index int) ValueID {
126 id := m.values.len
127 m.values << Value{
128 id: id
129 kind: kind
130 typ: typ
131 name: name
132 index: index
133 }
134 return id
135}
136
137// --- Helpers to avoid chained struct-array mutations (broken in ARM64 self-hosted) ---
138
139pub fn (mut m Module) func_add_param(func_id int, param_val ValueID) {
140 mut f := m.funcs[func_id]
141 f.params << param_val
142 m.funcs[func_id] = f
143}
144
145pub fn (mut m Module) func_set_c_extern(func_id int, val bool) {
146 mut f := m.funcs[func_id]
147 f.is_c_extern = val
148 m.funcs[func_id] = f
149}
150
151pub fn (mut m Module) func_set_prototype(func_id int, val bool) {
152 mut f := m.funcs[func_id]
153 f.is_prototype = val
154 m.funcs[func_id] = f
155}
156
157pub fn (mut m Module) func_clear_blocks(func_id int) {
158 mut f := m.funcs[func_id]
159 f.blocks.clear()
160 m.funcs[func_id] = f
161}
162
163pub fn (mut m Module) func_clear_params(func_id int) {
164 mut f := m.funcs[func_id]
165 f.params.clear()
166 m.funcs[func_id] = f
167}
168
169pub fn (mut m Module) func_set_params(func_id int, params []ValueID) {
170 mut f := m.funcs[func_id]
171 f.params = params
172 m.funcs[func_id] = f
173}
174
175pub fn (mut m Module) block_add_succ(from int, to int) {
176 mut blk := m.blocks[from]
177 blk.succs << to
178 m.blocks[from] = blk
179}
180
181pub fn (mut m Module) block_add_pred(to int, from int) {
182 mut blk := m.blocks[to]
183 blk.preds << from
184 m.blocks[to] = blk
185}
186
187pub fn (mut m Module) nop_instr(val_idx int) {
188 mut instr := m.instrs[val_idx]
189 instr.op = .bitcast
190 instr.operands = []
191 m.instrs[val_idx] = instr
192}
193
194// Get or create a constant value, reusing existing ones when possible.
195// This maintains SSA's immutability principle by avoiding duplicate constants.
196pub fn (mut m Module) get_or_add_const(typ TypeID, name string) ValueID {
197 key := '${typ}:${name}'
198 if existing := map_get_value_id(m.const_cache, key) {
199 return existing
200 }
201 id := m.add_value_node(.constant, typ, name, 0)
202 m.const_cache[key] = id
203 return id
204}
205
206pub fn (m Module) get_block_from_val(val_id int) int {
207 return m.values[val_id].index
208}
209
210pub fn (mut m Module) add_instr(op OpCode, block BlockID, typ TypeID, operands []ValueID) ValueID {
211 // 1. Save Instruction Index
212 instr_idx := m.instrs.len
213
214 instr := Instruction{
215 op: op
216 block: block
217 typ: typ
218 operands: operands
219 }
220 m.instrs << instr
221
222 // 2. Pass instr_idx to Value
223 val_id := m.add_value_node(.instruction, typ, '', instr_idx)
224
225 // 3. Link Block — read whole struct, modify, write back (chained broken in ARM64)
226 mut blk := m.blocks[block]
227 blk.instrs << val_id
228 m.blocks[block] = blk
229
230 // 4. Update Def-Use — same pattern
231 for op_id in operands {
232 if op_id < m.values.len {
233 mut v := m.values[op_id]
234 v.uses << val_id
235 m.values[op_id] = v
236 }
237 }
238
239 return val_id
240}
241
242pub fn (mut m Module) add_global(name string, typ TypeID, is_const bool) int {
243 return m.add_global_with_value(name, typ, is_const, 0)
244}
245
246pub fn (mut m Module) add_global_with_value(name string, typ TypeID, is_const bool, initial_value i64) int {
247 id := m.globals.len
248 g := GlobalVar{
249 name: name
250 typ: typ
251 linkage: .private // Local global, not external
252 is_constant: is_const
253 initial_value: initial_value
254 }
255 m.globals << g
256
257 // FIX: The Value representing a global is a POINTER to the data
258 ptr_typ := m.type_store.get_ptr(typ)
259 return m.add_value_node(.global, ptr_typ, name, id)
260}
261
262pub fn (mut m Module) add_global_with_data(name string, elem_type TypeID, is_const bool, data []u8) int {
263 id := m.globals.len
264 g := GlobalVar{
265 name: name
266 typ: elem_type
267 linkage: .private
268 is_constant: is_const
269 initial_data: data
270 }
271 m.globals << g
272
273 // The Value is a POINTER to element data (for direct indexing)
274 ptr_typ := m.type_store.get_ptr(elem_type)
275 return m.add_value_node(.global, ptr_typ, name, id)
276}
277
278// add_external_global adds an external global variable (defined outside this module)
279// Returns the ValueID for the global pointer
280pub fn (mut m Module) add_external_global(name string, typ TypeID) ValueID {
281 // Check if this external global already exists
282 for v in m.values {
283 if v.kind == .global && v.name == name {
284 return v.id
285 }
286 }
287
288 // Create a new external global
289 id := m.globals.len
290 g := GlobalVar{
291 name: name
292 typ: typ
293 linkage: .external
294 }
295 m.globals << g
296
297 // The Value representing a global is a POINTER to the data
298 ptr_typ := m.type_store.get_ptr(typ)
299 return m.add_value_node(.global, ptr_typ, name, id)
300}
301
302pub fn (mut m Module) add_instr_front(op OpCode, block BlockID, typ TypeID, operands []ValueID) ValueID {
303 instr_idx := m.instrs.len
304 instr := Instruction{
305 op: op
306 block: block
307 typ: typ
308 operands: operands
309 }
310 m.instrs << instr
311 val_id := m.add_value_node(.instruction, typ, '', instr_idx)
312
313 // Prepend to block instructions — read whole struct, modify, write back (chained broken in ARM64)
314 mut blk := m.blocks[block]
315 blk.instrs.prepend(val_id)
316 m.blocks[block] = blk
317
318 // Update Def-Use — same pattern
319 for op_id in operands {
320 if op_id < m.values.len {
321 mut v := m.values[op_id]
322 v.uses << val_id
323 m.values[op_id] = v
324 }
325 }
326 return val_id
327}
328
329// append_phi_operands appends a (val, block_val) pair to a phi instruction's operands.
330// Append (val, block_val) pair to phi instruction operands.
331// Avoid m.instrs[idx].operands << x — chained append broken in ARM64 self-hosted.
332pub fn (mut m Module) append_phi_operands(instr_idx int, val int, block_val int) {
333 // Read whole struct, modify, write back (chained broken in ARM64)
334 mut instr := m.instrs[instr_idx]
335 instr.operands << val
336 instr.operands << block_val
337 m.instrs[instr_idx] = instr
338}
339
340pub fn (mut m Module) replace_uses(old_val int, new_val int) {
341 // Copy uses, because we modify instr operands which might change things
342 uses := m.values[old_val].uses.clone()
343 for use_id in uses {
344 use_val := m.values[use_id]
345 if use_val.kind == .instruction {
346 // Read whole instr, modify operands, write back (chained broken in ARM64)
347 mut instr := m.instrs[use_val.index]
348 mut replaced := false
349 for i in 0 .. instr.operands.len {
350 if instr.operands[i] == old_val {
351 instr.operands[i] = new_val
352 replaced = true
353 }
354 }
355 if replaced {
356 m.instrs[use_val.index] = instr
357 }
358 // Only add to uses list once per user, even if used multiple times
359 if replaced && use_id !in m.values[new_val].uses {
360 mut nv := m.values[new_val]
361 nv.uses << use_id
362 m.values[new_val] = nv
363 }
364 }
365 }
366 mut ov := m.values[old_val]
367 ov.uses = []
368 m.values[old_val] = ov
369}
370
371fn dfs(mut m Module, blk int, mut visited map[int]bool, mut rpo []int) {
372 visited[blk] = true
373 for s in m.blocks[blk].succs {
374 if !visited[s] {
375 dfs(mut m, s, mut visited, mut rpo)
376 }
377 }
378 rpo << blk
379}
380
381fn (mut m Module) get_rpo(func Function) []int {
382 mut visited := map[int]bool{}
383 mut rpo := []int{}
384 dfs(mut m, func.blocks[0], mut visited, mut rpo)
385 // rpo.reverse_inplace()
386 rpo = rpo.reverse()
387 return rpo
388}
389
390// new_worker_module creates a lightweight Module for parallel SSA building.
391// Each worker gets its own copy of type_store (COW from main).
392// Has its own values/instrs/blocks arenas starting from empty.
393pub fn (mut m Module) new_worker_module() &Module {
394 // Explicitly clone funcs and globals to avoid COW races between threads.
395 mut wf := []Function{cap: m.funcs.len}
396 for f in m.funcs {
397 wf << f
398 }
399 mut wg := []GlobalVar{cap: m.globals.len}
400 for g in m.globals {
401 wg << g
402 }
403 // Deep-clone TypeStore to avoid COW races on types[] and cache map.
404 mut wts := TypeStore{}
405 wts.types = []Type{cap: m.type_store.types.len}
406 for t in m.type_store.types {
407 wts.types << t
408 }
409 wts.cache = m.type_store.cache.clone()
410
411 mut w := &Module{
412 name: m.name
413 shared_type_store: unsafe { nil }
414 type_store: wts
415 env: m.env
416 funcs: wf
417 globals: wg
418 }
419 // Seed worker with main module's values so that ValueIDs from Phases 1-3
420 // (constants, globals, func_refs, params) are valid in the worker.
421 w.values = []Value{cap: m.values.len + 32768}
422 for v in m.values {
423 w.values << v
424 }
425 // Also seed instrs and blocks (typically empty after Phases 1-3, but just in case).
426 w.instrs = []Instruction{cap: m.instrs.len + 32768}
427 for instr in m.instrs {
428 w.instrs << instr
429 }
430 w.blocks = []BasicBlock{cap: m.blocks.len + 2048}
431 for blk in m.blocks {
432 w.blocks << blk
433 }
434 return w
435}
436
437// FuncSSAData holds the SSA data produced by a worker for a single function.
438pub struct FuncSSAData {
439pub:
440 func_idx int // Index into main module's funcs[]
441 blocks []BlockID // Worker-local block IDs
442 params []ValueID // Worker-local param ValueIDs
443}
444
445// merge_worker_module merges a worker's SSA arenas into the main module.
446// Workers are seeded with the main module's values/instrs/blocks from Phases 1-3.
447// seed_values/seed_instrs/seed_blocks/seed_funcs are the lengths of the seed data.
448// Only data beyond the seed is new and needs to be merged with ID remapping.
449// func_data contains (func_idx, blocks, params) for updating main funcs[].
450pub fn (mut m Module) merge_worker_module(w &Module, func_data []FuncSSAData, seed_values int, seed_instrs int, seed_blocks int, seed_types int, seed_funcs int) {
451 // Build type remapping: worker type IDs >= seed_types may differ from main.
452 // For each new worker type, find or create equivalent in main.
453 mut type_remap := []TypeID{len: w.type_store.types.len, init: 0}
454 // Seed types map to themselves (identity)
455 for ti := 0; ti < seed_types && ti < type_remap.len; ti++ {
456 type_remap[ti] = ti
457 }
458 // Map new worker types to main types
459 for ti := seed_types; ti < w.type_store.types.len; ti++ {
460 wt := w.type_store.types[ti]
461 // Generate cache key matching TypeStore conventions
462 mut cache_key := ''
463 match wt.kind {
464 .int_t {
465 cache_key = if wt.is_unsigned { 'u${wt.width}' } else { 'i${wt.width}' }
466 }
467 .float_t {
468 cache_key = 'f${wt.width}'
469 }
470 .ptr_t {
471 // Remap the elem_type first
472 remapped_elem := if int(wt.elem_type) < type_remap.len {
473 type_remap[int(wt.elem_type)]
474 } else {
475 wt.elem_type
476 }
477 cache_key = 'p${remapped_elem}'
478 }
479 .array_t {
480 remapped_elem := if int(wt.elem_type) < type_remap.len {
481 type_remap[int(wt.elem_type)]
482 } else {
483 wt.elem_type
484 }
485 cache_key = 'a${remapped_elem}_${wt.len}'
486 }
487 else {}
488 }
489
490 // Try cache lookup in main
491 if cache_key.len > 0 {
492 if existing := map_get_type_id(m.type_store.cache, cache_key) {
493 type_remap[ti] = existing
494 continue
495 }
496 }
497 // Register new type in main with remapped field types
498 remapped_ret := if int(wt.ret_type) < type_remap.len && int(wt.ret_type) >= seed_types {
499 type_remap[int(wt.ret_type)]
500 } else {
501 wt.ret_type
502 }
503 remapped_elem2 := if int(wt.elem_type) < type_remap.len && int(wt.elem_type) >= seed_types {
504 type_remap[int(wt.elem_type)]
505 } else {
506 wt.elem_type
507 }
508 mut new_type := Type{
509 kind: wt.kind
510 width: wt.width
511 len: wt.len
512 is_c_struct: wt.is_c_struct
513 is_union: wt.is_union
514 is_unsigned: wt.is_unsigned
515 ret_type: remapped_ret
516 elem_type: remapped_elem2
517 }
518 if wt.fields.len > 0 {
519 mut new_fields := []TypeID{cap: wt.fields.len}
520 for f in wt.fields {
521 new_fields << if int(f) < type_remap.len && int(f) >= seed_types {
522 type_remap[int(f)]
523 } else {
524 f
525 }
526 }
527 new_type = Type{
528 ...new_type
529 fields: new_fields
530 field_names: wt.field_names
531 }
532 }
533 if wt.params.len > 0 {
534 mut new_params := []TypeID{cap: wt.params.len}
535 for p in wt.params {
536 new_params << if int(p) < type_remap.len && int(p) >= seed_types {
537 type_remap[int(p)]
538 } else {
539 p
540 }
541 }
542 new_type = Type{
543 ...new_type
544 params: new_params
545 }
546 }
547 new_id := m.type_store.register(new_type)
548 if cache_key.len > 0 {
549 m.type_store.cache[cache_key] = new_id
550 }
551 type_remap[ti] = new_id
552 }
553
554 // Remapping offsets: worker IDs in [seed..seed+N) → main IDs in [main.len..main.len+N).
555 // For seed IDs (< seed_values), no remapping needed — they're the same in both.
556 value_off := m.values.len - seed_values
557 instr_off := m.instrs.len - seed_instrs
558 block_off := m.blocks.len - seed_blocks
559
560 // Append worker values beyond seed
561 for wi := seed_values; wi < w.values.len; wi++ {
562 wv := w.values[wi]
563 mut new_index := wv.index
564 if wv.kind == .instruction {
565 new_index += instr_off
566 } else if wv.kind == .basic_block {
567 new_index += block_off
568 }
569 // Remap uses — only IDs >= seed need remapping
570 mut new_uses := []ValueID{cap: wv.uses.len}
571 for u in wv.uses {
572 new_uses << if u >= seed_values { u + value_off } else { u }
573 }
574 // Remap type
575 new_typ := if int(wv.typ) >= seed_types && int(wv.typ) < type_remap.len {
576 type_remap[int(wv.typ)]
577 } else {
578 wv.typ
579 }
580 m.values << Value{
581 id: wi + value_off
582 kind: wv.kind
583 typ: new_typ
584 name: wv.name
585 index: new_index
586 uses: new_uses
587 }
588 }
589
590 // Append worker instructions beyond seed
591 for ii := seed_instrs; ii < w.instrs.len; ii++ {
592 instr := w.instrs[ii]
593 mut new_ops := []ValueID{cap: instr.operands.len}
594 for op in instr.operands {
595 new_ops << if op >= seed_values { op + value_off } else { op }
596 }
597 // Remap type
598 new_typ := if int(instr.typ) >= seed_types && int(instr.typ) < type_remap.len {
599 type_remap[int(instr.typ)]
600 } else {
601 instr.typ
602 }
603 m.instrs << Instruction{
604 op: instr.op
605 block: if instr.block >= seed_blocks {
606 instr.block + block_off
607 } else {
608 instr.block
609 }
610 typ: new_typ
611 operands: new_ops
612 pos: instr.pos
613 atomic_ord: instr.atomic_ord
614 inline: instr.inline
615 }
616 }
617
618 // Append worker blocks beyond seed
619 for bi := seed_blocks; bi < w.blocks.len; bi++ {
620 blk := w.blocks[bi]
621 mut new_instrs := []ValueID{cap: blk.instrs.len}
622 for vi in blk.instrs {
623 new_instrs << if vi >= seed_values { vi + value_off } else { vi }
624 }
625 mut new_preds := []BlockID{cap: blk.preds.len}
626 for p in blk.preds {
627 new_preds << if p >= seed_blocks { p + block_off } else { p }
628 }
629 mut new_succs := []BlockID{cap: blk.succs.len}
630 for s in blk.succs {
631 new_succs << if s >= seed_blocks { s + block_off } else { s }
632 }
633 m.blocks << BasicBlock{
634 id: blk.id + block_off
635 val_id: if blk.val_id >= seed_values { blk.val_id + value_off } else { blk.val_id }
636 name: blk.name
637 parent: blk.parent
638 instrs: new_instrs
639 preds: new_preds
640 succs: new_succs
641 }
642 }
643
644 // Update main module's funcs[] with blocks and params from worker
645 for fd in func_data {
646 if fd.func_idx >= m.funcs.len {
647 continue
648 }
649 mut f := m.funcs[fd.func_idx]
650 // Skip if function was already built by a previous worker's merge.
651 // Without this guard, duplicate blocks from multiple workers get appended,
652 // leading to mismatched parameter/value IDs and crashes.
653 if f.blocks.len > 0 {
654 continue
655 }
656 for blk_id in fd.blocks {
657 f.blocks << if blk_id >= seed_blocks { blk_id + block_off } else { blk_id }
658 }
659 mut new_params := []ValueID{cap: fd.params.len}
660 for p in fd.params {
661 new_params << if p >= seed_values { p + value_off } else { p }
662 }
663 f.params = new_params
664 m.funcs[fd.func_idx] = f
665 }
666
667 // Also add any new functions created by workers (stubs like wyhash, array_eq).
668 for fi := seed_funcs; fi < w.funcs.len; fi++ {
669 wfunc := w.funcs[fi]
670 mut already_added := false
671 for existing in m.funcs {
672 if existing.name == wfunc.name {
673 already_added = true
674 break
675 }
676 }
677 if already_added {
678 continue
679 }
680 mut new_blocks := []BlockID{cap: wfunc.blocks.len}
681 for blk_id in wfunc.blocks {
682 new_blocks << if blk_id >= seed_blocks { blk_id + block_off } else { blk_id }
683 }
684 mut new_params := []ValueID{cap: wfunc.params.len}
685 for p in wfunc.params {
686 new_params << if p >= seed_values { p + value_off } else { p }
687 }
688 // Remap function return type
689 new_typ := if int(wfunc.typ) >= seed_types && int(wfunc.typ) < type_remap.len {
690 type_remap[int(wfunc.typ)]
691 } else {
692 wfunc.typ
693 }
694 m.funcs << Function{
695 id: m.funcs.len
696 name: wfunc.name
697 typ: new_typ
698 blocks: new_blocks
699 params: new_params
700 }
701 }
702}
703