| 1 | // Copyright (c) 2026 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | module builder |
| 5 | |
| 6 | import v2.ast |
| 7 | import v2.transformer |
| 8 | import runtime |
| 9 | |
$if !windows {
	// Minimal pthread bindings used by the parallel flat transform.
	fn C.pthread_create(thread &C.pthread_t, attr voidptr, start_routine fn (voidptr) voidptr, arg voidptr) int
	fn C.pthread_join(thread C.pthread_t, retval voidptr) int
	fn C.pthread_attr_init(attr voidptr) int
	fn C.pthread_attr_setstacksize(attr voidptr, stacksize usize) int
	fn C.pthread_attr_destroy(attr voidptr) int

	// TransformChunkFlatArgs is the per-worker job description handed to
	// `transform_chunk_flat_thread`. A worker transforms the files in
	// [flat_start, flat_end) into a FlatBuilder of its own and stores the
	// finished FlatAst through `result_ptr`; the spawning thread later merges
	// those results with `FlatBuilder.append_flat`.
	struct TransformChunkFlatArgs {
		t                voidptr // &transformer.Transformer the worker clone is derived from
		flat             &ast.FlatAst = unsafe { nil } // input flat AST handed to every per-file transform call
		flat_extra_stmts [][]ast.Stmt // extra statements, indexed by file index
		flat_start       int // first file index of this chunk (inclusive)
		flat_end         int // end of this chunk (exclusive)
		result_ptr       voidptr // &ast.FlatAst slot that receives the worker's output
		worker_ptr       voidptr // &voidptr slot that receives the worker clone's address
		worker_idx       int // index passed to `new_worker_clone`
	}

	// transform_chunk_flat_thread is the pthread entry point for one chunk.
	// `arg` must point to a TransformChunkFlatArgs. The function always
	// returns nil; its results are published through `result_ptr` and
	// `worker_ptr` of the argument struct.
	fn transform_chunk_flat_thread(arg voidptr) voidptr {
		job := unsafe { &TransformChunkFlatArgs(arg) }
		base := unsafe { &transformer.Transformer(job.t) }
		mut worker := base.new_worker_clone(job.worker_idx)
		// Every file of the chunk is emitted straight into this worker-local
		// builder, one file at a time.
		mut chunk_out := ast.new_flat_builder()
		for file_idx in job.flat_start .. job.flat_end {
			// Files without an entry in the extra-statement table get an empty list.
			extra := if file_idx < 0 || file_idx >= job.flat_extra_stmts.len {
				[]ast.Stmt{}
			} else {
				job.flat_extra_stmts[file_idx]
			}
			worker.transform_file_index_with_extra_to_flat(job.flat, file_idx, extra, mut
				chunk_out)
		}
		// Publish the chunk's flat AST first, then the worker clone, through
		// the caller-provided slots.
		unsafe {
			*(&ast.FlatAst(job.result_ptr)) = chunk_out.flat
		}
		unsafe {
			*(&voidptr(job.worker_ptr)) = voidptr(worker)
		}
		return unsafe { nil }
	}
}
| 55 | |
// flat_extra_stmts_by_file converts the sparse `file index -> statements` map
// into a dense table with exactly `n_files` entries. Files without an entry in
// `extra_stmts` get an empty statement list; map keys outside [0, n_files) are
// ignored.
fn flat_extra_stmts_by_file(extra_stmts map[int][]ast.Stmt, n_files int) [][]ast.Stmt {
	// Start from one empty statement list per file.
	mut per_file := [][]ast.Stmt{len: n_files, init: []ast.Stmt{}}
	for file_idx, stmts in extra_stmts {
		if file_idx < 0 || file_idx >= n_files {
			continue
		}
		per_file[file_idx] = stmts
	}
	return per_file
}
| 68 | |
// transform_files_parallel_flat_direct is the flat-native parallel transform
// used by every backend. It mirrors the sequential
// `transform_flat_to_flat_direct` (pre_pass -> prepare -> per-file cursor
// transform -> post_pass tail) but fans the per-file loop across worker threads:
// each worker emits its contiguous file range cursor-native into its OWN
// FlatBuilder, then the main thread concatenates them in file order via
// `FlatBuilder.append_flat`. No legacy ast.File is materialised; backends that
// still consume []ast.File (.v/eval) rehydrate from the transformed flat at
// the codegen boundary in builder.v.
//
// Per-worker synth-position disjointness is handled by `new_worker_clone`
// (it offsets synth_pos_counter by `-worker_idx * 100_000`); worker state is
// merged back via `merge_worker` after the join, exactly as the legacy path.
//
// On Windows, or when there is at most one file or one job, the files are
// transformed sequentially on the calling thread. If a worker thread cannot
// be created, its chunk is transformed on the calling thread instead, so
// every chunk always produces a result before the merge.
//
// Returns the transformed flat AST after the post-pass tail has been applied.
fn (mut b Builder) transform_files_parallel_flat_direct(mut trans transformer.Transformer) ast.FlatAst {
	// Pre-pass + generic preparation: sequential, once (same as the legacy path).
	trans.pre_pass_from_flat(&b.flat)
	mut flat_extra_stmts := [][]ast.Stmt{}
	if trans.needs_full_files_for_transform() {
		extra_stmts := trans.prepare_flat_for_transform(&b.flat)
		flat_extra_stmts = flat_extra_stmts_by_file(extra_stmts, b.flat.files.len)
	}

	n_jobs := runtime.nr_jobs()
	n_files := b.flat.files.len
	mut out := ast.new_flat_builder()
	$if windows {
		for fi in 0 .. n_files {
			extra := if fi < flat_extra_stmts.len { flat_extra_stmts[fi] } else { []ast.Stmt{} }
			trans.transform_file_index_with_extra_to_flat(&b.flat, fi, extra, mut out)
		}
	} $else {
		if n_files <= 1 || n_jobs <= 1 {
			for fi in 0 .. n_files {
				extra := if fi < flat_extra_stmts.len { flat_extra_stmts[fi] } else { []ast.Stmt{} }
				trans.transform_file_index_with_extra_to_flat(&b.flat, fi, extra, mut out)
			}
		} else {
			// Contiguous [start,end) file ranges, one per worker. Here
			// n_files >= 2 and n_jobs >= 2, so chunk_size >= 1 and
			// 1 <= n_chunks <= n_jobs. Merging the worker outputs in chunk
			// order preserves the original file order.
			chunk_size := (n_files + n_jobs - 1) / n_jobs
			n_chunks := (n_files + chunk_size - 1) / chunk_size

			mut chunk_flats := []ast.FlatAst{len: n_chunks}
			mut worker_ptrs := []voidptr{len: n_chunks, init: unsafe { nil }}
			mut thread_ids := []C.pthread_t{len: n_chunks}
			mut spawned := []bool{len: n_chunks}

			// Build every argument struct before any thread starts. The
			// capacity is reserved up front, so the element addresses handed
			// to the workers stay valid while `args` is filled.
			mut args := []TransformChunkFlatArgs{cap: n_chunks}
			for ci in 0 .. n_chunks {
				start := ci * chunk_size
				end := if start + chunk_size < n_files { start + chunk_size } else { n_files }
				args << TransformChunkFlatArgs{
					t:                unsafe { voidptr(trans) }
					flat:             unsafe { &b.flat }
					flat_extra_stmts: flat_extra_stmts
					flat_start:       start
					flat_end:         end
					result_ptr:       unsafe { voidptr(&chunk_flats[ci]) }
					worker_ptr:       unsafe { voidptr(&worker_ptrs[ci]) }
					worker_idx:       ci
				}
			}

			// 64 MB worker stacks: ARM64-compiled transform recursion is stack-heavy.
			// If the attribute object cannot be initialised, fall back to nil
			// (default thread attributes) instead of passing an uninitialised
			// object to pthread_create. A failing setstacksize is tolerated:
			// the attribute object stays valid with its default stack size.
			attr_buf := [64]u8{}
			attr_storage := unsafe { voidptr(&attr_buf[0]) }
			attr_ready := C.pthread_attr_init(attr_storage) == 0
			mut attr := attr_storage
			if attr_ready {
				C.pthread_attr_setstacksize(attr_storage, 64 * 1024 * 1024)
			} else {
				attr = unsafe { nil }
			}

			for ci in 0 .. n_chunks {
				rc := C.pthread_create(unsafe { &thread_ids[ci] }, attr, transform_chunk_flat_thread,
					unsafe { voidptr(&args[ci]) })
				// Remember which chunks really got a thread; after a failed
				// pthread_create the thread handle must not be joined.
				spawned[ci] = rc == 0
			}
			if attr_ready {
				C.pthread_attr_destroy(attr_storage)
			}

			// Chunks whose thread could not be created are transformed here,
			// on the calling thread, while the spawned workers are running.
			// NOTE(review): this path runs on the caller's stack, not on a
			// 64 MB worker stack, as the sequential branch above does.
			for ci in 0 .. n_chunks {
				if !spawned[ci] {
					transform_chunk_flat_thread(unsafe { voidptr(&args[ci]) })
				}
			}
			for ci in 0 .. n_chunks {
				if spawned[ci] {
					C.pthread_join(thread_ids[ci], unsafe { nil })
				}
			}

			// Concatenate worker flats in chunk (= file) order, merging worker state.
			for ci in 0 .. n_chunks {
				out.append_flat(chunk_flats[ci])
				worker := unsafe { &transformer.Transformer(worker_ptrs[ci]) }
				trans.merge_worker(worker)
			}
			// Move synth_pos_counter past all worker ranges for the post_pass.
			trans.set_synth_pos_counter(-(n_chunks * 100_000) - 1)
		}
	}

	// Post-pass tail: identical to transform_flat_to_flat_direct's tail.
	generated_parts := trans.generated_fns_parts_from_flat(&out.flat)
	trans.post_pass_to_flat(mut out, generated_parts)
	trans.apply_post_pass_tail_from_flat(&out.flat)
	return out.flat
}
| 179 | |