v / vlib / v2 / builder / transform_parallel.v
178 lines · 166 sloc · 6.41 KB · e7738c112c787d477501fa4a87edd0e1d72159bd
Raw
1// Copyright (c) 2026 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module builder
5
6import v2.ast
7import v2.transformer
8import runtime
9
10$if !windows {
11 fn C.pthread_create(thread &C.pthread_t, attr voidptr, start_routine fn (voidptr) voidptr, arg voidptr) int
12 fn C.pthread_join(thread C.pthread_t, retval voidptr) int
13 fn C.pthread_attr_init(attr voidptr) int
14 fn C.pthread_attr_setstacksize(attr voidptr, stacksize usize) int
15 fn C.pthread_attr_destroy(attr voidptr) int
16
// TransformChunkFlatArgs is the argument record passed (as a voidptr) to
// `transform_chunk_flat_thread`. It describes one worker's share of the
// parallel flat transform: which files to process, where to read them from,
// and the two out-slots the worker fills in before it returns.
struct TransformChunkFlatArgs {
	// Type-erased `&transformer.Transformer`; the worker clones it via
	// `new_worker_clone` rather than using it directly.
	t voidptr
	// Input flat AST the worker reads its files from.
	flat &ast.FlatAst = unsafe { nil }
	// Extra statements indexed by file; indices outside the array are
	// treated by the worker as "no extra statements".
	flat_extra_stmts [][]ast.Stmt
	// Half-open file index range [flat_start, flat_end) owned by this worker.
	flat_start int
	flat_end   int
	// Out-slot: type-erased `&ast.FlatAst` that receives the worker's output.
	result_ptr voidptr
	// Out-slot: address of a voidptr that receives the worker's cloned
	// transformer, so the caller can merge its state afterwards.
	worker_ptr voidptr
	// Index handed to `new_worker_clone` to identify this worker.
	worker_idx int
}
31
// transform_chunk_flat_thread is the pthread start routine for one worker.
// `arg` must point at a TransformChunkFlatArgs. The routine clones the shared
// transformer, transforms every file in [flat_start, flat_end) into a private
// FlatBuilder, then publishes the resulting FlatAst and the worker clone
// through the out-slots in the args. It always returns nil.
fn transform_chunk_flat_thread(arg voidptr) voidptr {
	job := unsafe { &TransformChunkFlatArgs(arg) }
	base := unsafe { &transformer.Transformer(job.t) }
	mut worker := base.new_worker_clone(job.worker_idx)
	// One builder per worker: files are emitted straight into it, one at a
	// time, so no legacy ast.File is built along the way.
	mut chunk_out := ast.new_flat_builder()
	n_extra := job.flat_extra_stmts.len
	mut file_idx := job.flat_start
	for file_idx < job.flat_end {
		// Files without an entry in the extra-statement table get an empty list.
		has_extra := file_idx >= 0 && file_idx < n_extra
		extra := if has_extra { job.flat_extra_stmts[file_idx] } else { []ast.Stmt{} }
		worker.transform_file_index_with_extra_to_flat(job.flat, file_idx, extra, mut
			chunk_out)
		file_idx++
	}
	// Hand the results back through the caller-provided slots.
	unsafe {
		*(&ast.FlatAst(job.result_ptr)) = chunk_out.flat
		*(&voidptr(job.worker_ptr)) = voidptr(worker)
	}
	return unsafe { nil }
}
54}
55
// flat_extra_stmts_by_file converts the sparse `file index -> statements` map
// into a dense array with exactly `n_files` entries. Files that have no entry
// in the map keep an empty statement list; map keys outside [0, n_files) are
// ignored.
fn flat_extra_stmts_by_file(extra_stmts map[int][]ast.Stmt, n_files int) [][]ast.Stmt {
	// Every slot starts out as an empty statement list.
	mut by_file := [][]ast.Stmt{len: n_files}
	for file_idx, file_stmts in extra_stmts {
		if file_idx < 0 || file_idx >= n_files {
			continue
		}
		by_file[file_idx] = file_stmts
	}
	return by_file
}
68
// transform_files_parallel_flat_direct is the flat-native parallel transform
// used by every backend. It mirrors the sequential
// `transform_flat_to_flat_direct` (pre_pass -> prepare -> per-file cursor
// transform -> post_pass tail) but fans the per-file loop across worker threads:
// each worker emits its contiguous file range cursor-native into its OWN
// FlatBuilder, then the main thread concatenates them in file order via
// `FlatBuilder.append_flat`. No legacy ast.File is materialised; backends that
// still consume []ast.File (.v/eval) rehydrate from the transformed flat at
// the codegen boundary in builder.v.
//
// Per-worker synth-position disjointness is handled by `new_worker_clone`
// (it offsets synth_pos_counter by `-worker_idx * 100_000`); worker state is
// merged back via `merge_worker` after the join, exactly as the legacy path.
//
// On Windows, or when there is at most one file or one job, the files are
// transformed sequentially on the calling thread. If a worker thread cannot
// be started (attribute setup or `pthread_create` fails), that worker's chunk
// is processed inline on the calling thread instead, so no file is dropped
// and only threads that really started are joined.
//
// Returns the fully transformed FlatAst, post-pass included.
fn (mut b Builder) transform_files_parallel_flat_direct(mut trans transformer.Transformer) ast.FlatAst {
	// Pre-pass + generic preparation: sequential, once (same as the legacy path).
	trans.pre_pass_from_flat(&b.flat)
	mut flat_extra_stmts := [][]ast.Stmt{}
	if trans.needs_full_files_for_transform() {
		extra_stmts := trans.prepare_flat_for_transform(&b.flat)
		flat_extra_stmts = flat_extra_stmts_by_file(extra_stmts, b.flat.files.len)
	}

	n_jobs := runtime.nr_jobs()
	n_files := b.flat.files.len
	mut out := ast.new_flat_builder()
	$if windows {
		for fi in 0 .. n_files {
			extra := if fi < flat_extra_stmts.len { flat_extra_stmts[fi] } else { []ast.Stmt{} }
			trans.transform_file_index_with_extra_to_flat(&b.flat, fi, extra, mut out)
		}
	} $else {
		if n_files <= 1 || n_jobs <= 1 {
			for fi in 0 .. n_files {
				extra := if fi < flat_extra_stmts.len { flat_extra_stmts[fi] } else { []ast.Stmt{} }
				trans.transform_file_index_with_extra_to_flat(&b.flat, fi, extra, mut out)
			}
		} else {
			// Contiguous [start,end) file ranges, one per worker. Chunk `ci`
			// covers files [ci * chunk_size, min((ci + 1) * chunk_size, n_files)),
			// so merging the worker outputs in chunk order preserves the
			// original file order. n_files >= 2 here, hence chunk_size >= 1 and
			// 1 <= n_chunks <= n_jobs.
			chunk_size := (n_files + n_jobs - 1) / n_jobs
			n_chunks := (n_files + chunk_size - 1) / chunk_size

			mut chunk_flats := []ast.FlatAst{len: n_chunks}
			mut worker_ptrs := []voidptr{len: n_chunks, init: unsafe { nil }}
			mut thread_ids := []C.pthread_t{len: n_chunks}
			// spawned[ci] is true only when a thread was really started for chunk ci.
			mut spawned := []bool{len: n_chunks}
			// Capacity is reserved up front so that `&args[ci]` stays valid
			// while later chunks are appended (no reallocation can happen).
			mut args := []TransformChunkFlatArgs{cap: n_chunks}

			// 64 MB worker stacks: ARM64-compiled transform recursion is stack-heavy.
			// NOTE(review): assumes pthread_attr_t fits in (and tolerates the
			// alignment of) this 64-byte buffer on all supported targets - confirm.
			attr_buf := [64]u8{}
			attr := unsafe { voidptr(&attr_buf[0]) }
			attr_ready := C.pthread_attr_init(attr) == 0
			// Only spawn threads when the large stack was actually configured;
			// otherwise every chunk runs inline on the calling thread.
			can_spawn := attr_ready && C.pthread_attr_setstacksize(attr, 64 * 1024 * 1024) == 0

			for ci in 0 .. n_chunks {
				start := ci * chunk_size
				end := if start + chunk_size < n_files { start + chunk_size } else { n_files }
				args << TransformChunkFlatArgs{
					t:                unsafe { voidptr(trans) }
					flat:             unsafe { &b.flat }
					flat_extra_stmts: flat_extra_stmts
					flat_start:       start
					flat_end:         end
					result_ptr:       unsafe { voidptr(&chunk_flats[ci]) }
					worker_ptr:       unsafe { voidptr(&worker_ptrs[ci]) }
					worker_idx:       ci
				}
				job := unsafe { voidptr(&args[ci]) }
				spawned[ci] = can_spawn
					&& C.pthread_create(unsafe { &thread_ids[ci] }, attr, transform_chunk_flat_thread, job) == 0
				if !spawned[ci] {
					// Thread creation failed: do this chunk's work right here
					// so its result and worker slots are still filled in.
					transform_chunk_flat_thread(job)
				}
			}
			if attr_ready {
				C.pthread_attr_destroy(attr)
			}
			for ci in 0 .. n_chunks {
				// A thread id is only meaningful when pthread_create succeeded.
				if spawned[ci] {
					C.pthread_join(thread_ids[ci], unsafe { nil })
				}
			}

			// Concatenate worker flats in chunk (= file) order, merging worker state.
			for ci in 0 .. n_chunks {
				out.append_flat(chunk_flats[ci])
				worker := unsafe { &transformer.Transformer(worker_ptrs[ci]) }
				trans.merge_worker(worker)
			}
			// Move synth_pos_counter past all worker ranges for the post_pass.
			trans.set_synth_pos_counter(-(n_chunks * 100_000) - 1)
		}
	}

	// Post-pass tail: identical to transform_flat_to_flat_direct's tail.
	generated_parts := trans.generated_fns_parts_from_flat(&out.flat)
	trans.post_pass_to_flat(mut out, generated_parts)
	trans.apply_post_pass_tail_from_flat(&out.flat)
	return out.flat
}
179