Gitly


1 // Copyright (c) 2026 Alexander Medvednikov. All rights reserved.
2 // Use of this source code is governed by an MIT license
3 // that can be found in the LICENSE file.
4 module builder
5 
6 import v2.ast
7 import v2.transformer
8 import runtime
9 
$if !windows {
	fn C.pthread_create(thread &C.pthread_t, attr voidptr, start_routine fn (voidptr) voidptr, arg voidptr) int
	fn C.pthread_join(thread C.pthread_t, retval voidptr) int
	fn C.pthread_attr_init(attr voidptr) int
	fn C.pthread_attr_setstacksize(attr voidptr, stacksize usize) int
	fn C.pthread_attr_destroy(attr voidptr) int

	// TransformChunkFlatArgs is the argument record passed (as a voidptr) to
	// `transform_chunk_flat_thread`. It names the half-open file range
	// `[flat_start, flat_end)` the worker must transform, the inputs it reads
	// from, and the two out-slots it fills before returning.
	struct TransformChunkFlatArgs {
		t                voidptr // &transformer.Transformer the worker clones itself from
		flat             &ast.FlatAst = unsafe { nil } // input flat AST the file indices refer to
		flat_extra_stmts [][]ast.Stmt // per-file extra statements, indexed by file index
		flat_start       int     // first file index of the chunk (inclusive)
		flat_end         int     // end of the chunk (exclusive)
		result_ptr       voidptr // &ast.FlatAst: receives the chunk's transformed flat
		worker_ptr       voidptr // &voidptr: receives the worker clone as a voidptr
		worker_idx       int     // index handed to `new_worker_clone`
	}

	// transform_chunk_flat_thread is the pthread start routine for one chunk.
	// `arg` must point at a TransformChunkFlatArgs. The routine clones a worker
	// from `t`, transforms every file of the chunk into a fresh FlatBuilder,
	// stores the builder's flat through `result_ptr` and the worker clone
	// through `worker_ptr`, and always returns nil.
	fn transform_chunk_flat_thread(arg voidptr) voidptr {
		job := unsafe { &TransformChunkFlatArgs(arg) }
		base := unsafe { &transformer.Transformer(job.t) }
		mut worker := base.new_worker_clone(job.worker_idx)
		// All files of this chunk accumulate in one builder owned by this call.
		mut chunk_builder := ast.new_flat_builder()
		n_extra := job.flat_extra_stmts.len
		for fi in job.flat_start .. job.flat_end {
			// Files without an entry in the extra-statement table get an empty list.
			extra := if fi < 0 || fi >= n_extra {
				[]ast.Stmt{}
			} else {
				job.flat_extra_stmts[fi]
			}
			worker.transform_file_index_with_extra_to_flat(job.flat, fi, extra, mut
				chunk_builder)
		}
		// Publish the results through the caller-provided out-slots.
		unsafe {
			*(&ast.FlatAst(job.result_ptr)) = chunk_builder.flat
			*(&voidptr(job.worker_ptr)) = voidptr(worker)
		}
		return unsafe { nil }
	}
}
55 
// flat_extra_stmts_by_file turns the file-index -> statements map into a dense
// array with exactly `n_files` entries, so entry `fi` can be read by plain
// indexing. Files missing from the map get an empty statement list; map keys
// outside `0 .. n_files` are ignored.
fn flat_extra_stmts_by_file(extra_stmts map[int][]ast.Stmt, n_files int) [][]ast.Stmt {
	// Every slot starts out as an empty statement list.
	mut by_file := [][]ast.Stmt{len: n_files}
	for file_idx, file_stmts in extra_stmts {
		if file_idx < 0 || file_idx >= n_files {
			continue
		}
		by_file[file_idx] = file_stmts
	}
	return by_file
}
68 
// transform_files_parallel_flat_direct is the flat-native parallel transform
// used by every backend. It mirrors the sequential
// `transform_flat_to_flat_direct` (pre_pass -> prepare -> per-file cursor
// transform -> post_pass tail) but fans the per-file loop across worker threads:
// each worker emits its contiguous file range cursor-native into its OWN
// FlatBuilder, then the main thread concatenates them in file order via
// `FlatBuilder.append_flat`. No legacy ast.File is materialised; backends that
// still consume []ast.File (.v/eval) rehydrate from the transformed flat at
// the codegen boundary in builder.v.
//
// Per-worker synth-position disjointness is handled by `new_worker_clone`
// (it offsets synth_pos_counter by `-worker_idx * 100_000`); worker state is
// merged back via `merge_worker` after the join, exactly as the legacy path.
//
// On Windows, or when there is at most one file or one job, the files are
// transformed sequentially on the calling thread. If the thread attributes
// cannot be initialised or a worker thread cannot be created, the affected
// chunk is transformed on the calling thread instead, so no file range is
// dropped and only threads that really started are joined.
//
// Returns the transformed FlatAst after the post-pass tail has run.
fn (mut b Builder) transform_files_parallel_flat_direct(mut trans transformer.Transformer) ast.FlatAst {
	// Pre-pass + generic preparation: sequential, once (same as the legacy path).
	trans.pre_pass_from_flat(&b.flat)
	mut flat_extra_stmts := [][]ast.Stmt{}
	if trans.needs_full_files_for_transform() {
		extra_stmts := trans.prepare_flat_for_transform(&b.flat)
		flat_extra_stmts = flat_extra_stmts_by_file(extra_stmts, b.flat.files.len)
	}

	n_jobs := runtime.nr_jobs()
	n_files := b.flat.files.len
	mut out := ast.new_flat_builder()
	$if windows {
		for fi in 0 .. n_files {
			extra := if fi < flat_extra_stmts.len { flat_extra_stmts[fi] } else { []ast.Stmt{} }
			trans.transform_file_index_with_extra_to_flat(&b.flat, fi, extra, mut out)
		}
	} $else {
		if n_files <= 1 || n_jobs <= 1 {
			for fi in 0 .. n_files {
				extra := if fi < flat_extra_stmts.len { flat_extra_stmts[fi] } else { []ast.Stmt{} }
				trans.transform_file_index_with_extra_to_flat(&b.flat, fi, extra, mut out)
			}
		} else {
			// Contiguous [start,end) file ranges, one per worker. Merging the
			// worker outputs in spawn order preserves original file order.
			// chunk_size is ceil(n_files / n_jobs), so at most n_jobs buckets
			// are filled and `bw` never runs past the end of bucket_indices.
			mut bucket_indices := [][]int{len: n_jobs}
			chunk_size := (n_files + n_jobs - 1) / n_jobs
			mut i := 0
			mut bw := 0
			for i < n_files {
				end := if i + chunk_size < n_files { i + chunk_size } else { n_files }
				for j in i .. end {
					bucket_indices[bw] << j
				}
				i = end
				bw++
			}

			mut chunk_flats := []ast.FlatAst{len: n_jobs}
			mut worker_ptrs := []voidptr{len: n_jobs, init: unsafe { nil }}
			mut thread_ids := []C.pthread_t{len: n_jobs}
			// started[ci] is true only when a thread for chunk ci is really running.
			mut started := []bool{len: n_jobs}
			// cap == n_jobs: appending never reallocates, so the element
			// addresses handed to the workers stay valid.
			mut args := []TransformChunkFlatArgs{cap: n_jobs}

			// 64 MB worker stacks: ARM64-compiled transform recursion is stack-heavy.
			attr_buf := [64]u8{}
			attr := unsafe { voidptr(&attr_buf[0]) }
			// An attribute object that failed to initialise must not be used;
			// in that case no thread is spawned and every chunk runs inline.
			attr_ok := C.pthread_attr_init(attr) == 0
			if attr_ok {
				C.pthread_attr_setstacksize(attr, 64 * 1024 * 1024)
			}

			mut chunk_idx := 0
			for w in 0 .. n_jobs {
				idxs := bucket_indices[w]
				if idxs.len == 0 {
					continue
				}
				args << TransformChunkFlatArgs{
					t:                unsafe { voidptr(trans) }
					flat:             unsafe { &b.flat }
					flat_extra_stmts: flat_extra_stmts
					flat_start:       idxs[0]
					flat_end:         idxs[idxs.len - 1] + 1
					result_ptr:       unsafe { voidptr(&chunk_flats[chunk_idx]) }
					worker_ptr:       unsafe { voidptr(&worker_ptrs[chunk_idx]) }
					worker_idx:       chunk_idx
				}
				if attr_ok
					&& C.pthread_create(unsafe { &thread_ids[chunk_idx] }, attr, transform_chunk_flat_thread, unsafe { voidptr(&args[chunk_idx]) }) == 0 {
					started[chunk_idx] = true
				}
				chunk_idx++
			}
			if attr_ok {
				C.pthread_attr_destroy(attr)
			}
			// Chunks whose thread could not be started are transformed here, on
			// the calling thread, while the started workers run concurrently.
			for ci := 0; ci < chunk_idx; ci++ {
				if !started[ci] {
					transform_chunk_flat_thread(unsafe { voidptr(&args[ci]) })
				}
			}
			// Join only thread ids that pthread_create actually filled in.
			for ci := 0; ci < chunk_idx; ci++ {
				if started[ci] {
					C.pthread_join(thread_ids[ci], unsafe { nil })
				}
			}

			// Concatenate worker flats in spawn (= file) order, merging worker state.
			mut ci := 0
			for w in 0 .. n_jobs {
				if bucket_indices[w].len == 0 {
					continue
				}
				out.append_flat(chunk_flats[ci])
				worker := unsafe { &transformer.Transformer(worker_ptrs[ci]) }
				trans.merge_worker(worker)
				ci++
			}
			// Move synth_pos_counter past all worker ranges for the post_pass.
			trans.set_synth_pos_counter(-(chunk_idx * 100_000) - 1)
		}
	}

	// Post-pass tail: identical to transform_flat_to_flat_direct's tail.
	generated_parts := trans.generated_fns_parts_from_flat(&out.flat)
	trans.post_pass_to_flat(mut out, generated_parts)
	trans.apply_post_pass_tail_from_flat(&out.flat)
	return out.flat
}
179

1	// Copyright (c) 2026 Alexander Medvednikov. All rights reserved.
2	// Use of this source code is governed by an MIT license
3	// that can be found in the LICENSE file.
4	module builder
5
6	import v2.ast
7	import v2.transformer
8	import runtime
9
$if !windows {
	fn C.pthread_create(thread &C.pthread_t, attr voidptr, start_routine fn (voidptr) voidptr, arg voidptr) int
	fn C.pthread_join(thread C.pthread_t, retval voidptr) int
	fn C.pthread_attr_init(attr voidptr) int
	fn C.pthread_attr_setstacksize(attr voidptr, stacksize usize) int
	fn C.pthread_attr_destroy(attr voidptr) int

	// TransformChunkFlatArgs is the argument record passed (as a voidptr) to
	// `transform_chunk_flat_thread`. It names the half-open file range
	// `[flat_start, flat_end)` the worker must transform, the inputs it reads
	// from, and the two out-slots it fills before returning.
	struct TransformChunkFlatArgs {
		t                voidptr // &transformer.Transformer the worker clones itself from
		flat             &ast.FlatAst = unsafe { nil } // input flat AST the file indices refer to
		flat_extra_stmts [][]ast.Stmt // per-file extra statements, indexed by file index
		flat_start       int     // first file index of the chunk (inclusive)
		flat_end         int     // end of the chunk (exclusive)
		result_ptr       voidptr // &ast.FlatAst: receives the chunk's transformed flat
		worker_ptr       voidptr // &voidptr: receives the worker clone as a voidptr
		worker_idx       int     // index handed to `new_worker_clone`
	}

	// transform_chunk_flat_thread is the pthread start routine for one chunk.
	// `arg` must point at a TransformChunkFlatArgs. The routine clones a worker
	// from `t`, transforms every file of the chunk into a fresh FlatBuilder,
	// stores the builder's flat through `result_ptr` and the worker clone
	// through `worker_ptr`, and always returns nil.
	fn transform_chunk_flat_thread(arg voidptr) voidptr {
		job := unsafe { &TransformChunkFlatArgs(arg) }
		base := unsafe { &transformer.Transformer(job.t) }
		mut worker := base.new_worker_clone(job.worker_idx)
		// All files of this chunk accumulate in one builder owned by this call.
		mut chunk_builder := ast.new_flat_builder()
		n_extra := job.flat_extra_stmts.len
		for fi in job.flat_start .. job.flat_end {
			// Files without an entry in the extra-statement table get an empty list.
			extra := if fi < 0 || fi >= n_extra {
				[]ast.Stmt{}
			} else {
				job.flat_extra_stmts[fi]
			}
			worker.transform_file_index_with_extra_to_flat(job.flat, fi, extra, mut
				chunk_builder)
		}
		// Publish the results through the caller-provided out-slots.
		unsafe {
			*(&ast.FlatAst(job.result_ptr)) = chunk_builder.flat
			*(&voidptr(job.worker_ptr)) = voidptr(worker)
		}
		return unsafe { nil }
	}
}
55
// flat_extra_stmts_by_file turns the file-index -> statements map into a dense
// array with exactly `n_files` entries, so entry `fi` can be read by plain
// indexing. Files missing from the map get an empty statement list; map keys
// outside `0 .. n_files` are ignored.
fn flat_extra_stmts_by_file(extra_stmts map[int][]ast.Stmt, n_files int) [][]ast.Stmt {
	// Every slot starts out as an empty statement list.
	mut by_file := [][]ast.Stmt{len: n_files}
	for file_idx, file_stmts in extra_stmts {
		if file_idx < 0 || file_idx >= n_files {
			continue
		}
		by_file[file_idx] = file_stmts
	}
	return by_file
}
68
// transform_files_parallel_flat_direct is the flat-native parallel transform
// used by every backend. It mirrors the sequential
// `transform_flat_to_flat_direct` (pre_pass -> prepare -> per-file cursor
// transform -> post_pass tail) but fans the per-file loop across worker threads:
// each worker emits its contiguous file range cursor-native into its OWN
// FlatBuilder, then the main thread concatenates them in file order via
// `FlatBuilder.append_flat`. No legacy ast.File is materialised; backends that
// still consume []ast.File (.v/eval) rehydrate from the transformed flat at
// the codegen boundary in builder.v.
//
// Per-worker synth-position disjointness is handled by `new_worker_clone`
// (it offsets synth_pos_counter by `-worker_idx * 100_000`); worker state is
// merged back via `merge_worker` after the join, exactly as the legacy path.
//
// On Windows, or when there is at most one file or one job, the files are
// transformed sequentially on the calling thread. If the thread attributes
// cannot be initialised or a worker thread cannot be created, the affected
// chunk is transformed on the calling thread instead, so no file range is
// dropped and only threads that really started are joined.
//
// Returns the transformed FlatAst after the post-pass tail has run.
fn (mut b Builder) transform_files_parallel_flat_direct(mut trans transformer.Transformer) ast.FlatAst {
	// Pre-pass + generic preparation: sequential, once (same as the legacy path).
	trans.pre_pass_from_flat(&b.flat)
	mut flat_extra_stmts := [][]ast.Stmt{}
	if trans.needs_full_files_for_transform() {
		extra_stmts := trans.prepare_flat_for_transform(&b.flat)
		flat_extra_stmts = flat_extra_stmts_by_file(extra_stmts, b.flat.files.len)
	}

	n_jobs := runtime.nr_jobs()
	n_files := b.flat.files.len
	mut out := ast.new_flat_builder()
	$if windows {
		for fi in 0 .. n_files {
			extra := if fi < flat_extra_stmts.len { flat_extra_stmts[fi] } else { []ast.Stmt{} }
			trans.transform_file_index_with_extra_to_flat(&b.flat, fi, extra, mut out)
		}
	} $else {
		if n_files <= 1 || n_jobs <= 1 {
			for fi in 0 .. n_files {
				extra := if fi < flat_extra_stmts.len { flat_extra_stmts[fi] } else { []ast.Stmt{} }
				trans.transform_file_index_with_extra_to_flat(&b.flat, fi, extra, mut out)
			}
		} else {
			// Contiguous [start,end) file ranges, one per worker. Merging the
			// worker outputs in spawn order preserves original file order.
			// chunk_size is ceil(n_files / n_jobs), so at most n_jobs buckets
			// are filled and `bw` never runs past the end of bucket_indices.
			mut bucket_indices := [][]int{len: n_jobs}
			chunk_size := (n_files + n_jobs - 1) / n_jobs
			mut i := 0
			mut bw := 0
			for i < n_files {
				end := if i + chunk_size < n_files { i + chunk_size } else { n_files }
				for j in i .. end {
					bucket_indices[bw] << j
				}
				i = end
				bw++
			}

			mut chunk_flats := []ast.FlatAst{len: n_jobs}
			mut worker_ptrs := []voidptr{len: n_jobs, init: unsafe { nil }}
			mut thread_ids := []C.pthread_t{len: n_jobs}
			// started[ci] is true only when a thread for chunk ci is really running.
			mut started := []bool{len: n_jobs}
			// cap == n_jobs: appending never reallocates, so the element
			// addresses handed to the workers stay valid.
			mut args := []TransformChunkFlatArgs{cap: n_jobs}

			// 64 MB worker stacks: ARM64-compiled transform recursion is stack-heavy.
			attr_buf := [64]u8{}
			attr := unsafe { voidptr(&attr_buf[0]) }
			// An attribute object that failed to initialise must not be used;
			// in that case no thread is spawned and every chunk runs inline.
			attr_ok := C.pthread_attr_init(attr) == 0
			if attr_ok {
				C.pthread_attr_setstacksize(attr, 64 * 1024 * 1024)
			}

			mut chunk_idx := 0
			for w in 0 .. n_jobs {
				idxs := bucket_indices[w]
				if idxs.len == 0 {
					continue
				}
				args << TransformChunkFlatArgs{
					t:                unsafe { voidptr(trans) }
					flat:             unsafe { &b.flat }
					flat_extra_stmts: flat_extra_stmts
					flat_start:       idxs[0]
					flat_end:         idxs[idxs.len - 1] + 1
					result_ptr:       unsafe { voidptr(&chunk_flats[chunk_idx]) }
					worker_ptr:       unsafe { voidptr(&worker_ptrs[chunk_idx]) }
					worker_idx:       chunk_idx
				}
				if attr_ok
					&& C.pthread_create(unsafe { &thread_ids[chunk_idx] }, attr, transform_chunk_flat_thread, unsafe { voidptr(&args[chunk_idx]) }) == 0 {
					started[chunk_idx] = true
				}
				chunk_idx++
			}
			if attr_ok {
				C.pthread_attr_destroy(attr)
			}
			// Chunks whose thread could not be started are transformed here, on
			// the calling thread, while the started workers run concurrently.
			for ci := 0; ci < chunk_idx; ci++ {
				if !started[ci] {
					transform_chunk_flat_thread(unsafe { voidptr(&args[ci]) })
				}
			}
			// Join only thread ids that pthread_create actually filled in.
			for ci := 0; ci < chunk_idx; ci++ {
				if started[ci] {
					C.pthread_join(thread_ids[ci], unsafe { nil })
				}
			}

			// Concatenate worker flats in spawn (= file) order, merging worker state.
			mut ci := 0
			for w in 0 .. n_jobs {
				if bucket_indices[w].len == 0 {
					continue
				}
				out.append_flat(chunk_flats[ci])
				worker := unsafe { &transformer.Transformer(worker_ptrs[ci]) }
				trans.merge_worker(worker)
				ci++
			}
			// Move synth_pos_counter past all worker ranges for the post_pass.
			trans.set_synth_pos_counter(-(chunk_idx * 100_000) - 1)
		}
	}

	// Post-pass tail: identical to transform_flat_to_flat_direct's tail.
	generated_parts := trans.generated_fns_parts_from_flat(&out.flat)
	trans.post_pass_to_flat(mut out, generated_parts)
	trans.apply_post_pass_tail_from_flat(&out.flat)
	return out.flat
}
179