| 1 | // Copyright (c) 2026 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | module builder |
| 5 | |
| 6 | import runtime |
| 7 | import v2.gen.arm64 |
| 8 | |
// POSIX-only support code for the threaded ARM64 generation path:
// the per-thread argument record, the pthread bindings, and the thread entry point.
$if !windows {
	// GenARM64ChunkArgs is the argument record passed (by address) to one
	// generator thread. It names the worker to use and the function indices
	// that thread is responsible for.
	struct GenARM64ChunkArgs {
		worker    voidptr // &arm64.Gen — pre-cloned worker (created on main thread)
		start_idx int     // first function index to generate (inclusive)
		end_idx   int     // one past the last function index to generate (exclusive)
	}

	// Bindings for the pthread calls used to start and join the generator threads.
	fn C.pthread_create(thread &C.pthread_t, attr voidptr, start_routine fn (voidptr) voidptr, arg voidptr) int
	fn C.pthread_join(thread C.pthread_t, retval voidptr) int
	fn C.pthread_attr_init(attr voidptr) int
	fn C.pthread_attr_setstacksize(attr voidptr, stacksize usize) int
	fn C.pthread_attr_destroy(attr voidptr) int

	// gen_arm64_chunk_thread is the pthread start routine for one chunk.
	// `arg` must point to a GenARM64ChunkArgs; the function calls gen_func()
	// on the referenced worker for every index in [start_idx, end_idx) and
	// always returns nil.
	fn gen_arm64_chunk_thread(arg voidptr) voidptr {
		chunk := unsafe { &GenARM64ChunkArgs(arg) }
		mut worker := unsafe { &arm64.Gen(chunk.worker) }
		for func_idx in chunk.start_idx .. chunk.end_idx {
			worker.gen_func(func_idx)
		}
		return unsafe { nil }
	}
}
| 31 | |
// gen_arm64_parallel generates code for every function in `gen.mod.funcs`,
// using several pthreads when that is possible.
//
// Order of operations: gen_pre_pass() runs first on the calling thread, every
// function index is then handed to gen_func() exactly once, and gen_post_pass()
// runs last.
//
// On Windows, or when there is at most one function or one job, all functions
// are generated sequentially on `gen` itself. Otherwise the indices are split
// into contiguous chunks; each chunk is generated on its own worker obtained
// from gen.new_worker_clone(), and the workers are folded back into `gen` with
// gen.merge_worker() in chunk order.
//
// If a thread cannot be started for a chunk (pthread_attr_init or
// pthread_create reports an error), that chunk is generated on the calling
// thread instead, so no function is skipped and no unstarted thread is joined.
fn (mut b Builder) gen_arm64_parallel(mut gen arm64.Gen) {
	gen.gen_pre_pass()

	n_funcs := gen.mod.funcs.len
	n_jobs := runtime.nr_jobs()

	$if windows {
		// The pthread path is compiled out on Windows: generate sequentially.
		for fi := 0; fi < n_funcs; fi++ {
			gen.gen_func(fi)
		}
		gen.gen_post_pass()
		return
	} $else {
		if n_funcs <= 1 || n_jobs <= 1 {
			// Fallback to sequential
			for fi := 0; fi < n_funcs; fi++ {
				gen.gen_func(fi)
			}
			gen.gen_post_pass()
			return
		}

		// Split functions into at most n_jobs contiguous chunks (ceiling division).
		chunk_size := (n_funcs + n_jobs - 1) / n_jobs
		mut args := []GenARM64ChunkArgs{cap: n_jobs}

		// Pre-create all workers on the main thread to avoid concurrent .clone() races.
		// Each chunk record carries its own worker.
		mut i := 0
		for i < n_funcs {
			end := if i + chunk_size < n_funcs { i + chunk_size } else { n_funcs }
			w := gen.new_worker_clone()
			args << GenARM64ChunkArgs{
				worker:    voidptr(w)
				start_idx: i
				end_idx:   end
			}
			i = end
		}

		// `args` is complete from here on, so element addresses stay valid
		// while the threads are running.
		n_chunks := args.len
		mut thread_ids := []C.pthread_t{len: n_chunks}
		mut started := []bool{len: n_chunks}

		// Opaque storage for the thread attribute object. u64 elements give the
		// 64-byte buffer 8-byte alignment.
		// NOTE(review): assumes pthread_attr_t fits in 64 bytes on every
		// supported target — confirm.
		attr_buf := [8]u64{}
		attr := unsafe { voidptr(&attr_buf[0]) }
		if C.pthread_attr_init(attr) == 0 {
			// Request a 64 MiB stack for each worker thread.
			C.pthread_attr_setstacksize(attr, 64 * 1024 * 1024)
			for ci := 0; ci < n_chunks; ci++ {
				rc := C.pthread_create(unsafe { &thread_ids[ci] }, attr, gen_arm64_chunk_thread,
					unsafe { voidptr(&args[ci]) })
				// A non-zero result means no thread exists for this chunk.
				started[ci] = rc == 0
			}
			C.pthread_attr_destroy(attr)
		}

		// Generate, on the calling thread, every chunk that did not get a thread.
		for ci := 0; ci < n_chunks; ci++ {
			if !started[ci] {
				gen_arm64_chunk_thread(unsafe { voidptr(&args[ci]) })
			}
		}

		// Wait for all threads that were actually started.
		for ci := 0; ci < n_chunks; ci++ {
			if started[ci] {
				C.pthread_join(thread_ids[ci], unsafe { nil })
			}
		}

		// Merge worker results in chunk order.
		for ci := 0; ci < n_chunks; ci++ {
			w := unsafe { &arm64.Gen(args[ci].worker) }
			gen.merge_worker(w)
		}

		gen.gen_post_pass()
	}
}
| 103 | |