v2 / vlib / builtin / closure / closure.c.v
458 lines · 428 sloc · 13.94 KB · 3826b46dd7b1a69d70edbb4a7ecf5d2631910877
Raw
1@[has_globals]
2module closure
3
4// Inspired from Chris Wellons's work
5// https://nullprogram.com/blog/2017/01/08/
6
// assumed_page_size is the fixed byte distance (0x4000 = 16 KiB) between an executable
// closure slot and its metadata words: closure_slot_meta subtracts it from the slot address,
// and the displacements hard-coded in the closure_thunk byte sequences use the same value.
const assumed_page_size = int(0x4000)
// ppc64_architecture is the value is_ppc64 compares `C.__V_architecture` against.
// NOTE(review): presumably the compiler's numeric id for the ppc64 target — confirm.
const ppc64_architecture = int(11)

// ClosureGetDataFn is the type of `g_closure.closure_get_data`, the stub installed by
// closure_init from closure_get_data_bytes; it takes no arguments and returns a voidptr.
type ClosureGetDataFn = fn () voidptr
11
// ClosurePage is one node of the singly linked list of executable closure pages.
// closure_register_page prepends nodes to `g_closure.pages`; closure_is_managed walks the list.
struct ClosurePage {
mut:
	next &ClosurePage = unsafe { nil } // next registered page, nil at the end of the list
	exec_page_start voidptr // address of the first byte of the executable page
}
17
// Closure holds the global state of the closure allocator (see `g_closure`).
@[heap]
struct Closure {
	ClosureMutex // embedded lock state; declared outside this view
mut:
	closure_ptr voidptr // next never-used slot in the current executable page
	closure_get_data ClosureGetDataFn = unsafe { nil } // stub installed by closure_init
	closure_cap int // number of never-used slots left in the current page
	free_closure_ptr voidptr // head of the free list built by closure_try_destroy
	pages &ClosurePage = unsafe { nil } // list of registered executable pages
	v_page_size int = int(0x4000) // overwritten by closure_init with get_page_size_platform()
}

// g_closure is the single allocator state shared by every function in this module.
__global g_closure = Closure{}
31
// MemoryProtectAtrr selects the protection passed to closure_memory_protect_platform.
// closure_alloc and closure_init use `.read_exec` for the finished page and `.read_write`
// while the get-data stub is being copied in.
enum MemoryProtectAtrr {
	read_exec
	read_write
}
36
// is_ppc64 reports whether the program targets ppc64: it is always false on
// little-endian builds, and otherwise compares `C.__V_architecture` with ppc64_architecture.
// This stays a runtime check on purpose, for bootstrap compatibility: older compilers
// can not parse `$if ppc64` yet.
@[inline]
fn is_ppc64() bool {
	$if !big_endian {
		return false
	} $else {
		return C.__V_architecture == ppc64_architecture
	}
}
46
// closure_thunk is the per-architecture machine-code template that closure_alloc copies
// into every slot of an executable page. According to the per-instruction comments below,
// each variant locates the slot's metadata at a fixed negative displacement from the slot,
// stashes the userdata (or, on i386/arm32, the address it can be read from) in a scratch
// register, and then jumps to the target function stored next to it.
//
// refer to https://godbolt.org/z/r7P3EYv6c for a complete assembly
//
// NOTE: Keep the first branch as the longest byte sequence. In translated/bootstrap C mode
// (`vc/v.c`), V emits a fixed C array whose size is inferred from the first branch.
// The final `big_endian` branch maps to ppc64 here, since the supported big-endian
// closure targets handled above are s390x and sparc64.
//
// NOTE(review): the final `big_endian` branch is 11 instructions (44 bytes) while the first
// (`ppc64le`) branch is 10 instructions (40 bytes), which looks at odds with the rule above — confirm.
// NOTE(review): the negated 32-bit ppc condition excludes neither `sparc64` nor big-endian
// ppc64, yet it is tested before both of those branches; verify that they are still reachable
// with the way the compiler evaluates this `$if` chain.
// vfmt off
pub const closure_thunk = $if ppc64le {
	[
		u8(0xa6), 0x02, 0x08, 0x7c, // mflr %r0
		0x05, 0x00, 0x00, 0x48, // bl here
		0xa6, 0x02, 0xc8, 0x7d, // here: mflr %r14
		0xf8, 0xbf, 0xce, 0x39, // addi %r14, %r14, -16392
		0x00, 0x00, 0xce, 0xc9, // lfd %f14, 0(%r14)
		0x08, 0x00, 0xce, 0xe9, // ld %r14, 8(%r14)
		0x78, 0x73, 0xcc, 0x7d, // mr %r12, %r14
		0xa6, 0x03, 0x08, 0x7c, // mtlr %r0
		0xa6, 0x03, 0xc9, 0x7d, // mtctr %r14
		0x20, 0x04, 0x80, 0x4e, // bctr
	]!
} $else $if !ppc64le && !amd64 && !i386 && !arm64 && !arm32 && !rv64 && !rv32 && !s390x && !loongarch64 {
	// ppc (32-bit PowerPC) - expressed as negation of all other arches for bootstrap compat
	[
		u8(0x7c), 0x08, 0x02, 0xa6, // mflr %r0
		0x48, 0x00, 0x00, 0x05, // bl here
		0x7d, 0x88, 0x02, 0xa6, // here: mflr %r12
		0x39, 0x8c, 0xbf, 0xf8, // addi %r12, %r12, -16392
		0xc9, 0xcc, 0x00, 0x00, // lfd %f14, 0(%r12)
		0x81, 0x8c, 0x00, 0x04, // lwz %r12, 4(%r12)
		0x7c, 0x08, 0x03, 0xa6, // mtlr %r0
		0x7d, 0x89, 0x03, 0xa6, // mtctr %r12
		0x4e, 0x80, 0x04, 0x20, // bctr
	]!
} $else $if amd64 {
	[
		u8(0xF3), 0x44, 0x0F, 0x7E, 0x3D, 0xF7, 0xBF, 0xFF, 0xFF, // movq xmm15, QWORD PTR [rip - userdata]
		0xFF, 0x25, 0xF9, 0xBF, 0xFF, 0xFF // jmp QWORD PTR [rip - fn]
	]!
} $else $if i386 {
	[
		u8(0xe8), 0x00, 0x00, 0x00, 0x00, // call here
		// here:
		0x59, // pop ecx
		0x66, 0x0F, 0x6E, 0xF9, // movd xmm7, ecx
		0xff, 0xA1, 0xff, 0xbf, 0xff, 0xff, // jmp DWORD PTR [ecx - 0x4001] # <fn>
	]!
} $else $if arm64 {
	[
		u8(0x11), 0x00, 0xFE, 0x5C, // ldr d17, userdata
		0x30, 0x00, 0xFE, 0x58, // ldr x16, fn
		0x00, 0x02, 0x1F, 0xD6 // br x16
	]!
} $else $if arm32 {
	[
		u8(0x04), 0xC0, 0x4F, 0xE2, // adr ip, here
		// here:
		0x01, 0xC9, 0x4C, 0xE2, // sub ip, ip, #0x4000
		0x90, 0xCA, 0x07, 0xEE, // vmov s15, ip
		0x00, 0xC0, 0x9C, 0xE5, // ldr ip, [ip, 0]
		0x1C, 0xFF, 0x2F, 0xE1 // bx ip
	]!
} $else $if rv64 {
	[
		u8(0x97), 0xCF, 0xFF, 0xFF, // auipc t6, 0xffffc
		0x03, 0xBF, 0x8F, 0x00, // ld t5, 8(t6)
		0x07, 0xB3, 0x0F, 0x00, // fld ft6, 0(t6)
		0x67, 0x00, 0x0F, 0x00, // jr t5
	]!
} $else $if rv32 {
	[
		u8(0x97), 0xCF, 0xFF, 0xFF, // auipc t6, 0xffffc
		0x03, 0xAF, 0x4F, 0x00, // lw t5, 4(t6)
		0x07, 0xAB, 0x0F, 0x00, // flw fs6, 0(t6)
		0x67, 0x00, 0x0F, 0x00 // jr t5
	]!
} $else $if s390x {
	[
		u8(0xC0), 0x10, 0xFF, 0xFF, 0xE0, 0x00, // larl %r1, -16384
		0x68, 0xF0, 0x10, 0x00, // ld %f15, 0(%r1)
		0xE3, 0x10, 0x10, 0x08, 0x00, 0x04, // lg %r1, 8(%r1)
		0x07, 0xF1, // br %r1
	]!
} $else $if loongarch64 {
	[
		u8(0x92), 0xFF, 0xFF, 0x1D, // pcaddu12i t6, -4
		0x48, 0x02, 0x80, 0x2B, // fld.d f8, t6, 0
		0x51, 0x22, 0xC0, 0x28, // ld.d t5, t6, 8
		0x20, 0x02, 0x00, 0x4C, // jr t5
	]!
} $else $if sparc64 {
	[
		u8(0x83), 0x41, 0x40, 0x00, // rd %pc, %g1
		0x05, 0x00, 0x00, 0x10, // sethi %hi(0x4000), %g2
		0x84, 0x10, 0xa0, 0x00, // mov %g2, %g2 ! 4000 <main>
		0x82, 0x20, 0x40, 0x02, // sub %g1, %g2, %g1
		0xff, 0x18, 0x60, 0x00, // ldd [ %g1 ], %d62
		0xc2, 0x58, 0x60, 0x08, // ldx [ %g1 + 8 ], %g1
		0x81, 0xc0, 0x40, 0x00, // jmp %g1
		0x01, 0x00, 0x00, 0x00 // nop
	]!
} $else $if big_endian {
	[
		u8(0x7C), 0x08, 0x02, 0xA6, // mflr %r0
		0x48, 0x00, 0x00, 0x05, // bl here
		0x7D, 0xC8, 0x02, 0xA6, // here: mflr %r14
		0x39, 0xCE, 0xC0, 0x08, // addi %r14, %r14, -16376
		0xC9, 0xCE, 0x00, 0x00, // lfd %f14, 0(%r14) // userdata
		0xE9, 0xCE, 0x00, 0x08, // ld %r14, 8(%r14) // func descriptor ptr
		0xE9, 0x8E, 0x00, 0x00, // ld %r12, 0(%r14) // code addr from descriptor
		0xE8, 0x4E, 0x00, 0x08, // ld %r2, 8(%r14) // TOC from descriptor
		0x7C, 0x08, 0x03, 0xA6, // mtlr %r0
		0x7D, 0x89, 0x03, 0xA6, // mtctr %r12
		0x4E, 0x80, 0x04, 0x20, // bctr
	]!
} $else {
	[u8(0)]!
}
164
// closure_get_data_bytes is the per-architecture machine code of the "get data" stub.
// closure_init copies it over the first slot of the first executable page and publishes it
// as `g_closure.closure_get_data`. Per the instruction comments, each variant returns the
// userdata that the matching closure_thunk variant stashed in its scratch register.
//
// NOTE: Keep the first branch as the longest byte sequence. In translated/bootstrap C mode
// (`vc/v.c`), V emits a fixed C array whose size is inferred from the first branch.
//
// NOTE(review): the negated 32-bit ppc condition comes first and excludes neither `sparc64`
// nor big-endian ppc64; verify that the `sparc64` and `big_endian` branches below are still
// reachable with the way the compiler evaluates this `$if` chain.
const closure_get_data_bytes = $if !ppc64le && !amd64 && !i386 && !arm64 && !arm32 && !rv64 && !rv32 && !s390x && !loongarch64 {
	// ppc (32-bit PowerPC) - expressed as negation of all other arches for bootstrap compat
	[
		u8(0x94), 0x21, 0xff, 0xf0, // stwu %r1, -16(%r1)
		0xd9, 0xc1, 0x00, 0x08, // stfd %f14, 8(%r1)
		0x80, 0x61, 0x00, 0x08, // lwz %r3, 8(%r1)
		0x38, 0x21, 0x00, 0x10, // addi %r1, %r1, 16
		0x4e, 0x80, 0x00, 0x20, // blr
	]!
} $else $if arm32 {
	[
		u8(0x90), 0x0A, 0x17, 0xEE, // vmov r0, s15
		0x04, 0x00, 0x10, 0xE5, // ldr r0, [r0, #-4]
		0x1E, 0xFF, 0x2F, 0xE1 // bx lr
	]!
} $else $if amd64 {
	[
		u8(0x66), 0x4C, 0x0F, 0x7E, 0xF8, // movq rax, xmm15
		0xC3 // ret
	]!
} $else $if i386 {
	[
		u8(0x66), 0x0F, 0x7E, 0xF8, // movd eax, xmm7
		0x8B, 0x80, 0xFB, 0xBF, 0xFF, 0xFF, // mov eax, DWORD PTR [eax - 0x4005]
		0xc3 // ret
	]!
} $else $if arm64 {
	[
		u8(0x20), 0x02, 0x66, 0x9E, // fmov x0, d17
		0xC0, 0x03, 0x5F, 0xD6 // ret
	]!
} $else $if rv64 {
	[
		u8(0x53), 0x05, 0x03, 0xE2, // fmv.x.d a0, ft6
		0x67, 0x80, 0x00, 0x00, // ret
	]!
} $else $if rv32 {
	[
		u8(0x53), 0x05, 0x0B, 0xE0, // fmv.x.w a0, fs6
		0x67, 0x80, 0x00, 0x00 // ret
	]!
} $else $if s390x {
	[
		u8(0xB3), 0xCD, 0x00, 0x2F, // lgdr %r2, %f15
		0x07, 0xFE, // br %r14
	]!
} $else $if ppc64le {
	[
		u8(0x66), 0x00, 0xc3, 0x7d, // mfvsrd %r3, %f14
		0x20, 0x00, 0x80, 0x4e, // blr
	]!
} $else $if loongarch64 {
	[
		u8(0x04), 0xB9, 0x14, 0x01, // movfr2gr.d a0, f8
		0x20, 0x00, 0x00, 0x4C, // ret
	]!
} $else $if sparc64 {
	[
		u8(0x91), 0xb0, 0x22, 0x1f, // movdtox %f62, %o0
		0x81, 0xc3, 0xe0, 0x08, // retl
		0x01, 0x00, 0x00, 0x00 // nop
	]!
} $else $if big_endian {
	[
		u8(0x7d), 0xc3, 0x00, 0x66, // mfvsrd %r3, %f14
		0x4e, 0x80, 0x00, 0x20 // blr
	]!
} $else {
	[u8(0)]!
}
237
238// vfmt on
239
// closure_size is the byte stride between consecutive closure slots in an executable page.
// It is equal to `max(2*sizeof(voidptr), closure_thunk.len)`, rounded up to the next multiple
// of `sizeof(voidptr)`: closure_size_1 holds the maximum (pre-biased by `sizeof(voidptr) - 1`
// when the thunk is the larger operand) and the mask below clears the low bits.
// NOTE: This is a workaround for `-usecache` bug, as it can't include `fn get_closure_size()` needed by `const closure_size` in `build-module` mode.
const closure_size_1 = if 2 * u32(sizeof(voidptr)) > u32(closure_thunk.len) {
	2 * u32(sizeof(voidptr))
} else {
	u32(closure_thunk.len) + u32(sizeof(voidptr)) - 1
}
const closure_size = int(closure_size_1 & ~(u32(sizeof(voidptr)) - 1))
248
// closure_exec_ptr maps a closure pointer, as handed out by closure_create, to the address
// of its executable slot. Everywhere except ppc64 the two are the same address; on ppc64
// the slot lies assumed_page_size bytes above the pointer (inverse of closure_return_ptr).
@[inline]
fn closure_exec_ptr(closure voidptr) voidptr {
	if !is_ppc64() {
		return closure
	}
	return unsafe { &u8(closure) + assumed_page_size }
}
256
// closure_return_ptr maps the address of an executable slot to the pointer that
// closure_create returns to its caller. Everywhere except ppc64 that is the slot itself;
// on ppc64 it is the address assumed_page_size bytes below the slot.
@[inline]
fn closure_return_ptr(exec_ptr voidptr) voidptr {
	if !is_ppc64() {
		return exec_ptr
	}
	return unsafe { &u8(exec_ptr) - assumed_page_size }
}
264
// closure_slot_meta returns the metadata words that belong to the executable slot
// `exec_ptr`, i.e. the pointer-sized array located assumed_page_size bytes below the slot.
@[inline]
fn closure_slot_meta(exec_ptr voidptr) &voidptr {
	unsafe {
		meta_start := &u8(exec_ptr) - assumed_page_size
		return &voidptr(meta_start)
	}
}
269
// closure_register_page records `exec_page_start` as a managed executable page by
// prepending a freshly malloc'ed ClosurePage node to `g_closure.pages`.
fn closure_register_page(exec_page_start voidptr) {
	unsafe {
		mut entry := &ClosurePage(malloc(sizeof(ClosurePage)))
		// Set both fields explicitly: the memory returned by malloc is used as-is.
		entry.next = g_closure.pages
		entry.exec_page_start = exec_page_start
		g_closure.pages = entry
	}
}
280
// closure_is_managed reports whether `exec_ptr` is the start of a closure slot inside one
// of the pages registered in `g_closure.pages`. The first slot of a page (offset 0) is
// never reported as managed, and neither is an address that is not a multiple of
// closure_size away from the page start.
fn closure_is_managed(exec_ptr voidptr) bool {
	if isnil(exec_ptr) {
		return false
	}
	addr := unsafe { usize(exec_ptr) }
	stride := usize(closure_size)
	page_len := usize(g_closure.v_page_size)
	mut node := g_closure.pages
	for node != unsafe { nil } {
		base := unsafe { usize(node.exec_page_start) }
		if addr < base || addr >= base + page_len {
			// Not inside this page: keep walking the list.
			node = node.next
			continue
		}
		// The address can only belong to this page, so the answer is final.
		offset := addr - base
		return offset >= stride && offset % stride == 0
	}
	return false
}
297
// closure_alloc allocates executable memory pages for closures(INTERNAL COMPILER USE ONLY).
// It obtains a fresh region from closure_alloc_platform, treats the part starting
// `g_closure.v_page_size` bytes into it as the executable page, registers that page, makes
// it the current allocation page, fills it with copies of closure_thunk and finally marks
// it `.read_exec`. When the platform allocation returns nil, nothing is changed.
// NOTE(review): the slot count is derived from `g_closure.v_page_size`, while slot metadata
// always lives assumed_page_size bytes below its slot; if the two sizes can differ, confirm
// that the metadata of the last slots still falls inside the writable region.
fn closure_alloc() {
	region := closure_alloc_platform()
	if isnil(region) {
		return
	}
	// The executable page begins where the leading (metadata) page ends.
	mut cursor := unsafe { region + g_closure.v_page_size }
	slot_count := g_closure.v_page_size / closure_size
	closure_register_page(cursor)
	g_closure.closure_ptr = cursor
	g_closure.closure_cap = slot_count

	// Stamp the thunk template into every slot of the page.
	for _ in 0 .. slot_count {
		unsafe {
			vmemcpy(cursor, &closure_thunk[0], closure_thunk.len)
			cursor += closure_size
		}
	}
	closure_memory_protect_platform(g_closure.closure_ptr, g_closure.v_page_size, .read_exec)
}
321
// closure_init initializes global closure subsystem(INTERNAL COMPILER USE ONLY).
// It stores the platform page size, initializes the lock, allocates the first executable
// page and turns that page's first slot into the "get data" stub
// (`g_closure.closure_get_data`), so regular closures start at the second slot.
fn closure_init() {
	page_size := get_page_size_platform()
	g_closure.v_page_size = page_size

	closure_mtx_lock_init_platform()
	closure_alloc()

	// The first slot of the fresh page becomes the header that hosts the stub.
	header := g_closure.closure_ptr
	unsafe {
		// closure_alloc left the page read+exec; open it for writing just for the copy.
		closure_memory_protect_platform(header, page_size, .read_write)
		vmemcpy(header, &closure_get_data_bytes[0], closure_get_data_bytes.len)
		closure_memory_protect_platform(header, page_size, .read_exec)
	}

	if !is_ppc64() {
		g_closure.closure_get_data = header
	} else {
		// On ppc64 the published function pointer is a two-word descriptor kept in the
		// header slot's metadata: [0] = stub code address, [1] = nil.
		mut desc := closure_slot_meta(header)
		unsafe {
			desc[0] = header
			desc[1] = nil
		}
		g_closure.closure_get_data = unsafe { ClosureGetDataFn(desc) }
	}

	// Skip the header slot for all further allocations.
	g_closure.closure_ptr = unsafe { &u8(header) + closure_size }
	g_closure.closure_cap--
}
361
// closure_create binds `func` to `data` in an executable closure slot and returns the
// callable closure pointer (INTERNAL COMPILER USE ONLY).
//
// A slot is taken from the free list built by closure_try_destroy when one is available,
// otherwise the next never-used slot of the current page is claimed, allocating a new page
// first when the current one is exhausted. The whole operation runs under the closure lock.
//
// Panics (after releasing the lock) when a new page is needed and closure_alloc can not
// provide one; previously the slot counter went negative in that case and a slot past the
// end of the current page was handed out.
@[direct_array_access]
fn closure_create(func voidptr, data voidptr) voidptr {
	closure_mtx_lock_platform()

	mut curr_closure := g_closure.free_closure_ptr
	if !isnil(curr_closure) {
		unsafe {
			// A free slot stores the next free slot in its first metadata word.
			mut p := closure_slot_meta(curr_closure)
			g_closure.free_closure_ptr = p[0]
		}
	} else {
		// Handle memory exhaustion
		if g_closure.closure_cap <= 0 {
			closure_alloc() // Allocate new memory page
			if g_closure.closure_cap <= 0 {
				// closure_alloc leaves the state untouched when the platform allocation fails.
				closure_mtx_unlock_platform()
				panic('closure: failed to allocate executable memory for closure slots')
			}
		}
		g_closure.closure_cap-- // Decrement slot counter

		// Claim current closure slot
		curr_closure = g_closure.closure_ptr
		unsafe {
			// Move to next available slot
			g_closure.closure_ptr = &u8(g_closure.closure_ptr) + closure_size
		}
	}
	unsafe {
		// Write closure metadata (data + function pointer)
		mut p := closure_slot_meta(curr_closure)
		if is_ppc64() {
			// ELFv1: guard page layout per slot:
			// [0] desc[0] = thunk code address <- returned as ELFv1 function pointer
			// [1] desc[1] = nil (TOC unused; thunk loads real TOC from func descriptor)
			// [2] userdata
			// [3] func (V function descriptor pointer into .opd)
			p[0] = curr_closure
			p[1] = nil
			p[2] = data
			p[3] = func
		} else {
			p[0] = data // Stored closure context
			p[1] = func // Target function to execute
		}
	}
	closure_mtx_unlock_platform()

	// Return executable closure object
	return closure_return_ptr(curr_closure)
}
410
// closure_data returns the userdata pointer associated with a closure object.
// `closure` is a pointer as returned by closure_create. The metadata index follows the
// layout written by closure_create: word 2 on ppc64, word 0 everywhere else.
// The layout is selected with the runtime is_ppc64() check, exactly like the writer in
// closure_create, instead of `$if ppc64`, which older (bootstrap) compilers can not parse.
@[direct_array_access]
fn closure_data(closure voidptr) voidptr {
	unsafe {
		p := closure_slot_meta(closure_exec_ptr(closure))
		if is_ppc64() {
			return p[2]
		}
		return p[0]
	}
}
423
// closure_try_destroy frees a managed closure slot and its context when the closure is known to be temporary.
// Nil pointers and pointers that closure_is_managed does not recognise are ignored.
// Otherwise the userdata is freed (when non-nil), the slot's metadata is cleared, and the
// slot is pushed onto the free list that closure_create consumes. Runs under the closure lock.
@[direct_array_access]
fn closure_try_destroy(closure voidptr) {
	if isnil(closure) {
		return
	}
	exec_ptr := closure_exec_ptr(closure)
	closure_mtx_lock_platform()
	if closure_is_managed(exec_ptr) {
		unsafe {
			mut meta := closure_slot_meta(exec_ptr)
			on_ppc64 := is_ppc64()
			// The userdata lives in word 2 on ppc64 and in word 0 elsewhere.
			data := if on_ppc64 { meta[2] } else { meta[0] }
			if !isnil(data) {
				free(data)
			}
			// Word 0 of a free slot links to the previous head of the free list.
			meta[0] = g_closure.free_closure_ptr
			meta[1] = nil
			if on_ppc64 {
				meta[2] = nil
				meta[3] = nil
			}
			g_closure.free_closure_ptr = exec_ptr
		}
	}
	closure_mtx_unlock_platform()
}
459