| 1 | @[has_globals] |
| 2 | module closure |
| 3 | |
// Inspired by Chris Wellons's work
| 5 | // https://nullprogram.com/blog/2017/01/08/ |
| 6 | |
// assumed_page_size is the fixed byte distance between an executable closure slot
// and its metadata slot: `closure_slot_meta` subtracts it from the slot address, and
// the machine-code thunks hard-code offsets derived from the same value.
const assumed_page_size = int(0x4000)
// ppc64_architecture is the value `C.__V_architecture` is compared against in `is_ppc64`.
// NOTE(review): presumably the backend's architecture enum value for ppc64 - confirm against the compiler.
const ppc64_architecture = int(11)
| 9 | |
// ClosureGetDataFn is the signature of the installed stub that returns the userdata
// pointer of the closure that is currently executing.
type ClosureGetDataFn = fn () voidptr
| 11 | |
// ClosurePage is one node of the singly linked list of executable pages that were
// handed out by `closure_alloc`; `closure_is_managed` walks this list.
struct ClosurePage {
mut:
	// next registered page (nil terminates the list)
	next &ClosurePage = unsafe { nil }
	// address of the first closure slot in the executable page
	exec_page_start voidptr
}
| 17 | |
// Closure holds the global allocator state of the closure subsystem.
@[heap]
struct Closure {
	// embedded lock state; declared elsewhere (presumably in the platform-specific files)
	ClosureMutex
mut:
	// next never-used slot in the most recently allocated executable page
	closure_ptr voidptr
	// entry point (or, on ppc64, function descriptor) of the get-data stub set up by `closure_init`
	closure_get_data ClosureGetDataFn = unsafe { nil }
	// number of never-used slots left at `closure_ptr`
	closure_cap int
	// head of the free list of destroyed slots; links are stored in each slot's metadata word 0
	free_closure_ptr voidptr
	// list of all registered executable pages
	pages &ClosurePage = unsafe { nil }
	// size in bytes of one executable page, set by `closure_init`
	v_page_size int = int(0x4000)
}
| 29 | |
// g_closure is the single, process-wide closure allocator state.
__global g_closure = Closure{}
| 31 | |
// MemoryProtectAtrr selects the protection passed to `closure_memory_protect_platform`.
enum MemoryProtectAtrr {
	// used once a page holds its thunks (readable + executable, per the variant name)
	read_exec
	// used temporarily while code bytes are being copied into a page
	read_write
}
| 36 | |
// is_ppc64 reports whether the program runs on big-endian ppc64.
// The architecture test stays a runtime comparison on purpose: it must remain
// bootstrap-compatible, because older compilers can not parse `$if ppc64` yet.
@[inline]
fn is_ppc64() bool {
	mut on_ppc64 := false
	$if big_endian {
		// only big-endian builds can be ppc64; everything else keeps the `false` default
		on_ppc64 = C.__V_architecture == ppc64_architecture
	}
	return on_ppc64
}
| 46 | |
// closure_thunk is the per-architecture machine-code template copied into every closure slot.
// Each thunk loads the userdata word from the slot's metadata (located `assumed_page_size`
// bytes before the slot) into a scratch floating-point/vector register, loads the target
// function address from the same metadata, and jumps to it.
//
// refer to https://godbolt.org/z/r7P3EYv6c for a complete assembly
//
// NOTE: Keep the first branch as the longest byte sequence. In translated/bootstrap C mode
// (`vc/v.c`), V emits a fixed C array whose size is inferred from the first branch.
// NOTE(review): the first (ppc64le) branch below is 40 bytes, while the final `big_endian`
// (ppc64 ELFv1) branch is 44 bytes - confirm that this still satisfies the rule above.
// The final `big_endian` branch maps to ppc64 here, since the supported big-endian
// closure targets handled above are s390x and sparc64.
// vfmt off
pub const closure_thunk = $if ppc64le {
	[
		u8(0xa6), 0x02, 0x08, 0x7c, // mflr %r0
		0x05, 0x00, 0x00, 0x48, // bl here
		0xa6, 0x02, 0xc8, 0x7d, // here: mflr %r14
		0xf8, 0xbf, 0xce, 0x39, // addi %r14, %r14, -16392
		0x00, 0x00, 0xce, 0xc9, // lfd %f14, 0(%r14)
		0x08, 0x00, 0xce, 0xe9, // ld %r14, 8(%r14)
		0x78, 0x73, 0xcc, 0x7d, // mr %r12, %r14
		0xa6, 0x03, 0x08, 0x7c, // mtlr %r0
		0xa6, 0x03, 0xc9, 0x7d, // mtctr %r14
		0x20, 0x04, 0x80, 0x4e, // bctr
	]!
} $else $if !ppc64le && !amd64 && !i386 && !arm64 && !arm32 && !rv64 && !rv32 && !s390x && !loongarch64 {
	// ppc (32-bit PowerPC) - expressed as negation of all other arches for bootstrap compat
	[
		u8(0x7c), 0x08, 0x02, 0xa6, // mflr %r0
		0x48, 0x00, 0x00, 0x05, // bl here
		0x7d, 0x88, 0x02, 0xa6, // here: mflr %r12
		0x39, 0x8c, 0xbf, 0xf8, // addi %r12, %r12, -16392
		0xc9, 0xcc, 0x00, 0x00, // lfd %f14, 0(%r12)
		0x81, 0x8c, 0x00, 0x04, // lwz %r12, 4(%r12)
		0x7c, 0x08, 0x03, 0xa6, // mtlr %r0
		0x7d, 0x89, 0x03, 0xa6, // mtctr %r12
		0x4e, 0x80, 0x04, 0x20, // bctr
	]!
} $else $if amd64 {
	[
		u8(0xF3), 0x44, 0x0F, 0x7E, 0x3D, 0xF7, 0xBF, 0xFF, 0xFF, // movq xmm15, QWORD PTR [rip - userdata]
		0xFF, 0x25, 0xF9, 0xBF, 0xFF, 0xFF // jmp QWORD PTR [rip - fn]
	]!
} $else $if i386 {
	[
		u8(0xe8), 0x00, 0x00, 0x00, 0x00, // call here
		// here:
		0x59, // pop ecx
		0x66, 0x0F, 0x6E, 0xF9, // movd xmm7, ecx
		0xff, 0xA1, 0xff, 0xbf, 0xff, 0xff, // jmp DWORD PTR [ecx - 0x4001] # <fn>
	]!
} $else $if arm64 {
	[
		u8(0x11), 0x00, 0xFE, 0x5C, // ldr d17, userdata
		0x30, 0x00, 0xFE, 0x58, // ldr x16, fn
		0x00, 0x02, 0x1F, 0xD6 // br x16
	]!
} $else $if arm32 {
	[
		u8(0x04), 0xC0, 0x4F, 0xE2, // adr ip, here
		// here:
		0x01, 0xC9, 0x4C, 0xE2, // sub ip, ip, #0x4000
		0x90, 0xCA, 0x07, 0xEE, // vmov s15, ip
		0x00, 0xC0, 0x9C, 0xE5, // ldr ip, [ip, 0]
		0x1C, 0xFF, 0x2F, 0xE1 // bx ip
	]!
} $else $if rv64 {
	[
		u8(0x97), 0xCF, 0xFF, 0xFF, // auipc t6, 0xffffc
		0x03, 0xBF, 0x8F, 0x00, // ld t5, 8(t6)
		0x07, 0xB3, 0x0F, 0x00, // fld ft6, 0(t6)
		0x67, 0x00, 0x0F, 0x00, // jr t5
	]!
} $else $if rv32 {
	[
		u8(0x97), 0xCF, 0xFF, 0xFF, // auipc t6, 0xffffc
		0x03, 0xAF, 0x4F, 0x00, // lw t5, 4(t6)
		0x07, 0xAB, 0x0F, 0x00, // flw fs6, 0(t6)
		0x67, 0x00, 0x0F, 0x00 // jr t5
	]!
} $else $if s390x {
	[
		u8(0xC0), 0x10, 0xFF, 0xFF, 0xE0, 0x00, // larl %r1, -16384
		0x68, 0xF0, 0x10, 0x00, // ld %f15, 0(%r1)
		0xE3, 0x10, 0x10, 0x08, 0x00, 0x04, // lg %r1, 8(%r1)
		0x07, 0xF1, // br %r1
	]!
} $else $if loongarch64 {
	[
		u8(0x92), 0xFF, 0xFF, 0x1D, // pcaddu12i t6, -4
		0x48, 0x02, 0x80, 0x2B, // fld.d f8, t6, 0
		0x51, 0x22, 0xC0, 0x28, // ld.d t5, t6, 8
		0x20, 0x02, 0x00, 0x4C, // jr t5
	]!
} $else $if sparc64 {
	[
		u8(0x83), 0x41, 0x40, 0x00, // rd %pc, %g1
		0x05, 0x00, 0x00, 0x10, // sethi %hi(0x4000), %g2
		0x84, 0x10, 0xa0, 0x00, // mov %g2, %g2 ! 4000 <main>
		0x82, 0x20, 0x40, 0x02, // sub %g1, %g2, %g1
		0xff, 0x18, 0x60, 0x00, // ldd [ %g1 ], %d62
		0xc2, 0x58, 0x60, 0x08, // ldx [ %g1 + 8 ], %g1
		0x81, 0xc0, 0x40, 0x00, // jmp %g1
		0x01, 0x00, 0x00, 0x00 // nop
	]!
} $else $if big_endian {
	// ppc64 (ELFv1): metadata words 2 and 3 of the slot hold userdata and the function descriptor pointer
	[
		u8(0x7C), 0x08, 0x02, 0xA6, // mflr %r0
		0x48, 0x00, 0x00, 0x05, // bl here
		0x7D, 0xC8, 0x02, 0xA6, // here: mflr %r14
		0x39, 0xCE, 0xC0, 0x08, // addi %r14, %r14, -16376
		0xC9, 0xCE, 0x00, 0x00, // lfd %f14, 0(%r14) // userdata
		0xE9, 0xCE, 0x00, 0x08, // ld %r14, 8(%r14) // func descriptor ptr
		0xE9, 0x8E, 0x00, 0x00, // ld %r12, 0(%r14) // code addr from descriptor
		0xE8, 0x4E, 0x00, 0x08, // ld %r2, 8(%r14) // TOC from descriptor
		0x7C, 0x08, 0x03, 0xA6, // mtlr %r0
		0x7D, 0x89, 0x03, 0xA6, // mtctr %r12
		0x4E, 0x80, 0x04, 0x20, // bctr
	]!
} $else {
	// unsupported architecture: placeholder byte only
	[u8(0)]!
}
| 164 | |
// closure_get_data_bytes is the per-architecture machine code of the get-data stub that
// `closure_init` copies into the first slot of the first executable page. It moves the value
// that the matching `closure_thunk` parked in a scratch register into the integer return
// register (on i386/arm32 the parked value is an address from which userdata is then loaded).
//
// NOTE: Keep the first branch as the longest byte sequence. In translated/bootstrap C mode
// (`vc/v.c`), V emits a fixed C array whose size is inferred from the first branch.
const closure_get_data_bytes = $if !ppc64le && !amd64 && !i386 && !arm64 && !arm32 && !rv64 && !rv32 && !s390x && !loongarch64 {
	// ppc (32-bit PowerPC) - expressed as negation of all other arches for bootstrap compat
	[
		u8(0x94), 0x21, 0xff, 0xf0, // stwu %r1, -16(%r1)
		0xd9, 0xc1, 0x00, 0x08, // stfd %f14, 8(%r1)
		0x80, 0x61, 0x00, 0x08, // lwz %r3, 8(%r1)
		0x38, 0x21, 0x00, 0x10, // addi %r1, %r1, 16
		0x4e, 0x80, 0x00, 0x20, // blr
	]!
} $else $if arm32 {
	[
		u8(0x90), 0x0A, 0x17, 0xEE, // vmov r0, s15
		0x04, 0x00, 0x10, 0xE5, // ldr r0, [r0, #-4]
		0x1E, 0xFF, 0x2F, 0xE1 // bx lr
	]!
} $else $if amd64 {
	[
		u8(0x66), 0x4C, 0x0F, 0x7E, 0xF8, // movq rax, xmm15
		0xC3 // ret
	]!
} $else $if i386 {
	[
		u8(0x66), 0x0F, 0x7E, 0xF8, // movd eax, xmm7
		0x8B, 0x80, 0xFB, 0xBF, 0xFF, 0xFF, // mov eax, DWORD PTR [eax - 0x4005]
		0xc3 // ret
	]!
} $else $if arm64 {
	[
		u8(0x20), 0x02, 0x66, 0x9E, // fmov x0, d17
		0xC0, 0x03, 0x5F, 0xD6 // ret
	]!
} $else $if rv64 {
	[
		u8(0x53), 0x05, 0x03, 0xE2, // fmv.x.d a0, ft6
		0x67, 0x80, 0x00, 0x00, // ret
	]!
} $else $if rv32 {
	[
		u8(0x53), 0x05, 0x0B, 0xE0, // fmv.x.w a0, fs6
		0x67, 0x80, 0x00, 0x00 // ret
	]!
} $else $if s390x {
	[
		u8(0xB3), 0xCD, 0x00, 0x2F, // lgdr %r2, %f15
		0x07, 0xFE, // br %r14
	]!
} $else $if ppc64le {
	[
		u8(0x66), 0x00, 0xc3, 0x7d, // mfvsrd %r3, %f14
		0x20, 0x00, 0x80, 0x4e, // blr
	]!
} $else $if loongarch64 {
	[
		u8(0x04), 0xB9, 0x14, 0x01, // movfr2gr.d a0, f8
		0x20, 0x00, 0x00, 0x4C, // ret
	]!
} $else $if sparc64 {
	[
		u8(0x91), 0xb0, 0x22, 0x1f, // movdtox %f62, %o0
		0x81, 0xc3, 0xe0, 0x08, // retl
		0x01, 0x00, 0x00, 0x00 // nop
	]!
} $else $if big_endian {
	// ppc64 (ELFv1), see the matching branch of `closure_thunk`
	[
		u8(0x7d), 0xc3, 0x00, 0x66, // mfvsrd %r3, %f14
		0x4e, 0x80, 0x00, 0x20 // blr
	]!
} $else {
	// unsupported architecture: placeholder byte only
	[u8(0)]!
}
| 237 | |
| 238 | // vfmt on |
| 239 | |
// closure_size is the byte size of one closure slot; the same stride is used for the
// executable slot and for its metadata slot `assumed_page_size` bytes below it.
// equal to `max(2*sizeof(void*), sizeof(__closure_thunk))`, rounded up to the next multiple of `sizeof(void*)`
// NOTE: This is a workaround for `-usecache` bug, as it can't include `fn get_closure_size()` needed by `const closure_size` in `build-module` mode.
const closure_size_1 = if 2 * u32(sizeof(voidptr)) > u32(closure_thunk.len) {
	// two pointers are already a multiple of the pointer size
	2 * u32(sizeof(voidptr))
} else {
	// pre-add (pointer size - 1) so that the mask below rounds up instead of down
	u32(closure_thunk.len) + u32(sizeof(voidptr)) - 1
}
// clear the low bits to land on a multiple of the pointer size
const closure_size = int(closure_size_1 & ~(u32(sizeof(voidptr)) - 1))
| 248 | |
// closure_exec_ptr converts a closure value, as handed out by `closure_create`, into the
// address of its executable slot. On ppc64 the closure value is the descriptor located
// `assumed_page_size` bytes before the slot; everywhere else it already is the slot.
@[inline]
fn closure_exec_ptr(closure voidptr) voidptr {
	if !is_ppc64() {
		return closure
	}
	return unsafe { &u8(closure) + assumed_page_size }
}
| 256 | |
// closure_return_ptr is the inverse of `closure_exec_ptr`: it maps the address of an
// executable slot to the value that callers receive as the closure. On ppc64 that is the
// address `assumed_page_size` bytes before the slot; elsewhere it is the slot itself.
@[inline]
fn closure_return_ptr(exec_ptr voidptr) voidptr {
	if !is_ppc64() {
		return exec_ptr
	}
	return unsafe { &u8(exec_ptr) - assumed_page_size }
}
| 264 | |
// closure_slot_meta returns the metadata words of the executable slot `exec_ptr`.
// They live exactly `assumed_page_size` bytes before the slot.
@[inline]
fn closure_slot_meta(exec_ptr voidptr) &voidptr {
	unsafe {
		meta_addr := &u8(exec_ptr) - assumed_page_size
		return &voidptr(meta_addr)
	}
}
| 269 | |
// closure_register_page records `exec_page_start` by pushing a new node onto the
// front of the `g_closure.pages` list.
fn closure_register_page(exec_page_start voidptr) {
	unsafe {
		mut entry := &ClosurePage(malloc(sizeof(ClosurePage)))
		// the raw allocation is uninitialised: set every field explicitly
		entry.next = g_closure.pages
		entry.exec_page_start = exec_page_start
		g_closure.pages = entry
	}
}
| 280 | |
// closure_is_managed reports whether `exec_ptr` is the start of a closure slot inside one
// of the registered executable pages. A nil pointer, an address outside every page, an
// address that is not slot-aligned, and the very first slot of a page all yield `false`.
fn closure_is_managed(exec_ptr voidptr) bool {
	if isnil(exec_ptr) {
		return false
	}
	target := unsafe { usize(exec_ptr) }
	page_bytes := usize(g_closure.v_page_size)
	slot_bytes := usize(closure_size)
	mut node := g_closure.pages
	for node != unsafe { nil } {
		base := unsafe { usize(node.exec_page_start) }
		inside := target >= base && target < base + page_bytes
		if !inside {
			node = node.next
			continue
		}
		// found the containing page: the answer depends only on the offset within it
		offset := target - base
		return offset >= slot_bytes && offset % slot_bytes == 0
	}
	return false
}
| 297 | |
// closure_alloc allocates executable memory pages for closures(INTERNAL COMPILER USE ONLY).
// The executable page starts `g_closure.v_page_size` bytes into the region returned by
// `closure_alloc_platform`. It is registered, made the current allocation page, filled
// with copies of `closure_thunk`, and finally switched to `.read_exec`.
// Nothing happens when the platform allocation returns nil.
fn closure_alloc() {
	region := closure_alloc_platform()
	if isnil(region) {
		return
	}
	page_bytes := g_closure.v_page_size
	slot_count := page_bytes / closure_size
	// the executable page begins where the leading page of the region ends
	exec_start := unsafe { region + page_bytes }
	closure_register_page(exec_start)
	g_closure.closure_ptr = exec_start // Current allocation pointer
	g_closure.closure_cap = slot_count // Remaining slot count

	// Stamp one thunk template into every slot
	for slot_idx in 0 .. slot_count {
		unsafe { vmemcpy(exec_start + slot_idx * closure_size, &closure_thunk[0], closure_thunk.len) }
	}
	closure_memory_protect_platform(g_closure.closure_ptr, page_bytes, .read_exec)
}
| 321 | |
// closure_init initializes global closure subsystem(INTERNAL COMPILER USE ONLY).
//
// Steps:
// 1. compute the executable page size: the smallest multiple of the system page size
//    that is not smaller than `assumed_page_size`;
// 2. initialise the lock and allocate the first executable page;
// 3. overwrite the first slot of that page with the get-data stub and publish it
//    through `g_closure.closure_get_data`;
// 4. skip that header slot for subsequent allocations.
fn closure_init() {
	// Determine system page size
	mut page_size := get_page_size_platform()
	if page_size <= 0 {
		// defensive: avoid a division by zero below if the platform reports no usable size
		page_size = assumed_page_size
	}
	// Round up to a multiple of the system page size that covers `assumed_page_size`.
	// The thunks and `closure_slot_meta` address the metadata `assumed_page_size` bytes
	// below each slot, so the executable page must start at least that far into the mapping
	// (`closure_alloc` places it `v_page_size` bytes after the mapping start).
	page_size = page_size * (((assumed_page_size - 1) / page_size) + 1)
	g_closure.v_page_size = page_size // Store calculated size

	// Initialize thread-safety lock
	closure_mtx_lock_init_platform()

	// Initial memory allocation
	closure_alloc()

	// Install closure handler template
	unsafe {
		// Temporarily enable write access to executable memory
		closure_memory_protect_platform(g_closure.closure_ptr, page_size, .read_write)
		// Copy closure entry stub code
		vmemcpy(g_closure.closure_ptr, &closure_get_data_bytes[0], closure_get_data_bytes.len)
		// Re-normalize execution protection
		closure_memory_protect_platform(g_closure.closure_ptr, page_size, .read_exec)
	}
	// Setup global closure handler pointer
	if is_ppc64() {
		// ELFv1: a function pointer is the address of a descriptor, so build one in the
		// metadata slot of the header: [0] = code address, [1] = TOC (unused by the stub)
		mut desc := unsafe { &voidptr(&u8(g_closure.closure_ptr) - assumed_page_size) }
		unsafe {
			desc[0] = g_closure.closure_ptr
			desc[1] = nil
		}
		g_closure.closure_get_data = unsafe { ClosureGetDataFn(desc) }
	} else {
		g_closure.closure_get_data = g_closure.closure_ptr
	}

	// Advance allocation pointer past header
	unsafe {
		g_closure.closure_ptr = &u8(g_closure.closure_ptr) + closure_size
	}
	g_closure.closure_cap-- // Account for header slot
}
| 361 | |
// closure_create creates closure objects at compile-time(INTERNAL COMPILER USE ONLY).
// It takes a slot from the free list when one is available, otherwise the next unused
// slot of the current page (allocating a new page once the current one is exhausted),
// stores `data` and `func` in the slot's metadata, and returns the closure value.
// The whole slot selection and metadata write happens under the closure lock.
@[direct_array_access]
fn closure_create(func voidptr, data voidptr) voidptr {
	closure_mtx_lock_platform()

	mut slot := g_closure.free_closure_ptr
	if isnil(slot) {
		// Free list is empty: carve a fresh slot out of the current page
		if g_closure.closure_cap == 0 {
			closure_alloc() // Current page exhausted, get a new one
		}
		g_closure.closure_cap--
		slot = g_closure.closure_ptr
		unsafe {
			g_closure.closure_ptr = &u8(slot) + closure_size
		}
	} else {
		// Pop the head of the free list; the link lives in metadata word 0
		unsafe {
			free_link := closure_slot_meta(slot)
			g_closure.free_closure_ptr = free_link[0]
		}
	}
	unsafe {
		mut meta := closure_slot_meta(slot)
		if is_ppc64() {
			// ELFv1: guard page layout per slot:
			// [0] desc[0] = thunk code address <- returned as ELFv1 function pointer
			// [1] desc[1] = nil (TOC unused; thunk loads real TOC from func descriptor)
			// [2] userdata
			// [3] func (V function descriptor pointer into .opd)
			meta[0] = slot
			meta[1] = nil
			meta[2] = data
			meta[3] = func
		} else {
			meta[0] = data // Closure context
			meta[1] = func // Function the thunk jumps to
		}
	}
	closure_mtx_unlock_platform()

	return closure_return_ptr(slot)
}
| 410 | |
// closure_data returns the userdata pointer associated with a closure object.
// `closure` must be a value previously returned by `closure_create`.
// The userdata is metadata word 2 on ppc64 (words 0 and 1 form the ELFv1 descriptor)
// and metadata word 0 everywhere else.
@[direct_array_access]
fn closure_data(closure voidptr) voidptr {
	unsafe {
		mut p := closure_slot_meta(closure_exec_ptr(closure))
		// Use the runtime check, like every other function in this file: older bootstrap
		// compilers can not parse `$if ppc64`, and `closure_exec_ptr` above already decides
		// via `is_ppc64()`, so both must agree on the layout.
		if is_ppc64() {
			return p[2]
		}
		return p[0]
	}
}
| 423 | |
// closure_try_destroy frees a managed closure slot and its context when the closure is known to be temporary.
// Nil closures and pointers that `closure_is_managed` rejects are ignored. Otherwise the
// stored userdata is freed (when non-nil), the metadata is cleared, and the slot is
// pushed onto the free list, all under the closure lock.
@[direct_array_access]
fn closure_try_destroy(closure voidptr) {
	if isnil(closure) {
		return
	}
	slot := closure_exec_ptr(closure)
	closure_mtx_lock_platform()
	if closure_is_managed(slot) {
		unsafe {
			mut meta := closure_slot_meta(slot)
			on_ppc64 := is_ppc64()
			// userdata sits behind the two descriptor words on ppc64
			ctx := if on_ppc64 { meta[2] } else { meta[0] }
			if !isnil(ctx) {
				free(ctx)
			}
			// metadata word 0 becomes the free-list link; the rest is cleared
			meta[0] = g_closure.free_closure_ptr
			meta[1] = nil
			if on_ppc64 {
				meta[2] = nil
				meta[3] = nil
			}
			g_closure.free_closure_ptr = slot
		}
	}
	closure_mtx_unlock_platform()
}
| 459 | |