v / thirdparty / vgc / vgc_platform.h
385 lines · 352 sloc · 15.16 KB · 9ec03736aea607c3df09fb8992aaab4d11ddc4dc
Raw
1// vgc_platform.h - Platform abstractions for V Garbage Collector
2// Translated from Go's runtime GC (golang/go src/runtime/mgc*.go, malloc.go, mheap.go)
3
4#ifndef VGC_PLATFORM_H
5#define VGC_PLATFORM_H
6
7#include <stdint.h>
8#include <string.h>
9#include <stdio.h>
10
11// ============================================================
12// Thread-local storage
13// ============================================================
14#ifdef _WIN32
15 #define VGC_TLS __declspec(thread)
16#elif defined(__TINYC__)
17 #define VGC_TLS __thread
18#else
19 #define VGC_TLS __thread
20#endif
21
22static VGC_TLS int _vgc_cache_idx = -1;
23
24static inline int vgc_get_cache_idx(void) { return _vgc_cache_idx; }
25static inline void vgc_set_cache_idx(int idx) { _vgc_cache_idx = idx; }
26
27// ============================================================
28// Atomic operations
29// ============================================================
30#if defined(__TINYC__)
31 // TCC: use __sync builtins
32 #define vgc_atomic_load_u32(ptr) (*(volatile uint32_t*)(ptr))
33 #define vgc_atomic_store_u32(ptr, val) do { *(volatile uint32_t*)(ptr) = (val); __sync_synchronize(); } while(0)
34 #define vgc_atomic_load_u64(ptr) (*(volatile uint64_t*)(ptr))
35 #define vgc_atomic_store_u64(ptr, val) do { *(volatile uint64_t*)(ptr) = (val); __sync_synchronize(); } while(0)
36 #define vgc_atomic_add_u64(ptr, val) __sync_add_and_fetch((volatile uint64_t*)(ptr), (uint64_t)(val))
37 #define vgc_atomic_sub_u64(ptr, val) __sync_sub_and_fetch((volatile uint64_t*)(ptr), (uint64_t)(val))
38 #define vgc_atomic_add_u32(ptr, val) __sync_add_and_fetch((volatile uint32_t*)(ptr), (uint32_t)(val))
39 #define vgc_atomic_cas_u32(ptr, expected, desired) __sync_bool_compare_and_swap((volatile uint32_t*)(ptr), *(expected), (desired))
40 #define vgc_atomic_exchange_u32(ptr, val) __sync_lock_test_and_set((volatile uint32_t*)(ptr), (val))
41 #define vgc_atomic_fence() __sync_synchronize()
42#elif defined(_MSC_VER)
43 #include <intrin.h>
44 #pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange, _InterlockedExchangeAdd64)
45 static inline uint32_t vgc_atomic_load_u32_fn(volatile uint32_t* p) { uint32_t v = *p; _ReadBarrier(); return v; }
46 static inline void vgc_atomic_store_u32_fn(volatile uint32_t* p, uint32_t v) { _WriteBarrier(); *p = v; }
47 static inline uint64_t vgc_atomic_load_u64_fn(volatile uint64_t* p) { uint64_t v = *p; _ReadBarrier(); return v; }
48 static inline void vgc_atomic_store_u64_fn(volatile uint64_t* p, uint64_t v) { _WriteBarrier(); *p = v; }
49 #define vgc_atomic_load_u32(ptr) vgc_atomic_load_u32_fn((volatile uint32_t*)(ptr))
50 #define vgc_atomic_store_u32(ptr, val) vgc_atomic_store_u32_fn((volatile uint32_t*)(ptr), (val))
51 #define vgc_atomic_load_u64(ptr) vgc_atomic_load_u64_fn((volatile uint64_t*)(ptr))
52 #define vgc_atomic_store_u64(ptr, val) vgc_atomic_store_u64_fn((volatile uint64_t*)(ptr), (val))
53 #define vgc_atomic_add_u64(ptr, val) _InterlockedExchangeAdd64((volatile int64_t*)(ptr), (int64_t)(val))
54 #define vgc_atomic_sub_u64(ptr, val) _InterlockedExchangeAdd64((volatile int64_t*)(ptr), -(int64_t)(val))
55 #define vgc_atomic_add_u32(ptr, val) _InterlockedExchangeAdd((volatile long*)(ptr), (long)(val))
56 #define vgc_atomic_cas_u32(ptr, expected, desired) (_InterlockedCompareExchange((volatile long*)(ptr), (long)(desired), (long)*(expected)) == (long)*(expected))
57 #define vgc_atomic_exchange_u32(ptr, val) (uint32_t)_InterlockedExchange((volatile long*)(ptr), (long)(val))
58 #define vgc_atomic_fence() _ReadWriteBarrier()
59#else
60 // GCC/Clang
61 #define vgc_atomic_load_u32(ptr) __atomic_load_n((volatile uint32_t*)(ptr), __ATOMIC_ACQUIRE)
62 #define vgc_atomic_store_u32(ptr, val) __atomic_store_n((volatile uint32_t*)(ptr), (val), __ATOMIC_RELEASE)
63 #define vgc_atomic_load_u64(ptr) __atomic_load_n((volatile uint64_t*)(ptr), __ATOMIC_ACQUIRE)
64 #define vgc_atomic_store_u64(ptr, val) __atomic_store_n((volatile uint64_t*)(ptr), (val), __ATOMIC_RELEASE)
65 #define vgc_atomic_add_u64(ptr, val) __atomic_add_fetch((volatile uint64_t*)(ptr), (uint64_t)(val), __ATOMIC_ACQ_REL)
66 #define vgc_atomic_sub_u64(ptr, val) __atomic_sub_fetch((volatile uint64_t*)(ptr), (uint64_t)(val), __ATOMIC_ACQ_REL)
67 #define vgc_atomic_add_u32(ptr, val) __atomic_add_fetch((volatile uint32_t*)(ptr), (uint32_t)(val), __ATOMIC_ACQ_REL)
68 #define vgc_atomic_cas_u32(ptr, expected, desired) __atomic_compare_exchange_n((volatile uint32_t*)(ptr), (uint32_t*)(expected), (uint32_t)(desired), 0, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)
69 #define vgc_atomic_exchange_u32(ptr, val) __atomic_exchange_n((volatile uint32_t*)(ptr), (uint32_t)(val), __ATOMIC_ACQ_REL)
70 #define vgc_atomic_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
71#endif
72
73// ============================================================
74// OS memory allocation
75// ============================================================
76#ifdef _WIN32
77 #ifndef WIN32_LEAN_AND_MEAN
78 #define WIN32_LEAN_AND_MEAN
79 #endif
80 #include <windows.h>
81 static inline void* vgc_os_alloc(size_t size) {
82 return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
83 }
84 static inline void vgc_os_free(void* ptr, size_t size) {
85 (void)size;
86 VirtualFree(ptr, 0, MEM_RELEASE);
87 }
88 static inline void vgc_os_decommit(void* ptr, size_t size) {
89 VirtualFree(ptr, size, MEM_DECOMMIT);
90 }
91 static inline int vgc_num_cpus(void) {
92 DWORD count = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
93 return count > 0 ? (int)count : 1;
94 }
95#else
96 #include <sys/mman.h>
97 #include <unistd.h>
98 static inline void* vgc_os_alloc(size_t size) {
99 void* p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
100 return (p == MAP_FAILED) ? NULL : p;
101 }
102 static inline void vgc_os_free(void* ptr, size_t size) {
103 munmap(ptr, size);
104 }
105 static inline void vgc_os_decommit(void* ptr, size_t size) {
106 madvise(ptr, size, MADV_DONTNEED);
107 }
108 static inline int vgc_num_cpus(void) {
109 long count = sysconf(_SC_NPROCESSORS_ONLN);
110 return count > 0 ? (int)count : 1;
111 }
112#endif
113
114// ============================================================
115// Stack pointer / frame address
116// ============================================================
117#if defined(_MSC_VER)
118 #include <intrin.h>
119 static inline void* vgc_get_sp(void) { return _AddressOfReturnAddress(); }
120#else
121 static inline void* vgc_get_sp(void) { return __builtin_frame_address(0); }
122#endif
123
124// ============================================================
125// Stack bounds
126// ============================================================
127#if defined(__APPLE__)
128 #include <pthread.h>
129 static inline int vgc_get_stack_bounds(uintptr_t* lo, uintptr_t* hi) {
130 void* stack_hi = pthread_get_stackaddr_np(pthread_self());
131 size_t stack_size = pthread_get_stacksize_np(pthread_self());
132 if (stack_hi == NULL || stack_size == 0) return 0;
133 *hi = (uintptr_t)stack_hi;
134 *lo = *hi - stack_size;
135 return 1;
136 }
137#elif defined(__linux__) || defined(__ANDROID__)
138 #include <pthread.h>
139 static inline int vgc_get_stack_bounds(uintptr_t* lo, uintptr_t* hi) {
140 pthread_attr_t attr;
141 if (pthread_getattr_np(pthread_self(), &attr) != 0) return 0;
142 void* stack_lo = NULL;
143 size_t stack_size = 0;
144 int ok = pthread_attr_getstack(&attr, &stack_lo, &stack_size) == 0 && stack_lo != NULL && stack_size != 0;
145 pthread_attr_destroy(&attr);
146 if (!ok) return 0;
147 *lo = (uintptr_t)stack_lo;
148 *hi = *lo + stack_size;
149 return 1;
150 }
151#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) || defined(__OpenBSD__)
152 #include <pthread.h>
153 static inline int vgc_get_stack_bounds(uintptr_t* lo, uintptr_t* hi) {
154 pthread_attr_t attr;
155 if (pthread_attr_init(&attr) != 0) return 0;
156 if (pthread_attr_get_np(pthread_self(), &attr) != 0) {
157 pthread_attr_destroy(&attr);
158 return 0;
159 }
160 void* stack_lo = NULL;
161 size_t stack_size = 0;
162 int ok = pthread_attr_getstack(&attr, &stack_lo, &stack_size) == 0 && stack_lo != NULL && stack_size != 0;
163 pthread_attr_destroy(&attr);
164 if (!ok) return 0;
165 *lo = (uintptr_t)stack_lo;
166 *hi = *lo + stack_size;
167 return 1;
168 }
169#else
170 static inline int vgc_get_stack_bounds(uintptr_t* lo, uintptr_t* hi) {
171 (void)lo;
172 (void)hi;
173 return 0;
174 }
175#endif
176
177// ============================================================
178// Bitmap operations (for mark/alloc bitmaps)
179// ============================================================
180static inline int vgc_bitmap_get(const uint8_t* bits, uint32_t idx) {
181 return (bits[idx >> 3] >> (idx & 7)) & 1;
182}
183
184static inline void vgc_bitmap_set(uint8_t* bits, uint32_t idx) {
185 bits[idx >> 3] |= (uint8_t)(1u << (idx & 7));
186}
187
188static inline void vgc_bitmap_clear(uint8_t* bits, uint32_t idx) {
189 bits[idx >> 3] &= (uint8_t)~(1u << (idx & 7));
190}
191
192// Returns 1 if bit was already set, 0 if newly set
193static inline int vgc_bitmap_test_and_set(uint8_t* bits, uint32_t idx) {
194 uint8_t mask = (uint8_t)(1u << (idx & 7));
195 uint8_t* byte_ptr = &bits[idx >> 3];
196 if (*byte_ptr & mask) return 1;
197 *byte_ptr |= mask;
198 return 0;
199}
200
201// Popcount on a byte - count number of set bits
202static inline int vgc_popcount8(uint8_t x) {
203 x = x - ((x >> 1) & 0x55);
204 x = (x & 0x33) + ((x >> 2) & 0x33);
205 return (x + (x >> 4)) & 0x0f;
206}
207
208// ============================================================
209// Size class tables (translated from Go's runtime/sizeclasses.go)
210// 68 classes (0 = unused, 1-67 = actual size classes)
211// ============================================================
212
213// Object sizes per class
214static const uint32_t vgc_class_sizes[68] = {
215 0, 8, 16, 24, 32, 48, 64, 80,
216 96, 112, 128, 144, 160, 176, 192, 208,
217 224, 240, 256, 288, 320, 352, 384, 416,
218 448, 480, 512, 576, 640, 704, 768, 896,
219 1024, 1152, 1280, 1408, 1536, 1792, 2048, 2304,
220 2688, 3072, 3200, 3456, 4096, 4864, 5376, 6144,
221 6528, 6784, 6912, 8192, 9472, 9728, 10240, 10880,
222 12288, 13568, 14336, 16384, 18432, 19072, 20480, 21760,
223 24576, 27264, 28672, 32768
224};
225
226// Pages per span for each class
227static const uint32_t vgc_class_npages[68] = {
228 0, 1, 1, 1, 1, 1, 1, 1,
229 1, 1, 1, 1, 1, 1, 1, 1,
230 1, 1, 1, 1, 1, 1, 1, 1,
231 1, 1, 1, 1, 1, 1, 1, 1,
232 1, 1, 1, 2, 1, 2, 1, 2,
233 1, 3, 2, 3, 1, 3, 2, 3,
234 4, 5, 6, 1, 7, 6, 5, 4,
235 3, 5, 7, 2, 9, 7, 5, 8,
236 3, 10, 7, 4
237};
238
239// Objects per span for each class
240static const uint32_t vgc_class_nobjs[68] = {
241 0, 1024, 512, 341, 256, 170, 128, 102,
242 85, 73, 64, 56, 51, 46, 42, 39,
243 36, 34, 32, 28, 25, 23, 21, 19,
244 18, 17, 16, 14, 12, 11, 10, 9,
245 8, 7, 6, 11, 5, 9, 4, 7,
246 3, 8, 5, 7, 2, 5, 3, 4,
247 5, 6, 7, 1, 6, 5, 4, 3,
248 2, 3, 4, 1, 4, 3, 2, 3,
249 1, 3, 2, 1
250};
251
252// Size-to-class lookup tables (computed at init)
253static uint8_t vgc_s2c8[129]; // sizes 1..1024: index = (size+7)/8
254static uint8_t vgc_s2c128[249]; // sizes 1025..32768: index = (size-1024+127)/128
255
256static inline uint32_t vgc_get_class_size(int cls) { return vgc_class_sizes[cls]; }
257static inline uint32_t vgc_get_class_npages(int cls) { return vgc_class_npages[cls]; }
258static inline uint32_t vgc_get_class_nobjs(int cls) { return vgc_class_nobjs[cls]; }
259
260static void vgc_init_size_tables(void) {
261 int cls = 1;
262 for (int i = 0; i < 129; i++) {
263 uint32_t size = (uint32_t)(i + 1) * 8;
264 while (cls < 67 && vgc_class_sizes[cls] < size) {
265 cls++;
266 }
267 vgc_s2c8[i] = (uint8_t)cls;
268 }
269 cls = 33;
270 for (int i = 0; i < 249; i++) {
271 uint32_t size = 1024 + (uint32_t)(i + 1) * 128;
272 while (cls < 67 && vgc_class_sizes[cls] < size) {
273 cls++;
274 }
275 vgc_s2c128[i] = (uint8_t)cls;
276 }
277}
278
279// Get size class for a given allocation size
280static inline uint8_t vgc_size_class(uint32_t size) {
281 if (size == 0) return 0;
282 if (size <= 1024) {
283 return vgc_s2c8[(size + 7) >> 3];
284 }
285 if (size <= 32768) {
286 return vgc_s2c128[(size - 1024 + 127) >> 7];
287 }
288 return 0; // large allocation
289}
290
291// ============================================================
292// Pause / yield for spinlocks
293// ============================================================
294#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
295 #define vgc_cpu_pause() __asm__ __volatile__("pause")
296#elif defined(__aarch64__) || defined(_M_ARM64)
297 #define vgc_cpu_pause() __asm__ __volatile__("yield")
298#else
299 #define vgc_cpu_pause() ((void)0)
300#endif
301
302#ifdef _WIN32
303 #define vgc_thread_yield() SwitchToThread()
304#else
305 #include <sched.h>
306 #define vgc_thread_yield() sched_yield()
307#endif
308
309// ============================================================
310// Simple mutex (adaptive spinlock with backoff)
311// ============================================================
312static inline void vgc_mutex_lock(volatile uint32_t* lock) {
313 for (int spin = 0; ; spin++) {
314 uint32_t expected = 0;
315 if (vgc_atomic_cas_u32(lock, &expected, 1)) {
316 return;
317 }
318 if (spin < 16) {
319 vgc_cpu_pause();
320 } else if (spin < 32) {
321 for (int i = 0; i < spin; i++) vgc_cpu_pause();
322 } else {
323 vgc_thread_yield();
324 }
325 }
326}
327
328static inline void vgc_mutex_unlock(volatile uint32_t* lock) {
329 vgc_atomic_store_u32(lock, 0);
330}
331
332// ============================================================
333// Arena address index for O(1) pointer-to-arena lookup
334// Divides address space into 1GB chunks, maps to arena index.
335// ============================================================
336#define VGC_ADDR_SHIFT 30 // 1GB chunks
337#define VGC_ADDR_MAP_SIZE 4096
338
339// Maps (address >> 30) -> arena_index+1 (0 = not mapped)
340static uint16_t vgc_addr_map[VGC_ADDR_MAP_SIZE];
341
342static inline void vgc_addr_map_register(uintptr_t base, size_t size, int arena_idx) {
343 uintptr_t lo = base >> VGC_ADDR_SHIFT;
344 uintptr_t hi = (base + size - 1) >> VGC_ADDR_SHIFT;
345 for (uintptr_t i = lo; i <= hi && i < VGC_ADDR_MAP_SIZE; i++) {
346 vgc_addr_map[i] = (uint16_t)(arena_idx + 1);
347 }
348}
349
350// Returns arena index or -1
351static inline int vgc_addr_to_arena(uintptr_t addr) {
352 uintptr_t idx = addr >> VGC_ADDR_SHIFT;
353 if (idx >= VGC_ADDR_MAP_SIZE) return -1;
354 int v = vgc_addr_map[idx];
355 return v ? v - 1 : -1;
356}
357
358// ============================================================
359// Thread creation (for GC workers - avoids V's spawn which
360// causes import cycles with builtin -> sync.threads -> builtin)
361// ============================================================
362typedef void (*vgc_thread_fn)(void);
363
364#ifdef _WIN32
365static inline void vgc_start_thread(vgc_thread_fn fn) {
366 CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)fn, NULL, 0, NULL);
367}
368#else
369#include <pthread.h>
370static void* _vgc_thread_trampoline(void* arg) {
371 vgc_thread_fn fn = (vgc_thread_fn)arg;
372 fn();
373 return NULL;
374}
375static inline void vgc_start_thread(vgc_thread_fn fn) {
376 pthread_t tid;
377 pthread_attr_t attr;
378 pthread_attr_init(&attr);
379 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
380 pthread_create(&tid, &attr, _vgc_thread_trampoline, (void*)fn);
381 pthread_attr_destroy(&attr);
382}
383#endif
384
385#endif // VGC_PLATFORM_H
386