v2 / vlib / builtin / vgc_gc_d_vgc.c.v
560 lines · 499 sloc · 15.45 KB · 9ec03736aea607c3df09fb8992aaab4d11ddc4dc
Raw
1// vgc_gc_d_vgc.c.v - V Garbage Collector: Mark, sweep, and orchestration
2// Translated from Go's runtime GC (mgc.go, mgcmark.go, mgcsweep.go, mgcwork.go, mgcpacer.go)
3// Concurrent tri-color mark-and-sweep with parallel marking.
4
5@[has_globals]
6module builtin
7
8// ============================================================
9// GC Orchestration (translated from Go's runtime.gcStart, gcMarkDone, gcMarkTermination)
10// ============================================================
11
12// vgc_gc_start triggers a garbage collection cycle.
13// Translated from Go's gcStart() in mgc.go.
14// Flow: sweep termination (STW) -> mark (parallel) -> mark termination (STW) -> sweep (concurrent)
15fn vgc_gc_start() {
16 // Only one GC at a time
17 mut expected := vgc_phase_off
18 if !C.vgc_atomic_cas_u32(&vgc_heap.gc_phase, &expected, vgc_phase_mark) {
19 return
20 }
21
22 // === Phase 1: Sweep Termination (STW) ===
23 // Ensure any previous sweep is complete
24 vgc_sweep_finish()
25
26 // Stop the world for root scanning
27 // Set stop flag - threads will stop at next safepoint (allocation)
28 vgc_heap.gc_target_stops = u32(vgc_heap.ncaches - 1) // all threads except GC thread
29 C.vgc_atomic_store_u32(&vgc_heap.gc_stopped_count, 0)
30 C.vgc_atomic_store_u32(&vgc_heap.gc_stop_flag, 1)
31
32 // Wait for threads to stop (with timeout to avoid deadlock)
33 mut wait_iters := 0
34 for C.vgc_atomic_load_u32(&vgc_heap.gc_stopped_count) < vgc_heap.gc_target_stops {
35 C.vgc_atomic_fence()
36 wait_iters++
37 if wait_iters > 1000000 {
38 break // Don't wait forever - proceed with what we have
39 }
40 }
41
42 // Clear mark bits on all spans (prepare for new cycle)
43 vgc_clear_mark_bits()
44
45 // === Phase 2: Mark (parallel) ===
46 // Enable write barrier (for concurrent correctness)
47 C.vgc_atomic_store_u32(&vgc_heap.wb_enabled, 1)
48
49 // The thread starting GC does not pass through a safepoint.
50 vgc_refresh_stack_range()
51
52 // Scan roots: stacks + globals (conservative scanning)
53 vgc_mark_roots()
54
55 // Resume threads - they can allocate but write barrier is on
56 C.vgc_atomic_store_u32(&vgc_heap.gc_stop_flag, 0)
57 C.vgc_atomic_fence()
58
59 // Parallel mark: drain the work queue
60 vgc_parallel_mark()
61
62 // === Phase 3: Mark Termination (brief STW) ===
63 C.vgc_atomic_store_u32(&vgc_heap.gc_phase, vgc_phase_mark_term)
64
65 // Final drain of work queue
66 vgc_drain_mark_work()
67
68 // Disable write barrier
69 C.vgc_atomic_store_u32(&vgc_heap.wb_enabled, 0)
70
71 // Compute live bytes from mark bits
72 marked := vgc_count_marked()
73 C.vgc_atomic_store_u64(&vgc_heap.heap_marked, marked)
74 // Reset heap_live to match what we actually found alive
75 C.vgc_atomic_store_u64(&vgc_heap.heap_live, marked)
76
77 // === Phase 4: Sweep ===
78 vgc_heap.sweep_gen++
79 C.vgc_atomic_store_u32(&vgc_heap.sweep_done, 0)
80 vgc_heap.sweep_idx = 0
81 C.vgc_atomic_store_u32(&vgc_heap.gc_phase, vgc_phase_sweep)
82
83 // Sweep synchronously - it's fast and avoids race conditions
84 vgc_do_sweep()
85
86 // Update GC trigger for next cycle (translated from Go's gcController.endCycle)
87 vgc_update_trigger()
88
89 vgc_heap.gc_cycle++
90 C.vgc_atomic_store_u32(&vgc_heap.gc_phase, vgc_phase_off)
91}
92
93// ============================================================
94// Mark phase (translated from Go's mgcmark.go)
95// ============================================================
96
97// Clear all mark bits before a new GC cycle
98fn vgc_clear_mark_bits() {
99 for i in 0 .. vgc_heap.nspans {
100 span := unsafe { vgc_heap.allspans[i] }
101 if span == unsafe { nil } || !span.in_use {
102 continue
103 }
104 if span.mark_bits != unsafe { nil } {
105 bitmap_size := (span.nelems + 7) / 8
106 unsafe { C.memset(span.mark_bits, 0, bitmap_size) }
107 }
108 }
109}
110
111// Conservative root scanning: scan thread stacks and look for pointers into the heap.
112// Translated from Go's markroot() / scanblock() - but using conservative scanning
113// since V compiles to C and we don't have precise type info at runtime.
114fn vgc_mark_roots() {
115 // Scan each registered thread's stack
116 for i in 0 .. vgc_heap.ncaches {
117 cache := unsafe { &vgc_heap.caches[i] }
118 if !cache.registered {
119 continue
120 }
121 if cache.stack_lo > 0 && cache.stack_hi > 0 && cache.stack_hi > cache.stack_lo {
122 vgc_scan_range(cache.stack_lo, cache.stack_hi)
123 }
124 }
125}
126
127// Scan a memory range conservatively, looking for pointers into the GC heap.
128// Each word-aligned value that looks like a heap pointer is treated as a root.
129// Translated from Go's scanblock() with conservative pointer finding.
130fn vgc_scan_range(lo usize, hi usize) {
131 // Align to word boundaries
132 start := (lo + sizeof(usize) - 1) & ~(usize(sizeof(usize)) - 1)
133 mut addr := start
134 for addr + sizeof(usize) <= hi {
135 val := unsafe { *(&usize(voidptr(addr))) }
136 if val != 0 {
137 vgc_shade(val)
138 }
139 addr += sizeof(usize)
140 }
141}
142
143// Shade marks an object grey (discovered but not yet scanned).
144// Translated from Go's shade() in mgcmark.go.
145fn vgc_shade(addr usize) {
146 if addr < vgc_arena_lo || addr >= vgc_arena_hi {
147 return
148 }
149 span := vgc_find_span(voidptr(addr))
150 if span == unsafe { nil } || !span.in_use {
151 return
152 }
153 if span.elem_size == 0 {
154 return
155 }
156 // Find which object this address belongs to
157 obj_idx := u32((addr - span.base) / usize(span.elem_size))
158 if obj_idx >= span.nelems {
159 return
160 }
161 // Check if object is allocated
162 if span.alloc_bits == unsafe { nil } || C.vgc_bitmap_get(span.alloc_bits, obj_idx) == 0 {
163 return
164 }
165 // Mark it (grey -> will be scanned)
166 if span.mark_bits != unsafe { nil } {
167 if C.vgc_bitmap_test_and_set(span.mark_bits, obj_idx) == 0 {
168 // Newly marked - add to work queue for scanning (only if it may contain pointers)
169 if !span.noscan {
170 obj_addr := span.base + usize(obj_idx) * usize(span.elem_size)
171 vgc_work_put(obj_addr)
172 }
173 }
174 }
175}
176
177// Parallel mark using OS threads.
178// Translated from Go's gcDrain() with multiple workers.
179fn vgc_parallel_mark() {
180 mut nworkers := C.vgc_num_cpus()
181 if nworkers < 1 {
182 nworkers = 1
183 } else if nworkers > 4 {
184 nworkers = 4
185 }
186 vgc_heap.gc_nworkers = nworkers
187 C.vgc_atomic_store_u32(&vgc_heap.gc_workers_done, 0)
188
189 if nworkers <= 1 {
190 vgc_drain_mark_work()
191 return
192 }
193
194 // Start helper workers and let the current GC thread participate as well.
195 for _ in 1 .. nworkers {
196 C.vgc_start_thread(vgc_mark_worker)
197 }
198 vgc_drain_mark_work()
199 C.vgc_atomic_add_u32(&vgc_heap.gc_workers_done, 1)
200
201 // Wait for all workers to finish
202 for C.vgc_atomic_load_u32(&vgc_heap.gc_workers_done) < u32(nworkers) {
203 C.vgc_atomic_fence()
204 }
205}
206
207// Mark worker function - runs in a spawned thread.
208// Translated from Go's gcDrain() loop.
209fn vgc_mark_worker() {
210 vgc_ensure_registered()
211 vgc_drain_mark_work()
212 C.vgc_atomic_add_u32(&vgc_heap.gc_workers_done, 1)
213}
214
215// Drain the mark work queue - scan grey objects and mark their referents.
216// Uses precise pointer maps when available (from vgc_malloc_typed),
217// falls back to conservative scanning otherwise.
218fn vgc_drain_mark_work() {
219 for {
220 obj_addr := vgc_work_get()
221 if obj_addr == 0 {
222 break
223 }
224 span := vgc_find_span(voidptr(obj_addr))
225 if span == unsafe { nil } || span.noscan {
226 continue // noscan objects don't contain pointers
227 }
228 if span.has_ptrmap {
229 // Precise scanning: only check known pointer word offsets
230 vgc_scan_precise(obj_addr, span.ptrmap, span.ptr_words)
231 } else {
232 // Conservative fallback: scan every word
233 obj_size := usize(span.elem_size)
234 vgc_scan_range(obj_addr, obj_addr + obj_size)
235 }
236 }
237}
238
239// Precise pointer scanning: use the pointer bitmap to scan only
240// word offsets known to contain pointers. Much faster than conservative.
241fn vgc_scan_precise(obj_addr usize, ptrmap u64, ptr_words u8) {
242 mut mask := ptrmap
243 word_size := sizeof(usize)
244 for mask != 0 {
245 // Find lowest set bit (next pointer offset)
246 mut bit := u8(0)
247 mut m := mask
248 // Count trailing zeros to find the bit position
249 if m & 0xFFFFFFFF == 0 {
250 bit += 32
251 m >>= 32
252 }
253 if m & 0xFFFF == 0 {
254 bit += 16
255 m >>= 16
256 }
257 if m & 0xFF == 0 {
258 bit += 8
259 m >>= 8
260 }
261 if m & 0xF == 0 {
262 bit += 4
263 m >>= 4
264 }
265 if m & 0x3 == 0 {
266 bit += 2
267 m >>= 2
268 }
269 if m & 0x1 == 0 {
270 bit += 1
271 }
272 // Read the pointer at this offset
273 ptr_addr := obj_addr + usize(bit) * word_size
274 val := unsafe { *(&usize(voidptr(ptr_addr))) }
275 if val != 0 {
276 vgc_shade(val)
277 }
278 // Clear this bit and continue
279 mask &= mask - 1
280 }
281}
282
283// ============================================================
284// Work queue (translated from Go's mgcwork.go)
285// ============================================================
286
287@[inline]
288fn vgc_can_use_work_fastpath() bool {
289 return vgc_heap.ncaches <= 1 && vgc_heap.gc_nworkers <= 1
290}
291
292// Add a pointer to the mark work queue
293fn vgc_work_put(addr usize) {
294 if vgc_can_use_work_fastpath() {
295 mut buf := vgc_heap.work_full
296 if buf == unsafe { nil } || buf.nobj >= 256 {
297 mut new_buf := vgc_heap.work_empty
298 if new_buf != unsafe { nil } {
299 unsafe {
300 vgc_heap.work_empty = new_buf.next
301 }
302 } else {
303 new_buf = unsafe { &VGC_WorkBuf(C.vgc_os_alloc(usize(sizeof(VGC_WorkBuf)))) }
304 if new_buf == unsafe { nil } {
305 return
306 }
307 }
308 unsafe {
309 new_buf.nobj = 0
310 new_buf.next = vgc_heap.work_full
311 vgc_heap.work_full = new_buf
312 }
313 buf = new_buf
314 }
315 unsafe {
316 buf.obj[buf.nobj] = addr
317 buf.nobj++
318 }
319 return
320 }
321
322 C.vgc_mutex_lock(&vgc_heap.work_lock)
323
324 // Get or create a work buffer
325 mut buf := vgc_heap.work_full
326 if buf == unsafe { nil } || buf.nobj >= 256 {
327 // Need a new buffer
328 mut new_buf := vgc_heap.work_empty
329 if new_buf != unsafe { nil } {
330 unsafe {
331 vgc_heap.work_empty = new_buf.next
332 }
333 } else {
334 new_buf = unsafe { &VGC_WorkBuf(C.vgc_os_alloc(usize(sizeof(VGC_WorkBuf)))) }
335 if new_buf == unsafe { nil } {
336 C.vgc_mutex_unlock(&vgc_heap.work_lock)
337 return
338 }
339 }
340 unsafe {
341 new_buf.nobj = 0
342 new_buf.next = vgc_heap.work_full
343 vgc_heap.work_full = new_buf
344 }
345 buf = new_buf
346 }
347
348 unsafe {
349 buf.obj[buf.nobj] = addr
350 buf.nobj++
351 }
352 C.vgc_mutex_unlock(&vgc_heap.work_lock)
353}
354
355// Get a pointer from the mark work queue
356fn vgc_work_get() usize {
357 if vgc_can_use_work_fastpath() {
358 mut buf := vgc_heap.work_full
359 if buf == unsafe { nil } || buf.nobj == 0 {
360 return 0
361 }
362 unsafe {
363 buf.nobj--
364 addr := buf.obj[buf.nobj]
365 if buf.nobj == 0 {
366 vgc_heap.work_full = buf.next
367 buf.next = vgc_heap.work_empty
368 vgc_heap.work_empty = buf
369 }
370 return addr
371 }
372 }
373
374 C.vgc_mutex_lock(&vgc_heap.work_lock)
375
376 mut buf := vgc_heap.work_full
377 if buf == unsafe { nil } || buf.nobj == 0 {
378 C.vgc_mutex_unlock(&vgc_heap.work_lock)
379 return 0
380 }
381
382 unsafe {
383 buf.nobj--
384 addr := buf.obj[buf.nobj]
385
386 // If buffer is empty, move to empty list
387 if buf.nobj == 0 {
388 vgc_heap.work_full = buf.next
389 buf.next = vgc_heap.work_empty
390 vgc_heap.work_empty = buf
391 }
392
393 C.vgc_mutex_unlock(&vgc_heap.work_lock)
394 return addr
395 }
396}
397
398// ============================================================
399// Write barrier (translated from Go's gcWriteBarrier / wbBufFlush)
400// ============================================================
401
402// Write barrier: called when a pointer field is written during mark phase.
403// Uses Dijkstra-style insertion barrier - shade the new pointer.
404fn vgc_write_barrier(new_val voidptr) {
405 if C.vgc_atomic_load_u32(&vgc_heap.wb_enabled) == 0 {
406 return
407 }
408 if new_val == unsafe { nil } {
409 return
410 }
411 // Shade the new pointer (mark it grey)
412 vgc_shade(usize(new_val))
413}
414
415// ============================================================
416// Sweep phase (translated from Go's mgcsweep.go)
417// ============================================================
418
419// Sweep all spans synchronously.
420fn vgc_do_sweep() {
421 for i in 0 .. vgc_heap.nspans {
422 span := unsafe { vgc_heap.allspans[i] }
423 if span == unsafe { nil } || !span.in_use {
424 continue
425 }
426 vgc_sweep_span(span)
427 }
428 C.vgc_atomic_store_u32(&vgc_heap.sweep_done, 1)
429}
430
431// Sweep a single span: free unmarked objects.
432// Translated from Go's mspan.sweep() in mgcsweep.go.
433fn vgc_sweep_span(span &VGC_Span) {
434 if span.alloc_bits == unsafe { nil } || span.mark_bits == unsafe { nil } {
435 return
436 }
437
438 // Sweep using byte-level operations for speed
439 nbytes := (span.nelems + 7) / 8
440 mut freed := u32(0)
441 mut new_free_index := span.nelems // will be set to lowest freed index
442
443 for b in 0 .. nbytes {
444 alloc_byte := unsafe { span.alloc_bits[b] }
445 mark_byte := unsafe { span.mark_bits[b] }
446 // allocated but not marked = garbage
447 garbage := alloc_byte & ~mark_byte
448 if garbage != 0 {
449 freed += u32(C.vgc_popcount8(garbage))
450 // Clear the garbage bits from alloc bitmap
451 unsafe {
452 span.alloc_bits[b] = alloc_byte & mark_byte
453 }
454 // Track lowest freed index for free_index hint
455 base_idx := b * 8
456 if u32(base_idx) < new_free_index {
457 new_free_index = u32(base_idx)
458 }
459 }
460 }
461
462 if freed > 0 {
463 unsafe {
464 (&VGC_Span(span)).alloc_count -= freed
465 if new_free_index < span.free_index {
466 (&VGC_Span(span)).free_index = new_free_index
467 }
468 }
469 }
470
471 // If span is completely empty, recycle it to the free span pool
472 if span.alloc_count == 0 && span.npages > 0 {
473 mut mspan := unsafe { &VGC_Span(span) }
474 vgc_put_free_span(mut mspan)
475 }
476}
477
478// Ensure all sweeping from previous cycle is done
479fn vgc_sweep_finish() {
480 if C.vgc_atomic_load_u32(&vgc_heap.sweep_done) == 0 && vgc_heap.gc_cycle > 0 {
481 // Sweep any remaining spans
482 for vgc_heap.sweep_idx < vgc_heap.nspans {
483 idx := vgc_heap.sweep_idx
484 vgc_heap.sweep_idx = idx + 1
485 span := unsafe { vgc_heap.allspans[idx] }
486 if span != unsafe { nil } && span.in_use {
487 vgc_sweep_span(span)
488 }
489 }
490 C.vgc_atomic_store_u32(&vgc_heap.sweep_done, 1)
491 }
492}
493
494// ============================================================
495// GC Pacer (translated from Go's mgcpacer.go gcController)
496// ============================================================
497
498// Count total marked bytes across all spans using byte-level popcount
499fn vgc_count_marked() u64 {
500 mut total := u64(0)
501 for i in 0 .. vgc_heap.nspans {
502 span := unsafe { vgc_heap.allspans[i] }
503 if span == unsafe { nil } || !span.in_use || span.mark_bits == unsafe { nil } {
504 continue
505 }
506 nbytes := (span.nelems + 7) / 8
507 mut count := u32(0)
508 for b in 0 .. nbytes {
509 count += u32(C.vgc_popcount8(unsafe { span.mark_bits[b] }))
510 }
511 // Clamp to nelems (last byte may have extra bits)
512 if count > span.nelems {
513 count = span.nelems
514 }
515 total += u64(count) * u64(span.elem_size)
516 }
517 return total
518}
519
520// Update the GC trigger point for the next cycle.
521// Translated from Go's gcControllerState.endCycle() / heapGoal().
522// Uses GOGC logic: trigger when heap grows to (1 + GOGC/100) * marked
523fn vgc_update_trigger() {
524 marked := C.vgc_atomic_load_u64(&vgc_heap.heap_marked)
525 gc_percent := u64(vgc_heap.gc_percent)
526
527 mut goal := marked + marked * gc_percent / 100
528 // Avoid very small heap goals that force frequent full cycles on bursty workloads.
529 if goal < 256 * 1024 * 1024 {
530 goal = 256 * 1024 * 1024
531 }
532 C.vgc_atomic_store_u64(&vgc_heap.next_gc, goal)
533}
534
535// ============================================================
536// Heap usage reporting
537// ============================================================
538
539fn vgc_heap_usage() (usize, usize, usize, usize, usize) {
540 live := C.vgc_atomic_load_u64(&vgc_heap.heap_live)
541 total_alloc := C.vgc_atomic_load_u64(&vgc_heap.total_alloc)
542 // Count actual in-use span pages as heap size
543 mut in_use_bytes := usize(0)
544 for i in 0 .. vgc_heap.nspans {
545 span := unsafe { vgc_heap.allspans[i] }
546 if span != unsafe { nil } && span.in_use {
547 in_use_bytes += usize(span.npages) * vgc_page_size
548 }
549 }
550 free_bytes := if in_use_bytes > usize(live) { in_use_bytes - usize(live) } else { usize(0) }
551 return in_use_bytes, free_bytes, usize(live), usize(total_alloc), usize(vgc_heap.gc_cycle)
552}
553
554fn vgc_memory_use() usize {
555 mut total := usize(0)
556 for i in 0 .. vgc_heap.narenas {
557 total += vgc_heap.arenas[i].used
558 }
559 return total
560}
561