v2 / vlib / builtin / prealloc.c.v
626 lines · 582 sloc · 17.02 KB · 6b20a5ee7d2b42d98e601da4bef94a01d259e311
Raw
1@[has_globals]
2module builtin
3
4#insert "@VEXEROOT/vlib/builtin/prealloc_atomics.h"
5
6fn C.v_prealloc_atomic_add_i32(ptr &int, delta int) int
7fn C.v_prealloc_atomic_load_i32(ptr &int) int
8fn C.v_prealloc_atomic_store_i32(ptr &int, val int) int
9fn C.v_prealloc_atomic_cas_i32(ptr &int, expected int, desired int) int
10
11// With -prealloc, V calls libc's malloc to get chunks, each at least 16MB
12// in size, as needed. Once a chunk is available, all malloc() calls within
13// V code, that can fit inside the chunk, will use it instead, each bumping a
14// pointer, till the chunk is filled. Once a chunk is filled, a new chunk will
15// be allocated by calling libc's malloc, and the process continues.
16// Each new chunk has a pointer to the old one. The base arena is thread-local;
17// scoped arenas can be freed earlier with `prealloc_scope_end` or transferred
18// and freed later with `prealloc_scope_free_after`.
19// The goal of all this is to amortize the cost of calling libc's malloc,
20// trading higher memory usage for a compiler (or any single threaded batch
21// mode program), for a ~8-10% speed increase.
22
// size of the process/thread preallocated chunk (16 MiB). It is the minimum
// block size used by `vmemory_block_new`, and the fallback whenever a block
// has no positive `min_block_size` of its own.
const prealloc_block_size = 16 * 1024 * 1024

// size of the first chunk for a scoped prealloc arena (256 KiB). Request-scoped
// arenas should not force a 16MB libc allocation for every request.
const prealloc_scope_block_size = 256 * 1024

// `malloc` has to return memory suitably aligned for any V value. Keep the
// default at the common max alignment used by libc malloc on current targets.
// `vmemory_effective_align` never returns an alignment smaller than this.
const prealloc_default_align = sizeof(voidptr) * 2

// g_memory_block is the block that `vmemory_block_malloc` currently carves
// allocations from. It is replaced by a fresh block when the current one is
// full, and by a scope's first block in `prealloc_scope_begin`.
__global g_memory_block &VMemoryBlock
// VMemoryBlock is one chunk of an arena, together with the bookkeeping that is
// needed to bump-allocate from it and to link it to its neighbouring chunks.
@[heap]
struct VMemoryBlock {
mut:
	current &u8 = 0 // 8 - next free byte; advanced by every allocation
	stop &u8 = 0 // 8 - one past the last byte of the chunk
	start &u8 = 0 // 8 - first byte of the chunk; this is the pointer that gets freed
	previous &VMemoryBlock = 0 // 8 - the block that was passed as `prev` when this one was created
	next &VMemoryBlock = 0 // 8 - the block created with this one as its `prev`, if any
	scope &VPreallocScope = 0 // the scoped arena this block belongs to; stays 0 unless set by scope code
	min_block_size isize // minimum chunk size, reused for the blocks that follow this one
	is_scope bool // true for blocks of a scoped arena; copied from `prev` on creation
	id int // 4 - `prev.id + 1`, or 0 for a block created without a `prev`
	mallocs int // 4 - allocation counter, only updated with -d prealloc_stats or -d trace_prealloc
}
49
// VPreallocScope describes one nested (scoped) arena. The int fields below are
// read and written through the `C.v_prealloc_atomic_*` helpers.
@[heap]
struct VPreallocScope {
mut:
	previous &VMemoryBlock = 0 // value of `g_memory_block` when the scope began; reset to 0 on detach
	first &VMemoryBlock = 0 // first block of the scope; the chain freed starts here
	refs int // retain counter: +1 in `prealloc_scope_retain_current`, -1 in `prealloc_scope_release`
	free_requested int // set to 1 once freeing the scope has been requested
	abandoned int // set to 1 when the blocks must be left allocated on finalization
	finalized int // changed from 0 to 1 with a compare-and-swap before the scope is released
}
60
// vmemory_abort_on_nil does nothing when `p` is not nil. Otherwise it reports
// on stderr that `bytes` bytes could not be allocated, and exits with code 1.
fn vmemory_abort_on_nil(p voidptr, bytes isize) {
	if unsafe { p != 0 } {
		return
	}
	C.fprintf(C.stderr, c'could not allocate %td bytes\n', bytes)
	exit(1)
}
67
// vmemory_effective_align returns the larger of `align` and
// `prealloc_default_align`, so that no allocation is aligned less strictly
// than the default.
fn vmemory_effective_align(align isize) isize {
	lowest_allowed := isize(prealloc_default_align)
	return if align > lowest_allowed { align } else { lowest_allowed }
}
75
// vmemory_align_up returns the first address that is >= `ptr`, and that is a
// multiple of `align`. `ptr` is returned unchanged when `align` is <= 1, or
// when it is already aligned.
@[unsafe]
fn vmemory_align_up(ptr &u8, align isize) &u8 {
	if align > 1 {
		address := u64(ptr)
		step := u64(align)
		misalignment := address % step
		if misalignment != 0 {
			// skip the bytes that are missing up to the next multiple of `step`
			return unsafe { &u8(i64(address + step - misalignment)) }
		}
	}
	return ptr
}
89
// vmemory_block_used returns the distance in bytes between the start of the
// block `mb`, and its current bump pointer.
fn vmemory_block_used(mb &VMemoryBlock) i64 {
	bump_address := unsafe { i64(mb.current) }
	base_address := unsafe { i64(mb.start) }
	return bump_address - base_address
}
93
// vmemory_block_size returns the distance in bytes between the start and the
// stop pointers of the block `mb`.
fn vmemory_block_size(mb &VMemoryBlock) i64 {
	end_address := unsafe { i64(mb.stop) }
	base_address := unsafe { i64(mb.start) }
	return end_address - base_address
}
97
// prealloc_trace_scope prints a one line summary of `scope` to stderr, tagged
// with `action`. It only does something when compiled with -d trace_prealloc.
// For a nil scope, just the action and the nil pointer are printed. Otherwise
// the totals are gathered by following the `next` links from `scope.first`.
@[unsafe]
fn prealloc_trace_scope(action &char, scope &VPreallocScope) {
	$if trace_prealloc ? {
		if scope == unsafe { nil } {
			C.fprintf(C.stderr, c'[trace_prealloc] scope %s scope=%p\n', action, scope)
			return
		}
		unsafe {
			mut block_count := 0
			mut malloc_count := 0
			mut bytes_used := i64(0)
			mut bytes_reserved := i64(0)
			mut block := scope.first
			for block != 0 {
				block_count++
				malloc_count += block.mallocs
				bytes_used += vmemory_block_used(block)
				bytes_reserved += vmemory_block_size(block)
				block = block.next
			}
			C.fprintf(C.stderr,
				c'[trace_prealloc] scope %s scope=%p previous=%p first=%p blocks=%d used=%lld size=%lld mallocs=%d\n',
				action, scope, scope.previous, scope.first, block_count, bytes_used, bytes_reserved,
				malloc_count)
		}
	}
}
124
// vmemory_block_new creates a block linked after `prev`, that can hold at least
// `at_least` bytes, using `prealloc_block_size` as the minimum block size.
// See `vmemory_block_new_sized` for the details.
@[unsafe]
fn vmemory_block_new(prev &VMemoryBlock, at_least isize, align isize) &VMemoryBlock {
	default_min_size := isize(prealloc_block_size)
	return unsafe { vmemory_block_new_sized(prev, at_least, align, default_min_size) }
}
129
// vmemory_block_new_sized allocates a new block header and its data chunk.
// When `prev` is not nil, the new block is linked after it, gets `prev.id + 1`
// as its id, and inherits `prev.is_scope`.
// The chunk size is the larger of `at_least` and `min_block_size`, rounded up
// to a multiple of `align` (when `align` > 0). `min_block_size` values <= 0
// fall back to `prealloc_block_size`.
// The program exits through `vmemory_abort_on_nil`, when either allocation fails.
// Returns the new block, with `current` == `start`.
@[unsafe]
fn vmemory_block_new_sized(prev &VMemoryBlock, at_least isize, align isize, min_block_size isize) &VMemoryBlock {
	vmem_block_size := sizeof(VMemoryBlock)
	// calloc, so that all the fields that are not set below start as 0/nil
	mut v := unsafe { &VMemoryBlock(C.calloc(1, vmem_block_size)) }
	vmemory_abort_on_nil(v, vmem_block_size)

	v.previous = prev
	if unsafe { prev != 0 } {
		v.id = prev.id + 1
		v.is_scope = prev.is_scope
		prev.next = v
	}
	effective_min_block_size := if min_block_size > 0 {
		min_block_size
	} else {
		isize(prealloc_block_size)
	}
	v.min_block_size = effective_min_block_size
	base_block_size := if at_least < effective_min_block_size {
		effective_min_block_size
	} else {
		at_least
	}
	// the size passed to C.aligned_alloc below is kept a multiple of the alignment
	block_size := if align > 0 && base_block_size % align != 0 {
		base_block_size + align - (base_block_size % align)
	} else {
		base_block_size
	}
	$if prealloc_trace_malloc ? {
		// isize values are widened to i64, so that they always match %lld
		C.fprintf(C.stderr,
			c'vmemory_block_new id: %d, block_size: %lld, at_least: %lld, align: %lld\n', v.id,
			i64(block_size), i64(at_least), i64(align))
	}

	fixed_align := if align <= 1 { 1 } else { align }
	$if windows {
		// the matching release call is C._aligned_free
		v.start = unsafe { C._aligned_malloc(block_size, fixed_align) }
	} $else {
		if fixed_align == 1 {
			v.start = unsafe { C.malloc(block_size) }
		} else {
			v.start = unsafe { C.aligned_alloc(fixed_align, block_size) }
		}
	}
	vmemory_abort_on_nil(v.start, block_size)
	$if prealloc_memset ? {
		unsafe { C.memset(v.start, int($d('prealloc_memset_value', 0)), block_size) }
	}
	v.stop = unsafe { &u8(i64(v.start) + block_size) }
	v.current = v.start
	$if trace_prealloc ? {
		if v.is_scope {
			C.fprintf(C.stderr,
				c'[trace_prealloc] block alloc block=%p previous=%p id=%d size=%lld at_least=%lld align=%lld start=%p stop=%p\n',
				v, prev, v.id, i64(block_size), i64(at_least), i64(align), v.start, v.stop)
		}
	}
	return v
}
195
// vmemory_block_malloc returns `n` bytes, carved from the current block
// `g_memory_block`, at an address aligned to the larger of `align` and
// `prealloc_default_align`.
// When the current block does not have `n` bytes left after aligning, a new
// block is linked after it and becomes the current one. The new block keeps
// the `is_scope`, `scope` and `min_block_size` values of the block it follows.
// The returned memory is not cleared here.
@[unsafe]
fn vmemory_block_malloc(n isize, align isize) &u8 {
	unsafe {
		// Lazy per-thread initialization: when g_memory_block is thread-local,
		// new threads start with a null pointer and need their own arena.
		if g_memory_block == nil {
			g_memory_block = vmemory_block_new(nil, isize(prealloc_block_size), 0)
		}
	}
	$if prealloc_trace_malloc ? {
		// isize values are widened to i64, so that they always match %lld
		C.fprintf(C.stderr, c'vmemory_block_malloc g_memory_block.id: %d, n: %lld align: %lld\n',
			g_memory_block.id, i64(n), i64(align))
	}
	unsafe {
		fixed_align := vmemory_effective_align(align)
		mut current := vmemory_align_up(g_memory_block.current, fixed_align)
		// can be negative, when aligning moved `current` past the end of the block
		remaining := i64(g_memory_block.stop) - i64(current)
		if _unlikely_(remaining < n) {
			was_scope := g_memory_block.is_scope
			scope := g_memory_block.scope
			min_block_size := if g_memory_block.min_block_size > 0 {
				g_memory_block.min_block_size
			} else {
				isize(prealloc_block_size)
			}
			g_memory_block = vmemory_block_new_sized(g_memory_block, n, fixed_align, min_block_size)
			g_memory_block.is_scope = was_scope
			g_memory_block.scope = scope
			current = vmemory_align_up(g_memory_block.current, fixed_align)
		}
		res := &u8(current)
		g_memory_block.current = current
		g_memory_block.current += n
		$if prealloc_stats ? {
			g_memory_block.mallocs++
		} $else {
			$if trace_prealloc ? {
				g_memory_block.mallocs++
			}
		}
		$if trace_prealloc ? {
			if g_memory_block.is_scope {
				used := vmemory_block_used(g_memory_block)
				size := vmemory_block_size(g_memory_block)
				C.fprintf(C.stderr,
					c'[trace_prealloc] alloc block=%p ptr=%p size=%lld align=%lld used=%lld/%lld mallocs=%d\n',
					g_memory_block, res, i64(n), i64(fixed_align), used, size, g_memory_block.mallocs)
			}
		}
		return res
	}
}
248
// vmemory_block_free releases the data chunk of the block `mb`, and then the
// block header itself. With -d trace_prealloc, scoped blocks are reported on
// stderr, before they are released.
@[unsafe]
fn vmemory_block_free(mb &VMemoryBlock) {
	$if trace_prealloc ? {
		if mb.is_scope {
			bytes_used := vmemory_block_used(mb)
			bytes_reserved := vmemory_block_size(mb)
			C.fprintf(C.stderr,
				c'[trace_prealloc] block free block=%p id=%d start=%p used=%lld size=%lld mallocs=%d\n',
				mb, mb.id, mb.start, bytes_used, bytes_reserved, mb.mallocs)
		}
	}
	$if windows {
		// Warning! On windows, the chunk is always released with _aligned_free.
		C._aligned_free(mb.start)
	} $else {
		C.free(mb.start)
	}
	// the header is released with plain free on every platform
	C.free(mb)
}
266
// vmemory_block_free_after unlinks and frees all the blocks, that follow
// `marker` through the `next` links. `marker` itself is kept. A nil `marker`
// is ignored.
@[unsafe]
fn vmemory_block_free_after(marker &VMemoryBlock) {
	unsafe {
		if marker != nil {
			detached := marker.next
			// cut the chain first, so that `marker` never points to freed blocks
			marker.next = nil
			vmemory_block_free_chain(detached)
		}
	}
}
278
// vmemory_block_free_chain frees `first`, and every block reachable from it
// through the `next` links. A nil `first` frees nothing.
@[unsafe]
fn vmemory_block_free_chain(first &VMemoryBlock) {
	unsafe {
		mut cursor := first
		for {
			if cursor == 0 {
				break
			}
			// read the link before the block that holds it is released
			following := cursor.next
			vmemory_block_free(cursor)
			cursor = following
		}
	}
}
290
291/////////////////////////////////////////////////
292
// prealloc_vinit creates the first block of the arena, and registers
// `prealloc_vcleanup` with `at_exit`. A failure to register the cleanup is
// ignored.
@[unsafe]
fn prealloc_vinit() {
	$if prealloc_trace_vinit ? {
		C.fprintf(C.stderr, c'prealloc_vinit started\n')
	}
	unsafe {
		first_block_size := isize(prealloc_block_size)
		g_memory_block = vmemory_block_new(nil, first_block_size, 0)
		at_exit(prealloc_vcleanup) or {}
	}
}
303
// prealloc_vcleanup frees `g_memory_block`, and every block reachable from it
// through the `previous` links.
// With -d prealloc_stats, per block usage statistics are printed to stderr first.
// With -d prealloc_dump, the used part of every block is written to the file
// named by -d memdumpfile=... (default: 'memdump.bin'), before the blocks are freed.
// The dump is skipped when there is no block, or when the file can not be
// opened, and it is stopped on the first failed write.
@[unsafe]
fn prealloc_vcleanup() {
	$if prealloc_trace_vcleanup ? {
		C.fprintf(C.stderr, c'prealloc_vcleanup started\n')
	}
	$if prealloc_stats ? {
		// Note: we do 2 loops here, because string interpolation
		// in the first loop may still use g_memory_block
		// The second loop however should *not* allocate at all.
		mut nr_mallocs := i64(0)
		mut total_used := i64(0)
		mut mb := g_memory_block
		for unsafe { mb != 0 } {
			nr_mallocs += mb.mallocs
			used := i64(mb.current) - i64(mb.start)
			total_used += used
			remaining := i64(mb.stop) - i64(mb.current)
			size := i64(mb.stop) - i64(mb.start)
			C.fprintf(C.stderr,
				c'> freeing mb: %16p, mb.id: %3d | size: %10lld | rem: %10lld | start: %16p | current: %16p | used: %10lld bytes | mallocs: %6d\n',
				mb, mb.id, size, remaining, mb.start, mb.current, used, mb.mallocs)
			mb = mb.previous
		}
		C.fprintf(C.stderr, c'> nr_mallocs: %lld, total_used: %lld bytes\n', nr_mallocs, total_used)
	}
	$if prealloc_dump ? {
		C.fprintf(C.stderr, c'prealloc_vcleanup dumping memory contents ...\n')
		mut start := g_memory_block
		unsafe {
			if start == nil {
				// no block was ever created, so there is nothing to walk or to write
				C.fprintf(C.stderr, c'prealloc_vcleanup has no memory blocks to dump\n')
			} else {
				// rewind to the oldest block, so that the dump is in allocation order
				for start.previous != 0 {
					start = start.previous
				}
				C.fprintf(C.stderr, c'prealloc_vcleanup start: %p\n', start)
				C.fprintf(C.stderr, c'prealloc_vcleanup start.id: %d\n', start.id)
				C.fprintf(C.stderr, c'prealloc_vcleanup start.next: %p\n', start.next)

				mut total_used := u64(0)
				path := $d('memdumpfile', 'memdump.bin')
				C.fprintf(C.stderr, c'prealloc_vcleanup dumping process memory to path: %s\n', path.str)
				stream := C.fopen(path.str, c'wb')
				if stream == nil {
					C.fprintf(C.stderr, c'prealloc_vcleanup could not open dump file: %s\n', path.str)
				} else {
					mut write_failed := false
					mut mb := start
					for {
						used := u64(mb.current) - u64(mb.start)
						total_used += used
						C.fprintf(C.stderr,
							c'prealloc_vcleanup dumping mb: %p, mb.id: %d, used: %10lld bytes\n', mb,
							mb.id, used)

						mut ptr := mb.start
						mut remaining_bytes := isize(used)
						for remaining_bytes > 0 {
							x := isize(C.fwrite(ptr, 1, remaining_bytes, stream))
							if x <= 0 {
								// no progress means a write error; retrying would loop forever
								write_failed = true
								break
							}
							ptr += x
							remaining_bytes -= x
						}
						if write_failed {
							C.fprintf(C.stderr, c'prealloc_vcleanup failed writing to dump file: %s\n', path.str)
							break
						}

						if mb.next == 0 {
							break
						}
						mb = mb.next
					}
					C.fclose(stream)
					C.fprintf(C.stderr, c'prealloc_vcleanup total dump size in bytes: %lld\n', total_used)
				}
			}
		}
	}
	unsafe {
		for g_memory_block != 0 {
			$if windows {
				// Warning! On windows, we always use _aligned_free to free memory.
				C._aligned_free(g_memory_block.start)
			} $else {
				C.free(g_memory_block.start)
			}
			tmp := g_memory_block
			g_memory_block = g_memory_block.previous
			// free the link node
			C.free(tmp)
		}
	}
}
385
// prealloc_scope_begin starts a nested arena on the current thread. All V
// allocations after this call use the nested arena until `prealloc_scope_end`.
// The returned scope can be passed across threads and later freed with
// `prealloc_scope_free_after`, which is useful when a response buffer outlives
// the request handler thread.
// The first block of the nested arena has `prealloc_scope_block_size` bytes,
// and the same value is used as the minimum size for the blocks that follow it.
@[unsafe]
pub fn prealloc_scope_begin() voidptr {
	unsafe {
		scope_size := sizeof(VPreallocScope)
		scope := &VPreallocScope(C.calloc(1, scope_size))
		vmemory_abort_on_nil(scope, scope_size)
		// remember the block that was current, so that it can be restored later
		scope.previous = g_memory_block
		first_block_size := isize(prealloc_scope_block_size)
		scope.first = vmemory_block_new_sized(scope.previous, first_block_size, 0, first_block_size)
		scope.first.is_scope = true
		scope.first.scope = scope
		g_memory_block = scope.first
		prealloc_trace_scope(c'begin', scope)
		return scope
	}
}
406
// prealloc_scope_checkpoint prints usage totals for the current scoped arena
// to stderr, tagged with `label`. It only does something when compiled with
// -d trace_prealloc, and when the current block is a scoped one.
// The totals start at the earliest block reached by following `previous`
// links over blocks marked as scoped, and cover all the blocks that are
// reachable from it through `next`.
@[unsafe]
pub fn prealloc_scope_checkpoint(label &char) {
	$if trace_prealloc ? {
		unsafe {
			if g_memory_block == 0 || !g_memory_block.is_scope {
				return
			}
			mut first := g_memory_block
			for first.previous != 0 && first.previous.is_scope {
				first = first.previous
			}
			mut block_count := 0
			mut malloc_count := 0
			mut bytes_used := i64(0)
			mut bytes_reserved := i64(0)
			mut block := first
			for block != 0 {
				block_count++
				malloc_count += block.mallocs
				bytes_used += vmemory_block_used(block)
				bytes_reserved += vmemory_block_size(block)
				block = block.next
			}
			C.fprintf(C.stderr,
				c'[trace_prealloc] checkpoint label=%s first=%p current=%p blocks=%d used=%lld size=%lld mallocs=%d\n',
				label, first, g_memory_block, block_count, bytes_used, bytes_reserved, malloc_count)
		}
	}
}
436
// prealloc_scope_free_blocks frees the chain of blocks that starts at
// `scope.first`. When the scope still has a `previous` block, its `next` link
// is cleared first. A nil `scope` is ignored.
@[unsafe]
fn prealloc_scope_free_blocks(scope &VPreallocScope) {
	unsafe {
		if scope == nil {
			return
		}
		parent := scope.previous
		if parent != 0 {
			// the parent must not keep a link to blocks that are about to be freed
			parent.next = nil
		}
		vmemory_block_free_chain(scope.first)
	}
}
449
// prealloc_scope_request_free sets the `free_requested` flag of `scope`, and
// then tries to finalize it with `prealloc_scope_finish_if_ready`. When
// `abandoned` is true, the `abandoned` flag is set before that too.
// A nil `scope` is ignored.
@[unsafe]
fn prealloc_scope_request_free(scope &VPreallocScope, abandoned bool) {
	unsafe {
		if scope == nil {
			return
		}
		if abandoned {
			C.v_prealloc_atomic_store_i32(&scope.abandoned, 1)
		}
		C.v_prealloc_atomic_store_i32(&scope.free_requested, 1)
		prealloc_scope_finish_if_ready(scope)
	}
}
463
// prealloc_scope_finish_if_ready releases `scope`, once freeing was requested
// and its `refs` counter is 0. The blocks of the scope are freed too, unless
// the `abandoned` flag is set. The scope header is released in both cases.
// It returns without doing anything for a nil `scope`, and also when the
// compare-and-swap on `finalized` yields 0.
// NOTE(review): presumably a 0 result means that the swap did not happen,
// because another caller already finalized the scope - confirm against
// prealloc_atomics.h.
@[unsafe]
fn prealloc_scope_finish_if_ready(scope &VPreallocScope) {
	unsafe {
		if scope == nil {
			return
		}
		// `refs` is only read after `free_requested` was seen as set
		if C.v_prealloc_atomic_load_i32(&scope.free_requested) == 0
			|| C.v_prealloc_atomic_load_i32(&scope.refs) != 0 {
			return
		}
		if C.v_prealloc_atomic_cas_i32(&scope.finalized, 0, 1) == 0 {
			return
		}
		keep_blocks := C.v_prealloc_atomic_load_i32(&scope.abandoned) != 0
		if !keep_blocks {
			prealloc_scope_free_blocks(scope)
		}
		C.free(scope)
	}
}
485
// prealloc_scope_detach_current unlinks `scope` from the block that preceded
// it: the `next` link of that block is cleared, and `scope.previous` is reset
// to nil. When the current block belongs to `scope`, `g_memory_block` is set
// back to the preceding block as well. A nil `scope` is ignored.
@[unsafe]
fn prealloc_scope_detach_current(scope &VPreallocScope) {
	unsafe {
		if scope == nil {
			return
		}
		parent := scope.previous
		if parent != 0 {
			parent.next = nil
		}
		current := g_memory_block
		if current != 0 && current.is_scope && current.scope == scope {
			g_memory_block = parent
		}
		scope.previous = nil
	}
}
502
// prealloc_scope_retain_current keeps the current scoped arena alive after the
// owner calls `prealloc_scope_end`/`prealloc_scope_free_after`. It is used by
// generated `spawn` wrappers so detached threads can safely receive arguments
// allocated in a request arena.
// It increments the `refs` counter of the scope that owns the current block,
// and returns that scope. It returns nil when there is no current block, when
// the current block is not a scoped one, or when not compiled with -prealloc.
// Every non nil result should be passed to `prealloc_scope_release` later.
@[unsafe]
pub fn prealloc_scope_retain_current() voidptr {
	$if prealloc {
		unsafe {
			current := g_memory_block
			if current == 0 || !current.is_scope || current.scope == 0 {
				return nil
			}
			scope := current.scope
			C.v_prealloc_atomic_add_i32(&scope.refs, 1)
			$if trace_prealloc ? {
				prealloc_trace_scope(c'retain', scope)
			}
			return scope
		}
	} $else {
		return unsafe { nil }
	}
}
525
// prealloc_scope_release decrements the `refs` counter of the scope behind
// `scope_ptr`, and then tries to finalize it with
// `prealloc_scope_finish_if_ready`. It does nothing for a nil `scope_ptr`, or
// when not compiled with -prealloc.
// NOTE(review): the scope is still accessed after the decrement - confirm that
// no other thread can finalize and free it in between.
@[unsafe]
pub fn prealloc_scope_release(scope_ptr voidptr) {
	$if prealloc {
		unsafe {
			if scope_ptr == nil {
				return
			}
			scope := &VPreallocScope(scope_ptr)
			C.v_prealloc_atomic_add_i32(&scope.refs, -1)
			$if trace_prealloc ? {
				prealloc_trace_scope(c'release', scope)
			}
			prealloc_scope_finish_if_ready(scope)
		}
	}
}
542
// prealloc_scope_end frees a nested arena and restores the current thread arena
// to the state before `prealloc_scope_begin`.
// The scope is detached first, then its release is requested; the release only
// happens right away, when the `refs` counter of the scope is 0.
// A nil `scope_ptr` is ignored.
@[unsafe]
pub fn prealloc_scope_end(scope_ptr voidptr) {
	unsafe {
		if scope_ptr != nil {
			scope := &VPreallocScope(scope_ptr)
			prealloc_trace_scope(c'end', scope)
			prealloc_scope_detach_current(scope)
			prealloc_scope_request_free(scope, false)
		}
	}
}
557
// prealloc_scope_leave restores the current thread arena without freeing the
// scoped blocks. Call this before another thread takes ownership of the scope.
// A nil `scope_ptr` is ignored.
@[unsafe]
pub fn prealloc_scope_leave(scope_ptr voidptr) {
	unsafe {
		if scope_ptr != nil {
			scope := &VPreallocScope(scope_ptr)
			prealloc_trace_scope(c'leave', scope)
			prealloc_scope_detach_current(scope)
		}
	}
}
571
// prealloc_scope_abandon restores the current thread arena and intentionally
// leaks the scoped blocks. It is only for APIs that transfer request state to
// user code without providing a close hook yet.
// The scope header itself is still released on finalization; only the blocks
// are left allocated. A nil `scope_ptr` is ignored.
@[unsafe]
pub fn prealloc_scope_abandon(scope_ptr voidptr) {
	unsafe {
		if scope_ptr != nil {
			scope := &VPreallocScope(scope_ptr)
			prealloc_trace_scope(c'abandon', scope)
			// leaving detaches the scope from the current thread arena
			prealloc_scope_leave(scope_ptr)
			prealloc_scope_request_free(scope, true)
		}
	}
}
587
// prealloc_scope_free_after frees a nested arena from a marker without touching
// the caller's thread-local arena pointer. Use this when another thread finishes
// sending data that was allocated in the request thread.
// The release only happens right away, when the `refs` counter of the scope
// is 0. A nil `scope_ptr` is ignored.
@[unsafe]
pub fn prealloc_scope_free_after(scope_ptr voidptr) {
	unsafe {
		if scope_ptr != nil {
			scope := &VPreallocScope(scope_ptr)
			prealloc_trace_scope(c'free-after', scope)
			prealloc_scope_request_free(scope, false)
		}
	}
}
602
// prealloc_malloc returns `n` bytes from the current arena, with the default
// alignment. The memory is not cleared.
@[unsafe]
fn prealloc_malloc(n isize) &u8 {
	unsafe {
		return vmemory_block_malloc(n, 0)
	}
}
607
// prealloc_realloc returns a new arena allocation of `new_size` bytes, that
// starts with a copy of the first min(`old_size`, `new_size`) bytes of
// `old_data`. The old allocation is not released here.
// Nothing is copied when `old_data` is nil, or when there are no bytes to copy.
@[unsafe]
fn prealloc_realloc(old_data &u8, old_size isize, new_size isize) &u8 {
	new_ptr := unsafe { vmemory_block_malloc(new_size, 0) }
	min_size := if old_size < new_size { old_size } else { new_size }
	// memcpy needs valid pointers even for a 0 length, and a negative length
	// would be converted to a huge unsigned one
	if min_size > 0 && unsafe { old_data != 0 } {
		unsafe { C.memcpy(new_ptr, old_data, min_size) }
	}
	return new_ptr
}
615
// prealloc_calloc returns `n` zeroed bytes from the current arena, with the
// default alignment.
@[unsafe]
fn prealloc_calloc(n isize) &u8 {
	unsafe {
		zeroed := vmemory_block_malloc(n, 0)
		C.memset(zeroed, 0, n)
		return zeroed
	}
}
622
// prealloc_malloc_align returns `n` bytes from the current arena, aligned to
// the larger of `align` and the default alignment. The memory is not cleared.
@[unsafe]
fn prealloc_malloc_align(n isize, align isize) &u8 {
	unsafe {
		return vmemory_block_malloc(n, align)
	}
}
627