v2 / thirdparty / libgc / include / gc / gc_mark.h
516 lines · 456 sloc · 21.31 KB · a4b7f7369d5359a002beced36c1e6644403220e7
Raw
1/*
2 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
3 * Copyright (c) 2001 by Hewlett-Packard Company. All rights reserved.
4 * Copyright (c) 2009-2025 Ivan Maidanski
5 *
6 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
7 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
8 *
9 * Permission is hereby granted to use or copy this program
10 * for any purpose, provided the above notices are retained on all copies.
11 * Permission to modify the code and to distribute modified code is granted,
12 * provided the above notices are retained, and a notice that the code was
13 * modified is included with the above copyright notice.
14 */
15
16/*
17 * This contains interfaces to the garbage collector marker that are likely
18 * to be useful to clients that provide detailed heap layout information
19 * to the collector. This interface should not be used by normal C or C++
20 * clients. It will be useful to runtimes for other languages.
21 *
22 * This is an experts-only interface! There are many ways to break the
23 * collector in subtle ways by using this functionality.
24 */
25#ifndef GC_MARK_H
26#define GC_MARK_H
27
28#ifndef GC_H
29# include "gc.h"
30#endif
31
32#ifdef __cplusplus
33extern "C" {
34#endif
35
36#define GC_PROC_BYTES 100
37
38#if defined(GC_BUILD) || defined(NOT_GCBUILD)
39struct GC_ms_entry;
40struct GC_hblk_s;
41#else
42struct GC_ms_entry {
43 void *opaque;
44};
45struct GC_hblk_s {
46 void *opaque;
47};
48#endif
49
50/**
51 * A client-supplied mark procedure. Returns new mark stack pointer.
52 * Primary effect should be to push new entries on the mark stack.
53 * Mark stack pointer values are passed and returned explicitly.
54 * Global variables describing mark stack are not necessarily valid.
55 * (This usually saves a few cycles by keeping things in registers.)
56 * Assumed to scan about `GC_PROC_BYTES` on average. If it needs to do
57 * much more work than that, it should do it in smaller pieces by
58 * pushing itself back on the mark stack. Note that it should always
59 * do some work (defined as "marking some objects") before pushing more
60 * than one entry on the mark stack. This is required to ensure
61 * termination in the event of mark stack overflows. This procedure is
62 * always called with at least one empty entry on the mark stack.
63 * Currently we require that mark procedures look for pointers in
64 * a subset of the places the conservative marker would. It must be safe
65 * to invoke the normal mark procedure instead.
66 *
67 * *Warning*: Such a mark procedure may be invoked on an unused object
68 * residing on a free list. Such objects are cleared, except for a
69 * free-list link field (which is located at the beginning of each
70 * object). Thus mark procedures may not count on the presence of a
71 * type descriptor, and must handle this case correctly somehow. Also,
72 * a mark procedure should be prepared to be executed concurrently from
73 * the marker threads (the latter ones are created only if the client
74 * has called `GC_start_mark_threads()` or started a user thread
75 * previously). For the compatibility reason, `addr` is a pointer to
76 * `GC_word`, but it should be treated as a pointer to `void` pointer.
77 */
78typedef struct GC_ms_entry *(GC_CALLBACK *GC_mark_proc)(
79 GC_word * /* `addr` */, struct GC_ms_entry * /* `mark_stack_top` */,
80 struct GC_ms_entry * /* `mark_stack_limit` */, GC_word /* `env` */);
81
82#define GC_LOG_MAX_MARK_PROCS 6
83#define GC_MAX_MARK_PROCS (1 << GC_LOG_MAX_MARK_PROCS)
84
85/*
86 * In a few cases it is necessary to assign statically known indices to
87 * certain mark procedures. Thus we reserve a few for well known clients.
88 * (This is necessary if mark descriptors are compiler generated.)
89 */
90#define GC_RESERVED_MARK_PROCS 8
91#define GC_GCJ_RESERVED_MARK_PROC_INDEX 0
92
93/*
94 * Object descriptors on mark stack or in objects. Low-order two bits are
95 * tags distinguishing among the following 4 possibilities for the rest
96 * (high-order) bits.
97 */
98#define GC_DS_TAG_BITS 2
99#define GC_DS_TAGS ((1U << GC_DS_TAG_BITS) - 1)
100
101/**
102 * The entire descriptor is a length in bytes that must be a multiple of 4.
103 */
104#define GC_DS_LENGTH 0
105
106/**
107 * The high-order bits are describing pointer fields. The most
108 * significant bit is set if the first "pointer-sized" word is a pointer.
109 * (This unconventional ordering sometimes makes the marker slightly faster.)
110 * Zeros indicate definite non-pointers; ones indicate possible pointers.
111 * *Note*: only usable if pointers are aligned on the size of a pointer (thus,
112 * extra care should be taken by client on cris and m68k architectures).
113 */
114#define GC_DS_BITMAP 1
115
116/**
117 * The objects referenced by this object can be pushed on the mark stack by
118 * invoking `PROC(descr)`. `ENV(descr)` is passed as the last argument.
119 */
120#define GC_DS_PROC 2
121
122/**
123 * The real descriptor is at the byte displacement from the beginning
124 * of the object given by `descr & ~GC_DS_TAGS`. If the descriptor is
125 * negative, the real descriptor is at
126 * `*<object_start> - (descr & ~GC_DS_TAGS) - GC_INDIR_PER_OBJ_BIAS`.
127 * The latter alternative can be used if each object contains a type
128 * descriptor at the beginning of the object. Note that in the
129 * multi-threaded environments per-object descriptors must be located
130 * in either the first two or last two "pointer-sized" words of the
131 * object, since only those are guaranteed to be cleared while the
132 * allocator lock is held.
133 */
134#define GC_DS_PER_OBJECT 3
135
136#define GC_INDIR_PER_OBJ_BIAS 0x10
137
138#define GC_MAKE_PROC(proc_index, env) \
139 ((((((GC_word)(env)) << GC_LOG_MAX_MARK_PROCS) | (unsigned)(proc_index)) \
140 << GC_DS_TAG_BITS) \
141 | (GC_word)GC_DS_PROC)
142
143/**
144 * Bounds on the heap. Guaranteed to be valid. Likely to include future
145 * heap expansion. Hence the bounded range usually includes not-yet-mapped
146 * memory, or might overlap with other data roots. The address of any heap
147 * object is larger than `GC_least_plausible_heap_addr` and less than
148 * `GC_greatest_plausible_heap_addr`.
149 */
150GC_API void *GC_least_plausible_heap_addr;
151GC_API void *GC_greatest_plausible_heap_addr;
152
153/**
154 * Specify the pointer address mask. Works only if the collector is
155 * built with `DYNAMIC_POINTER_MASK` macro defined. These primitives
156 * are normally needed only to support systems that use high-order
157 * pointer tags. The setter is expected to be called, if needed,
158 * before the collector initialization or, at least, before the first
159 * object is allocated. Both the setter and the getter are unsynchronized.
160 */
161GC_API void GC_CALL GC_set_pointer_mask(GC_word);
162GC_API GC_word GC_CALL GC_get_pointer_mask(void);
163
164/**
165 * Similar to `GC_set_pointer_mask`/`GC_get_pointer_mask` but for the
166 * pointer address shift. The value should be less than the size of
167 * `GC_word`, in bits. Applied after the mask.
168 */
169GC_API void GC_CALL GC_set_pointer_shift(unsigned);
170GC_API unsigned GC_CALL GC_get_pointer_shift(void);
171
172/**
173 * Handle nested references in a custom mark procedure.
174 * Check if `obj` is a valid object. If so, ensure that it is marked.
175 * If it was not previously marked, push its contents onto the mark
176 * stack for future scanning. The object will then be scanned using
177 * its mark descriptor. Returns the new mark stack pointer.
178 * Handles mark stack overflows correctly. Since this marks first, it
179 * makes progress even if there are mark stack overflows.
180 * `src` is the address of the pointer to `obj`, which is used only
181 * for back pointer-based heap debugging.
182 * It is strongly recommended that most objects be handled without mark
183 * procedures, e.g. with bitmap descriptors, and that mark procedures
184 * be reserved for the exceptional cases. That will ensure that
185 * performance of this call is not extremely performance critical.
186 * (Otherwise we would need to inline `GC_mark_and_push` completely,
187 * which would tie the client code to a fixed collector version.)
188 * Note that mark procedures should explicitly call `FIXUP_POINTER()`
189 * if required.
190 */
191GC_API struct GC_ms_entry *GC_CALL GC_mark_and_push(
192 void * /* `obj` */, struct GC_ms_entry * /* `mark_stack_top` */,
193 struct GC_ms_entry * /* `mark_stack_limit` */, void ** /* `src` */);
194
195#define GC_MARK_AND_PUSH(obj, msp, lim, src) \
196 (GC_ADDR_LT((char *)GC_least_plausible_heap_addr, (char *)(obj)) \
197 && GC_ADDR_LT((char *)(obj), \
198 (char *)GC_greatest_plausible_heap_addr) \
199 ? GC_mark_and_push(obj, msp, lim, src) \
200 : (msp))
201
202GC_API void GC_CALL GC_push_proc(GC_word /* `descr` */, void * /* `obj` */);
203
204GC_API struct GC_ms_entry *GC_CALL
205GC_custom_push_proc(GC_word /* `descr` */, void * /* `obj` */,
206 struct GC_ms_entry * /* `mark_stack_top` */,
207 struct GC_ms_entry * /* `mark_stack_limit` */);
208
209GC_API struct GC_ms_entry *GC_CALL
210GC_custom_push_range(void * /* `bottom` */, void * /* `top` */,
211 struct GC_ms_entry * /* `mark_stack_top` */,
212 struct GC_ms_entry * /* `mark_stack_limit` */);
213
214/**
215 * The size of the header added to objects allocated through the `GC_debug_`
216 * routines. Defined as a function so that client mark procedures do not
217 * need to be recompiled for the collector library version changes.
218 */
219GC_API GC_ATTR_CONST size_t GC_CALL GC_get_debug_header_size(void);
220#define GC_USR_PTR_FROM_BASE(p) \
221 ((void *)((char *)(p) + GC_get_debug_header_size()))
222
223/*
224 * The same as `GC_get_debug_header_size()` but defined as a variable.
225 * Exists only for the backward compatibility. Some compilers do not
226 * accept `const` together with `deprecated` or `dllimport` attributes,
227 * so the symbol is exported as a non-constant one.
228 */
229GC_API GC_ATTR_DEPRECATED
230#ifdef GC_BUILD
231 const
232#endif
233 size_t GC_debug_header_size;
234
235/**
236 * Return the heap block size. Each heap block is devoted to a single size
237 * and kind of object.
238 */
239GC_API GC_ATTR_CONST size_t GC_CALL GC_get_hblk_size(void);
240
241typedef void(GC_CALLBACK *GC_walk_hblk_fn)(struct GC_hblk_s *,
242 void * /* `client_data` */);
243
244/**
245 * Apply `fn` to each allocated heap block. It is the responsibility
246 * of the caller to avoid data race during the function execution
247 * (e.g. by acquiring the allocator lock at least in the reader mode).
248 */
249GC_API void GC_CALL GC_apply_to_all_blocks(GC_walk_hblk_fn,
250 void * /* `client_data` */)
251 GC_ATTR_NONNULL(1);
252
253/* Same as `GC_walk_hblk_fn` but with index of the free list. */
254typedef void(GC_CALLBACK *GC_walk_free_blk_fn)(struct GC_hblk_s *,
255 int /* `index` */,
256 void * /* `client_data` */);
257
258/**
259 * Apply `fn` to each completely empty heap block. It is the responsibility
260 * of the caller to avoid data race during the function execution (e.g. by
261 * acquiring the allocator lock at least in the reader mode).
262 */
263GC_API void GC_CALL GC_iterate_free_hblks(GC_walk_free_blk_fn,
264 void * /* `client_data` */)
265 GC_ATTR_NONNULL(1);
266
267/**
268 * If there are likely to be false references to a block starting at
269 * `h` of the indicated length (`len`), then return the next plausible
270 * starting location for `h` that might avoid these false references.
271 * (In other words: Is the block starting at `h` of size `len` bytes
272 * black-listed? If so, then return the address of the next plausible
273 * `r` such that (`r`,`len`) might not be black-listed. Note that
274 * pointer `r` may not actually be in the heap, we guarantee only that
275 * every smaller value of `r` after `h` is also black-listed.)
276 * Otherwise `NULL` is returned. Assumes the allocator lock is held at
277 * least in the reader mode, but no assertion about it by design.
278 */
279GC_API struct GC_hblk_s *GC_CALL
280GC_is_black_listed(struct GC_hblk_s * /* `h` */, size_t /* `len` */);
281
282/**
283 * Return the number of set mark bits for the heap block where object `p`
284 * is located. Defined only if the library has been compiled without
285 * `NO_DEBUGGING` macro defined.
286 */
287GC_API unsigned GC_CALL GC_count_set_marks_in_hblk(const void * /* `p` */);
288
289/*
290 * And some routines to support creation of new "kinds", e.g. with custom
291 * mark procedures, by language runtimes. The `_inner` variants assume
292 * the caller holds the allocator lock.
293 */
294
295/** Return a new free-list array. */
296GC_API void **GC_CALL GC_new_free_list(void);
297GC_API void **GC_CALL GC_new_free_list_inner(void);
298
299/**
300 * Return a new kind, as specified. The last two parameters must be zero
301 * or one.
302 */
303GC_API unsigned GC_CALL GC_new_kind(void ** /* `free_list` */,
304 GC_word /* `mark_descriptor_template` */,
305 int /* `add_size_to_descriptor` */,
306 int /* `clear_new_objects` */)
307 GC_ATTR_NONNULL(1);
308GC_API unsigned GC_CALL GC_new_kind_inner(
309 void ** /* `free_list` */, GC_word /* `mark_descriptor_template` */,
310 int /* `add_size_to_descriptor` */, int /* `clear_new_objects` */)
311 GC_ATTR_NONNULL(1);
312
313/**
314 * Return a new mark procedure identifier, suitable for use as the first
315 * argument in `GC_MAKE_PROC()`.
316 */
317GC_API unsigned GC_CALL GC_new_proc(GC_mark_proc);
318GC_API unsigned GC_CALL GC_new_proc_inner(GC_mark_proc);
319
320/**
321 * Similar to `GC_init_gcj_malloc()` described in `gc_gcj.h` file but with
322 * the proper types of the arguments and an additional runtime checking.
323 * `GC_GCJ_MARK_DESCR_OFFSET` should be passed to `descr_offset` argument.
324 * Defined only if the library has been compiled with `GC_GCJ_SUPPORT`
325 * macro defined.
326 */
327GC_API void GC_CALL GC_init_gcj_malloc_mp(unsigned /* `mp_index` */,
328 GC_mark_proc /* `mp` */,
329 size_t /* `descr_offset` */);
330
331/**
332 * Allocate an object of a given `kind`. By default, there are only
333 * a few kinds: composite (pointerful), atomic, uncollectible, etc.
334 * We claim it is possible for clever client code that understands the
335 * collector internals to add more, e.g. to communicate object layout
336 * information to the collector. Note that in the multi-threaded
337 * contexts, this is usually unsafe for kinds that have the descriptor
338 * in the object itself, since there is otherwise a window in which
339 * the descriptor is not correct. Even in the single-threaded case,
340 * we need to be sure that cleared objects on a free list do not cause
341 * a GC crash if they are accidentally traced.
342 */
343GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
344 GC_generic_malloc(size_t /* `lb` */, int /* `kind` */);
345
346/**
347 * Same as `GC_generic_malloc()`, but pointers to past the first heap block
348 * of the resulting object are ignored. We avoid holding the allocator lock
349 * while we clear the memory.
350 */
351GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
352 GC_generic_malloc_ignore_off_page(size_t /* `lb` */, int /* `kind` */);
353
354/**
355 * A generalized variant of `GC_malloc_uncollectable()` and
356 * `GC_malloc_atomic_uncollectable()`.
357 */
358GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
359 GC_generic_malloc_uncollectable(size_t /* `lb` */, int /* `kind` */);
360
361/**
362 * Same as `GC_generic_malloc()`, but primary for allocating an object
363 * of the same kind as an existing one (`kind` obtained by
364 * `GC_get_kind_and_size()`). Not suitable for `gcj` and typed-`malloc`
365 * kinds.
366 */
367GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
368 GC_generic_or_special_malloc(size_t /* `size` */, int /* `kind` */);
369GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL
370 GC_debug_generic_or_special_malloc(size_t /* `size` */, int /* `kind` */,
371 GC_EXTRA_PARAMS);
372
373#ifdef GC_DEBUG
374# define GC_GENERIC_OR_SPECIAL_MALLOC(sz, k) \
375 GC_debug_generic_or_special_malloc(sz, k, GC_EXTRAS)
376#else
377# define GC_GENERIC_OR_SPECIAL_MALLOC(sz, k) \
378 GC_generic_or_special_malloc(sz, k)
379#endif /* !GC_DEBUG */
380
381/**
382 * Similar to `GC_size` but returns object kind. The size is returned too
383 * if `psize` is not `NULL`. The object pointer should be non-`NULL`.
384 */
385GC_API int GC_CALL GC_get_kind_and_size(const void *, size_t * /* `psize` */)
386 GC_ATTR_NONNULL(1);
387
388/**
389 * A procedure which produces a human-readable description of the
390 * "type" of object `p` into the buffer `out_buf` of length
391 * `GC_TYPE_DESCR_LEN`. This is used by the debug support when
392 * printing objects. These functions should be as robust as possible,
393 * though we do avoid invoking them on objects on the global free list.
394 */
395typedef void(GC_CALLBACK *GC_describe_type_fn)(void * /* `p` */,
396 char * /* `out_buf` */);
397#define GC_TYPE_DESCR_LEN 40
398
399/**
400 * Register a `describe_type` function to be used when printing objects
401 * of a particular `kind`.
402 */
403GC_API void GC_CALL GC_register_describe_type_fn(int /* `kind` */,
404 GC_describe_type_fn);
405
406/**
407 * Clear some of the inaccessible part of the stack. Returns its argument,
408 * so it can be used in a tail call position, hence clearing another frame.
409 * The argument may be `NULL`.
410 */
411GC_API void *GC_CALL GC_clear_stack(void *);
412
413/**
414 * Set/get the client notifier on collections. The client-supplied procedure
415 * is called at the start of every full collection (called with the allocator
416 * lock held). May be 0. This is a really tricky interface to use correctly.
417 * Unless you really understand the collector internals, the callback should
418 * not, directly or indirectly, make any `GC_` or potentially blocking calls.
419 * In particular, it is not safe to allocate memory using the collector from
420 * within the callback procedure. Both the setter and the getter acquire the
421 * allocator lock (in the reader mode in case of the getter).
422 */
423typedef void(GC_CALLBACK *GC_start_callback_proc)(void);
424GC_API void GC_CALL GC_set_start_callback(GC_start_callback_proc);
425GC_API GC_start_callback_proc GC_CALL GC_get_start_callback(void);
426
427/**
428 * Slow/general mark bit manipulation. The caller should hold the
429 * allocator lock. The argument should be the real address of an object
430 * (i.e. the address of the debug header if there is one).
431 */
432GC_API void GC_CALL GC_clear_mark_bit(const void *) GC_ATTR_NONNULL(1);
433GC_API void GC_CALL GC_set_mark_bit(const void *) GC_ATTR_NONNULL(1);
434
435/**
436 * Get the mark bit. Returns 1 (true) or 0. The caller should hold the
437 * allocator lock at least in the reader mode. The argument should be
438 * the real address of an object (i.e. the address of the debug header
439 * if there is one).
440 */
441GC_API int GC_CALL GC_is_marked(const void *) GC_ATTR_NONNULL(1);
442
443/**
444 * Push everything in the given range onto the mark stack. `bottom` is
445 * the first location to be scanned; `top` is one past the last location
446 * to be scanned. Should only be used if there is no possibility of mark
447 * stack overflow.
448 */
449GC_API void GC_CALL GC_push_all(void * /* `bottom` */, void * /* `top` */);
450
451/**
452 * Similar to `GC_push_all` but treats all interior pointers as valid and
453 * scans the entire region immediately (not just schedules it for scanning),
454 * in case the contents change.
455 */
456GC_API void GC_CALL GC_push_all_eager(void * /* `bottom` */,
457 void * /* `top` */);
458
459/**
460 * Similar to `GC_push_all` but processes either all or only dirty pages
461 * depending on `all` argument.
462 */
463GC_API void GC_CALL GC_push_conditional(void * /* `bottom` */,
464 void * /* `top` */,
465 int /* `bool` `all` */);
466
467GC_API void GC_CALL GC_push_finalizer_structures(void);
468
469/**
470 * Set/get the client push-other-roots procedure. A client-supplied
471 * procedure should also call the original one. Note that both the setter
472 * and the getter require some external synchronization to avoid data race.
473 */
474typedef void(GC_CALLBACK *GC_push_other_roots_proc)(void);
475GC_API void GC_CALL GC_set_push_other_roots(GC_push_other_roots_proc);
476GC_API GC_push_other_roots_proc GC_CALL GC_get_push_other_roots(void);
477
478/**
479 * Walk the GC heap visiting all reachable objects. Assume the caller
480 * holds the allocator lock at least in the reader mode. Object base
481 * pointer, object size and client custom data are passed to the callback
482 * (holding the allocator lock in the same mode as the caller does).
483 */
484typedef void(GC_CALLBACK *GC_reachable_object_proc)(
485 void * /* `obj` */, size_t /* `bytes` */, void * /* `client_data` */);
486GC_API void GC_CALL GC_enumerate_reachable_objects_inner(
487 GC_reachable_object_proc, void * /* `client_data` */) GC_ATTR_NONNULL(1);
488
489/**
490 * Is the given address in one of the temporary static root sections?
491 * Acquires the allocator lock in the reader mode. For the debugging
492 * purpose only.
493 */
494GC_API int GC_CALL GC_is_tmp_root(void *);
495
496GC_API void GC_CALL GC_print_trace(GC_word /* `gc_no` */);
497GC_API void GC_CALL GC_print_trace_inner(GC_word /* `gc_no` */);
498
499/**
500 * Set the client for when mark stack is empty. A client can use this
501 * callback to process (un)marked objects and push additional work onto
502 * the stack. Useful for implementing ephemerons. Both the setter and
503 * the getter acquire the allocator lock (in the reader mode in case of
504 * the getter).
505 */
506typedef struct GC_ms_entry *(GC_CALLBACK *GC_on_mark_stack_empty_proc)(
507 struct GC_ms_entry * /* `mark_stack_top` */,
508 struct GC_ms_entry * /* `mark_stack_limit` */);
509GC_API void GC_CALL GC_set_on_mark_stack_empty(GC_on_mark_stack_empty_proc);
510GC_API GC_on_mark_stack_empty_proc GC_CALL GC_get_on_mark_stack_empty(void);
511
512#ifdef __cplusplus
513} /* extern "C" */
514#endif
515
516#endif /* GC_MARK_H */
517