| 1 | /* |
| 2 | * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers |
| 3 | * Copyright (c) 1991-1995 by Xerox Corporation. All rights reserved. |
| 4 | * Copyright (c) 2005 Hewlett-Packard Development Company, L.P. |
| 5 | * Copyright (c) 2008-2025 Ivan Maidanski |
| 6 | * |
| 7 | * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED |
| 8 | * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. |
| 9 | * |
| 10 | * Permission is hereby granted to use or copy this program |
| 11 | * for any purpose, provided the above notices are retained on all copies. |
| 12 | * Permission to modify the code and to distribute modified code is granted, |
| 13 | * provided the above notices are retained, and a notice that the code was |
| 14 | * modified is included with the above copyright notice. |
| 15 | */ |
| 16 | |
| 17 | #ifndef GC_INLINE_H |
| 18 | #define GC_INLINE_H |
| 19 | |
| 20 | /* |
| 21 | * *Warning*: Note that for these routines, it is the client`s responsibility |
| 22 | * to add the extra byte at the end to deal with one-past-the-end pointers. |
| 23 | * In the standard collector configuration, the collector assumes that such |
| 24 | * a byte has been added, and hence does not trace the last "pointer-sized" |
| 25 | * word in the resulting object. This is not an issue if |
| 26 | * `GC_get_all_interior_pointers()` returns zero or if |
| 27 | * `GC_get_dont_add_byte_at_end()` returns 1. This interface is most useful |
| 28 | * for compilers that generate C code. It is also used internally for |
| 29 | * thread-local allocation. A manual use is hereby discouraged. |
| 30 | * Multi-threaded clients should include `atomic_ops.h` file (or similar) |
| 31 | * before this header. There is no debugging variant of this allocation API. |
| 32 | */ |
| 33 | |
| 34 | #include "gc.h" |
| 35 | #include "gc_tiny_fl.h" |
| 36 | |
| 37 | #if GC_GNUC_PREREQ(3, 0) || defined(__clang__) |
| 38 | /* Equivalent to `(expr)`, but predict that usually `expr == outcome`. */ |
| 39 | # define GC_EXPECT(expr, outcome) __builtin_expect(expr, outcome) |
| 40 | #else |
| 41 | # define GC_EXPECT(expr, outcome) (expr) |
| 42 | #endif |
| 43 | |
| 44 | #ifndef GC_ASSERT |
| 45 | # ifdef NDEBUG |
| 46 | # define GC_ASSERT(expr) (void)0 |
| 47 | # else |
| 48 | # include <assert.h> |
| 49 | # define GC_ASSERT(expr) assert(expr) |
| 50 | # endif |
| 51 | #endif |
| 52 | |
| 53 | #ifndef GC_PREFETCH_FOR_WRITE |
| 54 | # if (GC_GNUC_PREREQ(3, 0) || defined(__clang__)) \ |
| 55 | && !defined(GC_NO_PREFETCH_FOR_WRITE) |
| 56 | # define GC_PREFETCH_FOR_WRITE(x) __builtin_prefetch((x), 1 /* write */) |
| 57 | # elif defined(_MSC_VER) && !defined(GC_NO_PREFETCH_FOR_WRITE) \ |
| 58 | && (defined(_M_IX86) || defined(_M_X64)) && !defined(_CHPE_ONLY_) \ |
| 59 | && (_MSC_VER >= 1900 /* VS 2015+ */) |
| 60 | # include <intrin.h> |
| 61 | # define GC_PREFETCH_FOR_WRITE(x) _m_prefetchw(x) |
| 62 | /* TODO: Support also `_M_ARM` (`__prefetchw`). */ |
| 63 | # else |
| 64 | # define GC_PREFETCH_FOR_WRITE(x) (void)0 |
| 65 | # endif |
| 66 | #endif |
| 67 | |
| 68 | #ifdef __cplusplus |
| 69 | extern "C" { |
| 70 | #endif |
| 71 | |
| 72 | /* Object kinds (exposed to public). */ |
| 73 | #define GC_I_PTRFREE 0 |
| 74 | #define GC_I_NORMAL 1 |
| 75 | |
| 76 | /** |
| 77 | * Determine if the collector has been configured not to pad the |
| 78 | * allocated objects even in the all-interior-pointers mode. |
| 79 | * Meaningful only if `GC_get_all_interior_pointers()` returns 1. |
| 80 | */ |
| 81 | GC_API int GC_CALL GC_get_dont_add_byte_at_end(void); |
| 82 | |
| 83 | /** |
| 84 | * Return a list of one or more objects of the indicated size, linked |
| 85 | * through the first pointer in each object. This has the advantage |
| 86 | * that it acquires the allocator lock only once, and may greatly |
| 87 | * reduce time wasted contending for the allocator lock. Typical usage |
| 88 | * would be in a thread that requires many items of the same size. |
| 89 | * It would keep its own free list in a thread-local storage, and call |
| 90 | * `GC_malloc_many()` or friends to replenish it. (We do not round up |
| 91 | * object sizes, since a call indicates the intention to consume many |
| 92 | * objects of exactly this size.) We assume that the size is nonzero |
| 93 | * and a multiple of `GC_GRANULE_BYTES`, and that the size already |
| 94 | * includes the value returned by `GC_get_all_interior_pointers()` |
| 95 | * (unless `GC_get_dont_add_byte_at_end()` returns a nonzero value). |
| 96 | * We return the free-list by assigning it to `*result`, since it is |
| 97 | * not safe to return a linked list of pointer-free objects, since the |
| 98 | * collector would not retain the entire list if it were invoked just |
| 99 | * as we were returning; the client must make sure that `*result` is |
| 100 | * traced even if objects are pointer-free. Note also that the client |
| 101 | * should usually clear the link field. |
| 102 | */ |
| 103 | GC_API void GC_CALL GC_generic_malloc_many(size_t /* `lb_adjusted` */, |
| 104 | int /* `kind` */, |
| 105 | void ** /* `result` */); |
| 106 | |
| 107 | /** |
| 108 | * A generalized variant of `GC_malloc` and `GC_malloc_atomic`. |
| 109 | * Uses appropriately the thread-local (if available) or the global |
| 110 | * free-list of the specified `kind`. |
| 111 | */ |
| 112 | GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL |
| 113 | GC_malloc_kind(size_t /* `lb` */, int /* `kind` */); |
| 114 | |
| 115 | #ifdef GC_THREADS |
| 116 | /* Same as `GC_malloc_kind` but uses only the global free-list. */ |
| 117 | GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void *GC_CALL |
| 118 | GC_malloc_kind_global(size_t /* `lb` */, int /* `kind` */); |
| 119 | #else |
| 120 | # define GC_malloc_kind_global GC_malloc_kind |
| 121 | #endif |
| 122 | |
| 123 | /* |
| 124 | * An internal macro to update the free-list pointer atomically |
| 125 | * (if the AO primitives are available) to avoid race with the marker. |
| 126 | */ |
| 127 | #if !defined(GC_THREADS) || !defined(AO_HAVE_store) |
| 128 | # define GC_FAST_M_AO_STORE(my_fl, next) (void)(*(my_fl) = (next)) |
| 129 | #elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > __SIZEOF_SIZE_T__) |
| 130 | /* |
| 131 | * Directly use the gcc atomic intrinsic as the size of a pointer is |
| 132 | * bigger than that of `AO_t`. |
| 133 | */ |
| 134 | # define GC_FAST_M_AO_STORE(my_fl, next) \ |
| 135 | __atomic_store_n(my_fl, next, __ATOMIC_RELAXED) |
| 136 | #else |
| 137 | # define GC_FAST_M_AO_STORE(my_fl, next) \ |
| 138 | AO_store((volatile AO_t *)(my_fl), (size_t)(next)) |
| 139 | #endif |
| 140 | |
| 141 | /** |
| 142 | * The ultimately general inline allocation macro. Allocate an object |
| 143 | * of size `lg` (in granules), putting the resulting pointer in `result`. |
| 144 | * `tiny_fl` is a "tiny" free-list array, which will be used first, |
| 145 | * if the size is appropriate. If `lg` argument is too large, then we |
| 146 | * allocate with `default_expr` instead. If we need to refill the free |
| 147 | * list, we use `GC_generic_malloc_many()` with the indicated kind `k`. |
| 148 | * `tiny_fl` should be an array of `GC_TINY_FREELISTS` `void` pointers. |
| 149 | * If `num_direct` is nonzero, and the individual free-list pointers are |
| 150 | * initialized to `(void *)1`, then we allocate `num_direct` granules |
| 151 | * directly using `GC_generic_malloc_many()` before putting multiple |
| 152 | * objects into the `tiny_fl` entry. If `num_direct` is zero, then the |
| 153 | * free lists may also be initialized to `NULL`. Note that we use the |
| 154 | * zeroth free list to hold objects of 1 granule in size that are used to |
| 155 | * satisfy size 0 allocation requests. We rely on much of this hopefully |
| 156 | * getting optimized away in the case of `num_direct` is 0. Particularly, |
| 157 | * if `lg` argument is constant, this should generate a small amount of code. |
| 158 | */ |
| 159 | #define GC_FAST_MALLOC_GRANS(result, lg, tiny_fl, num_direct, k, \ |
| 160 | default_expr, init) \ |
| 161 | do { \ |
| 162 | if (GC_EXPECT((lg) >= GC_TINY_FREELISTS, 0)) { \ |
| 163 | result = (default_expr); \ |
| 164 | } else { \ |
| 165 | void **my_fl = (tiny_fl) + (lg); \ |
| 166 | void *my_entry = *my_fl; \ |
| 167 | void *next; \ |
| 168 | \ |
| 169 | for (;;) { \ |
| 170 | if (GC_EXPECT((GC_word)(GC_uintptr_t)my_entry \ |
| 171 | > (num_direct) + GC_TINY_FREELISTS + 1, \ |
| 172 | 1)) { \ |
| 173 | next = *(void **)(my_entry); \ |
| 174 | result = my_entry; \ |
| 175 | GC_FAST_M_AO_STORE(my_fl, next); \ |
| 176 | init; \ |
| 177 | GC_PREFETCH_FOR_WRITE(next); \ |
| 178 | if ((k) != GC_I_PTRFREE) { \ |
| 179 | GC_end_stubborn_change(my_fl); \ |
| 180 | GC_reachable_here(next); \ |
| 181 | } \ |
| 182 | GC_ASSERT(GC_size(result) >= GC_RAW_BYTES_FROM_INDEX(lg)); \ |
| 183 | GC_ASSERT((k) == GC_I_PTRFREE \ |
| 184 | || 0 /* `NULL` */ == ((void **)result)[1]); \ |
| 185 | break; \ |
| 186 | } \ |
| 187 | /* Entry contains counter or `NULL`. */ \ |
| 188 | if ((GC_signed_word)(GC_uintptr_t)my_entry \ |
| 189 | - (GC_signed_word)(num_direct) \ |
| 190 | <= 0 /*< `(GC_uintptr_t)my_entry <= num_direct` */ \ |
| 191 | && my_entry != 0 /* NULL */) { \ |
| 192 | /* Small counter value, not `NULL`. */ \ |
| 193 | GC_FAST_M_AO_STORE(my_fl, (char *)my_entry + (lg) + 1); \ |
| 194 | result = (default_expr); \ |
| 195 | break; \ |
| 196 | } else { \ |
| 197 | /* Large counter or `NULL`. */ \ |
| 198 | size_t lb_adj = GC_RAW_BYTES_FROM_INDEX(0 == (lg) ? 1 : (lg)); \ |
| 199 | \ |
| 200 | GC_generic_malloc_many(lb_adj, k, my_fl); \ |
| 201 | my_entry = *my_fl; \ |
| 202 | if (0 /* `NULL` */ == my_entry) { \ |
| 203 | result = (*GC_get_oom_fn())(lb_adj); \ |
| 204 | break; \ |
| 205 | } \ |
| 206 | } \ |
| 207 | } \ |
| 208 | } \ |
| 209 | } while (0) |
| 210 | |
| 211 | /** |
| 212 | * Allocate `n` "pointer-sized" words. The allocation size is rounded |
| 213 | * up to a granule size. The pointer is stored to `result`. |
| 214 | * Should not be used unless `GC_get_all_interior_pointers()` returns zero |
| 215 | * or if `GC_get_dont_add_byte_at_end()` returns 1. Does not acquire |
| 216 | * the allocator lock. The caller is responsible for supplying |
| 217 | * a cleared `tiny_fl` free-list array. For single-threaded applications, |
| 218 | * this may be a global array. |
| 219 | */ |
| 220 | #define GC_MALLOC_WORDS_KIND(result, n, tiny_fl, k, init) \ |
| 221 | do { \ |
| 222 | size_t lg = GC_PTRS_TO_WHOLE_GRANULES(n); \ |
| 223 | \ |
| 224 | GC_FAST_MALLOC_GRANS(result, lg, tiny_fl, 0 /* `num_direct` */, k, \ |
| 225 | GC_malloc_kind(GC_RAW_BYTES_FROM_INDEX(lg), k), \ |
| 226 | init); \ |
| 227 | } while (0) |
| 228 | |
| 229 | #define GC_MALLOC_WORDS(result, n, tiny_fl) \ |
| 230 | GC_MALLOC_WORDS_KIND(result, n, tiny_fl, GC_I_NORMAL, \ |
| 231 | (void)(*(void **)(result) = 0 /* `NULL` */)) |
| 232 | |
| 233 | #define GC_MALLOC_ATOMIC_WORDS(result, n, tiny_fl) \ |
| 234 | GC_MALLOC_WORDS_KIND(result, n, tiny_fl, GC_I_PTRFREE, (void)0) |
| 235 | |
| 236 | /** Allocate a two-pointer initialized object. */ |
| 237 | #define GC_CONS(result, first, second, tiny_fl) \ |
| 238 | do { \ |
| 239 | void *l = (void *)(first); \ |
| 240 | void *r = (void *)(second); \ |
| 241 | GC_MALLOC_WORDS_KIND(result, 2, tiny_fl, GC_I_NORMAL, (void)0); \ |
| 242 | if ((result) != 0 /* `NULL` */) { \ |
| 243 | *(void **)(result) = l; \ |
| 244 | GC_ptr_store_and_dirty((void **)(result) + 1, r); \ |
| 245 | GC_reachable_here(l); \ |
| 246 | } \ |
| 247 | } while (0) |
| 248 | |
| 249 | /** |
| 250 | * Print address of each object in the free list for the given `kind` |
| 251 | * and size `lg` (in granules). The caller should hold the allocator |
| 252 | * lock at least in the reader mode. Defined only if the library has |
| 253 | * been compiled without `NO_DEBUGGING` macro defined. |
| 254 | */ |
| 255 | GC_API void GC_CALL GC_print_free_list(int /* `kind` */, size_t /* `lg` */); |
| 256 | |
| 257 | #ifdef __cplusplus |
| 258 | } /* extern "C" */ |
| 259 | #endif |
| 260 | |
| 261 | #endif /* !GC_INLINE_H */ |
| 262 | |