| 1 | /* |
| 2 | * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. |
| 3 | * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. |
| 4 | * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. |
| 5 | * Copyright (c) 2009-2016 Ivan Maidanski |
| 6 | * |
| 7 | * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED |
| 8 | * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. |
| 9 | * |
| 10 | * Permission is hereby granted to use or copy this program |
| 11 | * for any purpose, provided the above notices are retained on all copies. |
| 12 | * Permission to modify the code and to distribute modified code is granted, |
| 13 | * provided the above notices are retained, and a notice that the code was |
| 14 | * modified is included with the above copyright notice. |
| 15 | * |
| 16 | * Some of the machine specific code was borrowed from our GC distribution. |
| 17 | */ |
| 18 | |
| 19 | /* The following really assume we have a 486 or better. */ |
| 20 | |
| 21 | #include "../all_aligned_atomic_load_store.h" |
| 22 | |
| 23 | #include "../test_and_set_t_is_char.h" |
| 24 | |
#if !defined(__i386) && !defined(AO_USE_PENTIUM4_INSTRS)
  /* Not a 32-bit x86 build: "mfence" (an SSE2 instruction) can be      */
  /* relied upon, as all x86_64/amd64 chips support it.                 */
# define AO_USE_PENTIUM4_INSTRS
#endif
| 29 | |
#ifdef AO_USE_PENTIUM4_INSTRS
  /* Full memory barrier implemented with a single "mfence".  The       */
  /* "memory" clobber additionally tells the compiler that memory may   */
  /* be changed by the statement.                                       */
  AO_INLINE void
  AO_nop_full(void)
  {
    __asm__ __volatile__ (
      "mfence"
      : /* no outputs */
      : /* no inputs */
      : "memory");
  }
# define AO_HAVE_nop_full

#else
  /* The cpuid instruction could be used here.  But it seems to be      */
  /* slower than the default implementation based on                    */
  /* test_and_set_full, so no AO_nop_full is provided in this case.     */
#endif /* !AO_USE_PENTIUM4_INSTRS */
| 43 | |
| 44 | /* As far as we can tell, the lfence and sfence instructions are not */ |
| 45 | /* currently needed or useful for cached memory accesses. */ |
| 46 | |
| 47 | /* Really only works for 486 and later */ |
| 48 | #ifndef AO_PREFER_GENERALIZED |
| 49 | AO_INLINE AO_t |
| 50 | AO_fetch_and_add_full (volatile AO_t *p, AO_t incr) |
| 51 | { |
| 52 | AO_t result; |
| 53 | |
| 54 | __asm__ __volatile__ ("lock; xadd %0, %1" |
| 55 | : "=r" (result), "+m" (*p) |
| 56 | : "0" (incr) |
| 57 | : "memory"); |
| 58 | return result; |
| 59 | } |
| 60 | # define AO_HAVE_fetch_and_add_full |
| 61 | #endif /* !AO_PREFER_GENERALIZED */ |
| 62 | |
| 63 | AO_INLINE unsigned char |
| 64 | AO_char_fetch_and_add_full (volatile unsigned char *p, unsigned char incr) |
| 65 | { |
| 66 | unsigned char result; |
| 67 | |
| 68 | __asm__ __volatile__ ("lock; xaddb %0, %1" |
| 69 | : "=q" (result), "+m" (*p) |
| 70 | : "0" (incr) |
| 71 | : "memory"); |
| 72 | return result; |
| 73 | } |
| 74 | #define AO_HAVE_char_fetch_and_add_full |
| 75 | |
| 76 | AO_INLINE unsigned short |
| 77 | AO_short_fetch_and_add_full (volatile unsigned short *p, unsigned short incr) |
| 78 | { |
| 79 | unsigned short result; |
| 80 | |
| 81 | __asm__ __volatile__ ("lock; xaddw %0, %1" |
| 82 | : "=r" (result), "+m" (*p) |
| 83 | : "0" (incr) |
| 84 | : "memory"); |
| 85 | return result; |
| 86 | } |
| 87 | #define AO_HAVE_short_fetch_and_add_full |
| 88 | |
| 89 | #ifndef AO_PREFER_GENERALIZED |
| 90 | AO_INLINE void |
| 91 | AO_and_full (volatile AO_t *p, AO_t value) |
| 92 | { |
| 93 | __asm__ __volatile__ ("lock; and %1, %0" |
| 94 | : "+m" (*p) |
| 95 | : "r" (value) |
| 96 | : "memory"); |
| 97 | } |
| 98 | # define AO_HAVE_and_full |
| 99 | |
| 100 | AO_INLINE void |
| 101 | AO_or_full (volatile AO_t *p, AO_t value) |
| 102 | { |
| 103 | __asm__ __volatile__ ("lock; or %1, %0" |
| 104 | : "+m" (*p) |
| 105 | : "r" (value) |
| 106 | : "memory"); |
| 107 | } |
| 108 | # define AO_HAVE_or_full |
| 109 | |
| 110 | AO_INLINE void |
| 111 | AO_xor_full (volatile AO_t *p, AO_t value) |
| 112 | { |
| 113 | __asm__ __volatile__ ("lock; xor %1, %0" |
| 114 | : "+m" (*p) |
| 115 | : "r" (value) |
| 116 | : "memory"); |
| 117 | } |
| 118 | # define AO_HAVE_xor_full |
| 119 | #endif /* !AO_PREFER_GENERALIZED */ |
| 120 | |
| 121 | AO_INLINE AO_TS_VAL_t |
| 122 | AO_test_and_set_full (volatile AO_TS_t *addr) |
| 123 | { |
| 124 | AO_TS_t oldval; |
| 125 | /* Note: the "xchg" instruction does not need a "lock" prefix */ |
| 126 | __asm__ __volatile__ ("xchg %b0, %1" |
| 127 | : "=q" (oldval), "+m" (*addr) |
| 128 | : "0" (0xff) |
| 129 | : "memory"); |
| 130 | return (AO_TS_VAL_t)oldval; |
| 131 | } |
| 132 | #define AO_HAVE_test_and_set_full |
| 133 | |
| 134 | #ifndef AO_GENERALIZE_ASM_BOOL_CAS |
| 135 | /* Returns nonzero if the comparison succeeded. */ |
| 136 | AO_INLINE int |
| 137 | AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) |
| 138 | { |
| 139 | char result; |
| 140 | __asm__ __volatile__ ("lock; cmpxchg %2, %0; setz %1" |
| 141 | : "+m" (*addr), "=a" (result) |
| 142 | : "r" (new_val), "a" (old) |
| 143 | : "memory"); |
| 144 | return (int) result; |
| 145 | } |
| 146 | # define AO_HAVE_compare_and_swap_full |
| 147 | #endif /* !AO_GENERALIZE_ASM_BOOL_CAS */ |
| 148 | |
| 149 | AO_INLINE AO_t |
| 150 | AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val, |
| 151 | AO_t new_val) |
| 152 | { |
| 153 | AO_t fetched_val; |
| 154 | __asm__ __volatile__ ("lock; cmpxchg %2, %0" |
| 155 | : "+m" (*addr), "=a" (fetched_val) |
| 156 | : "r" (new_val), "a" (old_val) |
| 157 | : "memory"); |
| 158 | return fetched_val; |
| 159 | } |
| 160 | #define AO_HAVE_fetch_compare_and_swap_full |
| 161 | |
| 162 | #if defined(__i386) |
| 163 | |
| 164 | # ifndef AO_NO_CMPXCHG8B |
| 165 | # include "../standard_ao_double_t.h" |
| 166 | |
| 167 | /* Reading or writing a quadword aligned on a 64-bit boundary is */ |
| 168 | /* always carried out atomically (requires at least a Pentium). */ |
| 169 | # define AO_ACCESS_double_CHECK_ALIGNED |
| 170 | # include "../loadstore/double_atomic_load_store.h" |
| 171 | |
| 172 | /* Returns nonzero if the comparison succeeded. */ |
| 173 | /* Really requires at least a Pentium. */ |
| 174 | AO_INLINE int |
| 175 | AO_compare_double_and_swap_double_full(volatile AO_double_t *addr, |
| 176 | AO_t old_val1, AO_t old_val2, |
| 177 | AO_t new_val1, AO_t new_val2) |
| 178 | { |
| 179 | AO_t dummy; /* an output for clobbered edx */ |
| 180 | char result; |
| 181 | |
| 182 | __asm__ __volatile__ ("lock; cmpxchg8b %0; setz %1" |
| 183 | : "+m" (*addr), "=a" (result), "=d" (dummy) |
| 184 | : "d" (old_val2), "a" (old_val1), |
| 185 | "c" (new_val2), "b" (new_val1) |
| 186 | : "memory"); |
| 187 | return (int) result; |
| 188 | } |
| 189 | # define AO_HAVE_compare_double_and_swap_double_full |
| 190 | # endif /* !AO_NO_CMPXCHG8B */ |
| 191 | |
| 192 | # define AO_T_IS_INT |
| 193 | |
| 194 | #else /* x64 */ |
| 195 | |
| 196 | AO_INLINE unsigned int |
| 197 | AO_int_fetch_and_add_full (volatile unsigned int *p, unsigned int incr) |
| 198 | { |
| 199 | unsigned int result; |
| 200 | |
| 201 | __asm__ __volatile__ ("lock; xaddl %0, %1" |
| 202 | : "=r" (result), "+m" (*p) |
| 203 | : "0" (incr) |
| 204 | : "memory"); |
| 205 | return result; |
| 206 | } |
| 207 | # define AO_HAVE_int_fetch_and_add_full |
| 208 | |
# ifdef AO_CMPXCHG16B_AVAILABLE
#   include "../standard_ao_double_t.h"

    /* Compare the double-word at *addr against old_val1/old_val2 and,  */
    /* if it matches, atomically replace it with new_val1/new_val2.     */
    /* Returns nonzero if the comparison succeeded.                     */
    /* Older AMD Opterons are missing this instruction (SIGILL should   */
    /* be thrown in this case).                                         */
    AO_INLINE int
    AO_compare_double_and_swap_double_full (volatile AO_double_t *addr,
                                            AO_t old_val1, AO_t old_val2,
                                            AO_t new_val1, AO_t new_val2)
    {
      char matched;
      AO_t d_reg_out; /* an output for the clobbered "d" register */

      /* old_val1/old_val2 are passed in the "a"/"d" registers and      */
      /* new_val1/new_val2 in the "b"/"c" registers; setz then copies   */
      /* the zero flag into operand 1, which is read back as matched.   */
      __asm__ __volatile__ (
        "lock; cmpxchg16b %0; setz %1"
        : "+m" (*addr), "=a" (matched), "=d" (d_reg_out)
        : "d" (old_val2), "a" (old_val1),
          "c" (new_val2), "b" (new_val1)
        : "memory");
      return (int)matched;
    }
#   define AO_HAVE_compare_double_and_swap_double_full
# endif /* AO_CMPXCHG16B_AVAILABLE */
| 231 | |
| 232 | #endif /* x64 */ |
| 233 | |
| 234 | /* Real X86 implementations, except for some old 32-bit WinChips, */ |
| 235 | /* appear to enforce ordering between memory operations, EXCEPT that */ |
| 236 | /* a later read can pass earlier writes, presumably due to the visible */ |
| 237 | /* presence of store buffers. */ |
| 238 | /* We ignore both the WinChips and the fact that the official specs */ |
| 239 | /* seem to be much weaker (and arguably too weak to be usable). */ |
| 240 | #include "../ordered_except_wr.h" |
| 241 | |