| 1 | /* |
| 2 | * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. |
| 3 | * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. |
| 4 | * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P. |
| 5 | * |
| 6 | * |
| 7 | * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED |
| 8 | * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. |
| 9 | * |
| 10 | * Permission is hereby granted to use or copy this program |
| 11 | * for any purpose, provided the above notices are retained on all copies. |
| 12 | * Permission to modify the code and to distribute modified code is granted, |
| 13 | * provided the above notices are retained, and a notice that the code was |
| 14 | * modified is included with the above copyright notice. |
| 15 | * |
| 16 | */ |
| 17 | |
| 18 | /* Memory model documented at http://www-106.ibm.com/developerworks/ */ |
| 19 | /* eserver/articles/archguide.html and (clearer) */ |
| 20 | /* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */ |
| 21 | /* There appears to be no implicit ordering between any kind of */ |
| 22 | /* independent memory references. */ |
| 23 | |
| 24 | /* TODO: Implement double-wide operations if available. */ |
| 25 | |
| 26 | #if (AO_GNUC_PREREQ(4, 8) || AO_CLANG_PREREQ(3, 8)) \ |
| 27 | && !defined(AO_DISABLE_GCC_ATOMICS) |
| 28 | /* Probably, it could be enabled even for earlier gcc/clang versions. */ |
| 29 | |
| 30 | /* TODO: As of clang-3.8.1, it emits lwsync in AO_load_acquire */ |
| 31 | /* (i.e., the code is less efficient than the one given below). */ |
| 32 | |
| 33 | # include "generic.h" |
| 34 | |
| 35 | #else /* AO_DISABLE_GCC_ATOMICS */ |
| 36 | |
| 37 | /* Architecture enforces some ordering based on control dependence. */ |
| 38 | /* I don't know if that could help. */ |
| 39 | /* Data-dependent loads are always ordered. */ |
| 40 | /* Based on the above references, eieio is intended for use on */ |
| 41 | /* uncached memory, which we don't support. It does not order loads */ |
| 42 | /* from cached memory. */ |
| 43 | |
| 44 | #include "../all_aligned_atomic_load_store.h" |
| 45 | |
| 46 | #include "../test_and_set_t_is_ao_t.h" |
| 47 | /* There seems to be no byte equivalent of lwarx, so this */ |
| 48 | /* may really be what we want, at least in the 32-bit case. */ |
| 49 | |
| 50 | AO_INLINE void |
| 51 | AO_nop_full(void) |
| 52 | { |
| 53 | __asm__ __volatile__("sync" : : : "memory"); |
| 54 | } |
| 55 | #define AO_HAVE_nop_full |
| 56 | |
| 57 | /* lwsync apparently works for everything but a StoreLoad barrier. */ |
| 58 | AO_INLINE void |
| 59 | AO_lwsync(void) |
| 60 | { |
| 61 | #ifdef __NO_LWSYNC__ |
| 62 | __asm__ __volatile__("sync" : : : "memory"); |
| 63 | #else |
| 64 | __asm__ __volatile__("lwsync" : : : "memory"); |
| 65 | #endif |
| 66 | } |
| 67 | |
/* The read-only and the write-only barriers are both mapped to the     */
/* lightweight AO_lwsync() barrier.                                     */
#define AO_nop_read() AO_lwsync()
#define AO_HAVE_nop_read

#define AO_nop_write() AO_lwsync()
#define AO_HAVE_nop_write
| 73 | |
/* Instruction mnemonics spliced into the asm templates of this file:   */
/* word forms on 32-bit targets, doubleword forms on 64-bit ones.       */
#if !defined(__powerpc64__) && !defined(__ppc64__) && !defined(__64BIT__)
# define AO_PPC_LD "lwz"
# define AO_PPC_LxARX "lwarx"
# define AO_PPC_CMPx "cmpw"
# define AO_PPC_STxCXd "stwcx."
# define AO_PPC_LOAD_CLOBBER "cc"
        /* FIXME: We should get gcc to allocate one of the condition    */
        /* registers.  I always got "impossible constraint" when I      */
        /* tried the "y" constraint.                                    */
# define AO_T_IS_INT
#else
        /* ppc64 uses ld, not lwz (and likewise the other doubleword    */
        /* forms).                                                      */
# define AO_PPC_LD "ld"
# define AO_PPC_LxARX "ldarx"
# define AO_PPC_CMPx "cmpd"
# define AO_PPC_STxCXd "stdcx."
# define AO_PPC_LOAD_CLOBBER "cr0"
#endif
| 92 | |
/* AO_PPC_L(label) emits a local label definition and AO_PPC_BR_A()     */
/* selects a branch target: the label reference (labelBF) where labels  */
/* are usable, or the explicit relative address (addr) otherwise.       */
#ifndef _AIX
# define AO_PPC_L(label) label ": "
# define AO_PPC_BR_A(labelBF, addr) labelBF
#else
  /* Labels are not supported on AIX, so branches use "$+N"/"$-N"       */
  /* offsets.  The offsets are the same for ppc64 because its           */
  /* instructions have the same size as the 32-bit ones.                */
# define AO_PPC_L(label) /* empty */
# define AO_PPC_BR_A(labelBF, addr) addr
#endif
| 102 | |
| 103 | /* We explicitly specify load_acquire, since it is important, and can */ |
| 104 | /* be implemented relatively cheaply. It could be implemented */ |
| 105 | /* with an ordinary load followed by a lwsync. But the general wisdom */ |
| 106 | /* seems to be that a data dependent branch followed by an isync is */ |
| 107 | /* cheaper. And the documentation is fairly explicit that this also */ |
| 108 | /* has acquire semantics. */ |
| 109 | AO_INLINE AO_t |
| 110 | AO_load_acquire(const volatile AO_t *addr) |
| 111 | { |
| 112 | AO_t result; |
| 113 | |
| 114 | __asm__ __volatile__ ( |
| 115 | AO_PPC_LD "%U1%X1 %0,%1\n" |
| 116 | "cmpw %0,%0\n" |
| 117 | "bne- " AO_PPC_BR_A("1f", "$+4") "\n" |
| 118 | AO_PPC_L("1") "isync\n" |
| 119 | : "=r" (result) |
| 120 | : "m"(*addr) : "memory", AO_PPC_LOAD_CLOBBER); |
| 121 | return result; |
| 122 | } |
| 123 | #define AO_HAVE_load_acquire |
| 124 | |
| 125 | /* We explicitly specify store_release, since it relies */ |
| 126 | /* on the fact that lwsync is also a LoadStore barrier. */ |
| 127 | AO_INLINE void |
| 128 | AO_store_release(volatile AO_t *addr, AO_t value) |
| 129 | { |
| 130 | AO_lwsync(); |
| 131 | *addr = value; |
| 132 | } |
| 133 | #define AO_HAVE_store_release |
| 134 | |
| 135 | #ifndef AO_PREFER_GENERALIZED |
| 136 | /* This is similar to the code in the garbage collector. Deleting */ |
| 137 | /* this and having it synthesized from compare_and_swap would probably */ |
| 138 | /* only cost us a load immediate instruction. */ |
| 139 | AO_INLINE AO_TS_VAL_t |
| 140 | AO_test_and_set(volatile AO_TS_t *addr) { |
| 141 | /* TODO: And we should be using smaller objects anyway. */ |
| 142 | AO_t oldval; |
| 143 | AO_t temp = 1; /* locked value */ |
| 144 | |
| 145 | __asm__ __volatile__( |
| 146 | AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n" |
| 147 | /* load and reserve */ |
| 148 | AO_PPC_CMPx "i %0, 0\n" /* if load is */ |
| 149 | "bne " AO_PPC_BR_A("2f", "$+12") "\n" |
| 150 | /* non-zero, return already set */ |
| 151 | AO_PPC_STxCXd " %2,0,%1\n" /* else store conditional */ |
| 152 | "bne- " AO_PPC_BR_A("1b", "$-16") "\n" |
| 153 | /* retry if lost reservation */ |
| 154 | AO_PPC_L("2") "\n" /* oldval is zero if we set */ |
| 155 | : "=&r"(oldval) |
| 156 | : "r"(addr), "r"(temp) |
| 157 | : "memory", "cr0"); |
| 158 | return (AO_TS_VAL_t)oldval; |
| 159 | } |
| 160 | #define AO_HAVE_test_and_set |
| 161 | |
| 162 | AO_INLINE AO_TS_VAL_t |
| 163 | AO_test_and_set_acquire(volatile AO_TS_t *addr) { |
| 164 | AO_TS_VAL_t result = AO_test_and_set(addr); |
| 165 | AO_lwsync(); |
| 166 | return result; |
| 167 | } |
| 168 | #define AO_HAVE_test_and_set_acquire |
| 169 | |
| 170 | AO_INLINE AO_TS_VAL_t |
| 171 | AO_test_and_set_release(volatile AO_TS_t *addr) { |
| 172 | AO_lwsync(); |
| 173 | return AO_test_and_set(addr); |
| 174 | } |
| 175 | #define AO_HAVE_test_and_set_release |
| 176 | |
| 177 | AO_INLINE AO_TS_VAL_t |
| 178 | AO_test_and_set_full(volatile AO_TS_t *addr) { |
| 179 | AO_TS_VAL_t result; |
| 180 | AO_lwsync(); |
| 181 | result = AO_test_and_set(addr); |
| 182 | AO_lwsync(); |
| 183 | return result; |
| 184 | } |
| 185 | #define AO_HAVE_test_and_set_full |
| 186 | #endif /* !AO_PREFER_GENERALIZED */ |
| 187 | |
| 188 | #ifndef AO_GENERALIZE_ASM_BOOL_CAS |
| 189 | |
| 190 | AO_INLINE int |
| 191 | AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) |
| 192 | { |
| 193 | AO_t oldval; |
| 194 | int result = 0; |
| 195 | |
| 196 | __asm__ __volatile__( |
| 197 | AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n" /* load and reserve */ |
| 198 | AO_PPC_CMPx " %0, %4\n" /* if load is not equal to */ |
| 199 | "bne " AO_PPC_BR_A("2f", "$+16") "\n" /* old, fail */ |
| 200 | AO_PPC_STxCXd " %3,0,%2\n" /* else store conditional */ |
| 201 | "bne- " AO_PPC_BR_A("1b", "$-16") "\n" |
| 202 | /* retry if lost reservation */ |
| 203 | "li %1,1\n" /* result = 1; */ |
| 204 | AO_PPC_L("2") "\n" |
| 205 | : "=&r"(oldval), "=&r"(result) |
| 206 | : "r"(addr), "r"(new_val), "r"(old), "1"(result) |
| 207 | : "memory", "cr0"); |
| 208 | return result; |
| 209 | } |
| 210 | # define AO_HAVE_compare_and_swap |
| 211 | |
| 212 | AO_INLINE int |
| 213 | AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) |
| 214 | { |
| 215 | int result = AO_compare_and_swap(addr, old, new_val); |
| 216 | AO_lwsync(); |
| 217 | return result; |
| 218 | } |
| 219 | # define AO_HAVE_compare_and_swap_acquire |
| 220 | |
| 221 | AO_INLINE int |
| 222 | AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) |
| 223 | { |
| 224 | AO_lwsync(); |
| 225 | return AO_compare_and_swap(addr, old, new_val); |
| 226 | } |
| 227 | # define AO_HAVE_compare_and_swap_release |
| 228 | |
| 229 | AO_INLINE int |
| 230 | AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) |
| 231 | { |
| 232 | int result; |
| 233 | AO_lwsync(); |
| 234 | result = AO_compare_and_swap(addr, old, new_val); |
| 235 | if (result) |
| 236 | AO_lwsync(); |
| 237 | return result; |
| 238 | } |
| 239 | # define AO_HAVE_compare_and_swap_full |
| 240 | |
| 241 | #endif /* !AO_GENERALIZE_ASM_BOOL_CAS */ |
| 242 | |
| 243 | AO_INLINE AO_t |
| 244 | AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val) |
| 245 | { |
| 246 | AO_t fetched_val; |
| 247 | |
| 248 | __asm__ __volatile__( |
| 249 | AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n" /* load and reserve */ |
| 250 | AO_PPC_CMPx " %0, %3\n" /* if load is not equal to */ |
| 251 | "bne " AO_PPC_BR_A("2f", "$+12") "\n" /* old_val, fail */ |
| 252 | AO_PPC_STxCXd " %2,0,%1\n" /* else store conditional */ |
| 253 | "bne- " AO_PPC_BR_A("1b", "$-16") "\n" |
| 254 | /* retry if lost reservation */ |
| 255 | AO_PPC_L("2") "\n" |
| 256 | : "=&r"(fetched_val) |
| 257 | : "r"(addr), "r"(new_val), "r"(old_val) |
| 258 | : "memory", "cr0"); |
| 259 | return fetched_val; |
| 260 | } |
| 261 | #define AO_HAVE_fetch_compare_and_swap |
| 262 | |
| 263 | AO_INLINE AO_t |
| 264 | AO_fetch_compare_and_swap_acquire(volatile AO_t *addr, AO_t old_val, |
| 265 | AO_t new_val) |
| 266 | { |
| 267 | AO_t result = AO_fetch_compare_and_swap(addr, old_val, new_val); |
| 268 | AO_lwsync(); |
| 269 | return result; |
| 270 | } |
| 271 | #define AO_HAVE_fetch_compare_and_swap_acquire |
| 272 | |
| 273 | AO_INLINE AO_t |
| 274 | AO_fetch_compare_and_swap_release(volatile AO_t *addr, AO_t old_val, |
| 275 | AO_t new_val) |
| 276 | { |
| 277 | AO_lwsync(); |
| 278 | return AO_fetch_compare_and_swap(addr, old_val, new_val); |
| 279 | } |
| 280 | #define AO_HAVE_fetch_compare_and_swap_release |
| 281 | |
| 282 | AO_INLINE AO_t |
| 283 | AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val, |
| 284 | AO_t new_val) |
| 285 | { |
| 286 | AO_t result; |
| 287 | AO_lwsync(); |
| 288 | result = AO_fetch_compare_and_swap(addr, old_val, new_val); |
| 289 | if (result == old_val) |
| 290 | AO_lwsync(); |
| 291 | return result; |
| 292 | } |
| 293 | #define AO_HAVE_fetch_compare_and_swap_full |
| 294 | |
| 295 | #ifndef AO_PREFER_GENERALIZED |
| 296 | AO_INLINE AO_t |
| 297 | AO_fetch_and_add(volatile AO_t *addr, AO_t incr) { |
| 298 | AO_t oldval; |
| 299 | AO_t newval; |
| 300 | |
| 301 | __asm__ __volatile__( |
| 302 | AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n" /* load and reserve */ |
| 303 | "add %1,%0,%3\n" /* increment */ |
| 304 | AO_PPC_STxCXd " %1,0,%2\n" /* store conditional */ |
| 305 | "bne- " AO_PPC_BR_A("1b", "$-12") "\n" |
| 306 | /* retry if lost reservation */ |
| 307 | : "=&r"(oldval), "=&r"(newval) |
| 308 | : "r"(addr), "r"(incr) |
| 309 | : "memory", "cr0"); |
| 310 | return oldval; |
| 311 | } |
| 312 | #define AO_HAVE_fetch_and_add |
| 313 | |
| 314 | AO_INLINE AO_t |
| 315 | AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) { |
| 316 | AO_t result = AO_fetch_and_add(addr, incr); |
| 317 | AO_lwsync(); |
| 318 | return result; |
| 319 | } |
| 320 | #define AO_HAVE_fetch_and_add_acquire |
| 321 | |
| 322 | AO_INLINE AO_t |
| 323 | AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) { |
| 324 | AO_lwsync(); |
| 325 | return AO_fetch_and_add(addr, incr); |
| 326 | } |
| 327 | #define AO_HAVE_fetch_and_add_release |
| 328 | |
| 329 | AO_INLINE AO_t |
| 330 | AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) { |
| 331 | AO_t result; |
| 332 | AO_lwsync(); |
| 333 | result = AO_fetch_and_add(addr, incr); |
| 334 | AO_lwsync(); |
| 335 | return result; |
| 336 | } |
| 337 | #define AO_HAVE_fetch_and_add_full |
| 338 | #endif /* !AO_PREFER_GENERALIZED */ |
| 339 | |
/* Drop the helper macros that exist only to build the asm templates    */
/* above, so that they do not leak out of this header.                  */

/* Label and branch-target helpers. */
#undef AO_PPC_L
#undef AO_PPC_BR_A

/* Instruction mnemonics and the load clobber. */
#undef AO_PPC_LD
#undef AO_PPC_LxARX
#undef AO_PPC_CMPx
#undef AO_PPC_STxCXd
#undef AO_PPC_LOAD_CLOBBER
| 347 | |
| 348 | #endif /* AO_DISABLE_GCC_ATOMICS */ |
| 349 | |