| 1 | /* |
| 2 | * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. |
| 3 | * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved. |
| 4 | * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved. |
| 5 | * Copyright (c) 2008-2017 Ivan Maidanski |
| 6 | * |
| 7 | * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED |
| 8 | * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. |
| 9 | * |
| 10 | * Permission is hereby granted to use or copy this program |
| 11 | * for any purpose, provided the above notices are retained on all copies. |
| 12 | * Permission to modify the code and to distribute modified code is granted, |
| 13 | * provided the above notices are retained, and a notice that the code was |
| 14 | * modified is included with the above copyright notice. |
| 15 | * |
| 16 | */ |
| 17 | |
| 18 | #if (AO_GNUC_PREREQ(4, 8) || AO_CLANG_PREREQ(3, 5)) \ |
| 19 | && !defined(AO_DISABLE_GCC_ATOMICS) |
| 20 | /* Probably, it could be enabled even for earlier gcc/clang versions. */ |
| 21 | # define AO_GCC_ATOMIC_TEST_AND_SET |
| 22 | #endif |
| 23 | |
/* Assembly fragments required only when targeting Native Client;      */
/* for every other target both macros expand to nothing.               */
#ifndef __native_client__
# define AO_MASK_PTR(reg) /* empty */
# define AO_BR_ALIGN /* empty */
#else
  /* Mask instruction should immediately precede access instruction.   */
# define AO_MASK_PTR(reg) " bical " reg ", " reg ", #0xc0000000\n"
# define AO_BR_ALIGN " .align 4\n"
#endif /* __native_client__ */
| 32 | |
/* Helpers for running ARM-only instructions from Thumb One code.       */
#if !defined(__thumb__) || defined(__thumb2__)
  /* ARM or Thumb-2 mode: no mode switch is needed.                     */
# define AO_THUMB_GO_ARM /* empty */
# define AO_THUMB_RESTORE_MODE /* empty */
# define AO_THUMB_SWITCH_CLOBBERS /* empty */
#else
  /* Thumb One mode does not have ARM "mcr", "swp" and some load/store  */
  /* instructions, so we temporarily switch to ARM mode and go back     */
  /* afterwards (clobbering "r3" register).                             */
# define AO_THUMB_GO_ARM \
           " adr r3, 4f\n" \
           " bx r3\n" \
           " .align\n" \
           " .arm\n" \
           AO_BR_ALIGN \
           "4:\n"
# define AO_THUMB_RESTORE_MODE \
           " adr r3, 5f + 1\n" \
           " bx r3\n" \
           " .thumb\n" \
           AO_BR_ALIGN \
           "5:\n"
  /* Note the trailing comma: the macro is pasted in front of the      */
  /* remaining entries of an asm clobber list.                         */
# define AO_THUMB_SWITCH_CLOBBERS "r3",
#endif /* __thumb__ && !__thumb2__ */
| 56 | |
/* NEC LE-IT: gcc has no way to easily check the arm architecture       */
/* but it defines only one (or several) of __ARM_ARCH_x__ to be true.   */
/* Hence the feature macros below (AO_ARM_HAVE_LDREX,                   */
/* AO_ARM_HAVE_LDREXBH, AO_ARM_HAVE_DMB, AO_ARM_HAVE_LDREXD) are        */
/* derived mostly by exclusion: a feature is assumed to be present      */
/* unless one of the listed architecture macros is defined.  The        */
/* ARMv5 variants and ARMv6M disable LDREX only if none of the          */
/* __ARM_ARCH_7__, __ARM_ARCH_7A__, __ARM_ARCH_8A__ macros is defined.  */
#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) \
    && !defined(__ARM_ARCH_3M__) && !defined(__ARM_ARCH_4__) \
    && !defined(__ARM_ARCH_4T__) \
    && ((!defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5E__) \
         && !defined(__ARM_ARCH_5T__) && !defined(__ARM_ARCH_5TE__) \
         && !defined(__ARM_ARCH_5TEJ__) && !defined(__ARM_ARCH_6M__)) \
        || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
        || defined(__ARM_ARCH_8A__))
# define AO_ARM_HAVE_LDREX
# if !defined(__ARM_ARCH_6__) && !defined(__ARM_ARCH_6J__) \
     && !defined(__ARM_ARCH_6T2__)
    /* LDREXB/STREXB and LDREXH/STREXH are present in ARMv6K/Z+.        */
# define AO_ARM_HAVE_LDREXBH
# endif
# if !defined(__ARM_ARCH_6__) && !defined(__ARM_ARCH_6J__) \
     && !defined(__ARM_ARCH_6T2__) && !defined(__ARM_ARCH_6Z__) \
     && !defined(__ARM_ARCH_6ZT2__)
# if !defined(__ARM_ARCH_6K__) && !defined(__ARM_ARCH_6KZ__) \
       && !defined(__ARM_ARCH_6ZK__)
      /* DMB is present in ARMv6M and ARMv7+.                           */
# define AO_ARM_HAVE_DMB
# endif
# if (!defined(__thumb__) \
        || (defined(__thumb2__) && !defined(__ARM_ARCH_7__) \
            && !defined(__ARM_ARCH_7M__) && !defined(__ARM_ARCH_7EM__))) \
       && (!defined(__clang__) || AO_CLANG_PREREQ(3, 3))
      /* LDREXD/STREXD present in ARMv6K/M+ (see gas/config/tc-arm.c).  */
      /* In the Thumb mode, this works only starting from ARMv7 (except */
      /* for the base and 'M' models).  Clang3.2 (and earlier) does not */
      /* allocate register pairs for LDREXD/STREXD properly (besides,   */
      /* Clang3.1 does not support "%H<r>" operand specification).      */
# define AO_ARM_HAVE_LDREXD
# endif /* !thumb || ARMv7A || ARMv7R+ */
# endif /* ARMv7+ */
#endif /* ARMv6+ */
| 94 | |
/* The SWP instruction is assumed usable unless the target is ARMv2,    */
/* ARMv6M, ARMv8-A or the code is compiled in the Thumb-2 mode.         */
#if !(defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_6M__) \
      || defined(__ARM_ARCH_8A__) || defined(__thumb2__))
  /* Note: ARMv6M is excluded due to no ARM mode support.               */
  /* Also, SWP is obsoleted for ARMv8+.                                 */
# define AO_ARM_HAVE_SWP
#endif /* !__thumb2__ */
| 101 | |
#if !defined(AO_UNIPROCESSOR) && defined(AO_ARM_HAVE_DMB) \
    && !defined(AO_PREFER_BUILTIN_ATOMICS)
  /* Write (store) barrier implemented by a single DMB instruction.     */
  /* Provided only for multi-processor builds on targets having DMB,    */
  /* and only if the client does not prefer the compiler builtins.      */
  AO_INLINE void
  AO_nop_write(void)
  {
    /* AO_THUMB_GO_ARM is empty. */
    /* The barrier is issued as "dmb ishst", i.e. the inner shareable   */
    /* store variant.  The plain variant (dmb st) would target the      */
    /* system domain and thus be overly conservative as the CPUs (even  */
    /* in case of big.LITTLE SoC) will occupy the inner shareable       */
    /* domain; it is theoretically slower, and should not be needed.    */
    /* That said, with limited experimentation, a CPU implementation    */
    /* for which it actually matters has not been found yet, though     */
    /* they should already exist.                                       */
    /* Anyway, note that the "st" and "ishst" barriers are actually     */
    /* quite weak and, as the libatomic_ops documentation states,       */
    /* usually not what you really want.                                */
    /* The "memory" clobber additionally prevents the compiler from     */
    /* moving memory accesses across the statement.                     */
    __asm__ __volatile__("dmb ishst" : : : "memory");
  }
# define AO_HAVE_nop_write
#endif /* AO_ARM_HAVE_DMB */
| 122 | |
| 123 | #ifndef AO_GCC_ATOMIC_TEST_AND_SET |
| 124 | |
| 125 | #ifdef AO_UNIPROCESSOR |
/* If only a single processor (core) is used, AO_UNIPROCESSOR could     */
/* be defined by the client to avoid unnecessary memory barrier.        */
/* In this variant the full barrier is reduced to a call of             */
/* AO_compiler_barrier() (defined elsewhere); no barrier instruction    */
/* is emitted by this function itself.                                  */
AO_INLINE void
AO_nop_full(void)
{
  AO_compiler_barrier();
}
# define AO_HAVE_nop_full
| 134 | |
| 135 | #elif defined(AO_ARM_HAVE_DMB) |
/* ARMv7 is compatible with ARMv6 but has a simpler command for         */
/* issuing a memory barrier (DMB).  Raising it via CP15 should still    */
/* work (but slightly less efficient because it requires the use of     */
/* a general-purpose register).                                         */
AO_INLINE void
AO_nop_full(void)
{
  /* AO_THUMB_GO_ARM is empty. */
  /* The "memory" clobber also makes the statement a compiler-level     */
  /* barrier for memory accesses.                                       */
  __asm__ __volatile__("dmb" : : : "memory");
}
# define AO_HAVE_nop_full
| 147 | |
| 148 | #elif defined(AO_ARM_HAVE_LDREX) |
| 149 | /* ARMv6 is the first architecture providing support for a simple */ |
| 150 | /* LL/SC. A data memory barrier must be raised via CP15 command. */ |
| 151 | AO_INLINE void |
| 152 | AO_nop_full(void) |
| 153 | { |
| 154 | unsigned dest = 0; |
| 155 | |
| 156 | /* Issue a data memory barrier (keeps ordering of memory */ |
| 157 | /* transactions before and after this operation). */ |
| 158 | __asm__ __volatile__("@AO_nop_full\n" |
| 159 | AO_THUMB_GO_ARM |
| 160 | " mcr p15,0,%0,c7,c10,5\n" |
| 161 | AO_THUMB_RESTORE_MODE |
| 162 | : "=&r"(dest) |
| 163 | : /* empty */ |
| 164 | : AO_THUMB_SWITCH_CLOBBERS "memory"); |
| 165 | } |
| 166 | # define AO_HAVE_nop_full |
| 167 | |
| 168 | #else |
| 169 | /* AO_nop_full() is emulated using AO_test_and_set_full(). */ |
| 170 | #endif /* !AO_UNIPROCESSOR && !AO_ARM_HAVE_LDREX */ |
| 171 | |
| 172 | #endif /* !AO_GCC_ATOMIC_TEST_AND_SET */ |
| 173 | |
| 174 | #ifdef AO_ARM_HAVE_LDREX |
| 175 | |
/* "ARM Architecture Reference Manual" (chapter A3.5.3) says that the   */
/* single-copy atomic processor accesses are all byte accesses, all     */
/* halfword accesses to halfword-aligned locations, all word accesses   */
/* to word-aligned locations.                                           */
/* There is only a single concern related to AO store operations:       */
/* a direct write (by STR[B/H] instruction) will not be recognized      */
/* by the LL/SC construct on the same CPU (i.e., according to ARM       */
/* documentation, e.g., see CortexA8 TRM reference, point 8.5,          */
/* atomic "store" (using LDREX/STREX[B/H]) is the only safe way to      */
/* set variables also used in LL/SC environment).                       */
/* This is only a problem if interrupt handlers do not clear the        */
/* reservation (by CLREX instruction or a dummy STREX one), as they     */
/* almost certainly should (e.g., see restore_user_regs defined in      */
/* arch/arm/kernel/entry-header.S of Linux).  Nonetheless, there is     */
/* a doubt this was properly implemented in some ancient OS releases.   */
| 191 | # ifdef AO_BROKEN_TASKSWITCH_CLREX |
| 192 | |
/* Markers for the store operations which must not be implemented as    */
/* a plain write when AO_BROKEN_TASKSWITCH_CLREX is defined.            */
/* NOTE(review): presumably these are consulted by the generic headers  */
/* included later; confirm against those headers.                       */
# define AO_SKIPATOMIC_store
# define AO_SKIPATOMIC_store_release
# define AO_SKIPATOMIC_char_store
# define AO_SKIPATOMIC_char_store_release
# define AO_SKIPATOMIC_short_store
# define AO_SKIPATOMIC_short_store_release
# define AO_SKIPATOMIC_int_store
# define AO_SKIPATOMIC_int_store_release
| 201 | |
| 202 | # ifndef AO_PREFER_BUILTIN_ATOMICS |
| 203 | |
| 204 | AO_INLINE void AO_store(volatile AO_t *addr, AO_t value) |
| 205 | { |
| 206 | int flag; |
| 207 | |
| 208 | __asm__ __volatile__("@AO_store\n" |
| 209 | AO_THUMB_GO_ARM |
| 210 | AO_BR_ALIGN |
| 211 | "1: " AO_MASK_PTR("%2") |
| 212 | " ldrex %0, [%2]\n" |
| 213 | AO_MASK_PTR("%2") |
| 214 | " strex %0, %3, [%2]\n" |
| 215 | " teq %0, #0\n" |
| 216 | " bne 1b\n" |
| 217 | AO_THUMB_RESTORE_MODE |
| 218 | : "=&r" (flag), "+m" (*addr) |
| 219 | : "r" (addr), "r" (value) |
| 220 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 221 | } |
| 222 | # define AO_HAVE_store |
| 223 | |
| 224 | # ifdef AO_ARM_HAVE_LDREXBH |
| 225 | AO_INLINE void AO_char_store(volatile unsigned char *addr, |
| 226 | unsigned char value) |
| 227 | { |
| 228 | int flag; |
| 229 | |
| 230 | __asm__ __volatile__("@AO_char_store\n" |
| 231 | AO_THUMB_GO_ARM |
| 232 | AO_BR_ALIGN |
| 233 | "1: " AO_MASK_PTR("%2") |
| 234 | " ldrexb %0, [%2]\n" |
| 235 | AO_MASK_PTR("%2") |
| 236 | " strexb %0, %3, [%2]\n" |
| 237 | " teq %0, #0\n" |
| 238 | " bne 1b\n" |
| 239 | AO_THUMB_RESTORE_MODE |
| 240 | : "=&r" (flag), "+m" (*addr) |
| 241 | : "r" (addr), "r" (value) |
| 242 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 243 | } |
| 244 | # define AO_HAVE_char_store |
| 245 | |
| 246 | AO_INLINE void AO_short_store(volatile unsigned short *addr, |
| 247 | unsigned short value) |
| 248 | { |
| 249 | int flag; |
| 250 | |
| 251 | __asm__ __volatile__("@AO_short_store\n" |
| 252 | AO_THUMB_GO_ARM |
| 253 | AO_BR_ALIGN |
| 254 | "1: " AO_MASK_PTR("%2") |
| 255 | " ldrexh %0, [%2]\n" |
| 256 | AO_MASK_PTR("%2") |
| 257 | " strexh %0, %3, [%2]\n" |
| 258 | " teq %0, #0\n" |
| 259 | " bne 1b\n" |
| 260 | AO_THUMB_RESTORE_MODE |
| 261 | : "=&r" (flag), "+m" (*addr) |
| 262 | : "r" (addr), "r" (value) |
| 263 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 264 | } |
| 265 | # define AO_HAVE_short_store |
| 266 | # endif /* AO_ARM_HAVE_LDREXBH */ |
| 267 | |
| 268 | # endif /* !AO_PREFER_BUILTIN_ATOMICS */ |
| 269 | |
| 270 | # elif !defined(AO_GCC_ATOMIC_TEST_AND_SET) |
| 271 | # include "../loadstore/atomic_store.h" |
| 272 | /* AO_int_store is defined in ao_t_is_int.h. */ |
| 273 | # endif /* !AO_BROKEN_TASKSWITCH_CLREX */ |
| 274 | |
| 275 | #endif /* AO_ARM_HAVE_LDREX */ |
| 276 | |
| 277 | #ifndef AO_GCC_ATOMIC_TEST_AND_SET |
| 278 | |
| 279 | # include "../test_and_set_t_is_ao_t.h" /* Probably suboptimal */ |
| 280 | |
| 281 | #ifdef AO_ARM_HAVE_LDREX |
| 282 | |
/* AO_t/char/short/int load is simple reading.                          */
/* Unaligned accesses are not guaranteed to be atomic.                  */
/* NOTE(review): the AO_ACCESS_*_CHECK_ALIGNED macros are presumably    */
/* consumed by the header included right after them; confirm there.     */
# define AO_ACCESS_CHECK_ALIGNED
# define AO_ACCESS_short_CHECK_ALIGNED
# define AO_ACCESS_int_CHECK_ALIGNED
# include "../all_atomic_only_load.h"

/* Use the char/short store primitives of the loadstore headers unless  */
/* AO_char_store has been already defined earlier in this file.         */
# ifndef AO_HAVE_char_store
# include "../loadstore/char_atomic_store.h"
# include "../loadstore/short_atomic_store.h"
# endif
| 294 | |
| 295 | /* NEC LE-IT: replace the SWAP as recommended by ARM: |
| 296 | "Applies to: ARM11 Cores |
| 297 | Though the SWP instruction will still work with ARM V6 cores, it is |
| 298 | recommended to use the new V6 synchronization instructions. The SWP |
| 299 | instruction produces 'locked' read and write accesses which are atomic, |
| 300 | i.e. another operation cannot be done between these locked accesses which |
| 301 | ties up external bus (AHB, AXI) bandwidth and can increase worst case |
| 302 | interrupt latencies. LDREX, STREX are more flexible, other instructions |
| 303 | can be done between the LDREX and STREX accesses." |
| 304 | */ |
| 305 | #ifndef AO_PREFER_GENERALIZED |
| 306 | #if !defined(AO_FORCE_USE_SWP) || !defined(AO_ARM_HAVE_SWP) |
| 307 | /* But, on the other hand, there could be a considerable performance */ |
| 308 | /* degradation in case of a race. Eg., test_atomic.c executing */ |
| 309 | /* test_and_set test on a dual-core ARMv7 processor using LDREX/STREX */ |
| 310 | /* showed around 35 times lower performance than that using SWP. */ |
| 311 | /* To force use of SWP instruction, use -D AO_FORCE_USE_SWP option */ |
| 312 | /* (the latter is ignored if SWP instruction is unsupported). */ |
| 313 | AO_INLINE AO_TS_VAL_t |
| 314 | AO_test_and_set(volatile AO_TS_t *addr) |
| 315 | { |
| 316 | AO_TS_VAL_t oldval; |
| 317 | int flag; |
| 318 | |
| 319 | __asm__ __volatile__("@AO_test_and_set\n" |
| 320 | AO_THUMB_GO_ARM |
| 321 | AO_BR_ALIGN |
| 322 | "1: " AO_MASK_PTR("%3") |
| 323 | " ldrex %0, [%3]\n" |
| 324 | AO_MASK_PTR("%3") |
| 325 | " strex %1, %4, [%3]\n" |
| 326 | " teq %1, #0\n" |
| 327 | " bne 1b\n" |
| 328 | AO_THUMB_RESTORE_MODE |
| 329 | : "=&r"(oldval), "=&r"(flag), "+m"(*addr) |
| 330 | : "r"(addr), "r"(1) |
| 331 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 332 | return oldval; |
| 333 | } |
| 334 | # define AO_HAVE_test_and_set |
| 335 | #endif /* !AO_FORCE_USE_SWP */ |
| 336 | |
| 337 | AO_INLINE AO_t |
| 338 | AO_fetch_and_add(volatile AO_t *p, AO_t incr) |
| 339 | { |
| 340 | AO_t result, tmp; |
| 341 | int flag; |
| 342 | |
| 343 | __asm__ __volatile__("@AO_fetch_and_add\n" |
| 344 | AO_THUMB_GO_ARM |
| 345 | AO_BR_ALIGN |
| 346 | "1: " AO_MASK_PTR("%5") |
| 347 | " ldrex %0, [%5]\n" /* get original */ |
| 348 | " add %2, %0, %4\n" /* sum up in incr */ |
| 349 | AO_MASK_PTR("%5") |
| 350 | " strex %1, %2, [%5]\n" /* store them */ |
| 351 | " teq %1, #0\n" |
| 352 | " bne 1b\n" |
| 353 | AO_THUMB_RESTORE_MODE |
| 354 | : "=&r"(result), "=&r"(flag), "=&r"(tmp), "+m"(*p) /* 0..3 */ |
| 355 | : "r"(incr), "r"(p) /* 4..5 */ |
| 356 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 357 | return result; |
| 358 | } |
| 359 | #define AO_HAVE_fetch_and_add |
| 360 | |
| 361 | AO_INLINE AO_t |
| 362 | AO_fetch_and_add1(volatile AO_t *p) |
| 363 | { |
| 364 | AO_t result, tmp; |
| 365 | int flag; |
| 366 | |
| 367 | __asm__ __volatile__("@AO_fetch_and_add1\n" |
| 368 | AO_THUMB_GO_ARM |
| 369 | AO_BR_ALIGN |
| 370 | "1: " AO_MASK_PTR("%4") |
| 371 | " ldrex %0, [%4]\n" /* get original */ |
| 372 | " add %1, %0, #1\n" /* increment */ |
| 373 | AO_MASK_PTR("%4") |
| 374 | " strex %2, %1, [%4]\n" /* store them */ |
| 375 | " teq %2, #0\n" |
| 376 | " bne 1b\n" |
| 377 | AO_THUMB_RESTORE_MODE |
| 378 | : "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p) |
| 379 | : "r"(p) |
| 380 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 381 | return result; |
| 382 | } |
| 383 | #define AO_HAVE_fetch_and_add1 |
| 384 | |
| 385 | AO_INLINE AO_t |
| 386 | AO_fetch_and_sub1(volatile AO_t *p) |
| 387 | { |
| 388 | AO_t result, tmp; |
| 389 | int flag; |
| 390 | |
| 391 | __asm__ __volatile__("@AO_fetch_and_sub1\n" |
| 392 | AO_THUMB_GO_ARM |
| 393 | AO_BR_ALIGN |
| 394 | "1: " AO_MASK_PTR("%4") |
| 395 | " ldrex %0, [%4]\n" /* get original */ |
| 396 | " sub %1, %0, #1\n" /* decrement */ |
| 397 | AO_MASK_PTR("%4") |
| 398 | " strex %2, %1, [%4]\n" /* store them */ |
| 399 | " teq %2, #0\n" |
| 400 | " bne 1b\n" |
| 401 | AO_THUMB_RESTORE_MODE |
| 402 | : "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p) |
| 403 | : "r"(p) |
| 404 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 405 | return result; |
| 406 | } |
| 407 | #define AO_HAVE_fetch_and_sub1 |
| 408 | |
| 409 | AO_INLINE void |
| 410 | AO_and(volatile AO_t *p, AO_t value) |
| 411 | { |
| 412 | AO_t tmp, result; |
| 413 | |
| 414 | __asm__ __volatile__("@AO_and\n" |
| 415 | AO_THUMB_GO_ARM |
| 416 | AO_BR_ALIGN |
| 417 | "1: " AO_MASK_PTR("%4") |
| 418 | " ldrex %0, [%4]\n" |
| 419 | " and %1, %0, %3\n" |
| 420 | AO_MASK_PTR("%4") |
| 421 | " strex %0, %1, [%4]\n" |
| 422 | " teq %0, #0\n" |
| 423 | " bne 1b\n" |
| 424 | AO_THUMB_RESTORE_MODE |
| 425 | : "=&r" (tmp), "=&r" (result), "+m" (*p) |
| 426 | : "r" (value), "r" (p) |
| 427 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 428 | } |
| 429 | #define AO_HAVE_and |
| 430 | |
| 431 | AO_INLINE void |
| 432 | AO_or(volatile AO_t *p, AO_t value) |
| 433 | { |
| 434 | AO_t tmp, result; |
| 435 | |
| 436 | __asm__ __volatile__("@AO_or\n" |
| 437 | AO_THUMB_GO_ARM |
| 438 | AO_BR_ALIGN |
| 439 | "1: " AO_MASK_PTR("%4") |
| 440 | " ldrex %0, [%4]\n" |
| 441 | " orr %1, %0, %3\n" |
| 442 | AO_MASK_PTR("%4") |
| 443 | " strex %0, %1, [%4]\n" |
| 444 | " teq %0, #0\n" |
| 445 | " bne 1b\n" |
| 446 | AO_THUMB_RESTORE_MODE |
| 447 | : "=&r" (tmp), "=&r" (result), "+m" (*p) |
| 448 | : "r" (value), "r" (p) |
| 449 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 450 | } |
| 451 | #define AO_HAVE_or |
| 452 | |
| 453 | AO_INLINE void |
| 454 | AO_xor(volatile AO_t *p, AO_t value) |
| 455 | { |
| 456 | AO_t tmp, result; |
| 457 | |
| 458 | __asm__ __volatile__("@AO_xor\n" |
| 459 | AO_THUMB_GO_ARM |
| 460 | AO_BR_ALIGN |
| 461 | "1: " AO_MASK_PTR("%4") |
| 462 | " ldrex %0, [%4]\n" |
| 463 | " eor %1, %0, %3\n" |
| 464 | AO_MASK_PTR("%4") |
| 465 | " strex %0, %1, [%4]\n" |
| 466 | " teq %0, #0\n" |
| 467 | " bne 1b\n" |
| 468 | AO_THUMB_RESTORE_MODE |
| 469 | : "=&r" (tmp), "=&r" (result), "+m" (*p) |
| 470 | : "r" (value), "r" (p) |
| 471 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 472 | } |
| 473 | #define AO_HAVE_xor |
| 474 | #endif /* !AO_PREFER_GENERALIZED */ |
| 475 | |
| 476 | #ifdef AO_ARM_HAVE_LDREXBH |
| 477 | AO_INLINE unsigned char |
| 478 | AO_char_fetch_and_add(volatile unsigned char *p, unsigned char incr) |
| 479 | { |
| 480 | unsigned result, tmp; |
| 481 | int flag; |
| 482 | |
| 483 | __asm__ __volatile__("@AO_char_fetch_and_add\n" |
| 484 | AO_THUMB_GO_ARM |
| 485 | AO_BR_ALIGN |
| 486 | "1: " AO_MASK_PTR("%5") |
| 487 | " ldrexb %0, [%5]\n" |
| 488 | " add %2, %0, %4\n" |
| 489 | AO_MASK_PTR("%5") |
| 490 | " strexb %1, %2, [%5]\n" |
| 491 | " teq %1, #0\n" |
| 492 | " bne 1b\n" |
| 493 | AO_THUMB_RESTORE_MODE |
| 494 | : "=&r" (result), "=&r" (flag), "=&r" (tmp), "+m" (*p) |
| 495 | : "r" ((unsigned)incr), "r" (p) |
| 496 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 497 | return (unsigned char)result; |
| 498 | } |
| 499 | # define AO_HAVE_char_fetch_and_add |
| 500 | |
| 501 | AO_INLINE unsigned short |
| 502 | AO_short_fetch_and_add(volatile unsigned short *p, unsigned short incr) |
| 503 | { |
| 504 | unsigned result, tmp; |
| 505 | int flag; |
| 506 | |
| 507 | __asm__ __volatile__("@AO_short_fetch_and_add\n" |
| 508 | AO_THUMB_GO_ARM |
| 509 | AO_BR_ALIGN |
| 510 | "1: " AO_MASK_PTR("%5") |
| 511 | " ldrexh %0, [%5]\n" |
| 512 | " add %2, %0, %4\n" |
| 513 | AO_MASK_PTR("%5") |
| 514 | " strexh %1, %2, [%5]\n" |
| 515 | " teq %1, #0\n" |
| 516 | " bne 1b\n" |
| 517 | AO_THUMB_RESTORE_MODE |
| 518 | : "=&r" (result), "=&r" (flag), "=&r" (tmp), "+m" (*p) |
| 519 | : "r" ((unsigned)incr), "r" (p) |
| 520 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 521 | return (unsigned short)result; |
| 522 | } |
| 523 | # define AO_HAVE_short_fetch_and_add |
| 524 | #endif /* AO_ARM_HAVE_LDREXBH */ |
| 525 | |
| 526 | #ifndef AO_GENERALIZE_ASM_BOOL_CAS |
| 527 | /* Returns nonzero if the comparison succeeded. */ |
| 528 | AO_INLINE int |
| 529 | AO_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val) |
| 530 | { |
| 531 | AO_t result, tmp; |
| 532 | |
| 533 | __asm__ __volatile__("@AO_compare_and_swap\n" |
| 534 | AO_THUMB_GO_ARM |
| 535 | AO_BR_ALIGN |
| 536 | "1: mov %0, #2\n" /* store a flag */ |
| 537 | AO_MASK_PTR("%3") |
| 538 | " ldrex %1, [%3]\n" /* get original */ |
| 539 | " teq %1, %4\n" /* see if match */ |
| 540 | AO_MASK_PTR("%3") |
| 541 | # ifdef __thumb2__ |
| 542 | /* TODO: Eliminate warning: it blocks containing wide Thumb */ |
| 543 | /* instructions are deprecated in ARMv8. */ |
| 544 | " it eq\n" |
| 545 | # endif |
| 546 | " strexeq %0, %5, [%3]\n" /* store new one if matched */ |
| 547 | " teq %0, #1\n" |
| 548 | " beq 1b\n" /* if update failed, repeat */ |
| 549 | AO_THUMB_RESTORE_MODE |
| 550 | : "=&r"(result), "=&r"(tmp), "+m"(*addr) |
| 551 | : "r"(addr), "r"(old_val), "r"(new_val) |
| 552 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 553 | return !(result&2); /* if succeeded then return 1 else 0 */ |
| 554 | } |
| 555 | # define AO_HAVE_compare_and_swap |
| 556 | #endif /* !AO_GENERALIZE_ASM_BOOL_CAS */ |
| 557 | |
| 558 | AO_INLINE AO_t |
| 559 | AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val) |
| 560 | { |
| 561 | AO_t fetched_val; |
| 562 | int flag; |
| 563 | |
| 564 | __asm__ __volatile__("@AO_fetch_compare_and_swap\n" |
| 565 | AO_THUMB_GO_ARM |
| 566 | AO_BR_ALIGN |
| 567 | "1: mov %0, #2\n" /* store a flag */ |
| 568 | AO_MASK_PTR("%3") |
| 569 | " ldrex %1, [%3]\n" /* get original */ |
| 570 | " teq %1, %4\n" /* see if match */ |
| 571 | AO_MASK_PTR("%3") |
| 572 | # ifdef __thumb2__ |
| 573 | " it eq\n" |
| 574 | # endif |
| 575 | " strexeq %0, %5, [%3]\n" /* store new one if matched */ |
| 576 | " teq %0, #1\n" |
| 577 | " beq 1b\n" /* if update failed, repeat */ |
| 578 | AO_THUMB_RESTORE_MODE |
| 579 | : "=&r"(flag), "=&r"(fetched_val), "+m"(*addr) |
| 580 | : "r"(addr), "r"(old_val), "r"(new_val) |
| 581 | : AO_THUMB_SWITCH_CLOBBERS "cc"); |
| 582 | return fetched_val; |
| 583 | } |
| 584 | #define AO_HAVE_fetch_compare_and_swap |
| 585 | |
| 586 | #ifdef AO_ARM_HAVE_LDREXD |
| 587 | # include "../standard_ao_double_t.h" |
| 588 | |
| 589 | /* "ARM Architecture Reference Manual ARMv7-A/R edition" (chapter */ |
| 590 | /* A3.5.3) says that memory accesses caused by LDREXD and STREXD */ |
| 591 | /* instructions to doubleword-aligned locations are single-copy */ |
| 592 | /* atomic; accesses to 64-bit elements by other instructions might */ |
| 593 | /* not be single-copy atomic as they are executed as a sequence of */ |
| 594 | /* 32-bit accesses. */ |
| 595 | AO_INLINE AO_double_t |
| 596 | AO_double_load(const volatile AO_double_t *addr) |
| 597 | { |
| 598 | AO_double_t result; |
| 599 | |
| 600 | /* AO_THUMB_GO_ARM is empty. */ |
| 601 | __asm__ __volatile__("@AO_double_load\n" |
| 602 | AO_MASK_PTR("%1") |
| 603 | " ldrexd %0, %H0, [%1]" |
| 604 | : "=&r" (result.AO_whole) |
| 605 | : "r" (addr) |
| 606 | /* : no clobber */); |
| 607 | return result; |
| 608 | } |
| 609 | # define AO_HAVE_double_load |
| 610 | |
| 611 | AO_INLINE void |
| 612 | AO_double_store(volatile AO_double_t *addr, AO_double_t new_val) |
| 613 | { |
| 614 | AO_double_t old_val; |
| 615 | int status; |
| 616 | |
| 617 | do { |
| 618 | /* AO_THUMB_GO_ARM is empty. */ |
| 619 | __asm__ __volatile__("@AO_double_store\n" |
| 620 | AO_MASK_PTR("%3") |
| 621 | " ldrexd %0, %H0, [%3]\n" |
| 622 | AO_MASK_PTR("%3") |
| 623 | " strexd %1, %4, %H4, [%3]" |
| 624 | : "=&r" (old_val.AO_whole), "=&r" (status), "+m" (*addr) |
| 625 | : "r" (addr), "r" (new_val.AO_whole) |
| 626 | : "cc"); |
| 627 | } while (AO_EXPECT_FALSE(status)); |
| 628 | } |
| 629 | # define AO_HAVE_double_store |
| 630 | |
| 631 | AO_INLINE int |
| 632 | AO_double_compare_and_swap(volatile AO_double_t *addr, |
| 633 | AO_double_t old_val, AO_double_t new_val) |
| 634 | { |
| 635 | double_ptr_storage tmp; |
| 636 | int result = 1; |
| 637 | |
| 638 | do { |
| 639 | /* AO_THUMB_GO_ARM is empty. */ |
| 640 | __asm__ __volatile__("@AO_double_compare_and_swap\n" |
| 641 | AO_MASK_PTR("%1") |
| 642 | " ldrexd %0, %H0, [%1]\n" /* get original to r1 & r2 */ |
| 643 | : "=&r"(tmp) |
| 644 | : "r"(addr) |
| 645 | /* : no clobber */); |
| 646 | if (tmp != old_val.AO_whole) |
| 647 | break; |
| 648 | __asm__ __volatile__( |
| 649 | AO_MASK_PTR("%2") |
| 650 | " strexd %0, %3, %H3, [%2]\n" /* store new one if matched */ |
| 651 | : "=&r"(result), "+m"(*addr) |
| 652 | : "r" (addr), "r" (new_val.AO_whole) |
| 653 | : "cc"); |
| 654 | } while (AO_EXPECT_FALSE(result)); |
| 655 | return !result; /* if succeeded then return 1 else 0 */ |
| 656 | } |
| 657 | # define AO_HAVE_double_compare_and_swap |
| 658 | #endif /* AO_ARM_HAVE_LDREXD */ |
| 659 | |
| 660 | #else |
| 661 | /* pre ARMv6 architectures ... */ |
| 662 | |
| 663 | /* I found a slide set that, if I read it correctly, claims that */ |
| 664 | /* Loads followed by either a Load or Store are ordered, but nothing */ |
| 665 | /* else is. */ |
| 666 | /* It appears that SWP is the only simple memory barrier. */ |
| 667 | #include "../all_aligned_atomic_load_store.h" |
| 668 | |
| 669 | /* The code should run correctly on a multi-core ARMv6+ as well. */ |
| 670 | |
| 671 | #endif /* !AO_ARM_HAVE_LDREX */ |
| 672 | |
| 673 | #if !defined(AO_HAVE_test_and_set_full) && !defined(AO_HAVE_test_and_set) \ |
| 674 | && defined (AO_ARM_HAVE_SWP) && (!defined(AO_PREFER_GENERALIZED) \ |
| 675 | || !defined(AO_HAVE_fetch_compare_and_swap)) |
| 676 | AO_INLINE AO_TS_VAL_t |
| 677 | AO_test_and_set_full(volatile AO_TS_t *addr) |
| 678 | { |
| 679 | AO_TS_VAL_t oldval; |
| 680 | /* SWP on ARM is very similar to XCHG on x86. */ |
| 681 | /* The first operand is the result, the second the value */ |
| 682 | /* to be stored. Both registers must be different from addr. */ |
| 683 | /* Make the address operand an early clobber output so it */ |
| 684 | /* doesn't overlap with the other operands. The early clobber */ |
| 685 | /* on oldval is necessary to prevent the compiler allocating */ |
| 686 | /* them to the same register if they are both unused. */ |
| 687 | |
| 688 | __asm__ __volatile__("@AO_test_and_set_full\n" |
| 689 | AO_THUMB_GO_ARM |
| 690 | AO_MASK_PTR("%3") |
| 691 | " swp %0, %2, [%3]\n" |
| 692 | /* Ignore GCC "SWP is deprecated for this architecture" */ |
| 693 | /* warning here (for ARMv6+). */ |
| 694 | AO_THUMB_RESTORE_MODE |
| 695 | : "=&r"(oldval), "=&r"(addr) |
| 696 | : "r"(1), "1"(addr) |
| 697 | : AO_THUMB_SWITCH_CLOBBERS "memory"); |
| 698 | return oldval; |
| 699 | } |
| 700 | # define AO_HAVE_test_and_set_full |
| 701 | #endif /* !AO_HAVE_test_and_set[_full] && AO_ARM_HAVE_SWP */ |
| 702 | |
/* NOTE(review): presumably tells the common headers that the AO_int_*  */
/* primitives can be derived from the AO_t ones (cf. the mention of     */
/* ao_t_is_int.h earlier in this file); confirm against that header.    */
#define AO_T_IS_INT
| 704 | |
| 705 | #else /* AO_GCC_ATOMIC_TEST_AND_SET */ |
| 706 | |
# if defined(__clang__) && !defined(AO_ARM_HAVE_LDREX)
    /* As of clang-3.8, it cannot compile __atomic_and/or/xor_fetch     */
    /* library calls yet for pre ARMv6.                                 */
    /* NOTE(review): the markers below are presumably honored by        */
    /* "generic.h" which is included shortly after; confirm there.      */
# define AO_SKIPATOMIC_ANY_and_ANY
# define AO_SKIPATOMIC_ANY_or_ANY
# define AO_SKIPATOMIC_ANY_xor_ANY
# endif
| 714 | |
| 715 | # ifdef AO_ARM_HAVE_LDREXD |
| 716 | # include "../standard_ao_double_t.h" |
| 717 | # endif |
| 718 | # include "generic.h" |
| 719 | |
| 720 | #endif /* AO_GCC_ATOMIC_TEST_AND_SET */ |
| 721 | |
/* Undefine the helper macros introduced by this file (the feature      */
/* detection results, the assembly fragments and the AO_SKIPATOMIC_*    */
/* markers) so that they do not remain defined after the header has     */
/* been processed.                                                      */
#undef AO_ARM_HAVE_DMB
#undef AO_ARM_HAVE_LDREX
#undef AO_ARM_HAVE_LDREXBH
#undef AO_ARM_HAVE_LDREXD
#undef AO_ARM_HAVE_SWP
#undef AO_BR_ALIGN
#undef AO_MASK_PTR
#undef AO_SKIPATOMIC_ANY_and_ANY
#undef AO_SKIPATOMIC_ANY_or_ANY
#undef AO_SKIPATOMIC_ANY_xor_ANY
#undef AO_SKIPATOMIC_char_store
#undef AO_SKIPATOMIC_char_store_release
#undef AO_SKIPATOMIC_int_store
#undef AO_SKIPATOMIC_int_store_release
#undef AO_SKIPATOMIC_short_store
#undef AO_SKIPATOMIC_short_store_release
#undef AO_SKIPATOMIC_store
#undef AO_SKIPATOMIC_store_release
#undef AO_THUMB_GO_ARM
#undef AO_THUMB_RESTORE_MODE
#undef AO_THUMB_SWITCH_CLOBBERS
| 743 | |