v2 / thirdparty / libatomic_ops / atomic_ops / sysdeps / gcc / arm.h
742 lines · 670 sloc · 24.89 KB · f53b5d737fd636890520101e736ad29e20d3c322
Raw
1/*
2 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
3 * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
4 * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
5 * Copyright (c) 2008-2017 Ivan Maidanski
6 *
7 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
8 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
9 *
10 * Permission is hereby granted to use or copy this program
11 * for any purpose, provided the above notices are retained on all copies.
12 * Permission to modify the code and to distribute modified code is granted,
13 * provided the above notices are retained, and a notice that the code was
14 * modified is included with the above copyright notice.
15 *
16 */
17
18#if (AO_GNUC_PREREQ(4, 8) || AO_CLANG_PREREQ(3, 5)) \
19 && !defined(AO_DISABLE_GCC_ATOMICS)
20 /* Probably, it could be enabled even for earlier gcc/clang versions. */
21# define AO_GCC_ATOMIC_TEST_AND_SET
22#endif
23
#if defined(__native_client__)
  /* Native Client: the masking instruction (it clears the two top bits */
  /* of the pointer register) should immediately precede the memory     */
  /* access instruction.                                                 */
# define AO_MASK_PTR(reg) " bical " reg ", " reg ", #0xc0000000\n"
# define AO_BR_ALIGN " .align 4\n"
#else
  /* Elsewhere both helpers expand to nothing. */
# define AO_MASK_PTR(reg) /* empty */
# define AO_BR_ALIGN /* empty */
#endif /* !__native_client__ */
32
#if !defined(__thumb__) || defined(__thumb2__)
  /* ARM or Thumb-2 mode: no instruction set switch is needed. */
# define AO_THUMB_GO_ARM /* empty */
# define AO_THUMB_RESTORE_MODE /* empty */
# define AO_THUMB_SWITCH_CLOBBERS /* empty */
#else
  /* Thumb One mode does not have ARM "mcr", "swp" and some load/store */
  /* instructions, so the asm code temporarily switches to ARM mode    */
  /* and goes back afterwards (clobbering "r3" register).              */
# define AO_THUMB_GO_ARM \
           " adr r3, 4f\n" /* r3 = address of the ARM code (label 4) */ \
           " bx r3\n" \
           " .align\n" \
           " .arm\n" \
           AO_BR_ALIGN \
           "4:\n"
# define AO_THUMB_RESTORE_MODE \
           " adr r3, 5f + 1\n" /* odd address: "bx" returns to Thumb */ \
           " bx r3\n" \
           " .thumb\n" \
           AO_BR_ALIGN \
           "5:\n"
# define AO_THUMB_SWITCH_CLOBBERS "r3",
#endif /* __thumb__ && !__thumb2__ */
56
57/* NEC LE-IT: gcc has no way to easily check the arm architecture */
58/* but it defines only one (or several) of __ARM_ARCH_x__ to be true. */
59#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) \
60 && !defined(__ARM_ARCH_3M__) && !defined(__ARM_ARCH_4__) \
61 && !defined(__ARM_ARCH_4T__) \
62 && ((!defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5E__) \
63 && !defined(__ARM_ARCH_5T__) && !defined(__ARM_ARCH_5TE__) \
64 && !defined(__ARM_ARCH_5TEJ__) && !defined(__ARM_ARCH_6M__)) \
65 || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
66 || defined(__ARM_ARCH_8A__))
67# define AO_ARM_HAVE_LDREX
68# if !defined(__ARM_ARCH_6__) && !defined(__ARM_ARCH_6J__) \
69 && !defined(__ARM_ARCH_6T2__)
70 /* LDREXB/STREXB and LDREXH/STREXH are present in ARMv6K/Z+. */
71# define AO_ARM_HAVE_LDREXBH
72# endif
73# if !defined(__ARM_ARCH_6__) && !defined(__ARM_ARCH_6J__) \
74 && !defined(__ARM_ARCH_6T2__) && !defined(__ARM_ARCH_6Z__) \
75 && !defined(__ARM_ARCH_6ZT2__)
76# if !defined(__ARM_ARCH_6K__) && !defined(__ARM_ARCH_6KZ__) \
77 && !defined(__ARM_ARCH_6ZK__)
78 /* DMB is present in ARMv6M and ARMv7+. */
79# define AO_ARM_HAVE_DMB
80# endif
81# if (!defined(__thumb__) \
82 || (defined(__thumb2__) && !defined(__ARM_ARCH_7__) \
83 && !defined(__ARM_ARCH_7M__) && !defined(__ARM_ARCH_7EM__))) \
84 && (!defined(__clang__) || AO_CLANG_PREREQ(3, 3))
85 /* LDREXD/STREXD present in ARMv6K/M+ (see gas/config/tc-arm.c). */
86 /* In the Thumb mode, this works only starting from ARMv7 (except */
87 /* for the base and 'M' models). Clang3.2 (and earlier) does not */
88 /* allocate register pairs for LDREXD/STREXD properly (besides, */
89 /* Clang3.1 does not support "%H<r>" operand specification). */
90# define AO_ARM_HAVE_LDREXD
91# endif /* !thumb || ARMv7A || ARMv7R+ */
92# endif /* ARMv7+ */
93#endif /* ARMv6+ */
94
#if !(defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_6M__) \
      || defined(__ARM_ARCH_8A__) || defined(__thumb2__))
  /* Note: ARMv6M is excluded due to no ARM mode support. */
  /* Also, SWP is obsoleted for ARMv8+.                    */
# define AO_ARM_HAVE_SWP
#endif /* !__thumb2__ */
101
102#if !defined(AO_UNIPROCESSOR) && defined(AO_ARM_HAVE_DMB) \
103 && !defined(AO_PREFER_BUILTIN_ATOMICS)
104 AO_INLINE void
105 AO_nop_write(void)
106 {
107 /* AO_THUMB_GO_ARM is empty. */
108 /* This will target the system domain and thus be overly */
109 /* conservative as the CPUs (even in case of big.LITTLE SoC) will */
110 /* occupy the inner shareable domain. */
111 /* The plain variant (dmb st) is theoretically slower, and should */
112 /* not be needed. That said, with limited experimentation, a CPU */
113 /* implementation for which it actually matters has not been found */
114 /* yet, though they should already exist. */
115 /* Anyway, note that the "st" and "ishst" barriers are actually */
116 /* quite weak and, as the libatomic_ops documentation states, */
117 /* usually not what you really want. */
118 __asm__ __volatile__("dmb ishst" : : : "memory");
119 }
120# define AO_HAVE_nop_write
121#endif /* AO_ARM_HAVE_DMB */
122
123#ifndef AO_GCC_ATOMIC_TEST_AND_SET
124
125#ifdef AO_UNIPROCESSOR
126 /* If only a single processor (core) is used, AO_UNIPROCESSOR could */
127 /* be defined by the client to avoid unnecessary memory barrier. */
128 AO_INLINE void
129 AO_nop_full(void)
130 {
131 AO_compiler_barrier();
132 }
133# define AO_HAVE_nop_full
134
135#elif defined(AO_ARM_HAVE_DMB)
136 /* ARMv7 is compatible to ARMv6 but has a simpler command for issuing */
137 /* a memory barrier (DMB). Raising it via CP15 should still work */
138 /* (but slightly less efficient because it requires the use of */
139 /* a general-purpose register). */
140 AO_INLINE void
141 AO_nop_full(void)
142 {
143 /* AO_THUMB_GO_ARM is empty. */
144 __asm__ __volatile__("dmb" : : : "memory");
145 }
146# define AO_HAVE_nop_full
147
148#elif defined(AO_ARM_HAVE_LDREX)
149 /* ARMv6 is the first architecture providing support for a simple */
150 /* LL/SC. A data memory barrier must be raised via CP15 command. */
151 AO_INLINE void
152 AO_nop_full(void)
153 {
154 unsigned dest = 0;
155
156 /* Issue a data memory barrier (keeps ordering of memory */
157 /* transactions before and after this operation). */
158 __asm__ __volatile__("@AO_nop_full\n"
159 AO_THUMB_GO_ARM
160 " mcr p15,0,%0,c7,c10,5\n"
161 AO_THUMB_RESTORE_MODE
162 : "=&r"(dest)
163 : /* empty */
164 : AO_THUMB_SWITCH_CLOBBERS "memory");
165 }
166# define AO_HAVE_nop_full
167
168#else
169 /* AO_nop_full() is emulated using AO_test_and_set_full(). */
170#endif /* !AO_UNIPROCESSOR && !AO_ARM_HAVE_LDREX */
171
172#endif /* !AO_GCC_ATOMIC_TEST_AND_SET */
173
174#ifdef AO_ARM_HAVE_LDREX
175
176 /* "ARM Architecture Reference Manual" (chapter A3.5.3) says that the */
177 /* single-copy atomic processor accesses are all byte accesses, all */
178 /* halfword accesses to halfword-aligned locations, all word accesses */
179 /* to word-aligned locations. */
180 /* There is only a single concern related to AO store operations: */
181 /* a direct write (by STR[B/H] instruction) will not be recognized */
182 /* by the LL/SC construct on the same CPU (i.e., according to ARM */
183 /* documentation, e.g., see CortexA8 TRM reference, point 8.5, */
184 /* atomic "store" (using LDREX/STREX[B/H]) is the only safe way to */
185 /* set variables also used in LL/SC environment). */
  /* This is only a problem if interrupt handlers do not clear the     */
  /* reservation (by CLREX instruction or a dummy STREX one), as they   */
  /* almost certainly should (e.g., see restore_user_regs defined in    */
  /* arch/arm/kernel/entry-header.S of Linux).  Nonetheless, there is   */
  /* a doubt this was properly implemented in some ancient OS releases. */
191# ifdef AO_BROKEN_TASKSWITCH_CLREX
192
193# define AO_SKIPATOMIC_store
194# define AO_SKIPATOMIC_store_release
195# define AO_SKIPATOMIC_char_store
196# define AO_SKIPATOMIC_char_store_release
197# define AO_SKIPATOMIC_short_store
198# define AO_SKIPATOMIC_short_store_release
199# define AO_SKIPATOMIC_int_store
200# define AO_SKIPATOMIC_int_store_release
201
202# ifndef AO_PREFER_BUILTIN_ATOMICS
203
204 AO_INLINE void AO_store(volatile AO_t *addr, AO_t value)
205 {
206 int flag;
207
208 __asm__ __volatile__("@AO_store\n"
209 AO_THUMB_GO_ARM
210 AO_BR_ALIGN
211 "1: " AO_MASK_PTR("%2")
212 " ldrex %0, [%2]\n"
213 AO_MASK_PTR("%2")
214 " strex %0, %3, [%2]\n"
215 " teq %0, #0\n"
216 " bne 1b\n"
217 AO_THUMB_RESTORE_MODE
218 : "=&r" (flag), "+m" (*addr)
219 : "r" (addr), "r" (value)
220 : AO_THUMB_SWITCH_CLOBBERS "cc");
221 }
222# define AO_HAVE_store
223
224# ifdef AO_ARM_HAVE_LDREXBH
225 AO_INLINE void AO_char_store(volatile unsigned char *addr,
226 unsigned char value)
227 {
228 int flag;
229
230 __asm__ __volatile__("@AO_char_store\n"
231 AO_THUMB_GO_ARM
232 AO_BR_ALIGN
233 "1: " AO_MASK_PTR("%2")
234 " ldrexb %0, [%2]\n"
235 AO_MASK_PTR("%2")
236 " strexb %0, %3, [%2]\n"
237 " teq %0, #0\n"
238 " bne 1b\n"
239 AO_THUMB_RESTORE_MODE
240 : "=&r" (flag), "+m" (*addr)
241 : "r" (addr), "r" (value)
242 : AO_THUMB_SWITCH_CLOBBERS "cc");
243 }
244# define AO_HAVE_char_store
245
246 AO_INLINE void AO_short_store(volatile unsigned short *addr,
247 unsigned short value)
248 {
249 int flag;
250
251 __asm__ __volatile__("@AO_short_store\n"
252 AO_THUMB_GO_ARM
253 AO_BR_ALIGN
254 "1: " AO_MASK_PTR("%2")
255 " ldrexh %0, [%2]\n"
256 AO_MASK_PTR("%2")
257 " strexh %0, %3, [%2]\n"
258 " teq %0, #0\n"
259 " bne 1b\n"
260 AO_THUMB_RESTORE_MODE
261 : "=&r" (flag), "+m" (*addr)
262 : "r" (addr), "r" (value)
263 : AO_THUMB_SWITCH_CLOBBERS "cc");
264 }
265# define AO_HAVE_short_store
266# endif /* AO_ARM_HAVE_LDREXBH */
267
268# endif /* !AO_PREFER_BUILTIN_ATOMICS */
269
270# elif !defined(AO_GCC_ATOMIC_TEST_AND_SET)
271# include "../loadstore/atomic_store.h"
272 /* AO_int_store is defined in ao_t_is_int.h. */
273# endif /* !AO_BROKEN_TASKSWITCH_CLREX */
274
275#endif /* AO_ARM_HAVE_LDREX */
276
277#ifndef AO_GCC_ATOMIC_TEST_AND_SET
278
279# include "../test_and_set_t_is_ao_t.h" /* Probably suboptimal */
280
281#ifdef AO_ARM_HAVE_LDREX
282
283 /* AO_t/char/short/int load is simple reading. */
284 /* Unaligned accesses are not guaranteed to be atomic. */
285# define AO_ACCESS_CHECK_ALIGNED
286# define AO_ACCESS_short_CHECK_ALIGNED
287# define AO_ACCESS_int_CHECK_ALIGNED
288# include "../all_atomic_only_load.h"
289
290# ifndef AO_HAVE_char_store
291# include "../loadstore/char_atomic_store.h"
292# include "../loadstore/short_atomic_store.h"
293# endif
294
295/* NEC LE-IT: replace the SWAP as recommended by ARM:
296 "Applies to: ARM11 Cores
297 Though the SWP instruction will still work with ARM V6 cores, it is
298 recommended to use the new V6 synchronization instructions. The SWP
299 instruction produces 'locked' read and write accesses which are atomic,
300 i.e. another operation cannot be done between these locked accesses which
301 ties up external bus (AHB, AXI) bandwidth and can increase worst case
302 interrupt latencies. LDREX, STREX are more flexible, other instructions
303 can be done between the LDREX and STREX accesses."
304*/
305#ifndef AO_PREFER_GENERALIZED
306#if !defined(AO_FORCE_USE_SWP) || !defined(AO_ARM_HAVE_SWP)
307 /* But, on the other hand, there could be a considerable performance */
308 /* degradation in case of a race. Eg., test_atomic.c executing */
309 /* test_and_set test on a dual-core ARMv7 processor using LDREX/STREX */
310 /* showed around 35 times lower performance than that using SWP. */
311 /* To force use of SWP instruction, use -D AO_FORCE_USE_SWP option */
312 /* (the latter is ignored if SWP instruction is unsupported). */
313 AO_INLINE AO_TS_VAL_t
314 AO_test_and_set(volatile AO_TS_t *addr)
315 {
316 AO_TS_VAL_t oldval;
317 int flag;
318
319 __asm__ __volatile__("@AO_test_and_set\n"
320 AO_THUMB_GO_ARM
321 AO_BR_ALIGN
322 "1: " AO_MASK_PTR("%3")
323 " ldrex %0, [%3]\n"
324 AO_MASK_PTR("%3")
325 " strex %1, %4, [%3]\n"
326 " teq %1, #0\n"
327 " bne 1b\n"
328 AO_THUMB_RESTORE_MODE
329 : "=&r"(oldval), "=&r"(flag), "+m"(*addr)
330 : "r"(addr), "r"(1)
331 : AO_THUMB_SWITCH_CLOBBERS "cc");
332 return oldval;
333 }
334# define AO_HAVE_test_and_set
335#endif /* !AO_FORCE_USE_SWP */
336
337AO_INLINE AO_t
338AO_fetch_and_add(volatile AO_t *p, AO_t incr)
339{
340 AO_t result, tmp;
341 int flag;
342
343 __asm__ __volatile__("@AO_fetch_and_add\n"
344 AO_THUMB_GO_ARM
345 AO_BR_ALIGN
346 "1: " AO_MASK_PTR("%5")
347 " ldrex %0, [%5]\n" /* get original */
348 " add %2, %0, %4\n" /* sum up in incr */
349 AO_MASK_PTR("%5")
350 " strex %1, %2, [%5]\n" /* store them */
351 " teq %1, #0\n"
352 " bne 1b\n"
353 AO_THUMB_RESTORE_MODE
354 : "=&r"(result), "=&r"(flag), "=&r"(tmp), "+m"(*p) /* 0..3 */
355 : "r"(incr), "r"(p) /* 4..5 */
356 : AO_THUMB_SWITCH_CLOBBERS "cc");
357 return result;
358}
359#define AO_HAVE_fetch_and_add
360
361AO_INLINE AO_t
362AO_fetch_and_add1(volatile AO_t *p)
363{
364 AO_t result, tmp;
365 int flag;
366
367 __asm__ __volatile__("@AO_fetch_and_add1\n"
368 AO_THUMB_GO_ARM
369 AO_BR_ALIGN
370 "1: " AO_MASK_PTR("%4")
371 " ldrex %0, [%4]\n" /* get original */
372 " add %1, %0, #1\n" /* increment */
373 AO_MASK_PTR("%4")
374 " strex %2, %1, [%4]\n" /* store them */
375 " teq %2, #0\n"
376 " bne 1b\n"
377 AO_THUMB_RESTORE_MODE
378 : "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p)
379 : "r"(p)
380 : AO_THUMB_SWITCH_CLOBBERS "cc");
381 return result;
382}
383#define AO_HAVE_fetch_and_add1
384
385AO_INLINE AO_t
386AO_fetch_and_sub1(volatile AO_t *p)
387{
388 AO_t result, tmp;
389 int flag;
390
391 __asm__ __volatile__("@AO_fetch_and_sub1\n"
392 AO_THUMB_GO_ARM
393 AO_BR_ALIGN
394 "1: " AO_MASK_PTR("%4")
395 " ldrex %0, [%4]\n" /* get original */
396 " sub %1, %0, #1\n" /* decrement */
397 AO_MASK_PTR("%4")
398 " strex %2, %1, [%4]\n" /* store them */
399 " teq %2, #0\n"
400 " bne 1b\n"
401 AO_THUMB_RESTORE_MODE
402 : "=&r"(result), "=&r"(tmp), "=&r"(flag), "+m"(*p)
403 : "r"(p)
404 : AO_THUMB_SWITCH_CLOBBERS "cc");
405 return result;
406}
407#define AO_HAVE_fetch_and_sub1
408
409AO_INLINE void
410AO_and(volatile AO_t *p, AO_t value)
411{
412 AO_t tmp, result;
413
414 __asm__ __volatile__("@AO_and\n"
415 AO_THUMB_GO_ARM
416 AO_BR_ALIGN
417 "1: " AO_MASK_PTR("%4")
418 " ldrex %0, [%4]\n"
419 " and %1, %0, %3\n"
420 AO_MASK_PTR("%4")
421 " strex %0, %1, [%4]\n"
422 " teq %0, #0\n"
423 " bne 1b\n"
424 AO_THUMB_RESTORE_MODE
425 : "=&r" (tmp), "=&r" (result), "+m" (*p)
426 : "r" (value), "r" (p)
427 : AO_THUMB_SWITCH_CLOBBERS "cc");
428}
429#define AO_HAVE_and
430
431AO_INLINE void
432AO_or(volatile AO_t *p, AO_t value)
433{
434 AO_t tmp, result;
435
436 __asm__ __volatile__("@AO_or\n"
437 AO_THUMB_GO_ARM
438 AO_BR_ALIGN
439 "1: " AO_MASK_PTR("%4")
440 " ldrex %0, [%4]\n"
441 " orr %1, %0, %3\n"
442 AO_MASK_PTR("%4")
443 " strex %0, %1, [%4]\n"
444 " teq %0, #0\n"
445 " bne 1b\n"
446 AO_THUMB_RESTORE_MODE
447 : "=&r" (tmp), "=&r" (result), "+m" (*p)
448 : "r" (value), "r" (p)
449 : AO_THUMB_SWITCH_CLOBBERS "cc");
450}
451#define AO_HAVE_or
452
453AO_INLINE void
454AO_xor(volatile AO_t *p, AO_t value)
455{
456 AO_t tmp, result;
457
458 __asm__ __volatile__("@AO_xor\n"
459 AO_THUMB_GO_ARM
460 AO_BR_ALIGN
461 "1: " AO_MASK_PTR("%4")
462 " ldrex %0, [%4]\n"
463 " eor %1, %0, %3\n"
464 AO_MASK_PTR("%4")
465 " strex %0, %1, [%4]\n"
466 " teq %0, #0\n"
467 " bne 1b\n"
468 AO_THUMB_RESTORE_MODE
469 : "=&r" (tmp), "=&r" (result), "+m" (*p)
470 : "r" (value), "r" (p)
471 : AO_THUMB_SWITCH_CLOBBERS "cc");
472}
473#define AO_HAVE_xor
474#endif /* !AO_PREFER_GENERALIZED */
475
476#ifdef AO_ARM_HAVE_LDREXBH
477 AO_INLINE unsigned char
478 AO_char_fetch_and_add(volatile unsigned char *p, unsigned char incr)
479 {
480 unsigned result, tmp;
481 int flag;
482
483 __asm__ __volatile__("@AO_char_fetch_and_add\n"
484 AO_THUMB_GO_ARM
485 AO_BR_ALIGN
486 "1: " AO_MASK_PTR("%5")
487 " ldrexb %0, [%5]\n"
488 " add %2, %0, %4\n"
489 AO_MASK_PTR("%5")
490 " strexb %1, %2, [%5]\n"
491 " teq %1, #0\n"
492 " bne 1b\n"
493 AO_THUMB_RESTORE_MODE
494 : "=&r" (result), "=&r" (flag), "=&r" (tmp), "+m" (*p)
495 : "r" ((unsigned)incr), "r" (p)
496 : AO_THUMB_SWITCH_CLOBBERS "cc");
497 return (unsigned char)result;
498 }
499# define AO_HAVE_char_fetch_and_add
500
501 AO_INLINE unsigned short
502 AO_short_fetch_and_add(volatile unsigned short *p, unsigned short incr)
503 {
504 unsigned result, tmp;
505 int flag;
506
507 __asm__ __volatile__("@AO_short_fetch_and_add\n"
508 AO_THUMB_GO_ARM
509 AO_BR_ALIGN
510 "1: " AO_MASK_PTR("%5")
511 " ldrexh %0, [%5]\n"
512 " add %2, %0, %4\n"
513 AO_MASK_PTR("%5")
514 " strexh %1, %2, [%5]\n"
515 " teq %1, #0\n"
516 " bne 1b\n"
517 AO_THUMB_RESTORE_MODE
518 : "=&r" (result), "=&r" (flag), "=&r" (tmp), "+m" (*p)
519 : "r" ((unsigned)incr), "r" (p)
520 : AO_THUMB_SWITCH_CLOBBERS "cc");
521 return (unsigned short)result;
522 }
523# define AO_HAVE_short_fetch_and_add
524#endif /* AO_ARM_HAVE_LDREXBH */
525
526#ifndef AO_GENERALIZE_ASM_BOOL_CAS
527 /* Returns nonzero if the comparison succeeded. */
528 AO_INLINE int
529 AO_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
530 {
531 AO_t result, tmp;
532
533 __asm__ __volatile__("@AO_compare_and_swap\n"
534 AO_THUMB_GO_ARM
535 AO_BR_ALIGN
536 "1: mov %0, #2\n" /* store a flag */
537 AO_MASK_PTR("%3")
538 " ldrex %1, [%3]\n" /* get original */
539 " teq %1, %4\n" /* see if match */
540 AO_MASK_PTR("%3")
541# ifdef __thumb2__
542 /* TODO: Eliminate warning: it blocks containing wide Thumb */
543 /* instructions are deprecated in ARMv8. */
544 " it eq\n"
545# endif
546 " strexeq %0, %5, [%3]\n" /* store new one if matched */
547 " teq %0, #1\n"
548 " beq 1b\n" /* if update failed, repeat */
549 AO_THUMB_RESTORE_MODE
550 : "=&r"(result), "=&r"(tmp), "+m"(*addr)
551 : "r"(addr), "r"(old_val), "r"(new_val)
552 : AO_THUMB_SWITCH_CLOBBERS "cc");
553 return !(result&2); /* if succeeded then return 1 else 0 */
554 }
555# define AO_HAVE_compare_and_swap
556#endif /* !AO_GENERALIZE_ASM_BOOL_CAS */
557
558AO_INLINE AO_t
559AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
560{
561 AO_t fetched_val;
562 int flag;
563
564 __asm__ __volatile__("@AO_fetch_compare_and_swap\n"
565 AO_THUMB_GO_ARM
566 AO_BR_ALIGN
567 "1: mov %0, #2\n" /* store a flag */
568 AO_MASK_PTR("%3")
569 " ldrex %1, [%3]\n" /* get original */
570 " teq %1, %4\n" /* see if match */
571 AO_MASK_PTR("%3")
572# ifdef __thumb2__
573 " it eq\n"
574# endif
575 " strexeq %0, %5, [%3]\n" /* store new one if matched */
576 " teq %0, #1\n"
577 " beq 1b\n" /* if update failed, repeat */
578 AO_THUMB_RESTORE_MODE
579 : "=&r"(flag), "=&r"(fetched_val), "+m"(*addr)
580 : "r"(addr), "r"(old_val), "r"(new_val)
581 : AO_THUMB_SWITCH_CLOBBERS "cc");
582 return fetched_val;
583}
584#define AO_HAVE_fetch_compare_and_swap
585
586#ifdef AO_ARM_HAVE_LDREXD
587# include "../standard_ao_double_t.h"
588
589 /* "ARM Architecture Reference Manual ARMv7-A/R edition" (chapter */
590 /* A3.5.3) says that memory accesses caused by LDREXD and STREXD */
591 /* instructions to doubleword-aligned locations are single-copy */
592 /* atomic; accesses to 64-bit elements by other instructions might */
593 /* not be single-copy atomic as they are executed as a sequence of */
594 /* 32-bit accesses. */
595 AO_INLINE AO_double_t
596 AO_double_load(const volatile AO_double_t *addr)
597 {
598 AO_double_t result;
599
600 /* AO_THUMB_GO_ARM is empty. */
601 __asm__ __volatile__("@AO_double_load\n"
602 AO_MASK_PTR("%1")
603 " ldrexd %0, %H0, [%1]"
604 : "=&r" (result.AO_whole)
605 : "r" (addr)
606 /* : no clobber */);
607 return result;
608 }
609# define AO_HAVE_double_load
610
611 AO_INLINE void
612 AO_double_store(volatile AO_double_t *addr, AO_double_t new_val)
613 {
614 AO_double_t old_val;
615 int status;
616
617 do {
618 /* AO_THUMB_GO_ARM is empty. */
619 __asm__ __volatile__("@AO_double_store\n"
620 AO_MASK_PTR("%3")
621 " ldrexd %0, %H0, [%3]\n"
622 AO_MASK_PTR("%3")
623 " strexd %1, %4, %H4, [%3]"
624 : "=&r" (old_val.AO_whole), "=&r" (status), "+m" (*addr)
625 : "r" (addr), "r" (new_val.AO_whole)
626 : "cc");
627 } while (AO_EXPECT_FALSE(status));
628 }
629# define AO_HAVE_double_store
630
631 AO_INLINE int
632 AO_double_compare_and_swap(volatile AO_double_t *addr,
633 AO_double_t old_val, AO_double_t new_val)
634 {
635 double_ptr_storage tmp;
636 int result = 1;
637
638 do {
639 /* AO_THUMB_GO_ARM is empty. */
640 __asm__ __volatile__("@AO_double_compare_and_swap\n"
641 AO_MASK_PTR("%1")
642 " ldrexd %0, %H0, [%1]\n" /* get original to r1 & r2 */
643 : "=&r"(tmp)
644 : "r"(addr)
645 /* : no clobber */);
646 if (tmp != old_val.AO_whole)
647 break;
648 __asm__ __volatile__(
649 AO_MASK_PTR("%2")
650 " strexd %0, %3, %H3, [%2]\n" /* store new one if matched */
651 : "=&r"(result), "+m"(*addr)
652 : "r" (addr), "r" (new_val.AO_whole)
653 : "cc");
654 } while (AO_EXPECT_FALSE(result));
655 return !result; /* if succeeded then return 1 else 0 */
656 }
657# define AO_HAVE_double_compare_and_swap
658#endif /* AO_ARM_HAVE_LDREXD */
659
660#else
661/* pre ARMv6 architectures ... */
662
663/* I found a slide set that, if I read it correctly, claims that */
664/* Loads followed by either a Load or Store are ordered, but nothing */
665/* else is. */
666/* It appears that SWP is the only simple memory barrier. */
667#include "../all_aligned_atomic_load_store.h"
668
669/* The code should run correctly on a multi-core ARMv6+ as well. */
670
671#endif /* !AO_ARM_HAVE_LDREX */
672
673#if !defined(AO_HAVE_test_and_set_full) && !defined(AO_HAVE_test_and_set) \
674 && defined (AO_ARM_HAVE_SWP) && (!defined(AO_PREFER_GENERALIZED) \
675 || !defined(AO_HAVE_fetch_compare_and_swap))
676 AO_INLINE AO_TS_VAL_t
677 AO_test_and_set_full(volatile AO_TS_t *addr)
678 {
679 AO_TS_VAL_t oldval;
680 /* SWP on ARM is very similar to XCHG on x86. */
681 /* The first operand is the result, the second the value */
682 /* to be stored. Both registers must be different from addr. */
683 /* Make the address operand an early clobber output so it */
684 /* doesn't overlap with the other operands. The early clobber */
685 /* on oldval is necessary to prevent the compiler allocating */
686 /* them to the same register if they are both unused. */
687
688 __asm__ __volatile__("@AO_test_and_set_full\n"
689 AO_THUMB_GO_ARM
690 AO_MASK_PTR("%3")
691 " swp %0, %2, [%3]\n"
692 /* Ignore GCC "SWP is deprecated for this architecture" */
693 /* warning here (for ARMv6+). */
694 AO_THUMB_RESTORE_MODE
695 : "=&r"(oldval), "=&r"(addr)
696 : "r"(1), "1"(addr)
697 : AO_THUMB_SWITCH_CLOBBERS "memory");
698 return oldval;
699 }
700# define AO_HAVE_test_and_set_full
701#endif /* !AO_HAVE_test_and_set[_full] && AO_ARM_HAVE_SWP */
702
703#define AO_T_IS_INT
704
705#else /* AO_GCC_ATOMIC_TEST_AND_SET */
706
707# if defined(__clang__) && !defined(AO_ARM_HAVE_LDREX)
708 /* As of clang-3.8, it cannot compile __atomic_and/or/xor_fetch */
709 /* library calls yet for pre ARMv6. */
710# define AO_SKIPATOMIC_ANY_and_ANY
711# define AO_SKIPATOMIC_ANY_or_ANY
712# define AO_SKIPATOMIC_ANY_xor_ANY
713# endif
714
715# ifdef AO_ARM_HAVE_LDREXD
716# include "../standard_ao_double_t.h"
717# endif
718# include "generic.h"
719
720#endif /* AO_GCC_ATOMIC_TEST_AND_SET */
721
722#undef AO_ARM_HAVE_DMB
723#undef AO_ARM_HAVE_LDREX
724#undef AO_ARM_HAVE_LDREXBH
725#undef AO_ARM_HAVE_LDREXD
726#undef AO_ARM_HAVE_SWP
727#undef AO_BR_ALIGN
728#undef AO_MASK_PTR
729#undef AO_SKIPATOMIC_ANY_and_ANY
730#undef AO_SKIPATOMIC_ANY_or_ANY
731#undef AO_SKIPATOMIC_ANY_xor_ANY
732#undef AO_SKIPATOMIC_char_store
733#undef AO_SKIPATOMIC_char_store_release
734#undef AO_SKIPATOMIC_int_store
735#undef AO_SKIPATOMIC_int_store_release
736#undef AO_SKIPATOMIC_short_store
737#undef AO_SKIPATOMIC_short_store_release
738#undef AO_SKIPATOMIC_store
739#undef AO_SKIPATOMIC_store_release
740#undef AO_THUMB_GO_ARM
741#undef AO_THUMB_RESTORE_MODE
742#undef AO_THUMB_SWITCH_CLOBBERS
743