| 1 | |
| 2 | /* Memory model documented at http://www-106.ibm.com/developerworks/ */ |
| 3 | /* eserver/articles/archguide.html and (clearer) */ |
| 4 | /* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */ |
| 5 | /* There appears to be no implicit ordering between any kind of */ |
| 6 | /* independent memory references. */ |
| 7 | /* Architecture enforces some ordering based on control dependence. */ |
| 8 | /* I don't know if that could help. */ |
| 9 | /* Data-dependent loads are always ordered. */ |
| 10 | /* Based on the above references, eieio is intended for use on */ |
| 11 | /* uncached memory, which we don't support. It does not order loads */ |
| 12 | /* from cached memory. */ |
| 13 | /* Thanks to Maged Michael, Doug Lea, and Roger Hoover for helping to */ |
| 14 | /* track some of this down and correcting my misunderstandings. -HB */ |
| 15 | |
| 16 | #include "../all_aligned_atomic_load_store.h" |
| 17 | |
| 18 | #include "../test_and_set_t_is_ao_t.h" |
| 19 | |
/* AO_sync is declared as an ordinary function, but the mc_func pragma  */
/* makes the compiler emit the given machine code in place of a call.   */
/* 0x7c0004ac is the encoding of the PowerPC "sync" instruction.        */
void AO_sync(void);
#pragma mc_func AO_sync { "7c0004ac" }

/* AO_lwsync is the barrier used by every primitive in this file.       */
/* 0x7c2004ac is the encoding of "lwsync".  If __NO_LWSYNC__ is defined */
/* (presumably for targets lacking lwsync), AO_lwsync is simply an      */
/* alias for AO_sync.                                                   */
#ifdef __NO_LWSYNC__
# define AO_lwsync AO_sync
#else
  void AO_lwsync(void);
#pragma mc_func AO_lwsync { "7c2004ac" }
#endif

/* The write barrier and the read barrier are both mapped to AO_lwsync. */
#define AO_nop_write() AO_lwsync()
#define AO_HAVE_nop_write

#define AO_nop_read() AO_lwsync()
#define AO_HAVE_nop_read
| 35 | |
| 36 | /* We explicitly specify load_acquire and store_release, since these */ |
| 37 | /* rely on the fact that lwsync is also a LoadStore barrier. */ |
| 38 | AO_INLINE AO_t |
| 39 | AO_load_acquire(const volatile AO_t *addr) |
| 40 | { |
| 41 | AO_t result = *addr; |
| 42 | AO_lwsync(); |
| 43 | return result; |
| 44 | } |
| 45 | #define AO_HAVE_load_acquire |
| 46 | |
| 47 | AO_INLINE void |
| 48 | AO_store_release(volatile AO_t *addr, AO_t value) |
| 49 | { |
| 50 | AO_lwsync(); |
| 51 | *addr = value; |
| 52 | } |
| 53 | #define AO_HAVE_store_release |
| 54 | |
| 55 | #ifndef AO_PREFER_GENERALIZED |
| 56 | /* This is similar to the code in the garbage collector. Deleting */ |
| 57 | /* this and having it synthesized from compare_and_swap would probably */ |
| 58 | /* only cost us a load immediate instruction. */ |
| 59 | AO_INLINE AO_TS_VAL_t |
| 60 | AO_test_and_set(volatile AO_TS_t *addr) { |
| 61 | #if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__) |
| 62 | /* Completely untested. And we should be using smaller objects anyway. */ |
| 63 | unsigned long oldval; |
| 64 | unsigned long temp = 1; /* locked value */ |
| 65 | |
| 66 | __asm__ __volatile__( |
| 67 | "1:ldarx %0,0,%1\n" /* load and reserve */ |
| 68 | "cmpdi %0, 0\n" /* if load is */ |
| 69 | "bne 2f\n" /* non-zero, return already set */ |
| 70 | "stdcx. %2,0,%1\n" /* else store conditional */ |
| 71 | "bne- 1b\n" /* retry if lost reservation */ |
| 72 | "2:\n" /* oldval is zero if we set */ |
| 73 | : "=&r"(oldval) |
| 74 | : "r"(addr), "r"(temp) |
| 75 | : "memory", "cr0"); |
| 76 | #else |
| 77 | int oldval; |
| 78 | int temp = 1; /* locked value */ |
| 79 | |
| 80 | __asm__ __volatile__( |
| 81 | "1:lwarx %0,0,%1\n" /* load and reserve */ |
| 82 | "cmpwi %0, 0\n" /* if load is */ |
| 83 | "bne 2f\n" /* non-zero, return already set */ |
| 84 | "stwcx. %2,0,%1\n" /* else store conditional */ |
| 85 | "bne- 1b\n" /* retry if lost reservation */ |
| 86 | "2:\n" /* oldval is zero if we set */ |
| 87 | : "=&r"(oldval) |
| 88 | : "r"(addr), "r"(temp) |
| 89 | : "memory", "cr0"); |
| 90 | #endif |
| 91 | return (AO_TS_VAL_t)oldval; |
| 92 | } |
| 93 | #define AO_HAVE_test_and_set |
| 94 | |
| 95 | AO_INLINE AO_TS_VAL_t |
| 96 | AO_test_and_set_acquire(volatile AO_TS_t *addr) { |
| 97 | AO_TS_VAL_t result = AO_test_and_set(addr); |
| 98 | AO_lwsync(); |
| 99 | return result; |
| 100 | } |
| 101 | #define AO_HAVE_test_and_set_acquire |
| 102 | |
| 103 | AO_INLINE AO_TS_VAL_t |
| 104 | AO_test_and_set_release(volatile AO_TS_t *addr) { |
| 105 | AO_lwsync(); |
| 106 | return AO_test_and_set(addr); |
| 107 | } |
| 108 | #define AO_HAVE_test_and_set_release |
| 109 | |
| 110 | AO_INLINE AO_TS_VAL_t |
| 111 | AO_test_and_set_full(volatile AO_TS_t *addr) { |
| 112 | AO_TS_VAL_t result; |
| 113 | AO_lwsync(); |
| 114 | result = AO_test_and_set(addr); |
| 115 | AO_lwsync(); |
| 116 | return result; |
| 117 | } |
| 118 | #define AO_HAVE_test_and_set_full |
| 119 | #endif /* !AO_PREFER_GENERALIZED */ |
| 120 | |
| 121 | AO_INLINE AO_t |
| 122 | AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val) |
| 123 | { |
| 124 | AO_t fetched_val; |
| 125 | # if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__) |
| 126 | __asm__ __volatile__( |
| 127 | "1:ldarx %0,0,%1\n" /* load and reserve */ |
| 128 | "cmpd %0, %3\n" /* if load is not equal to */ |
| 129 | "bne 2f\n" /* old_val, fail */ |
| 130 | "stdcx. %2,0,%1\n" /* else store conditional */ |
| 131 | "bne- 1b\n" /* retry if lost reservation */ |
| 132 | "2:\n" |
| 133 | : "=&r"(fetched_val) |
| 134 | : "r"(addr), "r"(new_val), "r"(old_val) |
| 135 | : "memory", "cr0"); |
| 136 | # else |
| 137 | __asm__ __volatile__( |
| 138 | "1:lwarx %0,0,%1\n" /* load and reserve */ |
| 139 | "cmpw %0, %3\n" /* if load is not equal to */ |
| 140 | "bne 2f\n" /* old_val, fail */ |
| 141 | "stwcx. %2,0,%1\n" /* else store conditional */ |
| 142 | "bne- 1b\n" /* retry if lost reservation */ |
| 143 | "2:\n" |
| 144 | : "=&r"(fetched_val) |
| 145 | : "r"(addr), "r"(new_val), "r"(old_val) |
| 146 | : "memory", "cr0"); |
| 147 | # endif |
| 148 | return fetched_val; |
| 149 | } |
| 150 | #define AO_HAVE_fetch_compare_and_swap |
| 151 | |
| 152 | AO_INLINE AO_t |
| 153 | AO_fetch_compare_and_swap_acquire(volatile AO_t *addr, AO_t old_val, |
| 154 | AO_t new_val) |
| 155 | { |
| 156 | AO_t result = AO_fetch_compare_and_swap(addr, old_val, new_val); |
| 157 | AO_lwsync(); |
| 158 | return result; |
| 159 | } |
| 160 | #define AO_HAVE_fetch_compare_and_swap_acquire |
| 161 | |
| 162 | AO_INLINE AO_t |
| 163 | AO_fetch_compare_and_swap_release(volatile AO_t *addr, AO_t old_val, |
| 164 | AO_t new_val) |
| 165 | { |
| 166 | AO_lwsync(); |
| 167 | return AO_fetch_compare_and_swap(addr, old_val, new_val); |
| 168 | } |
| 169 | #define AO_HAVE_fetch_compare_and_swap_release |
| 170 | |
| 171 | AO_INLINE AO_t |
| 172 | AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val, |
| 173 | AO_t new_val) |
| 174 | { |
| 175 | AO_t result; |
| 176 | AO_lwsync(); |
| 177 | result = AO_fetch_compare_and_swap(addr, old_val, new_val); |
| 178 | AO_lwsync(); |
| 179 | return result; |
| 180 | } |
| 181 | #define AO_HAVE_fetch_compare_and_swap_full |
| 182 | |
| 183 | /* TODO: Implement AO_fetch_and_add, AO_and/or/xor directly. */ |
| 184 | |