| 1 | /* |
| 2 | * Copyright (c) 2003 Hewlett-Packard Development Company, L.P. |
| 3 | * Copyright (c) 2009-2021 Ivan Maidanski |
| 4 | * |
| 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 6 | * of this software and associated documentation files (the "Software"), to deal |
| 7 | * in the Software without restriction, including without limitation the rights |
| 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 9 | * copies of the Software, and to permit persons to whom the Software is |
| 10 | * furnished to do so, subject to the following conditions: |
| 11 | * |
| 12 | * The above copyright notice and this permission notice shall be included in |
| 13 | * all copies or substantial portions of the Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 21 | * SOFTWARE. |
| 22 | */ |
| 23 | |
| 24 | #include "../all_aligned_atomic_load_store.h" |
| 25 | |
#ifndef AO_ASSUME_VISTA
# if _MSC_VER >= 1910
    /* Support of Windows XP was discontinued in Visual Studio 2017     */
    /* (15.0), so the target is assumed to be Windows Server 2003,      */
    /* Vista or later.                                                  */
#   define AO_ASSUME_VISTA
# endif
#endif
| 31 | |
#ifndef AO_ASSUME_WINDOWS98
# if defined(AO_ASSUME_VISTA) || _MSC_VER >= 1400
    /* Support of Windows 95 was discontinued in Visual Studio 2005     */
    /* (MS VC++ 8.0), so at least Windows 98 is assumed.                */
#   define AO_ASSUME_WINDOWS98
# endif
#endif
| 37 | |
#ifndef AO_USE_PENTIUM4_INSTRS
# if _M_IX86_FP >= 2 /* SSE2 */
    /* The "mfence" instruction belongs to the SSE2 set (which was      */
    /* introduced on Intel Pentium 4).                                  */
#   define AO_USE_PENTIUM4_INSTRS
# endif
#endif
| 42 | |
| 43 | #define AO_T_IS_INT |
| 44 | |
| 45 | #ifndef AO_USE_INTERLOCKED_INTRINSICS |
| 46 | /* _Interlocked primitives (Inc, Dec, Xchg, Add) are always available */ |
| 47 | # define AO_USE_INTERLOCKED_INTRINSICS |
| 48 | #endif |
| 49 | #include "common32_defs.h" |
| 50 | |
| 51 | /* As far as we can tell, the lfence and sfence instructions are not */ |
| 52 | /* currently needed or useful for cached memory accesses. */ |
| 53 | |
/* Unfortunately, mfence is not available on every processor.          */
/* IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) is probably       */
/* a conservative run-time test for its presence.                      */
| 57 | |
| 58 | #if defined(AO_USE_PENTIUM4_INSTRS) |
| 59 | |
| 60 | AO_INLINE void |
| 61 | AO_nop_full(void) |
| 62 | { |
| 63 | __asm { mfence } |
| 64 | } |
| 65 | #define AO_HAVE_nop_full |
| 66 | |
| 67 | #else |
| 68 | |
/* We could implement AO_nop_full with the cpuid instruction, but that */
/* seems to be slower than the default implementation based on         */
/* test_and_set_full.  Thus no cpuid-based variant is defined here.    */
| 72 | |
| 73 | #endif |
| 74 | |
| 75 | #if !defined(AO_NO_ASM_XADD) && !defined(AO_HAVE_char_fetch_and_add_full) |
| 76 | AO_INLINE unsigned char |
| 77 | AO_char_fetch_and_add_full(volatile unsigned char *p, unsigned char incr) |
| 78 | { |
| 79 | __asm |
| 80 | { |
| 81 | mov al, incr |
| 82 | mov ebx, p |
| 83 | lock xadd byte ptr [ebx], al |
| 84 | } |
| 85 | /* Ignore possible "missing return value" warning here. */ |
| 86 | } |
| 87 | # define AO_HAVE_char_fetch_and_add_full |
| 88 | |
| 89 | AO_INLINE unsigned short |
| 90 | AO_short_fetch_and_add_full(volatile unsigned short *p, unsigned short incr) |
| 91 | { |
| 92 | __asm |
| 93 | { |
| 94 | mov ax, incr |
| 95 | mov ebx, p |
| 96 | lock xadd word ptr [ebx], ax |
| 97 | } |
| 98 | /* Ignore possible "missing return value" warning here. */ |
| 99 | } |
| 100 | # define AO_HAVE_short_fetch_and_add_full |
| 101 | #endif /* !AO_NO_ASM_XADD */ |
| 102 | |
| 103 | #ifndef AO_HAVE_test_and_set_full |
| 104 | # include "../test_and_set_t_is_char.h" |
| 105 | |
| 106 | AO_INLINE AO_TS_VAL_t |
| 107 | AO_test_and_set_full(volatile AO_TS_t *addr) |
| 108 | { |
| 109 | __asm |
| 110 | { |
| 111 | mov eax,0xff ; /* AO_TS_SET */ |
| 112 | mov ebx,addr ; |
| 113 | xchg byte ptr [ebx],al ; |
| 114 | } |
| 115 | /* Ignore possible "missing return value" warning here. */ |
| 116 | } |
| 117 | # define AO_HAVE_test_and_set_full |
| 118 | #endif |
| 119 | |
| 120 | #if defined(_WIN64) && !defined(CPPCHECK) |
| 121 | # error wrong architecture |
| 122 | #endif |
| 123 | |
| 124 | #ifdef AO_ASSUME_VISTA |
| 125 | # include "../standard_ao_double_t.h" |
| 126 | |
| 127 | /* Reading or writing a quadword aligned on a 64-bit boundary is */ |
| 128 | /* always carried out atomically (requires at least a Pentium). */ |
| 129 | # define AO_ACCESS_double_CHECK_ALIGNED |
| 130 | # include "../loadstore/double_atomic_load_store.h" |
| 131 | |
/* The intrinsic below is usable whenever we run on a Pentium-class    */
/* (or newer) machine.                                                 */
| 134 | # pragma intrinsic (_InterlockedCompareExchange64) |
| 135 | |
| 136 | /* Returns nonzero if the comparison succeeded. */ |
| 137 | AO_INLINE int |
| 138 | AO_double_compare_and_swap_full(volatile AO_double_t *addr, |
| 139 | AO_double_t old_val, AO_double_t new_val) |
| 140 | { |
| 141 | AO_ASSERT_ADDR_ALIGNED(addr); |
| 142 | return (double_ptr_storage)_InterlockedCompareExchange64( |
| 143 | (__int64 volatile *)addr, |
| 144 | new_val.AO_whole /* exchange */, |
| 145 | old_val.AO_whole) == old_val.AO_whole; |
| 146 | } |
| 147 | # define AO_HAVE_double_compare_and_swap_full |
| 148 | #endif /* AO_ASSUME_VISTA */ |
| 149 | |
| 150 | /* Real X86 implementations, except for some old WinChips, appear */ |
| 151 | /* to enforce ordering between memory operations, EXCEPT that a later */ |
| 152 | /* read can pass earlier writes, presumably due to the visible */ |
| 153 | /* presence of store buffers. */ |
| 154 | /* We ignore both the WinChips, and the fact that the official specs */ |
| 155 | /* seem to be much weaker (and arguably too weak to be usable). */ |
| 156 | #include "../ordered_except_wr.h" |
| 157 | |