| 1 | module main |
| 2 | |
| 3 | import x.atomics |
| 4 | import time |
| 5 | |
| 6 | $if windows { |
| 7 | #include "@VEXEROOT/thirdparty/stdatomic/win/atomic.h" |
| 8 | } $else { |
| 9 | #include "@VEXEROOT/thirdparty/stdatomic/nix/atomic.h" |
| 10 | } |
| 11 | |
// 32-bit atomic primitives declared for the C header included above.
fn C.atomic_store_u32(ptr voidptr, val u32)
fn C.atomic_load_u32(ptr voidptr) u32
fn C.atomic_fetch_add_u32(ptr voidptr, delta u32) u32
fn C.atomic_compare_exchange_strong_u32(ptr voidptr, expected voidptr, desired u32) bool
fn C.atomic_exchange_u32(ptr voidptr, val u32) u32

// 64-bit atomic primitives declared for the C header included above.
fn C.atomic_store_u64(ptr voidptr, val u64)
fn C.atomic_load_u64(ptr voidptr) u64
fn C.atomic_fetch_add_u64(ptr voidptr, delta u64) u64
fn C.atomic_compare_exchange_strong_u64(ptr voidptr, expected voidptr, desired u64) bool
fn C.atomic_exchange_u64(ptr voidptr, val u64) u64

// iterations is the number of timed calls passed to every bench_* run in main.
const iterations = 100_000_000
| 25 | |
// keepalive_u64 hands `x` to a volatile inline-assembly `nop` as a register
// input operand. NOTE(review): presumably this exists so the optimizer cannot
// discard the benchmarked value - confirm.
fn keepalive_u64(x u64) {
	asm volatile amd64 {
		nop
		; ; r (x)
	}
}
| 32 | |
// keepalive_u32 hands `x` to a volatile inline-assembly `nop` as a register
// input operand. NOTE(review): presumably this exists so the optimizer cannot
// discard the benchmarked value - confirm.
fn keepalive_u32(x u32) {
	asm volatile amd64 {
		nop
		; ; r (x)
	}
}
| 39 | |
// keepalive_i64 hands `x` to a volatile inline-assembly `nop` as a register
// input operand. NOTE(review): presumably this exists so the optimizer cannot
// discard the benchmarked value - confirm.
fn keepalive_i64(x i64) {
	asm volatile amd64 {
		nop
		; ; r (x)
	}
}
| 46 | |
// keepalive_i32 hands `x` to a volatile inline-assembly `nop` as a register
// input operand. NOTE(review): presumably this exists so the optimizer cannot
// discard the benchmarked value - confirm.
fn keepalive_i32(x i32) {
	asm volatile amd64 {
		nop
		; ; r (x)
	}
}
| 53 | |
// bench_u64 calls `f` on a local u64 cell `iters` times under a stopwatch and
// prints one result line labelled `name` (ns per call, total time, iterations).
// An untimed pass of 100_000 calls runs first.
fn bench_u64(name string, f fn (&u64, u64), iters int) {
	warmup_rounds := 100_000
	mut cell := u64(0)

	// Untimed warm-up pass.
	for k in 0 .. warmup_rounds {
		f(&cell, u64(k))
	}

	// Timed pass: the loop counter doubles as the operand handed to `f`.
	mut timer := time.new_stopwatch()
	for k in 0 .. iters {
		f(&cell, u64(k))
	}
	elapsed := timer.elapsed()

	// Consume the final cell value before reporting.
	keepalive_u64(cell)

	ns_per_op := f64(elapsed.nanoseconds()) / f64(iters)
	println('${name:-17s}: ${ns_per_op:6.3f} ns/op (total: ${elapsed:9}, iters: ${iters})')
}
| 72 | |
// bench_u32 calls `f` on a local u32 cell `iters` times under a stopwatch and
// prints one result line labelled `name` (ns per call, total time, iterations).
// An untimed pass of 100_000 calls runs first.
fn bench_u32(name string, f fn (&u32, u32), iters int) {
	warmup_rounds := 100_000
	mut cell := u32(0)

	// Untimed warm-up pass.
	for k in 0 .. warmup_rounds {
		f(&cell, u32(k))
	}

	// Timed pass: the loop counter doubles as the operand handed to `f`.
	mut timer := time.new_stopwatch()
	for k in 0 .. iters {
		f(&cell, u32(k))
	}
	elapsed := timer.elapsed()

	// Consume the final cell value before reporting.
	keepalive_u32(cell)

	ns_per_op := f64(elapsed.nanoseconds()) / f64(iters)
	println('${name:-17s}: ${ns_per_op:6.3f} ns/op (total: ${elapsed:9}, iters: ${iters})')
}
| 91 | |
// bench_i64 calls `f` on a local i64 cell `iters` times under a stopwatch and
// prints one result line labelled `name` (ns per call, total time, iterations).
// An untimed pass of 100_000 calls runs first.
fn bench_i64(name string, f fn (&i64, i64), iters int) {
	warmup_rounds := 100_000
	mut cell := i64(0)

	// Untimed warm-up pass.
	for k in 0 .. warmup_rounds {
		f(&cell, i64(k))
	}

	// Timed pass: the loop counter doubles as the operand handed to `f`.
	mut timer := time.new_stopwatch()
	for k in 0 .. iters {
		f(&cell, i64(k))
	}
	elapsed := timer.elapsed()

	// Consume the final cell value before reporting.
	keepalive_i64(cell)

	ns_per_op := f64(elapsed.nanoseconds()) / f64(iters)
	println('${name:-17s}: ${ns_per_op:6.3f} ns/op (total: ${elapsed:9}, iters: ${iters})')
}
| 110 | |
// bench_i32 calls `f` on a local i32 cell `iters` times under a stopwatch and
// prints one result line labelled `name` (ns per call, total time, iterations).
// An untimed pass of 100_000 calls runs first.
fn bench_i32(name string, f fn (&i32, i32), iters int) {
	warmup_rounds := 100_000
	mut cell := i32(0)

	// Untimed warm-up pass.
	for k in 0 .. warmup_rounds {
		f(&cell, i32(k))
	}

	// Timed pass: the loop counter doubles as the operand handed to `f`.
	mut timer := time.new_stopwatch()
	for k in 0 .. iters {
		f(&cell, i32(k))
	}
	elapsed := timer.elapsed()

	// Consume the final cell value before reporting.
	keepalive_i32(cell)

	ns_per_op := f64(elapsed.nanoseconds()) / f64(iters)
	println('${name:-17s}: ${ns_per_op:6.3f} ns/op (total: ${elapsed:9}, iters: ${iters})')
}
| 129 | |
// std_store_u64 writes `val` to `addr` through the C `atomic_store_u64` routine.
fn std_store_u64(addr &u64, val u64) {
	target := voidptr(addr)
	C.atomic_store_u64(target, val)
}
| 133 | |
// custom_store_u64 forwards `addr` and `val` to `atomics.store_u64`.
fn custom_store_u64(addr &u64, val u64) {
	atomics.store_u64(addr, val)
}
| 137 | |
// std_load_u64 reads `addr` through the C `atomic_load_u64` routine and
// discards the value. The second parameter is unused; it only exists so the
// function matches the callback signature expected by bench_u64.
fn std_load_u64(addr &u64, _ u64) {
	source := voidptr(addr)
	_ = C.atomic_load_u64(source)
}
| 141 | |
// custom_load_u64 calls `atomics.load_u64` on `addr` and discards the value.
// The second parameter is unused.
fn custom_load_u64(addr &u64, _ u64) {
	_ = atomics.load_u64(addr)
}
| 145 | |
// std_add_u64 applies `delta` to `addr` through the C `atomic_fetch_add_u64`
// routine and discards the returned value.
fn std_add_u64(addr &u64, delta u64) {
	target := voidptr(addr)
	_ = C.atomic_fetch_add_u64(target, delta)
}
| 149 | |
// custom_add_u64 forwards `addr` and `delta` to `atomics.add_u64` and discards
// the returned value.
fn custom_add_u64(addr &u64, delta u64) {
	_ = atomics.add_u64(addr, delta)
}
| 153 | |
// std_swap_u64 exchanges the value at `addr` with `val` through the C
// `atomic_exchange_u64` routine and discards the returned value.
fn std_swap_u64(addr &u64, val u64) {
	target := voidptr(addr)
	_ = C.atomic_exchange_u64(target, val)
}
| 157 | |
// custom_swap_u64 forwards `addr` and `val` to `atomics.swap_u64` and discards
// the returned value.
fn custom_swap_u64(addr &u64, val u64) {
	_ = atomics.swap_u64(addr, val)
}
| 161 | |
// std_cas_u64 runs the C `atomic_compare_exchange_strong_u64` routine on
// `addr` with `val` as the desired value. The expected value is a fresh local
// initialised to 0 on every call, and the boolean result is discarded.
fn std_cas_u64(addr &u64, val u64) {
	mut expected := u64(0)
	target := voidptr(addr)
	expected_ptr := voidptr(&expected)
	_ = C.atomic_compare_exchange_strong_u64(target, expected_ptr, val)
}
| 166 | |
// custom_cas_u64 calls `atomics.cas_u64` on `addr` with 0 as the second
// argument and `val` as the third, discarding the result.
fn custom_cas_u64(addr &u64, val u64) {
	_ = atomics.cas_u64(addr, 0, val)
}
| 170 | |
// std_store_u32 writes `val` to `addr` through the C `atomic_store_u32` routine.
fn std_store_u32(addr &u32, val u32) {
	target := voidptr(addr)
	C.atomic_store_u32(target, val)
}
| 174 | |
// custom_store_u32 forwards `addr` and `val` to `atomics.store_u32`.
fn custom_store_u32(addr &u32, val u32) {
	atomics.store_u32(addr, val)
}
| 178 | |
// std_load_u32 reads `addr` through the C `atomic_load_u32` routine and
// discards the value. The second parameter is unused; it only exists so the
// function matches the callback signature expected by bench_u32.
fn std_load_u32(addr &u32, _ u32) {
	source := voidptr(addr)
	_ = C.atomic_load_u32(source)
}
| 182 | |
// custom_load_u32 calls `atomics.load_u32` on `addr` and discards the value.
// The second parameter is unused.
fn custom_load_u32(addr &u32, _ u32) {
	_ = atomics.load_u32(addr)
}
| 186 | |
// std_add_u32 applies `delta` to `addr` through the C `atomic_fetch_add_u32`
// routine and discards the returned value.
fn std_add_u32(addr &u32, delta u32) {
	target := voidptr(addr)
	_ = C.atomic_fetch_add_u32(target, delta)
}
| 190 | |
// custom_add_u32 forwards `addr` and `delta` to `atomics.add_u32` and discards
// the returned value.
fn custom_add_u32(addr &u32, delta u32) {
	_ = atomics.add_u32(addr, delta)
}
| 194 | |
// std_swap_u32 exchanges the value at `addr` with `val` through the C
// `atomic_exchange_u32` routine and discards the returned value.
fn std_swap_u32(addr &u32, val u32) {
	target := voidptr(addr)
	_ = C.atomic_exchange_u32(target, val)
}
| 198 | |
// custom_swap_u32 forwards `addr` and `val` to `atomics.swap_u32` and discards
// the returned value.
fn custom_swap_u32(addr &u32, val u32) {
	_ = atomics.swap_u32(addr, val)
}
| 202 | |
// std_cas_u32 runs the C `atomic_compare_exchange_strong_u32` routine on
// `addr` with `val` as the desired value. The expected value is a fresh local
// initialised to 0 on every call, and the boolean result is discarded.
fn std_cas_u32(addr &u32, val u32) {
	mut expected := u32(0)
	target := voidptr(addr)
	expected_ptr := voidptr(&expected)
	_ = C.atomic_compare_exchange_strong_u32(target, expected_ptr, val)
}
| 207 | |
// custom_cas_u32 calls `atomics.cas_u32` on `addr` with 0 as the second
// argument and `val` as the third, discarding the result.
fn custom_cas_u32(addr &u32, val u32) {
	_ = atomics.cas_u32(addr, 0, val)
}
| 211 | |
// std_store_i64 writes `val` to `addr` by reusing the unsigned C routine
// `atomic_store_u64`, with `val` cast to u64.
fn std_store_i64(addr &i64, val i64) {
	C.atomic_store_u64(voidptr(addr), u64(val))
}
| 215 | |
// custom_store_i64 forwards `addr` and `val` to `atomics.store_i64`.
fn custom_store_i64(addr &i64, val i64) {
	atomics.store_i64(addr, val)
}
| 219 | |
// std_load_i64 reads `addr` by reusing the unsigned C routine
// `atomic_load_u64` and discards the value. The second parameter is unused.
fn std_load_i64(addr &i64, _ i64) {
	_ = C.atomic_load_u64(voidptr(addr))
}
| 225 | |
// custom_load_i64 calls `atomics.load_i64` on `addr` and discards the value.
// The second parameter is unused.
fn custom_load_i64(addr &i64, _ i64) {
	_ = atomics.load_i64(addr)
}
| 229 | |
// std_add_i64 applies `delta` to `addr` by reusing the unsigned C routine
// `atomic_fetch_add_u64`, with `delta` cast to u64; the returned value is
// discarded.
fn std_add_i64(addr &i64, delta i64) {
	_ = C.atomic_fetch_add_u64(voidptr(addr), u64(delta))
}
| 235 | |
// custom_add_i64 forwards `addr` and `delta` to `atomics.add_i64` and discards
// the returned value.
fn custom_add_i64(addr &i64, delta i64) {
	_ = atomics.add_i64(addr, delta)
}
| 239 | |
// std_swap_i64 exchanges the value at `addr` with `val` by reusing the
// unsigned C routine `atomic_exchange_u64`, with `val` cast to u64; the
// returned value is discarded.
fn std_swap_i64(addr &i64, val i64) {
	_ = C.atomic_exchange_u64(voidptr(addr), u64(val))
}
| 245 | |
// custom_swap_i64 forwards `addr` and `val` to `atomics.swap_i64` and discards
// the returned value.
fn custom_swap_i64(addr &i64, val i64) {
	_ = atomics.swap_i64(addr, val)
}
| 249 | |
// std_cas_i64 runs the unsigned C routine `atomic_compare_exchange_strong_u64`
// on `addr` with `val` (cast to u64) as the desired value. The expected value
// is a fresh u64 local initialised to 0 on every call, and the boolean result
// is discarded.
fn std_cas_i64(addr &i64, val i64) {
	mut expected := u64(0)
	_ = C.atomic_compare_exchange_strong_u64(voidptr(addr), voidptr(&expected), u64(val))
}
| 256 | |
// custom_cas_i64 calls `atomics.cas_i64` on `addr` with 0 as the second
// argument and `val` as the third, discarding the result.
fn custom_cas_i64(addr &i64, val i64) {
	_ = atomics.cas_i64(addr, 0, val)
}
| 260 | |
// std_store_i32 writes `val` to `addr` by reusing the unsigned C routine
// `atomic_store_u32`, with `val` cast to u32.
fn std_store_i32(addr &i32, val i32) {
	C.atomic_store_u32(voidptr(addr), u32(val))
}
| 264 | |
// custom_store_i32 forwards `addr` and `val` to `atomics.store_i32`.
fn custom_store_i32(addr &i32, val i32) {
	atomics.store_i32(addr, val)
}
| 268 | |
// std_load_i32 reads `addr` by reusing the unsigned C routine
// `atomic_load_u32` and discards the value. The second parameter is unused.
fn std_load_i32(addr &i32, _ i32) {
	_ = C.atomic_load_u32(voidptr(addr))
}
| 274 | |
// custom_load_i32 calls `atomics.load_i32` on `addr` and discards the value.
// The second parameter is unused.
fn custom_load_i32(addr &i32, _ i32) {
	_ = atomics.load_i32(addr)
}
| 278 | |
// std_add_i32 applies `delta` to `addr` by reusing the unsigned C routine
// `atomic_fetch_add_u32`, with `delta` cast to u32; the returned value is
// discarded.
fn std_add_i32(addr &i32, delta i32) {
	_ = C.atomic_fetch_add_u32(voidptr(addr), u32(delta))
}
| 284 | |
// custom_add_i32 forwards `addr` and `delta` to `atomics.add_i32` and discards
// the returned value.
fn custom_add_i32(addr &i32, delta i32) {
	_ = atomics.add_i32(addr, delta)
}
| 288 | |
// std_swap_i32 exchanges the value at `addr` with `val` by reusing the
// unsigned C routine `atomic_exchange_u32`, with `val` cast to u32; the
// returned value is discarded.
fn std_swap_i32(addr &i32, val i32) {
	_ = C.atomic_exchange_u32(voidptr(addr), u32(val))
}
| 294 | |
// custom_swap_i32 forwards `addr` and `val` to `atomics.swap_i32` and discards
// the returned value.
fn custom_swap_i32(addr &i32, val i32) {
	_ = atomics.swap_i32(addr, val)
}
| 298 | |
// std_cas_i32 runs the unsigned C routine `atomic_compare_exchange_strong_u32`
// on `addr` with `val` (cast to u32) as the desired value. The expected value
// is a fresh u32 local initialised to 0 on every call, and the boolean result
// is discarded.
fn std_cas_i32(addr &i32, val i32) {
	mut expected := u32(0)
	_ = C.atomic_compare_exchange_strong_u32(voidptr(addr), voidptr(&expected), u32(val))
}
| 305 | |
// custom_cas_i32 calls `atomics.cas_i32` on `addr` with 0 as the second
// argument and `val` as the third, discarding the result.
fn custom_cas_i32(addr &i32, val i32) {
	_ = atomics.cas_i32(addr, 0, val)
}
| 309 | |
// main runs each std/custom benchmark pair (store, load, add, swap, cas) for
// u64, u32, i64 and i32 in turn, printing a blank line between type groups.
// Every run uses the module-level `iterations` count.
fn main() {
	n := iterations

	// --- u64 ---
	bench_u64('u64 store std', std_store_u64, n)
	bench_u64('u64 store custom', custom_store_u64, n)

	bench_u64('u64 load std', std_load_u64, n)
	bench_u64('u64 load custom', custom_load_u64, n)

	bench_u64('u64 add std', std_add_u64, n)
	bench_u64('u64 add custom', custom_add_u64, n)

	bench_u64('u64 swap std', std_swap_u64, n)
	bench_u64('u64 swap custom', custom_swap_u64, n)

	bench_u64('u64 cas std', std_cas_u64, n)
	bench_u64('u64 cas custom', custom_cas_u64, n)

	println('')

	// --- u32 ---
	bench_u32('u32 store std', std_store_u32, n)
	bench_u32('u32 store custom', custom_store_u32, n)

	bench_u32('u32 load std', std_load_u32, n)
	bench_u32('u32 load custom', custom_load_u32, n)

	bench_u32('u32 add std', std_add_u32, n)
	bench_u32('u32 add custom', custom_add_u32, n)

	bench_u32('u32 swap std', std_swap_u32, n)
	bench_u32('u32 swap custom', custom_swap_u32, n)

	bench_u32('u32 cas std', std_cas_u32, n)
	bench_u32('u32 cas custom', custom_cas_u32, n)

	println('')

	// --- i64 ---
	bench_i64('i64 store std', std_store_i64, n)
	bench_i64('i64 store custom', custom_store_i64, n)

	bench_i64('i64 load std', std_load_i64, n)
	bench_i64('i64 load custom', custom_load_i64, n)

	bench_i64('i64 add std', std_add_i64, n)
	bench_i64('i64 add custom', custom_add_i64, n)

	bench_i64('i64 swap std', std_swap_i64, n)
	bench_i64('i64 swap custom', custom_swap_i64, n)

	bench_i64('i64 cas std', std_cas_i64, n)
	bench_i64('i64 cas custom', custom_cas_i64, n)

	println('')

	// --- i32 ---
	bench_i32('i32 store std', std_store_i32, n)
	bench_i32('i32 store custom', custom_store_i32, n)

	bench_i32('i32 load std', std_load_i32, n)
	bench_i32('i32 load custom', custom_load_i32, n)

	bench_i32('i32 add std', std_add_i32, n)
	bench_i32('i32 add custom', custom_add_i32, n)

	bench_i32('i32 swap std', std_swap_i32, n)
	bench_i32('i32 swap custom', custom_swap_i32, n)

	bench_i32('i32 cas std', std_cas_i32, n)
	bench_i32('i32 cas custom', custom_cas_i32, n)
}
| 377 | |