From edd4a7129fa45c0bb5d5923b5b1a64cb5053ce46 Mon Sep 17 00:00:00 2001 From: oswyndel <67714233+RbPyer@users.noreply.github.com> Date: Thu, 12 Feb 2026 10:31:05 +0300 Subject: [PATCH] vlib: add x.atomics - implement native_x86-64_atomics in V and assembly, without depending on an external C library (impl #26474) (#26529) --- vlib/x/atomics/README.md | 100 +++ vlib/x/atomics/atomics.amd64.v | 524 ++++++++++++++++ vlib/x/atomics/atomics.i386.v | 606 +++++++++++++++++++ vlib/x/atomics/benchmarks/README.md | 123 ++++ vlib/x/atomics/benchmarks/atomic_benchmark.v | 376 ++++++++++++ vlib/x/atomics/examples/basic.v | 23 + vlib/x/atomics/examples/counter.v | 40 ++ vlib/x/atomics/examples/spinlock.v | 59 ++ vlib/x/atomics/i32_test.v | 189 ++++++ vlib/x/atomics/i64_test.v | 161 +++++ vlib/x/atomics/panic_unaligned.v | 10 + vlib/x/atomics/u32_test.v | 178 ++++++ vlib/x/atomics/u64_test.v | 155 +++++ 13 files changed, 2544 insertions(+) create mode 100644 vlib/x/atomics/README.md create mode 100644 vlib/x/atomics/atomics.amd64.v create mode 100644 vlib/x/atomics/atomics.i386.v create mode 100644 vlib/x/atomics/benchmarks/README.md create mode 100644 vlib/x/atomics/benchmarks/atomic_benchmark.v create mode 100644 vlib/x/atomics/examples/basic.v create mode 100644 vlib/x/atomics/examples/counter.v create mode 100644 vlib/x/atomics/examples/spinlock.v create mode 100644 vlib/x/atomics/i32_test.v create mode 100644 vlib/x/atomics/i64_test.v create mode 100644 vlib/x/atomics/panic_unaligned.v create mode 100644 vlib/x/atomics/u32_test.v create mode 100644 vlib/x/atomics/u64_test.v diff --git a/vlib/x/atomics/README.md b/vlib/x/atomics/README.md new file mode 100644 index 000000000..f445fe1f2 --- /dev/null +++ b/vlib/x/atomics/README.md @@ -0,0 +1,100 @@ +# v-atomics + +Low-level atomic operations for V with explicit i386 support (MMX required on i386). + +Native atomic primitives for V implemented with inline assembly, without relying on C FFI. 
+ +This repository is an experiment in providing low-level atomic operations directly in V, +using V's inline assembly support. + +At the moment, all operations provide sequentially consistent semantics. + +## Motivation + +In the current V ecosystem, atomic operations are implemented via calls into C. + +While this approach works, it introduces an additional dependency on the C toolchain +and headers and limits control over the exact machine instructions being emitted. + +x.atomics explores an alternative: **native atomic operations implemented directly in V**, +using architecture-specific inline assembly and explicit semantics. + +The current focus of this project is: + +- correctness of basic atomic primitives; +- predictable and inspectable code generation; +- sequentially consistent behavior for all operations. + +In future versions, the set of supported atomic operations will be expanded, +and additional memory orderings will be introduced. + +--- + +## Scope and Guarantees + +- atomic operations on integer types implemented in V with inline assembly; +- architecture-specific implementations (per-platform `atomics.<arch>.v` files); +- **sequential consistency** for all exposed operations. + +--- + +## Memory Model + +All operations in this library are intended to be **sequentially consistent**: + +- operations appear to be globally ordered; +- no weaker semantics (relaxed, acquire, release) are currently implemented; +- when weaker variants are added in the future, they will be explicitly named and documented. + +--- + +## Examples + +See the [examples](examples/) directory for complete runnable examples. 
+
+### Basic Usage
+
+```v
+import x.atomics
+
+fn main() {
+	mut value := i32(0)
+
+	// Atomically store a value
+	atomics.store_i32(&value, 42)
+
+	// Atomically load the value
+	loaded := atomics.load_i32(&value)
+
+	// Atomic add: returns the new value after addition
+	new_value := atomics.add_i32(&value, 10)
+
+	// Atomic swap: returns the old value
+	old := atomics.swap_i32(&value, 100)
+}
+```
+
+### Compare-and-Swap (CAS)
+
+```v
+import x.atomics
+
+fn main() {
+	mut flag := u32(0)
+
+	// CAS: if flag == 0, set it to 1; returns true on success
+	if atomics.cas_u32(&flag, 0, 1) {
+		println('Successfully changed flag from 0 to 1')
+	}
+}
+```
+
+### Available Operations
+
+| Operation | i32 | i64 | u32 | u64 |
+|-----------|-----|-----|-----|-----|
+| `load_*` | yes | yes | yes | yes |
+| `store_*` | yes | yes | yes | yes |
+| `add_*` | yes | yes | yes | yes |
+| `swap_*` | yes | yes | yes | yes |
+| `cas_*` | yes | yes | yes | yes |
diff --git a/vlib/x/atomics/atomics.amd64.v b/vlib/x/atomics/atomics.amd64.v
new file mode 100644
index 000000000..f70165133
--- /dev/null
+++ b/vlib/x/atomics/atomics.amd64.v
@@ -0,0 +1,524 @@
+module atomics
+
+// add_i32 atomically adds delta to the value at dest and returns the new value,
+// i.e. the value stored at dest after the addition (previous value + delta).
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn add_i32(dest &i32, delta i32) i32 {
+	mut result := i32(0)
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the add
+		jmp '2f'
+		1:
+		mov eax, delta
+		lock xadd [rdx], eax // *dest += eax; eax receives the previous value
+		add eax, delta // previous value + delta = the value now stored at dest
+		mov result, eax
+		2:
+		; =r (result)
+		; r (dest)
+		  r (delta)
+		; eax
+		  rdx
+		  memory
+	}
+	return result
+}
+
+// swap_i32 atomically stores new value at dest and returns the old value.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn swap_i32(dest &i32, new i32) i32 {
+	mut old := i32(0)
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the exchange
+		jmp '2f'
+		1:
+		mov eax, new
+		xchg [rdx], eax // exchange eax with *dest; xchg with a memory operand is implicitly locked
+		mov old, eax // eax now holds the value that was at dest
+		2:
+		; =r (old)
+		; r (dest)
+		  r (new)
+		; eax
+		  rdx
+		  memory
+	}
+	return old
+}
+
+// store_i32 atomically stores value at dest.
+// The store is done with xchg; the previous value it yields is discarded.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn store_i32(dest &i32, value i32) {
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the store
+		jmp '2f'
+		1:
+		mov eax, value
+		xchg eax, [rdx] // store via exchange; the old value left in eax is ignored
+		2:
+		; ; r (dest)
+		  r (value)
+		; eax
+		  rdx
+		  memory
+	}
+}
+
+// load_i32 atomically loads and returns the value at num.
+// The read itself is a single plain mov from the aligned address.
+// The operation is performed with sequential consistency.
+// Panics if num is not 4-byte aligned.
+pub fn load_i32(num &i32) i32 {
+	mut out := i32(0)
+	asm volatile amd64 {
+		mov rdx, num
+		test rdx, 3 // nonzero low 2 bits => num is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the load
+		jmp '2f'
+		1:
+		mov eax, [rdx]
+		mov out, eax
+		2:
+		; =r (out)
+		; r (num)
+		; eax
+		  rdx
+		  memory
+	}
+	return out
+}
+
+// cas_i32 performs a compare-and-swap operation.
+// If the current value at addr equals old, it atomically stores new.
+// Returns true if the swap was performed, false otherwise.
+// The operation is performed with sequential consistency.
+// Panics if addr is not 4-byte aligned.
+pub fn cas_i32(addr &i32, old i32, new i32) bool {
+	mut swapped := false
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 3 // nonzero low 2 bits => addr is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the CAS
+		jmp '2f'
+		1:
+		mov eax, old // cmpxchg compares against eax
+		mov ecx, new
+		lock cmpxchg [rdx], ecx // if *addr == eax: *addr = ecx, ZF = 1; else eax = *addr, ZF = 0
+		sete al // al = 1 exactly when the exchange happened
+		mov swapped, al
+		2:
+		; =r (swapped)
+		; r (addr)
+		  r (old)
+		  r (new)
+		; eax
+		  ecx
+		  rdx
+		  memory
+	}
+	return swapped
+}
+
+// store_i64 atomically stores value at dest.
+// The store is done with xchg; the previous value it yields is discarded.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn store_i64(dest &i64, value i64) {
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the store
+		jmp '2f'
+		1:
+		mov rax, value
+		xchg rax, [rdx] // store via exchange; the old value left in rax is ignored
+		2:
+		; ; r (dest)
+		  r (value)
+		; rax
+		  rdx
+		  memory
+	}
+}
+
+// load_i64 atomically loads and returns the value at num.
+// The read itself is a single plain mov from the aligned address.
+// The operation is performed with sequential consistency.
+// Panics if num is not 8-byte aligned.
+pub fn load_i64(num &i64) i64 {
+	mut out := i64(0)
+	asm volatile amd64 {
+		mov rdx, num
+		test rdx, 7 // nonzero low 3 bits => num is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the load
+		jmp '2f'
+		1:
+		mov rax, [rdx]
+		mov out, rax
+		2:
+		; =r (out)
+		; r (num)
+		; rax
+		  rdx
+		  memory
+	}
+	return out
+}
+
+// add_i64 atomically adds delta to the value at dest and returns the new value,
+// i.e. the value stored at dest after the addition (previous value + delta).
+// The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn add_i64(dest &i64, delta i64) i64 {
+	mut result := i64(0)
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the add
+		jmp '2f'
+		1:
+		mov rax, delta
+		lock xadd [rdx], rax // *dest += rax; rax receives the previous value
+		add rax, delta // previous value + delta = the value now stored at dest
+		mov result, rax
+		2:
+		; =r (result)
+		; r (delta)
+		  r (dest)
+		; rax
+		  rdx
+		  memory
+	}
+	return result
+}
+
+// swap_i64 atomically stores value at dest and returns the old value.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn swap_i64(dest &i64, value i64) i64 {
+	mut old := i64(0)
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the exchange
+		jmp '2f'
+		1:
+		mov rax, value
+		xchg rax, [rdx] // exchange rax with *dest; xchg with a memory operand is implicitly locked
+		mov old, rax // rax now holds the value that was at dest
+		2:
+		; =r (old)
+		; r (dest)
+		  r (value)
+		; rax
+		  rdx
+		  memory
+	}
+	return old
+}
+
+// cas_i64 performs a compare-and-swap operation.
+// If the current value at addr equals old, it atomically stores new.
+// Returns true if the swap was performed, false otherwise.
+// The operation is performed with sequential consistency.
+// Panics if addr is not 8-byte aligned.
+pub fn cas_i64(addr &i64, old i64, new i64) bool {
+	mut swapped := false
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 7 // nonzero low 3 bits => addr is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the CAS
+		jmp '2f'
+		1:
+		mov rax, old // cmpxchg compares against rax
+		mov rcx, new
+		lock cmpxchgq [rdx], rcx // if *addr == rax: *addr = rcx, ZF = 1; else rax = *addr, ZF = 0
+		sete al // al = 1 exactly when the exchange happened
+		mov swapped, al
+		2:
+		; =r (swapped)
+		; r (addr)
+		  r (old)
+		  r (new)
+		; rax
+		  rcx
+		  rdx
+		  memory
+	}
+	return swapped
+}
+
+// add_u32 atomically adds delta to the value at dest and returns the new value,
+// i.e. the value stored at dest after the addition (previous value + delta).
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn add_u32(dest &u32, delta u32) u32 {
+	mut result := u32(0)
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the add
+		jmp '2f'
+		1:
+		mov eax, delta
+		lock xadd [rdx], eax // *dest += eax; eax receives the previous value
+		add eax, delta // previous value + delta = the value now stored at dest
+		mov result, eax
+		2:
+		; =r (result)
+		; r (dest)
+		  r (delta)
+		; rax
+		  rdx
+		  memory
+	}
+	return result
+}
+
+// swap_u32 atomically stores new value at dest and returns the old value.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn swap_u32(dest &u32, new u32) u32 {
+	mut old := u32(0)
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the exchange
+		jmp '2f'
+		1:
+		mov eax, new
+		xchg [rdx], eax // exchange eax with *dest; xchg with a memory operand is implicitly locked
+		mov old, eax // eax now holds the value that was at dest
+		2:
+		; =r (old)
+		; r (dest)
+		  r (new)
+		; eax
+		  rdx
+		  memory
+	}
+	return old
+}
+
+// store_u32 atomically stores value at dest.
+// The store is done with xchg; the previous value it yields is discarded.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn store_u32(dest &u32, value u32) {
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the store
+		jmp '2f'
+		1:
+		mov eax, value
+		xchg eax, [rdx] // store via exchange; the old value left in eax is ignored
+		2:
+		; ; r (dest)
+		  r (value)
+		; eax
+		  rdx
+		  memory
+	}
+}
+
+// load_u32 atomically loads and returns the value at num.
+// The operation is performed with sequential consistency.
+// Panics if num is not 4-byte aligned.
+pub fn load_u32(num &u32) u32 {
+	mut out := u32(0)
+	asm volatile amd64 {
+		mov rdx, num
+		test rdx, 3 // nonzero low 2 bits => num is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the load
+		jmp '2f'
+		1:
+		mov eax, [rdx]
+		mov out, eax
+		2:
+		; =r (out)
+		; r (num)
+		; rax
+		  rdx
+		  memory // compiler barrier: no memory access may be cached or moved across this load
+	}
+	return out
+}
+
+// cas_u32 performs a compare-and-swap operation.
+// If the current value at addr equals old, it atomically stores new.
+// Returns true if the swap was performed, false otherwise.
+// The operation is performed with sequential consistency.
+// Panics if addr is not 4-byte aligned.
+pub fn cas_u32(addr &u32, old u32, new u32) bool {
+	mut swapped := false
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 3 // nonzero low 2 bits => addr is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the CAS
+		jmp '2f'
+		1:
+		mov eax, old // cmpxchg compares against eax
+		mov ecx, new
+		lock cmpxchg [rdx], ecx // if *addr == eax: *addr = ecx, ZF = 1; else eax = *addr, ZF = 0
+		sete al // al = 1 exactly when the exchange happened
+		mov swapped, al
+		2:
+		; =r (swapped)
+		; r (addr)
+		  r (old)
+		  r (new)
+		; eax
+		  ecx
+		  rdx
+		  memory
+	}
+	return swapped
+}
+
+// load_u64 atomically loads and returns the value at num.
+// The read itself is a single plain mov from the aligned address.
+// The operation is performed with sequential consistency.
+// Panics if num is not 8-byte aligned.
+pub fn load_u64(num &u64) u64 {
+	mut out := u64(0)
+	asm volatile amd64 {
+		mov rdx, num
+		test rdx, 7 // nonzero low 3 bits => num is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the load
+		jmp '2f'
+		1:
+		mov rax, [rdx]
+		mov out, rax
+		2:
+		; =r (out)
+		; r (num)
+		; rax
+		  rdx
+		  memory
+	}
+	return out
+}
+
+// store_u64 atomically stores value at dest.
+// The store is done with xchg; the previous value it yields is discarded.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn store_u64(dest &u64, value u64) {
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the store
+		jmp '2f'
+		1:
+		mov rax, value
+		xchg rax, [rdx] // store via exchange; the old value left in rax is ignored
+		2:
+		; ; r (dest)
+		  r (value)
+		; rax
+		  rdx
+		  memory
+	}
+}
+
+// add_u64 atomically adds delta to the value at dest and returns the new value.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn add_u64(dest &u64, delta u64) u64 {
+	mut result := u64(0)
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the add
+		jmp '2f'
+		1:
+		mov rax, delta
+		lock xadd [rdx], rax // *dest += rax; rax receives the previous value
+		add rax, delta // previous value + delta = the value now stored at dest
+		mov result, rax
+		2:
+		; =r (result)
+		; r (dest)
+		  r (delta)
+		; rax
+		  rdx
+		  memory
+	}
+	return result
+}
+
+// swap_u64 atomically stores value at dest and returns the old value.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn swap_u64(dest &u64, value u64) u64 {
+	mut old := u64(0)
+	asm volatile amd64 {
+		mov rdx, dest
+		test rdx, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the exchange
+		jmp '2f'
+		1:
+		mov rax, value
+		xchg [rdx], rax // exchange rax with *dest; xchg with a memory operand is implicitly locked
+		mov old, rax // rax now holds the value that was at dest
+		2:
+		; =r (old)
+		; r (dest)
+		  r (value)
+		; rax
+		  rdx
+		  memory
+	}
+	return old
+}
+
+// cas_u64 performs a compare-and-swap operation.
+// If the current value at addr equals old, it atomically stores new.
+// Returns true if the swap was performed, false otherwise.
+// The operation is performed with sequential consistency.
+// Panics if addr is not 8-byte aligned.
+pub fn cas_u64(addr &u64, old u64, new u64) bool {
+	mut swapped := false
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 7 // nonzero low 3 bits => addr is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the CAS
+		jmp '2f'
+		1:
+		mov rax, old // cmpxchg compares against rax
+		mov rcx, new
+		lock cmpxchgq [rdx], rcx // if *addr == rax: *addr = rcx, ZF = 1; else rax = *addr, ZF = 0
+		sete al // al = 1 exactly when the exchange happened
+		mov swapped, al
+		2:
+		; =r (swapped)
+		; r (addr)
+		  r (old)
+		  r (new)
+		; rax
+		  rcx
+		  rdx
+		  memory
+	}
+	return swapped
+}
diff --git a/vlib/x/atomics/atomics.i386.v b/vlib/x/atomics/atomics.i386.v
new file mode 100644
index 000000000..55655254b
--- /dev/null
+++ b/vlib/x/atomics/atomics.i386.v
@@ -0,0 +1,606 @@
+module atomics
+
+// add_i32 atomically adds delta to the value at dest and returns the new value,
+// i.e. the value stored at dest after the addition (previous value + delta).
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn add_i32(dest &i32, delta i32) i32 {
+	mut result := i32(0)
+	asm volatile i386 {
+		mov edx, dest
+		test edx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the add
+		jmp '2f'
+		1:
+		mov eax, delta
+		lock xadd [edx], eax // *dest += eax; eax receives the previous value
+		add eax, delta // previous value + delta = the value now stored at dest
+		mov result, eax
+		2:
+		; =r (result)
+		; r (dest)
+		  r (delta)
+		; eax
+		  edx
+		  memory
+	}
+	return result
+}
+
+// swap_i32 atomically stores new value at dest and returns the old value.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn swap_i32(dest &i32, new i32) i32 {
+	mut old := i32(0)
+	asm volatile i386 {
+		mov edx, dest
+		test edx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the exchange
+		jmp '2f'
+		1:
+		mov eax, new
+		xchg [edx], eax // exchange eax with *dest; xchg with a memory operand is implicitly locked
+		mov old, eax // eax now holds the value that was at dest
+		2:
+		; =m (old)
+		; r (dest)
+		  r (new)
+		; eax
+		  edx
+		  memory
+	}
+	return old
+}
+
+// store_i32 atomically stores value at dest.
+// The store is done with xchg; the previous value it yields is discarded.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn store_i32(dest &i32, value i32) {
+	asm volatile i386 {
+		mov edx, dest
+		test edx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the store
+		jmp '2f'
+		1:
+		mov eax, value
+		xchg eax, [edx] // store via exchange; the old value left in eax is ignored
+		2:
+		; ; r (dest)
+		  r (value)
+		; eax
+		  edx
+		  memory
+	}
+}
+
+// load_i32 atomically loads and returns the value at num.
+// The read itself is a single plain mov from the aligned address.
+// The operation is performed with sequential consistency.
+// Panics if num is not 4-byte aligned.
+pub fn load_i32(num &i32) i32 {
+	mut out := i32(0)
+	asm volatile i386 {
+		mov edx, num
+		test edx, 3 // nonzero low 2 bits => num is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the load
+		jmp '2f'
+		1:
+		mov eax, [edx]
+		mov out, eax
+		2:
+		; =r (out)
+		; r (num)
+		; eax
+		  edx
+		  memory
+	}
+	return out
+}
+
+// cas_i32 performs a compare-and-swap operation.
+// If the current value at addr equals old, it atomically stores new.
+// Returns true if the swap was performed, false otherwise.
+// The operation is performed with sequential consistency.
+// Panics if addr is not 4-byte aligned.
+pub fn cas_i32(addr &i32, old i32, new i32) bool {
+	mut swapped := false
+	asm volatile i386 {
+		mov edx, addr
+		test edx, 3 // nonzero low 2 bits => addr is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the CAS
+		jmp '2f'
+		1:
+		mov eax, old // cmpxchg compares against eax
+		mov ecx, new
+		lock cmpxchg [edx], ecx // if *addr == eax: *addr = ecx, ZF = 1; else eax = *addr, ZF = 0
+		sete al // al = 1 exactly when the exchange happened
+		mov swapped, al
+		2:
+		; =m (swapped)
+		; r (addr)
+		  r (old)
+		  r (new)
+		; eax
+		  ecx
+		  edx
+		  memory
+	}
+	return swapped
+}
+
+// store_i64 atomically stores value at dest using MMX instructions:
+// the 64-bit value is moved through mm0 so it is written with one 8-byte store.
+// The operation is performed with sequential consistency.
+// Requires MMX support. Panics if dest is not 8-byte aligned.
+pub fn store_i64(dest &i64, value i64) {
+	asm volatile i386 {
+		mov esi, dest
+		test esi, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the store
+		jmp '2f'
+		1:
+		movq mm0, value
+		movq [esi], mm0 // single 8-byte store of the whole value
+		emms // clear MMX state so later x87 floating-point code is not disturbed
+		xor eax, eax
+		lock xaddl [esp], eax // locked add of 0 to the stack top: data unchanged; NOTE(review): presumably the full fence that orders the plain MMX store - confirm
+		2:
+		; ; r (dest)
+		  m (value)
+		; esi
+		  eax
+		  mm0
+		  memory
+	}
+}
+
+// load_i64 atomically loads and returns the value at num using MMX instructions:
+// the value is read with one 8-byte load into mm0 and then copied to the result.
+// The operation is performed with sequential consistency.
+// Requires MMX support. Panics if num is not 8-byte aligned.
+pub fn load_i64(num &i64) i64 {
+	mut out := i64(0)
+	asm volatile i386 {
+		mov esi, num
+		test esi, 7 // nonzero low 3 bits => num is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the load
+		jmp '2f'
+		1:
+		movq mm0, [esi] // single 8-byte load of the whole value
+		movq out, mm0
+		emms // clear MMX state so later x87 floating-point code is not disturbed
+		2:
+		; =m (out)
+		; r (num)
+		; esi
+		  mm0
+		  memory
+	}
+	return out
+}
+
+// add_i64 atomically adds delta to the value at dest and returns the new value.
+// Uses a compare-and-swap loop. The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn add_i64(dest &i64, delta i64) i64 {
+	// cmpxchg8b works on 32-bit register pairs, so delta and the result travel as lo/hi halves.
+	mut delta_lo := u32(u64(delta) & 0xFFFF_FFFF)
+	mut delta_hi := u32(u64(delta) >> 32)
+	mut res_lo := u32(0)
+	mut res_hi := u32(0)
+	asm volatile i386 {
+		mov esi, dest
+		test esi, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the add
+		jmp '2f'
+		1:
+		3:
+		mov eax, [esi] // edx:eax = snapshot of the current value (two 32-bit reads,
+		mov edx, [esi + 4] // validated as a whole by cmpxchg8b below)
+		mov ebx, eax
+		mov ecx, edx
+		add ebx, delta_lo
+		adc ecx, delta_hi // ecx:ebx = snapshot + delta, carry propagated into the high half
+		lock cmpxchg8b [esi] // if [esi] == edx:eax: store ecx:ebx, ZF = 1; else edx:eax = [esi], ZF = 0
+		jnz '3b' // the value changed since the snapshot: retry
+		mov res_lo, ebx
+		mov res_hi, ecx
+		2:
+		// NOTE(review): res_lo/res_hi are written above but listed in the input section - confirm this is intended.
+		; ; r (dest)
+		  m (delta_lo)
+		  m (delta_hi)
+		  m (res_lo)
+		  m (res_hi)
+		; eax
+		  ebx
+		  ecx
+		  edx
+		  esi
+		  memory
+	}
+	return i64(u64(res_lo) | (u64(res_hi) << 32))
+}
+
+// swap_i64 atomically stores value at dest and returns the old value.
+// Uses a compare-and-swap loop. The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn swap_i64(dest &i64, value i64) i64 {
+	// value_lo/value_hi carry the new value in and are reused to carry the old value out.
+	mut value_lo := u32(u64(value) & 0xFFFF_FFFF)
+	mut value_hi := u32(u64(value) >> 32)
+	asm volatile i386 {
+		mov esi, dest
+		test esi, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the exchange
+		jmp '2f'
+		1:
+		3:
+		mov eax, [esi] // edx:eax = snapshot of the current value
+		mov edx, [esi + 4]
+		mov ebx, value_lo // ecx:ebx = replacement value
+		mov ecx, value_hi
+		lock cmpxchg8b [esi] // if [esi] == edx:eax: store ecx:ebx, ZF = 1; else edx:eax = [esi], ZF = 0
+		jnz '3b' // the value changed since the snapshot: retry
+		mov value_lo, eax // edx:eax is the value that was replaced
+		mov value_hi, edx
+		2:
+		// NOTE(review): value_lo/value_hi are written above but listed in the input section - confirm this is intended.
+		; ; r (dest)
+		  m (value_lo)
+		  m (value_hi)
+		; eax
+		  ebx
+		  ecx
+		  edx
+		  esi
+		  memory
+	}
+	return i64(u64(value_lo) | (u64(value_hi) << 32))
+}
+
+// cas_i64 performs a compare-and-swap operation.
+// If the current value at addr equals old, it atomically stores new.
+// Returns true if the swap was performed, false otherwise.
+// The operation is performed with sequential consistency.
+// Panics if addr is not 8-byte aligned.
+pub fn cas_i64(addr &i64, old i64, new i64) bool {
+	mut swapped := false
+	// cmpxchg8b works on 32-bit register pairs, so both 64-bit values are split into lo/hi halves.
+	mut old_lo := u32(u64(old) & 0xFFFF_FFFF)
+	mut old_hi := u32(u64(old) >> 32)
+	mut new_lo := u32(u64(new) & 0xFFFF_FFFF)
+	mut new_hi := u32(u64(new) >> 32)
+	asm volatile i386 {
+		mov esi, addr
+		test esi, 7 // nonzero low 3 bits => addr is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the CAS
+		jmp '2f'
+		1:
+		mov eax, old_lo // edx:eax = expected value
+		mov edx, old_hi
+		mov ebx, new_lo // ecx:ebx = replacement value
+		mov ecx, new_hi
+		lock cmpxchg8b [esi] // if [esi] == edx:eax: store ecx:ebx, ZF = 1; else edx:eax = [esi], ZF = 0
+		sete al // al = 1 exactly when the exchange happened
+		mov swapped, al
+		2:
+		; =m (swapped)
+		; r (addr)
+		  m (old_lo)
+		  m (old_hi)
+		  m (new_lo)
+		  m (new_hi)
+		; eax
+		  ebx
+		  ecx
+		  edx
+		  esi
+		  memory
+	}
+	return swapped
+}
+
+// add_u32 atomically adds delta to the value at dest and returns the new value,
+// i.e. the value stored at dest after the addition (previous value + delta).
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn add_u32(dest &u32, delta u32) u32 {
+	mut result := u32(0)
+	asm volatile i386 {
+		mov edx, dest
+		test edx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the add
+		jmp '2f'
+		1:
+		mov eax, delta
+		lock xadd [edx], eax // *dest += eax; eax receives the previous value
+		add eax, delta // previous value + delta = the value now stored at dest
+		mov result, eax
+		2:
+		; =r (result)
+		; r (dest)
+		  r (delta)
+		; eax
+		  edx
+		  memory
+	}
+	return result
+}
+
+// swap_u32 atomically stores new value at dest and returns the old value.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn swap_u32(dest &u32, new u32) u32 {
+	mut old := u32(0)
+	asm volatile i386 {
+		mov edx, dest
+		test edx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the exchange
+		jmp '2f'
+		1:
+		mov eax, new
+		xchg [edx], eax // exchange eax with *dest; xchg with a memory operand is implicitly locked
+		mov old, eax // eax now holds the value that was at dest
+		2:
+		; =m (old)
+		; r (dest)
+		  r (new)
+		; eax
+		  edx
+		  memory
+	}
+	return old
+}
+
+// store_u32 atomically stores value at dest.
+// The store is done with xchg; the previous value it yields is discarded.
+// The operation is performed with sequential consistency.
+// Panics if dest is not 4-byte aligned.
+pub fn store_u32(dest &u32, value u32) {
+	asm volatile i386 {
+		mov edx, dest
+		test edx, 3 // nonzero low 2 bits => dest is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the store
+		jmp '2f'
+		1:
+		mov eax, value
+		xchg eax, [edx] // store via exchange; the old value left in eax is ignored
+		2:
+		; ; r (dest)
+		  r (value)
+		; eax
+		  edx
+		  memory
+	}
+}
+
+// load_u32 atomically loads and returns the value at num.
+// The read itself is a single plain mov from the aligned address.
+// The operation is performed with sequential consistency.
+// Panics if num is not 4-byte aligned.
+pub fn load_u32(num &u32) u32 {
+	mut out := u32(0)
+	asm volatile i386 {
+		mov edx, num
+		test edx, 3 // nonzero low 2 bits => num is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the load
+		jmp '2f'
+		1:
+		mov eax, [edx]
+		mov out, eax
+		2:
+		; =r (out)
+		; r (num)
+		; eax
+		  edx
+		  memory
+	}
+	return out
+}
+
+// cas_u32 performs a compare-and-swap operation.
+// If the current value at addr equals old, it atomically stores new.
+// Returns true if the swap was performed, false otherwise.
+// The operation is performed with sequential consistency.
+// Panics if addr is not 4-byte aligned.
+pub fn cas_u32(addr &u32, old u32, new u32) bool {
+	mut swapped := false
+	asm volatile i386 {
+		mov edx, addr
+		test edx, 3 // nonzero low 2 bits => addr is not 4-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the CAS
+		jmp '2f'
+		1:
+		mov eax, old // cmpxchg compares against eax
+		mov ecx, new
+		lock cmpxchg [edx], ecx // if *addr == eax: *addr = ecx, ZF = 1; else eax = *addr, ZF = 0
+		sete al // al = 1 exactly when the exchange happened
+		mov swapped, al
+		2:
+		; =m (swapped)
+		; r (addr)
+		  r (old)
+		  r (new)
+		; eax
+		  ecx
+		  edx
+		  memory
+	}
+	return swapped
+}
+
+// load_u64 atomically loads and returns the value at num using MMX instructions:
+// the value is read with one 8-byte load into mm0 and then copied to the result.
+// The operation is performed with sequential consistency.
+// Requires MMX support. Panics if num is not 8-byte aligned.
+pub fn load_u64(num &u64) u64 {
+	mut out := u64(0)
+	asm volatile i386 {
+		mov esi, num
+		test esi, 7 // nonzero low 3 bits => num is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the load
+		jmp '2f'
+		1:
+		movq mm0, [esi] // single 8-byte load of the whole value
+		movq out, mm0
+		emms // clear MMX state so later x87 floating-point code is not disturbed
+		2:
+		; =m (out)
+		; r (num)
+		; esi
+		  mm0
+		  memory
+	}
+	return out
+}
+
+// store_u64 atomically stores value at dest using MMX instructions.
+// The operation is performed with sequential consistency.
+// Requires MMX support. Panics if dest is not 8-byte aligned.
+pub fn store_u64(dest &u64, value u64) {
+	asm volatile i386 {
+		mov esi, dest
+		test esi, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the store
+		jmp '2f'
+		1:
+		movq mm0, value
+		movq [esi], mm0 // single 8-byte store of the whole value
+		emms // clear MMX state so later x87 floating-point code is not disturbed
+		xor eax, eax
+		lock xaddl [esp], eax // locked add of 0 to the stack top: data unchanged; NOTE(review): presumably the full fence that orders the plain MMX store - confirm
+		2:
+		; ; r (dest)
+		  m (value)
+		; esi // overwritten by `mov esi, dest`, so it must be declared as clobbered (as store_i64 does)
+		  eax
+		  mm0
+		  memory
+	}
+}
+
+// add_u64 atomically adds delta to the value at dest and returns the new value.
+// Uses a compare-and-swap loop. The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn add_u64(dest &u64, delta u64) u64 {
+	// cmpxchg8b works on 32-bit register pairs, so delta and the result travel as lo/hi halves.
+	mut delta_lo := u32(delta & 0xFFFF_FFFF)
+	mut delta_hi := u32(delta >> 32)
+	mut res_lo := u32(0)
+	mut res_hi := u32(0)
+	asm volatile i386 {
+		mov esi, dest
+		test esi, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the add
+		jmp '2f'
+		1:
+		3:
+		mov eax, [esi] // edx:eax = snapshot of the current value (two 32-bit reads,
+		mov edx, [esi + 4] // validated as a whole by cmpxchg8b below)
+		mov ebx, eax
+		mov ecx, edx
+		add ebx, delta_lo
+		adc ecx, delta_hi // ecx:ebx = snapshot + delta, carry propagated into the high half
+		lock cmpxchg8b [esi] // if [esi] == edx:eax: store ecx:ebx, ZF = 1; else edx:eax = [esi], ZF = 0
+		jnz '3b' // the value changed since the snapshot: retry
+		mov res_lo, ebx
+		mov res_hi, ecx
+		2:
+		// NOTE(review): res_lo/res_hi are written above but listed in the input section - confirm this is intended.
+		; ; r (dest)
+		  m (delta_lo)
+		  m (delta_hi)
+		  m (res_lo)
+		  m (res_hi)
+		; eax
+		  ebx
+		  ecx
+		  edx
+		  esi
+		  memory
+	}
+	return u64(res_lo) | (u64(res_hi) << 32)
+}
+
+// swap_u64 atomically stores value at dest and returns the old value.
+// Uses a compare-and-swap loop. The operation is performed with sequential consistency.
+// Panics if dest is not 8-byte aligned.
+pub fn swap_u64(dest &u64, value u64) u64 {
+	mut old := u64(0)
+	// value_lo/value_hi carry the new value in and are reused to carry the old value out.
+	mut value_lo := u32(value & 0xFFFF_FFFF)
+	mut value_hi := u32(value >> 32)
+	asm volatile i386 {
+		mov esi, dest
+		test esi, 7 // nonzero low 3 bits => dest is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the exchange
+		jmp '2f'
+		1:
+		3:
+		mov eax, [esi] // edx:eax = snapshot of the current value
+		mov edx, [esi + 4]
+		mov ebx, value_lo // ecx:ebx = replacement value
+		mov ecx, value_hi
+		lock cmpxchg8b [esi] // if [esi] == edx:eax: store ecx:ebx, ZF = 1; else edx:eax = [esi], ZF = 0
+		jnz '3b' // the value changed since the snapshot: retry
+		mov value_lo, eax // edx:eax is the value that was replaced
+		mov value_hi, edx
+		2:
+		// NOTE(review): value_lo/value_hi are written above but listed in the input section - confirm this is intended.
+		; ; r (dest)
+		  m (value_lo)
+		  m (value_hi)
+		; eax
+		  ebx
+		  ecx
+		  edx
+		  esi
+		  memory
+	}
+	old = u64(value_lo) | (u64(value_hi) << 32)
+	return old
+}
+
+// cas_u64 performs a compare-and-swap operation.
+// If the current value at addr equals old, it atomically stores new.
+// Returns true if the swap was performed, false otherwise.
+// The operation is performed with sequential consistency.
+// Panics if addr is not 8-byte aligned.
+pub fn cas_u64(addr &u64, old u64, new u64) bool {
+	mut swapped := false
+	// cmpxchg8b works on 32-bit register pairs, so both 64-bit values are split into lo/hi halves.
+	mut old_lo := u32(old & 0xFFFF_FFFF)
+	mut old_hi := u32(old >> 32)
+	mut new_lo := u32(new & 0xFFFF_FFFF)
+	mut new_hi := u32(new >> 32)
+	asm volatile i386 {
+		mov esi, addr
+		test esi, 7 // nonzero low 3 bits => addr is not 8-byte aligned
+		jz '1f'
+		call panicUnaligned // misaligned: hand off to panicUnaligned, then skip the CAS
+		jmp '2f'
+		1:
+		mov eax, old_lo // edx:eax = expected value
+		mov edx, old_hi
+		mov ebx, new_lo // ecx:ebx = replacement value
+		mov ecx, new_hi
+		lock cmpxchg8b [esi] // if [esi] == edx:eax: store ecx:ebx, ZF = 1; else edx:eax = [esi], ZF = 0
+		sete al // al = 1 exactly when the exchange happened
+		mov swapped, al
+		2:
+		; =m (swapped)
+		; r (addr)
+		  m (old_lo)
+		  m (old_hi)
+		  m (new_lo)
+		  m (new_hi)
+		; eax
+		  ebx
+		  ecx
+		  edx
+		  esi
+		  memory
+	}
+	return swapped
+}
diff --git a/vlib/x/atomics/benchmarks/README.md b/vlib/x/atomics/benchmarks/README.md
new file mode 100644
index 000000000..8e14d8cda
--- /dev/null
+++ b/vlib/x/atomics/benchmarks/README.md
@@ -0,0 +1,123 @@
+### Environment
+
+- CPU: AMD Ryzen 9 9950X3D (16C / 32T)
+- RAM: 64 GiB
+- OS: EndeavourOS (Linux, kernel 6.18.6-arch1-1)
+- Compiler:
+  - amd64: v -prod -cc gcc -gc none
+  - i386: v -keepc -cc i686-linux-gnu-gcc -prod -m32 -arch i386 -cflags -mmmx -w -gc none
+
+### How to run
+
+```bash
+# amd64
+v -prod -cc gcc -gc none run benchmarks/atomic_benchmark.v
+
+# i386
+v -keepc -cc i686-linux-gnu-gcc -prod -m32 -arch i386 -cflags -mmmx -w -gc none run benchmarks/atomic_benchmark.v
+```
+
+### Results (ns/op, 100M iterations)
+
+```
+AMD64
+=====
+
+Command:
+v -prod -cc gcc -gc none run atomic_benchmark.v
+
+u64 store std: 3.788 ns/op (total: 378.783ms, iters: 100000000)
+u64 store custom: 3.773 ns/op (total: 377.301ms, iters: 100000000)
+u64 load std: 1.078 ns/op (total: 107.848ms, iters: 100000000)
+u64 load custom: 1.084 ns/op (total: 108.381ms, iters: 100000000)
+u64 add std: 3.601 ns/op (total: 360.067ms, iters: 100000000)
+u64 add custom: 3.782
ns/op (total: 378.213ms, iters: 100000000) +u64 swap std (exchange): 3.805 ns/op (total: 380.520ms, iters: 100000000) +u64 swap custom: 3.835 ns/op (total: 383.493ms, iters: 100000000) +u64 cas std: 3.824 ns/op (total: 382.391ms, iters: 100000000) +u64 cas custom: 3.783 ns/op (total: 378.264ms, iters: 100000000) + +u32 store std: 3.783 ns/op (total: 378.346ms, iters: 100000000) +u32 store custom: 3.822 ns/op (total: 382.245ms, iters: 100000000) +u32 load std: 1.084 ns/op (total: 108.427ms, iters: 100000000) +u32 load custom: 1.085 ns/op (total: 108.536ms, iters: 100000000) +u32 add std: 3.663 ns/op (total: 366.308ms, iters: 100000000) +u32 add custom: 3.857 ns/op (total: 385.722ms, iters: 100000000) +u32 swap std (exchange): 3.855 ns/op (total: 385.503ms, iters: 100000000) +u32 swap custom: 3.859 ns/op (total: 385.892ms, iters: 100000000) +u32 cas std: 3.87 ns/op (total: 387.025ms, iters: 100000000) +u32 cas custom: 3.837 ns/op (total: 383.680ms, iters: 100000000) + +i64 store std (via u64): 3.784 ns/op (total: 378.377ms, iters: 100000000) +i64 store custom: 3.79 ns/op (total: 378.993ms, iters: 100000000) +i64 load std (via u64): 0.935 ns/op (total: 93.519ms, iters: 100000000) +i64 load custom: 0.864 ns/op (total: 86.350ms, iters: 100000000) +i64 add std (via u64): 3.608 ns/op (total: 360.752ms, iters: 100000000) +i64 add custom: 3.843 ns/op (total: 384.319ms, iters: 100000000) +i64 swap std (exchange u64): 3.826 ns/op (total: 382.621ms, iters: 100000000) +i64 swap custom: 3.835 ns/op (total: 383.513ms, iters: 100000000) +i64 cas std (via u64): 3.84 ns/op (total: 383.988ms, iters: 100000000) +i64 cas custom: 3.847 ns/op (total: 384.733ms, iters: 100000000) + +i32 store std (via u32): 3.84 ns/op (total: 383.983ms, iters: 100000000) +i32 store custom: 3.841 ns/op (total: 384.068ms, iters: 100000000) +i32 load std (via u32): 1.1 ns/op (total: 109.956ms, iters: 100000000) +i32 load custom: 1.1 ns/op (total: 109.978ms, iters: 100000000) +i32 add std (via u32): 3.659 
ns/op (total: 365.907ms, iters: 100000000) +i32 add custom: 3.846 ns/op (total: 384.574ms, iters: 100000000) +i32 swap std (exchange u32): 3.848 ns/op (total: 384.830ms, iters: 100000000) +i32 swap custom: 3.836 ns/op (total: 383.562ms, iters: 100000000) +i32 cas std (via u32): 3.837 ns/op (total: 383.690ms, iters: 100000000) +i32 cas custom: 3.815 ns/op (total: 381.453ms, iters: 100000000) + + +I386 +==== + +Command: +v -keepc -cc i686-linux-gnu-gcc -prod -m32 -arch i386 -cflags -mmmx -w -gc none run benchmarks/atomic_benchmark.v + +u64 store std: 9.575 ns/op (total: 957.485ms, iters: 100000000) +u64 store custom: 7.703 ns/op (total: 770.251ms, iters: 100000000) +u64 load std: 1.769 ns/op (total: 176.860ms, iters: 100000000) +u64 load custom: 1.892 ns/op (total: 189.238ms, iters: 100000000) +u64 add std: 5.544 ns/op (total: 554.431ms, iters: 100000000) +u64 add custom: 5.31 ns/op (total: 530.964ms, iters: 100000000) +u64 swap std: 5.32 ns/op (total: 531.988ms, iters: 100000000) +u64 swap custom: 5.242 ns/op (total: 524.175ms, iters: 100000000) +u64 cas std: 4.948 ns/op (total: 494.824ms, iters: 100000000) +u64 cas custom: 5.268 ns/op (total: 526.833ms, iters: 100000000) + +u32 store std: 3.896 ns/op (total: 389.574ms, iters: 100000000) +u32 store custom: 4.067 ns/op (total: 406.712ms, iters: 100000000) +u32 load std: 1.132 ns/op (total: 113.242ms, iters: 100000000) +u32 load custom: 1.135 ns/op (total: 113.523ms, iters: 100000000) +u32 add std: 3.951 ns/op (total: 395.090ms, iters: 100000000) +u32 add custom: 4.141 ns/op (total: 414.139ms, iters: 100000000) +u32 swap std: 4.136 ns/op (total: 413.586ms, iters: 100000000) +u32 swap custom: 4.138 ns/op (total: 413.812ms, iters: 100000000) +u32 cas std: 4.136 ns/op (total: 413.644ms, iters: 100000000) +u32 cas custom: 4.705 ns/op (total: 470.505ms, iters: 100000000) + +i64 store std: 9.643 ns/op (total: 964.327ms, iters: 100000000) +i64 store custom: 7.373 ns/op (total: 737.327ms, iters: 100000000) +i64 load std: 1.7 
ns/op (total: 169.983ms, iters: 100000000) +i64 load custom: 1.824 ns/op (total: 182.396ms, iters: 100000000) +i64 add std: 5.27 ns/op (total: 526.950ms, iters: 100000000) +i64 add custom: 5.268 ns/op (total: 526.833ms, iters: 100000000) +i64 swap std: 4.917 ns/op (total: 491.690ms, iters: 100000000) +i64 swap custom: 5.095 ns/op (total: 509.546ms, iters: 100000000) +i64 cas std: 4.931 ns/op (total: 493.122ms, iters: 100000000) +i64 cas custom: 5.268 ns/op (total: 526.774ms, iters: 100000000) + +i32 store std: 4.018 ns/op (total: 401.776ms, iters: 100000000) +i32 store custom: 4.138 ns/op (total: 413.820ms, iters: 100000000) +i32 load std: 1.132 ns/op (total: 113.185ms, iters: 100000000) +i32 load custom: 1.134 ns/op (total: 113.359ms, iters: 100000000) +i32 add std: 3.949 ns/op (total: 394.853ms, iters: 100000000) +i32 add custom: 4.139 ns/op (total: 413.947ms, iters: 100000000) +i32 swap std: 4.135 ns/op (total: 413.485ms, iters: 100000000) +i32 swap custom: 4.136 ns/op (total: 413.623ms, iters: 100000000) +i32 cas std: 4.137 ns/op (total: 413.702ms, iters: 100000000) +i32 cas custom: 4.704 ns/op (total: 470.402ms, iters: 100000000) +``` \ No newline at end of file diff --git a/vlib/x/atomics/benchmarks/atomic_benchmark.v b/vlib/x/atomics/benchmarks/atomic_benchmark.v new file mode 100644 index 000000000..5e2752983 --- /dev/null +++ b/vlib/x/atomics/benchmarks/atomic_benchmark.v @@ -0,0 +1,376 @@ +module main + +import x.atomics +import time + +$if windows { + #include "@VEXEROOT/thirdparty/stdatomic/win/atomic.h" +} $else { + #include "@VEXEROOT/thirdparty/stdatomic/nix/atomic.h" +} + +fn C.atomic_store_u32(voidptr, u32) +fn C.atomic_load_u32(voidptr) u32 +fn C.atomic_fetch_add_u32(voidptr, u32) u32 +fn C.atomic_compare_exchange_strong_u32(voidptr, voidptr, u32) bool +fn C.atomic_exchange_u32(voidptr, u32) u32 + +fn C.atomic_store_u64(voidptr, u64) +fn C.atomic_load_u64(voidptr) u64 +fn C.atomic_fetch_add_u64(voidptr, u64) u64 +fn 
C.atomic_compare_exchange_strong_u64(voidptr, voidptr, u64) bool +fn C.atomic_exchange_u64(voidptr, u64) u64 + +const iterations = 100_000_000 + +fn keepalive_u64(x u64) { + asm volatile amd64 { + nop + ; ; r (x) + } +} + +fn keepalive_u32(x u32) { + asm volatile amd64 { + nop + ; ; r (x) + } +} + +fn keepalive_i64(x i64) { + asm volatile amd64 { + nop + ; ; r (x) + } +} + +fn keepalive_i32(x i32) { + asm volatile amd64 { + nop + ; ; r (x) + } +} + +fn bench_u64(name string, f fn (&u64, u64), iters int) { + mut v := u64(0) + + for i in 0 .. 100_000 { + f(&v, u64(i)) + } + + mut sw := time.new_stopwatch() + for i in 0 .. iters { + f(&v, u64(i)) + } + + elapsed := sw.elapsed() + ns_per_op := f64(elapsed.nanoseconds()) / f64(iters) + + keepalive_u64(v) + println('${name:-17s}: ${ns_per_op:6.3f} ns/op (total: ${elapsed:9}, iters: ${iters})') +} + +fn bench_u32(name string, f fn (&u32, u32), iters int) { + mut v := u32(0) + + for i in 0 .. 100_000 { + f(&v, u32(i)) + } + + mut sw := time.new_stopwatch() + for i in 0 .. iters { + f(&v, u32(i)) + } + + elapsed := sw.elapsed() + ns_per_op := f64(elapsed.nanoseconds()) / f64(iters) + + keepalive_u32(v) + println('${name:-17s}: ${ns_per_op:6.3f} ns/op (total: ${elapsed:9}, iters: ${iters})') +} + +fn bench_i64(name string, f fn (&i64, i64), iters int) { + mut v := i64(0) + + for i in 0 .. 100_000 { + f(&v, i64(i)) + } + + mut sw := time.new_stopwatch() + for i in 0 .. iters { + f(&v, i64(i)) + } + + elapsed := sw.elapsed() + ns_per_op := f64(elapsed.nanoseconds()) / f64(iters) + + keepalive_i64(v) + println('${name:-17s}: ${ns_per_op:6.3f} ns/op (total: ${elapsed:9}, iters: ${iters})') +} + +fn bench_i32(name string, f fn (&i32, i32), iters int) { + mut v := i32(0) + + for i in 0 .. 100_000 { + f(&v, i32(i)) + } + + mut sw := time.new_stopwatch() + for i in 0 .. 
iters { + f(&v, i32(i)) + } + + elapsed := sw.elapsed() + ns_per_op := f64(elapsed.nanoseconds()) / f64(iters) + + keepalive_i32(v) + println('${name:-17s}: ${ns_per_op:6.3f} ns/op (total: ${elapsed:9}, iters: ${iters})') +} + +fn std_store_u64(addr &u64, val u64) { + C.atomic_store_u64(voidptr(addr), val) +} + +fn custom_store_u64(addr &u64, val u64) { + atomics.store_u64(addr, val) +} + +fn std_load_u64(addr &u64, _ u64) { + _ = C.atomic_load_u64(voidptr(addr)) +} + +fn custom_load_u64(addr &u64, _ u64) { + _ = atomics.load_u64(addr) +} + +fn std_add_u64(addr &u64, delta u64) { + _ = C.atomic_fetch_add_u64(voidptr(addr), delta) +} + +fn custom_add_u64(addr &u64, delta u64) { + _ = atomics.add_u64(addr, delta) +} + +fn std_swap_u64(addr &u64, val u64) { + _ = C.atomic_exchange_u64(voidptr(addr), val) +} + +fn custom_swap_u64(addr &u64, val u64) { + _ = atomics.swap_u64(addr, val) +} + +fn std_cas_u64(addr &u64, val u64) { + mut expected := u64(0) + _ = C.atomic_compare_exchange_strong_u64(voidptr(addr), voidptr(&expected), val) +} + +fn custom_cas_u64(addr &u64, val u64) { + _ = atomics.cas_u64(addr, 0, val) +} + +fn std_store_u32(addr &u32, val u32) { + C.atomic_store_u32(voidptr(addr), val) +} + +fn custom_store_u32(addr &u32, val u32) { + atomics.store_u32(addr, val) +} + +fn std_load_u32(addr &u32, _ u32) { + _ = C.atomic_load_u32(voidptr(addr)) +} + +fn custom_load_u32(addr &u32, _ u32) { + _ = atomics.load_u32(addr) +} + +fn std_add_u32(addr &u32, delta u32) { + _ = C.atomic_fetch_add_u32(voidptr(addr), delta) +} + +fn custom_add_u32(addr &u32, delta u32) { + _ = atomics.add_u32(addr, delta) +} + +fn std_swap_u32(addr &u32, val u32) { + _ = C.atomic_exchange_u32(voidptr(addr), val) +} + +fn custom_swap_u32(addr &u32, val u32) { + _ = atomics.swap_u32(addr, val) +} + +fn std_cas_u32(addr &u32, val u32) { + mut expected := u32(0) + _ = C.atomic_compare_exchange_strong_u32(voidptr(addr), voidptr(&expected), val) +} + +fn custom_cas_u32(addr &u32, val u32) { + _ 
= atomics.cas_u32(addr, 0, val) +} + +fn std_store_i64(addr &i64, val i64) { + unsafe { C.atomic_store_u64(voidptr(addr), u64(val)) } +} + +fn custom_store_i64(addr &i64, val i64) { + atomics.store_i64(addr, val) +} + +fn std_load_i64(addr &i64, _ i64) { + unsafe { + _ = C.atomic_load_u64(voidptr(addr)) + } +} + +fn custom_load_i64(addr &i64, _ i64) { + _ = atomics.load_i64(addr) +} + +fn std_add_i64(addr &i64, delta i64) { + unsafe { + _ = C.atomic_fetch_add_u64(voidptr(addr), u64(delta)) + } +} + +fn custom_add_i64(addr &i64, delta i64) { + _ = atomics.add_i64(addr, delta) +} + +fn std_swap_i64(addr &i64, val i64) { + unsafe { + _ = C.atomic_exchange_u64(voidptr(addr), u64(val)) + } +} + +fn custom_swap_i64(addr &i64, val i64) { + _ = atomics.swap_i64(addr, val) +} + +fn std_cas_i64(addr &i64, val i64) { + unsafe { + mut expected := u64(0) + _ = C.atomic_compare_exchange_strong_u64(voidptr(addr), voidptr(&expected), u64(val)) + } +} + +fn custom_cas_i64(addr &i64, val i64) { + _ = atomics.cas_i64(addr, 0, val) +} + +fn std_store_i32(addr &i32, val i32) { + unsafe { C.atomic_store_u32(voidptr(addr), u32(val)) } +} + +fn custom_store_i32(addr &i32, val i32) { + atomics.store_i32(addr, val) +} + +fn std_load_i32(addr &i32, _ i32) { + unsafe { + _ = C.atomic_load_u32(voidptr(addr)) + } +} + +fn custom_load_i32(addr &i32, _ i32) { + _ = atomics.load_i32(addr) +} + +fn std_add_i32(addr &i32, delta i32) { + unsafe { + _ = C.atomic_fetch_add_u32(voidptr(addr), u32(delta)) + } +} + +fn custom_add_i32(addr &i32, delta i32) { + _ = atomics.add_i32(addr, delta) +} + +fn std_swap_i32(addr &i32, val i32) { + unsafe { + _ = C.atomic_exchange_u32(voidptr(addr), u32(val)) + } +} + +fn custom_swap_i32(addr &i32, val i32) { + _ = atomics.swap_i32(addr, val) +} + +fn std_cas_i32(addr &i32, val i32) { + unsafe { + mut expected := u32(0) + _ = C.atomic_compare_exchange_strong_u32(voidptr(addr), voidptr(&expected), u32(val)) + } +} + +fn custom_cas_i32(addr &i32, val i32) { + _ = 
atomics.cas_i32(addr, 0, val) +} + +fn main() { + bench_u64('u64 store std', std_store_u64, iterations) + bench_u64('u64 store custom', custom_store_u64, iterations) + + bench_u64('u64 load std', std_load_u64, iterations) + bench_u64('u64 load custom', custom_load_u64, iterations) + + bench_u64('u64 add std', std_add_u64, iterations) + bench_u64('u64 add custom', custom_add_u64, iterations) + + bench_u64('u64 swap std', std_swap_u64, iterations) + bench_u64('u64 swap custom', custom_swap_u64, iterations) + + bench_u64('u64 cas std', std_cas_u64, iterations) + bench_u64('u64 cas custom', custom_cas_u64, iterations) + + println('') + + bench_u32('u32 store std', std_store_u32, iterations) + bench_u32('u32 store custom', custom_store_u32, iterations) + + bench_u32('u32 load std', std_load_u32, iterations) + bench_u32('u32 load custom', custom_load_u32, iterations) + + bench_u32('u32 add std', std_add_u32, iterations) + bench_u32('u32 add custom', custom_add_u32, iterations) + + bench_u32('u32 swap std', std_swap_u32, iterations) + bench_u32('u32 swap custom', custom_swap_u32, iterations) + + bench_u32('u32 cas std', std_cas_u32, iterations) + bench_u32('u32 cas custom', custom_cas_u32, iterations) + + println('') + + bench_i64('i64 store std', std_store_i64, iterations) + bench_i64('i64 store custom', custom_store_i64, iterations) + + bench_i64('i64 load std', std_load_i64, iterations) + bench_i64('i64 load custom', custom_load_i64, iterations) + + bench_i64('i64 add std', std_add_i64, iterations) + bench_i64('i64 add custom', custom_add_i64, iterations) + + bench_i64('i64 swap std', std_swap_i64, iterations) + bench_i64('i64 swap custom', custom_swap_i64, iterations) + + bench_i64('i64 cas std', std_cas_i64, iterations) + bench_i64('i64 cas custom', custom_cas_i64, iterations) + + println('') + + bench_i32('i32 store std', std_store_i32, iterations) + bench_i32('i32 store custom', custom_store_i32, iterations) + + bench_i32('i32 load std', std_load_i32, iterations) + 
bench_i32('i32 load custom', custom_load_i32, iterations) + + bench_i32('i32 add std', std_add_i32, iterations) + bench_i32('i32 add custom', custom_add_i32, iterations) + + bench_i32('i32 swap std', std_swap_i32, iterations) + bench_i32('i32 swap custom', custom_swap_i32, iterations) + + bench_i32('i32 cas std', std_cas_i32, iterations) + bench_i32('i32 cas custom', custom_cas_i32, iterations) +} diff --git a/vlib/x/atomics/examples/basic.v b/vlib/x/atomics/examples/basic.v new file mode 100644 index 000000000..3cac83097 --- /dev/null +++ b/vlib/x/atomics/examples/basic.v @@ -0,0 +1,23 @@ +module main + +import x.atomics + +fn main() { + // Basic atomic load and store operations + mut value := i32(0) + + // Atomically store a value + atomics.store_i32(&value, 42) + + // Atomically load the value + loaded := atomics.load_i32(&value) + println('Loaded value: ${loaded}') // Output: 42 + + // Atomic add: returns the new value after addition + new_value := atomics.add_i32(&value, 10) + println('After add: ${new_value}') // Output: 52 + + // Atomic swap: returns the old value + old := atomics.swap_i32(&value, 100) + println('Old value: ${old}, new value: ${atomics.load_i32(&value)}') // Output: 52, 100 +} diff --git a/vlib/x/atomics/examples/counter.v b/vlib/x/atomics/examples/counter.v new file mode 100644 index 000000000..8e7299e1d --- /dev/null +++ b/vlib/x/atomics/examples/counter.v @@ -0,0 +1,40 @@ +module main + +import x.atomics + +fn increment(counter &i64) { + atomics.add_i64(counter, 1) +} + +fn worker(counter &i64, iterations int) { + for _ in 0 .. iterations { + increment(counter) + } +} + +fn main() { + mut counter := i64(0) + + num_threads := 4 + increments_per_thread := 10000 + + mut threads := []thread{} + + for _ in 0 .. 
num_threads { + threads << spawn worker(&counter, increments_per_thread) + } + + threads.wait() + + expected := i64(num_threads * increments_per_thread) + actual := atomics.load_i64(&counter) + + println('Expected: ${expected}') + println('Actual: ${actual}') + + if actual == expected { + println('Counter is correct') + } else { + println('Counter mismatch - race condition detected') + } +} diff --git a/vlib/x/atomics/examples/spinlock.v b/vlib/x/atomics/examples/spinlock.v new file mode 100644 index 000000000..3e4cf274a --- /dev/null +++ b/vlib/x/atomics/examples/spinlock.v @@ -0,0 +1,59 @@ +module main + +import x.atomics + +struct SpinLock { +mut: + state u32 // 0 = unlocked, 1 = locked +} + +fn acquire(mut spinlock SpinLock) { + for !atomics.cas_u32(&spinlock.state, 0, 1) { + // Busy-wait + } +} + +fn release(mut spinlock SpinLock) { + atomics.store_u32(&spinlock.state, 0) +} + +struct SharedData { +mut: + spinlock SpinLock + value int +} + +fn worker(mut data SharedData, iterations int) { + for _ in 0 .. iterations { + acquire(mut data.spinlock) + data.value++ + release(mut data.spinlock) + } +} + +fn main() { + mut data := &SharedData{} + + num_threads := 4 + iterations_per_thread := 10000 + + mut threads := []thread{} + + for _ in 0 .. 
num_threads { + threads << spawn worker(mut data, iterations_per_thread) + } + + threads.wait() + + expected := num_threads * iterations_per_thread + actual := data.value + + println('Expected: ${expected}') + println('Actual: ${actual}') + + if actual == expected { + println('Spinlock works correctly') + } else { + println('Race condition detected') + } +} diff --git a/vlib/x/atomics/i32_test.v b/vlib/x/atomics/i32_test.v new file mode 100644 index 000000000..c6d3264b0 --- /dev/null +++ b/vlib/x/atomics/i32_test.v @@ -0,0 +1,189 @@ +// vtest build: !(macos || windows) + +module atomics + +fn test_cas_i32_basic() { + mut x := i32(10) + ok := cas_i32(&x, 10, 20) + assert ok == true + assert x == 20 +} + +fn test_cas_fail() { + mut x := i32(5) + assert !cas_i32(&x, 10, 42) + assert x == 5 +} + +fn test_cas_fail_memory_unchanged() { + mut x := i32(7) + cas_i32(&x, 1, 2) + assert x == 7 +} + +fn test_cas_exact_match() { + mut x := i32(-1) + assert !cas_i32(&x, 0, 999) + assert x == -1 +} + +fn test_cas_twice() { + mut x := i32(1) + assert cas_i32(&x, 1, 2) + assert cas_i32(&x, 2, 3) + assert x == 3 +} + +fn test_cas_with_negative() { + mut x := i32(-123) + assert cas_i32(&x, -123, 8) + assert x == 8 +} + +fn test_add_i32_basic() { + mut x := i32(0) + for _ in 0 .. 
1000 { + add_i32(&x, 1) + } + assert x == 1000 +} + +fn test_add_i32_negative() { + mut x := i32(10) + add_i32(&x, -3) + assert x == 7 +} + +fn test_add_i32_return_value() { + mut x := i32(5) + r := add_i32(&x, 7) + assert r == 12 + assert x == 12 +} + +fn test_add_i32_overflow_wraps() { + mut x := i32(2147483647) + add_i32(&x, 1) + assert x == -2147483648 +} + +fn test_swap_i32_basic() { + mut x := i32(5) + old := swap_i32(&x, 99) + assert old == 5 + assert x == 99 +} + +fn test_swap_i32_twice() { + mut x := i32(1) + assert swap_i32(&x, 2) == 1 + assert swap_i32(&x, 3) == 2 + assert x == 3 +} + +fn test_swap_i32_with_cas() { + mut x := i32(10) + assert cas_i32(&x, 10, 20) + old := swap_i32(&x, 30) + assert old == 20 + assert x == 30 +} + +fn test_load_i32_basic() { + mut x := i32(123456) + assert load_i32(&x) == 123456 +} + +fn test_store_i32_basic() { + mut x := i32(5) + store_i32(&x, 777) + assert x == 777 +} + +fn test_add_i32_concurrent() { + mut x := i32(0) + mut threads := []thread{} + + for _ in 0 .. 9 { + threads << spawn fn (ptr &i32) { + for _ in 0 .. 100_000 { + add_i32(ptr, 1) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 900_000 +} + +fn test_swap_i32_concurrent() { + mut x := i32(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (ptr &i32) { + for _ in 0 .. 50_000 { + swap_i32(ptr, 123) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 123 +} + +fn test_cas_i32_concurrent() { + mut x := i32(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (ptr &i32) { + for _ in 0 .. 100_000 { + for { + old := load_i32(ptr) + if cas_i32(ptr, old, old + 1) { + break + } + } + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 800_000 +} + +fn test_load_store_i32_concurrent() { + mut x := i32(0) + mut threads := []thread{} + + for _ in 0 .. 4 { + threads << spawn fn (px &i32) { + for _ in 0 .. 50_000 { + add_i32(px, 1) + } + }(&x) + } + for _ in 0 .. 
4 { + threads << spawn fn (px &i32) { + for _ in 0 .. 50_000 { + _ = load_i32(px) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 4 * 50_000 +} diff --git a/vlib/x/atomics/i64_test.v b/vlib/x/atomics/i64_test.v new file mode 100644 index 000000000..d0b476cd4 --- /dev/null +++ b/vlib/x/atomics/i64_test.v @@ -0,0 +1,161 @@ +// vtest build: !(macos || windows) + +module atomics + +fn test_load_i64_basic() { + mut x := i64(1234567890123) + assert load_i64(&x) == 1234567890123 +} + +fn test_store_i64_basic() { + mut x := i64(1) + store_i64(&x, 9999999) + assert x == 9999999 +} + +fn test_swap_i64_basic() { + mut x := i64(50) + old := swap_i64(&x, 777) + assert old == 50 + assert x == 777 +} + +fn test_swap_i64_same_value() { + mut x := i64(-123) + old := swap_i64(&x, -123) + assert old == -123 + assert x == -123 +} + +fn test_add_i64_basic() { + mut x := i64(0) + for _ in 0 .. 1000 { + add_i64(&x, 3) + } + assert x == 3000 +} + +fn test_add_i64_negative_delta() { + mut x := i64(100) + add_i64(&x, -7) + assert x == 93 +} + +fn test_add_i64_wraparound_behavior() { + mut x := i64(0x7fffffffffffffff) + add_i64(&x, 1) + assert x == -0x8000000000000000 +} + +fn test_add_i64_return_value() { + mut x := i64(10) + r := add_i64(&x, 100) + assert r == 110 + assert x == 110 +} + +fn test_cas_i64_basic() { + mut x := i64(111) + assert cas_i64(&x, 111, 222) + assert x == 222 +} + +fn test_cas_i64_fail() { + mut x := i64(555) + assert !cas_i64(&x, 123, 999) + assert x == 555 +} + +fn test_cas_i64_nochange_on_fail() { + mut x := i64(-999) + cas_i64(&x, 1, 5) + assert x == -999 +} + +fn test_cas_i64_negative_values() { + mut x := i64(-123456) + assert cas_i64(&x, -123456, 777) + assert x == 777 +} + +fn test_add_i64_concurrent() { + mut x := i64(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (px &i64) { + for _ in 0 .. 
100_000 { + add_i64(px, 1) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 800_000 +} + +fn test_swap_i64_concurrent() { + mut x := i64(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (px &i64) { + for _ in 0 .. 50_000 { + swap_i64(px, 12345) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 12345 +} + +fn test_cas_i64_concurrent_inc() { + mut x := i64(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (px &i64) { + for _ in 0 .. 50_000 { + for { + old := load_i64(px) + if cas_i64(px, old, old + 1) { + break + } + } + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 400_000 +} + +fn test_cas_i64_contended_flip() { + mut x := i64(0) + mut threads := []thread{} + + for _ in 0 .. 4 { + threads << spawn fn (px &i64) { + for _ in 0 .. 200_000 { + cas_i64(px, 0, 1) + cas_i64(px, 1, 0) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 0 || x == 1 +} diff --git a/vlib/x/atomics/panic_unaligned.v b/vlib/x/atomics/panic_unaligned.v new file mode 100644 index 000000000..4b3838899 --- /dev/null +++ b/vlib/x/atomics/panic_unaligned.v @@ -0,0 +1,10 @@ +module atomics + +$if prod && (gcc || clang) { + #flag -Wl,--undefined=panicUnaligned +} + +@[export: 'panicUnaligned'] +fn panic_unaligned() { + panic('unaligned atomic operation') +} diff --git a/vlib/x/atomics/u32_test.v b/vlib/x/atomics/u32_test.v new file mode 100644 index 000000000..2823a400f --- /dev/null +++ b/vlib/x/atomics/u32_test.v @@ -0,0 +1,178 @@ +// vtest build: !(macos || windows) + +module atomics + +fn test_add_u32_basic() { + mut x := u32(0) + for _ in 0 .. 
1000 { + add_u32(&x, 1) + } + assert x == 1000 +} + +fn test_add_u32_wraparound() { + mut x := u32(0xffffffff) + add_u32(&x, 1) + assert x == 0 +} + +fn test_add_u32_large() { + mut x := u32(10) + r := add_u32(&x, 1000) + assert r == 1010 + assert x == 1010 +} + +fn test_swap_u32_basic() { + mut x := u32(123) + old := swap_u32(&x, 999) + assert old == 123 + assert x == 999 +} + +fn test_swap_u32_same() { + mut x := u32(777) + old := swap_u32(&x, 777) + assert old == 777 + assert x == 777 +} + +fn test_store_u32_basic() { + mut x := u32(1) + store_u32(&x, 555) + assert x == 555 +} + +fn test_load_u32_basic() { + mut x := u32(888) + assert load_u32(&x) == 888 +} + +fn test_cas_u32_basic() { + mut x := u32(10) + assert cas_u32(&x, 10, 50) + assert x == 50 +} + +fn test_cas_u32_fail() { + mut x := u32(10) + assert !cas_u32(&x, 5, 999) + assert x == 10 +} + +fn test_cas_u32_nochange_on_fail() { + mut x := u32(777) + cas_u32(&x, 5, 9) + assert x == 777 +} + +fn test_cas_u32_boundary() { + mut x := u32(0xffffffff) + assert cas_u32(&x, 0xffffffff, 0) + assert x == 0 +} + +fn test_add_u32_concurrent() { + mut x := u32(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (px &u32) { + for _ in 0 .. 100_000 { + add_u32(px, 1) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 800_000 +} + +fn test_swap_u32_concurrent() { + mut x := u32(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (px &u32) { + for _ in 0 .. 50_000 { + swap_u32(px, 123) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 123 +} + +fn test_cas_u32_concurrent_inc() { + mut x := u32(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (px &u32) { + for _ in 0 .. 
50_000 { + for { + old := load_u32(px) + if cas_u32(px, old, old + 1) { + break + } + } + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 400_000 +} + +fn test_cas_u32_contended_flip() { + mut x := u32(0) + mut threads := []thread{} + + for _ in 0 .. 4 { + threads << spawn fn (px &u32) { + for _ in 0 .. 200_000 { + cas_u32(px, 0, 1) + cas_u32(px, 1, 0) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 0 || x == 1 +} + +fn test_load_store_u32_concurrent() { + mut x := u32(0) + mut threads := []thread{} + + for i in 0 .. 8 { + threads << spawn fn (px &u32, id int) { + for _ in 0 .. 50_000 { + if id % 2 == 0 { + store_u32(px, 1) + } else { + _ = load_u32(px) + } + } + }(&x, i) + } + + for t in threads { + t.wait() + } + + assert x == 1 +} diff --git a/vlib/x/atomics/u64_test.v b/vlib/x/atomics/u64_test.v new file mode 100644 index 000000000..ab704342e --- /dev/null +++ b/vlib/x/atomics/u64_test.v @@ -0,0 +1,155 @@ +// vtest build: !(macos || windows) + +module atomics + +fn test_load_u64_basic() { + mut x := u64(1234567890123) + assert load_u64(&x) == 1234567890123 +} + +fn test_store_u64_basic() { + mut x := u64(1) + store_u64(&x, 999999) + assert x == 999999 +} + +fn test_swap_u64_basic() { + mut x := u64(123) + old := swap_u64(&x, 999) + assert old == 123 + assert x == 999 +} + +fn test_swap_u64_same() { + mut x := u64(777777) + old := swap_u64(&x, 777777) + assert old == 777777 + assert x == 777777 +} + +fn test_add_u64_basic() { + mut x := u64(0) + for _ in 0 .. 
1000 { + add_u64(&x, 2) + } + assert x == 2000 +} + +fn test_add_u64_wraparound() { + mut x := u64(0xffffffffffffffff) + add_u64(&x, 1) + assert x == 0 +} + +fn test_add_u64_return() { + mut x := u64(10) + r := add_u64(&x, 100) + assert r == 110 + assert x == 110 +} + +fn test_cas_u64_basic() { + mut x := u64(111) + assert cas_u64(&x, 111, 222) + assert x == 222 +} + +fn test_cas_u64_fail() { + mut x := u64(500) + assert !cas_u64(&x, 100, 200) + assert x == 500 +} + +fn test_cas_u64_nochange_fail() { + mut x := u64(999) + cas_u64(&x, 1, 2) + assert x == 999 +} + +fn test_cas_u64_boundary() { + mut x := u64(0xffffffffffffffff) + assert cas_u64(&x, 0xffffffffffffffff, 0) + assert x == 0 +} + +fn test_add_u64_concurrent() { + mut x := u64(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (px &u64) { + for _ in 0 .. 100_000 { + add_u64(px, 1) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 800_000 +} + +fn test_swap_u64_concurrent() { + mut x := u64(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (px &u64) { + for _ in 0 .. 50_000 { + swap_u64(px, 123456) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 123456 +} + +fn test_cas_u64_concurrent_inc() { + mut x := u64(0) + mut threads := []thread{} + + for _ in 0 .. 8 { + threads << spawn fn (px &u64) { + for _ in 0 .. 50_000 { + for { + old := load_u64(px) + if cas_u64(px, old, old + 1) { + break + } + } + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 400_000 +} + +fn test_cas_u64_contended_flip() { + mut x := u64(0) + mut threads := []thread{} + + for _ in 0 .. 4 { + threads << spawn fn (px &u64) { + for _ in 0 .. 200_000 { + cas_u64(px, 0, 1) + cas_u64(px, 1, 0) + } + }(&x) + } + + for t in threads { + t.wait() + } + + assert x == 0 || x == 1 +} -- 2.39.5