From 1c4bb916e5cdf6aecbc6c6315f95453a9e8778f3 Mon Sep 17 00:00:00 2001 From: oswyndel <67714233+RbPyer@users.noreply.github.com> Date: Mon, 23 Feb 2026 07:56:53 +0300 Subject: [PATCH] atomic: and/or bitwise ops (#26646) --- vlib/x/atomics/README.md | 49 ++---- vlib/x/atomics/atomics.amd64.v | 248 +++++++++++++++++++++++++++ vlib/x/atomics/atomics.i386.v | 296 +++++++++++++++++++++++++++++++++ vlib/x/atomics/i32_test.v | 32 ++++ vlib/x/atomics/i64_test.v | 32 ++++ vlib/x/atomics/u32_test.v | 32 ++++ vlib/x/atomics/u64_test.v | 32 ++++ 7 files changed, 690 insertions(+), 31 deletions(-) diff --git a/vlib/x/atomics/README.md b/vlib/x/atomics/README.md index f445fe1f2..753537d10 100644 --- a/vlib/x/atomics/README.md +++ b/vlib/x/atomics/README.md @@ -1,96 +1,81 @@ # v-atomics - Low-level atomic operations for V with explicit i386 support (MMX required on i386). - Native atomic primitives for V implemented with inline assembly, without relying on C FFI. - This repository is an experiment in providing low-level atomic operations directly in V, using V's inline assembly support. - At the moment, all operations provide sequentially consistent semantics. - ## Motivation - In the current V ecosystem, atomic operations are implemented via calls into C. - While this approach works, it introduces an additional dependency on the C toolchain and headers and limits control over the exact machine instructions being emitted. - x.atomics explores an alternative: **native atomic operations implemented directly in V**, using architecture-specific inline assembly and explicit semantics. - The current focus of this project is: - - correctness of basic atomic primitives; - predictable and inspectable code generation; - sequentially consistent behavior for all operations. - In future versions, the set of supported atomic operations will be expanded, and additional memory orderings will be introduced. 
- --- - ## Scope and Guarantees - - atomic operations on integer types implemented in V with inline assembly; - architecture-specific implementations (per-platform `atomics.<arch>.v` files); - **sequential consistency** for all exposed operations. - --- - ## Memory Model - All operations in this library are intended to be **sequentially consistent**: - - operations appear to be globally ordered; - no weaker semantics (relaxed, acquire, release) are currently implemented; - when weaker variants are added in the future, they will be explicitly named and documented. - --- - ## Examples - See the [examples](examples/) directory for complete runnable examples. - ### Basic Usage - ```v import x.atomics fn main() { mut value := i32(0) - // Atomically store a value atomics.store_i32(&value, 42) - // Atomically load the value loaded := atomics.load_i32(&value) - // Atomic add: returns the new value after addition new_value := atomics.add_i32(&value, 10) - // Atomic swap: returns the old value old := atomics.swap_i32(&value, 100) + _ = loaded + _ = new_value + _ = old } ``` - ### Compare-and-Swap (CAS) - ```v import x.atomics fn main() { mut flag := u32(0) - // CAS: if flag == 0, set it to 1; returns true on success if atomics.cas_u32(&flag, 0, 1) { println('Successfully changed flag from 0 to 1') } } ``` +### Bitwise Operations (AND / OR) +``` +import x.atomics +fn main() { + mut flags := u32(0xFF) + // Atomically AND: clears lower nibble, returns old value + old := atomics.and_u32(&flags, 0xF0) + println('old: ${old}, new: ${flags}') // old: 255, new: 240 + // Atomically OR: sets a bit, returns old value + prev := atomics.or_u32(&flags, 0x01) + println('prev: ${prev}, new: ${flags}') // prev: 240, new: 241 +} +``` ### Available Operations - | Operation | i32 | i64 | u32 | u64 | |-----------|-----|-----|-----|-----| | `load_*` | yes | yes | yes | yes | @@ -98,3 +83,5 @@ fn main() { | `add_*` | yes | yes | yes | yes | | `swap_*` | yes | yes | yes | yes | | `cas_*` | yes | yes | 
yes | yes |
+| `and_*` | yes | yes | yes | yes |
+| `or_*` | yes | yes | yes | yes |
diff --git a/vlib/x/atomics/atomics.amd64.v b/vlib/x/atomics/atomics.amd64.v
index f70165133..9f3c55a50 100644
--- a/vlib/x/atomics/atomics.amd64.v
+++ b/vlib/x/atomics/atomics.amd64.v
@@ -522,3 +522,251 @@ pub fn cas_u64(addr &u64, old u64, new u64) bool {
 	}
 	return swapped
 }
+
+// and_i64 atomically replaces the value at addr with `value & mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchg` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 8-byte aligned.
+pub fn and_i64(addr &i64, mask i64) i64 {
+	mut old := i64(0)
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 7
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov rax, [rdx]
+		mov rcx, rax
+		and rcx, mask
+		lock cmpxchg [rdx], rcx
+		jnz '3b'
+		mov old, rax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; rdx
+		  rax
+		  rcx
+		  memory
+	}
+	return old
+}
+
+// and_u64 atomically replaces the value at addr with `value & mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchg` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 8-byte aligned.
+pub fn and_u64(addr &u64, mask u64) u64 {
+	mut old := u64(0)
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 7
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov rax, [rdx]
+		mov rcx, rax
+		and rcx, mask
+		lock cmpxchg [rdx], rcx
+		jnz '3b'
+		mov old, rax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; rdx
+		  rax
+		  rcx
+		  memory
+	}
+	return old
+}
+
+// or_i64 atomically replaces the value at addr with `value | mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchg` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 8-byte aligned.
+pub fn or_i64(addr &i64, mask i64) i64 {
+	mut old := i64(0)
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 7
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov rax, [rdx]
+		mov rcx, rax
+		or rcx, mask
+		lock cmpxchg [rdx], rcx
+		jnz '3b'
+		mov old, rax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; rdx
+		  rax
+		  rcx
+		  memory
+	}
+	return old
+}
+
+// or_u64 atomically replaces the value at addr with `value | mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchg` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 8-byte aligned.
+pub fn or_u64(addr &u64, mask u64) u64 {
+	mut old := u64(0)
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 7
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov rax, [rdx]
+		mov rcx, rax
+		or rcx, mask
+		lock cmpxchg [rdx], rcx
+		jnz '3b'
+		mov old, rax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; rdx
+		  rax
+		  rcx
+		  memory
+	}
+	return old
+}
+
+// and_i32 atomically replaces the value at addr with `value & mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchg` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 4-byte aligned.
+pub fn and_i32(addr &i32, mask i32) i32 {
+	mut old := i32(0)
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 3
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov eax, [rdx]
+		mov ecx, eax
+		and ecx, mask
+		lock cmpxchg [rdx], ecx
+		jnz '3b'
+		mov old, eax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; rdx
+		  eax
+		  ecx
+		  memory
+	}
+	return old
+}
+
+// and_u32 atomically replaces the value at addr with `value & mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchg` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 4-byte aligned.
+pub fn and_u32(addr &u32, mask u32) u32 {
+	mut old := u32(0)
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 3
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov eax, [rdx]
+		mov ecx, eax
+		and ecx, mask
+		lock cmpxchg [rdx], ecx
+		jnz '3b'
+		mov old, eax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; rdx
+		  eax
+		  ecx
+		  memory
+	}
+	return old
+}
+
+// or_i32 atomically replaces the value at addr with `value | mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchg` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 4-byte aligned.
+pub fn or_i32(addr &i32, mask i32) i32 {
+	mut old := i32(0)
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 3
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov eax, [rdx]
+		mov ecx, eax
+		or ecx, mask
+		lock cmpxchg [rdx], ecx
+		jnz '3b'
+		mov old, eax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; rdx
+		  eax
+		  ecx
+		  memory
+	}
+	return old
+}
+
+// or_u32 atomically replaces the value at addr with `value | mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchg` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 4-byte aligned.
+pub fn or_u32(addr &u32, mask u32) u32 {
+	mut old := u32(0)
+	asm volatile amd64 {
+		mov rdx, addr
+		test rdx, 3
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov eax, [rdx]
+		mov ecx, eax
+		or ecx, mask
+		lock cmpxchg [rdx], ecx
+		jnz '3b'
+		mov old, eax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; rdx
+		  eax
+		  ecx
+		  memory
+	}
+	return old
+}
diff --git a/vlib/x/atomics/atomics.i386.v b/vlib/x/atomics/atomics.i386.v
index 55655254b..bcd05e91c 100644
--- a/vlib/x/atomics/atomics.i386.v
+++ b/vlib/x/atomics/atomics.i386.v
@@ -604,3 +604,307 @@ pub fn cas_u64(addr &u64, old u64, new u64) bool {
 	}
 	return swapped
 }
+
+// and_i64 atomically replaces the value at addr with `value & mask` and returns the value it held before.
+// It is a `lock cmpxchg8b` retry loop over the two 32-bit halves, performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 8-byte aligned.
+pub fn and_i64(addr &i64, mask i64) i64 {
+	mask_lo := u32(u64(mask) & 0xFFFF_FFFF)
+	mask_hi := u32(u64(mask) >> 32)
+	mut old_lo := u32(0)
+	mut old_hi := u32(0)
+
+	// The `memory` clobber is required: cmpxchg8b writes through addr, which is not an asm output.
+	asm volatile i386 {
+		mov esi, addr
+		test esi, 7
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		mov eax, [esi]
+		mov edx, [esi + 4]
+		3:
+		mov ebx, eax
+		and ebx, mask_lo
+		mov ecx, edx
+		and ecx, mask_hi
+		lock cmpxchg8b [esi]
+		jnz '3b'
+		mov old_lo, eax
+		mov old_hi, edx
+		2:
+		; =m (old_lo)
+		  =m (old_hi)
+		; m (mask_lo)
+		  m (mask_hi)
+		  r (addr)
+		; eax
+		  edx
+		  ecx
+		  ebx
+		  esi
+		  memory
+	}
+	return i64(u64(old_lo) | (u64(old_hi) << 32))
+}
+
+// and_u64 atomically replaces the value at addr with `value & mask` and returns the value it held before.
+// It is a `lock cmpxchg8b` retry loop over the two 32-bit halves, performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 8-byte aligned.
+pub fn and_u64(addr &u64, mask u64) u64 {
+	mask_lo := u32(u64(mask) & 0xFFFF_FFFF)
+	mask_hi := u32(u64(mask) >> 32)
+	mut old_lo := u32(0)
+	mut old_hi := u32(0)
+
+	// The `memory` clobber is required: cmpxchg8b writes through addr, which is not an asm output.
+	asm volatile i386 {
+		mov esi, addr
+		test esi, 7
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		mov eax, [esi]
+		mov edx, [esi + 4]
+		3:
+		mov ebx, eax
+		and ebx, mask_lo
+		mov ecx, edx
+		and ecx, mask_hi
+		lock cmpxchg8b [esi]
+		jnz '3b'
+		mov old_lo, eax
+		mov old_hi, edx
+		2:
+		; =m (old_lo)
+		  =m (old_hi)
+		; m (mask_lo)
+		  m (mask_hi)
+		  r (addr)
+		; eax
+		  edx
+		  ecx
+		  ebx
+		  esi
+		  memory
+	}
+	return u64(old_lo) | (u64(old_hi) << 32)
+}
+
+// or_i64 atomically replaces the value at addr with `value | mask` and returns the value it held before.
+// It is a `lock cmpxchg8b` retry loop over the two 32-bit halves, performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 8-byte aligned.
+pub fn or_i64(addr &i64, mask i64) i64 {
+	mask_lo := u32(u64(mask) & 0xFFFF_FFFF)
+	mask_hi := u32(u64(mask) >> 32)
+	mut old_lo := u32(0)
+	mut old_hi := u32(0)
+
+	// The `memory` clobber is required: cmpxchg8b writes through addr, which is not an asm output.
+	asm volatile i386 {
+		mov esi, addr
+		test esi, 7
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		mov eax, [esi]
+		mov edx, [esi + 4]
+		3:
+		mov ebx, eax
+		or ebx, mask_lo
+		mov ecx, edx
+		or ecx, mask_hi
+		lock cmpxchg8b [esi]
+		jnz '3b'
+		mov old_lo, eax
+		mov old_hi, edx
+		2:
+		; =m (old_lo)
+		  =m (old_hi)
+		; m (mask_lo)
+		  m (mask_hi)
+		  r (addr)
+		; eax
+		  edx
+		  ecx
+		  ebx
+		  esi
+		  memory
+	}
+	return i64(u64(old_lo) | (u64(old_hi) << 32))
+}
+
+// or_u64 atomically replaces the value at addr with `value | mask` and returns the value it held before.
+// It is a `lock cmpxchg8b` retry loop over the two 32-bit halves, performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 8-byte aligned.
+pub fn or_u64(addr &u64, mask u64) u64 {
+	mask_lo := u32(u64(mask) & 0xFFFF_FFFF)
+	mask_hi := u32(u64(mask) >> 32)
+	mut old_lo := u32(0)
+	mut old_hi := u32(0)
+
+	// The `memory` clobber is required: cmpxchg8b writes through addr, which is not an asm output.
+	asm volatile i386 {
+		mov esi, addr
+		test esi, 7
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		mov eax, [esi]
+		mov edx, [esi + 4]
+		3:
+		mov ebx, eax
+		or ebx, mask_lo
+		mov ecx, edx
+		or ecx, mask_hi
+		lock cmpxchg8b [esi]
+		jnz '3b'
+		mov old_lo, eax
+		mov old_hi, edx
+		2:
+		; =m (old_lo)
+		  =m (old_hi)
+		; m (mask_lo)
+		  m (mask_hi)
+		  r (addr)
+		; eax
+		  edx
+		  ecx
+		  ebx
+		  esi
+		  memory
+	}
+	return u64(old_lo) | (u64(old_hi) << 32)
+}
+
+// and_u32 atomically replaces the value at addr with `value & mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchgl` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 4-byte aligned.
+pub fn and_u32(addr &u32, mask u32) u32 {
+	mut old := u32(0)
+
+	asm volatile i386 {
+		mov edx, addr
+		test edx, 3
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov eax, [edx]
+		mov ecx, eax
+		and ecx, mask
+		lock cmpxchgl [edx], ecx
+		jnz '3b'
+		mov old, eax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; edx
+		  eax
+		  ecx
+		  memory
+	}
+	return old
+}
+
+// and_i32 atomically replaces the value at addr with `value & mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchgl` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 4-byte aligned.
+pub fn and_i32(addr &i32, mask i32) i32 {
+	mut old := i32(0)
+
+	asm volatile i386 {
+		mov edx, addr
+		test edx, 3
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov eax, [edx]
+		mov ecx, eax
+		and ecx, mask
+		lock cmpxchgl [edx], ecx
+		jnz '3b'
+		mov old, eax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; edx
+		  eax
+		  ecx
+		  memory
+	}
+	return old
+}
+
+// or_i32 atomically replaces the value at addr with `value | mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchgl` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 4-byte aligned.
+pub fn or_i32(addr &i32, mask i32) i32 {
+	mut old := i32(0)
+
+	asm volatile i386 {
+		mov edx, addr
+		test edx, 3
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov eax, [edx]
+		mov ecx, eax
+		or ecx, mask
+		lock cmpxchgl [edx], ecx
+		jnz '3b'
+		mov old, eax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; edx
+		  eax
+		  ecx
+		  memory
+	}
+	return old
+}
+
+// or_u32 atomically replaces the value at addr with `value | mask` and returns the value it held before.
+// It is implemented as a `lock cmpxchgl` retry loop and is performed with sequential consistency.
+// Panics (via panicUnaligned) if addr is not 4-byte aligned.
+pub fn or_u32(addr &u32, mask u32) u32 {
+	mut old := u32(0)
+
+	asm volatile i386 {
+		mov edx, addr
+		test edx, 3
+		jz '1f'
+		call panicUnaligned
+		jmp '2f'
+		1:
+		3:
+		mov eax, [edx]
+		mov ecx, eax
+		or ecx, mask
+		lock cmpxchgl [edx], ecx
+		jnz '3b'
+		mov old, eax
+		2:
+		; =r (old)
+		; r (addr)
+		  r (mask)
+		; edx
+		  eax
+		  ecx
+		  memory
+	}
+	return old
+}
diff --git a/vlib/x/atomics/i32_test.v b/vlib/x/atomics/i32_test.v
index c6d3264b0..fbe4e17ce 100644
--- a/vlib/x/atomics/i32_test.v
+++ b/vlib/x/atomics/i32_test.v
@@ -187,3 +187,37 @@ fn test_load_store_i32_concurrent() {
 
 	assert x == 4 * 50_000
 }
+
+// test_and_i32_concurrent applies and_i32 with one mask from 8 threads and checks the final value.
+fn test_and_i32_concurrent() {
+	mut x := i32(0x7fffffff)
+	mut threads := []thread{}
+	for _ in 0 .. 8 {
+		threads << spawn fn (px &i32) {
+			for _ in 0 .. 100_000 {
+				and_i32(px, 0x0fffffff)
+			}
+		}(&x)
+	}
+	for t in threads {
+		t.wait()
+	}
+	assert x == 0x0fffffff
+}
+
+// test_or_i32_concurrent applies or_i32 with one mask from 8 threads and checks the final value.
+fn test_or_i32_concurrent() {
+	mut x := i32(0)
+	mut threads := []thread{}
+	for _ in 0 .. 8 {
+		threads << spawn fn (px &i32) {
+			for _ in 0 .. 100_000 {
+				or_i32(px, 0x12345678)
+			}
+		}(&x)
+	}
+	for t in threads {
+		t.wait()
+	}
+	assert x == 0x12345678
+}
diff --git a/vlib/x/atomics/i64_test.v b/vlib/x/atomics/i64_test.v
index d0b476cd4..f7d181686 100644
--- a/vlib/x/atomics/i64_test.v
+++ b/vlib/x/atomics/i64_test.v
@@ -159,3 +159,37 @@ fn test_cas_i64_contended_flip() {
 
 	assert x == 0 || x == 1
 }
+
+// test_and_i64_concurrent applies and_i64 with one mask from 8 threads and checks the final value.
+fn test_and_i64_concurrent() {
+	mut x := i64(0x7fffffffffffffff)
+	mut threads := []thread{}
+	for _ in 0 .. 8 {
+		threads << spawn fn (px &i64) {
+			for _ in 0 .. 100_000 {
+				and_i64(px, 0x00ffffffffffffff)
+			}
+		}(&x)
+	}
+	for t in threads {
+		t.wait()
+	}
+	assert x == 0x00ffffffffffffff
+}
+
+// test_or_i64_concurrent applies or_i64 with one mask from 8 threads and checks the final value.
+fn test_or_i64_concurrent() {
+	mut x := i64(0)
+	mut threads := []thread{}
+	for _ in 0 .. 8 {
+		threads << spawn fn (px &i64) {
+			for _ in 0 .. 100_000 {
+				or_i64(px, 0x0123456789abcdef)
+			}
+		}(&x)
+	}
+	for t in threads {
+		t.wait()
+	}
+	assert x == 0x0123456789abcdef
+}
diff --git a/vlib/x/atomics/u32_test.v b/vlib/x/atomics/u32_test.v
index 2823a400f..378f154f2 100644
--- a/vlib/x/atomics/u32_test.v
+++ b/vlib/x/atomics/u32_test.v
@@ -176,3 +176,37 @@ fn test_load_store_u32_concurrent() {
 
 	assert x == 1
 }
+
+// test_and_u32_concurrent applies and_u32 with one mask from 8 threads and checks the final value.
+fn test_and_u32_concurrent() {
+	mut x := u32(0xffffffff)
+	mut threads := []thread{}
+	for _ in 0 .. 8 {
+		threads << spawn fn (px &u32) {
+			for _ in 0 .. 100_000 {
+				and_u32(px, 0x0f0f0f0f)
+			}
+		}(&x)
+	}
+	for t in threads {
+		t.wait()
+	}
+	assert x == 0x0f0f0f0f
+}
+
+// test_or_u32_concurrent applies or_u32 with one mask from 8 threads and checks the final value.
+fn test_or_u32_concurrent() {
+	mut x := u32(0)
+	mut threads := []thread{}
+	for _ in 0 .. 8 {
+		threads << spawn fn (px &u32) {
+			for _ in 0 .. 100_000 {
+				or_u32(px, 0xdeadbeef)
+			}
+		}(&x)
+	}
+	for t in threads {
+		t.wait()
+	}
+	assert x == u32(0xdeadbeef)
+}
diff --git a/vlib/x/atomics/u64_test.v b/vlib/x/atomics/u64_test.v
index ab704342e..fd4e56fd4 100644
--- a/vlib/x/atomics/u64_test.v
+++ b/vlib/x/atomics/u64_test.v
@@ -153,3 +153,37 @@ fn test_cas_u64_contended_flip() {
 
 	assert x == 0 || x == 1
 }
+
+// test_and_u64_concurrent applies and_u64 with one mask from 8 threads and checks the final value.
+fn test_and_u64_concurrent() {
+	mut x := u64(0xffffffffffffffff)
+	mut threads := []thread{}
+	for _ in 0 .. 8 {
+		threads << spawn fn (px &u64) {
+			for _ in 0 .. 100_000 {
+				and_u64(px, 0x00ff00ff00ff00ff)
+			}
+		}(&x)
+	}
+	for t in threads {
+		t.wait()
+	}
+	assert x == 0x00ff00ff00ff00ff
+}
+
+// test_or_u64_concurrent applies or_u64 with one mask from 8 threads and checks the final value.
+fn test_or_u64_concurrent() {
+	mut x := u64(0)
+	mut threads := []thread{}
+	for _ in 0 .. 8 {
+		threads << spawn fn (px &u64) {
+			for _ in 0 .. 100_000 {
+				or_u64(px, 0xfedcba9876543210)
+			}
+		}(&x)
+	}
+	for t in threads {
+		t.wait()
+	}
+	assert x == u64(0xfedcba9876543210)
+}
-- 
2.39.5