From ba1cfccb66d38f15cb044207b40c31bbbdda5ace Mon Sep 17 00:00:00 2001 From: Mike Date: Sun, 1 Feb 2026 10:20:32 +0200 Subject: [PATCH] crypto.blake3: add @[direct_array_access] and improve f() performance (#26480) --- vlib/crypto/blake3/blake3.v | 5 +++-- vlib/crypto/blake3/blake3_block_generic.v | 23 +++++++++++------------ vlib/crypto/blake3/blake3_chunk.v | 3 ++- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/vlib/crypto/blake3/blake3.v b/vlib/crypto/blake3/blake3.v index cfd66551b..4f7f3994f 100644 --- a/vlib/crypto/blake3/blake3.v +++ b/vlib/crypto/blake3/blake3.v @@ -137,7 +137,7 @@ pub fn Digest.new_hash() !Digest { // Digest.new_keyed_hash initializes a Digest structure for a Blake3 keyed hash pub fn Digest.new_keyed_hash(key []u8) !Digest { // treat the key bytes as little endian u32 values - mut key_words := []u32{len: 8, cap: 8} + mut key_words := []u32{len: 8} for i in 0 .. 8 { key_words[i] = binary.little_endian_u32_at(key, i * 4) } @@ -153,7 +153,7 @@ pub fn Digest.new_derive_key_hash(context []u8) !Digest { context_key := context_digest.checksum_internal(key_length) // treat the context key bytes as little endian u32 values - mut key_words := []u32{len: 8, cap: 8} + mut key_words := []u32{len: 8} for i in 0 .. 8 { key_words[i] = binary.little_endian_u32_at(context_key, i * 4) } @@ -309,6 +309,7 @@ fn root_output_bytes(state HashState, size u64) []u8 { return output } +@[direct_array_access] fn (mut d Digest) add_node(node Node, level u8) { // if we are above the highst level, // just add the node at the top diff --git a/vlib/crypto/blake3/blake3_block_generic.v b/vlib/crypto/blake3/blake3_block_generic.v index d10782c2e..6e7ae0bdb 100644 --- a/vlib/crypto/blake3/blake3_block_generic.v +++ b/vlib/crypto/blake3/blake3_block_generic.v @@ -8,10 +8,11 @@ module blake3 +import arrays import math.bits // mixing function g -@[inline] +@[direct_array_access; inline] fn g(mut v []u32, a u8, b u8, c u8, d u8, x u32, y u32) { v[a] = v[a] + v[b] + x v[d] = bits.rotate_left_32((v[d] ^ v[a]), nr1) @@ -24,7 +25,7 @@ fn g(mut v []u32, a u8, b u8, c u8, d u8, x u32, y u32) { } // one complete mixing round with the function g -@[inline] +@[direct_array_access; inline] fn mixing_round(mut v []u32, m []u32, s []u8) { g(mut v, 0, 4, 8, 12, m[s[0]], m[s[1]]) g(mut v, 1, 5, 9, 13, m[s[2]], m[s[3]]) @@ -38,19 +39,17 @@ fn mixing_round(mut v []u32, m []u32, s []u8) { } // compression function f +@[direct_array_access] fn f(h []u32, m []u32, counter u64, input_bytes u32, flags u32) []u32 { - mut v := []u32{len: 0, cap: 16} + mut v := []u32{len: 16} // initialize the working vector - v << h[..8] - v << iv[..4] - - v << u32(counter & 0x00000000ffffffff) - v << u32(counter >> 32) - - v << input_bytes - - v << flags + arrays.copy[u32](mut v, h[..8]) + arrays.copy[u32](mut v[8..], iv[..4]) + v[12] = u32(counter) + v[13] = u32(counter >> 32) + v[14] = input_bytes + v[15] = flags // go 7 rounds of cryptographic mixing // diff --git a/vlib/crypto/blake3/blake3_chunk.v b/vlib/crypto/blake3/blake3_chunk.v index a4aecbecc..c534e9803 100644 --- a/vlib/crypto/blake3/blake3_chunk.v +++ b/vlib/crypto/blake3/blake3_chunk.v @@ -50,6 +50,7 @@ fn (c Chunk) str() string { // // As a potential speed up, we could try spawning this function // in a concurrent task and see if it is worth the overhead. +@[direct_array_access] fn (mut c Chunk) process_input(input []u8, key_words []u32, counter u64, flags u32, root bool) []u32 { mut remaining_input := unsafe { input[..] } @@ -63,7 +64,7 @@ fn (mut c Chunk) process_input(input []u8, key_words []u32, counter u64, flags u c.chunk_number = counter c.chaining_value = key_words.clone() - c.block_words = []u32{len: 16, cap: 16, init: 0} + c.block_words = []u32{len: 16} for i in 0 .. 16 { c.block_len = u32(block_size) -- 2.39.5