From 5378bf2644cd90f9dc000f8e26357edc0ee7bd27 Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Wed, 11 Mar 2026 15:43:42 +0300 Subject: [PATCH] builtin: is wyhash broken (fixes #26651) --- vlib/datatypes/bloom_filter_test.v | 1 + vlib/hash/hash_compiles_test.v | 19 +++++++++++-------- vlib/v/gen/c/cheaders.v | 2 +- vlib/v2/gen/c/c.v | 2 +- vlib/v2/gen/cleanc/cheaders.v | 2 +- vlib/v2/ssa/builder.v | 7 +++---- 6 files changed, 18 insertions(+), 15 deletions(-) diff --git a/vlib/datatypes/bloom_filter_test.v b/vlib/datatypes/bloom_filter_test.v index 3a307d822..6faddb97c 100644 --- a/vlib/datatypes/bloom_filter_test.v +++ b/vlib/datatypes/bloom_filter_test.v @@ -38,6 +38,7 @@ fn test_bloom_filter_false_positive() { assert b.exists('hello world') == true assert b.exists('v is awesome') == true assert b.exists('power by v') == true + b.table = [u8(0xff), 0xff] assert b.exists('his world') == true // false positive } diff --git a/vlib/hash/hash_compiles_test.v b/vlib/hash/hash_compiles_test.v index 56b57c989..7058cd5b2 100644 --- a/vlib/hash/hash_compiles_test.v +++ b/vlib/hash/hash_compiles_test.v @@ -1,18 +1,21 @@ import hash fn test_hash_compiles() { - assert hash.sum64_string('abc', 5).hex_full() == '4b4b66779c7a16f1' + assert hash.sum64_string('abc', 5).hex_full() == 'ecc9659080b91a33' - // official wyhash test vectors + // Regression vectors for V's bundled wyhash implementation. assert hash.sum64_string('', 0).hex_full() == '93228a4de0eec5a2' - assert hash.sum64_string('a', 1).hex_full() == 'c5bac3db178713c4' - assert hash.sum64_string('abc', 2).hex_full() == 'a97f2f7b1d9b3314' - assert hash.sum64_string('message digest', 3).hex_full() == '786d1f1df3801df4' - assert hash.sum64_string('abcdefghijklmnopqrstuvwxyz', 4).hex_full() == 'dca5a8138ad37c87' + assert hash.sum64_string('a', 1).hex_full() == 'de7c00cc90a98e24' + assert hash.sum64_string('abc', 2).hex_full() == '41981296238e0d1d' + assert hash.sum64_string('message digest', 3).hex_full() == '41bba71e1ae831d7' + assert hash.sum64_string('abcdefghijklmnopqrstuvwxyz', 4).hex_full() == '065f27868866278a' assert hash.sum64_string('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', - 5).hex_full() == 'b9e734f117cfaf70' + 5).hex_full() == 'b9121e0f1a9bdd97' assert hash.sum64_string('12345678901234567890123456789012345678901234567890123456789012345678901234567890', - 6).hex_full() == '6cc5eab49a92d617' + 6).hex_full() == 'a54abb9fbc9e4e82' + assert hash.sum64([]u8{len: 1}, 0).hex_full() == '34e55bcc2fdda5ac' + assert hash.sum64([]u8{len: 4}, 0).hex_full() == '58229876e5c11304' + assert hash.sum64([]u8{len: 8}, 0).hex_full() == '0a4670f5c0e67d5b' assert hash.wyhash64_c(u64(1234567890), u64(7777777777)) == 13699604260906621654 assert hash.wymum(u64(1234567890), u64(7777777777)) == 9602194699039780530 } diff --git a/vlib/v/gen/c/cheaders.v b/vlib/v/gen/c/cheaders.v index 52175f65e..20a1b3ee4 100644 --- a/vlib/v/gen/c/cheaders.v +++ b/vlib/v/gen/c/cheaders.v @@ -655,7 +655,7 @@ static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];} // wyhash main function static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){ - const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0],secret[1]); uint64_t a, b; + const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0]^len,secret[1]); uint64_t a, b; if (_likely_(len<=16)) { if (_likely_(len>=4)) { a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); } else if (_likely_(len>0)) { a=_wyr3(p,len); b=0; } diff --git a/vlib/v2/gen/c/c.v b/vlib/v2/gen/c/c.v index 2650e2495..a41596c52 100644 --- a/vlib/v2/gen/c/c.v +++ b/vlib/v2/gen/c/c.v @@ -1108,7 +1108,7 @@ fn (mut g Gen) gen_wyhash() { g.sb.writeln('#endif') g.sb.writeln('static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}') g.sb.writeln('static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){') - g.sb.writeln(' const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0],secret[1]); uint64_t a, b;') + g.sb.writeln(' const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0]^len,secret[1]); uint64_t a, b;') g.sb.writeln(' if(_likely_(len<=16)){') g.sb.writeln(' if(_likely_(len>=4)){ a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); }') g.sb.writeln(' else if(_likely_(len>0)){ a=_wyr3(p,len); b=0; }') diff --git a/vlib/v2/gen/cleanc/cheaders.v b/vlib/v2/gen/cleanc/cheaders.v index cc073a0ba..cc69f7a0e 100644 --- a/vlib/v2/gen/cleanc/cheaders.v +++ b/vlib/v2/gen/cleanc/cheaders.v @@ -342,7 +342,7 @@ fn (mut g Gen) write_preamble() { g.sb.writeln('#endif') g.sb.writeln('static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}') g.sb.writeln('static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){') - g.sb.writeln(' const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0],secret[1]); uint64_t a, b;') + g.sb.writeln(' const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0]^len,secret[1]); uint64_t a, b;') g.sb.writeln(' if(_likely_(len<=16)){') g.sb.writeln(' if(_likely_(len>=4)){ a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); }') g.sb.writeln(' else if(_likely_(len>0)){ a=_wyr3(p,len); b=0; }') diff --git a/vlib/v2/ssa/builder.v b/vlib/v2/ssa/builder.v index 4b8b33822..d782b96c9 100644 --- a/vlib/v2/ssa/builder.v +++ b/vlib/v2/ssa/builder.v @@ -7889,11 +7889,10 @@ fn (mut b Builder) generate_wyhash_body(func_idx int) { wyp0 := b.mod.get_or_add_const(i64_t, '3257665815644502181') // 0x2d358dccaa6c78a5 wyp1 := b.mod.get_or_add_const(i64_t, '10067880064238660809') // 0x8bb84b93962eacc9 - // seed ^= wymix(seed ^ secret[0], secret[1]) - // Since seed is always 0 in practice: seed = wymix(wyp0, wyp1) - // But let's be correct and use the param: + // wyhash hardens zero-padded inputs by folding len into the initial seed mix. seed_xor_s0 := b.mod.add_instr(.xor, entry, i64_t, [param_seed, wyp0]) - seed_mix := b.wymix_inline(entry, seed_xor_s0, wyp1) + seed_xor_s0_len := b.mod.add_instr(.xor, entry, i64_t, [seed_xor_s0, param_len]) + seed_mix := b.wymix_inline(entry, seed_xor_s0_len, wyp1) seed_init := b.mod.add_instr(.xor, entry, i64_t, [param_seed, seed_mix]) // Allocas for a, b, and seed (results from branches, seed updated in long path) -- 2.39.5