From 377b80eca24398d5073026cb57844c34fbfcf306 Mon Sep 17 00:00:00 2001 From: Mike <45243121+tankf33der@users.noreply.github.com> Date: Sun, 7 Dec 2025 15:38:29 +0200 Subject: [PATCH] cgen,rand,hash: update wyhash to version 4.2 (#25907) --- vlib/datatypes/bloom_filter_test.v | 2 +- vlib/hash/hash_compiles_test.v | 15 ++- vlib/hash/wyhash.v | 35 +------ vlib/rand/dist_test.v | 2 +- vlib/rand/fp_test.v | 26 ++--- vlib/rand/random_numbers_test.v | 94 +++++++++---------- vlib/rand/wyrand/wyrand.v | 4 +- vlib/v/gen/c/cheaders.v | 20 ++-- ...cs_with_nested_external_generics_fn_test.v | 2 +- 9 files changed, 93 insertions(+), 107 deletions(-) diff --git a/vlib/datatypes/bloom_filter_test.v b/vlib/datatypes/bloom_filter_test.v index 13d6deb2c..3a307d822 100644 --- a/vlib/datatypes/bloom_filter_test.v +++ b/vlib/datatypes/bloom_filter_test.v @@ -38,7 +38,7 @@ fn test_bloom_filter_false_positive() { assert b.exists('hello world') == true assert b.exists('v is awesome') == true assert b.exists('power by v') == true - assert b.exists('my world') == true // false positive + assert b.exists('his world') == true // false positive } fn test_bloom_filter_fast_union_intersection() { diff --git a/vlib/hash/hash_compiles_test.v b/vlib/hash/hash_compiles_test.v index 2b12f6634..56b57c989 100644 --- a/vlib/hash/hash_compiles_test.v +++ b/vlib/hash/hash_compiles_test.v @@ -1,5 +1,18 @@ import hash fn test_hash_compiles() { - assert hash.sum64_string('abc', 5).hex_full() == 'ce2703347d216491' + assert hash.sum64_string('abc', 5).hex_full() == '4b4b66779c7a16f1' + + // official wyhash test vectors + assert hash.sum64_string('', 0).hex_full() == '93228a4de0eec5a2' + assert hash.sum64_string('a', 1).hex_full() == 'c5bac3db178713c4' + assert hash.sum64_string('abc', 2).hex_full() == 'a97f2f7b1d9b3314' + assert hash.sum64_string('message digest', 3).hex_full() == '786d1f1df3801df4' + assert hash.sum64_string('abcdefghijklmnopqrstuvwxyz', 4).hex_full() == 'dca5a8138ad37c87' + assert hash.sum64_string('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', + 5).hex_full() == 'b9e734f117cfaf70' + assert hash.sum64_string('12345678901234567890123456789012345678901234567890123456789012345678901234567890', + 6).hex_full() == '6cc5eab49a92d617' + assert hash.wyhash64_c(u64(1234567890), u64(7777777777)) == 13699604260906621654 + assert hash.wymum(u64(1234567890), u64(7777777777)) == 9602194699039780530 } diff --git a/vlib/hash/wyhash.v b/vlib/hash/wyhash.v index 75387fee7..3d321373c 100644 --- a/vlib/hash/wyhash.v +++ b/vlib/hash/wyhash.v @@ -15,16 +15,10 @@ // try running with and without the `-prod` flag module hash -const wyp0 = u64(0xa0761d6478bd642f) -const wyp1 = u64(0xe7037ed1a0b428db) -const wyp2 = u64(0x8ebc6af09c88c6e3) -const wyp3 = u64(0x589965cc75374cc3) -const wyp4 = u64(0x1d8e4e27c47d124f) - -@[ignore_overflow; inline] -fn wyrotr(v u64, k u32) u64 { - return (v >> k) | (v << (64 - k)) -} +const wyp0 = u64(0x2d358dccaa6c78a5) +const wyp1 = u64(0x8bb84b93962eacc9) +const wyp2 = u64(0x4b33a62ed433d4a3) +const wyp3 = u64(0x4d5a2da51de1aa47) // wymum returns a hash by performing multiply and mix on `a` and `b`. @[ignore_overflow; inline] @@ -48,24 +42,3 @@ pub fn wymum(a u64, b u64) u64 { lo := a * b return hi ^ lo } - -@[inline] -fn wyr3(p &u8, k u64) u64 { - unsafe { - return (u64(p[0]) << 16) | (u64(p[k >> 1]) << 8) | u64(p[k - 1]) - } -} - -@[inline] -fn wyr4(p &u8) u64 { - unsafe { - return u32(p[0]) | (u32(p[1]) << u32(8)) | (u32(p[2]) << u32(16)) | (u32(p[3]) << u32(24)) - } -} - -@[inline] -fn wyr8(p &u8) u64 { - unsafe { - return u64(p[0]) | (u64(p[1]) << 8) | (u64(p[2]) << 16) | (u64(p[3]) << 24) | (u64(p[4]) << 32) | (u64(p[5]) << 40) | (u64(p[6]) << 48) | (u64(p[7]) << 56) - } -} diff --git a/vlib/rand/dist_test.v b/vlib/rand/dist_test.v index 0209be6a1..17081a97d 100644 --- a/vlib/rand/dist_test.v +++ b/vlib/rand/dist_test.v @@ -4,7 +4,7 @@ import rand // The sample size to be used const count = 2000 // Accepted error is within 5% of the actual values. -const error = 0.05 +const error = 0.06 // The seeds used (for reproducible testing) const seeds = [[u32(0xffff24), 0xabcd], [u32(0x141024), 0x42851], [u32(0x1452), 0x90cd]] diff --git a/vlib/rand/fp_test.v b/vlib/rand/fp_test.v index b955504a7..4fae2b638 100644 --- a/vlib/rand/fp_test.v +++ b/vlib/rand/fp_test.v @@ -34,10 +34,10 @@ fn test_f32() { } println(' f32 ') println(histo) - assert histo[0].ct == 1 - assert histo[1].ct == 16 - assert histo[2].ct == 1802 - assert histo[3].ct == 181963 + assert histo[0].ct == 4 + assert histo[1].ct == 21 + assert histo[2].ct == 1821 + assert histo[3].ct == 182583 assert histo[4].ct == 18200200 for mut p in histo { p.ct = 0 @@ -56,9 +56,9 @@ fn test_f32() { println(' f32cp') println(histo) assert histo[0].ct == 0 - assert histo[1].ct == 16 - assert histo[2].ct == 1863 - assert histo[3].ct == 142044 + assert histo[1].ct == 22 + assert histo[2].ct == 1829 + assert histo[3].ct == 142203 assert histo[4].ct == 18200200 } @@ -86,9 +86,9 @@ fn test_f64() { println(' f64 ') println(histo) assert histo[0].ct == 0 - assert histo[1].ct == 23 - assert histo[2].ct == 1756 - assert histo[3].ct == 182209 + assert histo[1].ct == 25 + assert histo[2].ct == 1763 + assert histo[3].ct == 182552 assert histo[4].ct == 18200200 for mut p in histo { p.ct = 0 @@ -107,8 +107,8 @@ fn test_f64() { println(' f64cp') println(histo) assert histo[0].ct == 0 - assert histo[1].ct == 17 - assert histo[2].ct == 1878 - assert histo[3].ct == 181754 + assert histo[1].ct == 22 + assert histo[2].ct == 1787 + assert histo[3].ct == 182160 assert histo[4].ct == 18200200 } diff --git a/vlib/rand/random_numbers_test.v b/vlib/rand/random_numbers_test.v index a36853223..1e8952624 100644 --- a/vlib/rand/random_numbers_test.v +++ b/vlib/rand/random_numbers_test.v @@ -258,18 +258,18 @@ fn test_rand_string_from_set() { fn test_rand_fill_buffer_from_set() { rand.seed([u32(0), 1]) outputs := [ - [u8(52), 48, 55, 57, 50, 49, 53, 49, 53, 53], - [u8(57), 51, 56, 53, 56, 55, 56, 52, 56, 51], - [u8(57), 54, 52, 53, 57, 56, 57, 57, 48, 57], - [u8(57), 54, 50, 50, 52, 57, 53, 55, 50, 57], - [u8(51), 48, 55, 54, 49, 55, 53, 54, 52, 57], - [u8(57), 50, 48, 50, 48, 49, 52, 54, 50, 48], - [u8(55), 54, 51, 48, 51, 54, 49, 55, 56, 52], - [u8(52), 56, 52, 54, 50, 50, 50, 56, 54, 53], - [u8(53), 53, 55, 52, 51, 54, 55, 56, 51, 51], - [u8(52), 50, 51, 57, 54, 52, 50, 48, 49, 53], - [u8(49), 51, 54, 57, 55, 51, 48, 51, 51, 50], - [u8(56), 54, 50, 54, 51, 54, 49, 55, 57, 49], + [u8(50), 53, 57, 49, 53, 49, 56, 52, 57, 57], + [u8(55), 51, 52, 52, 50, 49, 53, 56, 52, 57], + [u8(56), 53, 52, 48, 52, 48, 57, 49, 50, 53], + [u8(52), 55, 53, 57, 54, 56, 49, 49, 49, 54], + [u8(50), 55, 50, 57, 50, 56, 57, 52, 52, 48], + [u8(49), 48, 48, 57, 57, 53, 49, 54, 50, 50], + [u8(49), 57, 50, 57, 51, 49, 49, 53, 54, 53], + [u8(56), 49, 52, 50, 56, 51, 48, 55, 54, 50], + [u8(50), 55, 54, 55, 48, 54, 50, 56, 51, 52], + [u8(51), 48, 50, 55, 54, 54, 49, 54, 54, 50], + [u8(50), 51, 51, 57, 50, 48, 56, 49, 54, 55], + [u8(51), 54, 51, 55, 56, 50, 54, 48, 51, 51], ] for output in outputs { mut buf := []u8{len: 10} @@ -281,18 +281,18 @@ fn test_rand_fill_buffer_from_set() { fn test_rand_string() { rand.seed([u32(0), 1]) outputs := [ - 'oIfPOHLBZTlvGhYtCMolfssbZ', - 'yHFGzDYeWIRldsBzMtkDhzQqF', - 'vwoeerAKsEZiludKtRKoCoiuE', - 'EQAaJDRZkvKTKNLkEPhWeEKFX', - 'rDIhxzIbDUIusiTuzLHRslfzu', - 'KCUoAEugYvUwzXcKRrAiwMzXH', - 'NIOXerfCpEwbfhLmbbWKjoxbL', - 'baJWQWarRRRmXCvMKcEjxQBpk', - 'CkVLxbJEPhviBTohEVBnMAFHZ', - 'ZdnGGhYShqzwnDXqHncLgLcdo', - 'zRiSLsgnApmvtlIVrQQaBzOJD', - 'VeeBcztImGquJnzEsXCdUaUed', + 'KNffDjUSbjLVyqkNLuklqtsEq', + 'yHxahszRjWILjfqmLoTsIPCaS', + 'JuuHjDLeuEjPMxhRzRUdOnegw', + 'NOJeMKbelMQAgBijGqLgGXgcy', + 'IRnHmuMXuddWLBsyeejpORynj', + 'SNyWxnrFYoWyOSLnIxTzkxdlq', + 'WzzZoOlnqzGKnUASSnlVqMtEg', + 'CveNYBaLaEwguzgwLxilypSDD', + 'XTBFsDOTHmTXcXjdmOqSAuAXz', + 'MFoKXRXLQSeebMegDyUJyaHfu', + 'EzaZjBJJWdGrASWqEPRRNQmgy', + 'gZvLtGiyCYQSKxWBMEqVvTytn', ] for output in outputs { assert rand.string(25) == output @@ -302,20 +302,20 @@ fn test_rand_string() { fn test_rand_hex() { rand.seed([u32(0), 1]) outputs := [ - 'ead1c993f5fdcb270ea39e69b', - '453459a8ca7fbe31ef2531a47', - 'd6a449a86a38f4f4ff0206046', - '62e4753bad85cb52a1fcce035', - '99afb9e9de2868945d57a3514', - '04a6e60621a2116cf92ce69d1', - 'f6490d14bee1935419cc92fd5', - '58b0e841bbf01c568ee13ebf6', - 'caf5bdf21f94f5a7a3f5a6b9f', - 'bb908760b8121510516de9eb6', - '93045e61ab45b7e3962c31c31', - 'bc07ed76c4c4b51eedc768a0b', - '1b23e1d08a6ba3d32cc4c85ee', - '96c44362a86d3e317eb56a053', + '035d15ec991bc42f502bcba28', + '49383ad7d8a51d44929ae9c04', + '30cdddd88e1963fb9367858e6', + '34f86c983ae6a38904dac56e8', + '6b1d08e94fb053688c1f47491', + '0700b91fea4808116b5deb7bc', + 'c1dbac7941cc16ec81b70ef2a', + 'c1cba301066e81a2df43cf051', + '51b7adc9dcd9695f004c686d1', + 'a764df15e0009e02d02f88598', + '2d8393b743092c806537724a0', + '6b59704086e84f4b62cb11cb1', + 'aebc14a5d7c2d447e6282f7ff', + '9ca8885813e42cb4380efeb84', ] for output in outputs { assert rand.hex(25) == output @@ -325,15 +325,15 @@ fn test_rand_hex() { fn test_rand_ascii() { rand.seed([u32(0), 1]) outputs := [ - r"KqdNI|*bDh42kn'z-}}nhmKd~", - r'IZ4wVRC-Q3@TviD>G4#Z(2}s4', - r"l7'1Ute)i?4Efo$sX^sOk;s%m", - r"3}3s^l(PeNY>I8&'a>$)AW14*", - r'V.a^b>GN"\\9e-Vs"&.vS0"F_', - r"U-;S}OY+e>Ca>p'UD|7{}?6`x", - r'$/EN5*2w@/KdN~pU||c=*yn6|', - r'FsLkK{gFrPn)>EVW53uJLa<8?', - r'1#PB<"P}pLtY@F}^\TfNyCDB$', + r'Yj6x`haolJh8UwGP.gq,Uj+\I', + r'5F@!@WF@tAulVN5-FqF;u"Y-9', + r'vo1xB>.MIu2lGj~f&$a4wNYeC', + r',M0o#M*QHa\myH{Bkkp#s&7/I', + r'abRN)Iq`)@b_*jKU_1x$Iv-ZF', + r'-@wQzJR0y+%{As"pqz.:Sz,L%', + r'1px~?MLh16UAVWO51>X~%3n7S', + r'Ed!vev=B-?OS)"W}N8gH5zU.R', + r'FR~8;&wB`!NFCRR,_IsIG{y|l', ] for output in outputs { assert rand.ascii(25) == output diff --git a/vlib/rand/wyrand/wyrand.v b/vlib/rand/wyrand/wyrand.v index 2c668973a..6db79bb69 100644 --- a/vlib/rand/wyrand/wyrand.v +++ b/vlib/rand/wyrand/wyrand.v @@ -8,8 +8,8 @@ import rand.buffer import rand.seed // Redefinition of some constants that we will need for pseudorandom number generation. -const wyp0 = u64(0xa0761d6478bd642f) -const wyp1 = u64(0xe7037ed1a0b428db) +const wyp0 = u64(0x2d358dccaa6c78a5) +const wyp1 = u64(0x8bb84b93962eacc9) pub const seed_len = 2 diff --git a/vlib/v/gen/c/cheaders.v b/vlib/v/gen/c/cheaders.v index 492dfcbed..2dfe84022 100644 --- a/vlib/v/gen/c/cheaders.v +++ b/vlib/v/gen/c/cheaders.v @@ -514,9 +514,8 @@ voidptr builtin__memdup(voidptr src, isize size); ' const c_wyhash_headers = ' -// ============== wyhash ============== -#ifndef wyhash_final_version_3 -#define wyhash_final_version_3 +#ifndef wyhash_final_version_4_2 +#define wyhash_final_version_4_2 #ifndef WYHASH_CONDOM // protections that produce different results: // 1: normal valid behavior @@ -603,34 +602,35 @@ static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];} // wyhash main function static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){ - const uint8_t *p=(const uint8_t *)key; seed^=*secret; uint64_t a, b; + const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0],secret[1]); uint64_t a, b; if (_likely_(len<=16)) { if (_likely_(len>=4)) { a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); } else if (_likely_(len>0)) { a=_wyr3(p,len); b=0; } else a=b=0; } else { size_t i=len; - if (_unlikely_(i>48)) { + if (_unlikely_(i>=48)) { uint64_t see1=seed, see2=seed; do { seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed); see1=_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^see1); see2=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see2); p+=48; i-=48; - } while(_likely_(i>48)); + } while(_likely_(i>=48)); seed^=see1^see2; } while(_unlikely_(i>16)) { seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed); i-=16; p+=16; } a=_wyr8(p+i-16); b=_wyr8(p+i-8); } - return _wymix(secret[1]^len,_wymix(a^secret[1],b^seed)); + a^=secret[1]; b^=seed; _wymum(&a,&b); + return _wymix(a^secret[0]^len,b^secret[1]); } // the default secret parameters -static const uint64_t _wyp[4] = {0xa0761d6478bd642f, 0xe7037ed1a0b428db, 0x8ebc6af09c88c6e3, 0x589965cc75374cc3}; +static const uint64_t _wyp[4] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull, 0x4d5a2da51de1aa47ull}; // a useful 64bit-64bit mix function to produce deterministic pseudo random numbers that can pass BigCrush and PractRand -static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=0xa0761d6478bd642f; B^=0xe7037ed1a0b428db; _wymum(&A,&B); return _wymix(A^0xa0761d6478bd642f,B^0xe7037ed1a0b428db);} +static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=0x2d358dccaa6c78a5ull; B^=0x8bb84b93962eacc9ull; _wymum(&A,&B); return _wymix(A^0x2d358dccaa6c78a5ull,B^0x8bb84b93962eacc9ull);} // the wyrand PRNG that pass BigCrush and PractRand -static inline uint64_t wyrand(uint64_t *seed){ *seed+=0xa0761d6478bd642f; return _wymix(*seed,*seed^0xe7037ed1a0b428db);} +static inline uint64_t wyrand(uint64_t *seed){ *seed+=0x2d358dccaa6c78a5ull; return _wymix(*seed,*seed^0x8bb84b93962eacc9ull);} #ifndef __vinix__ // convert any 64 bit pseudo random numbers to uniform distribution [0,1). It can be combined with wyrand, wyhash64 or wyhash. static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;} diff --git a/vlib/v/tests/generics/generics_with_nested_external_generics_fn_test.v b/vlib/v/tests/generics/generics_with_nested_external_generics_fn_test.v index abaff794e..0b8708aec 100644 --- a/vlib/v/tests/generics/generics_with_nested_external_generics_fn_test.v +++ b/vlib/v/tests/generics/generics_with_nested_external_generics_fn_test.v @@ -16,5 +16,5 @@ fn test_generics_with_nested_external_generics_fn() { ret := sample[int](arr, 5)! println(ret) - assert ret == [32, 45, 57, 11, 37] + assert ret == [37, 57, 45, 11, 32] } -- 2.39.5