From ef11575ce2f92fe4353ad9fce591499ad0889859 Mon Sep 17 00:00:00 2001 From: JalonSolov Date: Wed, 13 May 2026 03:21:51 -0400 Subject: [PATCH] hash.crc32: add crc32c, crc32k, and crc32q variants (#27149) --- vlib/hash/crc32/crc32.v | 111 +++++++++++++++++++++++---- vlib/hash/crc32/crc32_test.v | 145 ++++++++++++++++++++++++++++++++++- 2 files changed, 240 insertions(+), 16 deletions(-) diff --git a/vlib/hash/crc32/crc32.v b/vlib/hash/crc32/crc32.v index e70dcff76..371a70bfa 100644 --- a/vlib/hash/crc32/crc32.v +++ b/vlib/hash/crc32/crc32.v @@ -2,17 +2,24 @@ // Use of this source code is governed by an MIT license // that can be found in the LICENSE file. -// This is a very basic crc32 implementation -// at the moment with no architecture optimizations +// This is a fairly basic crc32 implementation, with 4 variants of the crc32 algorithm, and a way +// to create custom crc32 tables from user-provided polynomials. module crc32 // polynomials pub const ieee = u32(0xedb88320) pub const castagnoli = u32(0x82f63b78) pub const koopman = u32(0xeb31d82e) +// q is the standard CRC-32Q polynomial (MSB-first). +pub const q = u32(0x814141ab) +// q_reflected is the reflected (LSB-first) form of the CRC-32Q polynomial. +pub const q_reflected = u32(0xd5828281) -// The size of a CRC-32 checksum in bytes. -const size = 4 +// Named aliases for common CRC-32 variants. +pub const crc32c = castagnoli +pub const crc32k = koopman +pub const crc32q = q +pub const crc32q_reflected = q_reflected struct Crc32 { mut: @@ -21,44 +28,118 @@ mut: // generate_table populates a 256-word table from the specified polynomial `poly` // to represent the polynomial for efficient processing. -fn (mut c Crc32) generate_table(poly int) { +@[direct_array_access] +fn (mut c Crc32) generate_table(poly u32) { + c.table = []u32{len: 256} for i in 0 .. 256 { mut crc := u32(i) for _ in 0 .. 
8 { if crc & u32(1) == u32(1) { - crc = (crc >> 1) ^ u32(poly) + crc = (crc >> 1) ^ poly } else { crc >>= u32(1) } } - c.table << crc + c.table[i] = crc } } @[direct_array_access] -fn (c &Crc32) sum32(b []u8) u32 { - mut crc := ~u32(0) +fn (c &Crc32) update32(crc u32, b []u8) u32 { + mut next := crc for i in 0 .. b.len { - crc = c.table[u8(crc) ^ b[i]] ^ (crc >> 8) + next = c.table[u8(next) ^ b[i]] ^ (next >> 8) } - return ~crc + return next +} + +// update_state updates an internal CRC state with the bytes in `b`. +// Start from `~u32(0)` and finalize with `~state`. +pub fn (c &Crc32) update_state(state u32, b []u8) u32 { + return c.update32(state, b) } // checksum returns the CRC-32 checksum of data `b` by using the polynomial represented by `c`'s table. pub fn (c &Crc32) checksum(b []u8) u32 { - return c.sum32(b) + return ~c.update_state(~u32(0), b) +} + +// update returns the updated CRC-32 checksum for `b`, starting from `crc`. +// Use `crc = 0` for a fresh checksum, or pass a previous result to continue streaming. +pub fn (c &Crc32) update(crc u32, b []u8) u32 { + state := c.update_state(~crc, b) + return ~state } // new creates a `Crc32` polynomial. -pub fn new(poly int) &Crc32 { +pub fn new(poly u32) &Crc32 { mut c := &Crc32{} c.generate_table(poly) return c } -const ieee_poly = new(int(ieee)) +// sum_with_poly calculates the CRC-32 checksum of `b` for the provided polynomial. +// Built-in constants use their canonical parameter sets. Note that `crc32q` is computed MSB-first with a zero initial value and no final inversion, unlike `new(crc32q).checksum(b)`, which treats the value as a reflected (LSB-first) polynomial and inverts the state on entry and exit. 
+pub fn sum_with_poly(poly u32, b []u8) u32 { + return match poly { + ieee { ieee_poly.checksum(b) } + crc32c { crc32c_poly.checksum(b) } + crc32k { crc32k_poly.checksum(b) } + crc32q { crc32q_sum_internal(b) } + crc32q_reflected { crc32q_reflected_poly.checksum(b) } + else { new(poly).checksum(b) } + } +} + +const ieee_poly = new(ieee) +const crc32c_poly = new(crc32c) +const crc32k_poly = new(crc32k) +const crc32q_reflected_poly = new(crc32q_reflected) +const crc32q_table = crc32q_generate_table(q) + +@[direct_array_access] +fn crc32q_generate_table(poly u32) []u32 { + mut table := []u32{len: 256} + for i in 0 .. 256 { + mut crc := u32(i) << 24 + for _ in 0 .. 8 { + if crc & u32(0x80000000) != 0 { + crc = (crc << 1) ^ poly + } else { + crc <<= 1 + } + } + table[i] = crc + } + return table +} + +@[direct_array_access] +fn crc32q_sum_internal(b []u8) u32 { + mut crc := u32(0) + for byte in b { + idx := u8((crc >> 24) ^ byte) + crc = crc32q_table[idx] ^ (crc << 8) + } + return crc +} // sum calculates the CRC-32 checksum of `b` by using the IEEE polynomial. pub fn sum(b []u8) u32 { - return ieee_poly.sum32(b) + return ieee_poly.checksum(b) +} + +// sum_crc32c calculates the CRC-32C checksum of `b`. +pub fn sum_crc32c(b []u8) u32 { + return crc32c_poly.checksum(b) +} + +// sum_crc32k calculates the CRC-32K checksum of `b`. +pub fn sum_crc32k(b []u8) u32 { + return crc32k_poly.checksum(b) +} + +// sum_crc32q calculates the CRC-32Q checksum of `b`. 
+pub fn sum_crc32q(b []u8) u32 { + return crc32q_sum_internal(b) } diff --git a/vlib/hash/crc32/crc32_test.v b/vlib/hash/crc32/crc32_test.v index 735517948..63b1900ce 100644 --- a/vlib/hash/crc32/crc32_test.v +++ b/vlib/hash/crc32/crc32_test.v @@ -1,14 +1,157 @@ import hash.crc32 +const reflected_test_polys = [crc32.ieee, crc32.crc32c, crc32.crc32k, crc32.crc32q_reflected] + +fn sum_for_reflected_poly(poly u32, data []u8) u32 { + return match poly { + crc32.ieee { crc32.sum(data) } + crc32.crc32c { crc32.sum_crc32c(data) } + crc32.crc32k { crc32.sum_crc32k(data) } + crc32.crc32q_reflected { crc32.sum_with_poly(crc32.crc32q_reflected, data) } + else { panic('unexpected polynomial in test') } + } +} + +fn expected_reflected_crc_123456789(poly u32) u32 { + return match poly { + crc32.ieee { u32(0xcbf43926) } + crc32.crc32c { u32(0xe3069283) } + crc32.crc32k { u32(0x2d3dd0ae) } + crc32.crc32q_reflected { u32(0xa9cc8179) } + else { panic('unexpected polynomial in test') } + } +} + +fn expected_reflected_crc_a(poly u32) u32 { + return match poly { + crc32.ieee { u32(0xe8b7be43) } + crc32.crc32c { u32(0xc1d04330) } + crc32.crc32k { u32(0x0da2aa8a) } + crc32.crc32q_reflected { u32(0x248ca0a3) } + else { panic('unexpected polynomial in test') } + } +} + +fn assert_reflected_poly_paths_match(poly u32, data []u8) { + c := crc32.new(poly) + by_new := c.checksum(data) + assert by_new == crc32.sum_with_poly(poly, data) + assert by_new == sum_for_reflected_poly(poly, data) +} + fn test_hash_crc32() { b1 := 'testing crc32'.bytes() sum1 := crc32.sum(b1) assert sum1 == u32(1212124400) assert sum1.hex() == '483f8cf0' - c := crc32.new(int(crc32.ieee)) + c := crc32.new(crc32.ieee) b2 := 'testing crc32 again'.bytes() sum2 := c.checksum(b2) assert sum2 == u32(1420327025) assert sum2.hex() == '54a87871' } + +fn test_hash_crc32_variants() { + data := '123456789'.bytes() + for poly in reflected_test_polys { + expected := expected_reflected_crc_123456789(poly) + assert 
sum_for_reflected_poly(poly, data) == expected + assert_reflected_poly_paths_match(poly, data) + } +} + +fn test_hash_crc32q_standard() { + data := '123456789'.bytes() + assert crc32.sum_crc32q(data) == u32(0x3010bf7f) + assert crc32.sum_with_poly(crc32.crc32q, data) == u32(0x3010bf7f) + assert crc32.sum_crc32q('a'.bytes()) == u32(0xd1112b6b) +} + +fn test_hash_crc32_update() { + data := '123456789'.bytes() + part1 := data[..4] + part2 := data[4..] + + c := crc32.new(crc32.ieee) + mut acc := u32(0) + acc = c.update(acc, part1) + acc = c.update(acc, part2) + + assert acc == c.checksum(data) + assert acc.hex() == 'cbf43926' +} + +fn test_hash_crc32_edge_cases() { + empty := ''.bytes() + one := 'a'.bytes() + for poly in reflected_test_polys { + assert sum_for_reflected_poly(poly, empty) == u32(0) + assert sum_for_reflected_poly(poly, one) == expected_reflected_crc_a(poly) + } + assert crc32.sum_crc32q(empty) == u32(0) +} + +fn test_hash_crc32_sum_with_poly() { + data := 'variant helper'.bytes() + for poly in reflected_test_polys { + assert_reflected_poly_paths_match(poly, data) + } + assert crc32.sum_with_poly(crc32.crc32q, data) == crc32.sum_crc32q(data) +} + +fn test_hash_crc32_sum_with_poly_custom() { + data := 'custom poly checksum'.bytes() + poly := u32(0xa833982b) + + assert crc32.sum_with_poly(poly, data) == crc32.new(poly).checksum(data) +} + +fn test_hash_crc32_all_polys_consistent() { + data := 'all polys consistent'.bytes() + part1 := data[..7] + part2 := data[7..] 
+ + for poly in reflected_test_polys { + c := crc32.new(poly) + full := c.checksum(data) + + mut split := u32(0) + split = c.update(split, part1) + split = c.update(split, part2) + + assert full == split + assert full == sum_for_reflected_poly(poly, data) + } +} + +fn test_hash_crc32_streaming_chunk_sizes() { + data := ('streaming data block '.repeat(64)).bytes() + for poly in reflected_test_polys { + c := crc32.new(poly) + expected := c.checksum(data) + for chunk_size in [1, 2, 3, 5, 7, 16, 31, 64, 128] { + mut state := ~u32(0) + mut start := 0 + for start < data.len { + end := if start + chunk_size < data.len { start + chunk_size } else { data.len } + state = c.update_state(state, data[start..end]) + start = end + } + assert ~state == expected + } + } +} + +fn test_hash_crc32_update_state() { + data := 'stateful streaming'.bytes() + part1 := data[..5] + part2 := data[5..] + c := crc32.new(crc32.crc32c) + + mut state := ~u32(0) + state = c.update_state(state, part1) + state = c.update_state(state, part2) + + assert ~state == c.checksum(data) +} -- 2.39.5