From c410aee218fca542202f5601d77f214746dceb5f Mon Sep 17 00:00:00 2001 From: JalonSolov Date: Mon, 18 May 2026 07:19:03 -0400 Subject: [PATCH] hash: pure V adler32, RFC-1950 compliant (#27176) --- vlib/hash/README.md | 8 ++++ vlib/hash/adler32/adler32.v | 51 +++++++++++++++++++++++++ vlib/hash/adler32/adler32_test.v | 64 ++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 vlib/hash/adler32/adler32.v create mode 100644 vlib/hash/adler32/adler32_test.v diff --git a/vlib/hash/README.md b/vlib/hash/README.md index 0c779121a..33cf50e29 100644 --- a/vlib/hash/README.md +++ b/vlib/hash/README.md @@ -5,3 +5,11 @@ that is highly content dependent, so even slightly different content will produce widely different hashes. Hash functions are useful for implementing maps, caches etc. + +## Available submodules + +- `hash.adler32` - Adler-32 (RFC 1950 checksum used by zlib) +- `hash.crc32` - CRC-32 variants (IEEE 802.3 `crc32`, Castagnoli `crc32c`, Koopman `crc32k`, + CRC-32Q `crc32q`) +- `hash.crc64` - CRC-64-ECMA-182 +- `hash.fnv1a` - Fowler-Noll-Vo hashes diff --git a/vlib/hash/adler32/adler32.v b/vlib/hash/adler32/adler32.v new file mode 100644 index 000000000..e5a2a99b5 --- /dev/null +++ b/vlib/hash/adler32/adler32.v @@ -0,0 +1,51 @@ +// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +// This is a pure V implementation of Adler-32 from RFC 1950. +// Parameters: BASE=65521, init=1, output=(s2 << 16) | s1. +module adler32 + +// base is the largest prime smaller than 2^16, mandated by RFC 1950. +pub const base = u32(65521) +// nmax is the largest chunk size that keeps intermediate sums inside u32. +// See RFC 1950 Appendix and zlib's adler32 implementation. +pub const nmax = 5552 + +// update_state updates an Adler-32 state with `data`. +// For RFC-1950 compliant checksums, use state `1` for a new stream. 
+@[direct_array_access]
+pub fn update_state(state u32, data []u8) u32 {
+	mut s1 := state & u32(0xffff) // low 16 bits of `state`: running byte sum
+	mut s2 := state >> 16 // high 16 bits of `state`: running sum of the s1 values
+	mut pos := 0
+	for pos < data.len {
+		block_len := if data.len - pos > nmax { nmax } else { data.len - pos } // at most nmax bytes per pass
+		for _ in 0 .. block_len {
+			s1 += data[pos]
+			if s1 >= base { // a byte adds at most 255, so one subtraction replaces `% base`
+				s1 -= base
+			}
+			s2 += s1 // s2 is left unreduced inside the block
+			pos++
+		}
+		s2 %= base // single reduction of s2 per block
+	}
+	return (s2 << 16) | s1 // pack s2 into the high half and s1 into the low half
+}
+
+// checksum returns the Adler-32 checksum of `data`: `update_state` applied to the initial state 1.
+pub fn checksum(data []u8) u32 {
+	return update_state(u32(1), data)
+}
+
+// update continues the Adler-32 checksum `adler` over `data` by forwarding to `update_state`.
+// Pass `adler = 1` to start a fresh checksum.
+pub fn update(adler u32, data []u8) u32 {
+	return update_state(adler, data)
+}
+
+// sum is an alias for `checksum`; it returns `checksum(data)` unchanged.
+pub fn sum(data []u8) u32 {
+	return checksum(data)
+}
diff --git a/vlib/hash/adler32/adler32_test.v b/vlib/hash/adler32/adler32_test.v
new file mode 100644
index 000000000..9a2b28f35
--- /dev/null
+++ b/vlib/hash/adler32/adler32_test.v
@@ -0,0 +1,64 @@
+import hash.adler32
+
+fn test_adler32_rfc_vectors() { // fixed inputs checked against hard-coded checksums
+	assert adler32.sum([]u8{}) == u32(0x00000001) // empty input leaves the initial state 1 untouched
+	assert adler32.sum('123456789'.bytes()) == u32(0x091e01de)
+	assert adler32.sum('Wikipedia'.bytes()) == u32(0x11e60398)
+}
+
+fn test_adler32_basic_aliases() { // `checksum` and `sum` must return the same value
+	data := 'Hello world!'.bytes()
+	expected := u32(0x1d09045e)
+	assert adler32.checksum(data) == expected
+	assert adler32.sum(data) == expected
+}
+
+fn test_adler32_update_matches_full_sum() { // feeding three consecutive slices equals one full pass
+	data := 'streaming adler32 data'.bytes()
+	part1 := data[..8]
+	part2 := data[8..14]
+	part3 := data[14..]
+
+	mut state := u32(1) // 1 is the state of a fresh checksum
+	state = adler32.update(state, part1)
+	state = adler32.update(state, part2)
+	state = adler32.update(state, part3)
+
+	assert state == adler32.sum(data)
+}
+
+fn test_adler32_update_state_matches_update() { // `update_state` and `update` agree for every chunk size
+	data := ('chunked data '.repeat(80)).bytes()
+	mut via_state := u32(1)
+	mut via_update := u32(1)
+	for chunk_size in [1, 2, 3, 5, 8, 16, 64, 257] {
+		mut state_a := u32(1) // restart both streams for each chunk size
+		mut state_b := u32(1)
+		mut i := 0
+		for i < data.len {
+			end := if i + chunk_size < data.len { i + chunk_size } else { data.len } // clamp the last chunk
+			chunk := data[i..end]
+			state_a = adler32.update_state(state_a, chunk)
+			state_b = adler32.update(state_b, chunk)
+			i = end
+		}
+		assert state_a == adler32.sum(data)
+		assert state_b == adler32.sum(data)
+		via_state = state_a // keep the results of the last chunk size for the final cross-check
+		via_update = state_b
+	}
+	assert via_state == via_update
+}
+
+fn test_adler32_all_bytes() { // input holds every byte value 0..255 exactly once
+	mut all_bytes := []u8{len: 256}
+	for i in 0 .. 256 {
+		all_bytes[i] = u8(i)
+	}
+	assert adler32.sum(all_bytes) == u32(0xadf67f81)
+}
+
+fn test_adler32_large_input() { // 7000 bytes exceed nmax (5552), so more than one block is processed
+	data := 'a'.repeat(7000).bytes()
+	assert adler32.sum(data) == u32(0x1a305cef)
+}
-- 
2.39.5