From 95861b8bdeeddc71d79c3f09f56a66bf01106ecd Mon Sep 17 00:00:00 2001 From: Richard Wheeler Date: Mon, 8 Jun 2026 14:19:08 -0400 Subject: [PATCH] hash.huffman: add shared canonical Huffman builder; migrate HPACK to it (#27392) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `hash.huffman`, a small module that performs the canonical Huffman code-assignment step (RFC 1951 §3.2.2 bl_count / next_code) shared by compress.deflate and the HTTP/2 HPACK code in net.http. It is parameterized by the two things the callers genuinely differ on, with no defaults (`@[required]` config fields): the maximum code length (DEFLATE caps at 15 bits and decodes via a flat 2^max_bits table; HPACK goes to 30 bits and must use a map / bit-at-a-time decode) and the bit order (DEFLATE is LSB-first and bit-reverses each code; HPACK is MSB-first). It exposes `build()` (per-symbol codes + metadata) plus two decode-structure builders — `flat_table()` (allocation-lean DEFLATE-style flat lookup, both bit orders) and `decode_map()` (HPACK-style) — and validates lengths, rejecting over-subscribed codes via the Kraft inequality. Migrate net.http HPACK to it: the RFC 7541 Appendix B table is canonical, so the generated `h2_huffman_codes` array (258 lines) is dropped and the codes are rebuilt at startup from the per-symbol lengths via `huffman.build(...)`. A new regression test pins a few known Appendix B codes so a bad rebuild is caught. This is the first half of vlang/v#27358; migrating compress.deflate to the same builder (verified performance-neutral via before/after inflate benchmarks, since deflate is stable and used by zlib/gzip) is the follow-up PR, as the issue outlines — flat_table() is the entry point it consumes. 
Co-authored-by: Richard Wheeler Co-authored-by: Claude Opus 4.8 --- vlib/hash/huffman/huffman.v | 225 +++++++++++++++++++++ vlib/hash/huffman/huffman_test.v | 145 +++++++++++++ vlib/net/http/h2_hpack_huffman.v | 36 ++-- vlib/net/http/h2_hpack_huffman_table.v | 270 +------------------------ vlib/net/http/h2_hpack_test.v | 19 ++ 5 files changed, 417 insertions(+), 278 deletions(-) create mode 100644 vlib/hash/huffman/huffman.v create mode 100644 vlib/hash/huffman/huffman_test.v diff --git a/vlib/hash/huffman/huffman.v b/vlib/hash/huffman/huffman.v new file mode 100644 index 000000000..ba55049a9 --- /dev/null +++ b/vlib/hash/huffman/huffman.v @@ -0,0 +1,225 @@ +// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +// Module huffman builds canonical Huffman codes from a per-symbol array of bit +// lengths. This is the assignment step shared by RFC 1951 (DEFLATE, used by +// compress.deflate / zlib / gzip) and RFC 7541 Appendix B (HPACK, used by the +// HTTP/2 code in net.http): given only the lengths, both standards rebuild the +// exact same codes via the `bl_count` / `next_code` algorithm (RFC 1951 §3.2.2). +// +// What the two callers do NOT share — and therefore what this module is +// parameterized by — is the maximum code length (DEFLATE caps at 15 bits and +// decodes via a flat 2^max_bits table; HPACK goes up to 30 bits, where a flat +// table is infeasible) and the bit order (DEFLATE is LSB-first and bit-reverses +// every code, HPACK is MSB-first). The bit I/O loops and codec-specific +// semantics (EOS/padding, end-of-block, extra bits, distance alphabets) stay in +// the callers. +module huffman + +// BitOrder selects how a code's bits are laid out in the returned `u32`. +pub enum BitOrder { + // msb_first keeps the canonical code as-is: the first transmitted bit is + // the most-significant bit of the code (RFC 7541 / HPACK). 
+ msb_first + // lsb_first reverses each code within its length, so the first transmitted + // bit is the least-significant bit (RFC 1951 / DEFLATE). This is the form a + // flat LSB-first decode table is indexed by. + lsb_first +} + +// Config parameterizes build() and flat_table(). All fields are required and +// have no defaults: the two callers have intentionally different requirements (15 vs +// 30 bits, LSB vs MSB), so an implicit default would silently fit only one of them. +@[params] +pub struct Config { +pub: + lengths []int @[required] // per-symbol code length in bits; 0 marks an unused symbol + max_bits int @[required] // maximum allowed code length (1..32); must be >= every nonzero length + bit_order BitOrder @[required] // .msb_first (HPACK) or .lsb_first (DEFLATE) +} + +// Table is the result of build(): the canonical code for every symbol, plus the +// metadata a caller needs to drive its own bit I/O and to build a decode +// structure (flat_table() for small max_bits, decode_map() otherwise). +@[noinit] +pub struct Table { +pub: + codes []u32 // per-symbol code, right-aligned in a u32, in `bit_order` + lengths []int // per-symbol bit length (a copy of the input) + max_bits int // the max_bits the codes were built with (copied from Config) + bit_order BitOrder // bit layout of every entry in `codes` (copied from Config) +} + +// max_flat_bits is the largest max_bits for which flat_table() will +// allocate a table (2^max_bits entries). DEFLATE's 15 fits; HPACK's 30 does +// not and must use decode_map() / a bit-at-a-time decoder instead. +pub const max_flat_bits = 18 + +// flat_invalid_entry marks a flat_table() slot that no code maps to. +pub const flat_invalid_entry = u32(0xffff_ffff) + +// flat_length_bits is how many low bits of a flat_table() entry hold the +// code length; the symbol is stored in the remaining high bits. 5 bits hold +// lengths up to 31, covering every max_bits <= max_flat_bits. 
+pub const flat_length_bits = 5 + +// next_codes validates `lengths` against `max_bits` and returns the canonical +// starting code for each length (RFC 1951 §3.2.2): next_code[l] is the code the +// first symbol of length l receives, and callers post-increment it per symbol. +// It also reports whether the code is `complete` (uses the whole code space, +// i.e. the Kraft inequality holds with equality), which lets flat_table() skip +// pre-filling the table. It is the single source of truth for the code +// assignment shared by build() and flat_table(). Length-0 symbols are unused and +// are not counted. Returns an error when max_bits is < 1 or > 32, a length is +// negative or exceeds max_bits, or the code is over-subscribed (Kraft +// inequality); an incomplete (under-subscribed) code is accepted. +fn next_codes(lengths []int, max_bits int) !([]u32, bool) { + if max_bits < 1 { + return error('huffman: max_bits must be >= 1, got ${max_bits}') + } + if max_bits > 32 { + return error('huffman: max_bits ${max_bits} exceeds 32 (u32 code storage)') + } + mut bl_count := []int{len: max_bits + 1} + for sym, l in lengths { + if l < 0 { + return error('huffman: negative length ${l} for symbol ${sym}') + } + if l > max_bits { + return error('huffman: length ${l} for symbol ${sym} exceeds max_bits ${max_bits}') + } + if l > 0 { + bl_count[l]++ + } + } + // Kraft inequality: sum over used symbols of 2^(max_bits - len) must not + // exceed 2^max_bits, i.e. the code must not be over-subscribed. left == 0 at + // the end means the code is complete (covers every index of a flat table). + mut left := u64(1) << max_bits + for bits in 1 .. max_bits + 1 { + used := u64(bl_count[bits]) << (max_bits - bits) + if used > left { + return error('huffman: over-subscribed code (lengths exceed the code space)') + } + left -= used + } + mut next_code := []u32{len: max_bits + 1} + mut c := u32(0) + for bits in 1 .. 
max_bits + 1 { + c = (c + u32(bl_count[bits - 1])) << 1 + next_code[bits] = c + } + return next_code, left == 0 +} + +// build assigns a canonical Huffman code to every symbol from its bit length. +// Symbols with length 0 are unused and get code 0. It returns the codes plus +// the metadata for a decode structure; use it when you need the per-symbol +// codes (e.g. HPACK encoding) and/or decode_map(). Callers that only want a +// flat decode table should use flat_table(), which avoids materializing the +// codes array. See next_codes() for the validation rules. +pub fn build(cfg Config) !Table { + mut next_code, _ := next_codes(cfg.lengths, cfg.max_bits)! // the completeness flag is only needed by flat_table() + mut codes := []u32{len: cfg.lengths.len} + for sym, l in cfg.lengths { + if l == 0 { + continue // unused symbol: codes[sym] keeps its zero value + } + code := next_code[l] + next_code[l]++ // the next symbol of length l gets the following code + codes[sym] = if cfg.bit_order == .lsb_first { bit_reverse(code, l) } else { code } + } + return Table{ + codes: codes + lengths: cfg.lengths.clone() // copy, so the Table does not share the caller's array + max_bits: cfg.max_bits + bit_order: cfg.bit_order + } +} + +// flat_table builds a 2^max_bits lookup table for fast decode of short codes +// (the DEFLATE strategy), directly from the lengths. Each entry is +// `(symbol << flat_length_bits) | length`, or flat_invalid_entry for indices no +// code reaches. The table is indexed by the next max_bits bits read from the +// stream in `bit_order`. It returns an error if max_bits > max_flat_bits, since +// the table would be prohibitively large (use build() + decode_map() instead). +// +// Unlike build(), this allocates no per-symbol codes array and does not copy +// the lengths: it assigns each code inline while filling the table, so a hot +// caller rebuilding a tree per block (compress.deflate) pays no extra +// allocation over hand-rolling the loop. 
+pub fn flat_table(cfg Config) ![]u32 { + if cfg.max_bits > max_flat_bits { + return error('huffman: max_bits ${cfg.max_bits} > max_flat_bits ${max_flat_bits}; use build() + decode_map()') + } + mut next_code, complete := next_codes(cfg.lengths, cfg.max_bits)! + table_size := 1 << cfg.max_bits + // A complete code writes every table slot, so the invalid pre-fill is dead + // work; allocate zeroed (vcalloc) and skip it. An incomplete code leaves + // gaps that must read back as flat_invalid_entry, so it pays the fill. + mut table := if complete { + []u32{len: table_size} + } else { + []u32{len: table_size, init: flat_invalid_entry} + } + for sym, l in cfg.lengths { + if l == 0 { + continue // unused symbol: it owns no table slots + } + raw := next_code[l] // canonical (MSB-first) code for this symbol + next_code[l]++ + entry := (u32(sym) << flat_length_bits) | u32(l) // symbol in the high bits, length in the low flat_length_bits bits + // The (max_bits - l) bits beyond the code are don't-cares, so a code of + // length l fills 2^(max_bits - l) table slots. Where those slots sit + // depends on bit_order: LSB-first codes occupy every index whose low l + // bits match the code (stride by 2^l); MSB-first codes occupy a + // contiguous block whose high l bits match the code. + if cfg.bit_order == .lsb_first { + step := 1 << l + mut idx := int(bit_reverse(raw, l)) // first slot: the code itself in LSB-first form + for idx < table_size { + table[idx] = entry + idx += step + } + } else { + block := 1 << (cfg.max_bits - l) + base := int(raw) * block // first slot of the block: the code shifted into the high l bits + for k in 0 .. block { + table[base + k] = entry + } + } + } + return table +} + +// decode_map builds a map from a packed (length, code) key to its symbol, for +// codecs whose max_bits is too large for a flat table (HPACK's 30 bits). The +// key is `(u64(length) << 32) | code`; a decoder accumulates bits MSB-first and +// looks up after each bit. Only defined for .msb_first tables, where the +// accumulated value matches the stored code; it returns an error otherwise. 
+pub fn (t Table) decode_map() !map[u64]int { + if t.bit_order != .msb_first { + return error('huffman: decode_map requires .msb_first bit order') + } + mut m := map[u64]int{} + for sym, l in t.lengths { + if l == 0 { + continue + } + m[(u64(l) << 32) | u64(t.codes[sym])] = sym + } + return m +} + +// bit_reverse reverses the low `n` bits of `v` (used to convert a canonical +// MSB-first code into the LSB-first form a DEFLATE bit reader consumes). +fn bit_reverse(v u32, n int) u32 { + mut r := u32(0) + mut val := v + for _ in 0 .. n { + r = (r << 1) | (val & 1) + val >>= 1 + } + return r +} diff --git a/vlib/hash/huffman/huffman_test.v b/vlib/hash/huffman/huffman_test.v new file mode 100644 index 000000000..a5f23df22 --- /dev/null +++ b/vlib/hash/huffman/huffman_test.v @@ -0,0 +1,145 @@ +module huffman + +// The worked example from RFC 1951 §3.2.2: symbols A..H with these lengths +// produce these exact canonical (MSB-first) codes. +fn test_rfc1951_canonical_example() { + lengths := [3, 3, 3, 3, 3, 2, 4, 4] // A B C D E F G H + t := build(lengths: lengths, max_bits: 4, bit_order: .msb_first)! + expected := [u32(0b010), 0b011, 0b100, 0b101, 0b110, 0b00, 0b1110, 0b1111] + assert t.codes == expected + assert t.lengths == lengths + assert t.max_bits == 4 +} + +fn test_lsb_first_reverses_each_code() { + lengths := [3, 3, 3, 3, 3, 2, 4, 4] + msb := build(lengths: lengths, max_bits: 4, bit_order: .msb_first)! + lsb := build(lengths: lengths, max_bits: 4, bit_order: .lsb_first)! + // Each LSB code is the MSB code bit-reversed within its length. + for sym, l in lengths { + assert lsb.codes[sym] == bit_reverse(msb.codes[sym], l) + } + // e.g. F (len 2, code 00) is unchanged; A (len 3, 010) -> 010 reversed. 
+ assert lsb.codes[5] == 0b00 + assert lsb.codes[0] == 0b010 // 010 reversed is still 010 + assert lsb.codes[6] == bit_reverse(u32(0b1110), 4) // 1110 -> 0111 +} + +fn test_flat_table_round_trips_lsb() { + lengths := [3, 3, 3, 3, 3, 2, 4, 4] + t := build(lengths: lengths, max_bits: 4, bit_order: .lsb_first)! + table := flat_table(lengths: lengths, max_bits: 4, bit_order: .lsb_first)! + assert table.len == 1 << 4 + // Every symbol must decode back from its code in every don't-care variant. + for sym, l in lengths { + step := 1 << l + mut idx := int(t.codes[sym]) + for idx < table.len { + entry := table[idx] + assert entry != flat_invalid_entry + assert int(entry & ((u32(1) << flat_length_bits) - 1)) == l + assert int(entry >> flat_length_bits) == sym + idx += step + } + } +} + +fn test_flat_table_round_trips_msb() { + // MSB-first flat table: a code of length l fills the contiguous block whose + // high l bits equal the code (the low max_bits-l bits are don't-cares). + lengths := [3, 3, 3, 3, 3, 2, 4, 4] + t := build(lengths: lengths, max_bits: 4, bit_order: .msb_first)! + table := flat_table(lengths: lengths, max_bits: 4, bit_order: .msb_first)! + assert table.len == 1 << 4 + for sym, l in lengths { + block := 1 << (t.max_bits - l) + base := int(t.codes[sym]) * block + for k in 0 .. block { + entry := table[base + k] + assert entry != flat_invalid_entry + assert int(entry & ((u32(1) << flat_length_bits) - 1)) == l + assert int(entry >> flat_length_bits) == sym + } + } +} + +fn test_flat_table_incomplete_marks_gaps() { + // A single length-1 code under-subscribes a 2-bit table: half the indices + // belong to no code and must read back as flat_invalid_entry. This is the + // path the complete-code fast path must NOT take. + table := flat_table(lengths: [1], max_bits: 2, bit_order: .lsb_first)! + assert table.len == 4 + // code 0, len 1, lsb stride 2 -> indices 0 and 2 are the symbol; 1 and 3 gaps. 
+ assert int(table[0] >> flat_length_bits) == 0 + assert int(table[0] & ((u32(1) << flat_length_bits) - 1)) == 1 + assert table[2] == table[0] + assert table[1] == flat_invalid_entry + assert table[3] == flat_invalid_entry +} + +fn test_decode_map_msb() { + lengths := [3, 3, 3, 3, 3, 2, 4, 4] + t := build(lengths: lengths, max_bits: 4, bit_order: .msb_first)! + m := t.decode_map()! + for sym, l in lengths { + key := (u64(l) << 32) | u64(t.codes[sym]) + assert m[key] == sym + } +} + +fn test_decode_map_rejects_lsb() { + t := build(lengths: [1, 1], max_bits: 1, bit_order: .lsb_first)! + if _ := t.decode_map() { + assert false, 'decode_map should reject lsb_first tables' + } +} + +fn test_unused_symbols_get_zero_code() { + // A length-0 symbol is unused; it must not consume a code. + t := build(lengths: [1, 0, 1], max_bits: 1, bit_order: .msb_first)! + assert t.codes[1] == 0 + assert t.codes[0] == 0 + assert t.codes[2] == 1 +} + +fn test_error_length_exceeds_max_bits() { + if _ := build(lengths: [5], max_bits: 4, bit_order: .msb_first) { + assert false, 'length > max_bits must error' + } +} + +fn test_error_negative_length() { + if _ := build(lengths: [-1], max_bits: 4, bit_order: .msb_first) { + assert false, 'negative length must error' + } +} + +fn test_error_max_bits_too_small() { + if _ := build(lengths: [1], max_bits: 0, bit_order: .msb_first) { + assert false, 'max_bits < 1 must error' + } +} + +fn test_error_over_subscribed() { + // Three length-1 codes cannot coexist (only two 1-bit codes exist). + if _ := build(lengths: [1, 1, 1], max_bits: 1, bit_order: .msb_first) { + assert false, 'over-subscribed code must error' + } +} + +fn test_incomplete_code_is_allowed() { + // A single length-2 code under-subscribes the space; that is permitted. + t := build(lengths: [2], max_bits: 2, bit_order: .msb_first)! 
+ assert t.codes[0] == 0 +} + +fn test_flat_table_rejects_wide_codes() { + if _ := flat_table( + lengths: [max_flat_bits + 1] + max_bits: max_flat_bits + 1 + bit_order: .lsb_first + ) + { + assert false, 'flat table must reject max_bits > max_flat_bits' + } +} diff --git a/vlib/net/http/h2_hpack_huffman.v b/vlib/net/http/h2_hpack_huffman.v index 6557a0e0f..499cafe1f 100644 --- a/vlib/net/http/h2_hpack_huffman.v +++ b/vlib/net/http/h2_hpack_huffman.v @@ -3,23 +3,33 @@ // that can be found in the LICENSE file. module http +import hash.huffman + // h2_huffman_eos is the index of the EOS (end-of-string) symbol in the // HPACK Huffman table (RFC 7541 Appendix B). const h2_huffman_eos = 256 +// h2_max_code_bits is the longest HPACK Huffman code length (RFC 7541 +// Appendix B). 30 bits is too wide for a flat decode table, so decoding goes +// bit-at-a-time against h2_huffman_decode_map. +const h2_max_code_bits = 30 + +// h2_huffman_table holds the canonical HPACK Huffman codes, rebuilt at startup +// from the per-symbol bit lengths via the shared hash.huffman builder. The +// codes are MSB-first (the order they appear on the wire). +const h2_huffman_table = build_h2_huffman_table() + // h2_huffman_decode_map maps a (bit_length, code) pair, packed as // `(bit_length << 32) | code`, to its symbol. It is built once at startup // from the canonical code table and is read-only afterwards. -const h2_huffman_decode_map = build_h2_huffman_decode_map() +const h2_huffman_decode_map = h2_huffman_table.decode_map() or { panic('hpack: ${err}') } -fn build_h2_huffman_decode_map() map[u64]int { - mut m := map[u64]int{} - for sym in 0 .. 
h2_huffman_codes.len { - l := u64(h2_huffman_code_lens[sym]) - code := u64(h2_huffman_codes[sym]) - m[(l << 32) | code] = sym - } - return m +fn build_h2_huffman_table() huffman.Table { + return huffman.build( + lengths: h2_huffman_code_lens[..].map(int(it)) + max_bits: h2_max_code_bits + bit_order: .msb_first + ) or { panic('hpack: ${err}') } } // h2_huffman_encode returns the HPACK Huffman encoding of `input` @@ -30,8 +40,8 @@ fn h2_huffman_encode(input []u8) []u8 { mut acc := u64(0) mut nbits := 0 for b in input { - code := u64(h2_huffman_codes[b]) - clen := int(h2_huffman_code_lens[b]) + code := u64(h2_huffman_table.codes[b]) + clen := h2_huffman_table.lengths[b] acc = (acc << clen) | code nbits += clen for nbits >= 8 { @@ -59,8 +69,8 @@ fn h2_huffman_decode(input []u8) ![]u8 { for bit := 7; bit >= 0; bit-- { cur = (cur << 1) | u64((b >> u8(bit)) & 1) cur_len++ - if cur_len > 30 { - return error('hpack: invalid huffman code (no symbol within 30 bits)') + if cur_len > h2_max_code_bits { + return error('hpack: invalid huffman code (no symbol within ${h2_max_code_bits} bits)') } if sym := h2_huffman_decode_map[(u64(cur_len) << 32) | cur] { if sym == h2_huffman_eos { diff --git a/vlib/net/http/h2_hpack_huffman_table.v b/vlib/net/http/h2_hpack_huffman_table.v index 4b9b72edd..84378d6f0 100644 --- a/vlib/net/http/h2_hpack_huffman_table.v +++ b/vlib/net/http/h2_hpack_huffman_table.v @@ -2,271 +2,11 @@ // Source: HTTP/2 (RFC 7541) static Huffman table, 256 byte symbols + EOS (257). module http -// h2_huffman_codes holds the Huffman code for each symbol, right-aligned in a -// u32. Index 0..255 are byte values; index 256 is the EOS symbol. The matching -// bit length for each code is in h2_huffman_code_lens. 
-const h2_huffman_codes = [ - u32(0x1ff8), - 0x7fffd8, - 0xfffffe2, - 0xfffffe3, - 0xfffffe4, - 0xfffffe5, - 0xfffffe6, - 0xfffffe7, - 0xfffffe8, - 0xffffea, - 0x3ffffffc, - 0xfffffe9, - 0xfffffea, - 0x3ffffffd, - 0xfffffeb, - 0xfffffec, - 0xfffffed, - 0xfffffee, - 0xfffffef, - 0xffffff0, - 0xffffff1, - 0xffffff2, - 0x3ffffffe, - 0xffffff3, - 0xffffff4, - 0xffffff5, - 0xffffff6, - 0xffffff7, - 0xffffff8, - 0xffffff9, - 0xffffffa, - 0xffffffb, - 0x14, - 0x3f8, - 0x3f9, - 0xffa, - 0x1ff9, - 0x15, - 0xf8, - 0x7fa, - 0x3fa, - 0x3fb, - 0xf9, - 0x7fb, - 0xfa, - 0x16, - 0x17, - 0x18, - 0x0, - 0x1, - 0x2, - 0x19, - 0x1a, - 0x1b, - 0x1c, - 0x1d, - 0x1e, - 0x1f, - 0x5c, - 0xfb, - 0x7ffc, - 0x20, - 0xffb, - 0x3fc, - 0x1ffa, - 0x21, - 0x5d, - 0x5e, - 0x5f, - 0x60, - 0x61, - 0x62, - 0x63, - 0x64, - 0x65, - 0x66, - 0x67, - 0x68, - 0x69, - 0x6a, - 0x6b, - 0x6c, - 0x6d, - 0x6e, - 0x6f, - 0x70, - 0x71, - 0x72, - 0xfc, - 0x73, - 0xfd, - 0x1ffb, - 0x7fff0, - 0x1ffc, - 0x3ffc, - 0x22, - 0x7ffd, - 0x3, - 0x23, - 0x4, - 0x24, - 0x5, - 0x25, - 0x26, - 0x27, - 0x6, - 0x74, - 0x75, - 0x28, - 0x29, - 0x2a, - 0x7, - 0x2b, - 0x76, - 0x2c, - 0x8, - 0x9, - 0x2d, - 0x77, - 0x78, - 0x79, - 0x7a, - 0x7b, - 0x7ffe, - 0x7fc, - 0x3ffd, - 0x1ffd, - 0xffffffc, - 0xfffe6, - 0x3fffd2, - 0xfffe7, - 0xfffe8, - 0x3fffd3, - 0x3fffd4, - 0x3fffd5, - 0x7fffd9, - 0x3fffd6, - 0x7fffda, - 0x7fffdb, - 0x7fffdc, - 0x7fffdd, - 0x7fffde, - 0xffffeb, - 0x7fffdf, - 0xffffec, - 0xffffed, - 0x3fffd7, - 0x7fffe0, - 0xffffee, - 0x7fffe1, - 0x7fffe2, - 0x7fffe3, - 0x7fffe4, - 0x1fffdc, - 0x3fffd8, - 0x7fffe5, - 0x3fffd9, - 0x7fffe6, - 0x7fffe7, - 0xffffef, - 0x3fffda, - 0x1fffdd, - 0xfffe9, - 0x3fffdb, - 0x3fffdc, - 0x7fffe8, - 0x7fffe9, - 0x1fffde, - 0x7fffea, - 0x3fffdd, - 0x3fffde, - 0xfffff0, - 0x1fffdf, - 0x3fffdf, - 0x7fffeb, - 0x7fffec, - 0x1fffe0, - 0x1fffe1, - 0x3fffe0, - 0x1fffe2, - 0x7fffed, - 0x3fffe1, - 0x7fffee, - 0x7fffef, - 0xfffea, - 0x3fffe2, - 0x3fffe3, - 0x3fffe4, - 0x7ffff0, - 0x3fffe5, - 0x3fffe6, - 
0x7ffff1, - 0x3ffffe0, - 0x3ffffe1, - 0xfffeb, - 0x7fff1, - 0x3fffe7, - 0x7ffff2, - 0x3fffe8, - 0x1ffffec, - 0x3ffffe2, - 0x3ffffe3, - 0x3ffffe4, - 0x7ffffde, - 0x7ffffdf, - 0x3ffffe5, - 0xfffff1, - 0x1ffffed, - 0x7fff2, - 0x1fffe3, - 0x3ffffe6, - 0x7ffffe0, - 0x7ffffe1, - 0x3ffffe7, - 0x7ffffe2, - 0xfffff2, - 0x1fffe4, - 0x1fffe5, - 0x3ffffe8, - 0x3ffffe9, - 0xffffffd, - 0x7ffffe3, - 0x7ffffe4, - 0x7ffffe5, - 0xfffec, - 0xfffff3, - 0xfffed, - 0x1fffe6, - 0x3fffe9, - 0x1fffe7, - 0x1fffe8, - 0x7ffff3, - 0x3fffea, - 0x3fffeb, - 0x1ffffee, - 0x1ffffef, - 0xfffff4, - 0xfffff5, - 0x3ffffea, - 0x7ffff4, - 0x3ffffeb, - 0x7ffffe6, - 0x3ffffec, - 0x3ffffed, - 0x7ffffe7, - 0x7ffffe8, - 0x7ffffe9, - 0x7ffffea, - 0x7ffffeb, - 0xffffffe, - 0x7ffffec, - 0x7ffffed, - 0x7ffffee, - 0x7ffffef, - 0x7fffff0, - 0x3ffffee, - 0x3fffffff, -]! - -// h2_huffman_code_lens holds the bit length of each Huffman code in -// h2_huffman_codes (1..30 bits). +// h2_huffman_code_lens holds the bit length of each HPACK symbol's Huffman +// code (1..30 bits). Index 0..255 are byte values; index 256 is the EOS +// symbol. The codes themselves are canonical, so they are not stored: they are +// rebuilt from these lengths at startup via hash.huffman (see h2_huffman_table +// in h2_hpack_huffman.v). const h2_huffman_code_lens = [ u8(13), 23, diff --git a/vlib/net/http/h2_hpack_test.v b/vlib/net/http/h2_hpack_test.v index 8542e0d69..566c8d6f8 100644 --- a/vlib/net/http/h2_hpack_test.v +++ b/vlib/net/http/h2_hpack_test.v @@ -94,6 +94,25 @@ fn test_huffman_known_vector() { assert dec.bytestr() == 'www.example.com' } +fn test_huffman_codes_rebuilt_from_lengths() { + // The HPACK table now ships only the bit lengths; the canonical codes are + // rebuilt at startup via hash.huffman. Pin a few known codes from RFC 7541 + // Appendix B so a bad rebuild (or a future builder change) is caught. 
+ assert h2_huffman_table.codes.len == 257 + assert h2_huffman_table.lengths.len == 257 + // symbol 0 (NUL): 0x1ff8 / 13 bits + assert h2_huffman_table.codes[0] == 0x1ff8 + assert h2_huffman_table.lengths[0] == 13 + // '0' (0x30): 0x0 / 5 bits, '1': 0x1 / 5 bits, 'a' (0x61): 0x3 / 5 bits + assert h2_huffman_table.codes[0x30] == 0x0 + assert h2_huffman_table.lengths[0x30] == 5 + assert h2_huffman_table.codes[0x31] == 0x1 + assert h2_huffman_table.codes[0x61] == 0x3 + // EOS (256): 0x3fffffff / 30 bits + assert h2_huffman_table.codes[256] == 0x3fffffff + assert h2_huffman_table.lengths[256] == 30 +} + fn test_huffman_rejects_padding_not_all_ones() { // Valid encoding of "0" is 5 bits (00000); pad the rest of the byte with // zeros instead of ones -> invalid per RFC 7541 Section 5.2. -- 2.39.5