v / vlib / hash / huffman / huffman_test.v
145 lines · 131 sloc · 4.74 KB · 95861b8bdeeddc71d79c3f09f56a66bf01106ecd
Raw
1module huffman
2
3// The worked example from RFC 1951 §3.2.2: symbols A..H with these lengths
4// produce these exact canonical (MSB-first) codes.
5fn test_rfc1951_canonical_example() {
6 lengths := [3, 3, 3, 3, 3, 2, 4, 4] // A B C D E F G H
7 t := build(lengths: lengths, max_bits: 4, bit_order: .msb_first)!
8 expected := [u32(0b010), 0b011, 0b100, 0b101, 0b110, 0b00, 0b1110, 0b1111]
9 assert t.codes == expected
10 assert t.lengths == lengths
11 assert t.max_bits == 4
12}
13
14fn test_lsb_first_reverses_each_code() {
15 lengths := [3, 3, 3, 3, 3, 2, 4, 4]
16 msb := build(lengths: lengths, max_bits: 4, bit_order: .msb_first)!
17 lsb := build(lengths: lengths, max_bits: 4, bit_order: .lsb_first)!
18 // Each LSB code is the MSB code bit-reversed within its length.
19 for sym, l in lengths {
20 assert lsb.codes[sym] == bit_reverse(msb.codes[sym], l)
21 }
22 // e.g. F (len 2, code 00) is unchanged; A (len 3, 010) -> 010 reversed.
23 assert lsb.codes[5] == 0b00
24 assert lsb.codes[0] == 0b010 // 010 reversed is still 010
25 assert lsb.codes[6] == bit_reverse(u32(0b1110), 4) // 1110 -> 0111
26}
27
28fn test_flat_table_round_trips_lsb() {
29 lengths := [3, 3, 3, 3, 3, 2, 4, 4]
30 t := build(lengths: lengths, max_bits: 4, bit_order: .lsb_first)!
31 table := flat_table(lengths: lengths, max_bits: 4, bit_order: .lsb_first)!
32 assert table.len == 1 << 4
33 // Every symbol must decode back from its code in every don't-care variant.
34 for sym, l in lengths {
35 step := 1 << l
36 mut idx := int(t.codes[sym])
37 for idx < table.len {
38 entry := table[idx]
39 assert entry != flat_invalid_entry
40 assert int(entry & ((u32(1) << flat_length_bits) - 1)) == l
41 assert int(entry >> flat_length_bits) == sym
42 idx += step
43 }
44 }
45}
46
47fn test_flat_table_round_trips_msb() {
48 // MSB-first flat table: a code of length l fills the contiguous block whose
49 // high l bits equal the code (the low max_bits-l bits are don't-cares).
50 lengths := [3, 3, 3, 3, 3, 2, 4, 4]
51 t := build(lengths: lengths, max_bits: 4, bit_order: .msb_first)!
52 table := flat_table(lengths: lengths, max_bits: 4, bit_order: .msb_first)!
53 assert table.len == 1 << 4
54 for sym, l in lengths {
55 block := 1 << (t.max_bits - l)
56 base := int(t.codes[sym]) * block
57 for k in 0 .. block {
58 entry := table[base + k]
59 assert entry != flat_invalid_entry
60 assert int(entry & ((u32(1) << flat_length_bits) - 1)) == l
61 assert int(entry >> flat_length_bits) == sym
62 }
63 }
64}
65
66fn test_flat_table_incomplete_marks_gaps() {
67 // A single length-1 code under-subscribes a 2-bit table: half the indices
68 // belong to no code and must read back as flat_invalid_entry. This is the
69 // path the complete-code fast path must NOT take.
70 table := flat_table(lengths: [1], max_bits: 2, bit_order: .lsb_first)!
71 assert table.len == 4
72 // code 0, len 1, lsb stride 2 -> indices 0 and 2 are the symbol; 1 and 3 gaps.
73 assert int(table[0] >> flat_length_bits) == 0
74 assert int(table[0] & ((u32(1) << flat_length_bits) - 1)) == 1
75 assert table[2] == table[0]
76 assert table[1] == flat_invalid_entry
77 assert table[3] == flat_invalid_entry
78}
79
80fn test_decode_map_msb() {
81 lengths := [3, 3, 3, 3, 3, 2, 4, 4]
82 t := build(lengths: lengths, max_bits: 4, bit_order: .msb_first)!
83 m := t.decode_map()!
84 for sym, l in lengths {
85 key := (u64(l) << 32) | u64(t.codes[sym])
86 assert m[key] == sym
87 }
88}
89
90fn test_decode_map_rejects_lsb() {
91 t := build(lengths: [1, 1], max_bits: 1, bit_order: .lsb_first)!
92 if _ := t.decode_map() {
93 assert false, 'decode_map should reject lsb_first tables'
94 }
95}
96
97fn test_unused_symbols_get_zero_code() {
98 // A length-0 symbol is unused; it must not consume a code.
99 t := build(lengths: [1, 0, 1], max_bits: 1, bit_order: .msb_first)!
100 assert t.codes[1] == 0
101 assert t.codes[0] == 0
102 assert t.codes[2] == 1
103}
104
105fn test_error_length_exceeds_max_bits() {
106 if _ := build(lengths: [5], max_bits: 4, bit_order: .msb_first) {
107 assert false, 'length > max_bits must error'
108 }
109}
110
111fn test_error_negative_length() {
112 if _ := build(lengths: [-1], max_bits: 4, bit_order: .msb_first) {
113 assert false, 'negative length must error'
114 }
115}
116
117fn test_error_max_bits_too_small() {
118 if _ := build(lengths: [1], max_bits: 0, bit_order: .msb_first) {
119 assert false, 'max_bits < 1 must error'
120 }
121}
122
123fn test_error_over_subscribed() {
124 // Three length-1 codes cannot coexist (only two 1-bit codes exist).
125 if _ := build(lengths: [1, 1, 1], max_bits: 1, bit_order: .msb_first) {
126 assert false, 'over-subscribed code must error'
127 }
128}
129
130fn test_incomplete_code_is_allowed() {
131 // A single length-2 code under-subscribes the space; that is permitted.
132 t := build(lengths: [2], max_bits: 2, bit_order: .msb_first)!
133 assert t.codes[0] == 0
134}
135
136fn test_flat_table_rejects_wide_codes() {
137 if _ := flat_table(
138 lengths: [max_flat_bits + 1]
139 max_bits: max_flat_bits + 1
140 bit_order: .lsb_first
141 )
142 {
143 assert false, 'flat table must reject max_bits > max_flat_bits'
144 }
145}
146