v2 / vlib / compress / deflate / deflate_inflate.v
262 lines · 250 sloc · 6.09 KB · 96c365159d037716a7097f6b02e4b4b82edd0a8a
Raw
1module deflate
2
3// vfmt off
4// RFC 1951 length/distance decode tables
5const length_bases = [3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
6 67, 83, 99, 115, 131, 163, 195, 227, 258]
7const length_extra_bits = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
8 4, 5, 5, 5, 5, 0]
9const dist_bases = [1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
10 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577]
11const dist_extra_bits = [0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
12 10, 11, 11, 12, 12, 13, 13]
13// code-length alphabet order (RFC 1951)
14const cl_order = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
15// vmt on
16
17// fixed_litlen_lengths returns code lengths for the fixed Huffman lit/len tree (RFC 1951 §3.2.6).
18fn fixed_litlen_lengths() []int {
19 mut lens := []int{len: 288}
20 for i in 0 .. 144 {
21 lens[i] = 8
22 }
23 for i in 144 .. 256 {
24 lens[i] = 9
25 }
26 for i in 256 .. 280 {
27 lens[i] = 7
28 }
29 for i in 280 .. 288 {
30 lens[i] = 8
31 }
32 return lens
33}
34
35// HuffTree is a MSB-first Huffman lookup table for DEFLATE decoding.
36// Indexed by the next max_bits bits read LSB-first from the stream.
37struct HuffTree {
38 table []u32 // entry: (symbol << 5) | code_length; 0xFFFF_FFFF = invalid
39 max_bits int
40}
41
42fn build_huff_tree(lengths []int) HuffTree {
43 mut max_bits := 0
44 for l in lengths {
45 if l > max_bits {
46 max_bits = l
47 }
48 }
49 if max_bits == 0 {
50 return HuffTree{
51 table: [u32(0)]
52 max_bits: 0
53 }
54 }
55 mut bl_count := []int{len: max_bits + 1}
56 for l in lengths {
57 if l > 0 {
58 bl_count[l]++
59 }
60 }
61 mut next_code := []u32{len: max_bits + 1}
62 mut c := u32(0)
63 for bits in 1 .. max_bits + 1 {
64 c = (c + u32(bl_count[bits - 1])) << 1
65 next_code[bits] = c
66 }
67 table_size := 1 << max_bits
68 mut table := []u32{len: table_size, init: 0xffff_ffff}
69 for sym in 0 .. lengths.len {
70 l := lengths[sym]
71 if l == 0 {
72 continue
73 }
74 code := next_code[l]
75 next_code[l]++
76 // Reverse code for LSB-first bit reader
77 rev := bit_reverse(code, l)
78 step := 1 << l
79 mut idx := int(rev)
80 for idx < table_size {
81 table[idx] = (u32(sym) << 5) | u32(l)
82 idx += step
83 }
84 }
85 return HuffTree{
86 table: table
87 max_bits: max_bits
88 }
89}
90
91struct BitReader {
92 buf []u8
93mut:
94 pos int
95 bits u32
96 nbits int
97}
98
99@[direct_array_access; inline]
100fn (mut r BitReader) read_bits(n int) !u32 {
101 for r.nbits < n {
102 if r.pos >= r.buf.len {
103 return error('inflate: unexpected end of stream')
104 }
105 r.bits |= u32(r.buf[r.pos]) << r.nbits
106 r.pos++
107 r.nbits += 8
108 }
109 val := r.bits & ((u32(1) << n) - 1)
110 r.bits >>= u32(n)
111 r.nbits -= n
112 return val
113}
114
115@[inline]
116fn (mut r BitReader) align_byte() {
117 r.bits = 0
118 r.nbits = 0
119}
120
121@[direct_array_access; inline]
122fn (mut r BitReader) read_byte_raw() !u8 {
123 if r.pos >= r.buf.len {
124 return error('inflate: unexpected end of stream')
125 }
126 b := r.buf[r.pos]
127 r.pos++
128 return b
129}
130
131@[direct_array_access; inline]
132fn (mut r BitReader) huff_decode(t HuffTree) !u32 {
133 for r.nbits < t.max_bits {
134 if r.pos >= r.buf.len {
135 break
136 }
137 r.bits |= u32(r.buf[r.pos]) << r.nbits
138 r.pos++
139 r.nbits += 8
140 }
141 idx := int(r.bits & ((u32(1) << t.max_bits) - 1))
142 entry := t.table[idx]
143 if entry == 0xffff_ffff {
144 return error('inflate: invalid Huffman code')
145 }
146 len_ := int(entry & 0x1f)
147 sym := entry >> 5
148 r.bits >>= u32(len_)
149 r.nbits -= len_
150 return sym
151}
152
153// inflate decompresses raw RFC 1951 DEFLATE data.
154fn inflate(data []u8) ![]u8 {
155 mut r := BitReader{
156 buf: data
157 }
158 mut out := []u8{}
159 fixed_ll := build_huff_tree(fixed_litlen_lengths())
160 fixed_d := build_huff_tree([]int{len: 32, init: 5})
161 for {
162 bfinal := r.read_bits(1)!
163 btype := r.read_bits(2)!
164 match btype {
165 0 {
166 r.align_byte()
167 len_ := int(r.read_byte_raw()!) | (int(u32(r.read_byte_raw()!) << 8))
168 nlen := int(r.read_byte_raw()!) | (int(u32(r.read_byte_raw()!) << 8))
169 if len_ & 0xffff != (~nlen) & 0xffff {
170 return error('inflate: bad stored block length')
171 }
172 for _ in 0 .. len_ {
173 out << r.read_byte_raw()!
174 }
175 }
176 1 {
177 inflate_block(mut r, mut out, fixed_ll, fixed_d)!
178 }
179 2 {
180 hlit := int(r.read_bits(5)!) + 257
181 hdist := int(r.read_bits(5)!) + 1
182 hclen := int(r.read_bits(4)!) + 4
183 mut cl_lens := []int{len: 19}
184 for i in 0 .. hclen {
185 cl_lens[cl_order[i]] = int(r.read_bits(3)!)
186 }
187 cl_tree := build_huff_tree(cl_lens)
188 mut all_lens := []int{}
189 for all_lens.len < hlit + hdist {
190 sym := r.huff_decode(cl_tree)!
191 if sym <= 15 {
192 all_lens << int(sym)
193 } else if sym == 16 {
194 if all_lens.len == 0 {
195 return error('inflate: repeat with empty history')
196 }
197 rep := int(r.read_bits(2)!) + 3
198 last := all_lens[all_lens.len - 1]
199 for _ in 0 .. rep {
200 all_lens << last
201 }
202 } else if sym == 17 {
203 rep := int(r.read_bits(3)!) + 3
204 for _ in 0 .. rep {
205 all_lens << 0
206 }
207 } else if sym == 18 {
208 rep := int(r.read_bits(7)!) + 11
209 for _ in 0 .. rep {
210 all_lens << 0
211 }
212 } else {
213 return error('inflate: bad code length symbol')
214 }
215 }
216 ll_tree := build_huff_tree(all_lens[..hlit])
217 d_tree := build_huff_tree(all_lens[hlit..])
218 inflate_block(mut r, mut out, ll_tree, d_tree)!
219 }
220 else {
221 return error('inflate: reserved block type')
222 }
223 }
224
225 if bfinal == 1 {
226 break
227 }
228 }
229 return out
230}
231
232@[direct_array_access]
233fn inflate_block(mut r BitReader, mut out []u8, ll HuffTree, dist HuffTree) ! {
234 for {
235 sym := r.huff_decode(ll)!
236 if sym == 256 {
237 break
238 }
239 if sym < 256 {
240 out << u8(sym)
241 } else {
242 li := int(sym) - 257
243 if li < 0 || li >= length_bases.len {
244 return error('inflate: invalid length symbol ${sym}')
245 }
246 length := length_bases[li] + int(r.read_bits(length_extra_bits[li])!)
247 dsym := r.huff_decode(dist)!
248 di := int(dsym)
249 if di >= dist_bases.len {
250 return error('inflate: invalid distance symbol ${dsym}')
251 }
252 distance := dist_bases[di] + int(r.read_bits(dist_extra_bits[di])!)
253 if distance > out.len {
254 return error('inflate: distance past output start')
255 }
256 base := out.len - distance
257 for i in 0 .. length {
258 out << out[base + i]
259 }
260 }
261 }
262}
263