v / vlib / compress / deflate / deflate_inflate.v
429 lines · 407 sloc · 10.78 KB · a0632356d23a7c6ee16e85f89a2ef5ca2d360245
Raw
1module deflate
2
3import hash.huffman
4
5// vfmt off
6// RFC 1951 length/distance decode tables
7const length_bases = [3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
8 67, 83, 99, 115, 131, 163, 195, 227, 258]
9const length_extra_bits = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
10 4, 5, 5, 5, 5, 0]
11const dist_bases = [1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
12 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577]
13const dist_extra_bits = [0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
14 10, 11, 11, 12, 12, 13, 13]
15// code-length alphabet order (RFC 1951)
16const cl_order = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
17// vmt on
18
19// fixed_litlen_lengths returns code lengths for the fixed Huffman lit/len tree (RFC 1951 §3.2.6).
20fn fixed_litlen_lengths() []int {
21 mut lens := []int{len: 288}
22 for i in 0 .. 144 {
23 lens[i] = 8
24 }
25 for i in 144 .. 256 {
26 lens[i] = 9
27 }
28 for i in 256 .. 280 {
29 lens[i] = 7
30 }
31 for i in 280 .. 288 {
32 lens[i] = 8
33 }
34 return lens
35}
36
37// HuffTree is a MSB-first Huffman lookup table for DEFLATE decoding.
38// Indexed by the next max_bits bits read LSB-first from the stream.
39struct HuffTree {
40 table []u32 // entry: (symbol << 5) | code_length; 0xFFFF_FFFF = invalid
41 max_bits int
42}
43
44fn build_huff_tree(lengths []int) !HuffTree {
45 mut max_bits := 0
46 for l in lengths {
47 if l > max_bits {
48 max_bits = l
49 }
50 }
51 if max_bits == 0 {
52 return HuffTree{
53 table: [u32(0)]
54 max_bits: 0
55 }
56 }
57 // Build the flat LSB-first lookup table the decode loop expects directly
58 // from the lengths via the shared canonical builder: entry = (symbol << 5) |
59 // length, 0xFFFF_FFFF for invalid. flat_table() allocates no intermediate
60 // codes array, so this matches the hand-rolled loop's cost. Over-subscribed
61 // lengths now surface as an error instead of a silently corrupt table.
62 table := huffman.flat_table(lengths: lengths, max_bits: max_bits, bit_order: .lsb_first)!
63 return HuffTree{
64 table: table
65 max_bits: max_bits
66 }
67}
68
69struct BitReader {
70 buf []u8
71mut:
72 pos int
73 bits u32
74 nbits int
75}
76
77@[direct_array_access; inline]
78fn (mut r BitReader) read_bits(n int) !u32 {
79 for r.nbits < n {
80 if r.pos >= r.buf.len {
81 return error('inflate: unexpected end of stream')
82 }
83 r.bits |= u32(r.buf[r.pos]) << r.nbits
84 r.pos++
85 r.nbits += 8
86 }
87 val := r.bits & ((u32(1) << n) - 1)
88 r.bits >>= u32(n)
89 r.nbits -= n
90 return val
91}
92
93@[inline]
94fn (mut r BitReader) align_byte() {
95 r.bits = 0
96 r.nbits = 0
97}
98
99@[direct_array_access; inline]
100fn (mut r BitReader) read_byte_raw() !u8 {
101 if r.pos >= r.buf.len {
102 return error('inflate: unexpected end of stream')
103 }
104 b := r.buf[r.pos]
105 r.pos++
106 return b
107}
108
109@[direct_array_access; inline]
110fn (mut r BitReader) huff_decode(t HuffTree) !u32 {
111 for r.nbits < t.max_bits {
112 if r.pos >= r.buf.len {
113 break
114 }
115 r.bits |= u32(r.buf[r.pos]) << r.nbits
116 r.pos++
117 r.nbits += 8
118 }
119 idx := int(r.bits & ((u32(1) << t.max_bits) - 1))
120 entry := t.table[idx]
121 if entry == 0xffff_ffff {
122 return error('inflate: invalid Huffman code')
123 }
124 len_ := int(entry & 0x1f)
125 if r.nbits < len_ {
126 return error('inflate: unexpected end of stream')
127 }
128 sym := entry >> 5
129 r.bits >>= u32(len_)
130 r.nbits -= len_
131 return sym
132}
133
134struct InflateResult {
135 decoded []u8
136 consumed int
137}
138
139struct InflateStreamResult {
140 decoded []u8
141 consumed int
142 delivered int
143 aborted bool
144}
145
146struct InflateStreamState {
147mut:
148 delivered int
149}
150
151const inflate_callback_chunk_size = 32768
152
153@[direct_array_access; inline]
154fn flush_stream_chunks(out []u8, cb ChunkCallback, userdata voidptr, mut state InflateStreamState) bool {
155 for out.len - state.delivered >= inflate_callback_chunk_size {
156 end := state.delivered + inflate_callback_chunk_size
157 chunk := out[state.delivered..end]
158 if cb(chunk, userdata) != chunk.len {
159 return false
160 }
161 state.delivered = end
162 }
163 return true
164}
165
166// inflate_with_consumed decompresses raw RFC 1951 DEFLATE data and reports
167// how many input bytes were consumed by the DEFLATE bitstream.
168fn inflate_with_consumed(data []u8) !InflateResult {
169 mut r := BitReader{
170 buf: data
171 }
172 mut out := []u8{}
173 fixed_ll := build_huff_tree(fixed_litlen_lengths())!
174 fixed_d := build_huff_tree([]int{len: 32, init: 5})!
175 for {
176 bfinal := r.read_bits(1)!
177 btype := r.read_bits(2)!
178 match btype {
179 0 {
180 r.align_byte()
181 len_ := int(r.read_byte_raw()!) | (int(u32(r.read_byte_raw()!) << 8))
182 nlen := int(r.read_byte_raw()!) | (int(u32(r.read_byte_raw()!) << 8))
183 if len_ & 0xffff != (~nlen) & 0xffff {
184 return error('inflate: bad stored block length')
185 }
186 for _ in 0 .. len_ {
187 out << r.read_byte_raw()!
188 }
189 }
190 1 {
191 inflate_block(mut r, mut out, fixed_ll, fixed_d)!
192 }
193 2 {
194 hlit := int(r.read_bits(5)!) + 257
195 hdist := int(r.read_bits(5)!) + 1
196 hclen := int(r.read_bits(4)!) + 4
197 mut cl_lens := []int{len: 19}
198 for i in 0 .. hclen {
199 cl_lens[cl_order[i]] = int(r.read_bits(3)!)
200 }
201 cl_tree := build_huff_tree(cl_lens)!
202 mut all_lens := []int{}
203 for all_lens.len < hlit + hdist {
204 sym := r.huff_decode(cl_tree)!
205 if sym <= 15 {
206 all_lens << int(sym)
207 } else if sym == 16 {
208 if all_lens.len == 0 {
209 return error('inflate: repeat with empty history')
210 }
211 rep := int(r.read_bits(2)!) + 3
212 last := all_lens[all_lens.len - 1]
213 for _ in 0 .. rep {
214 all_lens << last
215 }
216 } else if sym == 17 {
217 rep := int(r.read_bits(3)!) + 3
218 for _ in 0 .. rep {
219 all_lens << 0
220 }
221 } else if sym == 18 {
222 rep := int(r.read_bits(7)!) + 11
223 for _ in 0 .. rep {
224 all_lens << 0
225 }
226 } else {
227 return error('inflate: bad code length symbol')
228 }
229 }
230 ll_tree := build_huff_tree(all_lens[..hlit])!
231 d_tree := build_huff_tree(all_lens[hlit..])!
232 inflate_block(mut r, mut out, ll_tree, d_tree)!
233 }
234 else {
235 return error('inflate: reserved block type')
236 }
237 }
238
239 if bfinal == 1 {
240 break
241 }
242 }
243 consumed := r.pos - (r.nbits >> 3)
244 return InflateResult{
245 decoded: out
246 consumed: consumed
247 }
248}
249
250// inflate decompresses raw RFC 1951 DEFLATE data.
251fn inflate(data []u8) ![]u8 {
252 res := inflate_with_consumed(data)!
253 return res.decoded
254}
255
256// inflate_with_callback decompresses raw RFC 1951 DEFLATE data and streams output through `cb`.
257// It still tracks consumed input bytes for container-level validation.
258fn inflate_with_callback(data []u8, cb ChunkCallback, userdata voidptr) !InflateStreamResult {
259 mut r := BitReader{
260 buf: data
261 }
262 mut out := []u8{}
263 mut state := InflateStreamState{}
264 mut aborted := false
265 fixed_ll := build_huff_tree(fixed_litlen_lengths())!
266 fixed_d := build_huff_tree([]int{len: 32, init: 5})!
267 for {
268 bfinal := r.read_bits(1)!
269 btype := r.read_bits(2)!
270 match btype {
271 0 {
272 r.align_byte()
273 len_ := int(r.read_byte_raw()!) | (int(u32(r.read_byte_raw()!) << 8))
274 nlen := int(r.read_byte_raw()!) | (int(u32(r.read_byte_raw()!) << 8))
275 if len_ & 0xffff != (~nlen) & 0xffff {
276 return error('inflate: bad stored block length')
277 }
278 for _ in 0 .. len_ {
279 out << r.read_byte_raw()!
280 if !flush_stream_chunks(out, cb, userdata, mut state) {
281 aborted = true
282 break
283 }
284 }
285 }
286 1 {
287 if !inflate_block_stream(mut r, mut out, fixed_ll, fixed_d, cb, userdata, mut state)! {
288 aborted = true
289 }
290 }
291 2 {
292 hlit := int(r.read_bits(5)!) + 257
293 hdist := int(r.read_bits(5)!) + 1
294 hclen := int(r.read_bits(4)!) + 4
295 mut cl_lens := []int{len: 19}
296 for i in 0 .. hclen {
297 cl_lens[cl_order[i]] = int(r.read_bits(3)!)
298 }
299 cl_tree := build_huff_tree(cl_lens)!
300 mut all_lens := []int{}
301 for all_lens.len < hlit + hdist {
302 sym := r.huff_decode(cl_tree)!
303 if sym <= 15 {
304 all_lens << int(sym)
305 } else if sym == 16 {
306 if all_lens.len == 0 {
307 return error('inflate: repeat with empty history')
308 }
309 rep := int(r.read_bits(2)!) + 3
310 last := all_lens[all_lens.len - 1]
311 for _ in 0 .. rep {
312 all_lens << last
313 }
314 } else if sym == 17 {
315 rep := int(r.read_bits(3)!) + 3
316 for _ in 0 .. rep {
317 all_lens << 0
318 }
319 } else if sym == 18 {
320 rep := int(r.read_bits(7)!) + 11
321 for _ in 0 .. rep {
322 all_lens << 0
323 }
324 } else {
325 return error('inflate: bad code length symbol')
326 }
327 }
328 ll_tree := build_huff_tree(all_lens[..hlit])!
329 d_tree := build_huff_tree(all_lens[hlit..])!
330 if !inflate_block_stream(mut r, mut out, ll_tree, d_tree, cb, userdata, mut state)! {
331 aborted = true
332 }
333 }
334 else {
335 return error('inflate: reserved block type')
336 }
337 }
338
339 if aborted || bfinal == 1 {
340 break
341 }
342 }
343 if !aborted && out.len > state.delivered {
344 chunk := unsafe { out[state.delivered..] }
345 if cb(chunk, userdata) != chunk.len {
346 aborted = true
347 } else {
348 state.delivered = out.len
349 }
350 }
351 consumed := r.pos - (r.nbits >> 3)
352 return InflateStreamResult{
353 decoded: out
354 consumed: consumed
355 delivered: state.delivered
356 aborted: aborted
357 }
358}
359
360@[direct_array_access]
361fn inflate_block(mut r BitReader, mut out []u8, ll HuffTree, dist HuffTree) ! {
362 for {
363 sym := r.huff_decode(ll)!
364 if sym == 256 {
365 break
366 }
367 if sym < 256 {
368 out << u8(sym)
369 } else {
370 li := int(sym) - 257
371 if li < 0 || li >= length_bases.len {
372 return error('inflate: invalid length symbol ${sym}')
373 }
374 length := length_bases[li] + int(r.read_bits(length_extra_bits[li])!)
375 dsym := r.huff_decode(dist)!
376 di := int(dsym)
377 if di >= dist_bases.len {
378 return error('inflate: invalid distance symbol ${dsym}')
379 }
380 distance := dist_bases[di] + int(r.read_bits(dist_extra_bits[di])!)
381 if distance > out.len {
382 return error('inflate: distance past output start')
383 }
384 base := out.len - distance
385 for i in 0 .. length {
386 out << out[base + i]
387 }
388 }
389 }
390}
391
392@[direct_array_access]
393fn inflate_block_stream(mut r BitReader, mut out []u8, ll HuffTree, dist HuffTree, cb ChunkCallback, userdata voidptr, mut state InflateStreamState) !bool {
394 for {
395 sym := r.huff_decode(ll)!
396 if sym == 256 {
397 break
398 }
399 if sym < 256 {
400 out << u8(sym)
401 if !flush_stream_chunks(out, cb, userdata, mut state) {
402 return false
403 }
404 } else {
405 li := int(sym) - 257
406 if li < 0 || li >= length_bases.len {
407 return error('inflate: invalid length symbol ${sym}')
408 }
409 length := length_bases[li] + int(r.read_bits(length_extra_bits[li])!)
410 dsym := r.huff_decode(dist)!
411 di := int(dsym)
412 if di >= dist_bases.len {
413 return error('inflate: invalid distance symbol ${dsym}')
414 }
415 distance := dist_bases[di] + int(r.read_bits(dist_extra_bits[di])!)
416 if distance > out.len {
417 return error('inflate: distance past output start')
418 }
419 base := out.len - distance
420 for i in 0 .. length {
421 out << out[base + i]
422 if !flush_stream_chunks(out, cb, userdata, mut state) {
423 return false
424 }
425 }
426 }
427 }
428 return true
429}
430