v2 / vlib / compress / deflate / deflate_compress.v
184 lines · 173 sloc · 4.15 KB · 96c365159d037716a7097f6b02e4b4b82edd0a8a
Raw
1module deflate
2
3const deflate_hash_bits = 15
4const deflate_hash_size = 1 << deflate_hash_bits
5const deflate_max_chain = 64
6const deflate_min_match = 3
7const deflate_max_match = 258
8const deflate_window = 32768
9
10// fixed_litlen_encode returns (reversed_codes, code_lengths) for fixed Huffman lit/len.
11fn fixed_litlen_encode() ([]u32, []int) {
12 lens := fixed_litlen_lengths()
13 mut max_bits := 0
14 for l in lens {
15 if l > max_bits {
16 max_bits = l
17 }
18 }
19 mut bl_count := []int{len: max_bits + 1}
20 for l in lens {
21 if l > 0 {
22 bl_count[l]++
23 }
24 }
25 mut next_code := []u32{len: max_bits + 1}
26 mut c := u32(0)
27 for bits in 1 .. max_bits + 1 {
28 c = (c + u32(bl_count[bits - 1])) << 1
29 next_code[bits] = c
30 }
31 mut codes := []u32{len: 288}
32 for sym in 0 .. 288 {
33 l := lens[sym]
34 if l == 0 {
35 continue
36 }
37 codes[sym] = bit_reverse(next_code[l], l)
38 next_code[l]++
39 }
40 return codes, lens
41}
42
43// fixed_dist_encode returns (reversed_codes, code_lengths) for fixed Huffman distance.
44fn fixed_dist_encode() ([]u32, []int) {
45 mut codes := []u32{len: 30}
46 for i in 0 .. 30 {
47 codes[i] = bit_reverse(u32(i), 5)
48 }
49 return codes, []int{len: 30, init: 5}
50}
51
52fn length_code_info(length int) (int, int, int) {
53 for i := length_bases.len - 1; i >= 0; i-- {
54 if length >= length_bases[i] {
55 return i, length - length_bases[i], length_extra_bits[i]
56 }
57 }
58 return 0, 0, 0
59}
60
61fn dist_code_info(distance int) (int, int, int) {
62 for i := dist_bases.len - 1; i >= 0; i-- {
63 if distance >= dist_bases[i] {
64 return i, distance - dist_bases[i], dist_extra_bits[i]
65 }
66 }
67 return 0, 0, 0
68}
69
70fn hash3(data []u8, pos int) int {
71 v := u32(data[pos]) | (u32(data[pos + 1]) << 8) | (u32(data[pos + 2]) << 16)
72 return int((v * u32(2654435761)) >> u32(32 - deflate_hash_bits))
73}
74
75@[direct_array_access]
76fn find_lz_match(data []u8, pos int, last []int, prev []int) (int, int) {
77 if pos + deflate_min_match > data.len {
78 return 0, 0
79 }
80 max_len := if pos + deflate_max_match < data.len { deflate_max_match } else { data.len - pos }
81 mut best_len := 0
82 mut best_off := 0
83 mut i := last[hash3(data, pos)]
84 mut chain := 0
85 for i >= 0 && chain < deflate_max_chain {
86 off := pos - i
87 if off > deflate_window {
88 break
89 }
90 mut l := 0
91 for l < max_len && data[i + l] == data[pos + l] {
92 l++
93 }
94 if l > best_len {
95 best_len = l
96 best_off = off
97 if best_len == max_len {
98 break
99 }
100 }
101 i = prev[i]
102 chain++
103 }
104 return best_off, best_len
105}
106
107struct BitWriter {
108mut:
109 buf []u8
110 bits u32
111 nbits int
112}
113
114@[direct_array_access; inline]
115fn (mut w BitWriter) write_bits(value u32, nbits int) {
116 if nbits == 0 {
117 return
118 }
119 w.bits |= value << w.nbits
120 w.nbits += nbits
121 for w.nbits >= 8 {
122 w.buf << u8(w.bits & 0xff)
123 w.bits >>= 8
124 w.nbits -= 8
125 }
126}
127
128fn (mut w BitWriter) flush() {
129 if w.nbits > 0 {
130 w.buf << u8(w.bits & 0xff)
131 w.bits = 0
132 w.nbits = 0
133 }
134}
135
136// deflate_compress_fixed compresses data to RFC 1951 DEFLATE using fixed Huffman codes.
137@[direct_array_access]
138fn deflate_compress_fixed(data []u8) []u8 {
139 ll_codes, ll_lens := fixed_litlen_encode()
140 d_codes, d_lens := fixed_dist_encode()
141 mut w := BitWriter{}
142 // BFINAL=1, BTYPE=01 (fixed Huffman)
143 w.write_bits(1, 1)
144 w.write_bits(1, 2)
145 if data.len == 0 {
146 w.write_bits(ll_codes[256], ll_lens[256])
147 w.flush()
148 return w.buf
149 }
150 mut last := []int{len: deflate_hash_size, init: -1}
151 mut prev := []int{len: data.len, init: -1}
152 mut pos := 0
153 for pos < data.len {
154 off, match_len := find_lz_match(data, pos, last, prev)
155 if match_len >= deflate_min_match {
156 li, lext, lext_bits := length_code_info(match_len)
157 sym := 257 + li
158 w.write_bits(ll_codes[sym], ll_lens[sym])
159 w.write_bits(u32(lext), lext_bits)
160 di, dext, dext_bits := dist_code_info(off)
161 w.write_bits(d_codes[di], d_lens[di])
162 w.write_bits(u32(dext), dext_bits)
163 for i in pos .. pos + match_len {
164 if i + deflate_min_match < data.len {
165 h := hash3(data, i)
166 prev[i] = last[h]
167 last[h] = i
168 }
169 }
170 pos += match_len
171 } else {
172 w.write_bits(ll_codes[int(data[pos])], ll_lens[int(data[pos])])
173 if pos + deflate_min_match < data.len {
174 h := hash3(data, pos)
175 prev[pos] = last[h]
176 last[h] = pos
177 }
178 pos++
179 }
180 }
181 w.write_bits(ll_codes[256], ll_lens[256])
182 w.flush()
183 return w.buf
184}
185