v / vlib / compress / deflate / deflate_compress.v
162 lines · 150 sloc · 3.89 KB · a0632356d23a7c6ee16e85f89a2ef5ca2d360245
Raw
1module deflate
2
3import hash.huffman
4
5const deflate_hash_bits = 15
6const deflate_hash_size = 1 << deflate_hash_bits
7const deflate_max_chain = 64
8const deflate_min_match = 3
9const deflate_max_match = 258
10const deflate_window = 32768
11
12// fixed_litlen_encode returns (reversed_codes, code_lengths) for fixed Huffman lit/len.
13// The LSB-first (bit-reversed) codes come straight from the shared canonical
14// builder, since the encoder writes bits LSB-first.
15fn fixed_litlen_encode() !([]u32, []int) {
16 lens := fixed_litlen_lengths()
17 t := huffman.build(lengths: lens, max_bits: 9, bit_order: .lsb_first)!
18 return t.codes, lens
19}
20
21// fixed_dist_encode returns (reversed_codes, code_lengths) for fixed Huffman distance.
22fn fixed_dist_encode() ([]u32, []int) {
23 mut codes := []u32{len: 30}
24 for i in 0 .. 30 {
25 codes[i] = bit_reverse(u32(i), 5)
26 }
27 return codes, []int{len: 30, init: 5}
28}
29
30fn length_code_info(length int) (int, int, int) {
31 for i := length_bases.len - 1; i >= 0; i-- {
32 if length >= length_bases[i] {
33 return i, length - length_bases[i], length_extra_bits[i]
34 }
35 }
36 return 0, 0, 0
37}
38
39fn dist_code_info(distance int) (int, int, int) {
40 for i := dist_bases.len - 1; i >= 0; i-- {
41 if distance >= dist_bases[i] {
42 return i, distance - dist_bases[i], dist_extra_bits[i]
43 }
44 }
45 return 0, 0, 0
46}
47
48fn hash3(data []u8, pos int) int {
49 v := u32(data[pos]) | (u32(data[pos + 1]) << 8) | (u32(data[pos + 2]) << 16)
50 return int((v * u32(2654435761)) >> u32(32 - deflate_hash_bits))
51}
52
53@[direct_array_access]
54fn find_lz_match(data []u8, pos int, last []int, prev []int) (int, int) {
55 if pos + deflate_min_match > data.len {
56 return 0, 0
57 }
58 max_len := if pos + deflate_max_match < data.len { deflate_max_match } else { data.len - pos }
59 mut best_len := 0
60 mut best_off := 0
61 mut i := last[hash3(data, pos)]
62 mut chain := 0
63 for i >= 0 && chain < deflate_max_chain {
64 off := pos - i
65 if off > deflate_window {
66 break
67 }
68 mut l := 0
69 for l < max_len && data[i + l] == data[pos + l] {
70 l++
71 }
72 if l > best_len {
73 best_len = l
74 best_off = off
75 if best_len == max_len {
76 break
77 }
78 }
79 i = prev[i]
80 chain++
81 }
82 return best_off, best_len
83}
84
85struct BitWriter {
86mut:
87 buf []u8
88 bits u32
89 nbits int
90}
91
92@[direct_array_access; inline]
93fn (mut w BitWriter) write_bits(value u32, nbits int) {
94 if nbits == 0 {
95 return
96 }
97 w.bits |= value << w.nbits
98 w.nbits += nbits
99 for w.nbits >= 8 {
100 w.buf << u8(w.bits & 0xff)
101 w.bits >>= 8
102 w.nbits -= 8
103 }
104}
105
106fn (mut w BitWriter) flush() {
107 if w.nbits > 0 {
108 w.buf << u8(w.bits & 0xff)
109 w.bits = 0
110 w.nbits = 0
111 }
112}
113
114// deflate_compress_fixed compresses data to RFC 1951 DEFLATE using fixed Huffman codes.
115@[direct_array_access]
116fn deflate_compress_fixed(data []u8) ![]u8 {
117 ll_codes, ll_lens := fixed_litlen_encode()!
118 d_codes, d_lens := fixed_dist_encode()
119 mut w := BitWriter{}
120 // BFINAL=1, BTYPE=01 (fixed Huffman)
121 w.write_bits(1, 1)
122 w.write_bits(1, 2)
123 if data.len == 0 {
124 w.write_bits(ll_codes[256], ll_lens[256])
125 w.flush()
126 return w.buf
127 }
128 mut last := []int{len: deflate_hash_size, init: -1}
129 mut prev := []int{len: data.len, init: -1}
130 mut pos := 0
131 for pos < data.len {
132 off, match_len := find_lz_match(data, pos, last, prev)
133 if match_len >= deflate_min_match {
134 li, lext, lext_bits := length_code_info(match_len)
135 sym := 257 + li
136 w.write_bits(ll_codes[sym], ll_lens[sym])
137 w.write_bits(u32(lext), lext_bits)
138 di, dext, dext_bits := dist_code_info(off)
139 w.write_bits(d_codes[di], d_lens[di])
140 w.write_bits(u32(dext), dext_bits)
141 for i in pos .. pos + match_len {
142 if i + deflate_min_match < data.len {
143 h := hash3(data, i)
144 prev[i] = last[h]
145 last[h] = i
146 }
147 }
148 pos += match_len
149 } else {
150 w.write_bits(ll_codes[int(data[pos])], ll_lens[int(data[pos])])
151 if pos + deflate_min_match < data.len {
152 h := hash3(data, pos)
153 prev[pos] = last[h]
154 last[h] = pos
155 }
156 pos++
157 }
158 }
159 w.write_bits(ll_codes[256], ll_lens[256])
160 w.flush()
161 return w.buf
162}
163