| 1 | module deflate |
| 2 | |
| 3 | const deflate_hash_bits = 15 |
| 4 | const deflate_hash_size = 1 << deflate_hash_bits |
| 5 | const deflate_max_chain = 64 |
| 6 | const deflate_min_match = 3 |
| 7 | const deflate_max_match = 258 |
| 8 | const deflate_window = 32768 |
| 9 | |
| 10 | // fixed_litlen_encode returns (reversed_codes, code_lengths) for fixed Huffman lit/len. |
| 11 | fn fixed_litlen_encode() ([]u32, []int) { |
| 12 | lens := fixed_litlen_lengths() |
| 13 | mut max_bits := 0 |
| 14 | for l in lens { |
| 15 | if l > max_bits { |
| 16 | max_bits = l |
| 17 | } |
| 18 | } |
| 19 | mut bl_count := []int{len: max_bits + 1} |
| 20 | for l in lens { |
| 21 | if l > 0 { |
| 22 | bl_count[l]++ |
| 23 | } |
| 24 | } |
| 25 | mut next_code := []u32{len: max_bits + 1} |
| 26 | mut c := u32(0) |
| 27 | for bits in 1 .. max_bits + 1 { |
| 28 | c = (c + u32(bl_count[bits - 1])) << 1 |
| 29 | next_code[bits] = c |
| 30 | } |
| 31 | mut codes := []u32{len: 288} |
| 32 | for sym in 0 .. 288 { |
| 33 | l := lens[sym] |
| 34 | if l == 0 { |
| 35 | continue |
| 36 | } |
| 37 | codes[sym] = bit_reverse(next_code[l], l) |
| 38 | next_code[l]++ |
| 39 | } |
| 40 | return codes, lens |
| 41 | } |
| 42 | |
| 43 | // fixed_dist_encode returns (reversed_codes, code_lengths) for fixed Huffman distance. |
| 44 | fn fixed_dist_encode() ([]u32, []int) { |
| 45 | mut codes := []u32{len: 30} |
| 46 | for i in 0 .. 30 { |
| 47 | codes[i] = bit_reverse(u32(i), 5) |
| 48 | } |
| 49 | return codes, []int{len: 30, init: 5} |
| 50 | } |
| 51 | |
| 52 | fn length_code_info(length int) (int, int, int) { |
| 53 | for i := length_bases.len - 1; i >= 0; i-- { |
| 54 | if length >= length_bases[i] { |
| 55 | return i, length - length_bases[i], length_extra_bits[i] |
| 56 | } |
| 57 | } |
| 58 | return 0, 0, 0 |
| 59 | } |
| 60 | |
| 61 | fn dist_code_info(distance int) (int, int, int) { |
| 62 | for i := dist_bases.len - 1; i >= 0; i-- { |
| 63 | if distance >= dist_bases[i] { |
| 64 | return i, distance - dist_bases[i], dist_extra_bits[i] |
| 65 | } |
| 66 | } |
| 67 | return 0, 0, 0 |
| 68 | } |
| 69 | |
| 70 | fn hash3(data []u8, pos int) int { |
| 71 | v := u32(data[pos]) | (u32(data[pos + 1]) << 8) | (u32(data[pos + 2]) << 16) |
| 72 | return int((v * u32(2654435761)) >> u32(32 - deflate_hash_bits)) |
| 73 | } |
| 74 | |
| 75 | @[direct_array_access] |
| 76 | fn find_lz_match(data []u8, pos int, last []int, prev []int) (int, int) { |
| 77 | if pos + deflate_min_match > data.len { |
| 78 | return 0, 0 |
| 79 | } |
| 80 | max_len := if pos + deflate_max_match < data.len { deflate_max_match } else { data.len - pos } |
| 81 | mut best_len := 0 |
| 82 | mut best_off := 0 |
| 83 | mut i := last[hash3(data, pos)] |
| 84 | mut chain := 0 |
| 85 | for i >= 0 && chain < deflate_max_chain { |
| 86 | off := pos - i |
| 87 | if off > deflate_window { |
| 88 | break |
| 89 | } |
| 90 | mut l := 0 |
| 91 | for l < max_len && data[i + l] == data[pos + l] { |
| 92 | l++ |
| 93 | } |
| 94 | if l > best_len { |
| 95 | best_len = l |
| 96 | best_off = off |
| 97 | if best_len == max_len { |
| 98 | break |
| 99 | } |
| 100 | } |
| 101 | i = prev[i] |
| 102 | chain++ |
| 103 | } |
| 104 | return best_off, best_len |
| 105 | } |
| 106 | |
| 107 | struct BitWriter { |
| 108 | mut: |
| 109 | buf []u8 |
| 110 | bits u32 |
| 111 | nbits int |
| 112 | } |
| 113 | |
| 114 | @[direct_array_access; inline] |
| 115 | fn (mut w BitWriter) write_bits(value u32, nbits int) { |
| 116 | if nbits == 0 { |
| 117 | return |
| 118 | } |
| 119 | w.bits |= value << w.nbits |
| 120 | w.nbits += nbits |
| 121 | for w.nbits >= 8 { |
| 122 | w.buf << u8(w.bits & 0xff) |
| 123 | w.bits >>= 8 |
| 124 | w.nbits -= 8 |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | fn (mut w BitWriter) flush() { |
| 129 | if w.nbits > 0 { |
| 130 | w.buf << u8(w.bits & 0xff) |
| 131 | w.bits = 0 |
| 132 | w.nbits = 0 |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | // deflate_compress_fixed compresses data to RFC 1951 DEFLATE using fixed Huffman codes. |
| 137 | @[direct_array_access] |
| 138 | fn deflate_compress_fixed(data []u8) []u8 { |
| 139 | ll_codes, ll_lens := fixed_litlen_encode() |
| 140 | d_codes, d_lens := fixed_dist_encode() |
| 141 | mut w := BitWriter{} |
| 142 | // BFINAL=1, BTYPE=01 (fixed Huffman) |
| 143 | w.write_bits(1, 1) |
| 144 | w.write_bits(1, 2) |
| 145 | if data.len == 0 { |
| 146 | w.write_bits(ll_codes[256], ll_lens[256]) |
| 147 | w.flush() |
| 148 | return w.buf |
| 149 | } |
| 150 | mut last := []int{len: deflate_hash_size, init: -1} |
| 151 | mut prev := []int{len: data.len, init: -1} |
| 152 | mut pos := 0 |
| 153 | for pos < data.len { |
| 154 | off, match_len := find_lz_match(data, pos, last, prev) |
| 155 | if match_len >= deflate_min_match { |
| 156 | li, lext, lext_bits := length_code_info(match_len) |
| 157 | sym := 257 + li |
| 158 | w.write_bits(ll_codes[sym], ll_lens[sym]) |
| 159 | w.write_bits(u32(lext), lext_bits) |
| 160 | di, dext, dext_bits := dist_code_info(off) |
| 161 | w.write_bits(d_codes[di], d_lens[di]) |
| 162 | w.write_bits(u32(dext), dext_bits) |
| 163 | for i in pos .. pos + match_len { |
| 164 | if i + deflate_min_match < data.len { |
| 165 | h := hash3(data, i) |
| 166 | prev[i] = last[h] |
| 167 | last[h] = i |
| 168 | } |
| 169 | } |
| 170 | pos += match_len |
| 171 | } else { |
| 172 | w.write_bits(ll_codes[int(data[pos])], ll_lens[int(data[pos])]) |
| 173 | if pos + deflate_min_match < data.len { |
| 174 | h := hash3(data, pos) |
| 175 | prev[pos] = last[h] |
| 176 | last[h] = pos |
| 177 | } |
| 178 | pos++ |
| 179 | } |
| 180 | } |
| 181 | w.write_bits(ll_codes[256], ll_lens[256]) |
| 182 | w.flush() |
| 183 | return w.buf |
| 184 | } |
| 185 | |