| 1 | module bzip2 |
| 2 | |
| 3 | const bzip2_block_magic = u64(0x314159265359) |
| 4 | const bzip2_eos_magic = u64(0x177245385090) |
| 5 | const bzip2_runa = 0 |
| 6 | const bzip2_runb = 1 |
| 7 | const bzip2_max_groups = 6 |
| 8 | const bzip2_max_alpha = 258 |
| 9 | const bzip2_group_size = 50 |
| 10 | const bzip2_max_code_len = 20 |
| 11 | const bzip2_max_selectors = 18002 |
| 12 | |
| 13 | // vfmt off |
| 14 | const bzip2_crc32_table = [..]u32[ |
| 15 | 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, |
| 16 | 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, |
| 17 | 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, |
| 18 | 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, |
| 19 | 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, |
| 20 | 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, |
| 21 | 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, |
| 22 | 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, |
| 23 | 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, |
| 24 | 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, |
| 25 | 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, |
| 26 | 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, |
| 27 | 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, |
| 28 | 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, |
| 29 | 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, |
| 30 | 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, |
| 31 | 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, |
| 32 | 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, |
| 33 | 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, |
| 34 | 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, |
| 35 | 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, |
| 36 | 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, |
| 37 | 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, |
| 38 | 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, |
| 39 | 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, |
| 40 | 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, |
| 41 | 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, |
| 42 | 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, |
| 43 | 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, |
| 44 | 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, |
| 45 | 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, |
| 46 | 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 |
| 47 | ] |
| 48 | // vfmt on |
| 49 | |
| 50 | @[params] |
| 51 | pub struct CompressParams { |
| 52 | pub: |
| 53 | block_size int = 9 // Valid range is 1..9 (100k to 900k block size). |
| 54 | } |
| 55 | |
| 56 | @[params] |
| 57 | pub struct DecompressParams { |
| 58 | pub: |
| 59 | verify_crc bool = true |
| 60 | } |
| 61 | |
| 62 | // compress compresses `src` into a bzip2 byte stream. |
| 63 | pub fn compress(src []u8, params CompressParams) ![]u8 { |
| 64 | if params.block_size < 1 || params.block_size > 9 { |
| 65 | return error('bzip2: block_size must be in 1..9') |
| 66 | } |
| 67 | mut w := BitWriter{ |
| 68 | out: []u8{} |
| 69 | } |
| 70 | |
| 71 | w.write_byte(`B`) |
| 72 | w.write_byte(`Z`) |
| 73 | w.write_byte(`h`) |
| 74 | w.write_byte(u8(`0`) + u8(params.block_size)) |
| 75 | |
| 76 | max_block := params.block_size * 100000 |
| 77 | mut stream_crc := u32(0) |
| 78 | if src.len > 0 { |
| 79 | mut pos := 0 |
| 80 | for pos < src.len { |
| 81 | end := find_rle1_block_end(src, pos, max_block) |
| 82 | chunk := src[pos..end] |
| 83 | block := encode_block(chunk)! |
| 84 | w.write_bits(48, bzip2_block_magic) |
| 85 | w.write_bits(32, u64(block.block_crc)) |
| 86 | w.write_bit(0) // randomized flag; unsupported legacy mode |
| 87 | w.write_bits(24, u64(block.orig_ptr)) |
| 88 | write_in_use_map(mut w, block.in_use) |
| 89 | w.write_bits(3, u64(block.n_groups)) |
| 90 | w.write_bits(15, u64(block.selectors.len)) |
| 91 | for sel in block.selector_mtf { |
| 92 | for _ in 0 .. sel { |
| 93 | w.write_bit(1) |
| 94 | } |
| 95 | w.write_bit(0) |
| 96 | } |
| 97 | for g in 0 .. block.n_groups { |
| 98 | write_code_lengths(mut w, block.code_lengths[g], block.alpha_size) |
| 99 | } |
| 100 | mut grp_idx := 0 |
| 101 | mut grp_pos := 0 |
| 102 | for sym in block.symbols { |
| 103 | if grp_pos == 0 { |
| 104 | grp_idx = block.selectors[grp_idx] |
| 105 | } |
| 106 | code := block.codes[grp_idx][sym] |
| 107 | len := block.code_lengths[grp_idx][sym] |
| 108 | w.write_bits(len, u64(code)) |
| 109 | grp_pos++ |
| 110 | if grp_pos == bzip2_group_size { |
| 111 | grp_pos = 0 |
| 112 | grp_idx++ |
| 113 | } |
| 114 | } |
| 115 | stream_crc = rotate_left_1(stream_crc) ^ block.block_crc |
| 116 | pos = end |
| 117 | } |
| 118 | } |
| 119 | w.write_bits(48, bzip2_eos_magic) |
| 120 | w.write_bits(32, u64(stream_crc)) |
| 121 | return w.finish() |
| 122 | } |
| 123 | |
| 124 | // decompress decompresses a bzip2 byte stream. |
| 125 | pub fn decompress(src []u8, params DecompressParams) ![]u8 { |
| 126 | mut r := BitReader{ |
| 127 | data: src |
| 128 | } |
| 129 | if r.read_byte()! != `B` || r.read_byte()! != `Z` || r.read_byte()! != `h` { |
| 130 | return error('bzip2: invalid header') |
| 131 | } |
| 132 | lvl := r.read_byte()! |
| 133 | if lvl < `1` || lvl > `9` { |
| 134 | return error('bzip2: invalid block size marker') |
| 135 | } |
| 136 | block_limit := int(lvl - `0`) * 100000 |
| 137 | mut out := []u8{} |
| 138 | mut stream_crc := u32(0) |
| 139 | for { |
| 140 | magic := r.read_bits(48)! |
| 141 | if magic == bzip2_eos_magic { |
| 142 | stored_stream_crc := u32(r.read_bits(32)!) |
| 143 | if params.verify_crc && stored_stream_crc != stream_crc { |
| 144 | return error('bzip2: stream crc mismatch') |
| 145 | } |
| 146 | if !r.is_aligned_to_byte_zero_padding() { |
| 147 | return error('bzip2: trailing non-zero bits') |
| 148 | } |
| 149 | return out |
| 150 | } |
| 151 | if magic != bzip2_block_magic { |
| 152 | return error('bzip2: invalid block magic') |
| 153 | } |
| 154 | block_crc := u32(r.read_bits(32)!) |
| 155 | randomized := r.read_bit()! |
| 156 | if randomized != 0 { |
| 157 | return error('bzip2: randomized blocks are not supported') |
| 158 | } |
| 159 | orig_ptr := int(r.read_bits(24)!) |
| 160 | in_use := read_in_use_map(mut r)! |
| 161 | mut seq_to_unseq := []u8{} |
| 162 | for i, used in in_use { |
| 163 | if used { |
| 164 | seq_to_unseq << u8(i) |
| 165 | } |
| 166 | } |
| 167 | n_in_use := seq_to_unseq.len |
| 168 | alpha_size := n_in_use + 2 |
| 169 | if alpha_size < 2 || alpha_size > bzip2_max_alpha { |
| 170 | return error('bzip2: invalid alphabet size') |
| 171 | } |
| 172 | n_groups := int(r.read_bits(3)!) |
| 173 | if n_groups < 2 || n_groups > bzip2_max_groups { |
| 174 | return error('bzip2: invalid huffman group count') |
| 175 | } |
| 176 | n_selectors := int(r.read_bits(15)!) |
| 177 | if n_selectors < 1 || n_selectors > bzip2_max_selectors { |
| 178 | return error('bzip2: invalid selector count') |
| 179 | } |
| 180 | mut selectors_mtf := []int{len: n_selectors} |
| 181 | for i in 0 .. n_selectors { |
| 182 | mut c := 0 |
| 183 | for { |
| 184 | b := r.read_bit()! |
| 185 | if b == 0 { |
| 186 | break |
| 187 | } |
| 188 | c++ |
| 189 | if c >= n_groups { |
| 190 | return error('bzip2: invalid selector mtf value') |
| 191 | } |
| 192 | } |
| 193 | selectors_mtf[i] = c |
| 194 | } |
| 195 | selectors := decode_selector_mtf(selectors_mtf, n_groups)! |
| 196 | |
| 197 | mut code_lengths := [][]int{len: n_groups, init: []int{len: alpha_size}} |
| 198 | for g in 0 .. n_groups { |
| 199 | mut curr := int(r.read_bits(5)!) |
| 200 | if curr < 1 || curr > bzip2_max_code_len { |
| 201 | return error('bzip2: invalid initial huffman code length') |
| 202 | } |
| 203 | for i in 0 .. alpha_size { |
| 204 | for { |
| 205 | flag := r.read_bit()! |
| 206 | if flag == 0 { |
| 207 | break |
| 208 | } |
| 209 | up_down := r.read_bit()! |
| 210 | if up_down == 0 { |
| 211 | curr++ |
| 212 | } else { |
| 213 | curr-- |
| 214 | } |
| 215 | if curr < 1 || curr > bzip2_max_code_len { |
| 216 | return error('bzip2: invalid huffman code length delta') |
| 217 | } |
| 218 | } |
| 219 | code_lengths[g][i] = curr |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | mut tables := []DecodeTable{len: n_groups} |
| 224 | for g in 0 .. n_groups { |
| 225 | tables[g] = build_decode_table(code_lengths[g], alpha_size)! |
| 226 | } |
| 227 | |
| 228 | eob := n_in_use + 1 |
| 229 | mut selector_index := 0 |
| 230 | mut group_pos := 0 |
| 231 | mut mtf := []int{len: n_in_use} |
| 232 | for i in 0 .. n_in_use { |
| 233 | mtf[i] = i |
| 234 | } |
| 235 | mut decoded_syms := []int{} |
| 236 | mut next_sym := 0 |
| 237 | for { |
| 238 | if group_pos == 0 { |
| 239 | if selector_index >= selectors.len { |
| 240 | return error('bzip2: selectors exhausted') |
| 241 | } |
| 242 | } |
| 243 | grp := selectors[selector_index] |
| 244 | next_sym = tables[grp].decode(mut r)! |
| 245 | group_pos++ |
| 246 | if group_pos == bzip2_group_size { |
| 247 | group_pos = 0 |
| 248 | selector_index++ |
| 249 | } |
| 250 | if next_sym == eob { |
| 251 | break |
| 252 | } |
| 253 | if next_sym == bzip2_runa || next_sym == bzip2_runb { |
| 254 | mut s := i64(-1) |
| 255 | mut n := i64(1) |
| 256 | remaining := block_limit - decoded_syms.len |
| 257 | if remaining < 1 { |
| 258 | return error('bzip2: block output exceeds declared block size') |
| 259 | } |
| 260 | for { |
| 261 | if next_sym == bzip2_runa { |
| 262 | s += n |
| 263 | } else { |
| 264 | s += n * 2 |
| 265 | } |
| 266 | if s + 1 > i64(remaining) { |
| 267 | return error('bzip2: block output exceeds declared block size') |
| 268 | } |
| 269 | n *= 2 |
| 270 | if group_pos == 0 { |
| 271 | if selector_index >= selectors.len { |
| 272 | return error('bzip2: selectors exhausted in run') |
| 273 | } |
| 274 | } |
| 275 | grp2 := selectors[selector_index] |
| 276 | next_sym = tables[grp2].decode(mut r)! |
| 277 | group_pos++ |
| 278 | if group_pos == bzip2_group_size { |
| 279 | group_pos = 0 |
| 280 | selector_index++ |
| 281 | } |
| 282 | if next_sym != bzip2_runa && next_sym != bzip2_runb { |
| 283 | break |
| 284 | } |
| 285 | } |
| 286 | run_len := int(s + 1) |
| 287 | if run_len < 1 { |
| 288 | return error('bzip2: invalid run length') |
| 289 | } |
| 290 | if mtf.len == 0 { |
| 291 | return error('bzip2: invalid run with empty mtf table') |
| 292 | } |
| 293 | ensure_block_output_limit(decoded_syms.len, run_len, block_limit)! |
| 294 | for _ in 0 .. run_len { |
| 295 | decoded_syms << mtf[0] |
| 296 | } |
| 297 | if next_sym == eob { |
| 298 | break |
| 299 | } |
| 300 | } |
| 301 | if next_sym == eob { |
| 302 | break |
| 303 | } |
| 304 | if next_sym < 2 || next_sym > eob { |
| 305 | return error('bzip2: invalid symbol value') |
| 306 | } |
| 307 | pos := next_sym - 1 |
| 308 | if pos >= mtf.len { |
| 309 | return error('bzip2: mtf index out of range') |
| 310 | } |
| 311 | sym := mtf[pos] |
| 312 | move_to_front_int(mut mtf, pos) |
| 313 | ensure_block_output_limit(decoded_syms.len, 1, block_limit)! |
| 314 | decoded_syms << sym |
| 315 | } |
| 316 | |
| 317 | mut bwt_bytes := []u8{cap: decoded_syms.len} |
| 318 | for s in decoded_syms { |
| 319 | if s < 0 || s >= seq_to_unseq.len { |
| 320 | return error('bzip2: decoded symbol is out of sequence map range') |
| 321 | } |
| 322 | bwt_bytes << seq_to_unseq[s] |
| 323 | } |
| 324 | |
| 325 | rle1 := inverse_bwt(bwt_bytes, orig_ptr)! |
| 326 | plain := rle1_decode(rle1)! |
| 327 | if params.verify_crc { |
| 328 | calc := bzip2_crc32(plain) |
| 329 | if calc != block_crc { |
| 330 | return error('bzip2: block crc mismatch') |
| 331 | } |
| 332 | } |
| 333 | stream_crc = rotate_left_1(stream_crc) ^ block_crc |
| 334 | out << plain |
| 335 | } |
| 336 | return error('bzip2: unexpected end of stream') |
| 337 | } |
| 338 | |
| 339 | struct EncodedBlock { |
| 340 | block_crc u32 |
| 341 | orig_ptr int |
| 342 | in_use []bool |
| 343 | alpha_size int |
| 344 | n_groups int |
| 345 | selectors []int |
| 346 | selector_mtf []int |
| 347 | symbols []int |
| 348 | code_lengths [][]int |
| 349 | codes [][]u32 |
| 350 | } |
| 351 | |
| 352 | fn encode_block(src []u8) !EncodedBlock { |
| 353 | block_crc := bzip2_crc32(src) |
| 354 | rle1 := rle1_encode(src) |
| 355 | if rle1.len == 0 { |
| 356 | return error('bzip2: internal error, empty block after rle1') |
| 357 | } |
| 358 | bwt, orig_ptr := bwt_transform(rle1) |
| 359 | mut in_use := []bool{len: 256} |
| 360 | for b in bwt { |
| 361 | in_use[int(b)] = true |
| 362 | } |
| 363 | mut seq_to_unseq := []u8{} |
| 364 | for i, used in in_use { |
| 365 | if used { |
| 366 | seq_to_unseq << u8(i) |
| 367 | } |
| 368 | } |
| 369 | mut unseq_to_seq := []int{len: 256, init: -1} |
| 370 | for i, b in seq_to_unseq { |
| 371 | unseq_to_seq[int(b)] = i |
| 372 | } |
| 373 | mtf_symbols, alpha_size := mtf_rle2_encode(bwt, unseq_to_seq, seq_to_unseq.len) |
| 374 | if mtf_symbols.len == 0 { |
| 375 | return error('bzip2: internal error, empty symbol stream') |
| 376 | } |
| 377 | n_groups := select_group_count(mtf_symbols.len) |
| 378 | n_selectors := selector_count_from_symbol_count(mtf_symbols.len)! |
| 379 | mut selectors := []int{len: n_selectors, init: 0} |
| 380 | selector_mtf := encode_selector_mtf(selectors, n_groups) |
| 381 | freq := symbol_freq(mtf_symbols, alpha_size) |
| 382 | lens := make_huffman_lengths(freq, bzip2_max_code_len) |
| 383 | codes := build_huffman_codes(lens) |
| 384 | mut code_lengths := [][]int{len: n_groups} |
| 385 | mut code_words := [][]u32{len: n_groups} |
| 386 | for i in 0 .. n_groups { |
| 387 | code_lengths[i] = lens.clone() |
| 388 | code_words[i] = codes.clone() |
| 389 | } |
| 390 | return EncodedBlock{ |
| 391 | block_crc: block_crc |
| 392 | orig_ptr: orig_ptr |
| 393 | in_use: in_use |
| 394 | alpha_size: alpha_size |
| 395 | n_groups: n_groups |
| 396 | selectors: selectors |
| 397 | selector_mtf: selector_mtf |
| 398 | symbols: mtf_symbols |
| 399 | code_lengths: code_lengths |
| 400 | codes: code_words |
| 401 | } |
| 402 | } |
| 403 | |
| 404 | fn select_group_count(n_syms int) int { |
| 405 | if n_syms < 200 { |
| 406 | return 2 |
| 407 | } |
| 408 | if n_syms < 600 { |
| 409 | return 3 |
| 410 | } |
| 411 | if n_syms < 1200 { |
| 412 | return 4 |
| 413 | } |
| 414 | if n_syms < 2400 { |
| 415 | return 5 |
| 416 | } |
| 417 | return 6 |
| 418 | } |
| 419 | |
| 420 | fn selector_count_from_symbol_count(n_symbols int) !int { |
| 421 | if n_symbols < 1 { |
| 422 | return error('bzip2: invalid selector count') |
| 423 | } |
| 424 | n_selectors := (n_symbols + bzip2_group_size - 1) / bzip2_group_size |
| 425 | if n_selectors < 1 || n_selectors > bzip2_max_selectors { |
| 426 | return error('bzip2: invalid selector count') |
| 427 | } |
| 428 | return n_selectors |
| 429 | } |
| 430 | |
| 431 | fn ensure_block_output_limit(decoded_len int, add_len int, block_limit int) ! { |
| 432 | if decoded_len < 0 || add_len < 0 || block_limit < 0 { |
| 433 | return error('bzip2: invalid block output state') |
| 434 | } |
| 435 | if decoded_len > block_limit || add_len > block_limit - decoded_len { |
| 436 | return error('bzip2: block output exceeds declared block size') |
| 437 | } |
| 438 | } |
| 439 | |
| 440 | fn symbol_freq(symbols []int, alpha_size int) []int { |
| 441 | mut freq := []int{len: alpha_size} |
| 442 | for s in symbols { |
| 443 | if s >= 0 && s < alpha_size { |
| 444 | freq[s]++ |
| 445 | } |
| 446 | } |
| 447 | for i in 0 .. alpha_size { |
| 448 | if freq[i] == 0 { |
| 449 | freq[i] = 1 |
| 450 | } |
| 451 | } |
| 452 | return freq |
| 453 | } |
| 454 | |
| 455 | fn write_in_use_map(mut w BitWriter, in_use []bool) { |
| 456 | mut group_used := []bool{len: 16} |
| 457 | for i in 0 .. 16 { |
| 458 | for j in 0 .. 16 { |
| 459 | if in_use[i * 16 + j] { |
| 460 | group_used[i] = true |
| 461 | break |
| 462 | } |
| 463 | } |
| 464 | } |
| 465 | for g in group_used { |
| 466 | w.write_bit(if g { 1 } else { 0 }) |
| 467 | } |
| 468 | for i in 0 .. 16 { |
| 469 | if !group_used[i] { |
| 470 | continue |
| 471 | } |
| 472 | for j in 0 .. 16 { |
| 473 | w.write_bit(if in_use[i * 16 + j] { 1 } else { 0 }) |
| 474 | } |
| 475 | } |
| 476 | } |
| 477 | |
| 478 | fn read_in_use_map(mut r BitReader) ![]bool { |
| 479 | mut group_used := []bool{len: 16} |
| 480 | for i in 0 .. 16 { |
| 481 | group_used[i] = r.read_bit()! == 1 |
| 482 | } |
| 483 | mut in_use := []bool{len: 256} |
| 484 | for i in 0 .. 16 { |
| 485 | if !group_used[i] { |
| 486 | continue |
| 487 | } |
| 488 | for j in 0 .. 16 { |
| 489 | in_use[i * 16 + j] = r.read_bit()! == 1 |
| 490 | } |
| 491 | } |
| 492 | return in_use |
| 493 | } |
| 494 | |
| 495 | fn write_code_lengths(mut w BitWriter, lengths []int, alpha_size int) { |
| 496 | mut curr := lengths[0] |
| 497 | w.write_bits(5, u64(curr)) |
| 498 | for i in 0 .. alpha_size { |
| 499 | target := lengths[i] |
| 500 | for curr < target { |
| 501 | w.write_bit(1) |
| 502 | w.write_bit(0) |
| 503 | curr++ |
| 504 | } |
| 505 | for curr > target { |
| 506 | w.write_bit(1) |
| 507 | w.write_bit(1) |
| 508 | curr-- |
| 509 | } |
| 510 | w.write_bit(0) |
| 511 | } |
| 512 | } |
| 513 | |
| 514 | fn decode_selector_mtf(vals []int, n_groups int) ![]int { |
| 515 | mut pos := []int{len: n_groups} |
| 516 | for i in 0 .. n_groups { |
| 517 | pos[i] = i |
| 518 | } |
| 519 | mut out := []int{len: vals.len} |
| 520 | for i, v in vals { |
| 521 | if v < 0 || v >= n_groups { |
| 522 | return error('bzip2: selector mtf value out of range') |
| 523 | } |
| 524 | sel := pos[v] |
| 525 | for j := v; j > 0; j-- { |
| 526 | pos[j] = pos[j - 1] |
| 527 | } |
| 528 | pos[0] = sel |
| 529 | out[i] = sel |
| 530 | } |
| 531 | return out |
| 532 | } |
| 533 | |
| 534 | fn encode_selector_mtf(selectors []int, n_groups int) []int { |
| 535 | mut pos := []int{len: n_groups} |
| 536 | for i in 0 .. n_groups { |
| 537 | pos[i] = i |
| 538 | } |
| 539 | mut out := []int{len: selectors.len} |
| 540 | for i, sel in selectors { |
| 541 | mut idx := 0 |
| 542 | for idx < pos.len && pos[idx] != sel { |
| 543 | idx++ |
| 544 | } |
| 545 | if idx >= pos.len { |
| 546 | out[i] = 0 |
| 547 | continue |
| 548 | } |
| 549 | out[i] = idx |
| 550 | for j := idx; j > 0; j-- { |
| 551 | pos[j] = pos[j - 1] |
| 552 | } |
| 553 | pos[0] = sel |
| 554 | } |
| 555 | return out |
| 556 | } |
| 557 | |
| 558 | fn make_huffman_lengths(freq []int, max_len int) []int { |
| 559 | mut scaled := freq.clone() |
| 560 | for { |
| 561 | lens := build_huffman_lengths_unbounded(scaled) |
| 562 | mut max_seen := 0 |
| 563 | for l in lens { |
| 564 | if l > max_seen { |
| 565 | max_seen = l |
| 566 | } |
| 567 | } |
| 568 | if max_seen <= max_len { |
| 569 | return lens |
| 570 | } |
| 571 | for i in 0 .. scaled.len { |
| 572 | scaled[i] = (scaled[i] >> 1) + 1 |
| 573 | } |
| 574 | } |
| 575 | return []int{} |
| 576 | } |
| 577 | |
| 578 | fn build_huffman_lengths_unbounded(freq []int) []int { |
| 579 | n := freq.len |
| 580 | if n == 0 { |
| 581 | return []int{} |
| 582 | } |
| 583 | mut weight := []int{cap: 2 * n} |
| 584 | mut parent := []int{cap: 2 * n} |
| 585 | for i in 0 .. n { |
| 586 | w := if freq[i] > 0 { freq[i] } else { 1 } |
| 587 | weight << w |
| 588 | parent << -1 |
| 589 | } |
| 590 | mut active := []int{len: n} |
| 591 | for i in 0 .. n { |
| 592 | active[i] = i |
| 593 | } |
| 594 | for active.len > 1 { |
| 595 | mut m1 := 0 |
| 596 | mut m2 := 1 |
| 597 | if weight[active[m2]] < weight[active[m1]] { |
| 598 | t := m1 |
| 599 | m1 = m2 |
| 600 | m2 = t |
| 601 | } |
| 602 | for i in 2 .. active.len { |
| 603 | idx := active[i] |
| 604 | if weight[idx] < weight[active[m1]] { |
| 605 | m2 = m1 |
| 606 | m1 = i |
| 607 | } else if weight[idx] < weight[active[m2]] { |
| 608 | m2 = i |
| 609 | } |
| 610 | } |
| 611 | a := active[m1] |
| 612 | b := active[m2] |
| 613 | new_idx := weight.len |
| 614 | weight << (weight[a] + weight[b]) |
| 615 | parent << -1 |
| 616 | parent[a] = new_idx |
| 617 | parent[b] = new_idx |
| 618 | if m1 > m2 { |
| 619 | active.delete(m1) |
| 620 | active.delete(m2) |
| 621 | } else { |
| 622 | active.delete(m2) |
| 623 | active.delete(m1) |
| 624 | } |
| 625 | active << new_idx |
| 626 | } |
| 627 | mut lengths := []int{len: n} |
| 628 | for i in 0 .. n { |
| 629 | mut l := 0 |
| 630 | mut p := parent[i] |
| 631 | for p != -1 { |
| 632 | l++ |
| 633 | p = parent[p] |
| 634 | } |
| 635 | if l == 0 { |
| 636 | l = 1 |
| 637 | } |
| 638 | lengths[i] = l |
| 639 | } |
| 640 | return lengths |
| 641 | } |
| 642 | |
| 643 | fn build_huffman_codes(lengths []int) []u32 { |
| 644 | mut max_len := 0 |
| 645 | for l in lengths { |
| 646 | if l > max_len { |
| 647 | max_len = l |
| 648 | } |
| 649 | } |
| 650 | mut bl_count := []int{len: max_len + 1} |
| 651 | for l in lengths { |
| 652 | if l > 0 { |
| 653 | bl_count[l]++ |
| 654 | } |
| 655 | } |
| 656 | mut next_code := []u32{len: max_len + 1} |
| 657 | mut code := u32(0) |
| 658 | for bits in 1 .. max_len + 1 { |
| 659 | code = (code + u32(bl_count[bits - 1])) << 1 |
| 660 | next_code[bits] = code |
| 661 | } |
| 662 | mut out := []u32{len: lengths.len} |
| 663 | for i, l in lengths { |
| 664 | if l == 0 { |
| 665 | continue |
| 666 | } |
| 667 | out[i] = next_code[l] |
| 668 | next_code[l]++ |
| 669 | } |
| 670 | return out |
| 671 | } |
| 672 | |
| 673 | struct DecodeTable { |
| 674 | min_len int |
| 675 | max_len int |
| 676 | base []int |
| 677 | limit []int |
| 678 | perm []int |
| 679 | } |
| 680 | |
| 681 | fn build_decode_table(lengths []int, alpha_size int) !DecodeTable { |
| 682 | mut min_len := 999 |
| 683 | mut max_len := 0 |
| 684 | for i in 0 .. alpha_size { |
| 685 | l := lengths[i] |
| 686 | if l < 1 || l > bzip2_max_code_len { |
| 687 | return error('bzip2: invalid huffman code length') |
| 688 | } |
| 689 | if l < min_len { |
| 690 | min_len = l |
| 691 | } |
| 692 | if l > max_len { |
| 693 | max_len = l |
| 694 | } |
| 695 | } |
| 696 | mut perm := []int{} |
| 697 | for l in min_len .. max_len + 1 { |
| 698 | for i in 0 .. alpha_size { |
| 699 | if lengths[i] == l { |
| 700 | perm << i |
| 701 | } |
| 702 | } |
| 703 | } |
| 704 | mut base := []int{len: max_len + 2} |
| 705 | mut limit := []int{len: max_len + 1} |
| 706 | for i in 0 .. alpha_size { |
| 707 | base[lengths[i] + 1]++ |
| 708 | } |
| 709 | for i in 1 .. base.len { |
| 710 | base[i] += base[i - 1] |
| 711 | } |
| 712 | mut vec := 0 |
| 713 | for l in min_len .. max_len + 1 { |
| 714 | vec += base[l + 1] - base[l] |
| 715 | limit[l] = vec - 1 |
| 716 | vec <<= 1 |
| 717 | } |
| 718 | for l in min_len + 1 .. max_len + 1 { |
| 719 | base[l] = ((limit[l - 1] + 1) * 2) - base[l] |
| 720 | } |
| 721 | return DecodeTable{ |
| 722 | min_len: min_len |
| 723 | max_len: max_len |
| 724 | base: base |
| 725 | limit: limit |
| 726 | perm: perm |
| 727 | } |
| 728 | } |
| 729 | |
| 730 | fn (t DecodeTable) decode(mut r BitReader) !int { |
| 731 | mut zn := t.min_len |
| 732 | mut zvec := int(r.read_bits(zn)!) |
| 733 | for zn <= t.max_len && zvec > t.limit[zn] { |
| 734 | zn++ |
| 735 | bit := int(r.read_bits(1)!) |
| 736 | zvec = (zvec * 2) | bit |
| 737 | } |
| 738 | if zn > t.max_len { |
| 739 | return error('bzip2: invalid huffman code') |
| 740 | } |
| 741 | idx := zvec - t.base[zn] |
| 742 | if idx < 0 || idx >= t.perm.len { |
| 743 | return error('bzip2: invalid huffman decode index') |
| 744 | } |
| 745 | return t.perm[idx] |
| 746 | } |
| 747 | |
| 748 | fn mtf_rle2_encode(data []u8, unseq_to_seq []int, n_in_use int) ([]int, int) { |
| 749 | mut mtf := []int{len: n_in_use} |
| 750 | for i in 0 .. n_in_use { |
| 751 | mtf[i] = i |
| 752 | } |
| 753 | eob := n_in_use + 1 |
| 754 | mut out := []int{} |
| 755 | mut zpend := 0 |
| 756 | for b in data { |
| 757 | seq := unseq_to_seq[int(b)] |
| 758 | mut pos := 0 |
| 759 | for pos < mtf.len && mtf[pos] != seq { |
| 760 | pos++ |
| 761 | } |
| 762 | if pos == 0 { |
| 763 | zpend++ |
| 764 | continue |
| 765 | } |
| 766 | if zpend > 0 { |
| 767 | emit_run_a_b(mut out, zpend) |
| 768 | zpend = 0 |
| 769 | } |
| 770 | out << (pos + 1) |
| 771 | move_to_front_int(mut mtf, pos) |
| 772 | } |
| 773 | if zpend > 0 { |
| 774 | emit_run_a_b(mut out, zpend) |
| 775 | } |
| 776 | out << eob |
| 777 | return out, n_in_use + 2 |
| 778 | } |
| 779 | |
| 780 | fn emit_run_a_b(mut out []int, run_len int) { |
| 781 | mut z := run_len - 1 |
| 782 | for { |
| 783 | if (z & 1) == 0 { |
| 784 | out << bzip2_runa |
| 785 | } else { |
| 786 | out << bzip2_runb |
| 787 | } |
| 788 | if z < 2 { |
| 789 | break |
| 790 | } |
| 791 | z = (z - 2) >> 1 |
| 792 | } |
| 793 | } |
| 794 | |
| 795 | fn move_to_front_int(mut arr []int, pos int) { |
| 796 | if pos <= 0 { |
| 797 | return |
| 798 | } |
| 799 | tmp := arr[pos] |
| 800 | for i := pos; i > 0; i-- { |
| 801 | arr[i] = arr[i - 1] |
| 802 | } |
| 803 | arr[0] = tmp |
| 804 | } |
| 805 | |
| 806 | fn rle1_encode(src []u8) []u8 { |
| 807 | if src.len == 0 { |
| 808 | return []u8{} |
| 809 | } |
| 810 | mut out := []u8{cap: src.len + (src.len / 4) + 8} |
| 811 | mut i := 0 |
| 812 | for i < src.len { |
| 813 | b := src[i] |
| 814 | mut run := 1 |
| 815 | for i + run < src.len && src[i + run] == b && run < max_i32 { |
| 816 | run++ |
| 817 | } |
| 818 | mut rem := run |
| 819 | for rem > 0 { |
| 820 | if rem <= 3 { |
| 821 | for _ in 0 .. rem { |
| 822 | out << b |
| 823 | } |
| 824 | rem = 0 |
| 825 | } else { |
| 826 | chunk := if rem > 259 { 259 } else { rem } |
| 827 | for _ in 0 .. 4 { |
| 828 | out << b |
| 829 | } |
| 830 | out << u8(chunk - 4) |
| 831 | rem -= chunk |
| 832 | } |
| 833 | } |
| 834 | i += run |
| 835 | } |
| 836 | return out |
| 837 | } |
| 838 | |
| 839 | fn find_rle1_block_end(src []u8, start int, block_limit int) int { |
| 840 | if start >= src.len { |
| 841 | return start |
| 842 | } |
| 843 | mut end := start |
| 844 | mut encoded_len := 0 |
| 845 | for end < src.len { |
| 846 | b := src[end] |
| 847 | mut run_len := 1 |
| 848 | for end + run_len < src.len && src[end + run_len] == b { |
| 849 | run_len++ |
| 850 | } |
| 851 | run_encoded_len := rle1_encoded_run_len(run_len) |
| 852 | if encoded_len + run_encoded_len <= block_limit { |
| 853 | encoded_len += run_encoded_len |
| 854 | end += run_len |
| 855 | continue |
| 856 | } |
| 857 | available := block_limit - encoded_len |
| 858 | if available <= 0 { |
| 859 | break |
| 860 | } |
| 861 | take := max_run_prefix_for_rle1(run_len, available) |
| 862 | if take <= 0 { |
| 863 | break |
| 864 | } |
| 865 | end += take |
| 866 | break |
| 867 | } |
| 868 | if end == start { |
| 869 | return start + 1 |
| 870 | } |
| 871 | return end |
| 872 | } |
| 873 | |
| 874 | fn max_run_prefix_for_rle1(run_len int, encoded_limit int) int { |
| 875 | if run_len <= 0 || encoded_limit <= 0 { |
| 876 | return 0 |
| 877 | } |
| 878 | mut lo := 1 |
| 879 | mut hi := run_len |
| 880 | mut best := 0 |
| 881 | for lo <= hi { |
| 882 | mid := lo + (hi - lo) / 2 |
| 883 | if rle1_encoded_run_len(mid) <= encoded_limit { |
| 884 | best = mid |
| 885 | lo = mid + 1 |
| 886 | } else { |
| 887 | hi = mid - 1 |
| 888 | } |
| 889 | } |
| 890 | return best |
| 891 | } |
| 892 | |
| 893 | fn rle1_encoded_run_len(run_len int) int { |
| 894 | if run_len <= 0 { |
| 895 | return 0 |
| 896 | } |
| 897 | if run_len <= 3 { |
| 898 | return run_len |
| 899 | } |
| 900 | full_chunks := run_len / 259 |
| 901 | rem := run_len % 259 |
| 902 | mut out := full_chunks * 5 |
| 903 | if rem == 0 { |
| 904 | return out |
| 905 | } |
| 906 | if rem <= 3 { |
| 907 | out += rem |
| 908 | } else { |
| 909 | out += 5 |
| 910 | } |
| 911 | return out |
| 912 | } |
| 913 | |
| 914 | fn rle1_decode(src []u8) ![]u8 { |
| 915 | mut out := []u8{cap: src.len} |
| 916 | mut i := 0 |
| 917 | for i < src.len { |
| 918 | if i + 4 < src.len && src[i] == src[i + 1] && src[i] == src[i + 2] && src[i] == src[i + 3] { |
| 919 | run := int(src[i + 4]) + 4 |
| 920 | for _ in 0 .. run { |
| 921 | out << src[i] |
| 922 | } |
| 923 | i += 5 |
| 924 | } else { |
| 925 | out << src[i] |
| 926 | i++ |
| 927 | } |
| 928 | } |
| 929 | return out |
| 930 | } |
| 931 | |
| 932 | fn bwt_transform(data []u8) ([]u8, int) { |
| 933 | n := data.len |
| 934 | if n == 0 { |
| 935 | return []u8{}, 0 |
| 936 | } |
| 937 | mut sa := cyclic_suffix_array(data) |
| 938 | mut out := []u8{len: n} |
| 939 | mut orig_ptr := 0 |
| 940 | for i, s in sa { |
| 941 | if s == 0 { |
| 942 | orig_ptr = i |
| 943 | out[i] = data[n - 1] |
| 944 | } else { |
| 945 | out[i] = data[s - 1] |
| 946 | } |
| 947 | } |
| 948 | return out, orig_ptr |
| 949 | } |
| 950 | |
| 951 | fn inverse_bwt(last_col []u8, orig_ptr int) ![]u8 { |
| 952 | n := last_col.len |
| 953 | if n == 0 { |
| 954 | return []u8{} |
| 955 | } |
| 956 | if orig_ptr < 0 || orig_ptr >= n { |
| 957 | return error('bzip2: invalid bwt origin pointer') |
| 958 | } |
| 959 | mut count := []int{len: 256} |
| 960 | for b in last_col { |
| 961 | count[int(b)]++ |
| 962 | } |
| 963 | mut tots := []int{len: 256} |
| 964 | mut sum := 0 |
| 965 | for i in 0 .. 256 { |
| 966 | tots[i] = sum |
| 967 | sum += count[i] |
| 968 | } |
| 969 | mut tt := []int{len: n} |
| 970 | for i, b in last_col { |
| 971 | idx := int(b) |
| 972 | tt[tots[idx]] = i |
| 973 | tots[idx]++ |
| 974 | } |
| 975 | mut out := []u8{len: n} |
| 976 | mut tpos := orig_ptr |
| 977 | for i in 0 .. n { |
| 978 | tpos = tt[tpos] |
| 979 | out[i] = last_col[tpos] |
| 980 | } |
| 981 | return out |
| 982 | } |
| 983 | |
| 984 | fn cyclic_suffix_array(data []u8) []int { |
| 985 | n := data.len |
| 986 | if n <= 1 { |
| 987 | if n == 1 { |
| 988 | return [0] |
| 989 | } |
| 990 | return []int{} |
| 991 | } |
| 992 | mut sa := []int{len: n} |
| 993 | mut rank := []int{len: n} |
| 994 | for i in 0 .. n { |
| 995 | sa[i] = i |
| 996 | rank[i] = int(data[i]) |
| 997 | } |
| 998 | mut tmp_sa := []int{len: n} |
| 999 | mut new_rank := []int{len: n} |
| 1000 | mut k := 1 |
| 1001 | for k < n { |
| 1002 | radix_sort_cyclic(mut sa, mut tmp_sa, rank, k) |
| 1003 | new_rank[sa[0]] = 0 |
| 1004 | mut classes := 1 |
| 1005 | for i in 1 .. n { |
| 1006 | a := sa[i - 1] |
| 1007 | b := sa[i] |
| 1008 | if rank[a] != rank[b] || rank[(a + k) % n] != rank[(b + k) % n] { |
| 1009 | classes++ |
| 1010 | } |
| 1011 | new_rank[b] = classes - 1 |
| 1012 | } |
| 1013 | rank = new_rank.clone() |
| 1014 | if classes == n { |
| 1015 | break |
| 1016 | } |
| 1017 | k <<= 1 |
| 1018 | } |
| 1019 | return sa |
| 1020 | } |
| 1021 | |
| 1022 | fn radix_sort_cyclic(mut sa []int, mut tmp []int, rank []int, k int) { |
| 1023 | n := sa.len |
| 1024 | m := if n > 256 { n } else { 256 } |
| 1025 | mut count := []int{len: m} |
| 1026 | for i in 0 .. n { |
| 1027 | key := rank[(sa[i] + k) % n] |
| 1028 | count[key]++ |
| 1029 | } |
| 1030 | mut pos := []int{len: m} |
| 1031 | mut sum := 0 |
| 1032 | for i in 0 .. m { |
| 1033 | pos[i] = sum |
| 1034 | sum += count[i] |
| 1035 | } |
| 1036 | for i in 0 .. n { |
| 1037 | v := sa[i] |
| 1038 | key := rank[(v + k) % n] |
| 1039 | tmp[pos[key]] = v |
| 1040 | pos[key]++ |
| 1041 | } |
| 1042 | count = []int{len: m} |
| 1043 | for i in 0 .. n { |
| 1044 | key := rank[tmp[i]] |
| 1045 | count[key]++ |
| 1046 | } |
| 1047 | sum = 0 |
| 1048 | for i in 0 .. m { |
| 1049 | pos[i] = sum |
| 1050 | sum += count[i] |
| 1051 | } |
| 1052 | for i in 0 .. n { |
| 1053 | v := tmp[i] |
| 1054 | key := rank[v] |
| 1055 | sa[pos[key]] = v |
| 1056 | pos[key]++ |
| 1057 | } |
| 1058 | } |
| 1059 | |
| 1060 | struct BitWriter { |
| 1061 | mut: |
| 1062 | out []u8 |
| 1063 | bitbuf u64 |
| 1064 | bit_count int |
| 1065 | } |
| 1066 | |
| 1067 | fn (mut w BitWriter) write_byte(b u8) { |
| 1068 | w.out << b |
| 1069 | } |
| 1070 | |
| 1071 | fn (mut w BitWriter) write_bit(bit int) { |
| 1072 | w.write_bits(1, u64(bit & 1)) |
| 1073 | } |
| 1074 | |
| 1075 | fn (mut w BitWriter) write_bits(n int, value u64) { |
| 1076 | if n <= 0 { |
| 1077 | return |
| 1078 | } |
| 1079 | for i := n - 1; i >= 0; i-- { |
| 1080 | b := (value >> u32(i)) & 1 |
| 1081 | w.bitbuf = (w.bitbuf << 1) | b |
| 1082 | w.bit_count++ |
| 1083 | if w.bit_count == 8 { |
| 1084 | w.out << u8(w.bitbuf & 0xff) |
| 1085 | w.bitbuf = 0 |
| 1086 | w.bit_count = 0 |
| 1087 | } |
| 1088 | } |
| 1089 | } |
| 1090 | |
| 1091 | fn (mut w BitWriter) finish() ![]u8 { |
| 1092 | if w.bit_count > 0 { |
| 1093 | w.bitbuf <<= u32(8 - w.bit_count) |
| 1094 | w.out << u8(w.bitbuf & 0xff) |
| 1095 | w.bitbuf = 0 |
| 1096 | w.bit_count = 0 |
| 1097 | } |
| 1098 | return w.out |
| 1099 | } |
| 1100 | |
| 1101 | struct BitReader { |
| 1102 | data []u8 |
| 1103 | mut: |
| 1104 | byte_pos int |
| 1105 | bit_pos int |
| 1106 | } |
| 1107 | |
| 1108 | fn (mut r BitReader) read_byte() !u8 { |
| 1109 | if r.bit_pos != 0 { |
| 1110 | return error('bzip2: attempted byte read on non-byte boundary') |
| 1111 | } |
| 1112 | if r.byte_pos >= r.data.len { |
| 1113 | return error('bzip2: unexpected end of input') |
| 1114 | } |
| 1115 | b := r.data[r.byte_pos] |
| 1116 | r.byte_pos++ |
| 1117 | return b |
| 1118 | } |
| 1119 | |
| 1120 | fn (mut r BitReader) read_bit() !int { |
| 1121 | return int(r.read_bits(1)!) |
| 1122 | } |
| 1123 | |
| 1124 | fn (mut r BitReader) read_bits(n int) !u64 { |
| 1125 | if n < 0 || n > 56 { |
| 1126 | return error('bzip2: invalid bit width') |
| 1127 | } |
| 1128 | mut out := u64(0) |
| 1129 | for _ in 0 .. n { |
| 1130 | if r.byte_pos >= r.data.len { |
| 1131 | return error('bzip2: unexpected end of input') |
| 1132 | } |
| 1133 | b := r.data[r.byte_pos] |
| 1134 | bit := (b >> u8(7 - r.bit_pos)) & u8(1) |
| 1135 | out = (out << 1) | u64(bit) |
| 1136 | r.bit_pos++ |
| 1137 | if r.bit_pos == 8 { |
| 1138 | r.bit_pos = 0 |
| 1139 | r.byte_pos++ |
| 1140 | } |
| 1141 | } |
| 1142 | return out |
| 1143 | } |
| 1144 | |
| 1145 | fn (r &BitReader) is_aligned_to_byte_zero_padding() bool { |
| 1146 | if r.bit_pos == 0 { |
| 1147 | return r.byte_pos == r.data.len |
| 1148 | } |
| 1149 | if r.byte_pos >= r.data.len { |
| 1150 | return false |
| 1151 | } |
| 1152 | mask := u8((1 << u8(8 - r.bit_pos)) - 1) |
| 1153 | if (r.data[r.byte_pos] & mask) != 0 { |
| 1154 | return false |
| 1155 | } |
| 1156 | for i in r.byte_pos + 1 .. r.data.len { |
| 1157 | if r.data[i] != 0 { |
| 1158 | return false |
| 1159 | } |
| 1160 | } |
| 1161 | return true |
| 1162 | } |
| 1163 | |
| 1164 | fn rotate_left_1(v u32) u32 { |
| 1165 | return (v << 1) | (v >> 31) |
| 1166 | } |
| 1167 | |
| 1168 | fn bzip2_crc32(data []u8) u32 { |
| 1169 | mut crc := u32(0xffffffff) |
| 1170 | for b in data { |
| 1171 | table_idx := int(((crc >> 24) ^ u32(b)) & u32(0xff)) |
| 1172 | crc = (crc << 8) ^ bzip2_crc32_table[table_idx] |
| 1173 | } |
| 1174 | return ~crc |
| 1175 | } |
| 1176 | |