| 1 | module bzip2 |
| 2 | |
| 3 | import encoding.hex |
| 4 | |
// must_decode_hex decodes the hex string `s` into bytes.
// It panics with the decoder's error if `hex.decode` fails, so it is
// only suitable for hard-coded fixtures inside tests.
fn must_decode_hex(s string) []u8 {
	decoded := hex.decode(s) or { panic(err) }
	return decoded
}
| 8 | |
// test_roundtrip_empty_input checks that compressing and then
// decompressing an empty byte array yields an empty byte array again.
fn test_roundtrip_empty_input() {
	original := []u8{}
	packed := compress(original) or { panic(err) }
	restored := decompress(packed) or { panic(err) }
	assert restored == original
}
| 15 | |
// test_roundtrip_small_text checks that a short ASCII line survives a
// compress/decompress round trip unchanged.
fn test_roundtrip_small_text() {
	text := 'hello world\n'
	original := text.bytes()
	packed := compress(original) or { panic(err) }
	restored := decompress(packed) or { panic(err) }
	assert restored == original
}
| 22 | |
// test_roundtrip_binary_data round-trips 4096 bytes of a deterministic
// non-text pattern, compressing with `block_size: 1`.
fn test_roundtrip_binary_data() {
	// Byte i is (i * 17 + 13) mod 256, built by appending rather than
	// filling a pre-sized array.
	mut payload := []u8{cap: 4096}
	for i in 0 .. 4096 {
		payload << u8((i * 17 + 13) & 0xff)
	}
	packed := compress(payload, block_size: 1) or { panic(err) }
	restored := decompress(packed) or { panic(err) }
	assert restored == payload
}
| 32 | |
// test_roundtrip_long_runs round-trips 2000 copies of the byte 'a',
// i.e. a single very long run of one repeated value.
fn test_roundtrip_long_runs() {
	run := 'a'.repeat(2000)
	original := run.bytes()
	packed := compress(original) or { panic(err) }
	restored := decompress(packed) or { panic(err) }
	assert restored == original
}
| 39 | |
// test_decompress_known_python_vector_empty decodes a fixed 14-byte
// stream and expects an empty result.
// NOTE(review): per the test name the fixture presumably comes from
// Python's bz2 module - confirm.
fn test_decompress_known_python_vector_empty() {
	stream_hex := '425a683917724538509000000000'
	stream := must_decode_hex(stream_hex)
	output := decompress(stream) or { panic(err) }
	assert output == []u8{}
}
| 45 | |
// test_decompress_known_python_vector_hello decodes a fixed stream and
// expects the text 'hello world\n'.
// NOTE(review): per the test name the fixture presumably comes from
// Python's bz2 module - confirm.
fn test_decompress_known_python_vector_hello() {
	stream_hex := '425a68393141592653594eece83600000251800010400006449080200031064c4101a7a9a580bb9431f8bb9229c28482776741b0'
	stream := must_decode_hex(stream_hex)
	output := decompress(stream) or { panic(err) }
	text := output.bytestr()
	assert text == 'hello world\n'
}
| 52 | |
// test_decompress_known_python_vector_text decodes a fixed stream whose
// expected text contains a non-ASCII character (the copyright sign).
// NOTE(review): per the test name the fixture presumably comes from
// Python's bz2 module - confirm.
fn test_decompress_known_python_vector_text() {
	stream_hex := '425a6839314159265359dc01b0d8000002d9800010410120080a00cc20200021a4d3688cd0806800e28a3de49f0b16b10d177245385090dc01b0d8'
	stream := must_decode_hex(stream_hex)
	output := decompress(stream) or { panic(err) }
	text := output.bytestr()
	assert text == '1.test\ncopy ©\n'
}
| 59 | |
// test_decompress_known_python_vector_repeated_a decodes a fixed stream
// and expects 200 copies of the byte 'a'.
// NOTE(review): per the test name the fixture presumably comes from
// Python's bz2 module - confirm.
fn test_decompress_known_python_vector_repeated_a() {
	stream_hex := '425a6839314159265359ca3d8dfb000000010420000400200021008283177245385090ca3d8dfb'
	stream := must_decode_hex(stream_hex)
	output := decompress(stream) or { panic(err) }
	expected := 'a'.repeat(200).bytes()
	assert output == expected
}
| 66 | |
// test_decompress_rejects_invalid_header feeds bytes that do not start
// with a bzip2 header and expects an error mentioning 'invalid header'.
fn test_decompress_rejects_invalid_header() {
	garbage := 'not-bzip2'.bytes()
	_ := decompress(garbage) or {
		message := err.msg()
		assert message.contains('invalid header')
		return
	}
	// Reaching this point means decompress accepted the garbage input.
	assert false
}
| 74 | |
// test_decompress_rejects_crc_mismatch flips one bit in an otherwise
// valid stream and expects an error mentioning 'crc mismatch'.
fn test_decompress_rejects_crc_mismatch() {
	stream_hex := '425a68393141592653594eece83600000251800010400006449080200031064c4101a7a9a580bb9431f8bb9229c28482776741b0'
	mut stream := must_decode_hex(stream_hex)
	// Bytes 10..13 hold the stored block CRC in a single-block stream;
	// toggling the low bit of the first of them corrupts that field.
	crc_offset := 10
	stream[crc_offset] ^= 0x01
	_ := decompress(stream) or {
		message := err.msg()
		assert message.contains('crc mismatch')
		return
	}
	// Reaching this point means the corrupted CRC went unnoticed.
	assert false
}
| 86 | |
// test_selector_count_limit_boundaries probes
// selector_count_from_symbol_count around its upper bound: 900050 and
// 900100 symbols must map to 18001 and 18002 selectors, and 900101
// symbols must be rejected with 'invalid selector count'.
fn test_selector_count_limit_boundaries() {
	under := selector_count_from_symbol_count(900050) or { panic(err) }
	assert under == 18001

	exact := selector_count_from_symbol_count(900100) or { panic(err) }
	assert exact == 18002

	_ := selector_count_from_symbol_count(900101) or {
		message := err.msg()
		assert message.contains('invalid selector count')
		return
	}
	// Reaching this point means the over-limit count was accepted.
	assert false
}
| 99 | |
// test_block_output_limit_guard checks ensure_block_output_limit with a
// limit of 100000: two calls whose first two arguments sum to exactly
// the limit must succeed, and one whose sum is limit + 1 must fail with
// 'block output exceeds declared block size'.
fn test_block_output_limit_guard() {
	// Both of these land exactly on the limit and must be accepted.
	ensure_block_output_limit(0, 100000, 100000) or { panic(err) }
	ensure_block_output_limit(99999, 1, 100000) or { panic(err) }

	// One past the limit must be rejected.
	ensure_block_output_limit(100000, 1, 100000) or {
		message := err.msg()
		assert message.contains('block output exceeds declared block size')
		return
	}
	assert false
}
| 110 | |
// test_find_rle1_block_end_for_four_byte_runs builds 100000 bytes made
// of 25000 consecutive four-byte runs and checks that
// find_rle1_block_end(src, 0, 100000) stops at offset 80000: the prefix
// must RLE1-encode to exactly 100000 bytes, and one more run must push
// the encoded length past 100000.
fn test_find_rle1_block_end_for_four_byte_runs() {
	// Byte i belongs to run i / 4; the run value is the run index mod 256,
	// so neighbouring runs always differ.
	mut src := []u8{cap: 100000}
	for i in 0 .. 100000 {
		src << u8((i / 4) & 0xff)
	}
	cut := find_rle1_block_end(src, 0, 100000)
	assert cut == 80000
	fitting := rle1_encode(src[0..cut])
	assert fitting.len == 100000
	overflowing := rle1_encode(src[0..cut + 4])
	assert overflowing.len > 100000
}
| 124 | |
// test_roundtrip_block_size_1_four_byte_runs round-trips 100000 bytes
// made of 25000 consecutive four-byte runs, compressing with
// `block_size: 1`.
fn test_roundtrip_block_size_1_four_byte_runs() {
	// Byte i belongs to run i / 4; the run value is the run index mod 256.
	mut payload := []u8{cap: 100000}
	for i in 0 .. 100000 {
		payload << u8((i / 4) & 0xff)
	}
	packed := compress(payload, block_size: 1) or { panic(err) }
	restored := decompress(packed) or { panic(err) }
	assert restored == payload
}
| 137 | |