| 1 | #!/usr/bin/env -S v |
| 2 | |
| 3 | import compress.bzip2 |
| 4 | |
// compression_levels lists the levels exercised for every test vector.
// Each value is passed as `block_size` to the V compressor, as `-<level>` to
// the bzip2 CLI and as `compresslevel` to Python's bz2 module.
const compression_levels = [1, 6, 9]!
| 6 | |
// TestVector is one named plaintext input for the interop matrix.
// Field order matters: the vectors may be built with positional initialisers.
struct TestVector {
	// name labels the vector in progress output and in case directory names.
	name string
	// data is the plaintext that every compressor/decompressor pair must round-trip.
	data []u8
}
| 11 | |
// main runs every test vector at every compression level through run_case
// and reports progress on stdout.
//
// Exit codes:
//   0 - all cross-checks passed
//   1 - a case failed (details on stderr)
//   2 - skipped because a required external tool is missing
fn main() {
	ensure_tools() or {
		eprintln('SKIP: ${err.msg()}')
		exit(2)
	}
	vectors := make_test_vectors()
	// Loop-invariant total used in the progress line.
	expected_runs := vectors.len * compression_levels.len
	mut total_checks := 0
	mut total_runs := 0
	// The pid keeps concurrent invocations from sharing a scratch directory.
	tmp_root := join_path(temp_dir(), 'v_bzip2_interop_${getpid()}')
	mkdir_all(tmp_root) or { panic(err) }
	defer {
		// Best-effort cleanup on the normal return path.
		rmdir_all(tmp_root) or {}
	}
	for level in compression_levels {
		for i, vec in vectors {
			total_checks += run_case(tmp_root, level, i, vec) or {
				eprintln('FAIL: ${vec.name} (level=${level}): ${err}')
				// exit() ends the process without running the defer above, so the
				// scratch directory has to be removed here or it is left behind.
				rmdir_all(tmp_root) or {}
				exit(1)
				// Kept from the original: gives the or-block an int value; never
				// evaluated because exit() does not return.
				0
			}
			total_runs++
			println('ok ${total_runs}/${expected_runs}: ${vec.name} (level=${level}, ${vec.data.len} bytes)')
		}
	}
	println('PASS: ${vectors.len} vectors x ${compression_levels.len} levels, ${total_checks} cross-checks')
}
| 38 | |
// ensure_tools checks that the external programs used for cross-checking can
// be run: the `bzip2` command and a `python3` that can import `bz2`.
// Returns the must_succeed error for the first probe that fails.
fn ensure_tools() ! {
	bzip2_probe := 'bzip2 --help >/dev/null 2>&1'
	python_probe := "python3 -c 'import bz2' >/dev/null 2>&1"
	must_succeed(bzip2_probe, 'system bzip2 command is not available')!
	must_succeed(python_probe, 'python3 with bz2 module is not available')!
}
| 44 | |
// make_test_vectors builds the fixed list of plaintext inputs, in this order:
// empty input, repeated ASCII text, a run of one byte, every byte value
// repeated four times, and 64 KiB of LCG-generated bytes.
fn make_test_vectors() []TestVector {
	fox_line := 'The quick brown fox jumps over the lazy dog.\n'
	return [
		TestVector{
			name: 'empty'
			data: []u8{}
		},
		TestVector{
			name: 'ascii_text'
			data: fox_line.repeat(64).bytes()
		},
		TestVector{
			name: 'repeated_byte'
			data: []u8{len: 10000, init: `A`}
		},
		TestVector{
			name: 'all_bytes_x4'
			data: all_bytes_repeated(4)
		},
		TestVector{
			name: 'lcg_64k'
			data: lcg_bytes(65536)
		},
	]
}
| 54 | |
// run_case cross-checks one test vector at one compression level.
//
// The plaintext is compressed by the V bzip2 module, the bzip2 CLI and
// Python's bz2 module; each resulting stream is then decompressed by all three
// and compared against the original bytes.
//
// Scratch files go into a per-case directory created under `tmp_root`.
// Returns the number of comparisons that passed, or the first error hit.
fn run_case(tmp_root string, level int, case_idx int, vec TestVector) !int {
	work_dir := join_path(tmp_root, 'case_l${level}_${case_idx:02}_${vec.name}')
	mkdir_all(work_dir)!

	from_v := bzip2.compress(vec.data, block_size: level)!
	from_cli := cli_compress(work_dir, vec.data, level)!
	from_py := py_compress(work_dir, vec.data, level)!

	// Keyed by producer name; the key is used in labels and scratch file names.
	streams := {
		'v':   from_v
		'cli': from_cli
		'py':  from_py
	}

	mut passed := 0
	for producer, stream in streams {
		via_v := bzip2.decompress(stream)!
		assert_equal_bytes('v.decompress(${producer}.compress, level=${level})', vec.data,
			via_v)!
		passed += 1

		via_cli := cli_decompress(work_dir, producer, stream)!
		assert_equal_bytes('cli.decompress(${producer}.compress, level=${level})', vec.data,
			via_cli)!
		passed += 1

		via_py := py_decompress(work_dir, producer, stream)!
		assert_equal_bytes('py.decompress(${producer}.compress, level=${level})', vec.data,
			via_py)!
		passed += 1
	}
	return passed
}
| 87 | |
// cli_compress compresses `plain` with the system `bzip2` command at `level`.
// The input is written to `plain.in` and the output to `cli_l<level>.bz2`,
// both inside `case_dir`. Returns the bytes of the output file.
fn cli_compress(case_dir string, plain []u8, level int) ![]u8 {
	src := join_path(case_dir, 'plain.in')
	dst := join_path(case_dir, 'cli_l${level}.bz2')
	write_file_array(src, plain)!
	cmd := 'bzip2 -${level} -c -- ${shell_quote(src)} > ${shell_quote(dst)}'
	must_succeed(cmd, 'bzip2 compression failed')!
	return read_bytes(dst)!
}
| 96 | |
// cli_decompress decompresses `compressed` with the system `bzip2 -d` command.
// `producer` only names the scratch files (`<producer>.for_cli.bz2` and
// `<producer>.from_cli.out`) inside `case_dir`. Returns the bytes of the
// output file.
fn cli_decompress(case_dir string, producer string, compressed []u8) ![]u8 {
	src := join_path(case_dir, '${producer}.for_cli.bz2')
	dst := join_path(case_dir, '${producer}.from_cli.out')
	write_file_array(src, compressed)!
	cmd := 'bzip2 -d -c -- ${shell_quote(src)} > ${shell_quote(dst)}'
	must_succeed(cmd, 'bzip2 decompression failed')!
	return read_bytes(dst)!
}
| 105 | |
// py_compress compresses `plain` with Python's `bz2.compress` at `level`.
// The input is written to `plain_py.in` and the output to `py_l<level>.bz2`,
// both inside `case_dir`; the paths and the level reach the one-liner through
// `sys.argv`. Returns the bytes of the output file.
fn py_compress(case_dir string, plain []u8, level int) ![]u8 {
	src := join_path(case_dir, 'plain_py.in')
	dst := join_path(case_dir, 'py_l${level}.bz2')
	write_file_array(src, plain)!
	script := 'import bz2, pathlib, sys; p=pathlib.Path(sys.argv[1]); o=pathlib.Path(sys.argv[2]); l=int(sys.argv[3]); o.write_bytes(bz2.compress(p.read_bytes(), compresslevel=l))'
	cmd := 'python3 -c ${shell_quote(script)} ${shell_quote(src)} ${shell_quote(dst)} ${level}'
	must_succeed(cmd, 'python bz2 compression failed')!
	return read_bytes(dst)!
}
| 115 | |
// py_decompress decompresses `compressed` with Python's `bz2.decompress`.
// `producer` only names the scratch files (`<producer>.for_py.bz2` and
// `<producer>.from_py.out`) inside `case_dir`. Returns the bytes of the
// output file.
fn py_decompress(case_dir string, producer string, compressed []u8) ![]u8 {
	src := join_path(case_dir, '${producer}.for_py.bz2')
	dst := join_path(case_dir, '${producer}.from_py.out')
	write_file_array(src, compressed)!
	script := 'import bz2, pathlib, sys; p=pathlib.Path(sys.argv[1]); o=pathlib.Path(sys.argv[2]); o.write_bytes(bz2.decompress(p.read_bytes()))'
	cmd := 'python3 -c ${shell_quote(script)} ${shell_quote(src)} ${shell_quote(dst)}'
	must_succeed(cmd, 'python bz2 decompression failed')!
	return read_bytes(dst)!
}
| 125 | |
// all_bytes_repeated returns the sequence 0, 1, ..., 255 repeated `times`
// times, giving a buffer of 256 * times bytes.
fn all_bytes_repeated(times int) []u8 {
	mut result := []u8{cap: 256 * times}
	for round := 0; round < times; round++ {
		for value := 0; value < 256; value++ {
			result << u8(value)
		}
	}
	return result
}
| 135 | |
// lcg_bytes returns `n` deterministic pseudo-random bytes.
// A u32 state seeded with 0x12345678 is advanced as
// state * 1664525 + 1013904223 for each byte, and bits 16..23 of the new
// state become the output byte.
fn lcg_bytes(n int) []u8 {
	mut state := u32(0x12345678)
	mut stream := []u8{len: n}
	for idx := 0; idx < n; idx++ {
		state = state * u32(1664525) + u32(1013904223)
		stream[idx] = u8((state >> 16) & u32(0xff))
	}
	return stream
}
| 145 | |
// assert_equal_bytes compares `got` against `expected`.
// Returns an error prefixed with `label` when the lengths differ, or at the
// first offset where the bytes differ; returns nothing when they match.
fn assert_equal_bytes(label string, expected []u8, got []u8) ! {
	if got.len != expected.len {
		return error('${label}: length mismatch expected=${expected.len} got=${got.len}')
	}
	for offset, want in expected {
		if got[offset] != want {
			return error('${label}: byte mismatch at offset ${offset}')
		}
	}
}
| 156 | |
// must_succeed runs `command` via `execute` and returns an error unless it
// exits with code 0. The error text contains `context`, the command line, the
// exit code and the captured output.
fn must_succeed(command string, context string) ! {
	result := execute(command)
	if result.exit_code == 0 {
		return
	}
	return error('${context}\ncommand: ${command}\nexit_code: ${result.exit_code}\n${result.output}')
}
| 163 | |
// shell_quote wraps `s` in single quotes for use as one shell word.
// Each embedded single quote is rewritten as '\'' (close the quote, emit an
// escaped quote, reopen the quote).
fn shell_quote(s string) string {
	escaped := s.replace("'", "'\\''")
	return "'" + escaped + "'"
}
| 167 | |