From 96c365159d037716a7097f6b02e4b4b82edd0a8a Mon Sep 17 00:00:00 2001 From: JalonSolov Date: Fri, 15 May 2026 07:10:46 -0400 Subject: [PATCH] compress.deflate: changed to pure V RFC-compliant code (#27156) --- vlib/compress/deflate/README.md | 28 +- vlib/compress/deflate/deflate.v | 166 ++++++++++- vlib/compress/deflate/deflate_compress.v | 184 ++++++++++++ vlib/compress/deflate/deflate_inflate.v | 262 ++++++++++++++++++ vlib/compress/deflate/deflate_test.v | 69 ++++- .../deflate/interop/deflate_interop.v | 219 +++++++++++++++ vlib/compress/deflate/interop/deflate_ref.c | 135 +++++++++ vlib/compress/deflate/interop/deflate_ref.py | 27 ++ 8 files changed, 1069 insertions(+), 21 deletions(-) create mode 100644 vlib/compress/deflate/deflate_compress.v create mode 100644 vlib/compress/deflate/deflate_inflate.v create mode 100644 vlib/compress/deflate/interop/deflate_interop.v create mode 100644 vlib/compress/deflate/interop/deflate_ref.c create mode 100644 vlib/compress/deflate/interop/deflate_ref.py diff --git a/vlib/compress/deflate/README.md b/vlib/compress/deflate/README.md index 9aa198d3a..62c1f1552 100644 --- a/vlib/compress/deflate/README.md +++ b/vlib/compress/deflate/README.md @@ -1,7 +1,23 @@ ## Description -`compress.deflate` is a module that assists in the compression and -decompression of binary data using `deflate` compression +`compress.deflate` is a pure V RFC-compliant DEFLATE module. + +Compression output format is selected by `CompressFormat` via +`compress(data, format: ...)`: + +- `.zlib` (RFC 1950 wrapper) +- `.gzip` (RFC 1952 wrapper) +- `.raw_deflate` (RFC 1951 raw stream) + +`compress` keeps default zlib behavior, and `decompress` auto-detects all three. + +## Interop Validation + +Cross-validation with C/zlib is kept separate from `v test` and can be run manually: + +```bash +./vnew run vlib/compress/deflate/interop/deflate_interop.v +``` ## Example @@ -10,8 +26,12 @@ import compress.deflate fn main() { uncompressed := 'Hello world!' 
-	compressed := deflate.compress(uncompressed.bytes())!
-	decompressed := deflate.decompress(compressed)!
+	zlib_stream := deflate.compress(uncompressed.bytes())!
+	gzip_stream := deflate.compress(uncompressed.bytes(), format: .gzip)!
+	raw_stream := deflate.compress(uncompressed.bytes(), format: .raw_deflate)!
+	assert deflate.decompress(zlib_stream)! == uncompressed.bytes()
+	assert deflate.decompress(gzip_stream)! == uncompressed.bytes()
+	decompressed := deflate.decompress(raw_stream)!
 	assert decompressed == uncompressed.bytes()
 }
 ```
diff --git a/vlib/compress/deflate/deflate.v b/vlib/compress/deflate/deflate.v
index 29d5f6e66..07354489d 100644
--- a/vlib/compress/deflate/deflate.v
+++ b/vlib/compress/deflate/deflate.v
@@ -1,15 +1,165 @@
 module deflate
 
-import compress as compr
+import encoding.binary
+import hash.crc32
 
-// compresses an array of bytes using deflate and returns the compressed bytes in a new array
-// Example: b := 'abcabc'.repeat(100).bytes(); compressed := deflate.compress(b)!; dump(compressed); assert compressed.len == 163
-pub fn compress(data []u8) ![]u8 {
-	return compr.compress(data, 0)
+// CompressFormat selects the output container around the RFC 1951 payload.
+pub enum CompressFormat {
+	zlib // RFC 1950 wrapper
+	gzip // RFC 1952 wrapper
+	raw_deflate // bare RFC 1951 stream, no header or trailer
 }
 
-// decompresses an array of bytes using deflate and returns the decompressed bytes in a new array
-// Example: b := 'abcabc'.repeat(100).bytes(); compressed := deflate.compress(b)!; decompressed := deflate.decompress(compressed)!; assert b == decompressed
+// CompressParams holds the optional arguments of `compress`.
+// Because it is marked @[params], callers may write `compress(data, format: .gzip)`
+// or omit it entirely, in which case `format` is `.zlib`.
+@[params]
+pub struct CompressParams {
+pub:
+	format CompressFormat = .zlib
+}
+
+// compress compresses data as zlib, gzip, or raw DEFLATE.
+// The container is chosen by `format.format` and defaults to zlib.
+// Returns the complete encoded stream in a new array.
+pub fn compress(data []u8, format CompressParams) ![]u8 {
+	// Dispatch straight to the wrapper for the requested container. Each
+	// wrapper runs the DEFLATE encoder itself, so encoding here as well would
+	// do the expensive work twice for zlib and gzip.
+	match format.format {
+		.zlib { return compress_zlib(data) }
+		.gzip { return compress_gzip(data) }
+		.raw_deflate { return compress_raw(data) }
+	}
+}
+
+// compress_zlib compresses data into a zlib stream (RFC 1950):
+// 2-byte header, DEFLATE payload, big-endian Adler-32 of the uncompressed data.
+pub fn compress_zlib(data []u8) ![]u8 {
+	payload := deflate_compress_fixed(data)
+	cksum := adler32(data)
+	mut out := []u8{cap: 2 + payload.len + 4}
+	out << u8(0x78) // CMF: CM=8 deflate, CINFO=7 (32K window)
+	out << u8(0x9c) // FLG: default compression, FCHECK satisfies (CMF*256+FLG)%31==0
+	out << payload
+	out << binary.big_endian_get_u32(cksum)
+	return out
+}
+
+// compress_gzip compresses data into a gzip stream (RFC 1952):
+// 10-byte header, DEFLATE payload, little-endian CRC-32 and input length.
+pub fn compress_gzip(data []u8) ![]u8 {
+	payload := deflate_compress_fixed(data)
+	mut out := []u8{cap: 10 + payload.len + 8}
+	// 10-byte gzip header: ID1 ID2 CM FLG MTIME(4) XFL OS
+	out << [u8(0x1f), 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff]
+	out << payload
+	out << binary.little_endian_get_u32(crc32.sum(data))
+	// ISIZE: the u32 conversion keeps only the low 32 bits of the length
+	out << binary.little_endian_get_u32(u32(data.len))
+	return out
+}
+
+// compress_raw compresses data to a raw RFC 1951 DEFLATE stream.
+pub fn compress_raw(data []u8) ![]u8 {
+	return deflate_compress_fixed(data)
+}
+
+// decompress decompresses a zlib (RFC 1950), gzip (RFC 1952),
+// or raw DEFLATE (RFC 1951) stream — format is auto-detected.
 pub fn decompress(data []u8) ![]u8 {
-	return compr.decompress(data, 0)
+	if data.len >= 2 {
+		// gzip magic: 0x1f 0x8b
+		if data[0] == 0x1f && data[1] == 0x8b {
+			return decompress_gzip(data)
+		}
+		// zlib: CM=8 and header checksum passes
+		if data[0] & 0x0f == 8 && (u32(data[0]) * 256 + u32(data[1])) % 31 == 0 {
+			return decompress_zlib(data)
+		}
+	}
+	// raw DEFLATE
+	return inflate(data)
+}
+
+// decompress_zlib validates the 2-byte zlib header, inflates the payload and
+// checks the trailing big-endian Adler-32 against the decoded bytes.
+// Streams that request a preset dictionary (FDICT bit) are rejected.
+fn decompress_zlib(data []u8) ![]u8 {
+	// 2 header bytes + 4 checksum bytes is the smallest size accepted here
+	if data.len < 6 {
+		return error('invalid zlib stream: too short')
+	}
+	if data[0] & 0x0f != 8 {
+		return error('invalid zlib stream: unsupported compression method')
+	}
+	if (u32(data[0]) * 256 + u32(data[1])) % 31 != 0 {
+		return error('invalid zlib stream: bad header checksum')
+	}
+	if data[1] & 0x20 != 0 {
+		return error('invalid zlib stream: preset dictionary not supported')
+	}
+	payload := data[2..data.len - 4]
+	expected := binary.big_endian_u32_at(data, data.len - 4)
+	decoded := inflate(payload)!
+	if adler32(decoded) != expected {
+		return error('invalid zlib stream: adler32 mismatch')
+	}
+	return decoded
+}
+
+// decompress_gzip skips the gzip header (including the optional FEXTRA, FNAME,
+// FCOMMENT and FHCRC fields), inflates the payload and verifies the trailing
+// little-endian CRC-32 and size fields against the decoded bytes.
+fn decompress_gzip(data []u8) ![]u8 {
+	// 10 header bytes + 8 trailer bytes is the smallest size accepted here
+	if data.len < 18 {
+		return error('invalid gzip stream: too short')
+	}
+	if data[0] != 0x1f || data[1] != 0x8b {
+		return error('invalid gzip stream: bad magic')
+	}
+	if data[2] != 8 {
+		return error('invalid gzip stream: unsupported compression method')
+	}
+	flg := data[3]
+	mut pos := 10 // fixed header size
+	if flg & 0x04 != 0 { // FEXTRA
+		if pos + 2 > data.len {
+			return error('invalid gzip stream: truncated extra')
+		}
+		xlen := int(u32(data[pos]) | u32(data[pos + 1]) << 8)
+		pos += 2 + xlen
+	}
+	if flg & 0x08 != 0 { // FNAME
+		// zero-terminated string; the extra pos++ steps over the terminator
+		for pos < data.len && data[pos] != 0 {
+			pos++
+		}
+		pos++
+	}
+	if flg & 0x10 != 0 { // FCOMMENT
+		for pos < data.len && data[pos] != 0 {
+			pos++
+		}
+		pos++
+	}
+	if flg & 0x02 != 0 { // FHCRC
+		pos += 2
+	}
+	// the optional fields above advance pos unchecked; this is the single bounds check
+	if pos + 8 > data.len {
+		return error('invalid gzip stream: truncated payload')
+	}
+	payload := data[pos..data.len - 8]
+	expected_crc := binary.little_endian_u32_at(data, data.len - 8)
+	expected_size := binary.little_endian_u32_at(data, data.len - 4)
+	decoded := inflate(payload)!
+	if crc32.sum(decoded) != expected_crc {
+		return error('invalid gzip stream: crc32 mismatch')
+	}
+	if u32(decoded.len) != expected_size {
+		return error('invalid gzip stream: size mismatch')
+	}
+	return decoded
+}
+
+// adler32 computes the checksum stored in the zlib trailer:
+// two running sums modulo 65521, packed as (b << 16) | a.
+fn adler32(data []u8) u32 {
+	mod_adler := u32(65521)
+	mut a := u32(1)
+	mut b := u32(0)
+	for byte_ in data {
+		a = (a + u32(byte_)) % mod_adler
+		b = (b + a) % mod_adler
+	}
+	return (b << 16) | a
+}
+
+// bit_reverse returns the low n bits of v in reversed order.
+fn bit_reverse(v u32, n int) u32 {
+	mut r := u32(0)
+	mut val := v
+	for _ in 0 .. n {
+		r = (r << 1) | (val & 1)
+		val >>= 1
+	}
+	return r
 }
diff --git a/vlib/compress/deflate/deflate_compress.v b/vlib/compress/deflate/deflate_compress.v
new file mode 100644
index 000000000..d49355e0c
--- /dev/null
+++ b/vlib/compress/deflate/deflate_compress.v
@@ -0,0 +1,184 @@
+module deflate
+
+// number of bits kept from the 3-byte hash; sizes the head-of-chain table
+const deflate_hash_bits = 15
+const deflate_hash_size = 1 << deflate_hash_bits
+// maximum number of chain entries examined per position by find_lz_match
+const deflate_max_chain = 64
+// shortest and longest match lengths emitted by the compressor
+const deflate_min_match = 3
+const deflate_max_match = 258
+// largest back-reference offset considered by find_lz_match
+const deflate_window = 32768
+
+// fixed_litlen_encode returns (reversed_codes, code_lengths) for fixed Huffman lit/len.
+fn fixed_litlen_encode() ([]u32, []int) {
+	lens := fixed_litlen_lengths()
+	mut max_bits := 0
+	for l in lens {
+		if l > max_bits {
+			max_bits = l
+		}
+	}
+	// count how many symbols use each code length
+	mut bl_count := []int{len: max_bits + 1}
+	for l in lens {
+		if l > 0 {
+			bl_count[l]++
+		}
+	}
+	// first code value for each length
+	mut next_code := []u32{len: max_bits + 1}
+	mut c := u32(0)
+	for bits in 1 .. max_bits + 1 {
+		c = (c + u32(bl_count[bits - 1])) << 1
+		next_code[bits] = c
+	}
+	mut codes := []u32{len: 288}
+	for sym in 0 .. 288 {
+		l := lens[sym]
+		if l == 0 {
+			continue
+		}
+		// stored bit-reversed so BitWriter.write_bits can emit it LSB-first
+		codes[sym] = bit_reverse(next_code[l], l)
+		next_code[l]++
+	}
+	return codes, lens
+}
+
+// fixed_dist_encode returns (reversed_codes, code_lengths) for fixed Huffman distance.
+fn fixed_dist_encode() ([]u32, []int) {
+	mut codes := []u32{len: 30}
+	for i in 0 .. 30 {
+		// every fixed distance code is 5 bits wide
+		codes[i] = bit_reverse(u32(i), 5)
+	}
+	return codes, []int{len: 30, init: 5}
+}
+
+// length_code_info maps a match length to
+// (index into length_bases, extra-bits value, number of extra bits).
+// Returns 0, 0, 0 when length is below the smallest base.
+fn length_code_info(length int) (int, int, int) {
+	// scan from the largest base down so the first hit is the tightest fit
+	for i := length_bases.len - 1; i >= 0; i-- {
+		if length >= length_bases[i] {
+			return i, length - length_bases[i], length_extra_bits[i]
+		}
+	}
+	return 0, 0, 0
+}
+
+// dist_code_info maps a match distance to
+// (index into dist_bases, extra-bits value, number of extra bits).
+// Returns 0, 0, 0 when distance is below the smallest base.
+fn dist_code_info(distance int) (int, int, int) {
+	for i := dist_bases.len - 1; i >= 0; i-- {
+		if distance >= dist_bases[i] {
+			return i, distance - dist_bases[i], dist_extra_bits[i]
+		}
+	}
+	return 0, 0, 0
+}
+
+// hash3 hashes the three bytes at data[pos..pos + 3] down to deflate_hash_bits bits.
+// The caller must ensure that pos + 2 is a valid index.
+fn hash3(data []u8, pos int) int {
+	v := u32(data[pos]) | (u32(data[pos + 1]) << 8) | (u32(data[pos + 2]) << 16)
+	return int((v * u32(2654435761)) >> u32(32 - deflate_hash_bits))
+}
+
+// find_lz_match walks the hash chain for the bytes at pos and returns
+// (offset, length) of the longest earlier occurrence found, or 0, 0 when fewer
+// than deflate_min_match bytes remain or the chain is empty.
+// `last` holds the most recent position per hash bucket and `prev` links each
+// position to the previous one in the same bucket (-1 ends the chain).
+@[direct_array_access]
+fn find_lz_match(data []u8, pos int, last []int, prev []int) (int, int) {
+	if pos + deflate_min_match > data.len {
+		return 0, 0
+	}
+	max_len := if pos + deflate_max_match < data.len { deflate_max_match } else { data.len - pos }
+	mut best_len := 0
+	mut best_off := 0
+	mut i := last[hash3(data, pos)]
+	mut chain := 0
+	for i >= 0 && chain < deflate_max_chain {
+		off := pos - i
+		// the chain is walked from newest to oldest, so offsets only grow from here
+		if off > deflate_window {
+			break
+		}
+		mut l := 0
+		for l < max_len && data[i + l] == data[pos + l] {
+			l++
+		}
+		if l > best_len {
+			best_len = l
+			best_off = off
+			if best_len == max_len {
+				break
+			}
+		}
+		i = prev[i]
+		chain++
+	}
+	return best_off, best_len
+}
+
+// BitWriter accumulates bits LSB-first and appends completed bytes to buf.
+struct BitWriter {
+mut:
+	buf   []u8
+	bits  u32
+	nbits int
+}
+
+// write_bits appends the low nbits bits of value, least significant bit first.
+@[direct_array_access; inline]
+fn (mut w BitWriter) write_bits(value u32, nbits int) {
+	if nbits == 0 {
+		return
+	}
+	w.bits |= value << w.nbits
+	w.nbits += nbits
+	for w.nbits >= 8 {
+		w.buf << u8(w.bits & 0xff)
+		w.bits >>= 8
+		w.nbits -= 8
+	}
+}
+
+// flush writes out a pending partial byte, if any; its unused high bits are zero.
+fn (mut w BitWriter) flush() {
+	if w.nbits > 0 {
+		w.buf << u8(w.bits & 0xff)
+		w.bits = 0
+		w.nbits = 0
+	}
+}
+
+// deflate_compress_fixed compresses data to RFC 1951 DEFLATE using fixed Huffman codes.
+// The whole input is emitted as a single final block.
+@[direct_array_access]
+fn deflate_compress_fixed(data []u8) []u8 {
+	ll_codes, ll_lens := fixed_litlen_encode()
+	d_codes, d_lens := fixed_dist_encode()
+	mut w := BitWriter{}
+	// BFINAL=1, BTYPE=01 (fixed Huffman)
+	w.write_bits(1, 1)
+	w.write_bits(1, 2)
+	if data.len == 0 {
+		// empty input: just the end-of-block symbol
+		w.write_bits(ll_codes[256], ll_lens[256])
+		w.flush()
+		return w.buf
+	}
+	mut last := []int{len: deflate_hash_size, init: -1}
+	mut prev := []int{len: data.len, init: -1}
+	mut pos := 0
+	for pos < data.len {
+		off, match_len := find_lz_match(data, pos, last, prev)
+		if match_len >= deflate_min_match {
+			// back-reference: length symbol + extra bits, then distance symbol + extra bits
+			li, lext, lext_bits := length_code_info(match_len)
+			sym := 257 + li
+			w.write_bits(ll_codes[sym], ll_lens[sym])
+			w.write_bits(u32(lext), lext_bits)
+			di, dext, dext_bits := dist_code_info(off)
+			w.write_bits(d_codes[di], d_lens[di])
+			w.write_bits(u32(dext), dext_bits)
+			// register every position covered by the match in the hash chains
+			for i in pos .. pos + match_len {
+				if i + deflate_min_match < data.len {
+					h := hash3(data, i)
+					prev[i] = last[h]
+					last[h] = i
+				}
+			}
+			pos += match_len
+		} else {
+			// literal byte
+			w.write_bits(ll_codes[int(data[pos])], ll_lens[int(data[pos])])
+			if pos + deflate_min_match < data.len {
+				h := hash3(data, pos)
+				prev[pos] = last[h]
+				last[h] = pos
+			}
+			pos++
+		}
+	}
+	// end-of-block symbol
+	w.write_bits(ll_codes[256], ll_lens[256])
+	w.flush()
+	return w.buf
+}
diff --git a/vlib/compress/deflate/deflate_inflate.v b/vlib/compress/deflate/deflate_inflate.v
new file mode 100644
index 000000000..9d159d9d8
--- /dev/null
+++ b/vlib/compress/deflate/deflate_inflate.v
@@ -0,0 +1,262 @@
+module deflate
+
+// vfmt off
+// RFC 1951 length/distance decode tables
+const length_bases = [3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
+	67, 83, 99, 115, 131, 163, 195, 227, 258]
+const length_extra_bits = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
+	4, 5, 5, 5, 5, 0]
+const dist_bases = [1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
+	1025, 1537, 2049, 3073, 4097, 6145, 8193,
12289, 16385, 24577]
+const dist_extra_bits = [0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
+	10, 11, 11, 12, 12, 13, 13]
+// code-length alphabet order (RFC 1951)
+const cl_order = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
+// vfmt on
+
+// fixed_litlen_lengths returns code lengths for the fixed Huffman lit/len tree (RFC 1951 §3.2.6).
+fn fixed_litlen_lengths() []int {
+	mut lens := []int{len: 288}
+	for i in 0 .. 144 {
+		lens[i] = 8
+	}
+	for i in 144 .. 256 {
+		lens[i] = 9
+	}
+	for i in 256 .. 280 {
+		lens[i] = 7
+	}
+	for i in 280 .. 288 {
+		lens[i] = 8
+	}
+	return lens
+}
+
+// HuffTree is a MSB-first Huffman lookup table for DEFLATE decoding.
+// Indexed by the next max_bits bits read LSB-first from the stream.
+struct HuffTree {
+	table    []u32 // entry: (symbol << 5) | code_length; 0xFFFF_FFFF = invalid
+	max_bits int
+}
+
+// build_huff_tree builds the decoding table for the canonical Huffman code
+// described by `lengths` (one code length per symbol, 0 = symbol unused).
+// A tree without any used symbol decodes nothing: its only entry is invalid.
+fn build_huff_tree(lengths []int) HuffTree {
+	mut max_bits := 0
+	for l in lengths {
+		if l > max_bits {
+			max_bits = l
+		}
+	}
+	if max_bits == 0 {
+		// No codes at all. The single entry must be the invalid marker: a zero
+		// entry would decode as "symbol 0 using 0 bits" and never consume
+		// input, letting a crafted block loop forever.
+		return HuffTree{
+			table:    [u32(0xffff_ffff)]
+			max_bits: 0
+		}
+	}
+	// count how many symbols use each code length
+	mut bl_count := []int{len: max_bits + 1}
+	for l in lengths {
+		if l > 0 {
+			bl_count[l]++
+		}
+	}
+	// first code value for each length
+	mut next_code := []u32{len: max_bits + 1}
+	mut c := u32(0)
+	for bits in 1 .. max_bits + 1 {
+		c = (c + u32(bl_count[bits - 1])) << 1
+		next_code[bits] = c
+	}
+	table_size := 1 << max_bits
+	mut table := []u32{len: table_size, init: 0xffff_ffff}
+	for sym in 0 .. lengths.len {
+		l := lengths[sym]
+		if l == 0 {
+			continue
+		}
+		code := next_code[l]
+		next_code[l]++
+		// Reverse code for LSB-first bit reader
+		rev := bit_reverse(code, l)
+		// a code shorter than max_bits matches every index whose low l bits equal rev
+		step := 1 << l
+		mut idx := int(rev)
+		for idx < table_size {
+			table[idx] = (u32(sym) << 5) | u32(l)
+			idx += step
+		}
+	}
+	return HuffTree{
+		table:    table
+		max_bits: max_bits
+	}
+}
+
+// BitReader reads bits LSB-first from buf.
+// `bits` holds `nbits` bits that were already taken from buf[..pos] but not yet consumed.
+struct BitReader {
+	buf []u8
+mut:
+	pos   int
+	bits  u32
+	nbits int
+}
+
+// read_bits returns the next n bits as an integer, or an error at end of input.
+@[direct_array_access; inline]
+fn (mut r BitReader) read_bits(n int) !u32 {
+	for r.nbits < n {
+		if r.pos >= r.buf.len {
+			return error('inflate: unexpected end of stream')
+		}
+		r.bits |= u32(r.buf[r.pos]) << r.nbits
+		r.pos++
+		r.nbits += 8
+	}
+	val := r.bits & ((u32(1) << n) - 1)
+	r.bits >>= u32(n)
+	r.nbits -= n
+	return val
+}
+
+// align_byte discards the remaining bits of the current byte so that the next
+// read starts on a byte boundary.
+@[inline]
+fn (mut r BitReader) align_byte() {
+	// huff_decode may have prefetched whole bytes beyond the current one.
+	// Those are not padding: hand them back to the byte stream instead of
+	// dropping them, otherwise a stored block following a Huffman block would
+	// be read from the wrong offset.
+	if r.nbits >= 8 {
+		r.pos -= r.nbits / 8
+	}
+	r.bits = 0
+	r.nbits = 0
+}
+
+// read_byte_raw returns the next whole byte from buf. It bypasses the bit
+// buffer, so it is only valid right after align_byte.
+@[direct_array_access; inline]
+fn (mut r BitReader) read_byte_raw() !u8 {
+	if r.pos >= r.buf.len {
+		return error('inflate: unexpected end of stream')
+	}
+	b := r.buf[r.pos]
+	r.pos++
+	return b
+}
+
+// huff_decode decodes one symbol using tree t.
+// Fails on a bit pattern that is not a code of t, or when the matched code
+// needs more bits than the input still provides.
+@[direct_array_access; inline]
+fn (mut r BitReader) huff_decode(t HuffTree) !u32 {
+	// Prefetch up to max_bits bits. Near the end of the input fewer may be
+	// available; that is fine as long as the matched code fits (checked below).
+	for r.nbits < t.max_bits {
+		if r.pos >= r.buf.len {
+			break
+		}
+		r.bits |= u32(r.buf[r.pos]) << r.nbits
+		r.pos++
+		r.nbits += 8
+	}
+	idx := int(r.bits & ((u32(1) << t.max_bits) - 1))
+	entry := t.table[idx]
+	if entry == 0xffff_ffff {
+		return error('inflate: invalid Huffman code')
+	}
+	len_ := int(entry & 0x1f)
+	// The lookup pads missing input with zero bits. Accepting a code longer
+	// than what was really read would invent data on truncated streams.
+	if len_ > r.nbits {
+		return error('inflate: unexpected end of stream')
+	}
+	sym := entry >> 5
+	r.bits >>= u32(len_)
+	r.nbits -= len_
+	return sym
+}
+
+// inflate decompresses raw RFC 1951 DEFLATE data.
+// It processes stored, fixed-Huffman and dynamic-Huffman blocks until a block
+// with BFINAL set has been decoded, and returns an error on malformed or
+// truncated input.
+fn inflate(data []u8) ![]u8 {
+	mut r := BitReader{
+		buf: data
+	}
+	mut out := []u8{}
+	fixed_ll := build_huff_tree(fixed_litlen_lengths())
+	fixed_d := build_huff_tree([]int{len: 32, init: 5})
+	for {
+		bfinal := r.read_bits(1)!
+		btype := r.read_bits(2)!
+		match btype {
+			0 {
+				// stored block: LEN, NLEN (one's complement of LEN), then LEN raw bytes
+				r.align_byte()
+				len_ := int(r.read_byte_raw()!) | (int(u32(r.read_byte_raw()!) << 8))
+				nlen := int(r.read_byte_raw()!) | (int(u32(r.read_byte_raw()!) << 8))
+				if len_ & 0xffff != (~nlen) & 0xffff {
+					return error('inflate: bad stored block length')
+				}
+				for _ in 0 .. len_ {
+					out << r.read_byte_raw()!
+				}
+			}
+			1 {
+				inflate_block(mut r, mut out, fixed_ll, fixed_d)!
+			}
+			2 {
+				hlit := int(r.read_bits(5)!) + 257
+				hdist := int(r.read_bits(5)!) + 1
+				hclen := int(r.read_bits(4)!) + 4
+				mut cl_lens := []int{len: 19}
+				for i in 0 .. hclen {
+					cl_lens[cl_order[i]] = int(r.read_bits(3)!)
+				}
+				cl_tree := build_huff_tree(cl_lens)
+				// lit/len and distance code lengths are transmitted as one sequence
+				mut all_lens := []int{}
+				for all_lens.len < hlit + hdist {
+					sym := r.huff_decode(cl_tree)!
+					if sym <= 15 {
+						all_lens << int(sym)
+					} else if sym == 16 {
+						// repeat the previous length 3-6 times
+						if all_lens.len == 0 {
+							return error('inflate: repeat with empty history')
+						}
+						rep := int(r.read_bits(2)!) + 3
+						last := all_lens[all_lens.len - 1]
+						for _ in 0 .. rep {
+							all_lens << last
+						}
+					} else if sym == 17 {
+						// 3-10 zero lengths
+						rep := int(r.read_bits(3)!) + 3
+						for _ in 0 .. rep {
+							all_lens << 0
+						}
+					} else if sym == 18 {
+						// 11-138 zero lengths
+						rep := int(r.read_bits(7)!) + 11
+						for _ in 0 .. rep {
+							all_lens << 0
+						}
+					} else {
+						return error('inflate: bad code length symbol')
+					}
+				}
+				// a repeat code must not run past the declared number of lengths
+				if all_lens.len > hlit + hdist {
+					return error('inflate: too many code lengths')
+				}
+				ll_tree := build_huff_tree(all_lens[..hlit])
+				d_tree := build_huff_tree(all_lens[hlit..])
+				inflate_block(mut r, mut out, ll_tree, d_tree)!
+			}
+			else {
+				return error('inflate: reserved block type')
+			}
+		}
+
+		if bfinal == 1 {
+			break
+		}
+	}
+	return out
+}
+
+// inflate_block decodes the symbols of one Huffman-coded block into `out`
+// until the end-of-block symbol (256) is reached.
+// `ll` decodes literal/length symbols, `dist` decodes distance symbols.
+@[direct_array_access]
+fn inflate_block(mut r BitReader, mut out []u8, ll HuffTree, dist HuffTree) ! {
+	for {
+		sym := r.huff_decode(ll)!
+		if sym == 256 {
+			break
+		}
+		if sym < 256 {
+			out << u8(sym)
+		} else {
+			li := int(sym) - 257
+			if li < 0 || li >= length_bases.len {
+				return error('inflate: invalid length symbol ${sym}')
+			}
+			length := length_bases[li] + int(r.read_bits(length_extra_bits[li])!)
+			dsym := r.huff_decode(dist)!
+			di := int(dsym)
+			if di >= dist_bases.len {
+				return error('inflate: invalid distance symbol ${dsym}')
+			}
+			distance := dist_bases[di] + int(r.read_bits(dist_extra_bits[di])!)
+			if distance > out.len {
+				return error('inflate: distance past output start')
+			}
+			// Copy byte by byte: when length > distance the source overlaps
+			// the bytes being appended.
+			base := out.len - distance
+			for i in 0 .. length {
+				out << out[base + i]
+			}
+		}
+	}
+}
diff --git a/vlib/compress/deflate/deflate_test.v b/vlib/compress/deflate/deflate_test.v
index 2db48f6c5..06bcbedaf 100644
--- a/vlib/compress/deflate/deflate_test.v
+++ b/vlib/compress/deflate/deflate_test.v
@@ -1,12 +1,63 @@
 module deflate
 
-const gzip_magic_numbers = [u8(0x1f), 0x8b]
-
-fn test_gzip() {
-	uncompressed := 'Hello world!'
-	compressed := compress(uncompressed.bytes())!
-	first2 := compressed[0..2]
-	assert first2 != gzip_magic_numbers
-	decompressed := decompress(compressed)!
-	assert decompressed == uncompressed.bytes()
+fn test_zlib_roundtrip() {
+	data := 'Hello world!'.bytes()
+	compressed := compress(data)!
+	assert compressed[0] == 0x78 && compressed[1] == 0x9c // zlib header
+	assert decompress(compressed)! == data
+}
+
+fn test_gzip_roundtrip() {
+	data := 'Hello gzip!'.repeat(10).bytes()
+	compressed := compress(data, format: .gzip)!
+	assert compressed[0] == 0x1f && compressed[1] == 0x8b // gzip magic
+	assert decompress(compressed)! == data
+}
+
+fn test_raw_deflate_roundtrip() {
+	data := 'raw deflate'.repeat(20).bytes()
+	raw := compress(data, format: .raw_deflate)!
+	decoded := decompress(raw)! // auto-detected as raw
+	assert decoded == data
+}
+
+fn test_decompress_auto_detects_all_formats() {
+	data := 'multi-format detection test'.repeat(5).bytes()
+	assert decompress(compress(data)!)! == data
+	assert decompress(compress(data, format: .gzip)!)! == data
+	assert decompress(compress(data, format: .raw_deflate)!)! == data
+}
+
+fn test_truncated_raw_stream_fails() {
+	raw := compress('hello hello hello'.bytes(), format: .raw_deflate)!
+	// dropping the last byte cuts into the end-of-block code
+	decompress(raw[..raw.len - 1]) or {
+		assert err.msg().contains('unexpected end')
+		return
+	}
+	assert false
+}
+
+fn test_empty_huffman_tree_fails() {
+	// final dynamic block whose code-length data declares all 258 lengths as zero
+	bad := [u8(0x05), 0x00, 0x80, 0xe4, 0x7f, 0x1b]
+	decompress(bad) or {
+		assert err.msg().len > 0
+		return
+	}
+	assert false
+}
+
+fn test_wrapper_helpers_match_unified_api() {
+	data := 'wrapper compatibility'.repeat(8).bytes()
+	assert compress(data)! == compress(data, format: .zlib)!
+	assert compress_gzip(data)! == compress(data, format: .gzip)!
+	assert compress_raw(data)! == compress(data, format: .raw_deflate)!
+}
+
+fn test_roundtrip_repeated() {
+	data := 'abcabc'.repeat(100).bytes()
+	compressed := compress(data)!
+	assert compressed.len < data.len
+	assert decompress(compressed)! == data
+}
+
+fn test_bad_compression_method_fails() {
+	bad := [u8(0x79), 0x18, 0x00, 0x00, 0x00, 0x00]
+	decompress(bad) or {
+		assert err.msg().len > 0
+		return
+	}
+	assert false
+}
+
+fn test_corrupt_checksum_fails() {
+	mut enc := compress(('hello world').repeat(10).bytes())!
+	// flip a byte in the adler32 footer
+	enc[enc.len - 1] ^= 0xff
+	decompress(enc) or {
+		assert err.msg().contains('adler32')
+		return
+	}
+	assert false
 }
diff --git a/vlib/compress/deflate/interop/deflate_interop.v b/vlib/compress/deflate/interop/deflate_interop.v
new file mode 100644
index 000000000..3af33d428
--- /dev/null
+++ b/vlib/compress/deflate/interop/deflate_interop.v
@@ -0,0 +1,219 @@
+module main
+
+import compress.deflate
+import os
+
+// choose_cc returns the first of cc, gcc, clang that answers `--version`
+// successfully, or an empty string when none does.
+fn choose_cc() string {
+	for cc in ['cc', 'gcc', 'clang'] {
+		if os.execute('${cc} --version').exit_code == 0 {
+			return cc
+		}
+	}
+	return ''
+}
+
+// compile_c_ref builds deflate_ref.c (located next to this file) against zlib
+// and returns the path of the resulting binary inside workdir.
+fn compile_c_ref(workdir string) !string {
+	cc := choose_cc()
+	if cc == '' {
+		return error('no C compiler found')
+	}
+	src := os.join_path(@DIR, 'deflate_ref.c')
+	bin := os.join_path(workdir, 'deflate_cross_validate')
+	res := os.execute('${cc} -O2 ${os.quoted_path(src)} -lz -o ${os.quoted_path(bin)}')
+	if res.exit_code != 0 {
+		return error('C compile failed: ${res.output}')
+	}
+	return bin
+}
+
+// main cross-checks this module against the C zlib reference program and,
+// when python3 is available, against Python's zlib module, in both directions
+// for the zlib and gzip containers. It exits with status 1 on the first
+// mismatch or failure and skips the C part when the reference cannot be built.
+fn main() {
+	base_tmp := os.join_path(os.temp_dir(), 'v_deflate_interop')
+	os.mkdir_all(base_tmp) or {
+		eprintln('could not create base temp dir: ${err.msg()}')
+		exit(1)
+	}
+	// per-process work directory
+	workdir := os.join_path(base_tmp, 'run_${os.getpid()}')
+	os.mkdir_all(workdir) or {
+		eprintln('could not create work dir: ${err.msg()}')
+		exit(1)
+	}
+	defer {
+		os.rmdir_all(workdir) or {}
+	}
+
+	bin := compile_c_ref(workdir) or {
+		eprintln('Skipping C cross-validation: ${err.msg()}')
+		return
+	}
+
+	input := 'deflate C cross-validation payload'.repeat(50).bytes()
+	ip := os.join_path(workdir, 'xval_in.bin')
+	cp := os.join_path(workdir, 'xval_c_zlib.bin')
+	gp := os.join_path(workdir, 'xval_c_gzip.bin')
+	vp := os.join_path(workdir, 'xval_v_zlib.bin')
+	vgp := os.join_path(workdir, 'xval_v_gzip.bin')
+	dp := os.join_path(workdir, 'xval_dec.bin')
+
+	os.write_file_array(ip, input) or {
+		eprintln('write input failed: ${err.msg()}')
+		exit(1)
+	}
+
+	// C zlib -> V
+	res1 :=
+		os.execute('${os.quoted_path(bin)} compress ${os.quoted_path(ip)} ${os.quoted_path(cp)}')
+	if res1.exit_code != 0 {
+		eprintln('C zlib compress failed: ${res1.output}')
+		exit(1)
+	}
+	c_zlib := os.read_bytes(cp) or {
+		eprintln('read C zlib stream failed: ${err.msg()}')
+		exit(1)
+	}
+	v_decoded_zlib := deflate.decompress(c_zlib) or {
+		eprintln('V decompress of C zlib failed: ${err.msg()}')
+		exit(1)
+	}
+	if v_decoded_zlib != input {
+		eprintln('C zlib -> V mismatch')
+		exit(1)
+	}
+	println('OK: C zlib -> V decompress')
+
+	// C gzip -> V
+	res2 := os.execute('${os.quoted_path(bin)} gzip ${os.quoted_path(ip)} ${os.quoted_path(gp)}')
+	if res2.exit_code != 0 {
+		eprintln('C gzip failed: ${res2.output}')
+		exit(1)
+	}
+	c_gzip := os.read_bytes(gp) or {
+		eprintln('read C gzip stream failed: ${err.msg()}')
+		exit(1)
+	}
+	v_decoded_gzip := deflate.decompress(c_gzip) or {
+		eprintln('V decompress of C gzip failed: ${err.msg()}')
+		exit(1)
+	}
+	if v_decoded_gzip != input {
+		eprintln('C gzip -> V mismatch')
+		exit(1)
+	}
+	println('OK: C gzip -> V decompress')
+
+	// V zlib -> C
+	v_zlib := deflate.compress(input) or {
+		eprintln('V zlib compress failed: ${err.msg()}')
+		exit(1)
+	}
+	os.write_file_array(vp, v_zlib) or {
+		eprintln('write V zlib failed: ${err.msg()}')
+		exit(1)
+	}
+	res3 :=
+		os.execute('${os.quoted_path(bin)} decompress ${os.quoted_path(vp)} ${os.quoted_path(dp)}')
+	if res3.exit_code != 0 {
+		eprintln('C decompress of V zlib failed: ${res3.output}')
+		exit(1)
+	}
+	c_unzlib := os.read_bytes(dp) or {
+		eprintln('read C zlib decompressed output failed: ${err.msg()}')
+		exit(1)
+	}
+	if c_unzlib != input {
+		eprintln('V zlib -> C mismatch')
+		exit(1)
+	}
+	println('OK: V zlib -> C decompress')
+
+	// V gzip -> C
+	v_gzip := deflate.compress(input, format: .gzip) or {
+		eprintln('V gzip compress failed: ${err.msg()}')
+		exit(1)
+	}
+	os.write_file_array(vgp, v_gzip) or {
+		eprintln('write V gzip failed: ${err.msg()}')
+		exit(1)
+	}
+	res4 := os.execute('${os.quoted_path(bin)} gunzip ${os.quoted_path(vgp)} ${os.quoted_path(dp)}')
+	if res4.exit_code != 0 {
+		eprintln('C gunzip of V gzip failed: ${res4.output}')
+		exit(1)
+	}
+	c_ungzip := os.read_bytes(dp) or {
+		eprintln('read C gzip decompressed output failed: ${err.msg()}')
+		exit(1)
+	}
+	if c_ungzip != input {
+		eprintln('V gzip -> C mismatch')
+		exit(1)
+	}
+	println('OK: V gzip -> C decompress')
+
+	// the same four checks against Python's zlib module, when python3 is available
+	if os.execute('python3 --version').exit_code == 0 {
+		py_src := os.join_path(@DIR, 'deflate_ref.py')
+		py_driver := os.join_path(workdir, 'deflate_ref.py')
+		os.cp(py_src, py_driver) or {
+			eprintln('copy Python reference script failed: ${err.msg()}')
+			exit(1)
+		}
+
+		py_zp := os.join_path(workdir, 'xval_py_zlib.bin')
+		py_gp := os.join_path(workdir, 'xval_py_gzip.bin')
+
+		res5 :=
+			os.execute('python3 ${os.quoted_path(py_driver)} compress ${os.quoted_path(ip)} ${os.quoted_path(py_zp)}')
+		if res5.exit_code != 0 {
+			eprintln('Python zlib compress failed: ${res5.output}')
+			exit(1)
+		}
+		py_zlib := os.read_bytes(py_zp) or {
+			eprintln('read Python zlib stream failed: ${err.msg()}')
+			exit(1)
+		}
+		// a decode error becomes an empty array, which is reported as a mismatch
+		if deflate.decompress(py_zlib) or { []u8{} } != input {
+			eprintln('Python zlib -> V mismatch')
+			exit(1)
+		}
+		println('OK: Python zlib -> V decompress')
+
+		res6 :=
+			os.execute('python3 ${os.quoted_path(py_driver)} gzip ${os.quoted_path(ip)} ${os.quoted_path(py_gp)}')
+		if res6.exit_code != 0 {
+			eprintln('Python gzip failed: ${res6.output}')
+			exit(1)
+ } + py_gzip := os.read_bytes(py_gp) or { + eprintln('read Python gzip stream failed: ${err.msg()}') + exit(1) + } + if deflate.decompress(py_gzip) or { []u8{} } != input { + eprintln('Python gzip -> V mismatch') + exit(1) + } + println('OK: Python gzip -> V decompress') + + py_unz := os.join_path(workdir, 'xval_py_unz.bin') + res7 := + os.execute('python3 ${os.quoted_path(py_driver)} decompress ${os.quoted_path(vp)} ${os.quoted_path(py_unz)}') + if res7.exit_code != 0 { + eprintln('Python decompress of V zlib failed: ${res7.output}') + exit(1) + } + if (os.read_bytes(py_unz) or { []u8{} }) != input { + eprintln('V zlib -> Python mismatch') + exit(1) + } + println('OK: V zlib -> Python decompress') + + py_ungz := os.join_path(workdir, 'xval_py_ungz.bin') + res8 := + os.execute('python3 ${os.quoted_path(py_driver)} gunzip ${os.quoted_path(vgp)} ${os.quoted_path(py_ungz)}') + if res8.exit_code != 0 { + eprintln('Python gunzip of V gzip failed: ${res8.output}') + exit(1) + } + if (os.read_bytes(py_ungz) or { []u8{} }) != input { + eprintln('V gzip -> Python mismatch') + exit(1) + } + println('OK: V gzip -> Python decompress') + } else { + eprintln('Skipping Python cross-validation: python3 not found') + } +} diff --git a/vlib/compress/deflate/interop/deflate_ref.c b/vlib/compress/deflate/interop/deflate_ref.c new file mode 100644 index 000000000..d0c93f1b4 --- /dev/null +++ b/vlib/compress/deflate/interop/deflate_ref.c @@ -0,0 +1,135 @@ +#include +#include +#include +#include + +static int rf(const char *p, unsigned char **o, size_t *n) { + FILE *f = fopen(p, "rb"); + if (!f) return 1; + fseek(f, 0, SEEK_END); + long s = ftell(f); + fseek(f, 0, SEEK_SET); + *o = malloc(*n = (size_t)s); + if (!*o) { + fclose(f); + return 1; + } + if (fread(*o, 1, *n, f) != *n) { + free(*o); + fclose(f); + return 1; + } + fclose(f); + return 0; +} + +static int wf(const char *p, const unsigned char *b, size_t n) { + FILE *f = fopen(p, "wb"); + if (!f) return 1; + if (fwrite(b, 1, n, f) 
!= n) { + fclose(f); + return 1; + } + fclose(f); + return 0; +} + +int main(int argc, char **argv) { + if (argc != 4) { + fputs("usage: xval compress|decompress|gzip|gunzip in out\n", stderr); + return 2; + } + unsigned char *in; + size_t in_n; + if (rf(argv[2], &in, &in_n)) { + fputs("read error\n", stderr); + return 1; + } + + if (strcmp(argv[1], "compress") == 0) { + uLongf cap = compressBound((uLong)in_n); + unsigned char *out = malloc(cap); + if (!out) return 1; + if (compress2(out, &cap, in, (uLong)in_n, Z_DEFAULT_COMPRESSION) != Z_OK) { + fputs("compress2 failed\n", stderr); + free(in); + free(out); + return 1; + } + wf(argv[3], out, (size_t)cap); + free(out); + } else if (strcmp(argv[1], "decompress") == 0) { + uLongf cap = in_n * 8 + 65536; + unsigned char *out = malloc(cap); + if (!out) return 1; + if (uncompress(out, &cap, in, (uLong)in_n) != Z_OK) { + fputs("uncompress failed\n", stderr); + free(in); + free(out); + return 1; + } + wf(argv[3], out, (size_t)cap); + free(out); + } else if (strcmp(argv[1], "gzip") == 0) { + z_stream s; + memset(&s, 0, sizeof(s)); + if (deflateInit2(&s, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 15 | 16, 8, Z_DEFAULT_STRATEGY) != Z_OK) { + free(in); + return 1; + } + uLongf cap = deflateBound(&s, (uLong)in_n) + 32; + unsigned char *out = malloc(cap); + if (!out) { + deflateEnd(&s); + free(in); + return 1; + } + s.next_in = in; + s.avail_in = (uInt)in_n; + s.next_out = out; + s.avail_out = (uInt)cap; + if (deflate(&s, Z_FINISH) != Z_STREAM_END) { + fputs("gzip deflate failed\n", stderr); + deflateEnd(&s); + free(in); + free(out); + return 1; + } + wf(argv[3], out, (size_t)s.total_out); + deflateEnd(&s); + free(out); + } else if (strcmp(argv[1], "gunzip") == 0) { + z_stream s; + memset(&s, 0, sizeof(s)); + if (inflateInit2(&s, 15 | 16) != Z_OK) { + free(in); + return 1; + } + uLongf cap = in_n * 8 + 65536; + unsigned char *out = malloc(cap); + if (!out) { + inflateEnd(&s); + free(in); + return 1; + } + s.next_in = in; + s.avail_in = 
(uInt)in_n; + s.next_out = out; + s.avail_out = (uInt)cap; + if (inflate(&s, Z_FINISH) != Z_STREAM_END) { + fputs("gunzip inflate failed\n", stderr); + inflateEnd(&s); + free(in); + free(out); + return 1; + } + wf(argv[3], out, (size_t)s.total_out); + inflateEnd(&s); + free(out); + } + + free(in); + return 0; +} + + diff --git a/vlib/compress/deflate/interop/deflate_ref.py b/vlib/compress/deflate/interop/deflate_ref.py new file mode 100644 index 000000000..7bff33451 --- /dev/null +++ b/vlib/compress/deflate/interop/deflate_ref.py @@ -0,0 +1,27 @@ +import sys, zlib + +def rf(p): + with open(p, "rb") as f: return f.read() + +def wf(p, b): + with open(p, "wb") as f: f.write(b) + +if len(sys.argv) != 4: + print("usage: py_ref compress|decompress|gzip|gunzip in out", file=sys.stderr) + sys.exit(2) + +mode, inp, outp = sys.argv[1], sys.argv[2], sys.argv[3] +data = rf(inp) +if mode == "compress": + wf(outp, zlib.compress(data)) +elif mode == "decompress": + wf(outp, zlib.decompress(data)) +elif mode == "gzip": + co = zlib.compressobj(level=6, wbits=16 + zlib.MAX_WBITS) + wf(outp, co.compress(data) + co.flush()) +elif mode == "gunzip": + wf(outp, zlib.decompress(data, 16 + zlib.MAX_WBITS)) +else: + print("unknown mode", file=sys.stderr) + sys.exit(2) + -- 2.39.5