| 1 | module gzip |
| 2 | |
| 3 | import hash.crc32 |
| 4 | import os |
| 5 | |
// Bits of the flags byte (index 3 of a compressed stream) that the tests below switch on.
const test_ftext = u8(0x01)
const test_fhcrc = u8(0x02)
const test_fextra = u8(0x04)
const test_fname = u8(0x08)
const test_fcomment = u8(0x10)
// Folder holding the `.gz` fixtures; it sits next to this test file.
const samples_folder = os.join_path(os.dir(@FILE), 'samples')
| 12 | |
// test_gzip checks that compressing and then decompressing returns the original bytes.
fn test_gzip() {
	plain := 'Hello world!'.bytes()
	packed := compress(plain)!
	unpacked := decompress(packed)!
	assert unpacked == plain
}
| 19 | |
// assert_decompress_error expects `decompress(data)` to fail with exactly the
// message `reason`. It returns an error itself when decompression succeeds.
fn assert_decompress_error(data []u8, reason string) ! {
	decompress(data) or {
		actual := err.msg()
		assert actual == reason
		return
	}
	// Reaching this point means decompress() accepted the input.
	return error('did not error')
}
| 27 | |
// test_gzip_invalid_too_short feeds an empty buffer to decompress().
fn test_gzip_invalid_too_short() {
	empty := []u8{}
	assert_decompress_error(empty, 'invalid gzip stream: too short')!
}
| 31 | |
// test_gzip_invalid_magic_numbers feeds 100 zero bytes, so the leading bytes are all zero.
fn test_gzip_invalid_magic_numbers() {
	zeroes := []u8{len: 100}
	assert_decompress_error(zeroes, 'invalid gzip stream: bad magic')!
}
| 35 | |
// test_gzip_invalid_compression sets only the first two bytes and leaves
// every later byte of the 100-byte buffer at zero.
fn test_gzip_invalid_compression() {
	mut stream := []u8{len: 100}
	stream[0] = 0x1f
	stream[1] = 0x8b
	expected := 'invalid gzip stream: unsupported compression method'
	assert_decompress_error(stream, expected)!
}
| 42 | |
// test_gzip_with_ftext sets the FTEXT bit in the flags byte and expects
// decompression to still return the original bytes.
fn test_gzip_with_ftext() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	packed[3] |= test_ftext
	unpacked := decompress(packed)!
	assert unpacked == plain
}
| 50 | |
// test_gzip_with_fname sets the FNAME bit and splices a zero-terminated
// name (`hi`) into the stream at offset 10.
fn test_gzip_with_fname() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	packed[3] |= test_fname
	mut name := 'hi'.bytes()
	name << u8(0x00)
	packed.insert(10, name)
	unpacked := decompress(packed)!
	assert unpacked == plain
}
| 61 | |
// test_gzip_with_fcomment sets the FCOMMENT bit and splices a zero-terminated
// comment (`hi`) into the stream at offset 10.
fn test_gzip_with_fcomment() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	packed[3] |= test_fcomment
	mut comment := 'hi'.bytes()
	comment << u8(0x00)
	packed.insert(10, comment)
	unpacked := decompress(packed)!
	assert unpacked == plain
}
| 72 | |
// test_gzip_with_fname_fcomment sets both FNAME and FCOMMENT and splices two
// zero-terminated `hi` strings, back to back, into the stream at offset 10.
fn test_gzip_with_fname_fcomment() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	packed[3] |= (test_fname | test_fcomment)
	mut field := 'hi'.bytes()
	field << u8(0x00)
	// Both fields carry the same text, so the same bytes are inserted twice.
	for _ in 0 .. 2 {
		packed.insert(10, field)
	}
	unpacked := decompress(packed)!
	assert unpacked == plain
}
| 86 | |
// test_gzip_with_fextra sets the FEXTRA bit and splices an extra field into
// the stream at offset 10: a 2-byte length followed by that many payload bytes.
fn test_gzip_with_fextra() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	packed[3] |= test_fextra
	payload := 'hi'.bytes()
	// XLEN is a 2-byte little-endian value
	xlen := u16(payload.len)
	mut extra := [u8(xlen), u8(xlen >> 8)]
	extra << payload
	packed.insert(10, extra)
	unpacked := decompress(packed)!
	assert unpacked == plain
}
| 100 | |
// test_gzip_with_hcrc sets the FHCRC bit and inserts a matching header
// checksum at offset 10; decompression must still succeed.
fn test_gzip_with_hcrc() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	packed[3] |= test_fhcrc
	// The header CRC is the low 16 bits of the CRC32 over the first 10 bytes,
	// computed after the flag bit is set and stored low byte first.
	header_sum := crc32.sum(packed[..10])
	crc16 := u16(header_sum & 0xffff)
	low := u8(crc16)
	high := u8(crc16 >> 8)
	packed.insert(10, low)
	packed.insert(11, high)
	unpacked := decompress(packed)!
	assert unpacked == plain
}
| 113 | |
// test_gzip_with_invalid_hcrc sets the FHCRC bit but stores a header checksum
// whose high byte is off by one, which decompress() must reject.
fn test_gzip_with_invalid_hcrc() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	packed[3] |= test_fhcrc
	// The header CRC is the low 16 bits of the CRC32 over the first 10 bytes,
	// stored low byte first.
	header_sum := crc32.sum(packed[..10])
	crc16 := u16(header_sum & 0xffff)
	low := u8(crc16)
	bad_high := u8((crc16 >> 8) + 1) // corrupt high byte
	packed.insert(10, low)
	packed.insert(11, bad_high)
	assert_decompress_error(packed, 'invalid gzip stream: header crc16 mismatch')!
}
| 125 | |
// test_gzip_with_invalid_checksum bumps the byte five positions from the end
// of the stream and expects a crc32 mismatch.
fn test_gzip_with_invalid_checksum() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	target := packed.len - 5
	packed[target] += 1
	assert_decompress_error(packed, 'invalid gzip stream: crc32 mismatch')!
}
| 132 | |
// test_gzip_with_invalid_length bumps the very last byte of the stream and
// expects a size mismatch.
fn test_gzip_with_invalid_length() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	last := packed.len - 1
	packed[last] += 1
	assert_decompress_error(packed, 'invalid gzip stream: size mismatch')!
}
| 139 | |
// test_gzip_with_invalid_flags sets the top bit of the flags byte, which none
// of the test_f* constants use, and expects the stream to be rejected.
fn test_gzip_with_invalid_flags() {
	plain := 'Hello world!'.bytes()
	mut packed := compress(plain)!
	reserved_bit := u8(0b1000_0000)
	packed[3] |= reserved_bit
	assert_decompress_error(packed, 'invalid gzip stream: reserved flags set')!
}
| 146 | |
// test_gzip_decompress_callback decompresses through a chunk callback that
// adds up the chunk lengths, then checks that total against both the returned
// count and the original input length.
fn test_gzip_decompress_callback() {
	plain := '321323'.repeat(10_000)
	packed := compress(plain.bytes())!
	mut total := 0
	mut total_ref := &total
	written := decompress_with_callback(packed, fn (chunk []u8, acc &int) int {
		// Accumulate through the user pointer; the chunk itself is discarded.
		unsafe {
			*acc += chunk.len
		}
		return chunk.len
	}, total_ref)!
	assert written == total
	assert written == plain.len
}
| 161 | |
// test_gzip_decompress_callback_rejects_non_gzip passes a 7-byte buffer that
// does not start with the 0x1f 0x8b bytes used elsewhere in this file and
// expects the callback API to fail with the `too short` error.
fn test_gzip_decompress_callback_rejects_non_gzip() {
	// NOTE(review): 0x78 0x9c looks like a zlib header - confirm.
	non_gzip := [u8(0x78), 0x9c, 0x03, 0x00, 0x00, 0x00, 0x01]
	passthrough := fn (chunk []u8, _ voidptr) int {
		return chunk.len
	}
	decompress_with_callback(non_gzip, passthrough, unsafe { nil }) or {
		assert err.msg() == 'invalid gzip stream: too short'
		return
	}
	// Decompression unexpectedly succeeded.
	assert false
}
| 172 | |
// s returns the path of the sample file `fname` inside `samples_folder`.
fn s(fname string) string {
	sample_path := os.join_path(samples_folder, fname)
	return sample_path
}
| 176 | |
// read_and_decode_file reads `fpath`, decompresses its bytes, and returns both
// the raw compressed bytes and the decoded content as a string.
// Read and decompression errors are propagated to the caller.
fn read_and_decode_file(fpath string) !([]u8, string) {
	raw := os.read_bytes(fpath)!
	unpacked := decompress(raw)!
	return raw, unpacked.bytestr()
}
| 183 | |
// test_reading_and_decoding_a_known_gziped_file checks the first three and
// last five raw bytes of `known.gz`, plus a few markers in its decoded text.
fn test_reading_and_decoding_a_known_gziped_file() {
	raw, text := read_and_decode_file(s('known.gz'))!
	head := raw#[0..3]
	tail := raw#[-5..]
	assert head == [u8(31), 139, 8]
	assert tail == [u8(127), 115, 1, 0, 0]
	assert text.contains('## Description')
	assert text.contains('## Examples:')
	assert text.ends_with('```\n')
}
| 192 | |
// test_decoding_all_samples_files decompresses every `.gz` file found under
// `samples_folder` and checks that each one decodes to non-empty content.
fn test_decoding_all_samples_files() {
	gz_files := os.walk_ext(samples_folder, '.gz')
	// Without this guard the loop below would pass vacuously when the samples
	// folder is missing or holds no `.gz` files.
	assert gz_files.len > 0, 'no `.gz` samples found in `${samples_folder}`'
	for gz_file in gz_files {
		_, content := read_and_decode_file(gz_file)!
		// Name the offending file: the content is empty whenever this fails,
		// so printing it would say nothing.
		assert content.len > 0, 'decoded content should not be empty: `${gz_file}`'
	}
}
| 199 | |
// test_reading_gzip_files_compressed_with_different_options decodes four
// sample files (levels 1, 5, 9, and level 9 rsyncable) and checks that they
// all decode to the same content.
fn test_reading_gzip_files_compressed_with_different_options() {
	_, level_1 := read_and_decode_file(s('readme_level_1.gz'))!
	_, level_5 := read_and_decode_file(s('readme_level_5.gz'))!
	_, level_9 := read_and_decode_file(s('readme_level_9.gz'))!
	_, level_9_rsync := read_and_decode_file(s('readme_level_9_rsyncable.gz'))!
	// Compare neighbouring variants pairwise.
	assert level_9_rsync == level_9
	assert level_9 == level_5
	assert level_5 == level_1
}
| 209 | |