| 1 | module snappy |
| 2 | |
| 3 | // --------------------------------------------------------------------------- |
| 4 | // Helpers |
| 5 | // --------------------------------------------------------------------------- |
| 6 | |
// round_trip compresses `input`, decompresses the result again, and asserts
// two things: the restored bytes equal `input`, and the compressed size does
// not exceed `max_compressed_length(input.len)`.
// `label` is used as the prefix of both assertion messages.
// An error returned by `decompress` is propagated to the caller.
fn round_trip(label string, input []u8) ! {
	packed := compress(input)
	restored := decompress(packed)!
	assert restored == input, '${label}: round-trip mismatch (input len=${input.len})'
	// The size limit is only computed once the content check has passed.
	limit := max_compressed_length(input.len)
	assert packed.len <= limit, '${label}: compressed size ${packed.len} exceeds bound ${limit}'
}
| 14 | |
| 15 | // --------------------------------------------------------------------------- |
| 16 | // Tests |
| 17 | // --------------------------------------------------------------------------- |
| 18 | |
// test_empty round-trips a zero-length input.
fn test_empty() {
	no_bytes := []u8{}
	round_trip('empty', no_bytes) or { panic(err) }
}
| 22 | |
// test_single_byte round-trips an input consisting of exactly one byte.
fn test_single_byte() {
	lone := [u8(0x42)]
	round_trip('single byte', lone) or { panic(err) }
}
| 26 | |
// test_two_bytes round-trips a two-byte input.
fn test_two_bytes() {
	pair := [u8(0xde), 0xad]
	round_trip('two bytes', pair) or { panic(err) }
}
| 30 | |
// test_all_zeros checks that 4096 zero bytes survive a compress/decompress
// cycle and that the compressed form is smaller than a quarter of the input.
fn test_all_zeros() {
	// u8 elements default to 0, so no explicit `init` is needed for a zero run.
	zeros := []u8{len: 4096}
	packed := compress(zeros)
	restored := decompress(packed) or { panic(err) }
	assert restored == zeros, 'all-zeros round-trip failed'
	// A long run of one byte value is expected to shrink substantially.
	quarter := zeros.len / 4
	assert packed.len < quarter, 'all-zeros should compress well'
}
| 40 | |
// test_all_same_nonzero round-trips 1024 bytes that all hold 0xff.
fn test_all_same_nonzero() {
	round_trip('all-0xff', []u8{len: 1024, init: u8(0xff)}) or { panic(err) }
}
| 45 | |
// test_incompressible round-trips 512 pseudo-random bytes, which are expected
// to compress poorly.
fn test_incompressible() {
	// Linear congruential generator with a fixed seed, so the byte sequence is
	// the same on every run. Each output byte is the top 8 bits of the state.
	mut state := u32(0xdeadbeef)
	mut noise := []u8{cap: 512}
	for _ in 0 .. 512 {
		state = state * 1664525 + 1013904223
		noise << u8(state >> 24)
	}
	round_trip('pseudo-random', noise) or { panic(err) }
}
| 56 | |
// test_repeated_pattern checks an 8-byte pattern repeated 200 times: it must
// round-trip unchanged and compress to less than half of its original size.
// A short repeating pattern exercises the copy back-reference paths.
fn test_repeated_pattern() {
	repeated := 'abcdefgh'.repeat(200).bytes()
	packed := compress(repeated)
	restored := decompress(packed) or { panic(err) }
	assert restored == repeated, 'repeated-pattern round-trip failed'
	half := repeated.len / 2
	assert packed.len < half, 'repeated pattern should compress well'
}
| 69 | |
// test_lorem_ipsum round-trips a paragraph of natural-language text repeated
// 50 times.
fn test_lorem_ipsum() {
	lorem := 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. ' +
		'Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ' +
		'Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris ' +
		'nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in ' +
		'reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla ' +
		'pariatur. Excepteur sint occaecat cupidatat non proident, sunt in ' +
		'culpa qui officia deserunt mollit anim id est laborum.'
	// Convert the paragraph to bytes once and append that same chunk 50 times.
	chunk := lorem.bytes()
	mut text := []u8{cap: chunk.len * 50}
	for _ in 0 .. 50 {
		text << chunk
	}
	round_trip('lorem-ipsum-x50', text) or { panic(err) }
}
| 84 | |
// test_multi_block round-trips 200_000 bytes whose values cycle 0..255.
fn test_multi_block() {
	// Larger than one 64 KiB block to exercise the block-splitting path.
	// `index` is the element position supplied by the array initialiser.
	// The array is never modified afterwards, so it is declared immutable
	// (a needless `mut` draws a compiler warning).
	input := []u8{len: 200_000, init: u8(index & 0xff)}
	round_trip('multi-block', input) or { panic(err) }
}
| 90 | |
// test_max_compressed_length_bound verifies, for several input sizes, that the
// output of `compress` is never longer than `max_compressed_length(n)`.
fn test_max_compressed_length_bound() {
	for n in [0, 1, 100, 1000, 65536, 200_000] {
		bound := max_compressed_length(n)
		// Bytes cycle 0..255; `index` is the element position supplied by the
		// array initialiser. The array is only read, so it is not `mut`.
		input := []u8{len: n, init: u8(index & 0xff)}
		compressed := compress(input)
		assert compressed.len <= bound, 'max_compressed_length(${n})=${bound} exceeded by ${compressed.len}'
	}
}
| 99 | |
// test_decompress_invalid_varint expects `decompress` to return an error for a
// stream whose leading varint never terminates.
fn test_decompress_invalid_varint() {
	// Six bytes, each with its most significant bit set, so the varint never ends.
	unterminated := []u8{len: 6, init: 0xff}
	decompress(unterminated) or {
		// An error is the correct outcome for this input.
		return
	}
	panic('expected decompress to fail on invalid varint')
}
| 106 | |
// test_decompress_truncated_literal expects `decompress` to return an error
// when a literal announces more bytes than the stream actually contains.
fn test_decompress_truncated_literal() {
	// Header: 10 bytes of uncompressed data.
	length_prefix := u8(10)
	// Literal tag claiming 10 bytes (the tag stores length - 1 shifted left by 2).
	literal_tag := u8(9 << 2)
	// Only a single payload byte follows, so the literal is cut short.
	truncated := [length_prefix, literal_tag, u8(0x41)]
	decompress(truncated) or { return }
	panic('expected decompress to fail on truncated literal')
}
| 114 | |
// test_decompress_bad_offset expects `decompress` to return an error when a
// copy element points before the start of the output produced so far.
fn test_decompress_bad_offset() {
	// COPY_2 tag with length 5 (the tag stores length - 1 shifted left by 2).
	copy_tag := u8(2 | ((5 - 1) << 2))
	// varint(5), the tag, then offset 999 as two little-endian bytes (0x03e7).
	// No output exists yet, so the offset cannot be satisfied.
	stream := [u8(5), copy_tag, u8(0xe7), u8(0x03)]
	decompress(stream) or { return }
	panic('expected decompress to fail on out-of-range offset')
}
| 122 | |