// Implementation of Snappy compression fully in V. // // Compatible with the standard Snappy binary format. Implements level-1 // (fast) compression using a hash-table approach over 64 KiB blocks. // // Public API: // compress(input []u8) []u8 // decompress(input []u8) ![]u8 module snappy // --------------------------------------------------------------------------- // Constants // --------------------------------------------------------------------------- const block_size = 1 << 16 // 65 536 — max block processed at once const min_table_size = 1 << 8 // 256 const max_table_size = 1 << 15 // 32 768 (fits in u16 alongside block_size-1) const input_margin = 15 // bytes of look-ahead the compressor needs const hash_magic = u32(0x1e35a7bd) // --------------------------------------------------------------------------- // Public API // --------------------------------------------------------------------------- // max_compressed_length returns an upper bound on the compressed size for // `n` bytes of uncompressed input. fn max_compressed_length(n int) int { return 32 + n + n / 6 } // compress compresses `input` and returns the compressed bytes. pub fn compress(input []u8) []u8 { n := input.len mut out := []u8{cap: max_compressed_length(n)} // Write the uncompressed length as a varint header. write_uvarint(mut out, u32(n)) if n == 0 { return out } // Allocate a single hash table reused across every block. mut table := []u16{len: max_table_size} mut pos := 0 for pos < n { end := if pos + block_size < n { pos + block_size } else { n } block := input[pos..end] // Zero only the slice of the table we will actually use. tsize := table_size_for(block.len) for i in 0 .. tsize { table[i] = 0 } compress_fragment(block, mut out, mut table, tsize) pos = end } return out } // decompress decompresses a Snappy-compressed slice. // It returns the original bytes, or an error if the data is malformed. pub fn decompress(input []u8) ![]u8 { // Read the varint-encoded uncompressed length from the header. ulen, hdr_end, ok := read_uvarint(input, 0) if !ok { return error('snappy: invalid header varint') } // Pre-allocate the exact output size; reject obviously bogus lengths. ilen := int(ulen) if ilen < 0 { return error('snappy: decoded length overflow') } // Use a pre-sized buffer with a write cursor instead of dynamic appends. // This eliminates per-byte reallocation checks on every literal and copy. mut out := []u8{len: ilen} mut op := 0 mut pos := hdr_end for pos < input.len { tag := input[pos] pos++ tag_type := tag & 0x03 if tag_type == 0 { // LITERAL lit_len, new_pos, lit_ok := decode_literal_len(input, tag, pos) if !lit_ok { return error('snappy: truncated literal length') } pos = new_pos if pos + lit_len > input.len { return error('snappy: literal extends past input') } if op + lit_len > ilen { return error('snappy: output exceeds declared length') } for i in 0 .. lit_len { out[op + i] = input[pos + i] } op += lit_len pos += lit_len } else if tag_type == 1 { // COPY_1 (2-byte offset) if pos >= input.len { return error('snappy: truncated copy-1 tag') } length := int((tag >> 2) & 0x07) + 4 offset := int(u32(tag & 0xe0) << 3 | u32(input[pos])) pos++ if offset == 0 { return error('snappy: zero offset in copy-1') } op = expand_copy_into(mut out, op, ilen, offset, length)! } else if tag_type == 2 { // COPY_2 (3-byte tag) if pos + 1 >= input.len { return error('snappy: truncated copy-2 tag') } length := int((tag >> 2) & 0x3f) + 1 offset := int(u32(input[pos]) | u32(input[pos + 1]) << 8) pos += 2 if offset == 0 { return error('snappy: zero offset in copy-2') } op = expand_copy_into(mut out, op, ilen, offset, length)! } else { // COPY_4 (5-byte tag) if pos + 3 >= input.len { return error('snappy: truncated copy-4 tag') } length := int((tag >> 2) & 0x3f) + 1 offset := int(u32(input[pos]) | u32(input[pos + 1]) << 8 | u32(input[pos + 2]) << 16 | u32(input[ pos + 3]) << 24) pos += 4 if offset == 0 { return error('snappy: zero offset in copy-4') } op = expand_copy_into(mut out, op, ilen, offset, length)! } } if op != ilen { return error('snappy: decompressed length mismatch: got ${op}, want ${ilen}') } return out } // --------------------------------------------------------------------------- // Compression internals // --------------------------------------------------------------------------- // compress_fragment compresses a single block (≤ 64 KiB) into `out`. // `table` must have at least `tsize` zeroed entries (tsize is a power of 2). fn compress_fragment(input []u8, mut out []u8, mut table []u16, tsize int) { n := input.len if n == 0 { return } // tbits is the number of bits needed to index `tsize` entries; used // to right-shift a 32-bit hash down to the table index. tbits := ilog2(tsize) // ip — current input position (index into `input`) // lit — start of current literal run mut ip := 0 mut lit := 0 // We need at least `input_margin` bytes of look-ahead for the main loop. // Very short blocks go to the slow literal-only path. if n >= input_margin { // Emit one forced literal so we have a non-empty literal buffer, // then begin the hash loop one byte in. ip = 1 mut next_hash := mix32(load32le(input, ip), tbits) outer: for { // skip grows when matches are not found — we scan with // increasing stride to amortize the cost. mut skip := 32 mut candidate := 0 mut next_ip := ip // Inner probe loop: walk forward until we find a 4-byte match. for { ip = next_ip hash := next_hash bytes_until_skip := skip >> 5 skip++ next_ip = ip + bytes_until_skip if next_ip > n - input_margin { break outer } next_hash = mix32(load32le(input, next_ip), tbits) candidate = int(table[hash]) table[hash] = u16(ip) if load32le(input, ip) == load32le(input, candidate) { break } } // We have a 4-byte match at [ip] == [candidate]. // Emit the literal bytes accumulated since `lit`. write_literal(mut out, input[lit..ip]) // Extend the match as far as possible. for { matched := 4 + find_match_length(input, candidate + 4, ip + 4, n) write_copy(mut out, ip - candidate, matched) ip += matched lit = ip if ip >= n - input_margin { break outer } // Look for a consecutive match immediately after the copy. // Note: mix32 already returns a value in [0, tsize), so the // redundant `& tmask` from the original code is removed. prev_hash := mix32(load32le(input, ip - 1), tbits) table[prev_hash] = u16(ip - 1) cur_hash := mix32(load32le(input, ip), tbits) candidate = int(table[cur_hash]) table[cur_hash] = u16(ip) if load32le(input, ip) != load32le(input, candidate) { // seed next_hash for the outer loop's next iteration next_hash = mix32(load32le(input, ip + 1), tbits) ip++ break } } } } // Emit whatever literal bytes remain. write_literal(mut out, input[lit..]) } // write_literal emits a LITERAL tag followed by the literal bytes. @[inline] fn write_literal(mut out []u8, lit []u8) { n := lit.len if n == 0 { return } n1 := n - 1 if n1 < 60 { out << u8(u32(n1) << 2) // tag byte encodes length-1 directly } else { w := byte_width(n1) out << u8(u32(59 + w) << 2) // tag signals 1..4 extra length bytes mut v := n1 for _ in 0 .. w { out << u8(v & 0xff) v >>= 8 } } out << lit } // write_copy chooses and emits the most compact COPY tag for the given // (offset, length) back-reference. @[inline] fn write_copy(mut out []u8, offset int, length int) { mut remaining := length for remaining > 0 { // COPY_2 handles lengths 1..64; use it as the general case. // COPY_1 saves a byte when offset < 2048 and length is 4..11. chunk := if remaining > 64 { 64 } else { remaining } if offset < 2048 && chunk >= 4 && chunk <= 11 { // COPY_1: 2 bytes total out << u8(u32(1) | (u32(chunk - 4) << 2) | ((u32(offset) >> 3) & u32(0xe0))) out << u8(offset & 0xff) } else if offset < 65536 { // COPY_2: 3 bytes total out << u8(2 | (u32(chunk - 1) << 2)) out << u8(offset & 0xff) out << u8((offset >> 8) & 0xff) } else { // COPY_4: 5 bytes total (never reached for 64 KiB blocks, left for completeness) out << u8(3 | (u32(chunk - 1) << 2)) out << u8(offset & 0xff) out << u8((offset >> 8) & 0xff) out << u8((offset >> 16) & 0xff) out << u8((offset >> 24) & 0xff) } remaining -= chunk } } // find_match_length returns how many bytes starting at s1 and s2 are equal, // stopping before `limit`. Uses 4-byte word comparisons when possible. @[inline] fn find_match_length(input []u8, s1 int, s2 int, limit int) int { mut i := 0 // Compare 4 bytes at a time while there is room for a full word. for s2 + i + 4 <= limit && s1 + i + 4 <= limit { if load32le(input, s1 + i) != load32le(input, s2 + i) { // Mismatch somewhere in this word — find the exact byte. for s2 + i < limit && input[s1 + i] == input[s2 + i] { i++ } return i } i += 4 } // Handle the remaining 0..3 tail bytes. for s2 + i < limit && input[s1 + i] == input[s2 + i] { i++ } return i } // --------------------------------------------------------------------------- // Decompression internals // --------------------------------------------------------------------------- // expand_copy_into copies `length` bytes from `offset` bytes before the // write cursor `op` in the pre-allocated `out` buffer. Returns the new // write cursor. Handles the overlapping (run-length) case correctly. @[inline] fn expand_copy_into(mut out []u8, op int, cap_ int, offset int, length int) !int { src := op - offset if src < 0 { return error('snappy: copy offset past beginning of output') } if op + length > cap_ { return error('snappy: output exceeds declared length') } // Both overlapping and non-overlapping copies read from indices // that were written in earlier iterations, so a forward byte loop // is correct in all cases (and required for the RLE / overlap case). for i in 0 .. length { out[op + i] = out[src + i] } return op + length } // decode_literal_len decodes the length (in bytes) of a literal run from the // tag byte and returns (length, new_pos, ok). `pos` should point to the // byte immediately after the tag. @[inline] fn decode_literal_len(data []u8, tag u8, pos int) (int, int, bool) { n1 := int(tag >> 2) // length - 1, or 60..63 for extended form if n1 < 60 { return n1 + 1, pos, true } extra := n1 - 59 // number of extra bytes (1..4) if pos + extra > data.len { return 0, pos, false } mut val := 0 for i in 0 .. extra { val |= int(u32(data[pos + i]) << u32(8 * i)) } return val + 1, pos + extra, true } // --------------------------------------------------------------------------- // Varint (unsigned LEB-128, protobuf-style) // --------------------------------------------------------------------------- // write_uvarint appends `val` as a variable-length unsigned integer. @[inline] fn write_uvarint(mut out []u8, val u32) { mut v := val for v >= 0x80 { out << u8(v & 0x7f | 0x80) v >>= 7 } out << u8(v) } // read_uvarint decodes a varint from `data` starting at `pos`. // Returns (value, new_pos, ok). @[inline] fn read_uvarint(data []u8, pos int) (u32, int, bool) { mut val := u32(0) mut shift := u32(0) mut i := pos for i < data.len && i - pos < 5 { b := data[i] i++ if shift == 28 && b > 0x0f { return 0, pos, false } val |= u32(b & 0x7f) << shift if b & 0x80 == 0 { return val, i, true } shift += 7 } return 0, pos, false } // --------------------------------------------------------------------------- // Utilities // --------------------------------------------------------------------------- // load32le reads four bytes from `data[pos..]` as a little-endian u32. @[inline] fn load32le(data []u8, pos int) u32 { return u32(data[pos]) | u32(data[pos + 1]) << 8 | u32(data[pos + 2]) << 16 | u32(data[pos + 3]) << 24 } // mix32 hashes a 32-bit word down to a `tbits`-bit index using a // multiplicative hash (same constant as the reference implementation). @[inline] fn mix32(v u32, tbits int) int { return int((v * hash_magic) >> u32(32 - tbits)) } // table_size_for returns the smallest power-of-two hash table size that // comfortably covers `n` bytes, clamped to [min_table_size, max_table_size]. fn table_size_for(n int) int { mut size := min_table_size for size < n && size < max_table_size { size <<= 1 } return size } // ilog2 returns floor(log2(n)) for n ≥ 1. @[inline] fn ilog2(n int) int { mut v := n mut r := 0 for v > 1 { v >>= 1 r++ } return r } // byte_width returns the number of bytes needed to represent `n` (1..4). @[inline] fn byte_width(n int) int { if n < (1 << 8) { return 1 } if n < (1 << 16) { return 2 } if n < (1 << 24) { return 3 } return 4 }