| 1 | // Implementation of Snappy compression fully in V. |
| 2 | // |
| 3 | // Compatible with the standard Snappy binary format. Implements level-1 |
| 4 | // (fast) compression using a hash-table approach over 64 KiB blocks. |
| 5 | // |
| 6 | // Public API: |
| 7 | // compress(input []u8) []u8 |
| 8 | // decompress(input []u8) ![]u8 |
| 9 | |
| 10 | module snappy |
| 11 | |
| 12 | // --------------------------------------------------------------------------- |
| 13 | // Constants |
| 14 | // --------------------------------------------------------------------------- |
| 15 | |
| 16 | const block_size = 1 << 16 // 65 536 — max block processed at once |
| 17 | const min_table_size = 1 << 8 // 256 |
| 18 | const max_table_size = 1 << 15 // 32 768 (fits in u16 alongside block_size-1) |
| 19 | const input_margin = 15 // bytes of look-ahead the compressor needs |
| 20 | const hash_magic = u32(0x1e35a7bd) |
| 21 | |
| 22 | // --------------------------------------------------------------------------- |
| 23 | // Public API |
| 24 | // --------------------------------------------------------------------------- |
| 25 | |
| 26 | // max_compressed_length returns an upper bound on the compressed size for |
| 27 | // `n` bytes of uncompressed input. |
| 28 | fn max_compressed_length(n int) int { |
| 29 | return 32 + n + n / 6 |
| 30 | } |
| 31 | |
| 32 | // compress compresses `input` and returns the compressed bytes. |
| 33 | pub fn compress(input []u8) []u8 { |
| 34 | n := input.len |
| 35 | mut out := []u8{cap: max_compressed_length(n)} |
| 36 | |
| 37 | // Write the uncompressed length as a varint header. |
| 38 | write_uvarint(mut out, u32(n)) |
| 39 | |
| 40 | if n == 0 { |
| 41 | return out |
| 42 | } |
| 43 | |
| 44 | // Allocate a single hash table reused across every block. |
| 45 | mut table := []u16{len: max_table_size} |
| 46 | |
| 47 | mut pos := 0 |
| 48 | for pos < n { |
| 49 | end := if pos + block_size < n { pos + block_size } else { n } |
| 50 | block := input[pos..end] |
| 51 | |
| 52 | // Zero only the slice of the table we will actually use. |
| 53 | tsize := table_size_for(block.len) |
| 54 | for i in 0 .. tsize { |
| 55 | table[i] = 0 |
| 56 | } |
| 57 | |
| 58 | compress_fragment(block, mut out, mut table, tsize) |
| 59 | pos = end |
| 60 | } |
| 61 | |
| 62 | return out |
| 63 | } |
| 64 | |
| 65 | // decompress decompresses a Snappy-compressed slice. |
| 66 | // It returns the original bytes, or an error if the data is malformed. |
| 67 | pub fn decompress(input []u8) ![]u8 { |
| 68 | // Read the varint-encoded uncompressed length from the header. |
| 69 | ulen, hdr_end, ok := read_uvarint(input, 0) |
| 70 | if !ok { |
| 71 | return error('snappy: invalid header varint') |
| 72 | } |
| 73 | |
| 74 | // Pre-allocate the exact output size; reject obviously bogus lengths. |
| 75 | ilen := int(ulen) |
| 76 | if ilen < 0 { |
| 77 | return error('snappy: decoded length overflow') |
| 78 | } |
| 79 | |
| 80 | // Use a pre-sized buffer with a write cursor instead of dynamic appends. |
| 81 | // This eliminates per-byte reallocation checks on every literal and copy. |
| 82 | mut out := []u8{len: ilen} |
| 83 | mut op := 0 |
| 84 | mut pos := hdr_end |
| 85 | |
| 86 | for pos < input.len { |
| 87 | tag := input[pos] |
| 88 | pos++ |
| 89 | |
| 90 | tag_type := tag & 0x03 |
| 91 | |
| 92 | if tag_type == 0 { |
| 93 | // LITERAL |
| 94 | lit_len, new_pos, lit_ok := decode_literal_len(input, tag, pos) |
| 95 | if !lit_ok { |
| 96 | return error('snappy: truncated literal length') |
| 97 | } |
| 98 | pos = new_pos |
| 99 | if pos + lit_len > input.len { |
| 100 | return error('snappy: literal extends past input') |
| 101 | } |
| 102 | if op + lit_len > ilen { |
| 103 | return error('snappy: output exceeds declared length') |
| 104 | } |
| 105 | for i in 0 .. lit_len { |
| 106 | out[op + i] = input[pos + i] |
| 107 | } |
| 108 | op += lit_len |
| 109 | pos += lit_len |
| 110 | } else if tag_type == 1 { |
| 111 | // COPY_1 (2-byte offset) |
| 112 | if pos >= input.len { |
| 113 | return error('snappy: truncated copy-1 tag') |
| 114 | } |
| 115 | length := int((tag >> 2) & 0x07) + 4 |
| 116 | offset := int(u32(tag & 0xe0) << 3 | u32(input[pos])) |
| 117 | pos++ |
| 118 | if offset == 0 { |
| 119 | return error('snappy: zero offset in copy-1') |
| 120 | } |
| 121 | op = expand_copy_into(mut out, op, ilen, offset, length)! |
| 122 | } else if tag_type == 2 { |
| 123 | // COPY_2 (3-byte tag) |
| 124 | if pos + 1 >= input.len { |
| 125 | return error('snappy: truncated copy-2 tag') |
| 126 | } |
| 127 | length := int((tag >> 2) & 0x3f) + 1 |
| 128 | offset := int(u32(input[pos]) | u32(input[pos + 1]) << 8) |
| 129 | pos += 2 |
| 130 | if offset == 0 { |
| 131 | return error('snappy: zero offset in copy-2') |
| 132 | } |
| 133 | op = expand_copy_into(mut out, op, ilen, offset, length)! |
| 134 | } else { |
| 135 | // COPY_4 (5-byte tag) |
| 136 | if pos + 3 >= input.len { |
| 137 | return error('snappy: truncated copy-4 tag') |
| 138 | } |
| 139 | length := int((tag >> 2) & 0x3f) + 1 |
| 140 | offset := int(u32(input[pos]) | u32(input[pos + 1]) << 8 | u32(input[pos + 2]) << 16 | u32(input[ |
| 141 | pos + 3]) << 24) |
| 142 | pos += 4 |
| 143 | if offset == 0 { |
| 144 | return error('snappy: zero offset in copy-4') |
| 145 | } |
| 146 | op = expand_copy_into(mut out, op, ilen, offset, length)! |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | if op != ilen { |
| 151 | return error('snappy: decompressed length mismatch: got ${op}, want ${ilen}') |
| 152 | } |
| 153 | return out |
| 154 | } |
| 155 | |
| 156 | // --------------------------------------------------------------------------- |
| 157 | // Compression internals |
| 158 | // --------------------------------------------------------------------------- |
| 159 | |
| 160 | // compress_fragment compresses a single block (≤ 64 KiB) into `out`. |
| 161 | // `table` must have at least `tsize` zeroed entries (tsize is a power of 2). |
| 162 | fn compress_fragment(input []u8, mut out []u8, mut table []u16, tsize int) { |
| 163 | n := input.len |
| 164 | if n == 0 { |
| 165 | return |
| 166 | } |
| 167 | |
| 168 | // tbits is the number of bits needed to index `tsize` entries; used |
| 169 | // to right-shift a 32-bit hash down to the table index. |
| 170 | tbits := ilog2(tsize) |
| 171 | |
| 172 | // ip — current input position (index into `input`) |
| 173 | // lit — start of current literal run |
| 174 | mut ip := 0 |
| 175 | mut lit := 0 |
| 176 | |
| 177 | // We need at least `input_margin` bytes of look-ahead for the main loop. |
| 178 | // Very short blocks go to the slow literal-only path. |
| 179 | if n >= input_margin { |
| 180 | // Emit one forced literal so we have a non-empty literal buffer, |
| 181 | // then begin the hash loop one byte in. |
| 182 | ip = 1 |
| 183 | |
| 184 | mut next_hash := mix32(load32le(input, ip), tbits) |
| 185 | |
| 186 | outer: for { |
| 187 | // skip grows when matches are not found — we scan with |
| 188 | // increasing stride to amortize the cost. |
| 189 | mut skip := 32 |
| 190 | mut candidate := 0 |
| 191 | mut next_ip := ip |
| 192 | |
| 193 | // Inner probe loop: walk forward until we find a 4-byte match. |
| 194 | for { |
| 195 | ip = next_ip |
| 196 | hash := next_hash |
| 197 | bytes_until_skip := skip >> 5 |
| 198 | skip++ |
| 199 | next_ip = ip + bytes_until_skip |
| 200 | if next_ip > n - input_margin { |
| 201 | break outer |
| 202 | } |
| 203 | next_hash = mix32(load32le(input, next_ip), tbits) |
| 204 | candidate = int(table[hash]) |
| 205 | table[hash] = u16(ip) |
| 206 | if load32le(input, ip) == load32le(input, candidate) { |
| 207 | break |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | // We have a 4-byte match at [ip] == [candidate]. |
| 212 | // Emit the literal bytes accumulated since `lit`. |
| 213 | write_literal(mut out, input[lit..ip]) |
| 214 | |
| 215 | // Extend the match as far as possible. |
| 216 | for { |
| 217 | matched := 4 + find_match_length(input, candidate + 4, ip + 4, n) |
| 218 | write_copy(mut out, ip - candidate, matched) |
| 219 | ip += matched |
| 220 | lit = ip |
| 221 | |
| 222 | if ip >= n - input_margin { |
| 223 | break outer |
| 224 | } |
| 225 | |
| 226 | // Look for a consecutive match immediately after the copy. |
| 227 | // Note: mix32 already returns a value in [0, tsize), so the |
| 228 | // redundant `& tmask` from the original code is removed. |
| 229 | prev_hash := mix32(load32le(input, ip - 1), tbits) |
| 230 | table[prev_hash] = u16(ip - 1) |
| 231 | cur_hash := mix32(load32le(input, ip), tbits) |
| 232 | candidate = int(table[cur_hash]) |
| 233 | table[cur_hash] = u16(ip) |
| 234 | if load32le(input, ip) != load32le(input, candidate) { |
| 235 | // seed next_hash for the outer loop's next iteration |
| 236 | next_hash = mix32(load32le(input, ip + 1), tbits) |
| 237 | ip++ |
| 238 | break |
| 239 | } |
| 240 | } |
| 241 | } |
| 242 | } |
| 243 | |
| 244 | // Emit whatever literal bytes remain. |
| 245 | write_literal(mut out, input[lit..]) |
| 246 | } |
| 247 | |
| 248 | // write_literal emits a LITERAL tag followed by the literal bytes. |
| 249 | @[inline] |
| 250 | fn write_literal(mut out []u8, lit []u8) { |
| 251 | n := lit.len |
| 252 | if n == 0 { |
| 253 | return |
| 254 | } |
| 255 | n1 := n - 1 |
| 256 | if n1 < 60 { |
| 257 | out << u8(u32(n1) << 2) // tag byte encodes length-1 directly |
| 258 | } else { |
| 259 | w := byte_width(n1) |
| 260 | out << u8(u32(59 + w) << 2) // tag signals 1..4 extra length bytes |
| 261 | mut v := n1 |
| 262 | for _ in 0 .. w { |
| 263 | out << u8(v & 0xff) |
| 264 | v >>= 8 |
| 265 | } |
| 266 | } |
| 267 | out << lit |
| 268 | } |
| 269 | |
| 270 | // write_copy chooses and emits the most compact COPY tag for the given |
| 271 | // (offset, length) back-reference. |
| 272 | @[inline] |
| 273 | fn write_copy(mut out []u8, offset int, length int) { |
| 274 | mut remaining := length |
| 275 | for remaining > 0 { |
| 276 | // COPY_2 handles lengths 1..64; use it as the general case. |
| 277 | // COPY_1 saves a byte when offset < 2048 and length is 4..11. |
| 278 | chunk := if remaining > 64 { 64 } else { remaining } |
| 279 | if offset < 2048 && chunk >= 4 && chunk <= 11 { |
| 280 | // COPY_1: 2 bytes total |
| 281 | out << u8(u32(1) | (u32(chunk - 4) << 2) | ((u32(offset) >> 3) & u32(0xe0))) |
| 282 | out << u8(offset & 0xff) |
| 283 | } else if offset < 65536 { |
| 284 | // COPY_2: 3 bytes total |
| 285 | out << u8(2 | (u32(chunk - 1) << 2)) |
| 286 | out << u8(offset & 0xff) |
| 287 | out << u8((offset >> 8) & 0xff) |
| 288 | } else { |
| 289 | // COPY_4: 5 bytes total (never reached for 64 KiB blocks, left for completeness) |
| 290 | out << u8(3 | (u32(chunk - 1) << 2)) |
| 291 | out << u8(offset & 0xff) |
| 292 | out << u8((offset >> 8) & 0xff) |
| 293 | out << u8((offset >> 16) & 0xff) |
| 294 | out << u8((offset >> 24) & 0xff) |
| 295 | } |
| 296 | remaining -= chunk |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | // find_match_length returns how many bytes starting at s1 and s2 are equal, |
| 301 | // stopping before `limit`. Uses 4-byte word comparisons when possible. |
| 302 | @[inline] |
| 303 | fn find_match_length(input []u8, s1 int, s2 int, limit int) int { |
| 304 | mut i := 0 |
| 305 | // Compare 4 bytes at a time while there is room for a full word. |
| 306 | for s2 + i + 4 <= limit && s1 + i + 4 <= limit { |
| 307 | if load32le(input, s1 + i) != load32le(input, s2 + i) { |
| 308 | // Mismatch somewhere in this word — find the exact byte. |
| 309 | for s2 + i < limit && input[s1 + i] == input[s2 + i] { |
| 310 | i++ |
| 311 | } |
| 312 | return i |
| 313 | } |
| 314 | i += 4 |
| 315 | } |
| 316 | // Handle the remaining 0..3 tail bytes. |
| 317 | for s2 + i < limit && input[s1 + i] == input[s2 + i] { |
| 318 | i++ |
| 319 | } |
| 320 | return i |
| 321 | } |
| 322 | |
| 323 | // --------------------------------------------------------------------------- |
| 324 | // Decompression internals |
| 325 | // --------------------------------------------------------------------------- |
| 326 | |
| 327 | // expand_copy_into copies `length` bytes from `offset` bytes before the |
| 328 | // write cursor `op` in the pre-allocated `out` buffer. Returns the new |
| 329 | // write cursor. Handles the overlapping (run-length) case correctly. |
| 330 | @[inline] |
| 331 | fn expand_copy_into(mut out []u8, op int, cap_ int, offset int, length int) !int { |
| 332 | src := op - offset |
| 333 | if src < 0 { |
| 334 | return error('snappy: copy offset past beginning of output') |
| 335 | } |
| 336 | if op + length > cap_ { |
| 337 | return error('snappy: output exceeds declared length') |
| 338 | } |
| 339 | // Both overlapping and non-overlapping copies read from indices |
| 340 | // that were written in earlier iterations, so a forward byte loop |
| 341 | // is correct in all cases (and required for the RLE / overlap case). |
| 342 | for i in 0 .. length { |
| 343 | out[op + i] = out[src + i] |
| 344 | } |
| 345 | return op + length |
| 346 | } |
| 347 | |
| 348 | // decode_literal_len decodes the length (in bytes) of a literal run from the |
| 349 | // tag byte and returns (length, new_pos, ok). `pos` should point to the |
| 350 | // byte immediately after the tag. |
| 351 | @[inline] |
| 352 | fn decode_literal_len(data []u8, tag u8, pos int) (int, int, bool) { |
| 353 | n1 := int(tag >> 2) // length - 1, or 60..63 for extended form |
| 354 | if n1 < 60 { |
| 355 | return n1 + 1, pos, true |
| 356 | } |
| 357 | extra := n1 - 59 // number of extra bytes (1..4) |
| 358 | if pos + extra > data.len { |
| 359 | return 0, pos, false |
| 360 | } |
| 361 | mut val := 0 |
| 362 | for i in 0 .. extra { |
| 363 | val |= int(u32(data[pos + i]) << u32(8 * i)) |
| 364 | } |
| 365 | return val + 1, pos + extra, true |
| 366 | } |
| 367 | |
| 368 | // --------------------------------------------------------------------------- |
| 369 | // Varint (unsigned LEB-128, protobuf-style) |
| 370 | // --------------------------------------------------------------------------- |
| 371 | |
| 372 | // write_uvarint appends `val` as a variable-length unsigned integer. |
| 373 | @[inline] |
| 374 | fn write_uvarint(mut out []u8, val u32) { |
| 375 | mut v := val |
| 376 | for v >= 0x80 { |
| 377 | out << u8(v & 0x7f | 0x80) |
| 378 | v >>= 7 |
| 379 | } |
| 380 | out << u8(v) |
| 381 | } |
| 382 | |
| 383 | // read_uvarint decodes a varint from `data` starting at `pos`. |
| 384 | // Returns (value, new_pos, ok). |
| 385 | @[inline] |
| 386 | fn read_uvarint(data []u8, pos int) (u32, int, bool) { |
| 387 | mut val := u32(0) |
| 388 | mut shift := u32(0) |
| 389 | mut i := pos |
| 390 | for i < data.len && i - pos < 5 { |
| 391 | b := data[i] |
| 392 | i++ |
| 393 | if shift == 28 && b > 0x0f { |
| 394 | return 0, pos, false |
| 395 | } |
| 396 | val |= u32(b & 0x7f) << shift |
| 397 | if b & 0x80 == 0 { |
| 398 | return val, i, true |
| 399 | } |
| 400 | shift += 7 |
| 401 | } |
| 402 | return 0, pos, false |
| 403 | } |
| 404 | |
| 405 | // --------------------------------------------------------------------------- |
| 406 | // Utilities |
| 407 | // --------------------------------------------------------------------------- |
| 408 | |
| 409 | // load32le reads four bytes from `data[pos..]` as a little-endian u32. |
| 410 | @[inline] |
| 411 | fn load32le(data []u8, pos int) u32 { |
| 412 | return u32(data[pos]) | u32(data[pos + 1]) << 8 | u32(data[pos + 2]) << 16 | u32(data[pos + 3]) << 24 |
| 413 | } |
| 414 | |
| 415 | // mix32 hashes a 32-bit word down to a `tbits`-bit index using a |
| 416 | // multiplicative hash (same constant as the reference implementation). |
| 417 | @[inline] |
| 418 | fn mix32(v u32, tbits int) int { |
| 419 | return int((v * hash_magic) >> u32(32 - tbits)) |
| 420 | } |
| 421 | |
| 422 | // table_size_for returns the smallest power-of-two hash table size that |
| 423 | // comfortably covers `n` bytes, clamped to [min_table_size, max_table_size]. |
| 424 | fn table_size_for(n int) int { |
| 425 | mut size := min_table_size |
| 426 | for size < n && size < max_table_size { |
| 427 | size <<= 1 |
| 428 | } |
| 429 | return size |
| 430 | } |
| 431 | |
| 432 | // ilog2 returns floor(log2(n)) for n ≥ 1. |
| 433 | @[inline] |
| 434 | fn ilog2(n int) int { |
| 435 | mut v := n |
| 436 | mut r := 0 |
| 437 | for v > 1 { |
| 438 | v >>= 1 |
| 439 | r++ |
| 440 | } |
| 441 | return r |
| 442 | } |
| 443 | |
| 444 | // byte_width returns the number of bytes needed to represent `n` (1..4). |
| 445 | @[inline] |
| 446 | fn byte_width(n int) int { |
| 447 | if n < (1 << 8) { |
| 448 | return 1 |
| 449 | } |
| 450 | if n < (1 << 16) { |
| 451 | return 2 |
| 452 | } |
| 453 | if n < (1 << 24) { |
| 454 | return 3 |
| 455 | } |
| 456 | return 4 |
| 457 | } |
| 458 | |