v2 / vlib / compress / snappy / snappy_block.v
457 lines · 409 sloc · 12.81 KB · 37513121a31460af1448cbe957f474d30e6f0516
Raw
1// Implementation of Snappy compression fully in V.
2//
3// Compatible with the standard Snappy binary format. Implements level-1
4// (fast) compression using a hash-table approach over 64 KiB blocks.
5//
6// Public API:
7// compress(input []u8) []u8
8// decompress(input []u8) ![]u8
9
10module snappy
11
12// ---------------------------------------------------------------------------
13// Constants
14// ---------------------------------------------------------------------------
15
16const block_size = 1 << 16 // 65 536 — max block processed at once
17const min_table_size = 1 << 8 // 256
18const max_table_size = 1 << 15 // 32 768 (fits in u16 alongside block_size-1)
19const input_margin = 15 // bytes of look-ahead the compressor needs
20const hash_magic = u32(0x1e35a7bd)
21
22// ---------------------------------------------------------------------------
23// Public API
24// ---------------------------------------------------------------------------
25
26// max_compressed_length returns an upper bound on the compressed size for
27// `n` bytes of uncompressed input.
28fn max_compressed_length(n int) int {
29 return 32 + n + n / 6
30}
31
32// compress compresses `input` and returns the compressed bytes.
33pub fn compress(input []u8) []u8 {
34 n := input.len
35 mut out := []u8{cap: max_compressed_length(n)}
36
37 // Write the uncompressed length as a varint header.
38 write_uvarint(mut out, u32(n))
39
40 if n == 0 {
41 return out
42 }
43
44 // Allocate a single hash table reused across every block.
45 mut table := []u16{len: max_table_size}
46
47 mut pos := 0
48 for pos < n {
49 end := if pos + block_size < n { pos + block_size } else { n }
50 block := input[pos..end]
51
52 // Zero only the slice of the table we will actually use.
53 tsize := table_size_for(block.len)
54 for i in 0 .. tsize {
55 table[i] = 0
56 }
57
58 compress_fragment(block, mut out, mut table, tsize)
59 pos = end
60 }
61
62 return out
63}
64
65// decompress decompresses a Snappy-compressed slice.
66// It returns the original bytes, or an error if the data is malformed.
67pub fn decompress(input []u8) ![]u8 {
68 // Read the varint-encoded uncompressed length from the header.
69 ulen, hdr_end, ok := read_uvarint(input, 0)
70 if !ok {
71 return error('snappy: invalid header varint')
72 }
73
74 // Pre-allocate the exact output size; reject obviously bogus lengths.
75 ilen := int(ulen)
76 if ilen < 0 {
77 return error('snappy: decoded length overflow')
78 }
79
80 // Use a pre-sized buffer with a write cursor instead of dynamic appends.
81 // This eliminates per-byte reallocation checks on every literal and copy.
82 mut out := []u8{len: ilen}
83 mut op := 0
84 mut pos := hdr_end
85
86 for pos < input.len {
87 tag := input[pos]
88 pos++
89
90 tag_type := tag & 0x03
91
92 if tag_type == 0 {
93 // LITERAL
94 lit_len, new_pos, lit_ok := decode_literal_len(input, tag, pos)
95 if !lit_ok {
96 return error('snappy: truncated literal length')
97 }
98 pos = new_pos
99 if pos + lit_len > input.len {
100 return error('snappy: literal extends past input')
101 }
102 if op + lit_len > ilen {
103 return error('snappy: output exceeds declared length')
104 }
105 for i in 0 .. lit_len {
106 out[op + i] = input[pos + i]
107 }
108 op += lit_len
109 pos += lit_len
110 } else if tag_type == 1 {
111 // COPY_1 (2-byte offset)
112 if pos >= input.len {
113 return error('snappy: truncated copy-1 tag')
114 }
115 length := int((tag >> 2) & 0x07) + 4
116 offset := int(u32(tag & 0xe0) << 3 | u32(input[pos]))
117 pos++
118 if offset == 0 {
119 return error('snappy: zero offset in copy-1')
120 }
121 op = expand_copy_into(mut out, op, ilen, offset, length)!
122 } else if tag_type == 2 {
123 // COPY_2 (3-byte tag)
124 if pos + 1 >= input.len {
125 return error('snappy: truncated copy-2 tag')
126 }
127 length := int((tag >> 2) & 0x3f) + 1
128 offset := int(u32(input[pos]) | u32(input[pos + 1]) << 8)
129 pos += 2
130 if offset == 0 {
131 return error('snappy: zero offset in copy-2')
132 }
133 op = expand_copy_into(mut out, op, ilen, offset, length)!
134 } else {
135 // COPY_4 (5-byte tag)
136 if pos + 3 >= input.len {
137 return error('snappy: truncated copy-4 tag')
138 }
139 length := int((tag >> 2) & 0x3f) + 1
140 offset := int(u32(input[pos]) | u32(input[pos + 1]) << 8 | u32(input[pos + 2]) << 16 | u32(input[
141 pos + 3]) << 24)
142 pos += 4
143 if offset == 0 {
144 return error('snappy: zero offset in copy-4')
145 }
146 op = expand_copy_into(mut out, op, ilen, offset, length)!
147 }
148 }
149
150 if op != ilen {
151 return error('snappy: decompressed length mismatch: got ${op}, want ${ilen}')
152 }
153 return out
154}
155
156// ---------------------------------------------------------------------------
157// Compression internals
158// ---------------------------------------------------------------------------
159
160// compress_fragment compresses a single block (≤ 64 KiB) into `out`.
161// `table` must have at least `tsize` zeroed entries (tsize is a power of 2).
162fn compress_fragment(input []u8, mut out []u8, mut table []u16, tsize int) {
163 n := input.len
164 if n == 0 {
165 return
166 }
167
168 // tbits is the number of bits needed to index `tsize` entries; used
169 // to right-shift a 32-bit hash down to the table index.
170 tbits := ilog2(tsize)
171
172 // ip — current input position (index into `input`)
173 // lit — start of current literal run
174 mut ip := 0
175 mut lit := 0
176
177 // We need at least `input_margin` bytes of look-ahead for the main loop.
178 // Very short blocks go to the slow literal-only path.
179 if n >= input_margin {
180 // Emit one forced literal so we have a non-empty literal buffer,
181 // then begin the hash loop one byte in.
182 ip = 1
183
184 mut next_hash := mix32(load32le(input, ip), tbits)
185
186 outer: for {
187 // skip grows when matches are not found — we scan with
188 // increasing stride to amortize the cost.
189 mut skip := 32
190 mut candidate := 0
191 mut next_ip := ip
192
193 // Inner probe loop: walk forward until we find a 4-byte match.
194 for {
195 ip = next_ip
196 hash := next_hash
197 bytes_until_skip := skip >> 5
198 skip++
199 next_ip = ip + bytes_until_skip
200 if next_ip > n - input_margin {
201 break outer
202 }
203 next_hash = mix32(load32le(input, next_ip), tbits)
204 candidate = int(table[hash])
205 table[hash] = u16(ip)
206 if load32le(input, ip) == load32le(input, candidate) {
207 break
208 }
209 }
210
211 // We have a 4-byte match at [ip] == [candidate].
212 // Emit the literal bytes accumulated since `lit`.
213 write_literal(mut out, input[lit..ip])
214
215 // Extend the match as far as possible.
216 for {
217 matched := 4 + find_match_length(input, candidate + 4, ip + 4, n)
218 write_copy(mut out, ip - candidate, matched)
219 ip += matched
220 lit = ip
221
222 if ip >= n - input_margin {
223 break outer
224 }
225
226 // Look for a consecutive match immediately after the copy.
227 // Note: mix32 already returns a value in [0, tsize), so the
228 // redundant `& tmask` from the original code is removed.
229 prev_hash := mix32(load32le(input, ip - 1), tbits)
230 table[prev_hash] = u16(ip - 1)
231 cur_hash := mix32(load32le(input, ip), tbits)
232 candidate = int(table[cur_hash])
233 table[cur_hash] = u16(ip)
234 if load32le(input, ip) != load32le(input, candidate) {
235 // seed next_hash for the outer loop's next iteration
236 next_hash = mix32(load32le(input, ip + 1), tbits)
237 ip++
238 break
239 }
240 }
241 }
242 }
243
244 // Emit whatever literal bytes remain.
245 write_literal(mut out, input[lit..])
246}
247
248// write_literal emits a LITERAL tag followed by the literal bytes.
249@[inline]
250fn write_literal(mut out []u8, lit []u8) {
251 n := lit.len
252 if n == 0 {
253 return
254 }
255 n1 := n - 1
256 if n1 < 60 {
257 out << u8(u32(n1) << 2) // tag byte encodes length-1 directly
258 } else {
259 w := byte_width(n1)
260 out << u8(u32(59 + w) << 2) // tag signals 1..4 extra length bytes
261 mut v := n1
262 for _ in 0 .. w {
263 out << u8(v & 0xff)
264 v >>= 8
265 }
266 }
267 out << lit
268}
269
270// write_copy chooses and emits the most compact COPY tag for the given
271// (offset, length) back-reference.
272@[inline]
273fn write_copy(mut out []u8, offset int, length int) {
274 mut remaining := length
275 for remaining > 0 {
276 // COPY_2 handles lengths 1..64; use it as the general case.
277 // COPY_1 saves a byte when offset < 2048 and length is 4..11.
278 chunk := if remaining > 64 { 64 } else { remaining }
279 if offset < 2048 && chunk >= 4 && chunk <= 11 {
280 // COPY_1: 2 bytes total
281 out << u8(u32(1) | (u32(chunk - 4) << 2) | ((u32(offset) >> 3) & u32(0xe0)))
282 out << u8(offset & 0xff)
283 } else if offset < 65536 {
284 // COPY_2: 3 bytes total
285 out << u8(2 | (u32(chunk - 1) << 2))
286 out << u8(offset & 0xff)
287 out << u8((offset >> 8) & 0xff)
288 } else {
289 // COPY_4: 5 bytes total (never reached for 64 KiB blocks, left for completeness)
290 out << u8(3 | (u32(chunk - 1) << 2))
291 out << u8(offset & 0xff)
292 out << u8((offset >> 8) & 0xff)
293 out << u8((offset >> 16) & 0xff)
294 out << u8((offset >> 24) & 0xff)
295 }
296 remaining -= chunk
297 }
298}
299
300// find_match_length returns how many bytes starting at s1 and s2 are equal,
301// stopping before `limit`. Uses 4-byte word comparisons when possible.
302@[inline]
303fn find_match_length(input []u8, s1 int, s2 int, limit int) int {
304 mut i := 0
305 // Compare 4 bytes at a time while there is room for a full word.
306 for s2 + i + 4 <= limit && s1 + i + 4 <= limit {
307 if load32le(input, s1 + i) != load32le(input, s2 + i) {
308 // Mismatch somewhere in this word — find the exact byte.
309 for s2 + i < limit && input[s1 + i] == input[s2 + i] {
310 i++
311 }
312 return i
313 }
314 i += 4
315 }
316 // Handle the remaining 0..3 tail bytes.
317 for s2 + i < limit && input[s1 + i] == input[s2 + i] {
318 i++
319 }
320 return i
321}
322
323// ---------------------------------------------------------------------------
324// Decompression internals
325// ---------------------------------------------------------------------------
326
327// expand_copy_into copies `length` bytes from `offset` bytes before the
328// write cursor `op` in the pre-allocated `out` buffer. Returns the new
329// write cursor. Handles the overlapping (run-length) case correctly.
330@[inline]
331fn expand_copy_into(mut out []u8, op int, cap_ int, offset int, length int) !int {
332 src := op - offset
333 if src < 0 {
334 return error('snappy: copy offset past beginning of output')
335 }
336 if op + length > cap_ {
337 return error('snappy: output exceeds declared length')
338 }
339 // Both overlapping and non-overlapping copies read from indices
340 // that were written in earlier iterations, so a forward byte loop
341 // is correct in all cases (and required for the RLE / overlap case).
342 for i in 0 .. length {
343 out[op + i] = out[src + i]
344 }
345 return op + length
346}
347
348// decode_literal_len decodes the length (in bytes) of a literal run from the
349// tag byte and returns (length, new_pos, ok). `pos` should point to the
350// byte immediately after the tag.
351@[inline]
352fn decode_literal_len(data []u8, tag u8, pos int) (int, int, bool) {
353 n1 := int(tag >> 2) // length - 1, or 60..63 for extended form
354 if n1 < 60 {
355 return n1 + 1, pos, true
356 }
357 extra := n1 - 59 // number of extra bytes (1..4)
358 if pos + extra > data.len {
359 return 0, pos, false
360 }
361 mut val := 0
362 for i in 0 .. extra {
363 val |= int(u32(data[pos + i]) << u32(8 * i))
364 }
365 return val + 1, pos + extra, true
366}
367
368// ---------------------------------------------------------------------------
369// Varint (unsigned LEB-128, protobuf-style)
370// ---------------------------------------------------------------------------
371
372// write_uvarint appends `val` as a variable-length unsigned integer.
373@[inline]
374fn write_uvarint(mut out []u8, val u32) {
375 mut v := val
376 for v >= 0x80 {
377 out << u8(v & 0x7f | 0x80)
378 v >>= 7
379 }
380 out << u8(v)
381}
382
383// read_uvarint decodes a varint from `data` starting at `pos`.
384// Returns (value, new_pos, ok).
385@[inline]
386fn read_uvarint(data []u8, pos int) (u32, int, bool) {
387 mut val := u32(0)
388 mut shift := u32(0)
389 mut i := pos
390 for i < data.len && i - pos < 5 {
391 b := data[i]
392 i++
393 if shift == 28 && b > 0x0f {
394 return 0, pos, false
395 }
396 val |= u32(b & 0x7f) << shift
397 if b & 0x80 == 0 {
398 return val, i, true
399 }
400 shift += 7
401 }
402 return 0, pos, false
403}
404
405// ---------------------------------------------------------------------------
406// Utilities
407// ---------------------------------------------------------------------------
408
409// load32le reads four bytes from `data[pos..]` as a little-endian u32.
410@[inline]
411fn load32le(data []u8, pos int) u32 {
412 return u32(data[pos]) | u32(data[pos + 1]) << 8 | u32(data[pos + 2]) << 16 | u32(data[pos + 3]) << 24
413}
414
415// mix32 hashes a 32-bit word down to a `tbits`-bit index using a
416// multiplicative hash (same constant as the reference implementation).
417@[inline]
418fn mix32(v u32, tbits int) int {
419 return int((v * hash_magic) >> u32(32 - tbits))
420}
421
422// table_size_for returns the smallest power-of-two hash table size that
423// comfortably covers `n` bytes, clamped to [min_table_size, max_table_size].
424fn table_size_for(n int) int {
425 mut size := min_table_size
426 for size < n && size < max_table_size {
427 size <<= 1
428 }
429 return size
430}
431
432// ilog2 returns floor(log2(n)) for n ≥ 1.
433@[inline]
434fn ilog2(n int) int {
435 mut v := n
436 mut r := 0
437 for v > 1 {
438 v >>= 1
439 r++
440 }
441 return r
442}
443
444// byte_width returns the number of bytes needed to represent `n` (1..4).
445@[inline]
446fn byte_width(n int) int {
447 if n < (1 << 8) {
448 return 1
449 }
450 if n < (1 << 16) {
451 return 2
452 }
453 if n < (1 << 24) {
454 return 3
455 }
456 return 4
457}
458