Gitly


1 // Implementation of Snappy compression fully in V.
2 //
3 // Compatible with the standard Snappy binary format. Implements level-1
4 // (fast) compression using a hash-table approach over 64 KiB blocks.
5 //
6 // Public API:
7 //   compress(input []u8) []u8
8 //   decompress(input []u8) ![]u8
9 
10 module snappy
11 
12 // ---------------------------------------------------------------------------
13 // Constants
14 // ---------------------------------------------------------------------------
15 
16 const block_size = 1 << 16 // 65 536 — max block processed at once
17 const min_table_size = 1 << 8 // 256
18 const max_table_size = 1 << 15 // 32 768  (fits in u16 alongside block_size-1)
19 const input_margin = 15 // bytes of look-ahead the compressor needs
20 const hash_magic = u32(0x1e35a7bd)
21 
22 // ---------------------------------------------------------------------------
23 // Public API
24 // ---------------------------------------------------------------------------
25 
26 // max_compressed_length returns an upper bound on the compressed size for
27 // `n` bytes of uncompressed input.
28 fn max_compressed_length(n int) int {
29     return 32 + n + n / 6
30 }
31 
32 // compress compresses `input` and returns the compressed bytes.
33 pub fn compress(input []u8) []u8 {
34     n := input.len
35     mut out := []u8{cap: max_compressed_length(n)}
36 
37     // Write the uncompressed length as a varint header.
38     write_uvarint(mut out, u32(n))
39 
40     if n == 0 {
41         return out
42     }
43 
44     // Allocate a single hash table reused across every block.
45     mut table := []u16{len: max_table_size}
46 
47     mut pos := 0
48     for pos < n {
49         end := if pos + block_size < n { pos + block_size } else { n }
50         block := input[pos..end]
51 
52         // Zero only the slice of the table we will actually use.
53         tsize := table_size_for(block.len)
54         for i in 0 .. tsize {
55             table[i] = 0
56         }
57 
58         compress_fragment(block, mut out, mut table, tsize)
59         pos = end
60     }
61 
62     return out
63 }
64 
65 // decompress decompresses a Snappy-compressed slice.
66 // It returns the original bytes, or an error if the data is malformed.
67 pub fn decompress(input []u8) ![]u8 {
68     // Read the varint-encoded uncompressed length from the header.
69     ulen, hdr_end, ok := read_uvarint(input, 0)
70     if !ok {
71         return error('snappy: invalid header varint')
72     }
73 
74     // Pre-allocate the exact output size; reject obviously bogus lengths.
75     ilen := int(ulen)
76     if ilen < 0 {
77         return error('snappy: decoded length overflow')
78     }
79 
80     // Use a pre-sized buffer with a write cursor instead of dynamic appends.
81     // This eliminates per-byte reallocation checks on every literal and copy.
82     mut out := []u8{len: ilen}
83     mut op := 0
84     mut pos := hdr_end
85 
86     for pos < input.len {
87         tag := input[pos]
88         pos++
89 
90         tag_type := tag & 0x03
91 
92         if tag_type == 0 {
93             // LITERAL
94             lit_len, new_pos, lit_ok := decode_literal_len(input, tag, pos)
95             if !lit_ok {
96                 return error('snappy: truncated literal length')
97             }
98             pos = new_pos
99             if pos + lit_len > input.len {
100                 return error('snappy: literal extends past input')
101             }
102             if op + lit_len > ilen {
103                 return error('snappy: output exceeds declared length')
104             }
105             for i in 0 .. lit_len {
106                 out[op + i] = input[pos + i]
107             }
108             op += lit_len
109             pos += lit_len
110         } else if tag_type == 1 {
111             // COPY_1 (2-byte offset)
112             if pos >= input.len {
113                 return error('snappy: truncated copy-1 tag')
114             }
115             length := int((tag >> 2) & 0x07) + 4
116             offset := int(u32(tag & 0xe0) << 3 | u32(input[pos]))
117             pos++
118             if offset == 0 {
119                 return error('snappy: zero offset in copy-1')
120             }
121             op = expand_copy_into(mut out, op, ilen, offset, length)!
122         } else if tag_type == 2 {
123             // COPY_2 (3-byte tag)
124             if pos + 1 >= input.len {
125                 return error('snappy: truncated copy-2 tag')
126             }
127             length := int((tag >> 2) & 0x3f) + 1
128             offset := int(u32(input[pos]) | u32(input[pos + 1]) << 8)
129             pos += 2
130             if offset == 0 {
131                 return error('snappy: zero offset in copy-2')
132             }
133             op = expand_copy_into(mut out, op, ilen, offset, length)!
134         } else {
135             // COPY_4 (5-byte tag)
136             if pos + 3 >= input.len {
137                 return error('snappy: truncated copy-4 tag')
138             }
139             length := int((tag >> 2) & 0x3f) + 1
140             offset := int(u32(input[pos]) | u32(input[pos + 1]) << 8 | u32(input[pos + 2]) << 16 | u32(input[
141                 pos + 3]) << 24)
142             pos += 4
143             if offset == 0 {
144                 return error('snappy: zero offset in copy-4')
145             }
146             op = expand_copy_into(mut out, op, ilen, offset, length)!
147         }
148     }
149 
150     if op != ilen {
151         return error('snappy: decompressed length mismatch: got ${op}, want ${ilen}')
152     }
153     return out
154 }
155 
156 // ---------------------------------------------------------------------------
157 // Compression internals
158 // ---------------------------------------------------------------------------
159 
160 // compress_fragment compresses a single block (≤ 64 KiB) into `out`.
161 // `table` must have at least `tsize` zeroed entries (tsize is a power of 2).
162 fn compress_fragment(input []u8, mut out []u8, mut table []u16, tsize int) {
163     n := input.len
164     if n == 0 {
165         return
166     }
167 
168     // tbits is the number of bits needed to index `tsize` entries; used
169     // to right-shift a 32-bit hash down to the table index.
170     tbits := ilog2(tsize)
171 
172     // ip  — current input position (index into `input`)
173     // lit — start of current literal run
174     mut ip := 0
175     mut lit := 0
176 
177     // We need at least `input_margin` bytes of look-ahead for the main loop.
178     // Very short blocks go to the slow literal-only path.
179     if n >= input_margin {
180         // Emit one forced literal so we have a non-empty literal buffer,
181         // then begin the hash loop one byte in.
182         ip = 1
183 
184         mut next_hash := mix32(load32le(input, ip), tbits)
185 
186         outer: for {
187             // skip grows when matches are not found — we scan with
188             // increasing stride to amortize the cost.
189             mut skip := 32
190             mut candidate := 0
191             mut next_ip := ip
192 
193             // Inner probe loop: walk forward until we find a 4-byte match.
194             for {
195                 ip = next_ip
196                 hash := next_hash
197                 bytes_until_skip := skip >> 5
198                 skip++
199                 next_ip = ip + bytes_until_skip
200                 if next_ip > n - input_margin {
201                     break outer
202                 }
203                 next_hash = mix32(load32le(input, next_ip), tbits)
204                 candidate = int(table[hash])
205                 table[hash] = u16(ip)
206                 if load32le(input, ip) == load32le(input, candidate) {
207                     break
208                 }
209             }
210 
211             // We have a 4-byte match at [ip] == [candidate].
212             // Emit the literal bytes accumulated since `lit`.
213             write_literal(mut out, input[lit..ip])
214 
215             // Extend the match as far as possible.
216             for {
217                 matched := 4 + find_match_length(input, candidate + 4, ip + 4, n)
218                 write_copy(mut out, ip - candidate, matched)
219                 ip += matched
220                 lit = ip
221 
222                 if ip >= n - input_margin {
223                     break outer
224                 }
225 
226                 // Look for a consecutive match immediately after the copy.
227                 // Note: mix32 already returns a value in [0, tsize), so the
228                 // redundant `& tmask` from the original code is removed.
229                 prev_hash := mix32(load32le(input, ip - 1), tbits)
230                 table[prev_hash] = u16(ip - 1)
231                 cur_hash := mix32(load32le(input, ip), tbits)
232                 candidate = int(table[cur_hash])
233                 table[cur_hash] = u16(ip)
234                 if load32le(input, ip) != load32le(input, candidate) {
235                     // seed next_hash for the outer loop's next iteration
236                     next_hash = mix32(load32le(input, ip + 1), tbits)
237                     ip++
238                     break
239                 }
240             }
241         }
242     }
243 
244     // Emit whatever literal bytes remain.
245     write_literal(mut out, input[lit..])
246 }
247 
248 // write_literal emits a LITERAL tag followed by the literal bytes.
249 @[inline]
250 fn write_literal(mut out []u8, lit []u8) {
251     n := lit.len
252     if n == 0 {
253         return
254     }
255     n1 := n - 1
256     if n1 < 60 {
257         out << u8(u32(n1) << 2) // tag byte encodes length-1 directly
258     } else {
259         w := byte_width(n1)
260         out << u8(u32(59 + w) << 2) // tag signals 1..4 extra length bytes
261         mut v := n1
262         for _ in 0 .. w {
263             out << u8(v & 0xff)
264             v >>= 8
265         }
266     }
267     out << lit
268 }
269 
270 // write_copy chooses and emits the most compact COPY tag for the given
271 // (offset, length) back-reference.
272 @[inline]
273 fn write_copy(mut out []u8, offset int, length int) {
274     mut remaining := length
275     for remaining > 0 {
276         // COPY_2 handles lengths 1..64; use it as the general case.
277         // COPY_1 saves a byte when offset < 2048 and length is 4..11.
278         chunk := if remaining > 64 { 64 } else { remaining }
279         if offset < 2048 && chunk >= 4 && chunk <= 11 {
280             // COPY_1: 2 bytes total
281             out << u8(u32(1) | (u32(chunk - 4) << 2) | ((u32(offset) >> 3) & u32(0xe0)))
282             out << u8(offset & 0xff)
283         } else if offset < 65536 {
284             // COPY_2: 3 bytes total
285             out << u8(2 | (u32(chunk - 1) << 2))
286             out << u8(offset & 0xff)
287             out << u8((offset >> 8) & 0xff)
288         } else {
289             // COPY_4: 5 bytes total (never reached for 64 KiB blocks, left for completeness)
290             out << u8(3 | (u32(chunk - 1) << 2))
291             out << u8(offset & 0xff)
292             out << u8((offset >> 8) & 0xff)
293             out << u8((offset >> 16) & 0xff)
294             out << u8((offset >> 24) & 0xff)
295         }
296         remaining -= chunk
297     }
298 }
299 
300 // find_match_length returns how many bytes starting at s1 and s2 are equal,
301 // stopping before `limit`. Uses 4-byte word comparisons when possible.
302 @[inline]
303 fn find_match_length(input []u8, s1 int, s2 int, limit int) int {
304     mut i := 0
305     // Compare 4 bytes at a time while there is room for a full word.
306     for s2 + i + 4 <= limit && s1 + i + 4 <= limit {
307         if load32le(input, s1 + i) != load32le(input, s2 + i) {
308             // Mismatch somewhere in this word — find the exact byte.
309             for s2 + i < limit && input[s1 + i] == input[s2 + i] {
310                 i++
311             }
312             return i
313         }
314         i += 4
315     }
316     // Handle the remaining 0..3 tail bytes.
317     for s2 + i < limit && input[s1 + i] == input[s2 + i] {
318         i++
319     }
320     return i
321 }
322 
323 // ---------------------------------------------------------------------------
324 // Decompression internals
325 // ---------------------------------------------------------------------------
326 
327 // expand_copy_into copies `length` bytes from `offset` bytes before the
328 // write cursor `op` in the pre-allocated `out` buffer. Returns the new
329 // write cursor. Handles the overlapping (run-length) case correctly.
330 @[inline]
331 fn expand_copy_into(mut out []u8, op int, cap_ int, offset int, length int) !int {
332     src := op - offset
333     if src < 0 {
334         return error('snappy: copy offset past beginning of output')
335     }
336     if op + length > cap_ {
337         return error('snappy: output exceeds declared length')
338     }
339     // Both overlapping and non-overlapping copies read from indices
340     // that were written in earlier iterations, so a forward byte loop
341     // is correct in all cases (and required for the RLE / overlap case).
342     for i in 0 .. length {
343         out[op + i] = out[src + i]
344     }
345     return op + length
346 }
347 
348 // decode_literal_len decodes the length (in bytes) of a literal run from the
349 // tag byte and returns (length, new_pos, ok). `pos` should point to the
350 // byte immediately after the tag.
351 @[inline]
352 fn decode_literal_len(data []u8, tag u8, pos int) (int, int, bool) {
353     n1 := int(tag >> 2) // length - 1, or 60..63 for extended form
354     if n1 < 60 {
355         return n1 + 1, pos, true
356     }
357     extra := n1 - 59 // number of extra bytes (1..4)
358     if pos + extra > data.len {
359         return 0, pos, false
360     }
361     mut val := 0
362     for i in 0 .. extra {
363         val |= int(u32(data[pos + i]) << u32(8 * i))
364     }
365     return val + 1, pos + extra, true
366 }
367 
368 // ---------------------------------------------------------------------------
369 // Varint (unsigned LEB-128, protobuf-style)
370 // ---------------------------------------------------------------------------
371 
372 // write_uvarint appends `val` as a variable-length unsigned integer.
373 @[inline]
374 fn write_uvarint(mut out []u8, val u32) {
375     mut v := val
376     for v >= 0x80 {
377         out << u8(v & 0x7f | 0x80)
378         v >>= 7
379     }
380     out << u8(v)
381 }
382 
383 // read_uvarint decodes a varint from `data` starting at `pos`.
384 // Returns (value, new_pos, ok).
385 @[inline]
386 fn read_uvarint(data []u8, pos int) (u32, int, bool) {
387     mut val := u32(0)
388     mut shift := u32(0)
389     mut i := pos
390     for i < data.len && i - pos < 5 {
391         b := data[i]
392         i++
393         if shift == 28 && b > 0x0f {
394             return 0, pos, false
395         }
396         val |= u32(b & 0x7f) << shift
397         if b & 0x80 == 0 {
398             return val, i, true
399         }
400         shift += 7
401     }
402     return 0, pos, false
403 }
404 
405 // ---------------------------------------------------------------------------
406 // Utilities
407 // ---------------------------------------------------------------------------
408 
409 // load32le reads four bytes from `data[pos..]` as a little-endian u32.
410 @[inline]
411 fn load32le(data []u8, pos int) u32 {
412     return u32(data[pos]) | u32(data[pos + 1]) << 8 | u32(data[pos + 2]) << 16 | u32(data[pos + 3]) << 24
413 }
414 
415 // mix32 hashes a 32-bit word down to a `tbits`-bit index using a
416 // multiplicative hash (same constant as the reference implementation).
417 @[inline]
418 fn mix32(v u32, tbits int) int {
419     return int((v * hash_magic) >> u32(32 - tbits))
420 }
421 
422 // table_size_for returns the smallest power-of-two hash table size that
423 // comfortably covers `n` bytes, clamped to [min_table_size, max_table_size].
424 fn table_size_for(n int) int {
425     mut size := min_table_size
426     for size < n && size < max_table_size {
427         size <<= 1
428     }
429     return size
430 }
431 
432 // ilog2 returns floor(log2(n)) for n ≥ 1.
433 @[inline]
434 fn ilog2(n int) int {
435     mut v := n
436     mut r := 0
437     for v > 1 {
438         v >>= 1
439         r++
440     }
441     return r
442 }
443 
444 // byte_width returns the number of bytes needed to represent `n` (1..4).
445 @[inline]
446 fn byte_width(n int) int {
447     if n < (1 << 8) {
448         return 1
449     }
450     if n < (1 << 16) {
451         return 2
452     }
453     if n < (1 << 24) {
454         return 3
455     }
456     return 4
457 }
458

1	// Implementation of Snappy compression fully in V.
2	//
3	// Compatible with the standard Snappy binary format. Implements level-1
4	// (fast) compression using a hash-table approach over 64 KiB blocks.
5	//
6	// Public API:
7	// compress(input []u8) []u8
8	// decompress(input []u8) ![]u8
9
10	module snappy
11
12	// ---------------------------------------------------------------------------
13	// Constants
14	// ---------------------------------------------------------------------------
15
16	const block_size = 1 << 16 // 65 536 — max block processed at once
17	const min_table_size = 1 << 8 // 256
18	const max_table_size = 1 << 15 // 32 768 (fits in u16 alongside block_size-1)
19	const input_margin = 15 // bytes of look-ahead the compressor needs
20	const hash_magic = u32(0x1e35a7bd)
21
22	// ---------------------------------------------------------------------------
23	// Public API
24	// ---------------------------------------------------------------------------
25
26	// max_compressed_length returns an upper bound on the compressed size for
27	// `n` bytes of uncompressed input.
28	fn max_compressed_length(n int) int {
29	return 32 + n + n / 6
30	}
31
32	// compress compresses `input` and returns the compressed bytes.
33	pub fn compress(input []u8) []u8 {
34	n := input.len
35	mut out := []u8{cap: max_compressed_length(n)}
36
37	// Write the uncompressed length as a varint header.
38	write_uvarint(mut out, u32(n))
39
40	if n == 0 {
41	return out
42	}
43
44	// Allocate a single hash table reused across every block.
45	mut table := []u16{len: max_table_size}
46
47	mut pos := 0
48	for pos < n {
49	end := if pos + block_size < n { pos + block_size } else { n }
50	block := input[pos..end]
51
52	// Zero only the slice of the table we will actually use.
53	tsize := table_size_for(block.len)
54	for i in 0 .. tsize {
55	table[i] = 0
56	}
57
58	compress_fragment(block, mut out, mut table, tsize)
59	pos = end
60	}
61
62	return out
63	}
64
65	// decompress decompresses a Snappy-compressed slice.
66	// It returns the original bytes, or an error if the data is malformed.
67	pub fn decompress(input []u8) ![]u8 {
68	// Read the varint-encoded uncompressed length from the header.
69	ulen, hdr_end, ok := read_uvarint(input, 0)
70	if !ok {
71	return error('snappy: invalid header varint')
72	}
73
74	// Pre-allocate the exact output size; reject obviously bogus lengths.
75	ilen := int(ulen)
76	if ilen < 0 {
77	return error('snappy: decoded length overflow')
78	}
79
80	// Use a pre-sized buffer with a write cursor instead of dynamic appends.
81	// This eliminates per-byte reallocation checks on every literal and copy.
82	mut out := []u8{len: ilen}
83	mut op := 0
84	mut pos := hdr_end
85
86	for pos < input.len {
87	tag := input[pos]
88	pos++
89
90	tag_type := tag & 0x03
91
92	if tag_type == 0 {
93	// LITERAL
94	lit_len, new_pos, lit_ok := decode_literal_len(input, tag, pos)
95	if !lit_ok {
96	return error('snappy: truncated literal length')
97	}
98	pos = new_pos
99	if pos + lit_len > input.len {
100	return error('snappy: literal extends past input')
101	}
102	if op + lit_len > ilen {
103	return error('snappy: output exceeds declared length')
104	}
105	for i in 0 .. lit_len {
106	out[op + i] = input[pos + i]
107	}
108	op += lit_len
109	pos += lit_len
110	} else if tag_type == 1 {
111	// COPY_1 (2-byte offset)
112	if pos >= input.len {
113	return error('snappy: truncated copy-1 tag')
114	}
115	length := int((tag >> 2) & 0x07) + 4
116	offset := int(u32(tag & 0xe0) << 3 \| u32(input[pos]))
117	pos++
118	if offset == 0 {
119	return error('snappy: zero offset in copy-1')
120	}
121	op = expand_copy_into(mut out, op, ilen, offset, length)!
122	} else if tag_type == 2 {
123	// COPY_2 (3-byte tag)
124	if pos + 1 >= input.len {
125	return error('snappy: truncated copy-2 tag')
126	}
127	length := int((tag >> 2) & 0x3f) + 1
128	offset := int(u32(input[pos]) \| u32(input[pos + 1]) << 8)
129	pos += 2
130	if offset == 0 {
131	return error('snappy: zero offset in copy-2')
132	}
133	op = expand_copy_into(mut out, op, ilen, offset, length)!
134	} else {
135	// COPY_4 (5-byte tag)
136	if pos + 3 >= input.len {
137	return error('snappy: truncated copy-4 tag')
138	}
139	length := int((tag >> 2) & 0x3f) + 1
140	offset := int(u32(input[pos]) \| u32(input[pos + 1]) << 8 \| u32(input[pos + 2]) << 16 \| u32(input[
141	pos + 3]) << 24)
142	pos += 4
143	if offset == 0 {
144	return error('snappy: zero offset in copy-4')
145	}
146	op = expand_copy_into(mut out, op, ilen, offset, length)!
147	}
148	}
149
150	if op != ilen {
151	return error('snappy: decompressed length mismatch: got ${op}, want ${ilen}')
152	}
153	return out
154	}
155
156	// ---------------------------------------------------------------------------
157	// Compression internals
158	// ---------------------------------------------------------------------------
159
160	// compress_fragment compresses a single block (≤ 64 KiB) into `out`.
161	// `table` must have at least `tsize` zeroed entries (tsize is a power of 2).
162	fn compress_fragment(input []u8, mut out []u8, mut table []u16, tsize int) {
163	n := input.len
164	if n == 0 {
165	return
166	}
167
168	// tbits is the number of bits needed to index `tsize` entries; used
169	// to right-shift a 32-bit hash down to the table index.
170	tbits := ilog2(tsize)
171
172	// ip — current input position (index into `input`)
173	// lit — start of current literal run
174	mut ip := 0
175	mut lit := 0
176
177	// We need at least `input_margin` bytes of look-ahead for the main loop.
178	// Very short blocks go to the slow literal-only path.
179	if n >= input_margin {
180	// Emit one forced literal so we have a non-empty literal buffer,
181	// then begin the hash loop one byte in.
182	ip = 1
183
184	mut next_hash := mix32(load32le(input, ip), tbits)
185
186	outer: for {
187	// skip grows when matches are not found — we scan with
188	// increasing stride to amortize the cost.
189	mut skip := 32
190	mut candidate := 0
191	mut next_ip := ip
192
193	// Inner probe loop: walk forward until we find a 4-byte match.
194	for {
195	ip = next_ip
196	hash := next_hash
197	bytes_until_skip := skip >> 5
198	skip++
199	next_ip = ip + bytes_until_skip
200	if next_ip > n - input_margin {
201	break outer
202	}
203	next_hash = mix32(load32le(input, next_ip), tbits)
204	candidate = int(table[hash])
205	table[hash] = u16(ip)
206	if load32le(input, ip) == load32le(input, candidate) {
207	break
208	}
209	}
210
211	// We have a 4-byte match at [ip] == [candidate].
212	// Emit the literal bytes accumulated since `lit`.
213	write_literal(mut out, input[lit..ip])
214
215	// Extend the match as far as possible.
216	for {
217	matched := 4 + find_match_length(input, candidate + 4, ip + 4, n)
218	write_copy(mut out, ip - candidate, matched)
219	ip += matched
220	lit = ip
221
222	if ip >= n - input_margin {
223	break outer
224	}
225
226	// Look for a consecutive match immediately after the copy.
227	// Note: mix32 already returns a value in [0, tsize), so the
228	// redundant `& tmask` from the original code is removed.
229	prev_hash := mix32(load32le(input, ip - 1), tbits)
230	table[prev_hash] = u16(ip - 1)
231	cur_hash := mix32(load32le(input, ip), tbits)
232	candidate = int(table[cur_hash])
233	table[cur_hash] = u16(ip)
234	if load32le(input, ip) != load32le(input, candidate) {
235	// seed next_hash for the outer loop's next iteration
236	next_hash = mix32(load32le(input, ip + 1), tbits)
237	ip++
238	break
239	}
240	}
241	}
242	}
243
244	// Emit whatever literal bytes remain.
245	write_literal(mut out, input[lit..])
246	}
247
248	// write_literal emits a LITERAL tag followed by the literal bytes.
249	@[inline]
250	fn write_literal(mut out []u8, lit []u8) {
251	n := lit.len
252	if n == 0 {
253	return
254	}
255	n1 := n - 1
256	if n1 < 60 {
257	out << u8(u32(n1) << 2) // tag byte encodes length-1 directly
258	} else {
259	w := byte_width(n1)
260	out << u8(u32(59 + w) << 2) // tag signals 1..4 extra length bytes
261	mut v := n1
262	for _ in 0 .. w {
263	out << u8(v & 0xff)
264	v >>= 8
265	}
266	}
267	out << lit
268	}
269
270	// write_copy chooses and emits the most compact COPY tag for the given
271	// (offset, length) back-reference.
272	@[inline]
273	fn write_copy(mut out []u8, offset int, length int) {
274	mut remaining := length
275	for remaining > 0 {
276	// COPY_2 handles lengths 1..64; use it as the general case.
277	// COPY_1 saves a byte when offset < 2048 and length is 4..11.
278	chunk := if remaining > 64 { 64 } else { remaining }
279	if offset < 2048 && chunk >= 4 && chunk <= 11 {
280	// COPY_1: 2 bytes total
281	out << u8(u32(1) \| (u32(chunk - 4) << 2) \| ((u32(offset) >> 3) & u32(0xe0)))
282	out << u8(offset & 0xff)
283	} else if offset < 65536 {
284	// COPY_2: 3 bytes total
285	out << u8(2 \| (u32(chunk - 1) << 2))
286	out << u8(offset & 0xff)
287	out << u8((offset >> 8) & 0xff)
288	} else {
289	// COPY_4: 5 bytes total (never reached for 64 KiB blocks, left for completeness)
290	out << u8(3 \| (u32(chunk - 1) << 2))
291	out << u8(offset & 0xff)
292	out << u8((offset >> 8) & 0xff)
293	out << u8((offset >> 16) & 0xff)
294	out << u8((offset >> 24) & 0xff)
295	}
296	remaining -= chunk
297	}
298	}
299
300	// find_match_length returns how many bytes starting at s1 and s2 are equal,
301	// stopping before `limit`. Uses 4-byte word comparisons when possible.
302	@[inline]
303	fn find_match_length(input []u8, s1 int, s2 int, limit int) int {
304	mut i := 0
305	// Compare 4 bytes at a time while there is room for a full word.
306	for s2 + i + 4 <= limit && s1 + i + 4 <= limit {
307	if load32le(input, s1 + i) != load32le(input, s2 + i) {
308	// Mismatch somewhere in this word — find the exact byte.
309	for s2 + i < limit && input[s1 + i] == input[s2 + i] {
310	i++
311	}
312	return i
313	}
314	i += 4
315	}
316	// Handle the remaining 0..3 tail bytes.
317	for s2 + i < limit && input[s1 + i] == input[s2 + i] {
318	i++
319	}
320	return i
321	}
322
323	// ---------------------------------------------------------------------------
324	// Decompression internals
325	// ---------------------------------------------------------------------------
326
327	// expand_copy_into copies `length` bytes from `offset` bytes before the
328	// write cursor `op` in the pre-allocated `out` buffer. Returns the new
329	// write cursor. Handles the overlapping (run-length) case correctly.
330	@[inline]
331	fn expand_copy_into(mut out []u8, op int, cap_ int, offset int, length int) !int {
332	src := op - offset
333	if src < 0 {
334	return error('snappy: copy offset past beginning of output')
335	}
336	if op + length > cap_ {
337	return error('snappy: output exceeds declared length')
338	}
339	// Both overlapping and non-overlapping copies read from indices
340	// that were written in earlier iterations, so a forward byte loop
341	// is correct in all cases (and required for the RLE / overlap case).
342	for i in 0 .. length {
343	out[op + i] = out[src + i]
344	}
345	return op + length
346	}
347
348	// decode_literal_len decodes the length (in bytes) of a literal run from the
349	// tag byte and returns (length, new_pos, ok). `pos` should point to the
350	// byte immediately after the tag.
351	@[inline]
352	fn decode_literal_len(data []u8, tag u8, pos int) (int, int, bool) {
353	n1 := int(tag >> 2) // length - 1, or 60..63 for extended form
354	if n1 < 60 {
355	return n1 + 1, pos, true
356	}
357	extra := n1 - 59 // number of extra bytes (1..4)
358	if pos + extra > data.len {
359	return 0, pos, false
360	}
361	mut val := 0
362	for i in 0 .. extra {
363	val \|= int(u32(data[pos + i]) << u32(8 * i))
364	}
365	return val + 1, pos + extra, true
366	}
367
368	// ---------------------------------------------------------------------------
369	// Varint (unsigned LEB-128, protobuf-style)
370	// ---------------------------------------------------------------------------
371
372	// write_uvarint appends `val` as a variable-length unsigned integer.
373	@[inline]
374	fn write_uvarint(mut out []u8, val u32) {
375	mut v := val
376	for v >= 0x80 {
377	out << u8(v & 0x7f \| 0x80)
378	v >>= 7
379	}
380	out << u8(v)
381	}
382
383	// read_uvarint decodes a varint from `data` starting at `pos`.
384	// Returns (value, new_pos, ok).
385	@[inline]
386	fn read_uvarint(data []u8, pos int) (u32, int, bool) {
387	mut val := u32(0)
388	mut shift := u32(0)
389	mut i := pos
390	for i < data.len && i - pos < 5 {
391	b := data[i]
392	i++
393	if shift == 28 && b > 0x0f {
394	return 0, pos, false
395	}
396	val \|= u32(b & 0x7f) << shift
397	if b & 0x80 == 0 {
398	return val, i, true
399	}
400	shift += 7
401	}
402	return 0, pos, false
403	}
404
405	// ---------------------------------------------------------------------------
406	// Utilities
407	// ---------------------------------------------------------------------------
408
409	// load32le reads four bytes from `data[pos..]` as a little-endian u32.
410	@[inline]
411	fn load32le(data []u8, pos int) u32 {
412	return u32(data[pos]) \| u32(data[pos + 1]) << 8 \| u32(data[pos + 2]) << 16 \| u32(data[pos + 3]) << 24
413	}
414
415	// mix32 hashes a 32-bit word down to a `tbits`-bit index using a
416	// multiplicative hash (same constant as the reference implementation).
417	@[inline]
418	fn mix32(v u32, tbits int) int {
419	return int((v * hash_magic) >> u32(32 - tbits))
420	}
421
422	// table_size_for returns the smallest power-of-two hash table size that
423	// comfortably covers `n` bytes, clamped to [min_table_size, max_table_size].
424	fn table_size_for(n int) int {
425	mut size := min_table_size
426	for size < n && size < max_table_size {
427	size <<= 1
428	}
429	return size
430	}
431
432	// ilog2 returns floor(log2(n)) for n ≥ 1.
433	@[inline]
434	fn ilog2(n int) int {
435	mut v := n
436	mut r := 0
437	for v > 1 {
438	v >>= 1
439	r++
440	}
441	return r
442	}
443
444	// byte_width returns the number of bytes needed to represent `n` (1..4).
445	@[inline]
446	fn byte_width(n int) int {
447	if n < (1 << 8) {
448	return 1
449	}
450	if n < (1 << 16) {
451	return 2
452	}
453	if n < (1 << 24) {
454	return 3
455	}
456	return 4
457	}
458