v / vlib / compress / deflate / deflate.v
291 lines · 270 sloc · 8.69 KB · a0632356d23a7c6ee16e85f89a2ef5ca2d360245
Raw
1module deflate
2
3import encoding.binary
4import hash.adler32
5import hash.crc32
6
// ChunkCallback is the function type accepted by decompress_with_callback.
// It is called with a chunk of bytes and the caller-supplied `userdata` pointer,
// and returns an int. The decompress_with_callback doc comment states the return
// value should be the chunk length to continue, or 0 to abort.
pub type ChunkCallback = fn (chunk []u8, userdata voidptr) int
8
// CompressFormat selects the output container around the RFC 1951 payload.
pub enum CompressFormat {
	// zlib wraps the payload in an RFC 1950 stream (see compress_zlib).
	zlib
	// gzip wraps the payload in an RFC 1952 stream (see compress_gzip).
	gzip
	// raw_deflate emits the bare RFC 1951 payload with no container.
	raw_deflate
}
15
// CompressParams carries the options accepted by compress.
// The @[params] attribute lets callers omit it or pass the fields by name.
@[params]
pub struct CompressParams {
pub:
	// format is the container to produce; it defaults to zlib.
	format CompressFormat = .zlib
}
21
// RawInflateResult is the value returned by decompress_raw_with_consumed.
pub struct RawInflateResult {
pub:
	// decoded holds the decompressed bytes.
	decoded []u8
	// consumed is copied from the `consumed` field of inflate_with_consumed's result;
	// presumably the number of input bytes read - confirm against that function.
	consumed int
}
27
// ZlibHeader is the result of validate_zlib_header.
pub struct ZlibHeader {
pub:
	// payload_start is the offset at which the callers in this file start slicing
	// the DEFLATE payload; it defaults to 2.
	payload_start int = 2
}
32
// GzipHeader holds the fields parsed from a gzip header by validate_gzip_header.
pub struct GzipHeader {
pub mut:
	// flags is the FLG byte (byte 3 of the stream).
	flags u8
	// payload_start is the offset of the first byte after the header, i.e. where
	// the callers in this file start slicing the DEFLATE payload.
	payload_start int
	// extra is the content of the optional extra field (FLG bit 0x04).
	extra []u8
	// filename is the optional zero-terminated file name (FLG bit 0x08), without the terminator.
	filename []u8
	// comment is the optional zero-terminated comment (FLG bit 0x10), without the terminator.
	comment []u8
	// modification_time is the little-endian u32 read from bytes 4..8.
	modification_time u32
	// operating_system is byte 9 of the stream.
	operating_system u8
}
43
// validate_zlib_header validates the RFC 1950 zlib header at the start of `data`.
// On success it returns a ZlibHeader with its default payload offset.
// It returns an error when:
// - `data` is shorter than 6 bytes,
// - the compression method (low nibble of CMF) is not 8,
// - the CMF/FLG pair is not a multiple of 31 (FCHECK),
// - the FDICT bit (0x20 of FLG) is set, since preset dictionaries are not supported,
// - CINFO (high nibble of CMF) is above 7, which RFC 1950 does not allow.
@[direct_array_access]
pub fn validate_zlib_header(data []u8) !ZlibHeader {
	if data.len < 6 {
		return error('invalid zlib stream: too short')
	}
	cmf := data[0]
	flg := data[1]
	if (cmf & 0x0f) != 8 {
		return error('invalid zlib stream: unsupported compression method')
	}
	if (u32(cmf) * 256 + u32(flg)) % 31 != 0 {
		return error('invalid zlib stream: bad header checksum')
	}
	if (flg & 0x20) != 0 {
		return error('invalid zlib stream: preset dictionary not supported')
	}
	// CINFO is log2(window size) - 8; values above 7 (window > 32 KiB) are invalid.
	// Checked last so that streams rejected by the checks above keep their error.
	if (cmf >> 4) > 7 {
		return error('invalid zlib stream: invalid window size')
	}
	return ZlibHeader{}
}
61
// validate_gzip_header validates a RFC 1952 gzip header and returns parsed fields.
// `payload_start` of the returned header is the offset of the first byte after
// the header. `extra` is a slice of `data` (not cloned); `filename` and `comment`
// are copies that exclude the zero terminator.
// It returns an error when:
// - `data` is shorter than 18 bytes,
// - the magic bytes are not 0x1f 0x8b, or the compression method is not 8,
// - any reserved flag bit (0xe0) is set,
// - the extra field, file name, comment or header CRC16 runs past the end of `data`,
// - the header CRC16 (FLG bit 0x02) does not match the low 16 bits of the CRC-32
//   of the header bytes that precede it,
// - fewer than 8 bytes remain after the header.
@[direct_array_access]
pub fn validate_gzip_header(data []u8) !GzipHeader {
	if data.len < 18 {
		return error('invalid gzip stream: too short')
	}
	if data[0] != 0x1f || data[1] != 0x8b {
		return error('invalid gzip stream: bad magic')
	}
	if data[2] != 8 {
		return error('invalid gzip stream: unsupported compression method')
	}
	flg := data[3]
	if (flg & 0xe0) != 0 {
		return error('invalid gzip stream: reserved flags set')
	}
	mut header := GzipHeader{
		flags:             flg
		payload_start:     10
		modification_time: binary.little_endian_u32_at(data, 4)
		operating_system:  data[9]
	}
	// pos walks over the optional fields that follow the fixed 10-byte header.
	mut pos := 10
	if (flg & 0x04) != 0 {
		// FEXTRA: 2-byte little-endian length followed by that many bytes.
		if pos + 2 > data.len {
			return error('invalid gzip stream: truncated extra')
		}
		xlen := int(u32(data[pos]) | (u32(data[pos + 1]) << 8))
		pos += 2
		if pos + xlen > data.len {
			return error('invalid gzip stream: truncated extra')
		}
		header.extra = data[pos..pos + xlen]
		pos += xlen
	}
	if (flg & 0x08) != 0 {
		// FNAME: zero-terminated file name.
		mut end := pos
		for end < data.len && data[end] != 0 {
			end++
		}
		if end >= data.len {
			// No terminator found: fail here instead of stepping past the input.
			return error('invalid gzip stream: unterminated filename')
		}
		header.filename = data[pos..end].clone()
		pos = end + 1
	}
	if (flg & 0x10) != 0 {
		// FCOMMENT: zero-terminated comment.
		mut end := pos
		for end < data.len && data[end] != 0 {
			end++
		}
		if end >= data.len {
			return error('invalid gzip stream: unterminated comment')
		}
		header.comment = data[pos..end].clone()
		pos = end + 1
	}
	if (flg & 0x02) != 0 {
		// FHCRC: low 16 bits of the CRC-32 over all header bytes before this field.
		if pos + 2 > data.len {
			return error('invalid gzip stream: truncated fhcrc')
		}
		expected_crc16 := u16(data[pos]) | (u16(data[pos + 1]) << 8)
		actual_crc16 := u16(crc32.sum(data[..pos]) & 0xffff)
		if actual_crc16 != expected_crc16 {
			return error('invalid gzip stream: header crc16 mismatch')
		}
		pos += 2
	}
	// The callers in this file read an 8-byte trailer (CRC-32 and size) from the end.
	if pos + 8 > data.len {
		return error('invalid gzip stream: truncated payload')
	}
	header.payload_start = pos
	return header
}
126
// compress encodes `data` in the container chosen by `format.format`:
// zlib (the default), gzip, or a bare DEFLATE stream.
// Any error from the selected encoder is propagated.
pub fn compress(data []u8, format CompressParams) ![]u8 {
	encoded := match format.format {
		.zlib { compress_zlib(data)! }
		.gzip { compress_gzip(data)! }
		.raw_deflate { deflate_compress_fixed(data)! }
	}
	return encoded
}
135
// compress_zlib compresses data into a zlib stream (RFC 1950):
// a 2-byte header, the DEFLATE payload, and the big-endian Adler-32 of `data`.
pub fn compress_zlib(data []u8) ![]u8 {
	deflated := deflate_compress_fixed(data)!
	// 2 header bytes + payload + 4 checksum bytes.
	mut stream := []u8{cap: 2 + deflated.len + 4}
	// CMF 0x78: CM=8 (deflate), CINFO=7 (32K window).
	// FLG 0x9c: default compression, FCHECK makes (CMF*256+FLG) % 31 == 0.
	stream << u8(0x78)
	stream << u8(0x9c)
	stream << deflated
	checksum := adler32.sum(data)
	stream << binary.big_endian_get_u32(checksum)
	return stream
}
146
// compress_gzip compresses data into a gzip stream (RFC 1952):
// a fixed 10-byte header, the DEFLATE payload, then the little-endian CRC-32
// and length of the uncompressed input.
pub fn compress_gzip(data []u8) ![]u8 {
	deflated := deflate_compress_fixed(data)!
	// ID1 ID2 CM FLG MTIME(4) XFL OS - no optional fields, MTIME 0, OS 0xff.
	fixed_header := [u8(0x1f), 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff]
	mut stream := []u8{cap: 10 + deflated.len + 8}
	stream << fixed_header
	stream << deflated
	crc := crc32.sum(data)
	stream << binary.little_endian_get_u32(crc)
	size := u32(data.len)
	stream << binary.little_endian_get_u32(size)
	return stream
}
158
// compress_raw returns `data` encoded as a raw RFC 1951 DEFLATE stream,
// with no zlib or gzip container around it.
pub fn compress_raw(data []u8) ![]u8 {
	deflated := deflate_compress_fixed(data)!
	return deflated
}
163
// decompress decodes a gzip (RFC 1952), zlib (RFC 1950) or raw DEFLATE (RFC 1951) stream.
// The container is detected from the first two bytes: the gzip magic wins,
// then a plausible zlib header; anything else is treated as raw DEFLATE.
pub fn decompress(data []u8) ![]u8 {
	if data.len < 2 {
		// Too short to carry a container header: treat as raw DEFLATE.
		return inflate(data)
	}
	first := data[0]
	second := data[1]
	// gzip magic: 0x1f 0x8b
	if first == 0x1f && second == 0x8b {
		return decompress_gzip(data)
	}
	// zlib: CM=8 and the CMF/FLG pair is a multiple of 31
	if (first & 0x0f) == 8 {
		if (u32(first) * 256 + u32(second)) % 31 == 0 {
			return decompress_zlib(data)
		}
	}
	// raw DEFLATE
	return inflate(data)
}
180
// decompress_zlib decodes a zlib stream (RFC 1950) and returns the decompressed bytes.
// It fails when the header is invalid, when the DEFLATE payload does not end
// exactly at the 4-byte trailer, or when the big-endian Adler-32 stored in the
// trailer does not match the decoded data.
pub fn decompress_zlib(data []u8) ![]u8 {
	header := validate_zlib_header(data)!
	trailer_at := data.len - 4
	body := data[header.payload_start..trailer_at]
	want_adler := binary.big_endian_u32_at(data, trailer_at)
	inflated := inflate_with_consumed(body)!
	if inflated.consumed != body.len {
		return error('invalid zlib stream: trailing data before adler32')
	}
	if adler32.sum(inflated.decoded) != want_adler {
		return error('invalid zlib stream: adler32 mismatch')
	}
	return inflated.decoded
}
195
// decompress_gzip decodes a gzip stream (RFC 1952) and returns the decompressed bytes.
// It fails when the header is invalid, when the DEFLATE payload does not end
// exactly at the 8-byte trailer, or when the little-endian CRC-32 or size
// stored in the trailer does not match the decoded data.
pub fn decompress_gzip(data []u8) ![]u8 {
	header := validate_gzip_header(data)!
	trailer_at := data.len - 8
	body := data[header.payload_start..trailer_at]
	want_crc := binary.little_endian_u32_at(data, trailer_at)
	want_size := binary.little_endian_u32_at(data, trailer_at + 4)
	inflated := inflate_with_consumed(body)!
	if inflated.consumed != body.len {
		return error('invalid gzip stream: trailing data before trailer')
	}
	output := inflated.decoded
	if crc32.sum(output) != want_crc {
		return error('invalid gzip stream: crc32 mismatch')
	}
	if u32(output.len) != want_size {
		return error('invalid gzip stream: size mismatch')
	}
	return output
}
216
// decompress_raw_with_consumed decodes raw RFC 1951 DEFLATE data and returns
// both the decoded bytes and the `consumed` count reported by inflate_with_consumed.
pub fn decompress_raw_with_consumed(data []u8) !RawInflateResult {
	inflated := inflate_with_consumed(data)!
	output := inflated.decoded
	used := inflated.consumed
	return RawInflateResult{
		decoded:  output
		consumed: used
	}
}
225
// decompress_with_callback decodes a gzip (RFC 1952), zlib (RFC 1950) or raw DEFLATE
// stream, handing the output to `cb` in chunks together with `userdata`.
// The callback should return the chunk length to continue, or 0 to abort.
// The container is detected from the first two bytes, as in decompress.
// Returns the total decompressed length delivered to the callback.
pub fn decompress_with_callback(data []u8, cb ChunkCallback, userdata voidptr) !int {
	if data.len >= 2 {
		first := data[0]
		second := data[1]
		// gzip magic: 0x1f 0x8b
		if first == 0x1f && second == 0x8b {
			return decompress_gzip_with_callback(data, cb, userdata)
		}
		// zlib: CM=8 and the CMF/FLG pair is a multiple of 31
		if (first & 0x0f) == 8 {
			if (u32(first) * 256 + u32(second)) % 31 == 0 {
				return decompress_zlib_with_callback(data, cb, userdata)
			}
		}
	}
	// raw DEFLATE
	outcome := inflate_with_callback(data, cb, userdata)!
	return outcome.delivered
}
244
// decompress_zlib_with_callback decodes a zlib stream (RFC 1950) through `cb`.
// If the inflate result is flagged as aborted, the delivered count is returned
// without checking the trailer. Otherwise the payload must end exactly at the
// 4-byte trailer and the big-endian Adler-32 stored there must match the decoded data.
fn decompress_zlib_with_callback(data []u8, cb ChunkCallback, userdata voidptr) !int {
	header := validate_zlib_header(data)!
	trailer_at := data.len - 4
	body := data[header.payload_start..trailer_at]
	want_adler := binary.big_endian_u32_at(data, trailer_at)
	outcome := inflate_with_callback(body, cb, userdata)!
	if !outcome.aborted {
		if outcome.consumed != body.len {
			return error('invalid zlib stream: trailing data before adler32')
		}
		if adler32.sum(outcome.decoded) != want_adler {
			return error('invalid zlib stream: adler32 mismatch')
		}
	}
	return outcome.delivered
}
261
// decompress_gzip_with_callback decodes a gzip stream (RFC 1952) through `cb`.
// If the inflate result is flagged as aborted, the delivered count is returned
// without checking the trailer. Otherwise the payload must end exactly at the
// 8-byte trailer, and the little-endian CRC-32 and size stored there must match
// the decoded data.
fn decompress_gzip_with_callback(data []u8, cb ChunkCallback, userdata voidptr) !int {
	header := validate_gzip_header(data)!
	trailer_at := data.len - 8
	body := data[header.payload_start..trailer_at]
	want_crc := binary.little_endian_u32_at(data, trailer_at)
	want_size := binary.little_endian_u32_at(data, trailer_at + 4)
	outcome := inflate_with_callback(body, cb, userdata)!
	if !outcome.aborted {
		if outcome.consumed != body.len {
			return error('invalid gzip stream: trailing data before trailer')
		}
		if crc32.sum(outcome.decoded) != want_crc {
			return error('invalid gzip stream: crc32 mismatch')
		}
		if u32(outcome.decoded.len) != want_size {
			return error('invalid gzip stream: size mismatch')
		}
	}
	return outcome.delivered
}
282
// bit_reverse returns the lowest `n` bits of `v` in reversed order.
// Bits of `v` above position `n` are ignored; a non-positive `n` yields 0.
fn bit_reverse(v u32, n int) u32 {
	mut reversed := u32(0)
	mut remaining := v
	mut done := 0
	for done < n {
		// Move the current lowest bit of the input onto the low end of the result.
		lowest := remaining & 1
		reversed = (reversed << 1) | lowest
		remaining >>= 1
		done++
	}
	return reversed
}
292