v2 / vlib / compress / deflate / deflate.v
165 lines · 153 sloc · 4.19 KB · e449b50d7f652f6eecc2c219938200f440a925b6
Raw
1module deflate
2
3import encoding.binary
4import hash.crc32
5
// CompressFormat names the container that wraps the RFC 1951 DEFLATE payload.
pub enum CompressFormat {
	// zlib container (the default used by CompressParams)
	zlib
	// gzip container
	gzip
	// bare DEFLATE payload with no container
	raw_deflate
}
12
// CompressParams carries the optional settings accepted by `compress`.
@[params]
pub struct CompressParams {
pub:
	// format is the output container; zlib when not specified.
	format CompressFormat = .zlib
}
18
// compress compresses data as zlib, gzip, or raw DEFLATE.
// The output container is selected by `format.format`.
// The DEFLATE payload is produced exactly once: the zlib and gzip helpers
// build their own payload, so nothing is compressed up front here.
pub fn compress(data []u8, format CompressParams) ![]u8 {
	match format.format {
		.zlib { return compress_zlib(data) }
		.gzip { return compress_gzip(data) }
		.raw_deflate { return deflate_compress_fixed(data) }
	}
}
28
// compress_zlib compresses data into a zlib stream (RFC 1950):
// a 2-byte header, the DEFLATE payload, and the big-endian Adler-32
// checksum of the uncompressed input.
pub fn compress_zlib(data []u8) ![]u8 {
	body := deflate_compress_fixed(data)
	trailer := binary.big_endian_get_u32(adler32(data))
	mut stream := []u8{cap: 2 + body.len + 4}
	// CMF = 0x78: CM=8 (deflate), CINFO=7 (32K window).
	// FLG = 0x9c: default compression; FCHECK makes (CMF*256+FLG) % 31 == 0.
	stream << u8(0x78)
	stream << u8(0x9c)
	stream << body
	stream << trailer
	return stream
}
39
// compress_gzip wraps the DEFLATE payload of data in a gzip member (RFC 1952):
// a fixed 10-byte header, the payload, then the CRC-32 and the length of the
// uncompressed input, both little-endian.
pub fn compress_gzip(data []u8) ![]u8 {
	body := deflate_compress_fixed(data)
	// ID1 ID2 CM FLG MTIME(4) XFL OS
	header := [u8(0x1f), 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff]
	crc_bytes := binary.little_endian_get_u32(crc32.sum(data))
	size_bytes := binary.little_endian_get_u32(u32(data.len))
	mut stream := []u8{cap: 10 + body.len + 8}
	stream << header
	stream << body
	stream << crc_bytes
	stream << size_bytes
	return stream
}
51
// compress_raw returns data encoded as a bare RFC 1951 DEFLATE stream,
// with no container header or trailer.
pub fn compress_raw(data []u8) ![]u8 {
	payload := deflate_compress_fixed(data)
	return payload
}
56
// decompress inflates a zlib (RFC 1950), gzip (RFC 1952), or raw DEFLATE (RFC 1951) stream.
// The container is detected from the first two bytes; input that matches
// neither the gzip magic nor a valid zlib header is treated as raw DEFLATE.
pub fn decompress(data []u8) ![]u8 {
	if data.len < 2 {
		// too short to carry a container header
		return inflate(data)
	}
	b0 := data[0]
	b1 := data[1]
	// gzip magic: 0x1f 0x8b
	if b0 == 0x1f && b1 == 0x8b {
		return decompress_gzip(data)
	}
	// zlib: CM=8 and the 16-bit header is a multiple of 31
	method_is_deflate := b0 & 0x0f == 8
	header_check_ok := (u32(b0) * 256 + u32(b1)) % 31 == 0
	if method_is_deflate && header_check_ok {
		return decompress_zlib(data)
	}
	return inflate(data)
}
73
// decompress_zlib validates the 2-byte zlib header, inflates the payload and
// checks it against the big-endian Adler-32 stored in the last 4 bytes.
// Streams that request a preset dictionary are rejected.
fn decompress_zlib(data []u8) ![]u8 {
	if data.len < 6 {
		return error('invalid zlib stream: too short')
	}
	cmf := data[0]
	flg := data[1]
	if cmf & 0x0f != 8 {
		return error('invalid zlib stream: unsupported compression method')
	}
	if (u32(cmf) * 256 + u32(flg)) % 31 != 0 {
		return error('invalid zlib stream: bad header checksum')
	}
	// FDICT bit
	if flg & 0x20 != 0 {
		return error('invalid zlib stream: preset dictionary not supported')
	}
	trailer_at := data.len - 4
	stored_sum := binary.big_endian_u32_at(data, trailer_at)
	inflated := inflate(data[2..trailer_at])!
	if adler32(inflated) != stored_sum {
		return error('invalid zlib stream: adler32 mismatch')
	}
	return inflated
}
95
// decompress_gzip parses a single gzip member (RFC 1952): it validates the
// fixed header, skips the optional FEXTRA / FNAME / FCOMMENT / FHCRC fields,
// inflates the payload, and verifies the little-endian CRC-32 and size stored
// in the 8-byte trailer.
// Returns an error for malformed or truncated input and for checksum or size
// mismatches. The header CRC16 (FHCRC) is skipped, not verified.
fn decompress_gzip(data []u8) ![]u8 {
	// 10-byte header + 8-byte trailer is the smallest possible member.
	if data.len < 18 {
		return error('invalid gzip stream: too short')
	}
	if data[0] != 0x1f || data[1] != 0x8b {
		return error('invalid gzip stream: bad magic')
	}
	if data[2] != 8 {
		return error('invalid gzip stream: unsupported compression method')
	}
	flg := data[3]
	mut pos := 10 // fixed header size
	if flg & 0x04 != 0 { // FEXTRA
		// The length check above guarantees both XLEN bytes (offsets 10, 11) exist.
		xlen := int(u32(data[pos]) | u32(data[pos + 1]) << 8)
		// Reject an extra field that claims more bytes than the stream holds.
		if pos + 2 + xlen > data.len {
			return error('invalid gzip stream: truncated extra')
		}
		pos += 2 + xlen
	}
	if flg & 0x08 != 0 { // FNAME: zero-terminated string
		for pos < data.len && data[pos] != 0 {
			pos++
		}
		pos++ // step over the terminator
	}
	if flg & 0x10 != 0 { // FCOMMENT: zero-terminated string
		for pos < data.len && data[pos] != 0 {
			pos++
		}
		pos++ // step over the terminator
	}
	if flg & 0x02 != 0 { // FHCRC: 2-byte header checksum, skipped
		pos += 2
	}
	// Also catches unterminated FNAME/FCOMMENT, which push pos past the end.
	if pos + 8 > data.len {
		return error('invalid gzip stream: truncated payload')
	}
	payload := data[pos..data.len - 8]
	expected_crc := binary.little_endian_u32_at(data, data.len - 8)
	expected_size := binary.little_endian_u32_at(data, data.len - 4)
	decoded := inflate(payload)!
	if crc32.sum(decoded) != expected_crc {
		return error('invalid gzip stream: crc32 mismatch')
	}
	if u32(decoded.len) != expected_size {
		return error('invalid gzip stream: size mismatch')
	}
	return decoded
}
145
// adler32 computes the Adler-32 checksum of data.
// Two running sums are kept modulo 65521: `low` starts at 1 and accumulates
// the bytes, `high` starts at 0 and accumulates `low`. The result packs
// `high` into the upper 16 bits and `low` into the lower 16 bits.
fn adler32(data []u8) u32 {
	modulus := u32(65521)
	mut low := u32(1)
	mut high := u32(0)
	for i in 0 .. data.len {
		low = (low + u32(data[i])) % modulus
		high = (high + low) % modulus
	}
	return (high << 16) | low
}
156
// bit_reverse returns the lowest n bits of v in reversed order:
// bit 0 of v becomes bit n-1 of the result, and so on.
// For n <= 0 the result is 0.
fn bit_reverse(v u32, n int) u32 {
	mut remaining := v
	mut reversed := u32(0)
	mut left := n
	for left > 0 {
		// move the current lowest bit of the input onto the low end of the output
		reversed = (reversed << 1) | (remaining & 1)
		remaining >>= 1
		left--
	}
	return reversed
}
166