v2 / vlib / compress / lz / lzw.v
92 lines · 82 sloc · 1.96 KB · 714bc792a1024c3e7eccc72f7312c6dc9821d04f
Raw
1module lz
2
3// compress_lzw compresses data using a pure-V LZW dictionary stream.
4pub fn compress_lzw(data []u8) ![]u8 {
5 mut payload := []u8{}
6 mut dict := map[string]int{}
7 for i in 0 .. 256 {
8 dict[[u8(i)].bytestr()] = i
9 }
10 mut next_code := 256
11 mut word := ''
12
13 for b in data {
14 symbol := [b].bytestr()
15 candidate := word + symbol
16 if candidate in dict {
17 word = candidate
18 continue
19 }
20 if word != '' {
21 encode_uvarint(mut payload, u64(dict[word]))
22 }
23 dict[candidate] = next_code
24 next_code++
25 word = symbol
26 }
27
28 if word != '' {
29 encode_uvarint(mut payload, u64(dict[word]))
30 }
31
32 return wrap_payload(.lzw, data, payload)
33}
34
35// decompress_lzw decompresses data produced by compress_lzw.
36pub fn decompress_lzw(data []u8) ![]u8 {
37 payload, expected_len := unwrap_payload(data, .lzw)!
38 if payload.len == 0 {
39 if expected_len == i64(0) {
40 return []u8{}
41 }
42 return error('invalid lzw stream: missing codes')
43 }
44
45 mut dict := map[int][]u8{}
46 for i in 0 .. 256 {
47 dict[i] = [u8(i)]
48 }
49 mut next_code := 256
50 mut pos := 0
51
52 first_code, next_pos, ok := decode_uvarint(payload, pos)
53 if !ok {
54 return error('invalid lzw stream: bad initial code')
55 }
56 pos = next_pos
57 if int(first_code) !in dict {
58 return error('invalid lzw stream: unknown initial code')
59 }
60 mut word := dict[int(first_code)].clone()
61 mut out := word.clone()
62
63 for pos < payload.len {
64 code_u64, new_pos, ok_code := decode_uvarint(payload, pos)
65 if !ok_code {
66 return error('invalid lzw stream: bad code')
67 }
68 pos = new_pos
69 code := int(code_u64)
70 mut entry := []u8{}
71 if code in dict {
72 entry = dict[code].clone()
73 } else if code == next_code {
74 entry = word.clone()
75 entry << word[0]
76 } else {
77 return error('invalid lzw stream: unknown code')
78 }
79
80 out << entry
81 mut new_entry := word.clone()
82 new_entry << entry[0]
83 dict[next_code] = new_entry
84 next_code++
85 word = entry.clone()
86 }
87
88 if i64(out.len) != expected_len {
89 return error('invalid lzw stream: length mismatch')
90 }
91 return out
92}
93