v2 / vlib / compress / lz / lz78.v
85 lines · 74 sloc · 1.91 KB · 714bc792a1024c3e7eccc72f7312c6dc9821d04f
Raw
1module lz
2
3// compress_lz78 compresses data using a pure-V LZ78 dictionary stream.
4pub fn compress_lz78(data []u8) ![]u8 {
5 mut payload := []u8{}
6 mut dict := map[string]int{}
7 mut next_index := 1
8 mut word := []u8{}
9
10 for b in data {
11 mut candidate := word.clone()
12 candidate << b
13 candidate_key := candidate.bytestr()
14 if candidate_key in dict {
15 word = candidate.clone()
16 continue
17 }
18
19 prefix_index := if word.len == 0 { 0 } else { dict[word.bytestr()] }
20 encode_uvarint(mut payload, u64(prefix_index))
21 payload << u8(1)
22 payload << b
23 dict[candidate_key] = next_index
24 next_index++
25 word.clear()
26 }
27
28 if word.len > 0 {
29 final_index := dict[word.bytestr()]
30 encode_uvarint(mut payload, u64(final_index))
31 payload << u8(0)
32 }
33
34 return wrap_payload(.lz78, data, payload)
35}
36
37// decompress_lz78 decompresses data produced by compress_lz78.
38pub fn decompress_lz78(data []u8) ![]u8 {
39 payload, expected_len := unwrap_payload(data, .lz78)!
40 mut out := []u8{cap: int(expected_len)}
41 mut dict := map[int][]u8{}
42 mut next_index := 1
43 mut pos := 0
44
45 for pos < payload.len {
46 prefix, next_pos, ok := decode_uvarint(payload, pos)
47 if !ok {
48 return error('invalid lz78 stream: bad prefix index')
49 }
50 pos = next_pos
51 if pos >= payload.len {
52 return error('invalid lz78 stream: missing suffix flag')
53 }
54 has_suffix := payload[pos]
55 pos++
56
57 mut phrase := if prefix == 0 {
58 []u8{}
59 } else {
60 if int(prefix) !in dict {
61 return error('invalid lz78 stream: unknown prefix index')
62 }
63 dict[int(prefix)].clone()
64 }
65
66 if has_suffix == 1 {
67 if pos >= payload.len {
68 return error('invalid lz78 stream: missing suffix byte')
69 }
70 phrase << payload[pos]
71 pos++
72 } else if has_suffix != 0 {
73 return error('invalid lz78 stream: bad suffix flag')
74 }
75
76 out << phrase
77 dict[next_index] = phrase
78 next_index++
79 }
80
81 if i64(out.len) != expected_len {
82 return error('invalid lz78 stream: length mismatch')
83 }
84 return out
85}
86