| 1 | module lz |
| 2 | |
| 3 | // compress_lz78 compresses data using a pure-V LZ78 dictionary stream. |
| 4 | pub fn compress_lz78(data []u8) ![]u8 { |
| 5 | mut payload := []u8{} |
| 6 | mut dict := map[string]int{} |
| 7 | mut next_index := 1 |
| 8 | mut word := []u8{} |
| 9 | |
| 10 | for b in data { |
| 11 | mut candidate := word.clone() |
| 12 | candidate << b |
| 13 | candidate_key := candidate.bytestr() |
| 14 | if candidate_key in dict { |
| 15 | word = candidate.clone() |
| 16 | continue |
| 17 | } |
| 18 | |
| 19 | prefix_index := if word.len == 0 { 0 } else { dict[word.bytestr()] } |
| 20 | encode_uvarint(mut payload, u64(prefix_index)) |
| 21 | payload << u8(1) |
| 22 | payload << b |
| 23 | dict[candidate_key] = next_index |
| 24 | next_index++ |
| 25 | word.clear() |
| 26 | } |
| 27 | |
| 28 | if word.len > 0 { |
| 29 | final_index := dict[word.bytestr()] |
| 30 | encode_uvarint(mut payload, u64(final_index)) |
| 31 | payload << u8(0) |
| 32 | } |
| 33 | |
| 34 | return wrap_payload(.lz78, data, payload) |
| 35 | } |
| 36 | |
| 37 | // decompress_lz78 decompresses data produced by compress_lz78. |
| 38 | pub fn decompress_lz78(data []u8) ![]u8 { |
| 39 | payload, expected_len := unwrap_payload(data, .lz78)! |
| 40 | mut out := []u8{cap: int(expected_len)} |
| 41 | mut dict := map[int][]u8{} |
| 42 | mut next_index := 1 |
| 43 | mut pos := 0 |
| 44 | |
| 45 | for pos < payload.len { |
| 46 | prefix, next_pos, ok := decode_uvarint(payload, pos) |
| 47 | if !ok { |
| 48 | return error('invalid lz78 stream: bad prefix index') |
| 49 | } |
| 50 | pos = next_pos |
| 51 | if pos >= payload.len { |
| 52 | return error('invalid lz78 stream: missing suffix flag') |
| 53 | } |
| 54 | has_suffix := payload[pos] |
| 55 | pos++ |
| 56 | |
| 57 | mut phrase := if prefix == 0 { |
| 58 | []u8{} |
| 59 | } else { |
| 60 | if int(prefix) !in dict { |
| 61 | return error('invalid lz78 stream: unknown prefix index') |
| 62 | } |
| 63 | dict[int(prefix)].clone() |
| 64 | } |
| 65 | |
| 66 | if has_suffix == 1 { |
| 67 | if pos >= payload.len { |
| 68 | return error('invalid lz78 stream: missing suffix byte') |
| 69 | } |
| 70 | phrase << payload[pos] |
| 71 | pos++ |
| 72 | } else if has_suffix != 0 { |
| 73 | return error('invalid lz78 stream: bad suffix flag') |
| 74 | } |
| 75 | |
| 76 | out << phrase |
| 77 | dict[next_index] = phrase |
| 78 | next_index++ |
| 79 | } |
| 80 | |
| 81 | if i64(out.len) != expected_len { |
| 82 | return error('invalid lz78 stream: length mismatch') |
| 83 | } |
| 84 | return out |
| 85 | } |
| 86 | |