| 1 | module lz |
| 2 | |
| 3 | // compress_lzw compresses data using a pure-V LZW dictionary stream. |
| 4 | pub fn compress_lzw(data []u8) ![]u8 { |
| 5 | mut payload := []u8{} |
| 6 | mut dict := map[string]int{} |
| 7 | for i in 0 .. 256 { |
| 8 | dict[[u8(i)].bytestr()] = i |
| 9 | } |
| 10 | mut next_code := 256 |
| 11 | mut word := '' |
| 12 | |
| 13 | for b in data { |
| 14 | symbol := [b].bytestr() |
| 15 | candidate := word + symbol |
| 16 | if candidate in dict { |
| 17 | word = candidate |
| 18 | continue |
| 19 | } |
| 20 | if word != '' { |
| 21 | encode_uvarint(mut payload, u64(dict[word])) |
| 22 | } |
| 23 | dict[candidate] = next_code |
| 24 | next_code++ |
| 25 | word = symbol |
| 26 | } |
| 27 | |
| 28 | if word != '' { |
| 29 | encode_uvarint(mut payload, u64(dict[word])) |
| 30 | } |
| 31 | |
| 32 | return wrap_payload(.lzw, data, payload) |
| 33 | } |
| 34 | |
| 35 | // decompress_lzw decompresses data produced by compress_lzw. |
| 36 | pub fn decompress_lzw(data []u8) ![]u8 { |
| 37 | payload, expected_len := unwrap_payload(data, .lzw)! |
| 38 | if payload.len == 0 { |
| 39 | if expected_len == i64(0) { |
| 40 | return []u8{} |
| 41 | } |
| 42 | return error('invalid lzw stream: missing codes') |
| 43 | } |
| 44 | |
| 45 | mut dict := map[int][]u8{} |
| 46 | for i in 0 .. 256 { |
| 47 | dict[i] = [u8(i)] |
| 48 | } |
| 49 | mut next_code := 256 |
| 50 | mut pos := 0 |
| 51 | |
| 52 | first_code, next_pos, ok := decode_uvarint(payload, pos) |
| 53 | if !ok { |
| 54 | return error('invalid lzw stream: bad initial code') |
| 55 | } |
| 56 | pos = next_pos |
| 57 | if int(first_code) !in dict { |
| 58 | return error('invalid lzw stream: unknown initial code') |
| 59 | } |
| 60 | mut word := dict[int(first_code)].clone() |
| 61 | mut out := word.clone() |
| 62 | |
| 63 | for pos < payload.len { |
| 64 | code_u64, new_pos, ok_code := decode_uvarint(payload, pos) |
| 65 | if !ok_code { |
| 66 | return error('invalid lzw stream: bad code') |
| 67 | } |
| 68 | pos = new_pos |
| 69 | code := int(code_u64) |
| 70 | mut entry := []u8{} |
| 71 | if code in dict { |
| 72 | entry = dict[code].clone() |
| 73 | } else if code == next_code { |
| 74 | entry = word.clone() |
| 75 | entry << word[0] |
| 76 | } else { |
| 77 | return error('invalid lzw stream: unknown code') |
| 78 | } |
| 79 | |
| 80 | out << entry |
| 81 | mut new_entry := word.clone() |
| 82 | new_entry << entry[0] |
| 83 | dict[next_code] = new_entry |
| 84 | next_code++ |
| 85 | word = entry.clone() |
| 86 | } |
| 87 | |
| 88 | if i64(out.len) != expected_len { |
| 89 | return error('invalid lzw stream: length mismatch') |
| 90 | } |
| 91 | return out |
| 92 | } |
| 93 | |