From 714bc792a1024c3e7eccc72f7312c6dc9821d04f Mon Sep 17 00:00:00 2001
From: JalonSolov <JalonSolov@gmail.com>
Date: Mon, 11 May 2026 21:29:46 -0400
Subject: [PATCH] compress: add pure V lz, supporting multiple variations
 (#27137)

---
 vlib/compress/lz/README.md            |  34 ++++
 vlib/compress/lz/common.v             | 201 ++++++++++++++++++++
 vlib/compress/lz/interop/README.md    |  44 +++++
 vlib/compress/lz/interop/lz77_ref.c   | 256 ++++++++++++++++++++++++++
 vlib/compress/lz/interop/lz77_ref.py  | 141 ++++++++++++++
 vlib/compress/lz/interop/lz_interop.v | 172 +++++++++++++++++
 vlib/compress/lz/lz.v                 |  53 ++++++
 vlib/compress/lz/lz4.v                |  18 ++
 vlib/compress/lz/lz77.v               |  18 ++
 vlib/compress/lz/lz78.v               |  85 +++++++++
 vlib/compress/lz/lz_test.v            |  90 +++++++++
 vlib/compress/lz/lzjb.v               |  18 ++
 vlib/compress/lz/lzma.v               |  18 ++
 vlib/compress/lz/lzss.v               |  18 ++
 vlib/compress/lz/lzw.v                |  92 +++++++++
 15 files changed, 1258 insertions(+)
 create mode 100644 vlib/compress/lz/README.md
 create mode 100644 vlib/compress/lz/common.v
 create mode 100644 vlib/compress/lz/interop/README.md
 create mode 100644 vlib/compress/lz/interop/lz77_ref.c
 create mode 100644 vlib/compress/lz/interop/lz77_ref.py
 create mode 100644 vlib/compress/lz/interop/lz_interop.v
 create mode 100644 vlib/compress/lz/lz.v
 create mode 100644 vlib/compress/lz/lz4.v
 create mode 100644 vlib/compress/lz/lz77.v
 create mode 100644 vlib/compress/lz/lz78.v
 create mode 100644 vlib/compress/lz/lz_test.v
 create mode 100644 vlib/compress/lz/lzjb.v
 create mode 100644 vlib/compress/lz/lzma.v
 create mode 100644 vlib/compress/lz/lzss.v
 create mode 100644 vlib/compress/lz/lzw.v

diff --git a/vlib/compress/lz/README.md b/vlib/compress/lz/README.md
new file mode 100644
index 000000000..c4b1d0667
--- /dev/null
+++ b/vlib/compress/lz/README.md
@@ -0,0 +1,34 @@
+## Description
+
+`compress.lz` provides pure V implementations of several LZ-family codecs.
+
+Supported formats:
+
+- `lz77`
+- `lz78`
+- `lzw`
+- `lz4`
+- `lzss`
+- `lzma`
+- `lzjb`
+
+Use the generic API when selecting a format dynamically:
+
+```v
+import compress.lz
+
+encoded := lz.compress('hello hello hello'.bytes(), .lz77)!
+decoded := lz.decompress(encoded, .lz77)!
+assert decoded.bytestr() == 'hello hello hello'
+```
+
+Use the format-specific APIs for direct calls:
+
+```v
+import compress.lz
+
+encoded := lz.compress_lzw('banana banana'.bytes())!
+decoded := lz.decompress_lzw(encoded)!
+assert decoded.bytestr() == 'banana banana'
+```
+
diff --git a/vlib/compress/lz/common.v b/vlib/compress/lz/common.v
new file mode 100644
index 000000000..fb6a02e5b
--- /dev/null
+++ b/vlib/compress/lz/common.v
@@ -0,0 +1,201 @@
+module lz
+
+const stream_magic = [u8(0x56), 0x4c, 0x5a, 0x31] // VLZ1
+
+struct MatchProfile {
+	window      int // largest back-reference distance find_best_match will accept
+	min_match   int // shortest match that is encoded; also the bias of the stored match length
+	max_match   int // longest match the encoder will emit
+	max_literal int // literal run length at which the encoder flushes the run
+}
+
+const match_hash_bits = 16 // width in bits of the value returned by match_hash
+const match_hash_size = 1 << match_hash_bits // number of chain heads kept in last_match
+const max_match_candidates = 64 // cap on chain entries find_best_match inspects per position
+
+fn wrap_payload(format Format, source []u8, payload []u8) []u8 {
+	mut out := []u8{cap: stream_magic.len + 8 + payload.len}
+	out << stream_magic
+	out << u8(format)
+	encode_uvarint(mut out, u64(source.len))
+	out << payload
+	return out
+}
+
+// unwrap_payload validates the stream header (magic, format byte, uvarint decoded length)
+// and returns the bytes that follow it together with the declared decoded length.
+// Fails on a short stream, a magic/format mismatch, or a length that is malformed or > max_int.
+fn unwrap_payload(data []u8, format Format) !([]u8, i64) {
+	if data.len < stream_magic.len + 2 {
+		return error('invalid lz stream: too short')
+	}
+	if data[..stream_magic.len] != stream_magic {
+		return error('invalid lz stream: bad magic')
+	}
+	wire_format := data[stream_magic.len]
+	if wire_format != u8(format) {
+		return error('invalid lz stream: format mismatch')
+	}
+	decoded_len_u64, pos, ok := decode_uvarint(data, stream_magic.len + 1)
+	if !ok {
+		return error('invalid lz stream: bad length')
+	}
+	if decoded_len_u64 > u64(max_int) {
+		return error('invalid lz stream: decoded length too large')
+	}
+	// On success decode_uvarint never returns a position past data.len, so the slice is in range.
+	return data[pos..], i64(decoded_len_u64)
+}
+
+fn compress_with_profile(data []u8, profile MatchProfile, format Format) []u8 {
+	if data.len == 0 {
+		return wrap_payload(format, data, []u8{})
+	}
+	mut payload := []u8{cap: data.len}
+	mut literals := []u8{cap: profile.max_literal}
+	mut last_match := []int{len: match_hash_size, init: -1}
+	mut prev_match := []int{len: data.len, init: -1}
+	mut pos := 0
+	for pos < data.len {
+		offset, length := find_best_match(data, pos, profile, last_match, prev_match)
+		if length >= profile.min_match {
+			flush_literals(mut payload, mut literals)
+			emit_match(mut payload, offset, length, profile.min_match)
+			for i := pos; i < pos + length; i++ {
+				index_match_position(data, i, mut last_match, mut prev_match)
+			}
+			pos += length
+		} else {
+			literals << data[pos]
+			if literals.len == profile.max_literal {
+				flush_literals(mut payload, mut literals)
+			}
+			index_match_position(data, pos, mut last_match, mut prev_match)
+			pos++
+		}
+	}
+	flush_literals(mut payload, mut literals)
+	return wrap_payload(format, data, payload)
+}
+
+fn decompress_with_profile(data []u8, profile MatchProfile, format Format) ![]u8 {
+	payload, expected_len := unwrap_payload(data, format)!
+	max_out := i64(payload.len) * i64(0x7f + profile.min_match) // expected_len is untrusted; this is the most the payload can expand to
+	mut out := []u8{cap: if expected_len < max_out { int(expected_len) } else { int(max_out) }}
+	mut pos := 0
+	for pos < payload.len && i64(out.len) <= expected_len {
+		control := payload[pos]
+		pos++
+		if control & 0x80 == 0 {
+			literal_len := int(control & 0x7f) + 1 // literal run: low 7 bits hold length - 1
+			if pos + literal_len > payload.len {
+				return error('invalid lz stream: truncated literal')
+			}
+			out << payload[pos..pos + literal_len]
+			pos += literal_len
+			continue
+		}
+		match_len := int(control & 0x7f) + profile.min_match // match: low 7 bits hold length - min_match
+		offset, next_pos, ok := decode_uvarint(payload, pos)
+		if !ok {
+			return error('invalid lz stream: truncated match offset')
+		}
+		pos = next_pos
+		if offset == 0 || offset > u64(out.len) {
+			return error('invalid lz stream: bad match offset')
+		}
+		base := out.len - int(offset)
+		for i in 0 .. match_len {
+			out << out[base + i] // byte by byte, so a match may overlap the bytes it is producing
+		}
+	}
+	if i64(out.len) != expected_len {
+		return error('invalid lz stream: length mismatch') // also reached when the loop stops on an overrun
+	}
+	return out
+}
+
+fn find_best_match(data []u8, pos int, profile MatchProfile, last_match []int, prev_match []int) (int, int) {
+	if pos + profile.min_match > data.len {
+		return 0, 0
+	}
+	max_len := if pos + profile.max_match < data.len { profile.max_match } else { data.len - pos }
+	mut best_len := 0
+	mut best_offset := 0
+	hash_idx := match_hash(data, pos)
+	mut candidates_checked := 0
+	mut i := last_match[hash_idx]
+	for i >= 0 && candidates_checked < max_match_candidates {
+		offset := pos - i
+		if offset > profile.window {
+			break
+		}
+		mut current_len := 0
+		for current_len < max_len && data[i + current_len] == data[pos + current_len] {
+			current_len++
+		}
+		if current_len > best_len {
+			best_len = current_len
+			best_offset = offset
+			if best_len == max_len {
+				break
+			}
+		}
+		i = prev_match[i]
+		candidates_checked++
+	}
+	return best_offset, best_len
+}
+
+fn index_match_position(data []u8, pos int, mut last_match []int, mut prev_match []int) {
+	if pos + 2 >= data.len {
+		return
+	}
+	hash_idx := match_hash(data, pos)
+	prev_match[pos] = last_match[hash_idx]
+	last_match[hash_idx] = pos
+}
+
+fn match_hash(data []u8, pos int) int {
+	v := (u32(data[pos]) << 16) | (u32(data[pos + 1]) << 8) | u32(data[pos + 2])
+	return int((v * u32(2654435761)) >> (32 - match_hash_bits))
+}
+
+fn flush_literals(mut payload []u8, mut literals []u8) {
+	if literals.len == 0 {
+		return
+	}
+	payload << u8(literals.len - 1)
+	payload << literals
+	literals.clear()
+}
+
+fn emit_match(mut payload []u8, offset int, length int, min_match int) {
+	payload << u8(0x80 | u8(length - min_match))
+	encode_uvarint(mut payload, u64(offset))
+}
+
+fn encode_uvarint(mut out []u8, value u64) {
+	mut v := value
+	for v >= 0x80 {
+		out << u8(v & 0x7f | 0x80)
+		v >>= 7
+	}
+	out << u8(v)
+}
+
+fn decode_uvarint(data []u8, start int) (u64, int, bool) {
+	mut value := u64(0)
+	for pos in start .. data.len {
+		b := data[pos]
+		shift := u32(pos - start) * 7 // byte k of the varint carries bits 7k .. 7k+6
+		if shift > 63 || (shift == 63 && b > 1) {
+			return 0, start, false // does not fit in u64: the 10th byte may only set bit 63
+		}
+		value |= u64(b & 0x7f) << shift
+		if b & 0x80 == 0 {
+			return value, pos + 1, true // continuation bit clear: this was the last byte
+		}
+	}
+	return 0, start, false // input ended before the terminating byte
+}
diff --git a/vlib/compress/lz/interop/README.md b/vlib/compress/lz/interop/README.md
new file mode 100644
index 000000000..08a68fc85
--- /dev/null
+++ b/vlib/compress/lz/interop/README.md
@@ -0,0 +1,44 @@
+## LZ Interop Validation (V, C, Python)
+
+This tool validates:
+
+- V compress/decompress roundtrips for all formats: `lz77`, `lz78`, `lzw`,
+  `lz4`, `lzss`, `lzma`, `lzjb`
+- a C `lz77`-like reference implementation
+- a Python `lz77`-like reference implementation
+
+The C/Python references are intentionally simple (their encoders emit literal runs
+only) so the validation is easy to run without external dependencies.
+
+### Run
+
+```bash
+cd /path/to/v # the root of your V checkout
+./v run vlib/compress/lz/interop/lz_interop.v 2>&1
+```
+
+Optional arg:
+
+1. input size in bytes (default: `524288`); a value that is not a positive integer
+   falls back to the default
+
+Example:
+
+```bash
+cd /path/to/v # the root of your V checkout
+./v run vlib/compress/lz/interop/lz_interop.v 262144 2>&1
+```
+
+### Output
+
+The tool prints validation status lines only (no timing output).
+
+When helpers are available, it also cross-validates compression/expansion
+interoperability with V (`lz77`) in both directions:
+
+- V compress -> C decompress
+- C compress -> V decompress
+- V compress -> Python decompress
+- Python compress -> V decompress
+
+Checks whose helper (`cc`/`gcc`/`clang`, or `python3`) is missing are skipped with a message.
diff --git a/vlib/compress/lz/interop/lz77_ref.c b/vlib/compress/lz/interop/lz77_ref.c
new file mode 100644
index 000000000..6e72555b4
--- /dev/null
+++ b/vlib/compress/lz/interop/lz77_ref.c
@@ -0,0 +1,256 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#define MIN_MATCH 3
+#define MAX_LITERAL 128
+
+static const uint8_t STREAM_MAGIC[4] = {0x56, 0x4c, 0x5a, 0x31};
+static const uint8_t FORMAT_LZ77 = 0;
+
+typedef struct {
+	uint8_t *data;
+	size_t len;
+	size_t cap;
+} Buffer;
+
+static void die(const char *msg) {
+	fprintf(stderr, "%s\n", msg);
+	exit(1);
+}
+
+static void buf_init(Buffer *b, size_t cap) {
+	b->data = (uint8_t *)malloc(cap > 0 ? cap : 1);
+	if (!b->data) {
+		die("allocation failed");
+	}
+	b->len = 0;
+	b->cap = cap > 0 ? cap : 1;
+}
+
+static void buf_push(Buffer *b, uint8_t v) {
+	if (b->len >= b->cap) {
+		size_t new_cap = b->cap * 2;
+		uint8_t *n = (uint8_t *)realloc(b->data, new_cap);
+		if (!n) {
+			die("reallocation failed");
+		}
+		b->data = n;
+		b->cap = new_cap;
+	}
+	b->data[b->len++] = v;
+}
+
+static void buf_append(Buffer *b, const uint8_t *src, size_t len) {
+	for (size_t i = 0; i < len; i++) {
+		buf_push(b, src[i]);
+	}
+}
+
+static Buffer read_all(const char *path) {
+	FILE *f = fopen(path, "rb");
+	if (!f) {
+		die("could not open input file");
+	}
+	if (fseek(f, 0, SEEK_END) != 0) {
+		fclose(f);
+		die("could not seek input file");
+	}
+	long sz = ftell(f);
+	if (sz < 0) {
+		fclose(f);
+		die("could not read input file size");
+	}
+	if (fseek(f, 0, SEEK_SET) != 0) {
+		fclose(f);
+		die("could not rewind input file");
+	}
+	Buffer in;
+	buf_init(&in, (size_t)sz + 1);
+	in.len = (size_t)sz;
+	if (in.len > 0 && fread(in.data, 1, in.len, f) != in.len) {
+		fclose(f);
+		free(in.data);
+		die("could not read input file");
+	}
+	fclose(f);
+	return in;
+}
+
+/* Writes len bytes to path; dies if the file cannot be opened, written, or flushed on close. */
+static void write_all(const char *path, const uint8_t *data, size_t len) {
+	FILE *f = fopen(path, "wb");
+	if (!f) {
+		die("could not open output file");
+	}
+	int write_failed = len > 0 && fwrite(data, 1, len, f) != len;
+	if (fclose(f) != 0 || write_failed) { /* fclose flushes buffered bytes, so it can fail too */
+		die("could not write output file");
+	}
+}
+
+static void write_uvarint(Buffer *out, uint64_t value) {
+	uint64_t v = value;
+	while (v >= 0x80) {
+		buf_push(out, (uint8_t)(v & 0x7f) | 0x80);
+		v >>= 7;
+	}
+	buf_push(out, (uint8_t)v);
+}
+
+static int read_uvarint(const uint8_t *data, size_t len, size_t *pos, uint64_t *value) {
+	uint64_t out = 0;
+	uint32_t shift = 0;
+	while (*pos < len && shift <= 63) {
+		uint8_t b = data[*pos];
+		(*pos)++;
+		out |= ((uint64_t)(b & 0x7f)) << shift;
+		if ((b & 0x80) == 0) {
+			*value = out;
+			return 1;
+		}
+		shift += 7;
+	}
+	return 0;
+}
+
+static Buffer compress_lz77(const uint8_t *in, size_t in_len) {
+	Buffer out;
+	buf_init(&out, in_len + 32);
+	buf_append(&out, STREAM_MAGIC, 4);
+	buf_push(&out, FORMAT_LZ77);
+	write_uvarint(&out, (uint64_t)in_len);
+
+	for (size_t i = 0; i < in_len;) {
+		size_t lit_len = in_len - i;
+		if (lit_len > MAX_LITERAL) {
+			lit_len = MAX_LITERAL;
+		}
+		buf_push(&out, (uint8_t)(lit_len - 1));
+		buf_append(&out, in + i, lit_len);
+		i += lit_len;
+	}
+	return out;
+}
+
+static Buffer decompress_lz77(const uint8_t *in, size_t in_len) {
+	if (in_len < 6 || memcmp(in, STREAM_MAGIC, 4) != 0) {
+		die("bad magic");
+	}
+	if (in[4] != FORMAT_LZ77) {
+		die("format mismatch");
+	}
+	size_t pos = 5;
+	uint64_t expected_len_u64 = 0;
+	if (!read_uvarint(in, in_len, &pos, &expected_len_u64)) {
+		die("bad length varint");
+	}
+	size_t expected_len = (size_t)expected_len_u64;
+
+	Buffer out;
+	buf_init(&out, expected_len + 16);
+	while (pos < in_len) {
+		uint8_t control = in[pos++];
+		if ((control & 0x80) == 0) {
+			size_t lit_len = (size_t)(control & 0x7f) + 1;
+			if (pos + lit_len > in_len) {
+				die("truncated literal");
+			}
+			buf_append(&out, in + pos, lit_len);
+			pos += lit_len;
+		} else {
+			size_t match_len = (size_t)(control & 0x7f) + MIN_MATCH;
+			uint64_t offset_u64 = 0;
+			if (!read_uvarint(in, in_len, &pos, &offset_u64)) {
+				die("bad match offset");
+			}
+			size_t offset = (size_t)offset_u64;
+			if (offset == 0 || offset > out.len) {
+				die("bad offset");
+			}
+			size_t base = out.len - offset;
+			for (size_t k = 0; k < match_len; k++) {
+				buf_push(&out, out.data[base + k]);
+			}
+		}
+	}
+	if (out.len != expected_len) {
+		die("length mismatch");
+	}
+	return out;
+}
+
+static int64_t now_ms(void) {
+	return (int64_t)((double)clock() * 1000.0 / (double)CLOCKS_PER_SEC);
+}
+
+int main(int argc, char **argv) {
+	if (argc < 2) {
+		fprintf(stderr,
+			"usage:\n"
+			"  %s bench <input.bin> <iterations>\n"
+			"  %s compress <input.bin> <output.bin>\n"
+			"  %s decompress <input.bin> <output.bin>\n",
+			argv[0], argv[0], argv[0]);
+		return 1;
+	}
+	if (strcmp(argv[1], "bench") == 0) {
+		if (argc < 4) {
+			fprintf(stderr, "usage: %s bench <input.bin> <iterations>\n", argv[0]);
+			return 1;
+		}
+		int iterations = atoi(argv[3]);
+		if (iterations <= 0) {
+			fprintf(stderr, "iterations must be > 0\n");
+			return 1;
+		}
+		Buffer input = read_all(argv[2]);
+		int64_t start = now_ms();
+		for (int i = 0; i < iterations; i++) {
+			Buffer comp = compress_lz77(input.data, input.len);
+			Buffer decomp = decompress_lz77(comp.data, comp.len);
+			if (decomp.len != input.len || memcmp(decomp.data, input.data, input.len) != 0) {
+				fprintf(stderr, "roundtrip mismatch\n");
+				return 1;
+			}
+			free(comp.data);
+			free(decomp.data);
+		}
+		int64_t elapsed = now_ms() - start;
+		printf("ms=%lld\n", (long long)elapsed);
+		free(input.data);
+		return 0;
+	}
+
+	if (strcmp(argv[1], "compress") == 0) {
+		if (argc < 4) {
+			fprintf(stderr, "usage: %s compress <input.bin> <output.bin>\n", argv[0]);
+			return 1;
+		}
+		Buffer input = read_all(argv[2]);
+		Buffer comp = compress_lz77(input.data, input.len);
+		write_all(argv[3], comp.data, comp.len);
+		free(input.data);
+		free(comp.data);
+		return 0;
+	}
+
+	if (strcmp(argv[1], "decompress") == 0) {
+		if (argc < 4) {
+			fprintf(stderr, "usage: %s decompress <input.bin> <output.bin>\n", argv[0]);
+			return 1;
+		}
+		Buffer input = read_all(argv[2]);
+		Buffer dec = decompress_lz77(input.data, input.len);
+		write_all(argv[3], dec.data, dec.len);
+		free(input.data);
+		free(dec.data);
+		return 0;
+	}
+
+	fprintf(stderr, "unknown mode: %s\n", argv[1]);
+	return 1;
+}
+
diff --git a/vlib/compress/lz/interop/lz77_ref.py b/vlib/compress/lz/interop/lz77_ref.py
new file mode 100644
index 000000000..29eed3abd
--- /dev/null
+++ b/vlib/compress/lz/interop/lz77_ref.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+import sys
+import time
+
+MIN_MATCH = 3
+MAX_LITERAL = 128
+STREAM_MAGIC = b'VLZ1'
+FORMAT_LZ77 = 0
+
+
+def encode_uvarint(value: int) -> bytes:
+    out = bytearray()
+    v = value
+    while v >= 0x80:
+        out.append((v & 0x7F) | 0x80)
+        v >>= 7
+    out.append(v)
+    return bytes(out)
+
+
+def decode_uvarint(data: bytes, pos: int) -> tuple[int, int]:
+    value = 0
+    shift = 0
+    i = pos
+    while i < len(data) and shift <= 63:
+        b = data[i]
+        i += 1
+        value |= (b & 0x7F) << shift
+        if (b & 0x80) == 0:
+            return value, i
+        shift += 7
+    raise ValueError('bad length varint')
+
+
+def compress_lz77(data: bytes) -> bytes:
+    out = bytearray()
+    out.extend(STREAM_MAGIC)
+    out.append(FORMAT_LZ77)
+    out.extend(encode_uvarint(len(data)))
+    i = 0
+    while i < len(data):
+        lit_len = min(MAX_LITERAL, len(data) - i)
+        out.append(lit_len - 1)
+        out.extend(data[i : i + lit_len])
+        i += lit_len
+    return bytes(out)
+
+
+def decompress_lz77(data: bytes) -> bytes:
+    if len(data) < 6 or data[:4] != STREAM_MAGIC:
+        raise ValueError('bad magic')
+    if data[4] != FORMAT_LZ77:
+        raise ValueError('format mismatch')
+
+    expected_len, pos = decode_uvarint(data, 5)
+    out = bytearray()
+    while pos < len(data):
+        control = data[pos]
+        pos += 1
+        if (control & 0x80) == 0:
+            literal_len = (control & 0x7F) + 1
+            if pos + literal_len > len(data):
+                raise ValueError('truncated literal')
+            out.extend(data[pos : pos + literal_len])
+            pos += literal_len
+        else:
+            length = (control & 0x7F) + MIN_MATCH
+            off, pos = decode_uvarint(data, pos)
+            if off == 0 or off > len(out):
+                raise ValueError('bad offset')
+            base = len(out) - off
+            for k in range(length):
+                out.append(out[base + k])
+    if len(out) != expected_len:
+        raise ValueError('length mismatch')
+    return bytes(out)
+
+
+def main() -> int:
+    if len(sys.argv) < 2:
+        print(
+            f'usage:\n'
+            f'  {sys.argv[0]} bench <input.bin> <iterations>\n'
+            f'  {sys.argv[0]} compress <input.bin> <output.bin>\n'
+            f'  {sys.argv[0]} decompress <input.bin> <output.bin>',
+            file=sys.stderr,
+        )
+        return 1
+
+    mode = sys.argv[1]
+    if mode == 'bench':
+        if len(sys.argv) < 4:
+            print(f'usage: {sys.argv[0]} bench <input.bin> <iterations>', file=sys.stderr)
+            return 1
+        input_path = sys.argv[2]
+        iterations = int(sys.argv[3])
+        if iterations <= 0:
+            print('iterations must be > 0', file=sys.stderr)
+            return 1
+        with open(input_path, 'rb') as f:
+            data = f.read()
+        start = time.perf_counter()
+        for _ in range(iterations):
+            enc = compress_lz77(data)
+            dec = decompress_lz77(enc)
+            if dec != data:
+                print('roundtrip mismatch', file=sys.stderr)
+                return 1
+        elapsed_ms = int((time.perf_counter() - start) * 1000)
+        print(f'ms={elapsed_ms}')
+        return 0
+
+    if mode == 'compress':
+        if len(sys.argv) < 4:
+            print(f'usage: {sys.argv[0]} compress <input.bin> <output.bin>', file=sys.stderr)
+            return 1
+        with open(sys.argv[2], 'rb') as f:
+            data = f.read()
+        out = compress_lz77(data)
+        with open(sys.argv[3], 'wb') as f:
+            f.write(out)
+        return 0
+
+    if mode == 'decompress':
+        if len(sys.argv) < 4:
+            print(f'usage: {sys.argv[0]} decompress <input.bin> <output.bin>', file=sys.stderr)
+            return 1
+        with open(sys.argv[2], 'rb') as f:
+            data = f.read()
+        out = decompress_lz77(data)
+        with open(sys.argv[3], 'wb') as f:
+            f.write(out)
+        return 0
+
+    print(f'unknown mode: {mode}', file=sys.stderr)
+    return 1
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
+
diff --git a/vlib/compress/lz/interop/lz_interop.v b/vlib/compress/lz/interop/lz_interop.v
new file mode 100644
index 000000000..5c010d08f
--- /dev/null
+++ b/vlib/compress/lz/interop/lz_interop.v
@@ -0,0 +1,172 @@
+module main
+
+import compress.lz
+import os
+
+const default_data_size = 512 * 1024
+
+fn main() {
+	mut data_size := default_data_size
+	if os.args.len > 1 {
+		parsed := int(os.args[1].i32())
+		if parsed > 0 {
+			data_size = parsed
+		}
+	}
+	data := `z`.repeat(data_size).bytes()
+
+	println('LZ interop input: ${data.len} bytes')
+
+	for format in [lz.Format.lz77, .lz78, .lzw, .lz4, .lzss, .lzma, .lzjb] {
+		validate_v_roundtrip(data, format) or {
+			eprintln('V validation failed for ${format}: ${err.msg()}')
+			exit(1)
+		}
+		println('V roundtrip (${format}): OK')
+	}
+
+	tmp_dir := os.join_path(os.temp_dir(), 'v_lz_interop')
+	os.mkdir_all(tmp_dir) or {
+		eprintln('Could not create temp directory ${tmp_dir}: ${err.msg()}')
+		exit(1)
+	}
+	defer {
+		os.rmdir_all(tmp_dir) or {
+			eprintln('Could not remove temp directory ${tmp_dir}: ${err.msg()}')
+		}
+	}
+	input_path := os.join_path(tmp_dir, 'input.bin')
+	os.write_file_array(input_path, data) or {
+		eprintln('Could not write input file ${input_path}: ${err.msg()}')
+		exit(1)
+	}
+	mut c_bin := ''
+	if bin := compile_c_runner() {
+		c_bin = bin
+	} else {
+		eprintln('Skipping C benchmark: ${err.msg()}')
+	}
+	python_ok := has_python3()
+	if !python_ok {
+		eprintln('Skipping Python benchmark: python3 is not available')
+	}
+
+	if c_bin.len > 0 {
+		cross_validate_v_c(c_bin, data, input_path, tmp_dir) or {
+			eprintln('Cross-validation V<->C failed: ${err.msg()}')
+			exit(1)
+		}
+		println('Cross-validation: V<->C compress/decompress OK')
+	} else {
+		println('Cross-validation: skipped V<->C (requires C compiler)')
+	}
+
+	if python_ok {
+		cross_validate_v_python(data, input_path, tmp_dir) or {
+			eprintln('Cross-validation V<->Python failed: ${err.msg()}')
+			exit(1)
+		}
+		println('Cross-validation: V<->Python compress/decompress OK')
+	} else {
+		println('Cross-validation: skipped V<->Python (requires python3)')
+	}
+}
+
+fn validate_v_roundtrip(data []u8, format lz.Format) ! {
+	encoded := lz.compress(data, format)!
+	decoded := lz.decompress(encoded, format)!
+	if decoded != data {
+		return error('roundtrip mismatch for ${format}')
+	}
+}
+
+fn compile_c_runner() !string {
+	cc := choose_cc()
+	if cc.len == 0 {
+		return error('no C compiler found (tried cc, gcc, and clang)')
+	}
+	bin_path := os.join_path(os.temp_dir(), 'lz77_ref_bench')
+	c_src := os.join_path(@DIR, 'lz77_ref.c')
+	compile_cmd := '${cc} -O3 -std=c99 ${os.quoted_path(c_src)} -o ${os.quoted_path(bin_path)}'
+	compile_res := os.execute(compile_cmd)
+	if compile_res.exit_code != 0 {
+		return error('C compile failed: ${compile_res.output.trim_space()}')
+	}
+	return bin_path
+}
+
+fn choose_cc() string {
+	for cc in ['cc', 'gcc', 'clang'] {
+		if os.execute('${cc} --version').exit_code == 0 {
+			return cc
+		}
+	}
+	return ''
+}
+
+fn has_python3() bool {
+	return os.execute('python3 --version').exit_code == 0
+}
+
+fn cross_validate_v_c(c_bin string, original []u8, input_path string, tmp_dir string) ! {
+	v_encoded := os.join_path(tmp_dir, 'v_encoded.bin')
+	c_decoded := os.join_path(tmp_dir, 'c_decoded.bin')
+	c_encoded := os.join_path(tmp_dir, 'c_encoded.bin')
+
+	v_stream := lz.compress_lz77(original)!
+	os.write_file_array(v_encoded, v_stream)!
+
+	mut res :=
+		os.execute('${os.quoted_path(c_bin)} decompress ${os.quoted_path(v_encoded)} ${os.quoted_path(c_decoded)}')
+	if res.exit_code != 0 {
+		return error('C decompress(V output) failed: ${res.output.trim_space()}')
+	}
+	validate_equal_files(input_path, c_decoded, 'V->C')!
+
+	res =
+		os.execute('${os.quoted_path(c_bin)} compress ${os.quoted_path(input_path)} ${os.quoted_path(c_encoded)}')
+	if res.exit_code != 0 {
+		return error('C compress failed: ${res.output.trim_space()}')
+	}
+	c_encoded_data := os.read_bytes(c_encoded)!
+	v_decoded := lz.decompress_lz77(c_encoded_data)!
+	if v_decoded != original {
+		return error('C->V output mismatch')
+	}
+}
+
+fn cross_validate_v_python(original []u8, input_path string, tmp_dir string) ! {
+	v_encoded := os.join_path(tmp_dir, 'v_encoded_for_py.bin')
+	py_decoded := os.join_path(tmp_dir, 'py_decoded.bin')
+	py_encoded := os.join_path(tmp_dir, 'py_encoded.bin')
+	py_script := os.join_path(@DIR, 'lz77_ref.py')
+
+	v_stream := lz.compress_lz77(original)!
+	os.write_file_array(v_encoded, v_stream)!
+
+	mut res :=
+		os.execute('python3 ${os.quoted_path(py_script)} decompress ${os.quoted_path(v_encoded)} ${os.quoted_path(py_decoded)}')
+	if res.exit_code != 0 {
+		return error('Python decompress(V output) failed: ${res.output.trim_space()}')
+	}
+	validate_equal_files(input_path, py_decoded, 'V->Python')!
+
+	res =
+		os.execute('python3 ${os.quoted_path(py_script)} compress ${os.quoted_path(input_path)} ${os.quoted_path(py_encoded)}')
+	if res.exit_code != 0 {
+		return error('Python compress failed: ${res.output.trim_space()}')
+	}
+	py_encoded_data := os.read_bytes(py_encoded)!
+	v_decoded := lz.decompress_lz77(py_encoded_data)!
+	if v_decoded != original {
+		return error('Python->V output mismatch')
+	}
+}
+
+fn validate_equal_files(expected_path string, actual_path string, tag string) ! {
+	expected := os.read_bytes(expected_path)!
+	actual := os.read_bytes(actual_path)!
+	if expected != actual {
+		return error('${tag} output mismatch')
+	}
+}
diff --git a/vlib/compress/lz/lz.v b/vlib/compress/lz/lz.v
new file mode 100644
index 000000000..cd9c44135
--- /dev/null
+++ b/vlib/compress/lz/lz.v
@@ -0,0 +1,53 @@
+module lz
+
+// Format identifies which LZ-family codec variant to use.
+pub enum Format {
+	lz77
+	lz78
+	lzw
+	lz4
+	lzss
+	lzma
+	lzjb
+}
+
+// format_from_string parses a case-insensitive format name.
+pub fn format_from_string(name string) !Format {
+	key := name.to_lower()
+	return match key {
+		'lz77' { .lz77 }
+		'lz78' { .lz78 }
+		'lzw' { .lzw }
+		'lz4' { .lz4 }
+		'lzss' { .lzss }
+		'lzma' { .lzma }
+		'lzjb' { .lzjb }
+		else { return error('unknown lz format: ${name}') }
+	}
+}
+
+// compress compresses data with the selected LZ format.
+pub fn compress(data []u8, format Format) ![]u8 {
+	return match format {
+		.lz77 { compress_lz77(data) }
+		.lz78 { compress_lz78(data) }
+		.lzw { compress_lzw(data) }
+		.lz4 { compress_lz4(data) }
+		.lzss { compress_lzss(data) }
+		.lzma { compress_lzma(data) }
+		.lzjb { compress_lzjb(data) }
+	}
+}
+
+// decompress decompresses data with the selected LZ format.
+pub fn decompress(data []u8, format Format) ![]u8 {
+	return match format {
+		.lz77 { decompress_lz77(data) }
+		.lz78 { decompress_lz78(data) }
+		.lzw { decompress_lzw(data) }
+		.lz4 { decompress_lz4(data) }
+		.lzss { decompress_lzss(data) }
+		.lzma { decompress_lzma(data) }
+		.lzjb { decompress_lzjb(data) }
+	}
+}
diff --git a/vlib/compress/lz/lz4.v b/vlib/compress/lz/lz4.v
new file mode 100644
index 000000000..210c6cb4f
--- /dev/null
+++ b/vlib/compress/lz/lz4.v
@@ -0,0 +1,18 @@
+module lz
+
+const lz4_profile = MatchProfile{
+	window:      65535
+	min_match:   4
+	max_match:   130
+	max_literal: 128
+}
+
+// compress_lz4 compresses data using a pure-V LZ4-like stream.
+pub fn compress_lz4(data []u8) ![]u8 {
+	return compress_with_profile(data, lz4_profile, .lz4)
+}
+
+// decompress_lz4 decompresses data produced by compress_lz4.
+pub fn decompress_lz4(data []u8) ![]u8 {
+	return decompress_with_profile(data, lz4_profile, .lz4)
+}
diff --git a/vlib/compress/lz/lz77.v b/vlib/compress/lz/lz77.v
new file mode 100644
index 000000000..0d4364dcf
--- /dev/null
+++ b/vlib/compress/lz/lz77.v
@@ -0,0 +1,18 @@
+module lz
+
+const lz77_profile = MatchProfile{
+	window:      4096
+	min_match:   3
+	max_match:   130
+	max_literal: 128
+}
+
+// compress_lz77 compresses data using a pure-V LZ77 style stream.
+pub fn compress_lz77(data []u8) ![]u8 {
+	return compress_with_profile(data, lz77_profile, .lz77)
+}
+
+// decompress_lz77 decompresses data produced by compress_lz77.
+pub fn decompress_lz77(data []u8) ![]u8 {
+	return decompress_with_profile(data, lz77_profile, .lz77)
+}
diff --git a/vlib/compress/lz/lz78.v b/vlib/compress/lz/lz78.v
new file mode 100644
index 000000000..e3884cb49
--- /dev/null
+++ b/vlib/compress/lz/lz78.v
@@ -0,0 +1,85 @@
+module lz
+
+// compress_lz78 compresses data using a pure-V LZ78 dictionary stream.
+// Tokens are uvarint(prefix index), a flag byte (1 = suffix byte follows, 0 = none), then the suffix.
+pub fn compress_lz78(data []u8) ![]u8 {
+	mut payload := []u8{}
+	// Trie keyed by (phrase index << 8) | next byte: each phrase is an earlier phrase plus one byte.
+	mut dict := map[u64]int{}
+	mut next_index := 1
+	mut current := 0 // index of the phrase matched so far; 0 is the empty phrase
+
+	for b in data {
+		key := (u64(current) << 8) | u64(b)
+		if key in dict {
+			current = dict[key]
+			continue
+		}
+
+		// Unknown extension: emit (prefix, flag 1, byte) and register it as a new phrase.
+		encode_uvarint(mut payload, u64(current))
+		payload << u8(1)
+		payload << b
+		dict[key] = next_index
+		next_index++
+		current = 0
+	}
+
+	if current != 0 {
+		// The input ended inside a known phrase: emit it with flag 0 (no suffix byte).
+		encode_uvarint(mut payload, u64(current))
+		payload << u8(0)
+	}
+
+	return wrap_payload(.lz78, data, payload)
+}
+
+// decompress_lz78 decompresses data produced by compress_lz78.
+// Tokens are uvarint(prefix index), a flag byte (1 = suffix byte follows, 0 = none), then the suffix.
+pub fn decompress_lz78(data []u8) ![]u8 {
+	payload, expected_len := unwrap_payload(data, .lz78)!
+	// The declared length is untrusted: token k takes >= 2 bytes and yields <= k bytes of output.
+	max_out := i64(payload.len / 2) * i64(payload.len / 2 + 1) / 2
+	mut out := []u8{cap: if expected_len < max_out { int(expected_len) } else { int(max_out) }}
+	mut dict := map[int][]u8{}
+	mut next_index := 1
+	mut pos := 0
+
+	for pos < payload.len && i64(out.len) <= expected_len {
+		prefix, next_pos, ok := decode_uvarint(payload, pos)
+		if !ok {
+			return error('invalid lz78 stream: bad prefix index')
+		}
+		pos = next_pos
+		if pos >= payload.len {
+			return error('invalid lz78 stream: missing suffix flag')
+		}
+		has_suffix := payload[pos]
+		pos++
+
+		// Indices 1 .. next_index-1 are defined; compare as u64 so huge values cannot wrap in int().
+		if prefix >= u64(next_index) {
+			return error('invalid lz78 stream: unknown prefix index')
+		}
+		mut phrase := if prefix == 0 { []u8{} } else { dict[int(prefix)].clone() }
+
+		if has_suffix == 1 {
+			if pos >= payload.len {
+				return error('invalid lz78 stream: missing suffix byte')
+			}
+			phrase << payload[pos]
+			pos++
+		} else if has_suffix != 0 {
+			return error('invalid lz78 stream: bad suffix flag')
+		}
+
+		out << phrase
+		dict[next_index] = phrase
+		next_index++
+	}
+
+	if i64(out.len) != expected_len {
+		return error('invalid lz78 stream: length mismatch')
+	}
+	return out
+}
diff --git a/vlib/compress/lz/lz_test.v b/vlib/compress/lz/lz_test.v
new file mode 100644
index 000000000..577d9abc3
--- /dev/null
+++ b/vlib/compress/lz/lz_test.v
@@ -0,0 +1,90 @@
+module lz
+
+const sample_data = ('The quick brown fox jumps over the lazy dog. '.repeat(12) +
+	'aaaaaaaaabbbbbbbbbcccccccccdddddddddeeeeeeeee').bytes() // repeated sentence followed by single-letter runs a..e
+
+fn test_roundtrip_all_formats() {
+	// Round-trip the shared sample through the generic API once per listed format;
+	// the decoded bytes must equal the input exactly.
+	for kind in [Format.lz77, .lz78, .lzw, .lz4, .lzss, .lzma, .lzjb] {
+		packed := compress(sample_data, kind)!
+		assert decompress(packed, kind)! == sample_data
+	}
+}
+
+fn test_format_specific_api_roundtrip() {
+	// lz77 wrapper pair
+	assert decompress_lz77(compress_lz77(sample_data)!)! == sample_data
+
+	// lz78 wrapper pair
+	assert decompress_lz78(compress_lz78(sample_data)!)! == sample_data
+
+	// lzw wrapper pair
+	assert decompress_lzw(compress_lzw(sample_data)!)! == sample_data
+
+	// lz4 wrapper pair
+	assert decompress_lz4(compress_lz4(sample_data)!)! == sample_data
+
+	// lzss wrapper pair
+	assert decompress_lzss(compress_lzss(sample_data)!)! == sample_data
+
+	// lzma wrapper pair
+	assert decompress_lzma(compress_lzma(sample_data)!)! == sample_data
+
+	// lzjb wrapper pair
+	assert decompress_lzjb(compress_lzjb(sample_data)!)! == sample_data
+}
+
+fn test_mismatched_format_fails() {
+	lz77_stream := compress(sample_data, .lz77)!
+	// Decoding an lz77 stream while asking for .lz4 must report a format mismatch.
+	if _ := decompress(lz77_stream, .lz4) {
+		assert false
+	} else {
+		assert err.msg().contains('format mismatch')
+	}
+}
+
+fn test_decoded_length_too_large_fails() {
+	// Header-only stream whose uvarint field is 1 << 31; the decoder must reject it.
+	mut forged := []u8{}
+	forged << stream_magic
+	forged << u8(Format.lz77)
+	encode_uvarint(mut forged, u64(1) << 31)
+	if _ := decompress_lz77(forged) {
+		assert false
+	} else {
+		assert err.msg().contains('decoded length too large')
+	}
+}
+
+fn test_match_offset_too_large_fails() {
+	// Hand-built lz77 stream ending in a uvarint of 1 << 63; expect 'bad match offset'.
+	mut forged := []u8{}
+	forged << stream_magic
+	forged << u8(Format.lz77)
+	encode_uvarint(mut forged, u64(4))
+	forged << u8(0)
+	forged << `A`
+	forged << u8(0x80)
+	encode_uvarint(mut forged, u64(1) << 63)
+	if _ := decompress_lz77(forged) {
+		assert false
+	} else {
+		assert err.msg().contains('bad match offset')
+	}
+}
+
+fn test_high_entropy_roundtrip_large_window_formats() {
+	// Fill 128 KiB with the top byte of successive states of a 32-bit LCG.
+	mut noise := []u8{cap: 128 * 1024}
+	mut lcg := u32(0x9e3779b9)
+	for _ in 0 .. 128 * 1024 {
+		lcg = lcg * 1664525 + 1013904223
+		noise << u8(lcg >> 24)
+	}
+
+	for kind in [Format.lz4, .lzma] {
+		packed := compress(noise, kind)!
+		assert decompress(packed, kind)! == noise
+	}
+}
diff --git a/vlib/compress/lz/lzjb.v b/vlib/compress/lz/lzjb.v
new file mode 100644
index 000000000..d12a59ed1
--- /dev/null
+++ b/vlib/compress/lz/lzjb.v
@@ -0,0 +1,18 @@
+module lz
+
+const lzjb_profile = MatchProfile{
+	window:      1024 // NOTE(review): field meanings/units come from MatchProfile, declared outside lzjb.v; confirm there
+	min_match:   3
+	max_match:   66 // lzss_profile and lzma_profile use 130 here
+	max_literal: 128
+}
+
+// compress_lzjb compresses data into a pure-V LZJB-like stream by calling compress_with_profile with lzjb_profile.
+pub fn compress_lzjb(data []u8) ![]u8 {
+	return compress_with_profile(data, lzjb_profile, .lzjb)
+}
+
+// decompress_lzjb decompresses data produced by compress_lzjb by calling decompress_with_profile with lzjb_profile.
+pub fn decompress_lzjb(data []u8) ![]u8 {
+	return decompress_with_profile(data, lzjb_profile, .lzjb)
+}
diff --git a/vlib/compress/lz/lzma.v b/vlib/compress/lz/lzma.v
new file mode 100644
index 000000000..d62a122ff
--- /dev/null
+++ b/vlib/compress/lz/lzma.v
@@ -0,0 +1,18 @@
+module lz
+
+const lzma_profile = MatchProfile{
+	window:      32768 // NOTE(review): field meanings/units come from MatchProfile, declared outside lzma.v; confirm there
+	min_match:   3
+	max_match:   130
+	max_literal: 128
+}
+
+// compress_lzma compresses data into a pure-V LZMA-like stream by calling compress_with_profile with lzma_profile.
+pub fn compress_lzma(data []u8) ![]u8 {
+	return compress_with_profile(data, lzma_profile, .lzma)
+}
+
+// decompress_lzma decompresses data produced by compress_lzma by calling decompress_with_profile with lzma_profile.
+pub fn decompress_lzma(data []u8) ![]u8 {
+	return decompress_with_profile(data, lzma_profile, .lzma)
+}
diff --git a/vlib/compress/lz/lzss.v b/vlib/compress/lz/lzss.v
new file mode 100644
index 000000000..4105ff6ac
--- /dev/null
+++ b/vlib/compress/lz/lzss.v
@@ -0,0 +1,18 @@
+module lz
+
+const lzss_profile = MatchProfile{
+	window:      4096 // NOTE(review): field meanings/units come from MatchProfile, declared outside lzss.v; confirm there
+	min_match:   3
+	max_match:   130
+	max_literal: 128
+}
+
+// compress_lzss compresses data into a pure-V LZSS style stream by calling compress_with_profile with lzss_profile.
+pub fn compress_lzss(data []u8) ![]u8 {
+	return compress_with_profile(data, lzss_profile, .lzss)
+}
+
+// decompress_lzss decompresses data produced by compress_lzss by calling decompress_with_profile with lzss_profile.
+pub fn decompress_lzss(data []u8) ![]u8 {
+	return decompress_with_profile(data, lzss_profile, .lzss)
+}
diff --git a/vlib/compress/lz/lzw.v b/vlib/compress/lz/lzw.v
new file mode 100644
index 000000000..1bfb561d9
--- /dev/null
+++ b/vlib/compress/lz/lzw.v
@@ -0,0 +1,92 @@
+module lz
+
+// compress_lzw encodes data as a sequence of uvarint LZW codes and wraps the result as an .lzw stream.
+pub fn compress_lzw(data []u8) ![]u8 {
+	// Codes 0..255 are pre-assigned to the single-byte strings.
+	mut codes := map[string]int{}
+	for byte_val in 0 .. 256 {
+		codes[[u8(byte_val)].bytestr()] = byte_val
+	}
+	mut code_count := 256
+	mut pending := ''
+	mut encoded := []u8{}
+	for b in data {
+		ch := [b].bytestr()
+		extended := pending + ch
+		if extended !in codes {
+			// Longest known phrase ends here: emit it, learn the extension, restart from b.
+			if pending.len > 0 {
+				encode_uvarint(mut encoded, u64(codes[pending]))
+			}
+			codes[extended] = code_count
+			code_count++
+			pending = ch
+		} else {
+			pending = extended
+		}
+	}
+	// Flush the phrase that was still being matched when the input ran out.
+	if pending.len > 0 {
+		encode_uvarint(mut encoded, u64(codes[pending]))
+	}
+	return wrap_payload(.lzw, data, encoded)
+}
+
+// decompress_lzw decompresses data produced by compress_lzw; malformed or wrongly sized streams return an error.
+pub fn decompress_lzw(data []u8) ![]u8 {
+	payload, expected_len := unwrap_payload(data, .lzw)!
+	if payload.len == 0 {
+		if expected_len == i64(0) {
+			return []u8{}
+		}
+		return error('invalid lzw stream: missing codes')
+	}
+
+	mut dict := map[int][]u8{}
+	for i in 0 .. 256 {
+		dict[i] = [u8(i)]
+	}
+	mut next_code := 256
+
+	first_code, next_pos, ok := decode_uvarint(payload, 0)
+	if !ok {
+		return error('invalid lzw stream: bad initial code')
+	}
+	mut pos := next_pos
+	if first_code >= u64(next_code) {
+		return error('invalid lzw stream: unknown initial code')
+	}
+	mut word := dict[int(first_code)].clone() // cast is lossless: range-checked as u64 above
+	mut out := word.clone()
+
+	for pos < payload.len {
+		code_u64, new_pos, ok_code := decode_uvarint(payload, pos)
+		if !ok_code {
+			return error('invalid lzw stream: bad code')
+		}
+		pos = new_pos
+		if code_u64 > u64(next_code) {
+			return error('invalid lzw stream: unknown code')
+		}
+		code := int(code_u64) // lossless: checked against next_code before narrowing
+		mut entry := if code < next_code { dict[code].clone() } else { word.clone() }
+		if code == next_code {
+			entry << word[0] // code not defined yet: the phrase is word plus its own first byte
+		}
+		// Fail before buffering more than the declared length (bounds memory on crafted input).
+		if i64(out.len) + i64(entry.len) > expected_len {
+			return error('invalid lzw stream: length mismatch')
+		}
+		out << entry
+		mut new_entry := word.clone()
+		new_entry << entry[0]
+		dict[next_code] = new_entry
+		next_code++
+		word = entry.clone()
+	}
+
+	if i64(out.len) != expected_len {
+		return error('invalid lzw stream: length mismatch')
+	}
+	return out
+}
-- 
2.39.5