module cbor

import math
import time

// i64 images of the 32-bit integer bounds, used by the range checks in
// `unpack[T]` before narrowing a decoded i64.
const i32_min_i64 = -i64(2_147_483_647) - 1
const i32_max_i64 = i64(2_147_483_647)
const u32_max_i64 = i64(4_294_967_295)

// Generic comptime-driven encoder/decoder. The pack[T] / unpack[T]
// methods below dispatch on T at compile time, so each call site
// monomorphises into straight-line code with no runtime type tests.
//
// Supported targets:
//   * bool, all signed/unsigned integer widths, f32, f64
//   * string (text), []u8 (byte string), enums (encoded as int)
//   * `$array` (any V array) and `$map` (any K with a primitive scalar
//     decoder — string, signed/unsigned ints, bool — plus any V).
//   * `$struct` (encoded as a string-keyed map; honours
//     `@[cbor: 'alt']`, `@[skip]`, `@[cbor: '-']`, optional fields)
//   * `time.Time` — whole seconds use tag 1 (epoch seconds, integer);
//     sub-second values use tag 0 (RFC 3339 string with nanosecond
//     precision). Decode accepts tag 0 (RFC 3339 text) or tag 1
//     (integer or float).
//   * `RawMessage`, `Value`, `Marshaler`/`Unmarshaler` implementers.

// pack encodes `val` into the packer's buffer using compile-time dispatch.
// It returns an error when a Marshaler produces empty, malformed or
// trailing bytes, when `validate_utf8` is set and a string is not valid
// UTF-8, or when encoding any nested element fails. Types matched by no
// branch are encoded as CBOR null.
@[inline]
pub fn (mut p Packer) pack[T](val T) ! {
	$if T is RawMessage {
		p.pack_raw(val)!
	} $else $if T is Marshaler {
		bytes := val.to_cbor()
		if bytes.len == 0 {
			return error('cbor: ${T.name}.to_cbor() returned empty bytes')
		}
		// Validate the user's output is exactly one well-formed CBOR
		// item before splicing it into the parent stream. A malformed
		// or truncated Marshaler would otherwise silently corrupt the
		// surrounding fields (the next struct field would be parsed
		// from inside the bad item's claimed payload).
		mut probe := new_unpacker(bytes, DecodeOpts{})
		probe.skip_value() or {
			return error('cbor: ${T.name}.to_cbor() returned malformed CBOR: ${err.msg()}')
		}
		if !probe.done() {
			return error('cbor: ${T.name}.to_cbor() returned ${probe.remaining()} trailing byte(s) past one item')
		}
		p.reserve(bytes.len)
		unsafe { p.buf.push_many(bytes.data, bytes.len) }
	} $else $if T is Value {
		p.pack_value(val)!
	} $else $if T is time.Time {
		// Whole-second values use tag 1 (epoch seconds) + integer — the
		// most compact and canonical form (RFC 8949 §3.4.2). Sub-second
		// values fall back to tag 0 (RFC 3339 string) with nanosecond
		// precision: encoding the seconds.nanoseconds pair as a tag-1
		// float would lose ~µs of resolution past the year 2001 (f64
		// can't carry both a 10-digit unix epoch and 9 fractional digits).
		if val.nanosecond == 0 {
			p.pack_tag(tag_epoch)
			p.pack_int(val.unix())
		} else {
			p.pack_tag(tag_date_time)
			p.pack_text(format_rfc3339_nano(val))
		}
	} $else $if T is string {
		if p.opts.validate_utf8 && !utf8_validate_slice(val.bytes(), 0, val.len) {
			return error('cbor: validate_utf8 set, but string contains invalid UTF-8 (len=${val.len})')
		}
		p.pack_text(val)
	} $else $if T is bool {
		p.pack_bool(val)
	} $else $if T is i8 {
		p.pack_int(i64(val))
	} $else $if T is i16 {
		p.pack_int(i64(val))
	} $else $if T is int {
		p.pack_int(i64(val))
	} $else $if T is i32 {
		p.pack_int(i64(val))
	} $else $if T is i64 {
		p.pack_int(val)
	} $else $if T is u8 {
		p.pack_uint(u64(val))
	} $else $if T is u16 {
		p.pack_uint(u64(val))
	} $else $if T is u32 {
		p.pack_uint(u64(val))
	} $else $if T is u64 {
		p.pack_uint(val)
	} $else $if T is f32 {
		p.pack_float(f64(val))
	} $else $if T is f64 {
		p.pack_float(val)
	} $else $if T is $enum {
		p.pack_int(i64(val))
	} $else $if T is []u8 {
		p.pack_bytes(val)
	} $else $if T is $array {
		p.pack_array_header(u64(val.len))
		for item in val {
			p.pack(item)!
		}
	} $else $if T is $map {
		p.pack_map_header(u64(val.len))
		if p.opts.canonical && val.len > 1 {
			// Sub-encoders inherit `validate_utf8` so the strict-encode
			// guarantee survives canonical mode. `self_describe` and
			// `initial_cap` stay local — the wrapper belongs to the top-level
			// stream only, and 16 B is enough for almost every key/value pair.
			sub_opts := EncodeOpts{
				initial_cap:   16
				canonical:     true
				validate_utf8: p.opts.validate_utf8
			}
			mut encoded_keys := [][]u8{cap: val.len}
			mut encoded_vals := [][]u8{cap: val.len}
			for k, item in val {
				mut ksub := new_packer(sub_opts)
				ksub.pack(k)!
				encoded_keys << ksub.bytes().clone()
				mut vsub := new_packer(sub_opts)
				vsub.pack(item)!
				encoded_vals << vsub.bytes().clone()
			}
			for i in sort_canonical_indices(encoded_keys) {
				p.reserve(encoded_keys[i].len + encoded_vals[i].len)
				unsafe {
					p.buf.push_many(encoded_keys[i].data, encoded_keys[i].len)
					p.buf.push_many(encoded_vals[i].data, encoded_vals[i].len)
				}
			}
		} else {
			for k, item in val {
				p.pack(k)!
				p.pack(item)!
			}
		}
	} $else $if T is $struct {
		mut strategy := ''
		$for attr in T.attributes {
			if attr.name == 'cbor_rename_all' {
				strategy = attr.arg
			}
		}
		mut field_count := 0
		$for field in T.fields {
			if !cbor_field_skipped(field) {
				field_count++
			}
		}
		p.pack_map_header(u64(field_count))
		if p.opts.canonical && field_count > 1 {
			// RFC 8949 §4.2.1: deterministic encoding requires keys to
			// be ordered by their encoded byte form, not by struct
			// declaration. Encode each (key, value) pair to a sub-buffer,
			// sort, then splice — same shape as the $map branch above.
			// `validate_utf8` propagates so strict-encode callers don't
			// silently lose the guarantee in canonical mode.
			sub_opts := EncodeOpts{
				initial_cap:   16
				canonical:     true
				validate_utf8: p.opts.validate_utf8
			}
			mut encoded_keys := [][]u8{cap: field_count}
			mut encoded_vals := [][]u8{cap: field_count}
			$for field in T.fields {
				if !cbor_field_skipped(field) {
					key := cbor_field_explicit_key(field) or {
						if strategy != '' {
							cbor_rename(field.name, strategy)
						} else {
							field.name
						}
					}
					mut ksub := new_packer(sub_opts)
					ksub.pack_text(key)
					encoded_keys << ksub.bytes().clone()
					mut vsub := new_packer(sub_opts)
					$if field.typ is $option {
						if val.$(field.name) == none {
							vsub.pack_null()
						} else {
							vsub.pack(get_value_from_optional(val.$(field.name)))!
						}
					} $else {
						vsub.pack(val.$(field.name))!
					}
					encoded_vals << vsub.bytes().clone()
				}
			}
			for i in sort_canonical_indices(encoded_keys) {
				p.reserve(encoded_keys[i].len + encoded_vals[i].len)
				unsafe {
					p.buf.push_many(encoded_keys[i].data, encoded_keys[i].len)
					p.buf.push_many(encoded_vals[i].data, encoded_vals[i].len)
				}
			}
		} else {
			$for field in T.fields {
				if !cbor_field_skipped(field) {
					key := cbor_field_explicit_key(field) or {
						if strategy != '' {
							cbor_rename(field.name, strategy)
						} else {
							field.name
						}
					}
					p.pack_text(key)
					$if field.typ is $option {
						if val.$(field.name) == none {
							p.pack_null()
						} else {
							p.pack(get_value_from_optional(val.$(field.name)))!
						}
					} $else {
						p.pack(val.$(field.name))!
					}
				}
			}
		}
	} $else {
		p.pack_null()
	}
}

// get_value_from_optional unwraps an Option known to be `Some`.
// Its signature exists solely so V's generic inferrer can pick up the
// inner T at the comptime call site.
fn get_value_from_optional[T](val ?T) T {
	return val or { T{} }
}

// unpack reads one CBOR value from the buffer and converts it to T.
// Integer targets are range-checked before narrowing; an out-of-range
// wire value yields the error built by `int_range` instead of wrapping.
// Types matched by no branch yield an "unsupported target type" error.
@[inline]
pub fn (mut u Unpacker) unpack[T]() !T {
	$if T is RawMessage {
		return u.unpack_raw()!
	} $else $if T is Unmarshaler {
		start := u.pos
		u.skip_value()!
		mut v := T{}
		v.from_cbor(u.data[start..u.pos])!
		return v
	} $else $if T is Value {
		return u.unpack_value()!
	} $else $if T is time.Time {
		return u.unpack_time()!
	} $else $if T is string {
		return u.unpack_text()!
	} $else $if T is bool {
		// Accept null as false-equivalent? No — strict by default.
		return u.unpack_bool()!
	} $else $if T is i8 {
		v := u.unpack_int()!
		if v < -128 || v > 127 {
			return int_range(u.pos, 'i8', v.str())
		}
		return i8(v)
	} $else $if T is i16 {
		v := u.unpack_int()!
		if v < -32_768 || v > 32_767 {
			return int_range(u.pos, 'i16', v.str())
		}
		return i16(v)
	} $else $if T is int {
		v := u.unpack_int()!
		if v < i32_min_i64 || v > i32_max_i64 {
			return int_range(u.pos, 'int', v.str())
		}
		return int(v)
	} $else $if T is i32 {
		v := u.unpack_int()!
		if v < i32_min_i64 || v > i32_max_i64 {
			return int_range(u.pos, 'i32', v.str())
		}
		return i32(v)
	} $else $if T is i64 {
		return u.unpack_int()!
	} $else $if T is u8 {
		v := u.unpack_int()!
		if v < 0 || v > 255 {
			return int_range(u.pos, 'u8', v.str())
		}
		return u8(v)
	} $else $if T is u16 {
		v := u.unpack_int()!
		if v < 0 || v > 65_535 {
			return int_range(u.pos, 'u16', v.str())
		}
		return u16(v)
	} $else $if T is u32 {
		v := u.unpack_int()!
		if v < 0 || v > u32_max_i64 {
			return int_range(u.pos, 'u32', v.str())
		}
		return u32(v)
	} $else $if T is u64 {
		neg, mag := u.unpack_int_full()!
		if neg {
			return int_range(u.pos, 'u64', '-1 - ${mag}')
		}
		return mag
	} $else $if T is f32 {
		return f32(u.unpack_float()!)
	} $else $if T is f64 {
		return u.unpack_float()!
	} $else $if T is $enum {
		// Enums travel as integers. Range-check before narrowing so an
		// oversized wire value is reported instead of silently wrapping
		// into an unrelated variant. The accepted window is the i32 range
		// plus the upper half of u32, so that u32-backed enums (whose
		// large values pass through the `int` cast by wrap-around) keep
		// round-tripping exactly as before.
		raw := u.unpack_int()!
		if raw < i32_min_i64 || raw > u32_max_i64 {
			return int_range(u.pos, T.name, raw.str())
		}
		v := int(raw)
		return unsafe { T(v) }
	} $else $if T is []u8 {
		return u.unpack_bytes()!
	} $else $if T is $array {
		mut out := T{}
		u.unpack_array_into(mut out)!
		return out
	} $else $if T is $map {
		mut out := T{}
		read_pairs_into_helper(mut u, mut out)!
		return out
	} $else $if T is $struct {
		mut result := T{}
		u.unpack_struct_into(mut result)!
		return result
	} $else {
		return error('cbor: unsupported target type')
	}
}

// unpack_array_into reads one CBOR array and appends each decoded element
// to `out`. A negative header marks an indefinite-length array, which is
// read until the break marker is consumed.
fn (mut u Unpacker) unpack_array_into[E](mut out []E) ! {
	hdr := u.unpack_array_header()!
	if hdr < 0 {
		// Indefinite.
		for {
			if u.consume_break() {
				break
			}
			out << u.unpack[E]()!
		}
		return
	}
	for _ in 0 .. hdr {
		out << u.unpack[E]()!
	}
}

// read_pairs_into_helper is a standalone (non-method) generic function;
// V's generic-method dispatch can drop the second type parameter when
// invoked from a comptime $map branch, while the standalone form
// monomorphises correctly.
// It decodes one CBOR map into `out`. With `deny_duplicate_keys` set, a
// key already present in `out` is reported as malformed input; otherwise
// a later pair overwrites an earlier one.
fn read_pairs_into_helper[K, V](mut u Unpacker, mut out map[K]V) ! {
	hdr := u.unpack_map_header()!
	if hdr < 0 {
		// Indefinite-length map: read pairs until the break marker.
		for {
			if u.consume_break() {
				break
			}
			key := u.unpack[K]()!
			val := u.unpack[V]()!
			if u.opts.deny_duplicate_keys && key in out {
				return malformed(u.pos, 'duplicate map key')
			}
			out[key] = val
		}
		return
	}
	for _ in 0 .. hdr {
		key := u.unpack[K]()!
		val := u.unpack[V]()!
		if u.opts.deny_duplicate_keys && key in out {
			return malformed(u.pos, 'duplicate map key')
		}
		out[key] = val
	}
}

// unpack_struct_into decodes one CBOR map with text keys into the fields
// of `result`. Keys are matched against each non-skipped field's wire
// name (explicit `@[cbor: '...']` override, else the `cbor_rename_all`
// strategy, else the field name). Unmatched keys are skipped, or reported
// as `UnknownFieldError` when `deny_unknown_fields` is set.
fn (mut u Unpacker) unpack_struct_into[T](mut result T) ! {
	mut strategy := ''
	$for attr in T.attributes {
		if attr.name == 'cbor_rename_all' {
			strategy = attr.arg
		}
	}
	hdr := u.unpack_map_header()!
	indef := hdr < 0
	mut remaining := if indef { i64(-1) } else { hdr }
	// Tracks keys already seen so deny_duplicate_keys can fire on struct
	// decode too (the typed-map and Value paths track separately). The
	// map is only populated when the option is set, so the common case
	// performs no per-key clone or insert. O(1) lookup via V map keeps
	// decode linear even on adversarial inputs with thousands of
	// distinct keys.
	mut seen_keys := map[string]bool{}
	for {
		if indef {
			if u.consume_break() {
				break
			}
		} else {
			if remaining == 0 {
				break
			}
			remaining--
		}
		key_ptr, key_len := u.read_text_view()!
		if u.opts.deny_duplicate_keys {
			key_str := unsafe { tos(key_ptr, key_len) }.clone()
			if key_str in seen_keys {
				return malformed(u.pos, 'duplicate map key "${key_str}"')
			}
			seen_keys[key_str] = true
		}
		mut matched := false
		$for field in T.fields {
			if !cbor_field_skipped(field) {
				name := cbor_field_explicit_key(field) or {
					if strategy != '' {
						cbor_rename(field.name, strategy)
					} else {
						field.name
					}
				}
				// Compare against the borrowed key bytes directly so no
				// string is allocated per candidate field.
				if !matched && key_len == name.len
					&& unsafe { C.memcmp(key_ptr, name.str, key_len) } == 0 {
					matched = true
					$if field.typ is $option {
						// 0xf6 is CBOR null: maps to `none` for optional fields.
						if u.pos < u.data.len && u.data[u.pos] == 0xf6 {
							u.pos++
							result.$(field.name) = none
						} else {
							mut inner := create_value_from_optional(result.$(field.name))
							u.unpack_into(mut inner)!
							result.$(field.name) = inner
						}
					} $else {
						u.unpack_into(mut result.$(field.name))!
					}
				}
			}
		}
		if !matched {
			start := u.pos
			u.skip_value()!
			if u.opts.deny_unknown_fields {
				// Clone the key: `tos` only borrows the decoder's input
				// buffer, and the error value is handed to the caller, so
				// it must not alias memory the caller may free or reuse.
				return UnknownFieldError{
					pos:  start
					name: unsafe { tos(key_ptr, key_len) }.clone()
				}
			}
		}
	}
}

// read_text_view returns a (ptr, len) view into the underlying buffer
// for one definite-length text string. Avoids allocation when matching
// struct field names. Errors on indefinite-length text since we'd have
// to copy chunks anyway.
// The returned pointer borrows `u.data`; callers that keep the text must
// copy it.
@[direct_array_access]
fn (mut u Unpacker) read_text_view() !(&u8, int) {
	start := u.pos
	b := u.read_byte()!
	major := b >> 5
	if major != 3 {
		u.pos = start
		return type_mismatch(start, 'text', b)
	}
	info := b & 0x1f
	if info == 31 {
		u.pos = start
		return error('cbor: indefinite-length text not supported as map key (decoder)')
	}
	size := u.read_arg(info)!
	// Bound the claimed length by the bytes actually left in the buffer
	// before narrowing it to int.
	if size > u64(u.data.len - u.pos) {
		return eof_oversized(u.pos, size, u.data.len - u.pos)
	}
	size_int := int(size)
	if u.opts.validate_utf8 {
		if !u.is_utf8_at(u.pos, size_int) {
			return InvalidUtf8Error{
				pos: u.pos
			}
		}
	}
	ptr := unsafe { &u8(u.data.data) + u.pos }
	u.pos += size_int
	return ptr, size_int
}

// is_utf8_at reports whether the `size` bytes of the input buffer starting
// at `start` are valid UTF-8. An empty range is valid.
@[direct_array_access; inline]
fn (u &Unpacker) is_utf8_at(start int, size int) bool {
	if size == 0 {
		return true
	}
	return utf8_validate_slice(u.data, start, size)
}

// utf8_validate_slice runs the standard UTF-8 validator on a slice
// without making an intermediate copy. Mirrors the FSM used by
// `vlib/encoding/utf8/utf8_util.v`. The 8-byte SWAR pre-scan turns a
// pure-ASCII payload (the common case: JSON-shaped keys, identifiers)
// into one load + one mask + one branch per 8 bytes.
@[direct_array_access]
fn utf8_validate_slice(data []u8, start int, size int) bool {
	mut i := start
	end := start + size
	for i < end {
		// 8-byte SWAR ASCII fast path: a pure-ASCII run skips the
		// per-byte FSM entirely. Triggers on every iteration so a single
		// non-ASCII rune doesn't disable the fast path for the rest.
		// `memcpy` into a stack u64 instead of `*(&u64(&data[i]))`: the
		// latter is undefined behaviour when `i` isn't 8-byte aligned, and
		// crashes on strict-alignment targets (e.g. some ARMv7, MIPS).
		// Modern C compilers lower this memcpy to a single unaligned load
		// on x86 / arm64, so the SWAR speed-up is preserved.
		for i + 8 <= end {
			mut chunk := u64(0)
			unsafe { C.memcpy(&chunk, &data[i], 8) }
			if chunk & 0x8080808080808080 != 0 {
				break
			}
			i += 8
		}
		if i >= end {
			break
		}
		c := data[i]
		if c < 0x80 {
			i++
			continue
		}
		// Sequence length from the lead byte; anything else (stray
		// continuation byte, 0xf8..0xff) is invalid.
		mut n := 0
		if c & 0xe0 == 0xc0 {
			n = 2
		} else if c & 0xf0 == 0xe0 {
			n = 3
		} else if c & 0xf8 == 0xf0 {
			n = 4
		} else {
			return false
		}
		if i + n > end {
			return false
		}
		// Reject overlongs / surrogates / out-of-range.
		match n {
			2 {
				if c < 0xc2 {
					return false
				}
			}
			3 {
				b := data[i + 1]
				if c == 0xe0 && b < 0xa0 {
					return false
				}
				if c == 0xed && b > 0x9f {
					return false
				}
			}
			4 {
				b := data[i + 1]
				if c == 0xf0 && b < 0x90 {
					return false
				}
				if c == 0xf4 && b > 0x8f {
					return false
				}
				if c > 0xf4 {
					return false
				}
			}
			else {}
		}
		// Every trailing byte must be a 10xxxxxx continuation byte.
		for k in 1 .. n {
			if data[i + k] & 0xc0 != 0x80 {
				return false
			}
		}
		i += n
	}
	return true
}

// create_value_from_optional returns a zero value of an Option's inner T.
// Exists so the comptime call site can infer T from a struct field.
fn create_value_from_optional[T](_val ?T) T {
	return T{}
}

// unpack_into fills the target through a mutable reference. The mut
// parameter exists so V's generic inferer picks up T from the
// `u.unpack_into(mut result.$(field.name))!` call site.
@[inline]
fn (mut u Unpacker) unpack_into[T](mut out T) ! {
	_ = out // vet's "unused parameter" check doesn't track write-only mut args
	out = u.unpack[T]()!
}

// format_rfc3339_nano emits a time.Time as RFC 3339 with full nanosecond
// precision ("YYYY-MM-DDTHH:mm:ss.nnnnnnnnnZ"). vlib's `time` module
// only goes down to milliseconds (`format_rfc3339`), but tag 0
// round-trips need 9 digits to preserve `time.Time.nanosecond` exactly.
// Inputs are normalised to UTC first so a `time.now()` from a local
// session is encoded as the correct instant rather than as wall-clock
// digits without an offset.
fn format_rfc3339_nano(t time.Time) string {
	utc := if t.is_local { t.local_to_utc() } else { t }
	return '${utc.year:04d}-${utc.month:02d}-${utc.day:02d}T${utc.hour:02d}:${utc.minute:02d}:${utc.second:02d}.${utc.nanosecond:09d}Z'
}

// --------------------------------------------------------------------
// time.Time decoding
// --------------------------------------------------------------------

// unpack_time decodes a tagged CBOR timestamp: tag 0 wrapping an RFC 3339
// text string, or tag 1 wrapping integer or float epoch seconds. Any
// other item is rejected with the read position restored to the item's
// start.
fn (mut u Unpacker) unpack_time() !time.Time {
	start := u.pos
	b := u.read_byte()!
	major := b >> 5
	if major != 6 {
		u.pos = start
		return type_mismatch(start, 'time tag', b)
	}
	number := u.read_arg(b & 0x1f)!
	match number {
		0 {
			s := u.unpack_text()!
			return time.parse_iso8601(s) or {
				return malformed(start, 'invalid RFC 3339 timestamp: ${err}')
			}
		}
		1 {
			peek := u.peek_byte() or { return error('cbor: missing tag-1 content') }
			// Major types 0 and 1 are unsigned / negative integers.
			major2 := peek >> 5
			if major2 == 0 || major2 == 1 {
				secs := u.unpack_int()!
				return time.unix(secs)
			}
			f := u.unpack_float()!
			// Reject NaN, ±Inf, and any magnitude that won't fit i64
			// before casting. Without this, NaN silently saturates to 0
			// (epoch 1970-01-01) and overflow saturates to i64::max,
			// either of which could bypass an application-level expiry
			// or freshness check.
			if math.is_nan(f) || math.is_inf(f, 0) {
				return malformed(start, 'tag 1 float must be finite, got ${f}')
			}
			if f >= 9_223_372_036_854_775_808.0 || f < -9_223_372_036_854_775_808.0 {
				return malformed(start, 'tag 1 float ${f} out of range for i64 epoch seconds')
			}
			whole := i64(math.floor(f))
			frac := f - f64(whole)
			// math.round (not i64-truncate) so 0.999_999_999s doesn't
			// silently round to 0 ns. Clamp to the valid ns range; the
			// only way to land on the boundary now is true rounding noise.
			mut ns := i64(math.round(frac * 1_000_000_000.0))
			if ns < 0 {
				ns = 0
			} else if ns > 999_999_999 {
				ns = 999_999_999
			}
			return time.unix_nanosecond(whole, int(ns))
		}
		else {
			u.pos = start
			return malformed(start, 'unexpected tag ${number} for time.Time')
		}
	}
}

// --------------------------------------------------------------------
// Struct attribute helpers
// --------------------------------------------------------------------

// cbor_field_skipped reports whether a struct field is excluded from
// encoding and decoding, i.e. it carries `@[skip]` or `@[cbor: '-']`.
@[inline]
fn cbor_field_skipped[F](field F) bool {
	for attr in field.attrs {
		if attr == 'skip' {
			return true
		}
		if attr.starts_with('cbor:') {
			if val := parse_cbor_attr(attr) {
				if val == '-' {
					return true
				}
			}
		}
	}
	return false
}

// cbor_field_explicit_key returns the rename target from `@[cbor: '...']`
// when one is set, or `none` if the field has no explicit override.
// `@[cbor: '-']` and the empty form `@[cbor: '']` are treated as no
// override (skipping is handled by `cbor_field_skipped`).
@[inline]
fn cbor_field_explicit_key[F](field F) ?string {
	for attr in field.attrs {
		if attr.starts_with('cbor:') {
			if val := parse_cbor_attr(attr) {
				if val != '-' && val != '' {
					return val
				}
			}
		}
	}
	return none
}

// cbor_rename applies a `cbor_rename_all` strategy to a field name.
// Recognised strategies are 'snake_case', 'camelCase', 'PascalCase',
// 'kebab-case' and 'SCREAMING_SNAKE_CASE'; any other strategy returns
// `name` unchanged.
fn cbor_rename(name string, strategy string) string {
	match strategy {
		'snake_case' { return cbor_to_snake(name) }
		'camelCase' { return cbor_to_camel(name) }
		'PascalCase' { return cbor_to_pascal(name) }
		'kebab-case' { return cbor_to_kebab(name) }
		'SCREAMING_SNAKE_CASE' { return cbor_to_snake(name).to_upper() }
		else { return name }
	}
}

// cbor_to_snake lower-cases every ASCII uppercase letter and inserts `_`
// before each one that is not the first byte ("userName" -> "user_name").
// All other bytes are copied through unchanged.
fn cbor_to_snake(s string) string {
	mut out := []u8{cap: s.len + 4}
	for i, c in s {
		if c >= `A` && c <= `Z` {
			if i > 0 {
				out << `_`
			}
			out << u8(c + 32)
		} else {
			out << c
		}
	}
	return out.bytestr()
}

// cbor_to_camel converts a snake_case or PascalCase identifier to
// camelCase: underscores are dropped, the first ASCII letter after an
// underscore is emitted in upper case, and a leading ASCII uppercase
// letter is lower-cased ("user_name" -> "userName", "UserName" ->
// "userName").
fn cbor_to_camel(s string) string {
	mut out := []u8{cap: s.len}
	mut upper_next := false
	for i, c in s {
		if c == `_` {
			upper_next = true
			continue
		}
		is_lower := c >= `a` && c <= `z`
		is_upper := c >= `A` && c <= `Z`
		if upper_next && (is_lower || is_upper) {
			// The pending capitalisation is consumed by the first letter
			// after the underscore, whatever its case. Clearing the flag
			// only for lowercase letters would leave it armed after an
			// already-uppercase letter and wrongly capitalise a later
			// one ("a_Bc" must become "aBc", not "aBC"). Non-letters
			// such as digits do not consume the flag.
			out << if is_lower { u8(c - 32) } else { c }
			upper_next = false
		} else if i == 0 && is_upper {
			out << u8(c + 32)
		} else {
			out << c
		}
	}
	return out.bytestr()
}

// cbor_to_pascal converts an identifier to PascalCase: the camelCase form
// with its first byte upper-cased when that byte is an ASCII lowercase
// letter. An empty result is returned as is.
fn cbor_to_pascal(s string) string {
	camel := cbor_to_camel(s)
	if camel.len == 0 {
		return camel
	}
	first := camel[0]
	if first >= `a` && first <= `z` {
		return u8(first - 32).ascii_str() + camel[1..]
	}
	return camel
}

// cbor_to_kebab lower-cases every ASCII uppercase letter, inserting `-`
// before each one that is not the first byte, and replaces `_` with `-`
// ("userName" and "user_name" both become "user-name").
fn cbor_to_kebab(s string) string {
	mut out := []u8{cap: s.len + 4}
	for i, c in s {
		if c >= `A` && c <= `Z` {
			if i > 0 {
				out << `-`
			}
			out << u8(c + 32)
		} else if c == `_` {
			out << `-`
		} else {
			out << c
		}
	}
	return out.bytestr()
}

// parse_cbor_attr extracts the value of a `cbor: ...` attribute string:
// the text after the first ':' with surrounding whitespace trimmed and one
// pair of matching single or double quotes removed. Returns `none` when
// the attribute contains no ':'.
fn parse_cbor_attr(attr string) ?string {
	idx := attr.index(':') or { return none }
	mut v := attr[idx + 1..].trim_space()
	if v.len >= 2 && ((v.starts_with("'") && v.ends_with("'"))
		|| (v.starts_with('"') && v.ends_with('"'))) {
		v = v[1..v.len - 1]
	}
	return v
}