| 1 | module cbor |
| 2 | |
| 3 | import math |
| 4 | |
// EncodeOpts configures the encoder. Left at its defaults it yields
// RFC 8949 *preferred* serialisation: floats are narrowed to the
// shortest IEEE 754 width that still preserves the value, and headers
// use the shortest length encoding.
//
// `canonical = true` additionally sorts map keys per RFC 8949 §4.2.1
// (deterministic encoding), which is what you want before hashing or
// signing the output.
pub struct EncodeOpts {
pub:
	// initial_cap is the buffer capacity, in bytes, requested up-front.
	initial_cap int = 64
	// canonical sorts map keys; applies to definite-length maps only.
	canonical bool
	// self_describe prepends tag 55799 (`d9 d9 f7`) to the output.
	self_describe bool
	// validate_utf8 makes encode[T] reject V `string` payloads that
	// contain non-UTF-8 bytes. It is off by default: V's convention is
	// that strings already are UTF-8, and validation costs time on hot
	// paths. Enable it at trust boundaries where strings may have been
	// built from raw bytes (e.g. `bytestr()`), so the wire stays
	// round-trip-safe against the strict-by-default decoder.
	validate_utf8 bool
}
| 24 | |
// Packer collects CBOR output in an internal byte buffer. Call
// `bytes()` to obtain the wire output, or `reset()` to recycle the
// buffer for the next message, which is the cheapest way to emit many
// small frames.
//
// The two private fields record which indefinite-length items are
// currently open. They let the encoder refuse malformed compositions:
// nested indefinite strings, an indefinite array/map inside an
// indefinite string (RFC 8949 §3.2.3), or a break code with nothing
// to close.
pub struct Packer {
pub mut:
	// buf holds the bytes encoded so far.
	buf []u8
	// opts are the options this Packer was created with.
	opts EncodeOpts
mut:
	// indef_string_open is true while the innermost open indefinite
	// item is a text or byte string.
	indef_string_open bool
	// indef_other_depth counts the indefinite arrays/maps still open.
	indef_other_depth int
}
| 41 | |
// new_packer creates a Packer configured by `opts`. The buffer is
// allocated with `opts.initial_cap` bytes of capacity, or 64 when that
// value is not positive. With `opts.self_describe` set, the buffer
// starts out holding the self-describe tag prefix.
pub fn new_packer(opts EncodeOpts) Packer {
	mut start_cap := 64
	if opts.initial_cap > 0 {
		start_cap = opts.initial_cap
	}
	mut packer := Packer{
		buf:  []u8{cap: start_cap}
		opts: opts
	}
	if !opts.self_describe {
		return packer
	}
	packer.buf << self_describe_prefix
	return packer
}
| 56 | |
// bytes hands back the encoded buffer. The result shares storage with
// the Packer, so clone it if the Packer will be used again afterwards.
//
// This accessor performs no validation: if an indefinite-length
// container was opened and never closed, the returned bytes are
// malformed. Check `is_complete()` first, or go through `pack_to` /
// `encode[T]` for the validated path.
@[inline]
pub fn (mut p Packer) bytes() []u8 {
	return p.buf
}
| 67 | |
// is_complete tells whether every indefinite-length item opened so far
// has been closed. It returns false while an indefinite array, map,
// text, or byte string is still waiting for its `pack_break`.
@[inline]
pub fn (p &Packer) is_complete() bool {
	if p.indef_string_open {
		return false
	}
	return p.indef_other_depth == 0
}
| 75 | |
// reset empties the Packer so it can encode a new message. The buffer
// length drops to zero but its capacity is kept, the indefinite-length
// tracking state is cleared, and the self-describe prefix is written
// again when `opts.self_describe` is set.
@[inline]
pub fn (mut p Packer) reset() {
	// Forget any indefinite-length items left open by the last message.
	p.indef_other_depth = 0
	p.indef_string_open = false
	// Truncate in place; only the length field changes.
	unsafe {
		p.buf.len = 0
	}
	if !p.opts.self_describe {
		return
	}
	p.buf << self_describe_prefix
}
| 89 | |
// reserve makes sure at least `n` more bytes can be appended past the
// current length without reallocating. Non-positive `n` is a no-op.
// Useful before a string/binary write of known length.
//
// Capacity grows by doubling (starting at 64 for an empty allocation)
// until it covers the request. All size arithmetic is done in i64:
// with plain `int` math the doubling wraps around once the capacity
// reaches 2^30, which previously turned the growth loop into an
// infinite loop, and a wrapped `len + n` skipped growth altogether so
// the unchecked writes that follow ran past the buffer.
//
// Panics when `len + n` exceeds the 2 GiB ceiling enforced here.
@[inline]
pub fn (mut p Packer) reserve(n int) {
	if n <= 0 {
		return
	}
	needed := i64(p.buf.len) + i64(n)
	if needed <= i64(p.buf.cap) {
		return
	}
	limit := i64(max_i32)
	if needed > limit {
		panic('cbor: buffer would exceed the maximum array size')
	}
	mut new_cap := if p.buf.cap == 0 { i64(64) } else { i64(p.buf.cap) * 2 }
	for new_cap < needed {
		new_cap *= 2
	}
	// The last doubling may overshoot the ceiling; `needed` itself is
	// known to fit, so clamping still leaves enough room.
	if new_cap > limit {
		new_cap = limit
	}
	mut grown := []u8{cap: int(new_cap)}
	grown << p.buf
	p.buf = grown
}
| 108 | |
// extend_unchecked bumps the buffer length by `n` without touching the
// capacity and returns the offset where the newly exposed bytes begin.
// The caller is responsible for having called `reserve` beforehand.
@[direct_array_access; inline]
fn (mut p Packer) extend_unchecked(n int) int {
	start := p.buf.len
	unsafe {
		p.buf.len += n
	}
	return start
}
| 120 | |
| 121 | // -------------------------------------------------------------------- |
| 122 | // Low-level head writer |
| 123 | // -------------------------------------------------------------------- |
| 124 | |
// write_head appends the initial byte (`major` combined with the
// additional-info bits) followed by `arg` as a big-endian integer of
// the smallest width that can hold it (RFC 8949 §4.1, "preferred
// serialization"): inline for values below 24, otherwise 1, 2, 4 or 8
// bytes selected by additional info 24, 25, 26 or 27.
//
// Hot path: each branch reserves once and then stores bytes directly
// instead of appending with `<<`, which would re-check capacity on
// every byte.
@[direct_array_access; inline]
fn (mut p Packer) write_head(major u8, arg u64) {
	if arg < 24 {
		// The argument lives in the low bits of the initial byte.
		p.reserve(1)
		at := p.extend_unchecked(1)
		unsafe {
			p.buf[at] = major | u8(arg)
		}
	} else if arg <= 0xff {
		p.reserve(2)
		at := p.extend_unchecked(2)
		unsafe {
			p.buf[at] = major | 24
			p.buf[at + 1] = u8(arg)
		}
	} else if arg <= 0xffff {
		p.reserve(3)
		at := p.extend_unchecked(3)
		unsafe {
			p.buf[at] = major | 25
			p.buf[at + 1] = u8(arg >> 8)
			p.buf[at + 2] = u8(arg)
		}
	} else if arg <= 0xffffffff {
		p.reserve(5)
		at := p.extend_unchecked(5)
		unsafe {
			p.buf[at] = major | 26
			p.buf[at + 1] = u8(arg >> 24)
			p.buf[at + 2] = u8(arg >> 16)
			p.buf[at + 3] = u8(arg >> 8)
			p.buf[at + 4] = u8(arg)
		}
	} else {
		p.reserve(9)
		at := p.extend_unchecked(9)
		unsafe {
			p.buf[at] = major | 27
			p.buf[at + 1] = u8(arg >> 56)
			p.buf[at + 2] = u8(arg >> 48)
			p.buf[at + 3] = u8(arg >> 40)
			p.buf[at + 4] = u8(arg >> 32)
			p.buf[at + 5] = u8(arg >> 24)
			p.buf[at + 6] = u8(arg >> 16)
			p.buf[at + 7] = u8(arg >> 8)
			p.buf[at + 8] = u8(arg)
		}
	}
}
| 185 | |
// write_be_u16 appends `v` as two bytes, most significant first.
@[direct_array_access; inline]
fn (mut p Packer) write_be_u16(v u16) {
	hi := u8(v >> 8)
	lo := u8(v)
	p.reserve(2)
	at := p.extend_unchecked(2)
	unsafe {
		p.buf[at] = hi
		p.buf[at + 1] = lo
	}
}
| 195 | |
// write_be_u32 appends `v` as four bytes, most significant first.
@[direct_array_access; inline]
fn (mut p Packer) write_be_u32(v u32) {
	p.reserve(4)
	at := p.extend_unchecked(4)
	unsafe {
		// Fill from the last byte backwards, peeling off the low byte
		// of the remaining value each time.
		mut rest := v
		for i := 3; i >= 0; i-- {
			p.buf[at + i] = u8(rest)
			rest >>= 8
		}
	}
}
| 207 | |
// write_be_u64 appends `v` as eight bytes, most significant first.
@[direct_array_access; inline]
fn (mut p Packer) write_be_u64(v u64) {
	p.reserve(8)
	at := p.extend_unchecked(8)
	unsafe {
		// Fill from the last byte backwards, peeling off the low byte
		// of the remaining value each time.
		mut rest := v
		for i := 7; i >= 0; i-- {
			p.buf[at + i] = u8(rest)
			rest >>= 8
		}
	}
}
| 223 | |
| 224 | // -------------------------------------------------------------------- |
| 225 | // High-level packers — primitives |
| 226 | // -------------------------------------------------------------------- |
| 227 | |
// pack_uint writes `v` as a CBOR unsigned integer (major type 0). The
// whole u64 range is accepted, including values larger than i64.max.
@[inline]
pub fn (mut p Packer) pack_uint(v u64) {
	// 0x00 is major type 0 in the top three bits of the initial byte.
	p.write_head(0x00, v)
}
| 234 | |
// pack_int writes a signed integer, choosing major type 0 for
// non-negative values and major type 1 for negative ones. Negative
// integers beyond the i64 range (down to -2^64) cannot be expressed as
// an i64; use `pack_negative_arg` for those.
@[inline]
pub fn (mut p Packer) pack_int(v i64) {
	if v < 0 {
		// Major type 1 stores the argument `-1 - v`.
		p.write_head(0x20, u64(-1 - v))
		return
	}
	p.write_head(0x00, u64(v))
}
| 246 | |
// pack_negative_arg writes a major type 1 item whose encoded argument
// is `arg`; the integer it represents is `-1 - arg`. This reaches all
// the way down to -2^64, the smallest CBOR negative integer.
@[inline]
pub fn (mut p Packer) pack_negative_arg(arg u64) {
	// 0x20 is major type 1 in the top three bits of the initial byte.
	p.write_head(0x20, arg)
}
| 254 | |
// pack_bool writes simple value 20 (false, byte 0xf4) or simple value
// 21 (true, byte 0xf5).
@[direct_array_access; inline]
pub fn (mut p Packer) pack_bool(v bool) {
	mut marker := u8(0xf4)
	if v {
		marker = u8(0xf5)
	}
	p.reserve(1)
	at := p.extend_unchecked(1)
	unsafe {
		p.buf[at] = marker
	}
}
| 264 | |
// pack_null writes CBOR null: simple value 22, the single byte 0xf6.
@[direct_array_access; inline]
pub fn (mut p Packer) pack_null() {
	p.reserve(1)
	at := p.extend_unchecked(1)
	unsafe {
		p.buf[at] = 0xf6
	}
}
| 274 | |
// pack_undefined writes CBOR undefined: simple value 23, the single
// byte 0xf7.
@[direct_array_access; inline]
pub fn (mut p Packer) pack_undefined() {
	p.reserve(1)
	at := p.extend_unchecked(1)
	unsafe {
		p.buf[at] = 0xf7
	}
}
| 284 | |
// pack_simple writes a CBOR simple value. Values 0..19 are encoded in
// the initial byte; values 32..255 use the two-byte form (0xf8 followed
// by the value).
//
// Errors:
// - 20..23 are false/true/null/undefined. Emitting them here would
//   produce bytes that decode as Bool/Null/Undefined rather than as a
//   Simple, so callers must use the dedicated typed packers instead.
// - 24..31 are not well-formed per RFC 8949 §3.3.
@[direct_array_access]
pub fn (mut p Packer) pack_simple(v u8) ! {
	if v >= 32 {
		// Two-byte form: marker byte, then the value itself.
		p.reserve(2)
		at := p.extend_unchecked(2)
		unsafe {
			p.buf[at] = 0xf8
			p.buf[at + 1] = v
		}
		return
	}
	if v >= 24 {
		return error('cbor: simple values 24..31 are not well-formed (RFC 8949 §3.3)')
	}
	if v >= 20 {
		return error('cbor: simple values 20..23 must be packed via pack_bool / pack_null / pack_undefined (RFC 8949 §3.3)')
	}
	// Inline form: the value sits in the low five bits of the byte.
	p.reserve(1)
	at := p.extend_unchecked(1)
	unsafe {
		p.buf[at] = 0xe0 | v
	}
}
| 316 | |
| 317 | // -------------------------------------------------------------------- |
| 318 | // High-level packers — strings and bytes |
| 319 | // -------------------------------------------------------------------- |
| 320 | |
// pack_text writes `s` as a text string (major type 3). The payload is
// copied as-is; no UTF-8 validation happens here.
//
// Strings shorter than 24 bytes take a fused path: a single reserve
// covers the one-byte head plus the payload, and the payload is copied
// with one memcpy. Longer strings write the head through `write_head`
// and then append the payload in bulk.
@[direct_array_access; inline]
pub fn (mut p Packer) pack_text(s string) {
	n := s.len
	if n >= 24 {
		p.write_head(0x60, u64(n))
		p.reserve(n)
		unsafe { p.buf.push_many(s.str, n) }
		return
	}
	// Short string: the length fits in the initial byte.
	p.reserve(n + 1)
	at := p.extend_unchecked(n + 1)
	unsafe {
		p.buf[at] = u8(0x60) | u8(n)
		if n > 0 {
			vmemcpy(&p.buf[at + 1], s.str, n)
		}
	}
}
| 343 | |
// pack_bytes writes `b` as a byte string (major type 2).
//
// Payloads shorter than 24 bytes take a fused path: a single reserve
// covers the one-byte head plus the payload, and the payload is copied
// with one memcpy. Longer payloads write the head through `write_head`
// and then append the data in bulk.
@[direct_array_access]
pub fn (mut p Packer) pack_bytes(b []u8) {
	n := b.len
	if n >= 24 {
		p.write_head(0x40, u64(n))
		p.reserve(n)
		unsafe { p.buf.push_many(b.data, n) }
		return
	}
	// Short payload: the length fits in the initial byte.
	p.reserve(n + 1)
	at := p.extend_unchecked(n + 1)
	unsafe {
		p.buf[at] = u8(0x40) | u8(n)
		if n > 0 {
			vmemcpy(&p.buf[at + 1], b.data, n)
		}
	}
}
| 363 | |
| 364 | // -------------------------------------------------------------------- |
| 365 | // High-level packers — arrays, maps, tags |
| 366 | // -------------------------------------------------------------------- |
| 367 | |
// pack_array_header writes the head of a definite-length array holding
// `n` elements (major type 4). The caller packs the elements next.
@[inline]
pub fn (mut p Packer) pack_array_header(n u64) {
	p.write_head(0x80, n)
}
| 373 | |
// pack_map_header writes the head of a definite-length map (major
// type 5). `n` counts key/value *pairs*, not individual items.
@[inline]
pub fn (mut p Packer) pack_map_header(n u64) {
	p.write_head(0xa0, n)
}
| 380 | |
// pack_tag writes a tag head (major type 6) carrying `number`. The
// item packed immediately afterwards becomes the tag's content.
@[inline]
pub fn (mut p Packer) pack_tag(number u64) {
	p.write_head(0xc0, number)
}
| 387 | |
// open_indef_or_error writes the indefinite-length opener `head` and
// records the newly opened item: `is_string` marks a text/byte string,
// anything else counts as one more open array/map.
//
// Errors when an indefinite text/byte string is already open, because
// RFC 8949 §3.2.3 only allows definite-length chunks of the matching
// major type inside one. Nothing is written in that case.
@[direct_array_access; inline]
fn (mut p Packer) open_indef_or_error(head u8, is_string bool) ! {
	if p.indef_string_open {
		return error('cbor: indefinite-length string chunks must be definite-length strings of the same major type')
	}
	p.reserve(1)
	at := p.extend_unchecked(1)
	unsafe {
		p.buf[at] = head
	}
	if !is_string {
		p.indef_other_depth++
		return
	}
	p.indef_string_open = true
}
| 408 | |
// pack_array_indef starts an indefinite-length array (byte 0x9f).
// Finish it with `pack_break`. Errors if an indefinite-length string
// is currently open.
@[inline]
pub fn (mut p Packer) pack_array_indef() ! {
	p.open_indef_or_error(0x9f, false)!
}
| 414 | |
// pack_map_indef starts an indefinite-length map (byte 0xbf). Finish
// it with `pack_break`. Errors if an indefinite-length string is
// currently open.
@[inline]
pub fn (mut p Packer) pack_map_indef() ! {
	p.open_indef_or_error(0xbf, false)!
}
| 420 | |
// pack_text_indef starts an indefinite-length text string (byte 0x7f).
// Every chunk that follows must be a definite-length text string;
// finish with `pack_break`. Errors if an indefinite-length string is
// already open.
@[inline]
pub fn (mut p Packer) pack_text_indef() ! {
	p.open_indef_or_error(0x7f, true)!
}
| 427 | |
// pack_bytes_indef starts an indefinite-length byte string (byte
// 0x5f). Every chunk that follows must be a definite-length byte
// string; finish with `pack_break`. Errors if an indefinite-length
// string is already open.
@[inline]
pub fn (mut p Packer) pack_bytes_indef() ! {
	p.open_indef_or_error(0x5f, true)!
}
| 434 | |
// pack_break writes the break stop code 0xff, closing the most
// recently opened indefinite-length item: an open string if there is
// one, otherwise the innermost open array/map.
//
// Errors, without writing anything, when no indefinite-length item is
// open; a stray 0xff would corrupt the stream.
@[direct_array_access; inline]
pub fn (mut p Packer) pack_break() ! {
	if !p.indef_string_open && p.indef_other_depth <= 0 {
		return error('cbor: pack_break called with no open indefinite-length item')
	}
	if p.indef_string_open {
		p.indef_string_open = false
	} else {
		p.indef_other_depth--
	}
	p.reserve(1)
	at := p.extend_unchecked(1)
	unsafe {
		p.buf[at] = 0xff
	}
}
| 454 | |
| 455 | // -------------------------------------------------------------------- |
| 456 | // High-level packers — floats with preferred serialisation |
| 457 | // -------------------------------------------------------------------- |
| 458 | |
// pack_float64 writes `v` as a double-precision float: the byte 0xfb
// followed by the eight IEEE 754 bytes in big-endian order. No
// narrowing is attempted.
@[direct_array_access; inline]
pub fn (mut p Packer) pack_float64(v f64) {
	raw := math.f64_bits(v)
	p.reserve(9)
	at := p.extend_unchecked(9)
	unsafe {
		p.buf[at] = 0xfb
		// Fill the payload from its last byte backwards.
		mut rest := raw
		for i := 8; i >= 1; i-- {
			p.buf[at + i] = u8(rest)
			rest >>= 8
		}
	}
}
| 477 | |
// pack_float32 writes `v` as a single-precision float: the byte 0xfa
// followed by the four IEEE 754 bytes in big-endian order. No
// narrowing is attempted.
@[direct_array_access; inline]
pub fn (mut p Packer) pack_float32(v f32) {
	raw := math.f32_bits(v)
	p.reserve(5)
	at := p.extend_unchecked(5)
	unsafe {
		p.buf[at] = 0xfa
		// Fill the payload from its last byte backwards.
		mut rest := raw
		for i := 4; i >= 1; i-- {
			p.buf[at + i] = u8(rest)
			rest >>= 8
		}
	}
}
| 492 | |
// pack_float16_bits writes a half-precision float given its raw bit
// pattern: the byte 0xf9 followed by `bits` in big-endian order.
@[direct_array_access; inline]
pub fn (mut p Packer) pack_float16_bits(bits u16) {
	hi := u8(bits >> 8)
	lo := u8(bits)
	p.reserve(3)
	at := p.extend_unchecked(3)
	unsafe {
		p.buf[at] = 0xf9
		p.buf[at + 1] = hi
		p.buf[at + 2] = lo
	}
}
| 504 | |
// pack_float writes `v` using the narrowest IEEE 754 width that keeps
// its value exactly (RFC 8949 preferred serialisation): half, then
// single, then double. Every NaN is emitted as the canonical quiet
// half-precision NaN (0xf97e00); the original payload is not kept.
@[direct_array_access]
pub fn (mut p Packer) pack_float(v f64) {
	if math.is_nan(v) {
		p.pack_float16_bits(half_qnan_bits)
		return
	}
	// Sign 0 matches either infinity; both have exact half encodings.
	if math.is_inf(v, 0) {
		if v > 0 {
			p.pack_float16_bits(half_pos_inf_bits)
		} else {
			p.pack_float16_bits(half_neg_inf_bits)
		}
		return
	}
	narrowed := f32(v)
	if f64(narrowed) != v {
		// Single precision would change the value; stay at double.
		p.pack_float64(v)
		return
	}
	half_bits, fits := f32_to_half(narrowed)
	if fits {
		p.pack_float16_bits(half_bits)
	} else {
		p.pack_float32(narrowed)
	}
}
| 535 | |
| 536 | // -------------------------------------------------------------------- |
| 537 | // Value tree encoder |
| 538 | // -------------------------------------------------------------------- |
| 539 | |
// pack_value encodes an arbitrary `Value` tree, recursing into arrays,
// maps and tags. Arrays and maps are always written definite-length.
// Map pairs keep their stored order unless `opts.canonical` is set, in
// which case they go through `pack_map_canonical`.
//
// Floats honour the width hint stored in `FloatNum.bits` as long as the
// hinted width represents the value exactly; otherwise the value is
// written as a double so nothing is lost. In canonical mode the hint is
// ignored and the shortest exact width is used.
//
// Returns an error if the tree is malformed (a `Tag` without content,
// or a `Simple` that `pack_simple` rejects). Emitting a placeholder
// instead would corrupt round-trips.
pub fn (mut p Packer) pack_value(v Value) ! {
	match v {
		IntNum {
			if v.negative {
				p.write_head(0x20, v.magnitude)
			} else {
				p.write_head(0x00, v.magnitude)
			}
		}
		Bytes {
			p.pack_bytes(v.data)
		}
		Text {
			p.pack_text(v.value)
		}
		Array {
			p.pack_array_header(u64(v.elements.len))
			for el in v.elements {
				p.pack_value(el)!
			}
		}
		Map {
			p.pack_map_header(u64(v.pairs.len))
			if p.opts.canonical {
				p.pack_map_canonical(v.pairs)!
			} else {
				for pair in v.pairs {
					p.pack_value(pair.key)!
					p.pack_value(pair.value)!
				}
			}
		}
		Tag {
			if v.content_box.len == 0 {
				return error('cbor: Tag(${v.number}) has no content — use new_tag() or set content_box')
			}
			p.pack_tag(v.number)
			p.pack_value(v.content_box[0])!
		}
		Bool {
			p.pack_bool(v.value)
		}
		Null {
			p.pack_null()
		}
		Undefined {
			p.pack_undefined()
		}
		FloatNum {
			// RFC 8949 §4.2.1 deterministic encoding requires the shortest
			// IEEE 754 form regardless of the original wire width, so the
			// bits hint is dropped in canonical mode. That keeps re-encoded
			// `Value`s conformant even if the producer copied a too-wide
			// hint from a non-canonical source.
			if p.opts.canonical {
				p.pack_float(v.value)
			} else {
				match v.bits {
					.half {
						// NaN/±Inf bypass the lossless check (NaN != NaN
						// breaks a round-trip equality test).
						if math.is_nan(v.value) {
							p.pack_float16_bits(half_qnan_bits)
						} else if math.is_inf(v.value, 1) {
							p.pack_float16_bits(half_pos_inf_bits)
						} else if math.is_inf(v.value, -1) {
							p.pack_float16_bits(half_neg_inf_bits)
						} else {
							bits16, ok := f64_to_half(v.value)
							if ok {
								p.pack_float16_bits(bits16)
							} else {
								p.pack_float64(v.value)
							}
						}
					}
					.single {
						// Honour the hint only when narrowing is exact, the
						// same rule the half hint follows. NaN never compares
						// equal to itself, so it is let through explicitly.
						narrowed := f32(v.value)
						if math.is_nan(v.value) || f64(narrowed) == v.value {
							p.pack_float32(narrowed)
						} else {
							p.pack_float64(v.value)
						}
					}
					.double {
						p.pack_float64(v.value)
					}
					.@none {
						p.pack_float(v.value)
					}
				}
			}
		}
		Simple {
			p.pack_simple(v.value)!
		}
	}
}
| 637 | |
// pack_map_canonical emits the pairs of a map in deterministic order.
// Every key is encoded once into its own buffer, the keys are ordered
// by `sort_canonical_indices`, and then each pre-encoded key is copied
// out followed by its value. The map header must already have been
// written by the caller. An empty map emits nothing.
fn (mut p Packer) pack_map_canonical(pairs []MapPair) ! {
	if pairs.len == 0 {
		return
	}
	// Keys are encoded canonically too, and the key encoders carry the
	// caller's `validate_utf8` setting along.
	key_opts := EncodeOpts{
		initial_cap:   16
		canonical:     true
		validate_utf8: p.opts.validate_utf8
	}
	mut key_blobs := [][]u8{cap: pairs.len}
	for pair in pairs {
		mut key_packer := new_packer(key_opts)
		key_packer.pack_value(pair.key)!
		key_blobs << key_packer.bytes().clone()
	}
	order := sort_canonical_indices(key_blobs)
	for slot in order {
		p.reserve(key_blobs[slot].len)
		unsafe { p.buf.push_many(key_blobs[slot].data, key_blobs[slot].len) }
		p.pack_value(pairs[slot].value)!
	}
}
| 665 | |
// compare_canonical_keys orders two encoded map keys the way RFC 8949
// §4.2.1 ("Core Deterministic Encoding Requirements") demands: plain
// bytewise lexicographic comparison of the encodings. It returns -1, 0
// or 1 when `a` sorts before, equal to, or after `b`.
//
// This is deliberately NOT the length-first ordering of §4.2.3 (the
// legacy RFC 7049 canonical form). The two agree for keys of a single
// major type but differ for mixed-type or nested keys: 100 (`18 64`)
// must sort before -1 (`20`), whereas length-first puts -1 first.
@[direct_array_access]
fn compare_canonical_keys(a []u8, b []u8) int {
	common := if a.len < b.len { a.len } else { b.len }
	for i in 0 .. common {
		if a[i] != b[i] {
			return if a[i] < b[i] { -1 } else { 1 }
		}
	}
	// One input is a prefix of the other (or they are identical): the
	// shorter one sorts first. Well-formed CBOR encodings are never
	// proper prefixes of one another, so this only decides equality in
	// practice.
	if a.len == b.len {
		return 0
	}
	return if a.len < b.len { -1 } else { 1 }
}
| 680 | |
// sort_canonical_indices returns the positions 0..keys.len arranged so
// that the encoded keys they refer to are in the order defined by
// `compare_canonical_keys`. `keys` itself is not modified. Keeping
// this in one helper means the canonical-emit paths share a single
// comparison closure.
fn sort_canonical_indices(keys [][]u8) []int {
	mut order := []int{cap: keys.len}
	for i in 0 .. keys.len {
		order << i
	}
	order.sort_with_compare(fn [keys] (lhs &int, rhs &int) int {
		return compare_canonical_keys(keys[*lhs], keys[*rhs])
	})
	return order
}
| 692 | |
| 693 | // -------------------------------------------------------------------- |
| 694 | // Module-level convenience wrappers |
| 695 | // -------------------------------------------------------------------- |
| 696 | |
// encode_value encodes a `Value` tree with the supplied `opts` and
// returns the result as a freshly allocated byte slice. Any error from
// `pack_value` is propagated to the caller.
pub fn encode_value(v Value, opts EncodeOpts) ![]u8 {
	mut packer := new_packer(opts)
	packer.pack_value(v)!
	encoded := packer.bytes().clone()
	return encoded
}
| 703 | |