| 1 | // Hardening tests: adversarial payloads, RFC 8949 strictness boundaries |
| 2 | // on both encode and decode, and round-trip invariants for the parts |
| 3 | // the rest of the suite doesn't cover (sub-second time, NaN dedup, |
| 4 | // canonical float, indef-string composition rules). |
| 5 | module main |
| 6 | |
| 7 | import encoding.cbor |
| 8 | import encoding.hex |
| 9 | import io |
| 10 | import time |
| 11 | |
// b turns the hex literal `s` into raw bytes. A malformed fixture panics
// immediately so a typo can never masquerade as an empty payload.
fn b(s string) []u8 {
	decoded := hex.decode(s) or { panic('invalid hex: ${s}') }
	return decoded
}
| 15 | |
| 16 | // --------------------------------------------------------------------- |
| 17 | // Decode bounds: a length argument can be up to u64::max on the wire, |
| 18 | // but the host's int is i32 on 32-bit targets. Naively casting the |
| 19 | // argument before bounds-checking aborts with `array.slice: invalid |
| 20 | // slice index (start>end)` or `negative .cap`. Every length read path |
| 21 | // must clamp before any int(...) cast. |
| 22 | // --------------------------------------------------------------------- |
| 23 | |
// 7b ff..ff is a text-string head whose 8-byte length argument is u64::max.
// unpack_text must return an error; success fails the test.
fn test_unpack_text_rejects_u64_max_length() {
	payload := b('7bffffffffffffffff')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.unpack_text() or { return }
	assert false, 'u64::max text length must not panic the decoder'
}
| 30 | |
// 5b ff..ff is a byte-string head whose 8-byte length argument is u64::max.
// unpack_bytes must return an error; success fails the test.
fn test_unpack_bytes_rejects_u64_max_length() {
	payload := b('5bffffffffffffffff')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.unpack_bytes() or { return }
	assert false, 'u64::max bytes length must not panic the decoder'
}
| 37 | |
// 7a 80000000 declares a text string of 2^31 bytes, the first length that
// no longer fits a signed 32-bit int. No payload follows, so the read must
// fail.
fn test_unpack_text_rejects_i32_overflow_length() {
	payload := b('7a80000000')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.unpack_text() or { return }
	assert false, 'expected EOF rejection at 2^31 boundary'
}
| 45 | |
// skip_value on a text head claiming u64::max bytes must error rather than
// skip past the end of the buffer.
fn test_skip_value_rejects_oversized_text_length() {
	payload := b('7bffffffffffffffff')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.skip_value() or { return }
	assert false, 'skip_value must reject u64::max text length'
}
| 52 | |
// skip_value on a byte-string head claiming u64::max bytes must error.
fn test_skip_value_rejects_oversized_bytes_length() {
	payload := b('5bffffffffffffffff')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.skip_value() or { return }
	assert false, 'skip_value must reject u64::max bytes length'
}
| 59 | |
// 7f opens an indefinite-length text string; its first chunk (7b ff..ff)
// claims u64::max bytes. skip_value must reject the chunk length.
fn test_skip_value_rejects_oversized_indef_chunk() {
	payload := b('7f7bffffffffffffffffff')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.skip_value() or { return }
	assert false, 'skip_value must reject u64::max chunk length'
}
| 67 | |
| 68 | // --------------------------------------------------------------------- |
| 69 | // Pre-allocation safety: the array/map header argument is a u64. The |
| 70 | // decoder must refuse any value it can't realistically allocate (cast |
| 71 | // overflow at i64::max, or item count > remaining bytes), instead of |
| 72 | // triggering a `negative .cap` panic or attempting a multi-GB malloc. |
| 73 | // --------------------------------------------------------------------- |
| 74 | |
// 9b 7fff..ff is an array head announcing i64::max items with no content
// behind it. unpack_value must error.
fn test_value_decode_rejects_array_header_at_i64_max() {
	payload := b('9b7fffffffffffffff')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.unpack_value() or { return }
	assert false, 'unpack_value must reject implausible array length'
}
| 81 | |
// bb 7fff..ff is a map head announcing i64::max pairs with no content
// behind it. unpack_value must error.
fn test_value_decode_rejects_map_header_at_i64_max() {
	payload := b('bb7fffffffffffffff')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.unpack_value() or { return }
	assert false, 'unpack_value must reject implausible map length'
}
| 88 | |
// 9a 00ffffff announces roughly 16M array items, yet the whole payload is
// only the 5-byte head. unpack_value must error.
fn test_value_decode_rejects_array_longer_than_buffer() {
	payload := b('9a00ffffff')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.unpack_value() or { return }
	assert false, 'unpack_value must reject array longer than buffer'
}
| 96 | |
// ba 00ffffff announces roughly 16M map pairs in a 5-byte payload.
// unpack_value must error.
fn test_value_decode_rejects_map_longer_than_buffer() {
	payload := b('ba00ffffff')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.unpack_value() or { return }
	assert false, 'unpack_value must reject map longer than buffer'
}
| 103 | |
| 104 | // --------------------------------------------------------------------- |
| 105 | // skip_value strictness: must reject every malformed shape that the |
| 106 | // typed unpack_* paths reject, otherwise malformed CBOR slips through |
| 107 | // RawMessage, Unmarshaler, and unknown-field skipping into downstream |
| 108 | // consumers. |
| 109 | // --------------------------------------------------------------------- |
| 110 | |
// f8 1f is the two-byte simple form carrying the value 31. RFC 8949 §3.3
// treats every `f8 xx` with xx < 0x20 as not well-formed, so skip_value
// must refuse it.
fn test_skip_value_rejects_simple_below_32() {
	payload := b('f81f')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.skip_value() or { return }
	assert false, 'skip_value must reject simple < 32 in 1-byte form'
}
| 119 | |
// Same malformed `f8 1f` item as the skip_value case, this time through
// unpack_raw: it must error instead of handing the bytes back verbatim.
fn test_unpack_raw_rejects_simple_below_32() {
	payload := b('f81f')
	mut u := cbor.new_unpacker(payload, cbor.DecodeOpts{})
	u.unpack_raw() or { return }
	assert false, 'unpack_raw must reject simple < 32 in 1-byte form'
}
| 126 | |
| 127 | // --------------------------------------------------------------------- |
| 128 | // deny_duplicate_keys covers every map decode path (typed map, struct, |
| 129 | // Value tree definite, Value tree indefinite). RFC 8949 §5.6: map keys |
| 130 | // "should be unique"; deny_duplicate_keys turns that into a hard error. |
| 131 | // --------------------------------------------------------------------- |
| 132 | |
// a2 01 01 01 02 encodes {1: 1, 1: 2}: the uint key 1 appears twice.
// With deny_duplicate_keys set, decoding into a Value must fail.
fn test_value_decode_rejects_duplicate_keys_when_opted_in() {
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	payload := b('a201010102')
	cbor.decode[cbor.Value](payload, strict) or { return }
	assert false, 'expected duplicate-key rejection on Value path'
}
| 140 | |
// bf 01 01 01 02 ff is an indefinite-length map that repeats key 1.
// With deny_duplicate_keys set, decoding into a Value must fail.
fn test_value_decode_rejects_indef_map_duplicate_keys_when_opted_in() {
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	payload := b('bf01010102ff')
	cbor.decode[cbor.Value](payload, strict) or { return }
	assert false, 'expected duplicate-key rejection on indef Value map'
}
| 148 | |
// Duplicate rejection is opt-in: with default DecodeOpts the same
// {1: 1, 1: 2} payload must decode successfully into a Map.
fn test_value_decode_tolerates_duplicates_by_default() {
	payload := b('a201010102')
	lenient := cbor.DecodeOpts{}
	v := cbor.decode[cbor.Value](payload, lenient) or {
		assert false, 'default mode must accept duplicates: ${err}'
		return
	}
	assert v is cbor.Map
}
| 159 | |
// bf f9 7e00 01 f9 7e00 02 ff = {NaN: 1, NaN: 2}, both keys the same
// half-precision quiet NaN. NaN != NaN numerically, but the two keys are
// byte-identical on the wire, so the duplicate check must treat them as
// equal.
fn test_value_decode_detects_nan_keys_as_duplicate() {
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	payload := b('bff97e0001f97e0002ff')
	cbor.decode[cbor.Value](payload, strict) or { return }
	assert false, 'two NaN keys must be detected as duplicates'
}
| 169 | |
// bf f9 3c00 01 fa 3f800000 02 ff = {f16(1.0): 1, f32(1.0): 2}.
// The keys share a numeric value but differ in wire width, so even with
// deny_duplicate_keys they must be kept as two separate entries.
fn test_value_decode_keeps_distinct_float_widths_separate() {
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	payload := b('bff93c0001fa3f80000002ff')
	v := cbor.decode[cbor.Value](payload, strict)!
	assert v is cbor.Map
	if v is cbor.Map {
		assert v.pairs.len == 2
	}
}
| 181 | |
| 182 | // --------------------------------------------------------------------- |
| 183 | // Encoder indef state machine. RFC 8949 §3.2.3: an indef text/bytes |
| 184 | // string may only contain definite-length strings of the same major |
| 185 | // type. Opening any other indef container inside it is malformed. |
| 186 | // --------------------------------------------------------------------- |
| 187 | |
// Opening a second indefinite text string while one is already open must
// be refused by the packer.
fn test_pack_text_indef_rejects_self_nesting() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_text_indef()!
	packer.pack_text_indef() or { return }
	assert false, 'nested indef text must be rejected'
}
| 195 | |
// Opening a second indefinite byte string while one is already open must
// be refused by the packer.
fn test_pack_bytes_indef_rejects_self_nesting() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_bytes_indef()!
	packer.pack_bytes_indef() or { return }
	assert false, 'nested indef bytes must be rejected'
}
| 203 | |
// An indefinite array is not a legal chunk of an indefinite text string.
fn test_pack_array_indef_rejected_inside_indef_text() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_text_indef()!
	packer.pack_array_indef() or { return }
	assert false, 'pack_array_indef inside pack_text_indef must be rejected'
}
| 211 | |
// An indefinite map is not a legal chunk of an indefinite text string.
fn test_pack_map_indef_rejected_inside_indef_text() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_text_indef()!
	packer.pack_map_indef() or { return }
	assert false, 'pack_map_indef inside pack_text_indef must be rejected'
}
| 219 | |
// An indefinite array is not a legal chunk of an indefinite byte string.
fn test_pack_array_indef_rejected_inside_indef_bytes() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_bytes_indef()!
	packer.pack_array_indef() or { return }
	assert false, 'pack_array_indef inside pack_bytes_indef must be rejected'
}
| 227 | |
// Chunks of an indefinite byte string must themselves be byte strings;
// starting an indefinite text string there would put the wrong major type
// on the wire, so the packer must refuse.
fn test_pack_text_indef_rejected_inside_indef_bytes() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_bytes_indef()!
	packer.pack_text_indef() or { return }
	assert false, 'pack_text_indef inside pack_bytes_indef must be rejected'
}
| 237 | |
// A break on a fresh packer has nothing to close and must be refused.
fn test_pack_break_rejects_no_open_indef() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_break() or { return }
	assert false, 'break with no open indef context must be rejected'
}
| 244 | |
// Indefinite arrays may nest inside each other (RFC 8949 §3.2.2); only
// indefinite text/byte strings constrain their chunks (§3.2.3). Two opens
// followed by two breaks must yield exactly 9f 9f ff ff.
fn test_indef_array_inside_indef_array_allowed() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	// Open outer, then inner.
	packer.pack_array_indef()!
	packer.pack_array_indef()!
	// Close inner, then outer.
	packer.pack_break()!
	packer.pack_break()!
	expected := [u8(0x9f), 0x9f, 0xff, 0xff]
	assert packer.bytes() == expected
}
| 255 | |
// Positive control: an indefinite text string built from two definite
// chunks plus a break decodes back to the concatenated string.
fn test_indef_text_round_trip() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_text_indef()!
	packer.pack_text('hel')
	packer.pack_text('lo')
	packer.pack_break()!
	wire := packer.bytes().clone()
	mut reader := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	decoded := reader.unpack_text()!
	assert decoded == 'hello'
}
| 268 | |
| 269 | // --------------------------------------------------------------------- |
| 270 | // Encoder must refuse compositions that would silently corrupt the |
| 271 | // stream: a Tag with no content, a RawMessage with empty data, a |
| 272 | // Simple value in the reserved 24..31 range. |
| 273 | // --------------------------------------------------------------------- |
| 274 | |
// A Tag whose content_box is empty has nothing to wrap; pack_value must
// return an error for it.
fn test_pack_value_rejects_tag_with_empty_content() {
	hollow := cbor.Tag{
		number:      42
		content_box: []
	}
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_value(hollow) or { return }
	assert false, 'pack_value must reject Tag with empty content_box'
}
| 285 | |
// A tag built with new_tag(99, uint 7) must survive pack_value followed by
// unpack_value: still a Tag, same number, integer content.
fn test_new_tag_round_trips() {
	original := cbor.new_tag(99, cbor.new_uint(7))
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_value(original)!
	wire := packer.bytes().clone()
	mut reader := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	back := reader.unpack_value()!
	assert back is cbor.Tag
	if back is cbor.Tag {
		assert back.number == 99
		inner := back.content()
		assert inner is cbor.IntNum
	}
}
| 299 | |
// A RawMessage with no bytes is not a CBOR item; pack_raw must refuse it.
fn test_pack_raw_rejects_empty_message() {
	empty := cbor.RawMessage{
		data: []
	}
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_raw(empty) or { return }
	assert false, 'pack_raw must reject empty RawMessage'
}
| 306 | |
// Simple values 24..31 are reserved (RFC 8949 §3.3). Routing one through
// pack_value must surface an error rather than succeed.
fn test_pack_value_rejects_reserved_simple() {
	reserved := cbor.Simple{
		value: 25
	}
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_value(reserved) or { return }
	assert false, 'reserved Simple must surface the pack_simple error'
}
| 316 | |
// RFC 8949 §3.3 assigns simple values 20..23 to false/true/null/undefined.
// pack_simple must refuse them so callers go through the dedicated typed
// packers; the remaining values must still be accepted.
fn test_pack_simple_rejects_assigned_range() {
	mut p := cbor.new_packer(cbor.EncodeOpts{})
	for v in [u8(20), 21, 22, 23] {
		p.pack_simple(v) or { continue }
		assert false, 'pack_simple(${v}) must be rejected'
	}
	// Edges of the inline range (0, 19) and of the 1-byte range (32, 255).
	for accepted in [u8(0), 19, 32, 255] {
		p.pack_simple(accepted)!
	}
}
| 334 | |
// as_int boundaries. A CBOR negative integer with argument n stands for
// -1 - n, so n = 2^63 is one below i64::min and n = 2^63 - 1 is exactly
// i64::min. On the unsigned side i64::max fits and i64::max + 1 does not,
// although as_uint must still report it.
fn test_value_as_int_boundary() {
	opts := cbor.DecodeOpts{}

	// Negative, argument 2^63: -2^63 - 1 is out of range, expect none.
	below_min := cbor.decode[cbor.Value](b('3b8000000000000000'), opts)!
	assert below_min.as_int() == none, 'as_int must reject -2^63 - 1'

	// Negative, argument 2^63 - 1: exactly i64::min.
	at_min := cbor.decode[cbor.Value](b('3b7fffffffffffffff'), opts)!
	assert at_min.as_int()? == i64(-9223372036854775807 - 1)

	// Unsigned i64::max still fits.
	at_max := cbor.decode[cbor.Value](b('1b7fffffffffffffff'), opts)!
	assert at_max.as_int()? == i64(9223372036854775807)

	// Unsigned i64::max + 1: as_int gives none, as_uint keeps the value.
	above_max := cbor.decode[cbor.Value](b('1b8000000000000000'), opts)!
	assert above_max.as_int() == none, 'as_int must reject magnitudes > i64::max'
	assert above_max.as_uint()? == u64(9223372036854775808)
}
| 359 | |
// Struct path: with deny_duplicate_keys set, a repeated field name must be
// an error instead of letting the second value overwrite the first.
fn test_struct_decode_rejects_duplicate_keys() {
	// a2 61 61 01 61 61 02 = {"a": 1, "a": 2}. The fixture is written as a
	// single literal; the earlier embedded space plus runtime `.replace`
	// produced the same bytes with needless work.
	payload := b('a2616101616102')
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	cbor.decode[Foo](payload, strict) or { return }
	assert false, 'struct decode must reject duplicate keys when opted in'
}
| 368 | |
// Foo is the smallest struct target for the duplicate-key test: a single
// int field named `a`.
struct Foo {
	a int
}
| 372 | |
| 373 | // --------------------------------------------------------------------- |
// Canonical encoding (RFC 8949 §4.2.1) requires the preferred
// serialization (§4.1) regardless of the source's float width hint. A
// FloatNum carrying `.double` must shrink to half precision when the
// value fits, so re-encoding a Value tree in canonical mode is byte-stable.
| 378 | // --------------------------------------------------------------------- |
| 379 | |
// In canonical mode a FloatNum hinted as `.double` must still be emitted
// in the shortest width that holds the value: 1.0 becomes half-precision
// f9 3c 00.
fn test_canonical_overrides_float_bits_hint() {
	one := cbor.FloatNum{
		value: 1.0
		bits:  .double
	}
	canonical := cbor.EncodeOpts{
		canonical: true
	}
	mut p := cbor.new_packer(canonical)
	p.pack_value(one)!
	expected := [u8(0xf9), 0x3c, 0x00]
	assert p.bytes() == expected, 'got ${p.bytes().hex()}'
}
| 389 | |
// Outside canonical mode the width hint wins: 1.0 hinted as `.double` must
// start with the float64 initial byte 0xfb even though a half would do.
fn test_non_canonical_preserves_float_bits_hint() {
	one := cbor.FloatNum{
		value: 1.0
		bits:  .double
	}
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_value(one)!
	head := packer.bytes()[0]
	assert head == 0xfb, 'non-canonical must respect .double hint'
}
| 401 | |
| 402 | // --------------------------------------------------------------------- |
// time.Time round-trip. Whole-second values are emitted as tag 1 (epoch)
// wrapping an integer; sub-second values are emitted as tag 0 (RFC 3339
// string) so every nanosecond survives the trip. On decode, a tag-1
// float (RFC 8949 §3.4.2) is accepted as well; that path uses math.round
// so values just below an integer second don't drop a nanosecond.
| 407 | // --------------------------------------------------------------------- |
| 408 | |
// Encodes and decodes timestamps whose nanosecond field ranges from 1 to
// 999_999_999 and requires both the unix second and the nanosecond field
// to come back unchanged.
fn test_time_round_trip_preserves_full_nanoseconds() {
	samples := [int(1), 999, 1_000_000, 250_000_000, 999_999_999]
	for ns in samples {
		t := time.unix_nanosecond(1_700_000_000, ns)
		wire := cbor.encode[time.Time](t, cbor.EncodeOpts{})!
		back := cbor.decode[time.Time](wire, cbor.DecodeOpts{})!
		assert back.unix() == t.unix(), 'ns=${ns}: unix drift'
		assert back.nanosecond == ns, 'ns=${ns}: got ${back.nanosecond}'
	}
}
| 421 | |
// A timestamp with no sub-second part must encode as a tag whose content
// is an integer, not a float or a string.
fn test_time_whole_seconds_use_int_tag() {
	whole := time.unix(1_700_000_000)
	wire := cbor.encode[time.Time](whole, cbor.EncodeOpts{})!
	mut reader := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	v := reader.unpack_value()!
	assert v is cbor.Tag
	if v is cbor.Tag {
		inner := v.content()
		assert inner is cbor.IntNum, 'whole-seconds path must stay integer'
	}
}
| 435 | |
// c1 fb 7ff8000000000001 = tag 1 wrapping a float64 quiet NaN. NaN is not
// a point in time, and converting it to an integer is undefined behaviour
// in C, so the decoder must reject it.
fn test_time_decode_rejects_nan_in_tag1_float() {
	payload := b('c1fb7ff8000000000001')
	cbor.decode[time.Time](payload, cbor.DecodeOpts{}) or { return }
	assert false, 'tag 1 NaN must be rejected'
}
| 445 | |
// c1 fb 7ff0000000000000 = tag 1 wrapping float64 +Inf; must be rejected.
fn test_time_decode_rejects_inf_in_tag1_float() {
	payload := b('c1fb7ff0000000000000')
	cbor.decode[time.Time](payload, cbor.DecodeOpts{}) or { return }
	assert false, 'tag 1 +Inf must be rejected'
}
| 452 | |
// c1 fb 46293e5939a08cea = tag 1 wrapping float64 1e30, far beyond what
// i64 epoch seconds can represent. The decoder must error rather than
// clamp.
fn test_time_decode_rejects_overflow_in_tag1_float() {
	payload := b('c1fb46293e5939a08cea')
	cbor.decode[time.Time](payload, cbor.DecodeOpts{}) or { return }
	assert false, 'tag 1 oversized float must be rejected'
}
| 461 | |
// Tag 1 wrapping float64(1.999999999) must decode to 1s + 999_999_999ns.
// The fractional part scaled to nanoseconds lands within about 1e-7 of
// 999_999_999, so rounding yields exactly that value while truncation
// can yield 999_999_998.
fn test_time_decode_rounds_nanoseconds() {
	mut p := cbor.new_packer(cbor.EncodeOpts{})
	p.pack_tag(1)
	p.pack_float64(f64(1.999999999))
	bytes := p.bytes().clone()
	t := cbor.decode[time.Time](bytes, cbor.DecodeOpts{})!
	assert t.unix() == 1
	// Pin the exact value: the earlier `>= 999_000_000` bound also accepted
	// the truncated 999_999_998 that this test exists to catch.
	assert t.nanosecond == 999_999_999, 'ns mis-rounded: ${t.nanosecond}'
}
| 473 | |
| 474 | // --------------------------------------------------------------------- |
| 475 | // Opt-in UTF-8 validation at the encode boundary. The streaming |
| 476 | // pack_text trusts its input for performance, but encode[T] honours |
| 477 | // EncodeOpts.validate_utf8 so callers building strings from raw bytes |
| 478 | // can refuse to emit a payload the strict-by-default decoder would |
| 479 | // reject on the way back. |
| 480 | // --------------------------------------------------------------------- |
| 481 | |
// A string holding only the continuation byte 0x80 is invalid UTF-8.
// With validate_utf8 set, encode[string] must refuse it.
fn test_encode_string_rejects_invalid_utf8_when_opted_in() {
	bad := unsafe { tos([u8(0x80)].data, 1) }
	checked := cbor.EncodeOpts{
		validate_utf8: true
	}
	cbor.encode[string](bad, checked) or { return }
	assert false, 'expected validate_utf8 to reject invalid string'
}
| 489 | |
// `héllo` is six UTF-8 bytes (68 c3 a9 6c 6c 6f), so the encoding is the
// head 0x66 (text, length 6) followed by those bytes: seven in total.
fn test_encode_string_passes_valid_utf8_with_validation() {
	checked := cbor.EncodeOpts{
		validate_utf8: true
	}
	wire := cbor.encode[string]('héllo', checked)!
	assert wire.len == 7
	assert wire[0] == 0x66
}
| 496 | |
// With default EncodeOpts the encoder does not inspect the string, so the
// invalid byte 0x80 is emitted as-is: head 0x61 plus one payload byte.
fn test_encode_string_passes_invalid_utf8_when_validation_off() {
	bad := unsafe { tos([u8(0x80)].data, 1) }
	wire := cbor.encode[string](bad, cbor.EncodeOpts{})!
	assert wire.len == 2
}
| 504 | |
| 505 | // --------------------------------------------------------------------- |
// Sanity: well-formed pack_value calls (the common case) keep
// type-checking after the signature changes that the strictness
// tests above depend on. Covered by test_pack_value_well_formed,
// which is defined further down in this file.
| 509 | // --------------------------------------------------------------------- |
| 510 | |
| 511 | // --------------------------------------------------------------------- |
| 512 | // `decode[T]` rejects extra bytes after the top-level item by default |
| 513 | // so callers can't be tricked into accepting smuggled suffixes |
| 514 | // (concatenated items, leftover transport framing). Callers that |
| 515 | // genuinely want partial parsing opt in via allow_trailing_bytes. |
| 516 | // --------------------------------------------------------------------- |
| 517 | |
// `01 02` is uint(1) immediately followed by uint(2). With default
// options decode[u64] must report the leftover item as an error.
fn test_decode_rejects_trailing_bytes() {
	payload := b('0102')
	cbor.decode[u64](payload, cbor.DecodeOpts{}) or { return }
	assert false, 'expected trailing-byte rejection'
}
| 526 | |
// With allow_trailing_bytes set, the same `01 02` payload decodes to the
// first item and the remainder is ignored.
fn test_decode_allows_trailing_bytes_when_opted_in() {
	lenient := cbor.DecodeOpts{
		allow_trailing_bytes: true
	}
	v := cbor.decode[u64](b('0102'), lenient)!
	assert v == 1, 'expected first item, got ${v}'
}
| 532 | |
| 533 | // --------------------------------------------------------------------- |
| 534 | // `decode_from` distinguishes io.Eof (legitimate end of stream) from |
| 535 | // transport errors. A truncated payload whose prefix happens to be a |
| 536 | // valid CBOR item must surface the underlying read failure rather than |
| 537 | // silently return a partial decode. |
| 538 | // --------------------------------------------------------------------- |
| 539 | |
// FailingReader is a test reader that hands out `emitted` and returns a
// plain (non-Eof) error once `pos` reaches `fail_at`.
struct FailingReader {
mut:
	emitted []u8 // bytes to hand out
	pos     int  // number of bytes already handed out
	fail_at int  // read offset at which the simulated failure triggers
}
| 546 | |
// read copies the next bytes of `emitted` into `buf` and returns how many
// were copied. The count is capped by the buffer size, by the bytes left
// in `emitted`, and by the distance to `fail_at`. Once `pos` has reached
// `fail_at` it returns a transport error; that check runs before the
// end-of-data check, which returns io.Eof.
fn (mut r FailingReader) read(mut buf []u8) !int {
	if r.pos >= r.fail_at {
		return error('FailingReader: simulated transport error')
	}
	if r.pos >= r.emitted.len {
		return io.Eof{}
	}
	// Smallest of: buffer room, distance to the failure point, bytes left.
	mut count := buf.len
	until_fail := r.fail_at - r.pos
	if until_fail < count {
		count = until_fail
	}
	left := r.emitted.len - r.pos
	if left < count {
		count = left
	}
	start := r.pos
	for offset in 0 .. count {
		buf[offset] = r.emitted[start + offset]
	}
	r.pos = start + count
	return count
}
| 562 | |
// decode_from with max_stream_bytes set must surface a transport error
// from the reader instead of treating it as a clean end of stream.
fn test_decode_from_propagates_reader_error() {
	// Emit exactly one complete item (`01`), then fail on the next read.
	// The fixture is deliberately a single item: with `01 02` a decoder
	// that swallowed the read error would still fail on the trailing byte,
	// so the test would pass without proving propagation. With one item a
	// swallowed error becomes a successful decode of 1 and trips the assert.
	mut r := FailingReader{
		emitted: [u8(0x01)]
		fail_at: 1
	}
	v := cbor.decode_from[u64](mut r, cbor.DecodeOpts{ max_stream_bytes: 1024 }) or { return }
	assert false, 'expected reader error to propagate, got ${v}'
}
| 575 | |
// EofReader is a test reader that hands out `emitted` and then reports
// io.Eof.
struct EofReader {
mut:
	emitted []u8 // bytes to hand out
	pos     int  // number of bytes already handed out
}
| 581 | |
// read copies as many remaining bytes of `emitted` as fit into `buf` and
// returns the count; once everything has been handed out it returns io.Eof.
fn (mut r EofReader) read(mut buf []u8) !int {
	left := r.emitted.len - r.pos
	if left <= 0 {
		return io.Eof{}
	}
	mut count := buf.len
	if left < count {
		count = left
	}
	start := r.pos
	for offset in 0 .. count {
		buf[offset] = r.emitted[start + offset]
	}
	r.pos = start + count
	return count
}
| 593 | |
// Unbounded variant (no max_stream_bytes): one valid item is emitted and
// the next read fails. decode_from must return that failure, not the item.
fn test_decode_from_unbounded_propagates_reader_error() {
	mut reader := FailingReader{
		emitted: [u8(0x01)]
		fail_at: 1
	}
	v := cbor.decode_from[u64](mut reader, cbor.DecodeOpts{}) or { return }
	assert false, 'expected reader error to propagate, got ${v}'
}
| 605 | |
// Counterpart to the failure tests: the reader emits one valid item and
// then io.Eof, which is a normal end of stream, so decode_from must
// succeed.
fn test_decode_from_treats_eof_as_normal() {
	mut reader := EofReader{
		emitted: [u8(0x01)]
	}
	bounded := cbor.DecodeOpts{
		max_stream_bytes: 1024
	}
	decoded := cbor.decode_from[u64](mut reader, bounded)!
	assert decoded == 1
}
| 615 | |
| 616 | // --------------------------------------------------------------------- |
| 617 | // User-defined `Marshaler` output is validated before splicing into |
| 618 | // the parent stream. Without this check, a buggy or hostile to_cbor() |
| 619 | // returning a truncated/malformed item silently corrupts the |
| 620 | // surrounding fields (the next field gets parsed as the bad item's |
| 621 | // claimed payload). |
| 622 | // --------------------------------------------------------------------- |
| 623 | |
// BadMarshaler is a fieldless type whose to_cbor output is a truncated item.
struct BadMarshaler {}
| 625 | |
// to_cbor returns 78 64: a text-string head announcing 100 bytes with no
// payload behind it, i.e. a truncated item.
pub fn (b BadMarshaler) to_cbor() []u8 {
	truncated := [u8(0x78), 0x64]
	return truncated
}
| 629 | |
// EnvelopeWithBad places a BadMarshaler field between two int fields.
struct EnvelopeWithBad {
	id    int
	bad   BadMarshaler
	other int
}
| 635 | |
// Encoding a struct whose field's to_cbor returns a truncated item must
// fail instead of producing a corrupted stream.
fn test_marshaler_output_validated() {
	envelope := EnvelopeWithBad{
		id:    1
		bad:   BadMarshaler{}
		other: 42
	}
	cbor.encode[EnvelopeWithBad](envelope, cbor.EncodeOpts{}) or { return }
	assert false, 'malformed Marshaler output must be rejected'
}
| 646 | |
// TrailingMarshaler is a fieldless type whose to_cbor output holds two
// items.
struct TrailingMarshaler {}
| 648 | |
// to_cbor returns 01 02: two complete uint items where exactly one item
// is expected.
pub fn (t TrailingMarshaler) to_cbor() []u8 {
	two_items := [u8(0x01), 0x02]
	return two_items
}
| 652 | |
// EnvelopeWithTrailing pairs an int field with a TrailingMarshaler field.
struct EnvelopeWithTrailing {
	id  int
	bad TrailingMarshaler
}
| 657 | |
// Encoding a struct whose field's to_cbor returns more than one item must
// fail.
fn test_marshaler_rejects_trailing_bytes() {
	envelope := EnvelopeWithTrailing{
		id:  1
		bad: TrailingMarshaler{}
	}
	cbor.encode[EnvelopeWithTrailing](envelope, cbor.EncodeOpts{}) or { return }
	assert false, 'Marshaler returning > 1 item must be rejected'
}
| 667 | |
| 668 | // --------------------------------------------------------------------- |
| 669 | // `pack_to` refuses to ship a buffer with an open indef container. |
| 670 | // `bytes()` stays low-level and doesn't validate (callers can use |
| 671 | // `is_complete()` to check before reading the buffer directly). |
| 672 | // --------------------------------------------------------------------- |
| 673 | |
// DropWriter is an in-memory writer that accumulates everything written.
struct DropWriter {
mut:
	buf []u8 // all bytes received so far
}
| 678 | |
// write appends `buf` to the writer's buffer and reports the whole slice
// as written.
fn (mut w DropWriter) write(buf []u8) !int {
	written := buf.len
	w.buf << buf
	return written
}
| 683 | |
// pack_to must refuse to flush while an indefinite array is still open
// (no break has been packed yet).
fn test_pack_to_rejects_open_indef() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	packer.pack_array_indef()!
	packer.pack_int(1)
	mut sink := DropWriter{}
	packer.pack_to(mut sink) or { return }
	assert false, 'pack_to must reject buffer with open indef array'
}
| 693 | |
// is_complete must be true on a fresh packer, false while an indefinite
// array is open, and true again after the matching break.
fn test_is_complete_reports_state() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})
	assert packer.is_complete()

	packer.pack_array_indef()!
	assert !packer.is_complete()

	packer.pack_int(1)
	packer.pack_break()!
	assert packer.is_complete()
}
| 703 | |
| 704 | // --------------------------------------------------------------------- |
| 705 | // Self-describe wrapping (RFC 8949 §3.4.6, tag 55799) is stripped |
| 706 | // transparently by `decode[T]` so payloads encoded with |
| 707 | // `EncodeOpts.self_describe` round-trip through the typed decoder. |
| 708 | // --------------------------------------------------------------------- |
| 709 | |
// A value encoded with self_describe must come back unchanged through
// decode[u64].
fn test_self_describe_round_trips_via_typed_decode() {
	wrapped := cbor.EncodeOpts{
		self_describe: true
	}
	wire := cbor.encode[u64](42, wrapped)!
	decoded := cbor.decode[u64](wire, cbor.DecodeOpts{})!
	assert decoded == 42
}
| 715 | |
// Driving the Unpacker directly keeps the self-describe wrapper visible:
// unpack_value must return it as Tag 55799.
fn test_self_describe_visible_via_unpacker() {
	wrapped := cbor.EncodeOpts{
		self_describe: true
	}
	wire := cbor.encode[u64](42, wrapped)!
	mut reader := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	v := reader.unpack_value()!
	match v {
		cbor.Tag {
			assert v.number == 55799
		}
		else {
			assert false, 'expected Tag(55799), got ${v.type_name()}'
		}
	}
}
| 728 | |
| 729 | // --------------------------------------------------------------------- |
| 730 | // `unpack_uint` and `unpack_tag` roll back position on partial-arg |
| 731 | // failure, matching the contract of the other typed reads. |
| 732 | // --------------------------------------------------------------------- |
| 733 | |
// A lone 0x1b byte announces a uint with an 8-byte argument but carries
// no payload, so `unpack_uint` must fail and leave the read position
// where it started.
fn test_unpack_uint_rollback_on_truncated_arg() {
	truncated := [u8(0x1b)]
	opts := cbor.DecodeOpts{
		allow_trailing_bytes: true
	}
	mut u := cbor.new_unpacker(truncated, opts)
	if _ := u.unpack_uint() {
		assert false, 'expected truncated arg to error'
	} else {
		// Error path: the position must have been rolled back.
		assert u.pos == 0, 'expected rollback to start, got pos=${u.pos}'
	}
}
| 742 | |
// 0xdb = tag (major type 6) with an 8-byte argument and no trailing
// payload, so `unpack_tag` must fail and leave the read position where
// it started.
fn test_unpack_tag_rollback_on_truncated_arg() {
	truncated := [u8(0xdb)]
	opts := cbor.DecodeOpts{
		allow_trailing_bytes: true
	}
	mut u := cbor.new_unpacker(truncated, opts)
	mut errored := true
	if _ := u.unpack_tag() {
		errored = false
	}
	assert errored, 'expected truncated arg to error'
	assert u.pos == 0, 'expected rollback to start, got pos=${u.pos}'
}
| 750 | |
// Packs two well-formed Values (a uint and a text string) back to back
// and checks that the packer produced output.
fn test_pack_value_well_formed() {
	mut packer := cbor.new_packer(cbor.EncodeOpts{})

	number := cbor.new_uint(1)
	packer.pack_value(number)!

	text := cbor.new_text('x')
	packer.pack_value(text)!

	written := packer.bytes().len
	assert written > 0
}
| 757 | |
| 758 | // --------------------------------------------------------------------- |
| 759 | // validate_utf8 must propagate into the canonical sub-encoders: the |
| 760 | // map / struct / Value-Map paths build temporary packers when sorting |
| 761 | // keys and used to drop the caller's option, silently letting invalid |
| 762 | // UTF-8 onto the wire even when strict-encode was requested. |
| 763 | // --------------------------------------------------------------------- |
| 764 | |
// BadStrField is the fixture for the validate_utf8 propagation tests:
// one string field that the tests fill with valid text and one that
// they fill with bytes that are not valid UTF-8.
struct BadStrField {
	// Filled with valid text ('ok') by the tests.
	good string
	// Filled with the invalid UTF-8 byte sequence ff fe by the tests.
	bad string
}
| 769 | |
// With both `canonical` and `validate_utf8` set, encoding a struct whose
// string field holds invalid UTF-8 must fail.
fn test_validate_utf8_propagates_into_canonical_struct() {
	strict_canonical := cbor.EncodeOpts{
		canonical:     true
		validate_utf8: true
	}
	sample := BadStrField{
		good: 'ok'
		bad:  unsafe { tos(c'\xff\xfe', 2) }
	}
	// Rejection is the expected outcome: leave the test successfully.
	cbor.encode[BadStrField](sample, strict_canonical) or { return }
	assert false, 'canonical encode must reject invalid UTF-8 when validate_utf8 is set'
}
| 783 | |
// With both `canonical` and `validate_utf8` set, encoding a string map
// whose value holds invalid UTF-8 must fail.
fn test_validate_utf8_propagates_into_canonical_map() {
	invalid := unsafe { tos(c'\xff\xfe', 2) }
	// Same insertion order as building the map key by key: 'a' then 'b'.
	m := {
		'a': 'ok'
		'b': invalid
	}
	strict_canonical := cbor.EncodeOpts{
		canonical:     true
		validate_utf8: true
	}
	// Rejection is the expected outcome: leave the test successfully.
	cbor.encode[map[string]string](m, strict_canonical) or { return }
	assert false, 'canonical encode must reject invalid UTF-8 in map values'
}
| 796 | |
// Counterpart to the strict tests: when `validate_utf8` is left at its
// default, canonical encoding must still accept invalid UTF-8 bytes,
// because the caller opted out of validation.
fn test_validate_utf8_off_allows_invalid_in_canonical() {
	canonical_only := cbor.EncodeOpts{
		canonical: true
	}
	sample := BadStrField{
		good: 'ok'
		bad:  unsafe { tos(c'\xff\xfe', 2) }
	}
	encoded := cbor.encode[BadStrField](sample, canonical_only)!
	assert encoded.len > 0
}
| 807 | |
| 808 | // --------------------------------------------------------------------- |
| 809 | // SWAR ASCII fast path in `utf8_validate_slice` must remain correct on |
| 810 | // payloads of any length and any starting offset (the load is now via |
| 811 | // memcpy to be safe on strict-alignment targets — but the result must |
| 812 | // stay byte-for-byte identical with the per-byte path). |
| 813 | // --------------------------------------------------------------------- |
| 814 | |
// Round-trips a 257-byte ASCII string with UTF-8 validation enabled on
// both sides. 257 = 32 * 8 + 1, so the input covers many 8-byte chunks
// plus a one-byte tail.
fn test_validate_utf8_long_ascii_run() {
	payload := 'a'.repeat(257)
	enc_opts := cbor.EncodeOpts{
		validate_utf8: true
	}
	dec_opts := cbor.DecodeOpts{
		validate_utf8: true
	}
	wire := cbor.encode[string](payload, enc_opts)!
	decoded := cbor.decode[string](wire, dec_opts)!
	assert decoded == payload
}
| 822 | |
// Round-trips 16 ASCII bytes (two 8-byte chunks), then a 2-byte UTF-8
// rune, then more ASCII, with validation enabled on both sides. The
// validator has to switch from the ASCII fast path to per-byte decoding
// once a high bit shows up.
fn test_validate_utf8_non_ascii_after_swar_chunks() {
	ascii_prefix := 'a'.repeat(16)
	payload := ascii_prefix + 'é' + 'bbbb'
	enc_opts := cbor.EncodeOpts{
		validate_utf8: true
	}
	dec_opts := cbor.DecodeOpts{
		validate_utf8: true
	}
	wire := cbor.encode[string](payload, enc_opts)!
	decoded := cbor.decode[string](wire, dec_opts)!
	assert decoded == payload
}
| 832 | |