// Source: v/vlib/encoding/cbor/tests/hardening_test.v
// (831 lines, 731 sloc, 28.48 KB, revision 468855eef1db0ff73c62be2d1bf176ffa0e1478e)
1// Hardening tests: adversarial payloads, RFC 8949 strictness boundaries
2// on both encode and decode, and round-trip invariants for the parts
3// the rest of the suite doesn't cover (sub-second time, NaN dedup,
4// canonical float, indef-string composition rules).
5module main
6
7import encoding.cbor
8import encoding.hex
9import io
10import time
11
// b turns the hex string `s` into its raw bytes. It panics with
// 'invalid hex: <s>' when hex.decode rejects the input.
fn b(s string) []u8 {
	decoded := hex.decode(s) or { panic('invalid hex: ${s}') }
	return decoded
}
15
16// ---------------------------------------------------------------------
17// Decode bounds: a length argument can be up to u64::max on the wire,
18// but the host's int is i32 on 32-bit targets. Naively casting the
19// argument before bounds-checking aborts with `array.slice: invalid
20// slice index (start>end)` or `negative .cap`. Every length read path
21// must clamp before any int(...) cast.
22// ---------------------------------------------------------------------
23
// 7b ff..ff is a text-string head whose 8-byte length argument is
// u64::max; unpack_text has to return an error for it.
fn test_unpack_text_rejects_u64_max_length() {
	wire := b('7bffffffffffffffff')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.unpack_text() {
		assert false, 'u64::max text length must not panic the decoder'
	}
}
30
// 5b ff..ff is a byte-string head whose 8-byte length argument is
// u64::max; unpack_bytes has to return an error for it.
fn test_unpack_bytes_rejects_u64_max_length() {
	wire := b('5bffffffffffffffff')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.unpack_bytes() {
		assert false, 'u64::max bytes length must not panic the decoder'
	}
}
37
// 7a 80000000 is a text head with a 4-byte length of 2^31 (the i32
// wrap point) and no payload behind it; unpack_text must error.
fn test_unpack_text_rejects_i32_overflow_length() {
	wire := b('7a80000000')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.unpack_text() {
		assert false, 'expected EOF rejection at 2^31 boundary'
	}
}
45
// skip_value is given a text head claiming u64::max bytes and must
// return an error.
fn test_skip_value_rejects_oversized_text_length() {
	wire := b('7bffffffffffffffff')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.skip_value() {
		assert false, 'skip_value must reject u64::max text length'
	}
}
52
// skip_value is given a byte-string head claiming u64::max bytes and
// must return an error.
fn test_skip_value_rejects_oversized_bytes_length() {
	wire := b('5bffffffffffffffff')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.skip_value() {
		assert false, 'skip_value must reject u64::max bytes length'
	}
}
59
// 7f opens an indefinite-length text string; its first chunk head
// (7b ff..ff) claims u64::max bytes. skip_value must return an error.
fn test_skip_value_rejects_oversized_indef_chunk() {
	wire := b('7f7bffffffffffffffffff')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.skip_value() {
		assert false, 'skip_value must reject u64::max chunk length'
	}
}
67
68// ---------------------------------------------------------------------
69// Pre-allocation safety: the array/map header argument is a u64. The
70// decoder must refuse any value it can't realistically allocate (cast
71// overflow at i64::max, or item count > remaining bytes), instead of
72// triggering a `negative .cap` panic or attempting a multi-GB malloc.
73// ---------------------------------------------------------------------
74
// 9b 7f ff..ff is an array head whose 8-byte count is i64::max;
// unpack_value must return an error.
fn test_value_decode_rejects_array_header_at_i64_max() {
	wire := b('9b7fffffffffffffff')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.unpack_value() {
		assert false, 'unpack_value must reject implausible array length'
	}
}
81
// bb 7f ff..ff is a map head whose 8-byte pair count is i64::max;
// unpack_value must return an error.
fn test_value_decode_rejects_map_header_at_i64_max() {
	wire := b('bb7fffffffffffffff')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.unpack_value() {
		assert false, 'unpack_value must reject implausible map length'
	}
}
88
// 9a 00ffffff announces an array of 0x00ffffff (~16M) items while the
// whole payload is only 5 bytes; unpack_value must return an error.
fn test_value_decode_rejects_array_longer_than_buffer() {
	wire := b('9a00ffffff')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.unpack_value() {
		assert false, 'unpack_value must reject array longer than buffer'
	}
}
96
// ba 00ffffff announces a map of 0x00ffffff pairs in a 5-byte payload;
// unpack_value must return an error.
fn test_value_decode_rejects_map_longer_than_buffer() {
	wire := b('ba00ffffff')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.unpack_value() {
		assert false, 'unpack_value must reject map longer than buffer'
	}
}
103
104// ---------------------------------------------------------------------
105// skip_value strictness: must reject every malformed shape that the
106// typed unpack_* paths reject, otherwise malformed CBOR slips through
107// RawMessage, Unmarshaler, and unknown-field skipping into downstream
108// consumers.
109// ---------------------------------------------------------------------
110
// f8 1f encodes simple value 31 in the 1-byte form. RFC 8949 §3.3 does
// not allow values below 32 in that form, so skip_value must error.
fn test_skip_value_rejects_simple_below_32() {
	wire := b('f81f')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.skip_value() {
		assert false, 'skip_value must reject simple < 32 in 1-byte form'
	}
}
119
// Same f8 1f payload as the skip_value case, driven through
// unpack_raw, which must also return an error.
fn test_unpack_raw_rejects_simple_below_32() {
	wire := b('f81f')
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	if _ := dec.unpack_raw() {
		assert false, 'unpack_raw must reject simple < 32 in 1-byte form'
	}
}
126
127// ---------------------------------------------------------------------
128// deny_duplicate_keys covers every map decode path (typed map, struct,
129// Value tree definite, Value tree indefinite). RFC 8949 §5.6: map keys
130// "should be unique"; deny_duplicate_keys turns that into a hard error.
131// ---------------------------------------------------------------------
132
// a2 01 01 01 02 is the definite map {1: 1, 1: 2}. With
// deny_duplicate_keys set, decoding it into a Value must fail.
fn test_value_decode_rejects_duplicate_keys_when_opted_in() {
	wire := b('a201010102')
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	if _ := cbor.decode[cbor.Value](wire, strict) {
		assert false, 'expected duplicate-key rejection on Value path'
	}
}
140
// bf 01 01 01 02 ff is an indefinite-length map carrying key 1 twice.
// With deny_duplicate_keys set, decoding it into a Value must fail.
fn test_value_decode_rejects_indef_map_duplicate_keys_when_opted_in() {
	wire := b('bf01010102ff')
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	if _ := cbor.decode[cbor.Value](wire, strict) {
		assert false, 'expected duplicate-key rejection on indef Value map'
	}
}
148
// With default DecodeOpts the duplicate-key map {1: 1, 1: 2} must
// decode successfully into a Map; rejection is opt-in only.
fn test_value_decode_tolerates_duplicates_by_default() {
	wire := b('a201010102')
	decoded := cbor.decode[cbor.Value](wire, cbor.DecodeOpts{}) or {
		assert false, 'default mode must accept duplicates: ${err}'
		return
	}
	assert decoded is cbor.Map
}
159
// bf f9 7e00 01 f9 7e00 02 ff = {NaN: 1, NaN: 2}, both keys being the
// same half-precision quiet NaN. Although NaN != NaN under IEEE 754,
// the two keys have identical encodings and must count as duplicates.
fn test_value_decode_detects_nan_keys_as_duplicate() {
	wire := b('bff97e0001f97e0002ff')
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	if _ := cbor.decode[cbor.Value](wire, strict) {
		assert false, 'two NaN keys must be detected as duplicates'
	}
}
169
// bf f9 3c00 01 fa 3f800000 02 ff = {f16(1.0): 1, f32(1.0): 2}.
// The keys are numerically equal but encoded at different widths, so
// even with deny_duplicate_keys the map must decode with both pairs.
fn test_value_decode_keeps_distinct_float_widths_separate() {
	wire := b('bff93c0001fa3f80000002ff')
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	decoded := cbor.decode[cbor.Value](wire, strict)!
	assert decoded is cbor.Map
	if decoded is cbor.Map {
		assert decoded.pairs.len == 2
	}
}
181
182// ---------------------------------------------------------------------
183// Encoder indef state machine. RFC 8949 §3.2.3: an indef text/bytes
184// string may only contain definite-length strings of the same major
185// type. Opening any other indef container inside it is malformed.
186// ---------------------------------------------------------------------
187
// Opening a second indefinite text string while one is already open
// must be refused by the packer.
fn test_pack_text_indef_rejects_self_nesting() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_text_indef()!
	if _ := enc.pack_text_indef() {
		assert false, 'nested indef text must be rejected'
	}
}
195
// Opening a second indefinite byte string while one is already open
// must be refused by the packer.
fn test_pack_bytes_indef_rejects_self_nesting() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_bytes_indef()!
	if _ := enc.pack_bytes_indef() {
		assert false, 'nested indef bytes must be rejected'
	}
}
203
// An indefinite array may not be opened inside an open indefinite
// text string.
fn test_pack_array_indef_rejected_inside_indef_text() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_text_indef()!
	if _ := enc.pack_array_indef() {
		assert false, 'pack_array_indef inside pack_text_indef must be rejected'
	}
}
211
// An indefinite map may not be opened inside an open indefinite text
// string.
fn test_pack_map_indef_rejected_inside_indef_text() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_text_indef()!
	if _ := enc.pack_map_indef() {
		assert false, 'pack_map_indef inside pack_text_indef must be rejected'
	}
}
219
// An indefinite array may not be opened inside an open indefinite
// byte string.
fn test_pack_array_indef_rejected_inside_indef_bytes() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_bytes_indef()!
	if _ := enc.pack_array_indef() {
		assert false, 'pack_array_indef inside pack_bytes_indef must be rejected'
	}
}
227
// Opening an indefinite text string inside an open indefinite byte
// string would put a chunk of the wrong major type into the stream,
// so the packer must refuse it.
fn test_pack_text_indef_rejected_inside_indef_bytes() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_bytes_indef()!
	if _ := enc.pack_text_indef() {
		assert false, 'pack_text_indef inside pack_bytes_indef must be rejected'
	}
}
237
// pack_break on a fresh packer, with nothing indefinite open, must
// return an error.
fn test_pack_break_rejects_no_open_indef() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	if _ := enc.pack_break() {
		assert false, 'break with no open indef context must be rejected'
	}
}
244
// Indefinite arrays may nest freely (RFC 8949 §3.2.2); only indefinite
// text/byte strings restrict their contents (§3.2.3). Two opens and
// two breaks must yield exactly 9f 9f ff ff.
fn test_indef_array_inside_indef_array_allowed() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_array_indef()!
	enc.pack_array_indef()!
	enc.pack_break()!
	enc.pack_break()!
	expected := [u8(0x9f), 0x9f, 0xff, 0xff]
	assert enc.bytes() == expected
}
255
// A well-formed indefinite text string (definite text chunks 'hel' and
// 'lo', then a break) must decode back to the concatenation 'hello'.
fn test_indef_text_round_trip() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_text_indef()!
	for chunk in ['hel', 'lo'] {
		enc.pack_text(chunk)
	}
	enc.pack_break()!
	wire := enc.bytes().clone()
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	joined := dec.unpack_text()!
	assert joined == 'hello'
}
268
269// ---------------------------------------------------------------------
270// Encoder must refuse compositions that would silently corrupt the
271// stream: a Tag with no content, a RawMessage with empty data, a
272// Simple value in the reserved 24..31 range.
273// ---------------------------------------------------------------------
274
// A Tag whose content_box is empty has nothing to wrap; pack_value
// must return an error for it.
fn test_pack_value_rejects_tag_with_empty_content() {
	empty_tag := cbor.Tag{
		number:      42
		content_box: []
	}
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	if _ := enc.pack_value(empty_tag) {
		assert false, 'pack_value must reject Tag with empty content_box'
	}
}
285
// A tag built with new_tag(99, uint 7) must survive pack_value followed
// by unpack_value: the result is a Tag numbered 99 whose content is an
// IntNum.
fn test_new_tag_round_trips() {
	tagged := cbor.new_tag(99, cbor.new_uint(7))
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_value(tagged)!
	wire := enc.bytes().clone()
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	restored := dec.unpack_value()!
	assert restored is cbor.Tag
	if restored is cbor.Tag {
		assert restored.number == 99
		inner := restored.content()
		assert inner is cbor.IntNum
	}
}
299
// pack_raw must return an error for a RawMessage with no data.
fn test_pack_raw_rejects_empty_message() {
	empty_raw := cbor.RawMessage{
		data: []
	}
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	if _ := enc.pack_raw(empty_raw) {
		assert false, 'pack_raw must reject empty RawMessage'
	}
}
306
// Simple values 24..31 are reserved (RFC 8949 §3.3). pack_value must
// surface an error for Simple{25} rather than emit nothing.
fn test_pack_value_rejects_reserved_simple() {
	reserved := cbor.Simple{
		value: 25
	}
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	if _ := enc.pack_value(reserved) {
		assert false, 'reserved Simple must surface the pack_simple error'
	}
}
316
// RFC 8949 §3.3 assigns simple values 20..23 to false/true/null/
// undefined. pack_simple must refuse them, since the bytes would decode
// back as Bool/Null/Undefined and callers should use the typed packers.
// Values on either side of the refused ranges must still be accepted.
fn test_pack_simple_rejects_assigned_range() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	assigned := [u8(20), 21, 22, 23]
	for v in assigned {
		if _ := enc.pack_simple(v) {
			assert false, 'pack_simple(${v}) must be rejected'
		}
	}
	// 0 and 19 use the inline form; 32 and 255 use the 1-byte form.
	for accepted in [u8(0), 19, 32, 255] {
		enc.pack_simple(accepted)!
	}
}
334
// Checks as_int()/as_uint() right at the edges of the i64 range.
// A CBOR negative int with argument `m` has the value -1 - m.
fn test_value_as_int_boundary() {
	opts := cbor.DecodeOpts{}

	// Argument 2^63 gives -2^63 - 1, one below i64::min. The result must
	// be none, not a value saturated to i64::min.
	neg_over := cbor.decode[cbor.Value](b('3b8000000000000000'), opts)!
	assert neg_over.as_int() == none, 'as_int must reject -2^63 - 1'

	// Argument 2^63 - 1 gives exactly i64::min, which must fit.
	neg_min := cbor.decode[cbor.Value](b('3b7fffffffffffffff'), opts)!
	assert neg_min.as_int()? == i64(-9223372036854775807 - 1)

	// Unsigned i64::max fits in an i64.
	pos_max := cbor.decode[cbor.Value](b('1b7fffffffffffffff'), opts)!
	assert pos_max.as_int()? == i64(9223372036854775807)

	// Unsigned i64::max + 1 does not fit in an i64, but as_uint still
	// returns it in full.
	pos_over := cbor.decode[cbor.Value](b('1b8000000000000000'), opts)!
	assert pos_over.as_int() == none, 'as_int must reject magnitudes > i64::max'
	assert pos_over.as_uint()? == u64(9223372036854775808)
}
359
// a2 61 61 01 61 61 02 = {"a": 1, "a": 2}. When decoding into Foo with
// deny_duplicate_keys set, the repeated field name must be an error
// rather than letting the second value overwrite the first.
fn test_struct_decode_rejects_duplicate_keys() {
	wire := b('a2616101616102')
	strict := cbor.DecodeOpts{
		deny_duplicate_keys: true
	}
	if _ := cbor.decode[Foo](wire, strict) {
		assert false, 'struct decode must reject duplicate keys when opted in'
	}
}
368
// Foo is the single-field decode target for the struct duplicate-key
// test; its one field is named `a`.
struct Foo {
	a int
}
372
373// ---------------------------------------------------------------------
374// Canonical encoding (RFC 8949 §4.2.1) requires the preferred form
375// (§4.2.2) regardless of the source's float width hint. A FloatNum
376// carrying `.double` must shrink to half precision when the value
377// fits, so re-encoding a Value tree in canonical mode is byte-stable.
378// ---------------------------------------------------------------------
379
// In canonical mode a FloatNum hinted as `.double` must still be
// emitted in the shortest form: 1.0 becomes the half-float f9 3c 00.
fn test_canonical_overrides_float_bits_hint() {
	one := cbor.FloatNum{
		value: 1.0
		bits:  .double
	}
	mut enc := cbor.new_packer(cbor.EncodeOpts{ canonical: true })
	enc.pack_value(one)!
	got := enc.bytes()
	assert got == [u8(0xf9), 0x3c, 0x00], 'got ${got.hex()}'
}
389
// Outside canonical mode the `.double` hint decides the wire width, so
// 1.0 must be emitted with the float64 head byte 0xfb even though it
// would fit a half float.
fn test_non_canonical_preserves_float_bits_hint() {
	one := cbor.FloatNum{
		value: 1.0
		bits:  .double
	}
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_value(one)!
	head := enc.bytes()[0]
	assert head == 0xfb, 'non-canonical must respect .double hint'
}
401
402// ---------------------------------------------------------------------
// time.Time round-trip. On encode, whole-second values use tag 1
// (epoch, RFC 8949 §3.4.2) wrapping an integer, while values with a
// non-zero nanosecond component use tag 0 (RFC 3339 string, §3.4.1)
// so no precision is lost. On decode, tag 1 may also wrap a float;
// that path uses math.round so values just below an integer second
// don't drop a nanosecond.
407// ---------------------------------------------------------------------
408
// Encodes and decodes timestamps across the nanosecond range and
// requires both the unix second and the nanosecond field to come back
// unchanged. Per the encoder's design the sub-second path uses tag 0
// (RFC 3339 string), which avoids the precision limit of a tag-1 float.
fn test_time_round_trip_preserves_full_nanoseconds() {
	samples := [int(1), 999, 1_000_000, 250_000_000, 999_999_999]
	for ns in samples {
		original := time.unix_nanosecond(1_700_000_000, ns)
		wire := cbor.encode[time.Time](original, cbor.EncodeOpts{})!
		restored := cbor.decode[time.Time](wire, cbor.DecodeOpts{})!
		assert restored.unix() == original.unix(), 'ns=${ns}: unix drift'
		assert restored.nanosecond == ns, 'ns=${ns}: got ${restored.nanosecond}'
	}
}
421
// A timestamp with no sub-second part must encode as a tag whose
// content is an integer (IntNum), keeping the wire form small.
fn test_time_whole_seconds_use_int_tag() {
	whole := time.unix(1_700_000_000)
	wire := cbor.encode[time.Time](whole, cbor.EncodeOpts{})!
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	decoded := dec.unpack_value()!
	assert decoded is cbor.Tag
	if decoded is cbor.Tag {
		inner := decoded.content()
		assert inner is cbor.IntNum, 'whole-seconds path must stay integer'
	}
}
435
// c1 fb 7ff8000000000001 = tag 1 wrapping a float64 quiet NaN. NaN is
// meaningless as a timestamp and has no defined integer conversion, so
// decoding to time.Time must fail instead of yielding epoch 0.
fn test_time_decode_rejects_nan_in_tag1_float() {
	wire := b('c1fb7ff8000000000001')
	if _ := cbor.decode[time.Time](wire, cbor.DecodeOpts{}) {
		assert false, 'tag 1 NaN must be rejected'
	}
}
445
// c1 fb 7ff0000000000000 = tag 1 wrapping float64 +Inf; decoding to
// time.Time must fail.
fn test_time_decode_rejects_inf_in_tag1_float() {
	wire := b('c1fb7ff0000000000000')
	if _ := cbor.decode[time.Time](wire, cbor.DecodeOpts{}) {
		assert false, 'tag 1 +Inf must be rejected'
	}
}
452
// c1 fb 46293e5939a08cea = tag 1 wrapping float64 1e30, far outside
// what i64 epoch seconds can hold. Decoding must fail rather than
// saturate silently.
fn test_time_decode_rejects_overflow_in_tag1_float() {
	wire := b('c1fb46293e5939a08cea')
	if _ := cbor.decode[time.Time](wire, cbor.DecodeOpts{}) {
		assert false, 'tag 1 oversized float must be rejected'
	}
}
461
// Decodes tag 1 + float64(1.999999999) and requires the fractional part
// to be rounded to the nearest nanosecond.
//
// The nearest f64 to 1.999999999 is not exact, so a truncating
// conversion of the fraction can land on 999_999_998. The nanosecond
// field is therefore checked for the exact value 999_999_999: a looser
// lower bound would accept a truncating decoder as well.
fn test_time_decode_rounds_nanoseconds() {
	mut p := cbor.new_packer(cbor.EncodeOpts{})
	p.pack_tag(1)
	p.pack_float64(f64(1.999999999))
	bytes := p.bytes().clone()
	t := cbor.decode[time.Time](bytes, cbor.DecodeOpts{})!
	assert t.unix() == 1
	assert t.nanosecond == 999_999_999, 'ns not rounded to nearest: ${t.nanosecond}'
}
473
474// ---------------------------------------------------------------------
475// Opt-in UTF-8 validation at the encode boundary. The streaming
476// pack_text trusts its input for performance, but encode[T] honours
477// EncodeOpts.validate_utf8 so callers building strings from raw bytes
478// can refuse to emit a payload the strict-by-default decoder would
479// reject on the way back.
480// ---------------------------------------------------------------------
481
// A string holding only the lone continuation byte 0x80 is invalid
// UTF-8; encode[string] with validate_utf8 set must return an error.
fn test_encode_string_rejects_invalid_utf8_when_opted_in() {
	bad := unsafe { tos([u8(0x80)].data, 1) }
	checked := cbor.EncodeOpts{
		validate_utf8: true
	}
	if _ := cbor.encode[string](bad, checked) {
		assert false, 'expected validate_utf8 to reject invalid string'
	}
}
489
// 'héllo' is 6 UTF-8 bytes (68 c3 a9 6c 6c 6f). With validation on it
// must encode as head 0x66 (text, length 6) plus payload: 7 bytes.
fn test_encode_string_passes_valid_utf8_with_validation() {
	checked := cbor.EncodeOpts{
		validate_utf8: true
	}
	wire := cbor.encode[string]('héllo', checked)!
	assert wire.len == 7
	assert wire[0] == 0x66
}
496
// With default options the encoder does not validate text, so the
// invalid one-byte string goes out as 2 bytes (head 0x61 + 0x80).
// Catching it is left to the decoder.
fn test_encode_string_passes_invalid_utf8_when_validation_off() {
	bad := unsafe { tos([u8(0x80)].data, 1) }
	wire := cbor.encode[string](bad, cbor.EncodeOpts{})!
	assert wire.len == 2 // header 0x61 + 0x80
}
504
505// ---------------------------------------------------------------------
// Sanity: well-formed pack_value calls (the common case) keep
// type-checking after the signature changes that the strictness
// tests above depend on. The test for this section is
// test_pack_value_well_formed, located further down in this file.
510
511// ---------------------------------------------------------------------
512// `decode[T]` rejects extra bytes after the top-level item by default
513// so callers can't be tricked into accepting smuggled suffixes
514// (concatenated items, leftover transport framing). Callers that
515// genuinely want partial parsing opt in via allow_trailing_bytes.
516// ---------------------------------------------------------------------
517
// `01 02` is uint(1) followed by a second item uint(2). With default
// options decode[u64] must error instead of silently dropping the
// second item.
fn test_decode_rejects_trailing_bytes() {
	wire := b('0102')
	if _ := cbor.decode[u64](wire, cbor.DecodeOpts{}) {
		assert false, 'expected trailing-byte rejection'
	}
}
526
// With allow_trailing_bytes set, decode[u64] on `01 02` must return
// the first item (1) and ignore the rest.
fn test_decode_allows_trailing_bytes_when_opted_in() {
	wire := b('0102')
	lenient := cbor.DecodeOpts{
		allow_trailing_bytes: true
	}
	first := cbor.decode[u64](wire, lenient)!
	assert first == 1, 'expected first item, got ${first}'
}
532
533// ---------------------------------------------------------------------
534// `decode_from` distinguishes io.Eof (legitimate end of stream) from
535// transport errors. A truncated payload whose prefix happens to be a
536// valid CBOR item must surface the underlying read failure rather than
537// silently return a partial decode.
538// ---------------------------------------------------------------------
539
// FailingReader hands out the bytes in `emitted` and then simulates a
// transport failure: once `pos` reaches `fail_at`, every read returns
// a plain error instead of io.Eof.
struct FailingReader {
mut:
	emitted []u8 // bytes to serve
	pos     int  // number of bytes served so far
	fail_at int  // read offset at which reads start failing
}

// read copies as many bytes into `buf` as fit without passing either
// `fail_at` or the end of `emitted`, and returns that count. It returns
// an error once `pos >= fail_at`, and io.Eof when `emitted` runs out
// before the failure point.
fn (mut r FailingReader) read(mut buf []u8) !int {
	if r.pos >= r.fail_at {
		return error('FailingReader: simulated transport error')
	}
	if r.pos >= r.emitted.len {
		return io.Eof{}
	}
	// n = min(buf.len, bytes left before fail_at, bytes left in emitted)
	mut n := buf.len
	if r.fail_at - r.pos < n {
		n = r.fail_at - r.pos
	}
	if r.emitted.len - r.pos < n {
		n = r.emitted.len - r.pos
	}
	for i, c in r.emitted[r.pos..r.pos + n] {
		buf[i] = c
	}
	r.pos += n
	return n
}
562
// Bounded branch (max_stream_bytes set): a transport error raised by
// the reader must surface from decode_from.
//
// The reader emits one complete item (`01`) and then fails. A single
// item is used on purpose: with a two-item payload such as `01 02`, a
// decoder that swallowed the read error could still fail on the
// trailing item (presumably decode_from applies the same default
// trailing-byte rejection as decode[T] — confirm), and the test would
// pass for the wrong reason. With one item, swallowing the error makes
// the decode succeed and this test fail.
fn test_decode_from_propagates_reader_error() {
	mut r := FailingReader{
		emitted: [u8(0x01)]
		fail_at: 1
	}
	if v := cbor.decode_from[u64](mut r, cbor.DecodeOpts{ max_stream_bytes: 1024 }) {
		assert false, 'expected reader error to propagate, got ${v}'
	}
}
575
// EofReader serves the bytes in `emitted` and then reports a clean
// io.Eof.
struct EofReader {
mut:
	emitted []u8 // bytes to serve
	pos     int  // number of bytes served so far
}

// read copies up to buf.len of the remaining bytes into `buf` and
// returns the count; once everything has been served it returns io.Eof.
fn (mut r EofReader) read(mut buf []u8) !int {
	if r.pos >= r.emitted.len {
		return io.Eof{}
	}
	mut n := buf.len
	if r.emitted.len - r.pos < n {
		n = r.emitted.len - r.pos
	}
	for i, c in r.emitted[r.pos..r.pos + n] {
		buf[i] = c
	}
	r.pos += n
	return n
}
593
// Unbounded branch (no max_stream_bytes): the reader emits one item
// and then fails. The transport error must surface from decode_from
// rather than being treated as a clean end of stream.
fn test_decode_from_unbounded_propagates_reader_error() {
	mut src := FailingReader{
		emitted: [u8(0x01)]
		fail_at: 1
	}
	if v := cbor.decode_from[u64](mut src, cbor.DecodeOpts{}) {
		assert false, 'expected reader error to propagate, got ${v}'
	}
}
605
// A reader that emits one valid item and then returns io.Eof is a
// normal end of stream: decode_from must succeed and yield 1.
fn test_decode_from_treats_eof_as_normal() {
	mut src := EofReader{
		emitted: [u8(0x01)]
	}
	bounded := cbor.DecodeOpts{
		max_stream_bytes: 1024
	}
	decoded := cbor.decode_from[u64](mut src, bounded)!
	assert decoded == 1
}
615
616// ---------------------------------------------------------------------
617// User-defined `Marshaler` output is validated before splicing into
618// the parent stream. Without this check, a buggy or hostile to_cbor()
619// returning a truncated/malformed item silently corrupts the
620// surrounding fields (the next field gets parsed as the bad item's
621// claimed payload).
622// ---------------------------------------------------------------------
623
// BadMarshaler is a Marshaler stub whose output is deliberately
// malformed.
struct BadMarshaler {}

// to_cbor returns 78 64: a text-string head claiming a 100-byte
// payload, with no payload bytes after it.
pub fn (bm BadMarshaler) to_cbor() []u8 {
	truncated := [u8(0x78), 0x64]
	return truncated
}
629
// EnvelopeWithBad places a BadMarshaler field between two int fields,
// so malformed Marshaler output has a following field it could corrupt.
struct EnvelopeWithBad {
	id    int
	bad   BadMarshaler
	other int
}
635
// Encoding a struct whose Marshaler field returns a truncated item
// must fail instead of splicing the bad bytes into the output.
fn test_marshaler_output_validated() {
	envelope := EnvelopeWithBad{
		id:    1
		bad:   BadMarshaler{}
		other: 42
	}
	if _ := cbor.encode[EnvelopeWithBad](envelope, cbor.EncodeOpts{}) {
		assert false, 'malformed Marshaler output must be rejected'
	}
}
646
// TrailingMarshaler is a Marshaler stub that returns more than one
// item.
struct TrailingMarshaler {}

// to_cbor returns 01 02: two well-formed items (uint 1, uint 2) where
// a single item is expected.
pub fn (tm TrailingMarshaler) to_cbor() []u8 {
	two_items := [u8(0x01), 0x02]
	return two_items
}
652
// EnvelopeWithTrailing wraps a TrailingMarshaler field next to an int
// id, for the over-long Marshaler output test.
struct EnvelopeWithTrailing {
	id  int
	bad TrailingMarshaler
}
657
// Encoding a struct whose Marshaler field returns two items must fail:
// a Marshaler is expected to produce exactly one.
fn test_marshaler_rejects_trailing_bytes() {
	envelope := EnvelopeWithTrailing{
		id:  1
		bad: TrailingMarshaler{}
	}
	if _ := cbor.encode[EnvelopeWithTrailing](envelope, cbor.EncodeOpts{}) {
		assert false, 'Marshaler returning > 1 item must be rejected'
	}
}
667
668// ---------------------------------------------------------------------
669// `pack_to` refuses to ship a buffer with an open indef container.
670// `bytes()` stays low-level and doesn't validate (callers can use
671// `is_complete()` to check before reading the buffer directly).
672// ---------------------------------------------------------------------
673
// DropWriter is an in-memory writer that accumulates everything
// written to it in `buf`.
struct DropWriter {
mut:
	buf []u8
}

// write appends every byte of `buf` to the writer's own buffer and
// reports the whole input as written.
fn (mut w DropWriter) write(buf []u8) !int {
	for c in buf {
		w.buf << c
	}
	written := buf.len
	return written
}
683
// With an indefinite array opened and never closed by a break,
// pack_to must refuse to write the buffer out.
fn test_pack_to_rejects_open_indef() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_array_indef()!
	enc.pack_int(1)
	mut sink := DropWriter{}
	if _ := enc.pack_to(mut sink) {
		assert false, 'pack_to must reject buffer with open indef array'
	}
}
693
// is_complete must be true on a fresh packer, false while an
// indefinite array is open, and true again after the closing break.
fn test_is_complete_reports_state() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	assert enc.is_complete()

	enc.pack_array_indef()!
	assert !enc.is_complete()

	enc.pack_int(1)
	enc.pack_break()!
	assert enc.is_complete()
}
703
704// ---------------------------------------------------------------------
705// Self-describe wrapping (RFC 8949 §3.4.6, tag 55799) is stripped
706// transparently by `decode[T]` so payloads encoded with
707// `EncodeOpts.self_describe` round-trip through the typed decoder.
708// ---------------------------------------------------------------------
709
// A value encoded with self_describe must decode back through the
// typed decode[u64] path to the original 42.
fn test_self_describe_round_trips_via_typed_decode() {
	wrapped := cbor.EncodeOpts{
		self_describe: true
	}
	wire := cbor.encode[u64](42, wrapped)!
	decoded := cbor.decode[u64](wire, cbor.DecodeOpts{})!
	assert decoded == 42
}
715
// Driving the Unpacker directly keeps the self-describe wrapper
// visible: unpack_value must return a Tag numbered 55799.
fn test_self_describe_visible_via_unpacker() {
	wrapped := cbor.EncodeOpts{
		self_describe: true
	}
	wire := cbor.encode[u64](42, wrapped)!
	mut dec := cbor.new_unpacker(wire, cbor.DecodeOpts{})
	outer := dec.unpack_value()!
	match outer {
		cbor.Tag {
			assert outer.number == 55799
		}
		else {
			assert false, 'expected Tag(55799), got ${outer.type_name()}'
		}
	}
}
728
729// ---------------------------------------------------------------------
730// `unpack_uint` and `unpack_tag` roll back position on partial-arg
731// failure, matching the contract of the other typed reads.
732// ---------------------------------------------------------------------
733
// 0x1b announces a uint with an 8-byte argument, but no argument bytes
// follow. unpack_uint must fail and leave the position at 0.
fn test_unpack_uint_rollback_on_truncated_arg() {
	lenient := cbor.DecodeOpts{
		allow_trailing_bytes: true
	}
	mut dec := cbor.new_unpacker([u8(0x1b)], lenient)
	if _ := dec.unpack_uint() {
		assert false, 'expected truncated arg to error'
	}
	assert dec.pos == 0, 'expected rollback to start, got pos=${dec.pos}'
}
742
// 0xdb announces a tag with an 8-byte argument, but no argument bytes
// follow. unpack_tag must fail and leave the position at 0.
fn test_unpack_tag_rollback_on_truncated_arg() {
	lenient := cbor.DecodeOpts{
		allow_trailing_bytes: true
	}
	mut dec := cbor.new_unpacker([u8(0xdb)], lenient)
	if _ := dec.unpack_tag() {
		assert false, 'expected truncated arg to error'
	}
	assert dec.pos == 0, 'expected rollback to start, got pos=${dec.pos}'
}
750
// Sanity check for the common case: packing ordinary values through
// pack_value succeeds and produces some output.
fn test_pack_value_well_formed() {
	mut enc := cbor.new_packer(cbor.EncodeOpts{})
	enc.pack_value(cbor.new_uint(1))!
	enc.pack_value(cbor.new_text('x'))!
	produced := enc.bytes().len
	assert produced > 0
}
757
758// ---------------------------------------------------------------------
759// validate_utf8 must propagate into the canonical sub-encoders: the
760// map / struct / Value-Map paths build temporary packers when sorting
761// keys and used to drop the caller's option, silently letting invalid
762// UTF-8 onto the wire even when strict-encode was requested.
763// ---------------------------------------------------------------------
764
// BadStrField pairs a well-formed string field with one that the
// tests fill with invalid UTF-8.
struct BadStrField {
	good string
	bad  string
}
769
// Canonical struct encoding with validate_utf8 set must reject a field
// holding the invalid UTF-8 bytes ff fe.
fn test_validate_utf8_propagates_into_canonical_struct() {
	subject := BadStrField{
		good: 'ok'
		bad:  unsafe { tos(c'\xff\xfe', 2) }
	}
	strict_canonical := cbor.EncodeOpts{
		canonical:     true
		validate_utf8: true
	}
	if _ := cbor.encode[BadStrField](subject, strict_canonical) {
		assert false, 'canonical encode must reject invalid UTF-8 when validate_utf8 is set'
	}
}
783
// Canonical map encoding with validate_utf8 set must reject a map
// value holding the invalid UTF-8 bytes ff fe.
fn test_validate_utf8_propagates_into_canonical_map() {
	entries := {
		'a': 'ok'
		'b': unsafe { tos(c'\xff\xfe', 2) }
	}
	strict_canonical := cbor.EncodeOpts{
		canonical:     true
		validate_utf8: true
	}
	if _ := cbor.encode[map[string]string](entries, strict_canonical) {
		assert false, 'canonical encode must reject invalid UTF-8 in map values'
	}
}
796
// With validate_utf8 left at its default (off), canonical mode must
// still let the invalid bytes through: the caller opted out of
// validation.
fn test_validate_utf8_off_allows_invalid_in_canonical() {
	subject := BadStrField{
		good: 'ok'
		bad:  unsafe { tos(c'\xff\xfe', 2) }
	}
	canonical_only := cbor.EncodeOpts{
		canonical: true
	}
	wire := cbor.encode[BadStrField](subject, canonical_only)!
	assert wire.len > 0
}
807
808// ---------------------------------------------------------------------
809// SWAR ASCII fast path in `utf8_validate_slice` must remain correct on
810// payloads of any length and any starting offset (the load is now via
811// memcpy to be safe on strict-alignment targets — but the result must
812// stay byte-for-byte identical with the per-byte path).
813// ---------------------------------------------------------------------
814
// 257 ASCII bytes cover many 8-byte chunks plus a tail that is not a
// multiple of 8. The string must round-trip with validation enabled
// on both encode and decode.
fn test_validate_utf8_long_ascii_run() {
	text := 'a'.repeat(257)
	wire := cbor.encode[string](text, cbor.EncodeOpts{ validate_utf8: true })!
	restored := cbor.decode[string](wire, cbor.DecodeOpts{ validate_utf8: true })!
	assert restored == text
}
822
// 16 ASCII bytes (two 8-byte chunks), then the 2-byte rune 'é', then
// more ASCII. The string must round-trip with validation enabled,
// exercising the hand-over from the ASCII fast path to per-byte
// decoding.
fn test_validate_utf8_non_ascii_after_swar_chunks() {
	text := 'a'.repeat(16) + 'é' + 'bbbb'
	wire := cbor.encode[string](text, cbor.EncodeOpts{ validate_utf8: true })!
	restored := cbor.decode[string](wire, cbor.DecodeOpts{ validate_utf8: true })!
	assert restored == text
}
832