| 1 | // Tests that the decoder is safe against adversarial input: malformed |
| 2 | // initial bytes, premature EOF, depth bombs, indefinite-length nesting, |
| 3 | // and invalid UTF-8. |
| 4 | module main |
| 5 | |
| 6 | import encoding.cbor |
| 7 | import encoding.hex |
| 8 | |
// h converts a hex string literal into its raw bytes.
// It panics (aborting the test run) when `s` is not valid hex, so a typo
// in a fixture is reported immediately instead of surfacing as a decode error.
fn h(s string) []u8 {
	decoded := hex.decode(s) or { panic('invalid hex: ${s}') }
	return decoded
}
| 12 | |
| 13 | // --------------------------------------------------------------------- |
| 14 | // EOF handling |
| 15 | // --------------------------------------------------------------------- |
| 16 | |
// A uint header that announces a 1-byte argument (0x18) with nothing after
// it must produce an error rather than a value.
fn test_eof_truncated_uint() {
	// Error path is the expected one: leave the test as passed.
	_ := cbor.decode[u64](h('18'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected EOF error'
}
| 23 | |
// An array header declaring 3 elements (0x83) followed by a single element
// must fail to decode.
fn test_eof_truncated_array() {
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[[]int](h('8301'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected EOF error in array'
}
| 30 | |
// A text header declaring 5 bytes (0x65) followed by only 2 bytes
// (0x61 0x62) must fail to decode.
fn test_eof_truncated_string() {
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[string](h('656162'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected EOF error in text'
}
| 37 | |
| 38 | // --------------------------------------------------------------------- |
| 39 | // Reserved additional info |
| 40 | // --------------------------------------------------------------------- |
| 41 | |
// Initial byte 0x1c is major type 0 with additional info 28, which is a
// reserved value; the decoder must refuse it.
fn test_reserved_info_rejected() {
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[cbor.Value](h('1c'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected malformed for info 28'
}
| 48 | |
| 49 | // --------------------------------------------------------------------- |
| 50 | // Depth bomb |
| 51 | // --------------------------------------------------------------------- |
| 52 | |
// Feeds the decoder 1000 nested indefinite-length arrays while allowing a
// maximum depth of 16; the decode must fail instead of recursing all the way.
fn test_depth_bomb_rejected() {
	nesting := 1000
	// `nesting` array openers (0x9f) ...
	mut payload := []u8{len: nesting, init: 0x9f}
	// ... followed by the same number of break bytes (0xff).
	payload << []u8{len: nesting, init: 0xff}
	_ := cbor.decode[cbor.Value](payload, cbor.DecodeOpts{ max_depth: 16 }) or { return }
	assert false, 'expected MaxDepthError'
}
| 66 | |
| 67 | // --------------------------------------------------------------------- |
| 68 | // Indefinite-length string with mismatched chunk |
| 69 | // --------------------------------------------------------------------- |
| 70 | |
// An indefinite-length text string may only contain definite-length text
// chunks (RFC 8949 §3.2.3); a byte-string chunk inside it must be rejected.
fn test_indef_text_with_byte_chunk_rejected() {
	// 7f       = start of indefinite-length text
	// 42 01 02 = bytes(2) chunk — wrong major type for a text string
	// ff       = break
	// The byte-string chunk is deliberately the ONLY malformation in the
	// payload. With extra stray items after the chunk, a decoder that wrongly
	// accepted the bytes chunk would still fail on those items and this test
	// would pass for the wrong reason.
	if _ := cbor.decode[string](h('7f420102ff'), cbor.DecodeOpts{}) {
		assert false, 'expected malformed for mixed indef-text chunk'
	}
}
| 77 | |
// An indefinite-length text string whose chunk is itself an
// indefinite-length text string (7f 7f 60 ff ff) is disallowed.
fn test_nested_indef_text_rejected() {
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[string](h('7f7f60ffff'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected malformed for nested indef text'
}
| 84 | |
| 85 | // --------------------------------------------------------------------- |
| 86 | // UTF-8 validation |
| 87 | // --------------------------------------------------------------------- |
| 88 | |
// Text of length 2 (0x62) carrying the invalid sequence 0xc3 0x28 must be
// rejected under the default options.
fn test_invalid_utf8_rejected() {
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[string](h('62c328'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected InvalidUtf8Error'
}
| 95 | |
// The same invalid-UTF-8 payload decodes successfully once the caller sets
// `validate_utf8: false`, taking responsibility for the raw bytes.
fn test_invalid_utf8_can_be_disabled() {
	if text := cbor.decode[string](h('62c328'), cbor.DecodeOpts{ validate_utf8: false }) {
		// Both payload bytes must come through untouched.
		assert text.len == 2
	} else {
		panic('expected success: ${err}')
	}
}
| 104 | |
// 0xc0 0xaf is an overlong 2-byte encoding of "/" (0x2f) and must be
// rejected.
fn test_invalid_utf8_overlong_rejected() {
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[string](h('62c0af'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected InvalidUtf8Error for overlong'
}
| 111 | |
// 0xed 0xa0 0x80 is the 3-byte form of U+D800, a high surrogate, and must
// be rejected.
fn test_invalid_utf8_surrogate_rejected() {
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[string](h('63eda080'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected InvalidUtf8Error for surrogate'
}
| 118 | |
| 119 | // --------------------------------------------------------------------- |
| 120 | // Unknown fields in struct decode |
| 121 | // --------------------------------------------------------------------- |
| 122 | |
// Strict is the decode target for the unknown-field tests in this file.
// It declares a single field, so any other key in the encoded map counts
// as unknown.
struct Strict {
	// a is the only key the struct declares.
	a int
}
| 126 | |
// With default options, a map key that has no matching struct field is
// tolerated and the known field is still populated.
fn test_unknown_field_tolerated_by_default() {
	// Payload is {"a": 1, "b": 2}; "b" has no counterpart in Strict.
	decoded := cbor.decode[Strict](h('a26161016162 02'.replace(' ', '')), cbor.DecodeOpts{})!
	assert decoded.a == 1
}
| 133 | |
// With `deny_unknown_fields: true`, the extra "b" key in {"a": 1, "b": 2}
// must make the decode fail.
fn test_unknown_field_rejected_when_opted_in() {
	payload := h('a26161016162 02'.replace(' ', ''))
	strict_opts := cbor.DecodeOpts{
		deny_unknown_fields: true
	}
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[Strict](payload, strict_opts) or { return }
	assert false, 'expected UnknownFieldError'
}
| 140 | |
| 141 | // --------------------------------------------------------------------- |
| 142 | // Native tag 0/1 content-type validation (RFC 8949 §3.4.1). Unlike a |
| 143 | // permissive decoder, we reject tag 0 wrapping non-text and tag 1 |
| 144 | // wrapping non-numbers — same behaviour as QCBOR (the IETF reference). |
| 145 | // These cases come from the cbor-wg/bad conformance corpus. |
| 146 | // --------------------------------------------------------------------- |
| 147 | |
// Tag 0 must wrap a text string; c0 a1 61 61 00 = tag(0, {"a": 0}) wraps a
// map and must be rejected.
fn test_tag0_wrapping_map_rejected() {
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[cbor.Value](h('c0a1616100'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected tag-0 type rejection'
}
| 154 | |
// Tag 1 must wrap an integer or a float; c1 a1 61 61 00 = tag(1, {"a": 0})
// wraps a map and must be rejected.
fn test_tag1_wrapping_map_rejected() {
	// Decoding is expected to error out; reaching the assert means it did not.
	_ := cbor.decode[cbor.Value](h('c1a1616100'), cbor.DecodeOpts{}) or { return }
	assert false, 'expected tag-1 type rejection'
}
| 161 | |
// The well-formed counterpart of the tag-0 rejection test:
// c0 74 ... = tag(0, "2013-03-21T20:04:00Z") must decode to a Tag value.
fn test_tag0_wrapping_text_accepted() {
	if decoded := cbor.decode[cbor.Value](h('c074323031332d30332d32315432303a30343a30305a'),
		cbor.DecodeOpts{})
	{
		assert decoded is cbor.Tag
	} else {
		assert false, 'tag 0 + text MUST be accepted: ${err}'
	}
}
| 170 | |
// Tag 1 wrapping either an integer or a float is well-formed and must
// decode to a Tag value in both cases.
fn test_tag1_wrapping_int_or_float_accepted() {
	// c1 1a 514b67b0 = tag(1, 1363896240) — 4-byte unsigned integer content.
	if from_int := cbor.decode[cbor.Value](h('c11a514b67b0'), cbor.DecodeOpts{}) {
		assert from_int is cbor.Tag
	} else {
		assert false, 'tag 1 + int MUST be accepted: ${err}'
		return
	}

	// c1 fb 41d452d9ec200000 = tag(1, 1363896240.5) — float64 content.
	if from_float := cbor.decode[cbor.Value](h('c1fb41d452d9ec200000'), cbor.DecodeOpts{}) {
		assert from_float is cbor.Tag
	} else {
		assert false, 'tag 1 + float MUST be accepted: ${err}'
	}
}
| 184 | |
| 185 | // --------------------------------------------------------------------- |
| 186 | // Header-length overflow: lengths beyond i64::max must be rejected, |
| 187 | // not silently wrapped to -1 (which would alias the indefinite-length |
| 188 | // sentinel and steer callers into the wrong loop). See decoder.v |
| 189 | // `unpack_array_header` / `unpack_map_header`. |
| 190 | // --------------------------------------------------------------------- |
| 191 | |
// An array header whose 8-byte argument is u64::max
// (9b ff ff ff ff ff ff ff ff) must be rejected by unpack_array_header.
fn test_array_header_oversized_length_rejected() {
	mut unpacker := cbor.new_unpacker(h('9bffffffffffffffff'), cbor.DecodeOpts{})
	// An error is the expected outcome; a returned count is the failure.
	count := unpacker.unpack_array_header() or { return }
	assert false, 'expected oversized length rejection, got ${count}'
}
| 199 | |
// A map header whose 8-byte argument is u64::max
// (bb ff ff ff ff ff ff ff ff) must be rejected by unpack_map_header.
fn test_map_header_oversized_length_rejected() {
	mut unpacker := cbor.new_unpacker(h('bbffffffffffffffff'), cbor.DecodeOpts{})
	// An error is the expected outcome; a returned count is the failure.
	count := unpacker.unpack_map_header() or { return }
	assert false, 'expected oversized length rejection, got ${count}'
}
| 207 | |
// Boundary case for the oversized-length check: an array header whose
// argument is exactly i64::max (9b 7f ff ff ff ff ff ff ff) must still be
// accepted and reported unchanged.
fn test_array_header_at_i64_max_accepted() {
	mut unpacker := cbor.new_unpacker(h('9b7fffffffffffffff'), cbor.DecodeOpts{})
	if count := unpacker.unpack_array_header() {
		assert count == max_i64
	} else {
		assert false, 'i64::max boundary must succeed: ${err}'
	}
}
| 217 | |
| 218 | // --------------------------------------------------------------------- |
| 219 | // skip_value MUST enforce RFC 8949 §3.2.3 chunk rules for indefinite |
| 220 | // strings: each chunk must be a definite-length string of the same |
| 221 | // major type. Otherwise the skip path silently accepts what |
| 222 | // unpack_text / unpack_bytes correctly reject — letting malformed |
| 223 | // CBOR through RawMessage / Unmarshaler / unknown-field skipping. |
| 224 | // --------------------------------------------------------------------- |
| 225 | |
// skip_value must apply the same chunk-type rule as unpack_text:
// 7f 41 00 ff is an indefinite-length text string whose only chunk is a
// byte string (major type 2), followed by the break, and must be rejected.
fn test_skip_value_rejects_cross_type_indef_string_chunk() {
	mut unpacker := cbor.new_unpacker(h('7f4100ff'), cbor.DecodeOpts{})
	// An error is the expected outcome; falling through is the failure.
	unpacker.skip_value() or { return }
	assert false, 'skip_value must reject cross-type indef chunk'
}
| 235 | |
// 7f 7f 61 61 ff ff is an indefinite-length text string whose chunk is
// itself indefinite-length; skip_value must reject it.
fn test_skip_value_rejects_nested_indef_string_chunk() {
	mut unpacker := cbor.new_unpacker(h('7f7f6161ffff'), cbor.DecodeOpts{})
	// An error is the expected outcome; falling through is the failure.
	unpacker.skip_value() or { return }
	assert false, 'skip_value must reject nested indef chunk'
}
| 243 | |
// Exercises the RawMessage path with the cross-type payload 7f 41 00 ff
// (indefinite-length text containing a byte-string chunk). Per the section
// note, this path relies on skip_value to find the slice bounds, so
// unpack_raw must reject the payload as well.
fn test_raw_message_rejects_malformed_indef_string() {
	mut unpacker := cbor.new_unpacker(h('7f4100ff'), cbor.DecodeOpts{})
	// An error is the expected outcome; falling through is the failure.
	unpacker.unpack_raw() or { return }
	assert false, 'unpack_raw must reject cross-type indef chunk'
}
| 252 | |