v2 / vlib / encoding / cbor / tests / upstream_appendix_a_test.v
348 lines · 333 sloc · 9.0 KB · 468855eef1db0ff73c62be2d1bf176ffa0e1478e
Raw
1// Third-party conformance: drives the entire `appendix_a.json` corpus
2// from https://github.com/cbor/test-vectors (the same file that ciborium,
3// serde_cbor and cbor2 use). Each entry is checked against:
4//
5// * its `hex` round-trips byte-exact when `roundtrip == true`
6// * its `decoded` JSON value matches the V-decoded `cbor.Value`
7// * for entries that only carry a `diagnostic` (NaN, Infinity, undefined,
8// simple(N), tag(N)(...), h'…'), structural sanity is enforced via
9// the diagnostic prefix.
10//
11// The fixture file lives next to this test so the corpus is reproducible
12// and offline-buildable.
13module main
14
15import encoding.cbor
16import encoding.hex
17import math
18import os
19import x.json2
20
// h turns a hex string into raw bytes. Malformed hex is a bug in the test
// fixture, so it panics with the offending string instead of returning an error.
fn h(s string) []u8 {
	raw := hex.decode(s) or { panic('bad hex ${s}') }
	return raw
}
24
// bytes_eq reports whether `a` and `b` hold the same bytes in the same order.
// V's array `==` compares length first and then every element.
fn bytes_eq(a []u8, b []u8) bool {
	return a == b
}
36
// match_decoded compares a V `Value` against a parsed JSON value taken from
// the vector's `decoded` field.
//
// Parameters:
//   v - the value produced by the CBOR decoder.
//   j - the expected value as parsed from the fixture's JSON.
//
// Returns an empty string when the two agree, otherwise a short
// human-readable description of the first mismatch found. Arrays and maps
// are compared recursively; the path to the failing element is prefixed to
// the message.
fn match_decoded(v cbor.Value, j json2.Any) string {
	match j {
		i64 {
			match v {
				cbor.IntNum {
					if j >= 0 {
						if v.negative || v.magnitude != u64(j) {
							return 'int ${j} ↔ IntNum(neg=${v.negative}, mag=${v.magnitude})'
						}
					} else {
						// A negative IntNum is expected to carry magnitude -(j + 1).
						mag := u64(-(j + 1))
						if !v.negative || v.magnitude != mag {
							return 'int ${j} ↔ IntNum(neg=${v.negative}, mag=${v.magnitude})'
						}
					}
					return ''
				}
				cbor.Tag {
					// JSON ints beyond ±2^63 land here as i64-clamped or as a string;
					// real bignum vectors use the `decoded` field with a u64 / negative,
					// so this branch shouldn't fire for that case.
					return 'unexpected Tag for plain int ${j}'
				}
				else {
					return 'expected int ${j}, got ${v.type_name()}'
				}
			}
		}
		u64 {
			if v is cbor.IntNum {
				if v.negative || v.magnitude != j {
					return 'uint ${j} ↔ IntNum(neg=${v.negative}, mag=${v.magnitude})'
				}
				return ''
			}
			return 'expected uint ${j}'
		}
		f64 {
			// JSON has a single number type, so an integer-valued vector
			// arrives here as f64 even when the CBOR is major type 0/1.
			if v is cbor.FloatNum {
				// NaN never compares equal to itself, so it is matched explicitly.
				if math.is_nan(j) && math.is_nan(v.value) {
					return ''
				}
				if v.value != j {
					return 'float ${j} ↔ ${v.value}'
				}
				return ''
			}
			if v is cbor.IntNum {
				expected_neg := j < 0
				abs_val := math.abs(j)
				// Beyond 2^53 JSON's f64 representation loses precision —
				// we can't tell IntNum(2^64-1) from IntNum(2^64). The exact
				// magnitude is left to the roundtrip byte check, but the sign
				// survives the f64 conversion and is still verified here.
				f64_exact_int_max := f64(u64(1) << 53)
				if abs_val >= f64_exact_int_max {
					if expected_neg && !v.negative {
						return 'expected negative IntNum for ${j}'
					}
					if !expected_neg && v.negative {
						return 'expected non-negative IntNum for ${j}'
					}
					return ''
				}
				if abs_val != f64(u64(abs_val)) {
					return 'float ${j} → IntNum: not integer-valued'
				}
				if expected_neg {
					if !v.negative {
						return 'expected negative IntNum for ${j}'
					}
					// Same -(j + 1) convention as the i64 branch.
					if u64(abs_val) - 1 != v.magnitude {
						return 'IntNum mag ${v.magnitude} != ${u64(abs_val) - 1}'
					}
				} else {
					if v.negative {
						return 'expected non-negative IntNum for ${j}'
					}
					if u64(abs_val) != v.magnitude {
						return 'IntNum mag ${v.magnitude} != ${u64(abs_val)}'
					}
				}
				return ''
			}
			if v is cbor.Tag {
				// Bignum (tag 2/3) representing a value beyond i64. Caller skips.
				return 'tag-bignum (caller decides)'
			}
			return 'expected number ${j}'
		}
		bool {
			if v is cbor.Bool && v.value == j {
				return ''
			}
			return 'expected bool ${j}'
		}
		string {
			if v is cbor.Text && v.value == j {
				return ''
			}
			return 'expected text "${j}"'
		}
		json2.Null {
			if v is cbor.Null {
				return ''
			}
			return 'expected null'
		}
		[]json2.Any {
			if v is cbor.Array {
				if v.elements.len != j.len {
					return 'array length ${v.elements.len} != ${j.len}'
				}
				for i, item in j {
					sub := match_decoded(v.elements[i], item)
					if sub != '' {
						return 'array[${i}]: ${sub}'
					}
				}
				return ''
			}
			return 'expected array'
		}
		map[string]json2.Any {
			if v is cbor.Map {
				if v.pairs.len != j.len {
					return 'map size ${v.pairs.len} != ${j.len}'
				}
				for pair in v.pairs {
					if pair.key !is cbor.Text {
						// JSON can only express string keys; mixed-key maps live
						// in the diagnostic-only set, so this is safe.
						return 'non-text key in JSON-comparable map'
					}
					tk := pair.key as cbor.Text
					// A single lookup covers both the presence test and the fetch.
					jv := j[tk.value] or { return 'missing key ${tk.value}' }
					sub := match_decoded(pair.value, jv)
					if sub != '' {
						return 'map[${tk.value}]: ${sub}'
					}
				}
				return ''
			}
			return 'expected map'
		}
		else {
			return 'unsupported JSON kind ${typeof(j).name}'
		}
	}
}
187
// match_diagnostic performs a shape-only check for vectors whose value JSON
// cannot express (NaN, Infinity, undefined, simple, tag, byte strings,
// int-keyed maps). It looks at the leading characters of the diagnostic
// notation and verifies that `v` is the matching `cbor.Value` variant.
//
// Returns an empty string when the shape agrees, otherwise a description of
// the expected variant. Diagnostics that fit none of the known forms yield
// an 'unrecognised diagnostic' message.
fn match_diagnostic(v cbor.Value, diag string) string {
	text := diag.trim_space()

	// Exact spellings are tested before any prefix-based form.
	if text == 'Infinity' {
		if v is cbor.FloatNum && math.is_inf(v.value, 1) {
			return ''
		}
		return 'expected +Inf'
	}
	if text == '-Infinity' {
		if v is cbor.FloatNum && math.is_inf(v.value, -1) {
			return ''
		}
		return 'expected -Inf'
	}
	if text == 'NaN' {
		if v is cbor.FloatNum && math.is_nan(v.value) {
			return ''
		}
		return 'expected NaN'
	}
	if text == 'undefined' {
		return if v is cbor.Undefined { '' } else { 'expected Undefined' }
	}

	if text.starts_with('simple(') {
		return if v is cbor.Simple { '' } else { 'expected Simple' }
	}
	if text.starts_with("h'") {
		return if v is cbor.Bytes { '' } else { 'expected Bytes' }
	}
	if text.starts_with('(_') {
		// Indefinite-length notation: any of the four compound variants is accepted.
		if v is cbor.Bytes || v is cbor.Text || v is cbor.Array || v is cbor.Map {
			return ''
		}
		return 'expected indef-collapsed compound'
	}
	// Tag forms start with the tag number: "0(\"...\")", "1(1363896240)",
	// "23(h'…')", "24(h'…')", "32(\"…\")". `contains` is evaluated first, so an
	// empty diagnostic never reaches the index access.
	if text.contains('(') && text[0].is_digit() {
		return if v is cbor.Tag { '' } else { 'expected Tag' }
	}
	// Map literal such as "{1: 2, 3: 4}" — int-keyed, so JSON cannot carry it.
	if text.starts_with('{') {
		return if v is cbor.Map { '' } else { 'expected Map' }
	}
	return 'unrecognised diagnostic ${text}'
}
255
// fixture_path points at `appendix_a.json` in the same directory as this
// source file, so the test does not depend on the working directory.
const fixture_path = os.join_path(os.dir(@FILE), 'appendix_a.json')
257
// test_upstream_appendix_a_corpus runs every entry of the `appendix_a.json`
// fixture through the decoder and checks, per entry:
//
//   * the decoded value against `decoded` (via match_decoded) or, failing
//     that, against `diagnostic` (via match_diagnostic);
//   * for `roundtrip == true` entries, that re-encoding reproduces the input
//     bytes exactly.
//
// Per-entry problems are collected and reported together at the end so a
// single run shows every failing vector. Fixture-level problems (unreadable
// file, invalid JSON, malformed hex) panic immediately.
fn test_upstream_appendix_a_corpus() {
	raw := os.read_file(fixture_path) or { panic('cannot read fixture: ${err}') }
	parsed := json2.decode[json2.Any](raw) or { panic('json: ${err}') }
	entries := parsed.as_array()
	assert entries.len > 0, 'fixture is empty'

	mut total := 0
	mut roundtrip := 0
	mut value_checks := 0
	mut diag_checks := 0
	mut failures := []string{}

	for entry in entries {
		obj := entry.as_map()
		total++
		hex_str := obj['hex'] or { json2.Any('') }.str()
		// `f818` (simple(24) two-byte form) is well-formed under RFC 7049
		// but RFC 8949 §3.3 explicitly forbids it. The upstream corpus
		// predates RFC 8949 — our decoder correctly rejects it.
		if hex_str == 'f818' {
			cbor.decode[cbor.Value](h(hex_str), cbor.DecodeOpts{}) or {
				assert err.msg().contains('1-byte form'), 'unexpected error for f818: ${err}'
				continue
			}
			assert false, 'f818 should be rejected per RFC 8949 §3.3'
		}
		input := h(hex_str)
		// A missing `roundtrip` field is treated as false.
		rt := if rt_any := obj['roundtrip'] {
			rt_any.bool()
		} else {
			false
		}

		decoded_v := cbor.decode[cbor.Value](input, cbor.DecodeOpts{}) or {
			failures << 'decode ${hex_str}: ${err}'
			continue
		}

		// Compare structure where possible.
		if decoded := obj['decoded'] {
			diff := match_decoded(decoded_v, decoded)
			if diff != '' {
				// Bignums (decoded JSON ints beyond ±2^64) are represented as
				// `Tag(2|3, Bytes)` on the wire. JSON loses them as i64-clamped
				// values, so accept Tag/IntNum mismatch when the JSON is at the
				// extreme range.
				mut is_bignum := false
				if decoded_v is cbor.Tag {
					t := decoded_v as cbor.Tag
					is_bignum = t.number == 2 || t.number == 3
				}
				if !is_bignum {
					failures << '${hex_str} decoded: ${diff}'
					continue
				}
				// Accepted bignums deliberately fall through: their value could
				// not be compared, so the byte-exact roundtrip below is the only
				// real check they get.
			}
			value_checks++
		} else if diag := obj['diagnostic'] {
			diff := match_diagnostic(decoded_v, diag.str())
			if diff != '' {
				failures << '${hex_str} diagnostic "${diag}": ${diff}'
				continue
			}
			diag_checks++
		}

		// For roundtrip=true entries, re-encode and compare bytes.
		if rt {
			// Record encoder errors like any other per-vector failure instead
			// of aborting the whole run on the first one.
			out := cbor.encode_value(decoded_v, cbor.EncodeOpts{}) or {
				failures << '${hex_str} roundtrip: encode failed: ${err}'
				continue
			}
			if !bytes_eq(out, input) {
				failures << '${hex_str} roundtrip: got ${hex.encode(out)}'
				continue
			}
			roundtrip++
		}
	}

	if failures.len > 0 {
		for f in failures {
			eprintln('FAIL: ${f}')
		}
		assert false, '${failures.len}/${total} upstream Appendix A vectors failed'
	}

	// Sanity: the upstream corpus has 80+ entries; if we ever see fewer,
	// the fixture file is wrong.
	assert total >= 80, 'corpus too small: ${total}'
	assert roundtrip >= 60, 'too few roundtrip checks: ${roundtrip}'
	assert value_checks > 0
	assert diag_checks > 0
}
349