v / vlib / encoding / cbor / tests / cbor_wg_test.v
152 lines · 144 sloc · 4.27 KB · 468855eef1db0ff73c62be2d1bf176ffa0e1478e
Raw
1// CBOR Working Group conformance corpus
2// (https://github.com/cbor-wg/cbor-test-vectors).
3//
4// Two fixture files live next to this test:
5//
6// * `cbor_wg/rfc8949_good.edn` — 88 well-formed payloads. Each must
7// decode without error.
8// * `cbor_wg/rfc8949_bad.edn` — 47 malformed payloads. Each MUST be
9// rejected by the decoder per RFC 8949 §3.
10//
// The fixtures are EDN (CBOR Extended Diagnostic Notation, RFC 8610 Appendix G) — we only
12// pull the `"encoded": h'…'` hex literals because that's what we need
13// to drive the decoder. The expected `decoded` value is left to the
14// other test files (rfc8949_appendix_a, upstream_appendix_a) which use
15// the JSON-encoded corpus.
16module main
17
18import encoding.cbor
19import encoding.hex
20import os
21
// wg_dir is the `cbor_wg` fixture directory located in the same folder
// as this test file (resolved from @FILE, not from the working directory).
const wg_dir = os.join_path(os.dir(@FILE), 'cbor_wg')
23
// extract_hex_literals scans `text` for every `"encoded": h'…'` entry and
// returns the hex payloads in order of appearance. Spaces, tabs, CR and LF
// inside a literal (used for visual grouping in the fixtures) are removed.
// Scanning stops at the first literal that has no closing quote.
fn extract_hex_literals(text string) []string {
	marker := '"encoded": h\''
	mut literals := []string{}
	mut cursor := 0
	for {
		hit := text.index_after(marker, cursor) or { break }
		body_start := hit + marker.len
		body_end := text.index_after("'", body_start) or { break }
		body := text[body_start..body_end]
		mut digits := []u8{cap: body.len}
		for ch in body {
			match ch {
				` `, `\t`, `\n`, `\r` {
					// grouping whitespace: drop it
				}
				else {
					digits << ch
				}
			}
		}
		literals << digits.bytestr()
		// Resume right after the closing quote.
		cursor = body_end + 1
	}
	return literals
}
48
// test_extractor_sanity pins the number of hex literals the extractor
// finds in each RFC 8949 fixture, so that a change in either the fixture
// files or the extractor shows up as an explicit count mismatch.
fn test_extractor_sanity() {
	good_path := os.join_path(wg_dir, 'rfc8949_good.edn')
	good_text := os.read_file(good_path) or { panic('cannot read good.edn: ${err}') }
	bad_path := os.join_path(wg_dir, 'rfc8949_bad.edn')
	bad_text := os.read_file(bad_path) or { panic('cannot read bad.edn: ${err}') }

	good_hexes := extract_hex_literals(good_text)
	bad_hexes := extract_hex_literals(bad_text)

	assert good_hexes.len == 88, 'good corpus drift: ${good_hexes.len} (expected 88)'
	assert bad_hexes.len == 47, 'bad corpus drift: ${bad_hexes.len} (expected 47)'
}
61
// test_cbor_wg_good_corpus decodes every payload extracted from
// `rfc8949_good.edn` and fails if any of them is rejected, either by the
// hex decoder or by `cbor.decode`. All failures are collected and printed
// to stderr before the final assertion, so one run reports every
// offending vector.
fn test_cbor_wg_good_corpus() {
	// Same read-error style as the other tests in this file: name the
	// fixture in the panic message instead of bare `!` propagation.
	text := os.read_file(os.join_path(wg_dir, 'rfc8949_good.edn')) or {
		panic('cannot read rfc8949_good.edn: ${err}')
	}
	hexes := extract_hex_literals(text)
	// Guard against a vacuous pass: with zero extracted vectors the loop
	// below never runs and the test would succeed without decoding anything.
	assert hexes.len > 0, 'rfc8949_good.edn has no entries'
	// The corpus deliberately stresses 256+ deep nesting; raise the cap.
	opts := cbor.DecodeOpts{
		max_depth: 4096
	}
	mut failures := []string{}
	for hex_str in hexes {
		bytes := hex.decode(hex_str) or {
			failures << '${hex_str}: hex decode: ${err}'
			continue
		}
		cbor.decode[cbor.Value](bytes, opts) or {
			failures << '${hex_str}: ${err}'
			continue
		}
	}
	if failures.len > 0 {
		for f in failures {
			eprintln('GOOD-FAIL: ${f}')
		}
		assert false, '${failures.len}/${hexes.len} good vectors rejected'
	}
}
87
// mt_files lists the fixtures checked by the per-major-type test: one
// file per CBOR major type (mt0..mt6, with mt7 split into float and
// simple values) plus the streaming/indefinite-length suite. Every entry
// in these files is well-formed and must decode without error.
const mt_files = [
	// major types 0..6
	'appA_mt0.edn',
	'appA_mt1.edn',
	'appA_mt2.edn',
	'appA_mt3.edn',
	'appA_mt4.edn',
	'appA_mt5.edn',
	'appA_mt6.edn',
	// major type 7
	'appA_mt7-float.edn',
	'appA_mt7-simple.edn',
	// indefinite-length encodings
	'appA_streaming.edn',
]
102
// test_cbor_wg_per_major_type_corpus runs every vector from the files in
// `mt_files` through `cbor.decode` with default options. Each file must
// contain at least one vector, no vector may be rejected, and the files
// together must provide at least 80 vectors.
fn test_cbor_wg_per_major_type_corpus() {
	default_opts := cbor.DecodeOpts{}
	mut failures := []string{}
	mut total := 0
	for fname in mt_files {
		path := os.join_path(wg_dir, fname)
		text := os.read_file(path) or { panic('cannot read ${fname}: ${err}') }
		hexes := extract_hex_literals(text)
		assert hexes.len > 0, '${fname} has no entries'
		// Every extracted vector counts, whether or not it decodes.
		total += hexes.len
		for hex_str in hexes {
			if bytes := hex.decode(hex_str) {
				cbor.decode[cbor.Value](bytes, default_opts) or {
					failures << '${fname} ${hex_str}: ${err}'
					continue
				}
			} else {
				failures << '${fname} ${hex_str}: hex: ${err}'
			}
		}
	}
	if failures.len > 0 {
		// Print every rejected vector before failing the test.
		for f in failures {
			eprintln('MT-FAIL: ${f}')
		}
		assert false, '${failures.len}/${total} per-major-type vectors rejected'
	}
	assert total >= 80, 'corpus too small: ${total}'
}
132
// test_cbor_wg_bad_corpus feeds every payload extracted from
// `rfc8949_bad.edn` to `cbor.decode` with default options and fails if
// any of them is accepted. Accepted vectors are printed to stderr together
// with the type name of the decoded value.
//
// Vectors that already fail at the hex layer never reach the CBOR decoder.
// They are not counted as failures, but they are reported on stderr, and
// the test fails if no vector at all reached the decoder, since in that
// case nothing was actually tested.
fn test_cbor_wg_bad_corpus() {
	// Same read-error style as the other tests in this file: name the
	// fixture in the panic message instead of bare `!` propagation.
	text := os.read_file(os.join_path(wg_dir, 'rfc8949_bad.edn')) or {
		panic('cannot read rfc8949_bad.edn: ${err}')
	}
	hexes := extract_hex_literals(text)
	// Guard against a vacuous pass when the extractor finds nothing.
	assert hexes.len > 0, 'rfc8949_bad.edn has no entries'
	mut accepted_anyway := []string{}
	mut skipped := []string{}
	for hex_str in hexes {
		bytes := hex.decode(hex_str) or {
			// Malformed at the hex layer is still a rejection, but it says
			// nothing about the CBOR decoder; remember it for the report.
			skipped << '${hex_str}: ${err}'
			continue
		}
		if v := cbor.decode[cbor.Value](bytes, cbor.DecodeOpts{}) {
			accepted_anyway << '${hex_str} → ${v.type_name()}'
		}
	}
	for s in skipped {
		eprintln('BAD-SKIPPED: ${s}')
	}
	assert skipped.len < hexes.len, 'none of the ${hexes.len} malformed vectors reached the CBOR decoder'
	if accepted_anyway.len > 0 {
		for a in accepted_anyway {
			eprintln('BAD-ACCEPTED: ${a}')
		}
		assert false, '${accepted_anyway.len}/${hexes.len} malformed vectors were not rejected'
	}
}
153