Gitly


1 // CBOR Working Group conformance corpus
2 // (https://github.com/cbor-wg/cbor-test-vectors).
3 //
4 // Two fixture files live next to this test:
5 //
6 //   * `cbor_wg/rfc8949_good.edn` — 88 well-formed payloads. Each must
7 //     decode without error.
8 //   * `cbor_wg/rfc8949_bad.edn`  — 47 malformed payloads. Each MUST be
9 //     rejected by the decoder per RFC 8949 §3.
10 //
11 // The fixtures are EDN (Extended Diagnostic Notation, RFC 8610 Appendix G) — we only
12 // pull the `"encoded": h'…'` hex literals because that's what we need
13 // to drive the decoder. The expected `decoded` value is left to the
14 // other test files (rfc8949_appendix_a, upstream_appendix_a) which use
15 // the JSON-encoded corpus.
16 module main
17 
18 import encoding.cbor
19 import encoding.hex
20 import os
21 
// wg_dir is the `cbor_wg` directory that sits next to this source file; every
// fixture path in this file is built from it.
const wg_dir = os.join_path(os.dir(@FILE), 'cbor_wg')
23 
// extract_hex_literals scans `text` for every `"encoded": h'…'` entry and
// returns the hex payloads in order of appearance. Spaces, tabs, line feeds
// and carriage returns inside a literal are dropped, so the returned strings
// are contiguous hex digits. Scanning stops at the first literal that has no
// closing quote.
fn extract_hex_literals(text string) []string {
	marker := '"encoded": h\''
	mut literals := []string{}
	mut cursor := 0
	for {
		hit := text.index_after(marker, cursor) or { break }
		body_start := hit + marker.len
		body_end := text.index_after("'", body_start) or { break }
		body := text[body_start..body_end]
		mut compact := []u8{cap: body.len}
		for ch in body {
			// Keep everything except the whitespace used for visual grouping.
			if ch != ` ` && ch != `\t` && ch != `\n` && ch != `\r` {
				compact << ch
			}
		}
		literals << compact.bytestr()
		// Resume the search just past the closing quote.
		cursor = body_end + 1
	}
	return literals
}
48 
// test_extractor_sanity pins the number of hex literals the extractor finds in
// each fixture (88 good, 47 bad), so a change in the fixtures or in the
// extractor shows up as an explicit count mismatch. An unreadable fixture
// panics.
fn test_extractor_sanity() {
	good_path := os.join_path(wg_dir, 'rfc8949_good.edn')
	bad_path := os.join_path(wg_dir, 'rfc8949_bad.edn')
	good_text := os.read_file(good_path) or { panic('cannot read good.edn: ${err}') }
	bad_text := os.read_file(bad_path) or { panic('cannot read bad.edn: ${err}') }
	good_count := extract_hex_literals(good_text).len
	bad_count := extract_hex_literals(bad_text).len
	assert good_count == 88, 'good corpus drift: ${good_count} (expected 88)'
	assert bad_count == 47, 'bad corpus drift: ${bad_count} (expected 47)'
}
61 
// test_cbor_wg_good_corpus decodes every payload extracted from
// `rfc8949_good.edn` and fails if any of them is rejected, either by the hex
// decoder or by the CBOR decoder. Rejections are collected and printed to
// stderr with a `GOOD-FAIL:` prefix before the final assertion, so one run
// reports every failing vector.
fn test_cbor_wg_good_corpus() {
	text := os.read_file(os.join_path(wg_dir, 'rfc8949_good.edn'))!
	hexes := extract_hex_literals(text)
	// Guard against a vacuous pass: with zero extracted vectors the loop below
	// never runs and the test would succeed without decoding anything.
	assert hexes.len > 0, 'rfc8949_good.edn has no entries'
	// The corpus deliberately stresses 256+ deep nesting; raise the cap.
	opts := cbor.DecodeOpts{
		max_depth: 4096
	}
	mut failures := []string{}
	for hex_str in hexes {
		bytes := hex.decode(hex_str) or {
			failures << '${hex_str}: hex decode: ${err}'
			continue
		}
		cbor.decode[cbor.Value](bytes, opts) or {
			failures << '${hex_str}: ${err}'
			continue
		}
	}
	if failures.len > 0 {
		for f in failures {
			eprintln('GOOD-FAIL: ${f}')
		}
		assert false, '${failures.len}/${hexes.len} good vectors rejected'
	}
}
87 
// mt_files names the fixtures under `wg_dir` that are split by CBOR major type
// (mt0..mt7), plus the streaming/indefinite-length suite. Every entry in these
// files is expected to be well-formed and to decode without error.
const mt_files = [
	'appA_mt0.edn',
	'appA_mt1.edn',
	'appA_mt2.edn',
	'appA_mt3.edn',
	'appA_mt4.edn',
	'appA_mt5.edn',
	'appA_mt6.edn',
	'appA_mt7-float.edn',
	'appA_mt7-simple.edn',
	'appA_streaming.edn',
]
102 
// test_cbor_wg_per_major_type_corpus walks every file listed in `mt_files`,
// decodes each extracted payload with default options, and fails if any
// payload is rejected. It also requires each file to contribute at least one
// vector and the whole run to cover at least 80 vectors. An unreadable fixture
// panics.
fn test_cbor_wg_per_major_type_corpus() {
	default_opts := cbor.DecodeOpts{}
	mut rejected := []string{}
	mut seen := 0
	for name in mt_files {
		path := os.join_path(wg_dir, name)
		content := os.read_file(path) or { panic('cannot read ${name}: ${err}') }
		vectors := extract_hex_literals(content)
		assert vectors.len > 0, '${name} has no entries'
		for vector in vectors {
			seen++
			payload := hex.decode(vector) or {
				rejected << '${name} ${vector}: hex: ${err}'
				continue
			}
			cbor.decode[cbor.Value](payload, default_opts) or {
				rejected << '${name} ${vector}: ${err}'
				continue
			}
		}
	}
	if rejected.len != 0 {
		// Print every rejection before failing so one run shows the full list.
		for line in rejected {
			eprintln('MT-FAIL: ${line}')
		}
		assert false, '${rejected.len}/${seen} per-major-type vectors rejected'
	}
	assert seen >= 80, 'corpus too small: ${seen}'
}
132 
// test_cbor_wg_bad_corpus feeds every payload extracted from
// `rfc8949_bad.edn` to the decoder with default options and fails if any of
// them decodes successfully. Accepted payloads are printed to stderr with a
// `BAD-ACCEPTED:` prefix before the final assertion. Payloads that cannot be
// hex-decoded are skipped, but each skip is reported with a `BAD-SKIP:` prefix.
fn test_cbor_wg_bad_corpus() {
	text := os.read_file(os.join_path(wg_dir, 'rfc8949_bad.edn'))!
	hexes := extract_hex_literals(text)
	// Guard against a vacuous pass: with zero extracted vectors nothing would
	// be decoded and the test would succeed without checking anything.
	assert hexes.len > 0, 'rfc8949_bad.edn has no entries'
	mut accepted_anyway := []string{}
	for hex_str in hexes {
		bytes := hex.decode(hex_str) or {
			// Malformed at the hex layer is still a rejection, but the CBOR
			// decoder never saw this vector; say so instead of skipping silently.
			eprintln('BAD-SKIP: ${hex_str}: hex decode: ${err}')
			continue
		}
		if v := cbor.decode[cbor.Value](bytes, cbor.DecodeOpts{}) {
			accepted_anyway << '${hex_str} → ${v.type_name()}'
		}
	}
	if accepted_anyway.len > 0 {
		for a in accepted_anyway {
			eprintln('BAD-ACCEPTED: ${a}')
		}
		assert false, '${accepted_anyway.len}/${hexes.len} malformed vectors were not rejected'
	}
}
153

1	// CBOR Working Group conformance corpus
2	// (https://github.com/cbor-wg/cbor-test-vectors).
3	//
4	// Two fixture files live next to this test:
5	//
6	//   * `cbor_wg/rfc8949_good.edn` — 88 well-formed payloads. Each must
7	//     decode without error.
8	//   * `cbor_wg/rfc8949_bad.edn`  — 47 malformed payloads. Each MUST be
9	//     rejected by the decoder per RFC 8949 §3.
10	//
11	// The fixtures are EDN (Extended Diagnostic Notation, RFC 8610 Appendix G) — we only
12	// pull the `"encoded": h'…'` hex literals because that's what we need
13	// to drive the decoder. The expected `decoded` value is left to the
14	// other test files (rfc8949_appendix_a, upstream_appendix_a) which use
15	// the JSON-encoded corpus.
16	module main
17
18	import encoding.cbor
19	import encoding.hex
20	import os
21
// wg_dir is the `cbor_wg` directory that sits next to this source file; every
// fixture path in this file is built from it.
const wg_dir = os.join_path(os.dir(@FILE), 'cbor_wg')
23
// extract_hex_literals scans `text` for every `"encoded": h'…'` entry and
// returns the hex payloads in order of appearance. Spaces, tabs, line feeds
// and carriage returns inside a literal are dropped, so the returned strings
// are contiguous hex digits. Scanning stops at the first literal that has no
// closing quote.
fn extract_hex_literals(text string) []string {
	marker := '"encoded": h\''
	mut literals := []string{}
	mut cursor := 0
	for {
		hit := text.index_after(marker, cursor) or { break }
		body_start := hit + marker.len
		body_end := text.index_after("'", body_start) or { break }
		body := text[body_start..body_end]
		mut compact := []u8{cap: body.len}
		for ch in body {
			// Keep everything except the whitespace used for visual grouping.
			if ch != ` ` && ch != `\t` && ch != `\n` && ch != `\r` {
				compact << ch
			}
		}
		literals << compact.bytestr()
		// Resume the search just past the closing quote.
		cursor = body_end + 1
	}
	return literals
}
48
// test_extractor_sanity pins the number of hex literals the extractor finds in
// each fixture (88 good, 47 bad), so a change in the fixtures or in the
// extractor shows up as an explicit count mismatch. An unreadable fixture
// panics.
fn test_extractor_sanity() {
	good_path := os.join_path(wg_dir, 'rfc8949_good.edn')
	bad_path := os.join_path(wg_dir, 'rfc8949_bad.edn')
	good_text := os.read_file(good_path) or { panic('cannot read good.edn: ${err}') }
	bad_text := os.read_file(bad_path) or { panic('cannot read bad.edn: ${err}') }
	good_count := extract_hex_literals(good_text).len
	bad_count := extract_hex_literals(bad_text).len
	assert good_count == 88, 'good corpus drift: ${good_count} (expected 88)'
	assert bad_count == 47, 'bad corpus drift: ${bad_count} (expected 47)'
}
61
// test_cbor_wg_good_corpus decodes every payload extracted from
// `rfc8949_good.edn` and fails if any of them is rejected, either by the hex
// decoder or by the CBOR decoder. Rejections are collected and printed to
// stderr with a `GOOD-FAIL:` prefix before the final assertion, so one run
// reports every failing vector.
fn test_cbor_wg_good_corpus() {
	text := os.read_file(os.join_path(wg_dir, 'rfc8949_good.edn'))!
	hexes := extract_hex_literals(text)
	// Guard against a vacuous pass: with zero extracted vectors the loop below
	// never runs and the test would succeed without decoding anything.
	assert hexes.len > 0, 'rfc8949_good.edn has no entries'
	// The corpus deliberately stresses 256+ deep nesting; raise the cap.
	opts := cbor.DecodeOpts{
		max_depth: 4096
	}
	mut failures := []string{}
	for hex_str in hexes {
		bytes := hex.decode(hex_str) or {
			failures << '${hex_str}: hex decode: ${err}'
			continue
		}
		cbor.decode[cbor.Value](bytes, opts) or {
			failures << '${hex_str}: ${err}'
			continue
		}
	}
	if failures.len > 0 {
		for f in failures {
			eprintln('GOOD-FAIL: ${f}')
		}
		assert false, '${failures.len}/${hexes.len} good vectors rejected'
	}
}
87
// mt_files names the fixtures under `wg_dir` that are split by CBOR major type
// (mt0..mt7), plus the streaming/indefinite-length suite. Every entry in these
// files is expected to be well-formed and to decode without error.
const mt_files = [
	'appA_mt0.edn',
	'appA_mt1.edn',
	'appA_mt2.edn',
	'appA_mt3.edn',
	'appA_mt4.edn',
	'appA_mt5.edn',
	'appA_mt6.edn',
	'appA_mt7-float.edn',
	'appA_mt7-simple.edn',
	'appA_streaming.edn',
]
102
// test_cbor_wg_per_major_type_corpus walks every file listed in `mt_files`,
// decodes each extracted payload with default options, and fails if any
// payload is rejected. It also requires each file to contribute at least one
// vector and the whole run to cover at least 80 vectors. An unreadable fixture
// panics.
fn test_cbor_wg_per_major_type_corpus() {
	default_opts := cbor.DecodeOpts{}
	mut rejected := []string{}
	mut seen := 0
	for name in mt_files {
		path := os.join_path(wg_dir, name)
		content := os.read_file(path) or { panic('cannot read ${name}: ${err}') }
		vectors := extract_hex_literals(content)
		assert vectors.len > 0, '${name} has no entries'
		for vector in vectors {
			seen++
			payload := hex.decode(vector) or {
				rejected << '${name} ${vector}: hex: ${err}'
				continue
			}
			cbor.decode[cbor.Value](payload, default_opts) or {
				rejected << '${name} ${vector}: ${err}'
				continue
			}
		}
	}
	if rejected.len != 0 {
		// Print every rejection before failing so one run shows the full list.
		for line in rejected {
			eprintln('MT-FAIL: ${line}')
		}
		assert false, '${rejected.len}/${seen} per-major-type vectors rejected'
	}
	assert seen >= 80, 'corpus too small: ${seen}'
}
132
// test_cbor_wg_bad_corpus feeds every payload extracted from
// `rfc8949_bad.edn` to the decoder with default options and fails if any of
// them decodes successfully. Accepted payloads are printed to stderr with a
// `BAD-ACCEPTED:` prefix before the final assertion. Payloads that cannot be
// hex-decoded are skipped, but each skip is reported with a `BAD-SKIP:` prefix.
fn test_cbor_wg_bad_corpus() {
	text := os.read_file(os.join_path(wg_dir, 'rfc8949_bad.edn'))!
	hexes := extract_hex_literals(text)
	// Guard against a vacuous pass: with zero extracted vectors nothing would
	// be decoded and the test would succeed without checking anything.
	assert hexes.len > 0, 'rfc8949_bad.edn has no entries'
	mut accepted_anyway := []string{}
	for hex_str in hexes {
		bytes := hex.decode(hex_str) or {
			// Malformed at the hex layer is still a rejection, but the CBOR
			// decoder never saw this vector; say so instead of skipping silently.
			eprintln('BAD-SKIP: ${hex_str}: hex decode: ${err}')
			continue
		}
		if v := cbor.decode[cbor.Value](bytes, cbor.DecodeOpts{}) {
			accepted_anyway << '${hex_str} → ${v.type_name()}'
		}
	}
	if accepted_anyway.len > 0 {
		for a in accepted_anyway {
			eprintln('BAD-ACCEPTED: ${a}')
		}
		assert false, '${accepted_anyway.len}/${hexes.len} malformed vectors were not rejected'
	}
}
153