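// Canonical (deterministic) encoding: map keys are emitted sorted by their
// encoded forms. Used for hashable / signable payloads.
// NOTE: RFC 8949 §4.2.1 ("core deterministic encoding") specifies plain
// bytewise lexicographic order of the encoded keys; the length-first rule
// is the legacy RFC 7049 ordering described in RFC 8949 §4.2.3. The two
// orders agree for the short text keys used in this file, so these tests
// do not distinguish between them.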
| 4 | module main |
| 5 | |
| 6 | import encoding.cbor |
| 7 | import encoding.hex |
| 8 | |
// h decodes the hex string `s` into raw bytes.
// It panics with the offending input when `s` is not valid hex, which is
// acceptable here because every caller passes a hard-coded test vector.
fn h(s string) []u8 {
	decoded := hex.decode(s) or { panic('invalid hex: ${s}') }
	return decoded
}
| 12 | |
// beq reports whether the byte slices `a` and `b` have the same length and
// the same contents.
// V's built-in array `==` already compares length and elements, so no
// manual loop is needed.
fn beq(a []u8, b []u8) bool {
	return a == b
}
| 24 | |
// test_canonical_sorts_text_keys inserts five single-letter text keys in a
// scrambled order and expects canonical encoding to emit them as
// "a", "b", "c", "d", "e".
fn test_canonical_sorts_text_keys() {
	// Insertion order is deliberately not alphabetical; each key maps to
	// its own upper-case letter.
	mut pairs := []cbor.MapPair{}
	for letter in ['e', 'b', 'd', 'a', 'c'] {
		pairs << cbor.MapPair{
			key:   cbor.Value(cbor.Text{
				value: letter
			})
			value: cbor.Value(cbor.Text{
				value: letter.to_upper()
			})
		}
	}
	input := cbor.Value(cbor.Map{
		pairs: pairs
	})
	got := cbor.encode_value(input, cbor.EncodeOpts{ canonical: true })!
	// a5 (map of 5), then "a":"A" .. "e":"E" as 61 xx 61 XX pairs.
	want := h('a56161614161626142616361436164614461656145')
	assert beq(got, want), 'canonical: got ${hex.encode(got)}, want ${hex.encode(want)}'
}
| 76 | |
// test_canonical_length_first_then_lex checks that a shorter key is emitted
// before a longer one regardless of insertion order:
// {"aa": 2, "a": 1} must encode as {"a": 1, "aa": 2}.
fn test_canonical_length_first_then_lex() {
	v := cbor.Value(cbor.Map{
		pairs: [
			cbor.MapPair{
				key:   cbor.Value(cbor.Text{
					value: 'aa'
				})
				value: cbor.Value(cbor.new_uint(2))
			},
			cbor.MapPair{
				key:   cbor.Value(cbor.Text{
					value: 'a'
				})
				value: cbor.Value(cbor.new_uint(1))
			},
		]
	})
	got := cbor.encode_value(v, cbor.EncodeOpts{ canonical: true })!
	// Encoded keys: "a" = 0x6161 (2 bytes), "aa" = 0x626161 (3 bytes).
	// Length-first: "a" first, then "aa".
	// The literal is grouped as header / key / value; spaces are stripped
	// before decoding.
	want := h('a2 6161 01 626161 02'.replace(' ', ''))
	assert beq(got, want), 'length-first: got ${hex.encode(got)}, want ${hex.encode(want)}'
}
| 103 | |
// test_self_describe_prefix checks that `self_describe: true` prepends the
// three magic bytes d9 d9 f7 (the RFC 8949 self-described CBOR tag) to the
// encoded item.
fn test_self_describe_prefix() {
	got := cbor.encode[u64](u64(0), cbor.EncodeOpts{ self_describe: true })!
	// Magic prefix d9 d9 f7, then 0x00 for the unsigned integer 0.
	want := [u8(0xd9), 0xd9, 0xf7, 0x00]
	assert beq(got, want), 'self-describe: got ${hex.encode(got)}, want ${hex.encode(want)}'
}
| 109 | |
| 110 | // --------------------------------------------------------------------- |
| 111 | // Struct-as-map canonical encoding: declaration order MUST NOT leak |
| 112 | // into the wire form when canonical mode is on. Otherwise hash- or |
| 113 | // signature-based payloads (COSE, CWT, DAG-CBOR) lose stability across |
| 114 | // V versions whenever a field is added or reordered in source. |
| 115 | // --------------------------------------------------------------------- |
| 116 | |
// OutOfOrder is a test fixture whose fields are declared in an order
// (zeta, alpha, mid) that differs from the sorted order of their encoded
// key names, so the tests can tell declaration order apart from canonical
// order. Do not reorder the fields: the expected byte strings depend on it.
struct OutOfOrder {
	zeta  int
	alpha int
	mid   int
}
| 122 | |
// test_canonical_struct_sorts_keys_by_encoded_form checks that a struct
// encoded as a map in canonical mode has its field-name keys sorted by
// encoded form rather than emitted in declaration order.
fn test_canonical_struct_sorts_keys_by_encoded_form() {
	v := OutOfOrder{
		zeta:  1
		alpha: 2
		mid:   3
	}
	got := cbor.encode[OutOfOrder](v, cbor.EncodeOpts{ canonical: true })!
	// Length-first lex on encoded keys: "mid" (4B) < "zeta" (5B) < "alpha" (6B).
	// The literal is grouped as header / key / value; spaces are stripped
	// before decoding.
	want := h('a3 636d6964 03 647a657461 01 65616c706861 02'.replace(' ', ''))
	assert beq(got, want), 'declaration order leaked: got ${hex.encode(got)}, want ${hex.encode(want)}'
}
| 135 | |
// test_canonical_struct_preserves_declaration_order_when_off checks that
// with default (non-canonical) options a struct's fields are emitted in
// source declaration order: zeta, alpha, mid.
fn test_canonical_struct_preserves_declaration_order_when_off() {
	// Default (non-canonical) keeps source order — important for human
	// inspection and matches the documented permissive behaviour.
	v := OutOfOrder{
		zeta:  1
		alpha: 2
		mid:   3
	}
	got := cbor.encode[OutOfOrder](v, cbor.EncodeOpts{})!
	// The literal is grouped as header / key / value; spaces are stripped
	// before decoding.
	want := h('a3 647a657461 01 65616c706861 02 636d6964 03'.replace(' ', ''))
	assert beq(got, want), 'non-canonical reorder: got ${hex.encode(got)}, want ${hex.encode(want)}'
}
| 149 | |