Gitly


1 // Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2 // Use of this source code is governed by an MIT license
3 // that can be found in the LICENSE file.
4 // This implementation is derived from the golang implementation
5 // which itself is derived in part from the reference
6 // ANSI C implementation, which carries the following notice:
7 //
8 // rijndael-alg-fst.c
9 //
10 // @version 3.0 (December 2000)
11 //
12 // Optimised ANSI C code for the Rijndael cipher (now AES)
13 //
14 // @author Vincent Rijmen <[email protected]>
15 // @author Antoon Bosselaers <[email protected]>
16 // @author Paulo Barreto <[email protected]>
17 //
18 // This code is hereby placed in the public domain.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
21 // OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 // ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
24 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
27 // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
29 // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // See FIPS 197 for specification, and see Daemen and Rijmen's Rijndael submission
33 // for implementation details.
34 // https://csrc.nist.gov/csrc/media/publications/fips/197/final/documents/fips-197.pdf
35 // https://csrc.nist.gov/archive/aes/rijndael/Rijndael-ammended.pdf
36 module aes
37 
38 import encoding.binary
39 
// ct_mask_u8 turns the least significant bit of `bit` into a full-byte mask:
// 0x00 when the bit is clear, 0xff when it is set. All other bits are ignored.
@[inline]
fn ct_mask_u8(bit u8) u8 {
    lsb := int(bit & 1)
    // Two's complement negation: -0 stays 0x00, -1 truncates to 0xff.
    return u8(-lsb)
}
45 
// xtime multiplies the field element `x` by the polynomial x (i.e. by 2) in GF(2^8).
// The shifted value is reduced with 0x1b when the top bit of `x` was set; the
// reduction is selected with a mask rather than a branch.
@[inline]
fn xtime(x u8) u8 {
    doubled := u8(u32(x) << 1)
    reduction := ct_mask_u8(x >> 7) & u8(0x1b)
    return doubled ^ reduction
}
51 
// gf_mul returns the product of `x` and `y` in GF(2^8).
// It always runs eight shift-and-add steps and selects each partial product
// with a mask, so there are no data-dependent branches.
@[direct_array_access; inline]
fn gf_mul(x u8, y u8) u8 {
    mut multiplicand := x
    mut multiplier := y
    mut product := u8(0)
    for step := 0; step < 8; step++ {
        // Add the current multiple of `x` when the low multiplier bit is set.
        product ^= ct_mask_u8(multiplier) & multiplicand
        multiplicand = xtime(multiplicand)
        multiplier >>= 1
    }
    return product
}
65 
// gf_square returns `x` multiplied by itself in GF(2^8).
@[inline]
fn gf_square(x u8) u8 {
    squared := gf_mul(x, x)
    return squared
}
71 
// rotl8 rotates the byte `x` left by `n` bits.
// Callers in this file pass `n` between 1 and 6.
@[inline]
fn rotl8(x u8, n int) u8 {
    wide := u32(x)
    upper := wide << u32(n)
    lower := wide >> u32(8 - n)
    return u8(upper | lower)
}
76 
// gf_inverse raises `x` to the power 254 in GF(2^8), which is the multiplicative
// inverse for non-zero `x`; an input of 0 yields 0.
// 254 = 2 + 4 + 8 + 16 + 32 + 64 + 128, so the result is the product of the
// successive squarings of `x`.
@[inline]
fn gf_inverse(x u8) u8 {
    mut power := gf_square(x) // x^2
    mut acc := power
    for _ in 0 .. 6 {
        // power walks through x^4, x^8, ..., x^128.
        power = gf_square(power)
        acc = gf_mul(power, acc)
    }
    return acc
}
88 
// sub_byte computes the AES S-box value of `x` without lookup tables:
// the field inverse of `x` followed by the affine transform
// b ^ rotl(b,1) ^ rotl(b,2) ^ rotl(b,3) ^ rotl(b,4) ^ 0x63.
@[inline]
fn sub_byte(x u8) u8 {
    b := gf_inverse(x)
    mut mixed := b ^ u8(0x63)
    for n in 1 .. 5 {
        mixed ^= rotl8(b, n)
    }
    return mixed
}
95 
// inv_sub_byte computes the inverse AES S-box value of `x` without lookup tables:
// it first undoes the affine transform, then takes the field inverse.
@[inline]
fn inv_sub_byte(x u8) u8 {
    unmixed := rotl8(x, 1) ^ rotl8(x, 3) ^ rotl8(x, 6) ^ u8(0x05)
    return gf_inverse(unmixed)
}
101 
// add_round_key XORs round key number `round` from the schedule `xk` into `state`.
// Each round key is four consecutive words of `xk`; a word covers one 4-byte
// column of `state`, most significant byte first.
@[direct_array_access; inline]
fn add_round_key(mut state [16]u8, xk []u32, round int) {
    first := round * 4
    for c in 0 .. 4 {
        w := xk[first + c]
        for r in 0 .. 4 {
            // r = 0 takes the top byte of the word, r = 3 the bottom byte.
            state[c * 4 + r] ^= u8(w >> u32(24 - 8 * r))
        }
    }
}
113 
// sub_bytes replaces every byte of `state` with its S-box value (see sub_byte).
@[direct_array_access; inline]
fn sub_bytes(mut state [16]u8) {
    for idx := 0; idx < 16; idx++ {
        state[idx] = sub_byte(state[idx])
    }
}
120 
// inv_sub_bytes replaces every byte of `state` with its inverse S-box value
// (see inv_sub_byte).
@[direct_array_access; inline]
fn inv_sub_bytes(mut state [16]u8) {
    for idx := 0; idx < 16; idx++ {
        state[idx] = inv_sub_byte(state[idx])
    }
}
127 
// shift_rows applies the AES ShiftRows step in place.
// `state` is stored column by column (index = col * 4 + row); row r is rotated
// left by r columns, so row 0 is left untouched.
@[direct_array_access; inline]
fn shift_rows(mut state [16]u8) {
    mut prev := [16]u8{}
    for i in 0 .. 16 {
        prev[i] = state[i]
    }
    for col in 0 .. 4 {
        for row in 1 .. 4 {
            // The byte landing in (row, col) comes from column col + row.
            state[col * 4 + row] = prev[((col + row) % 4) * 4 + row]
        }
    }
}
149 
// inv_shift_rows applies the AES InvShiftRows step in place.
// `state` is stored column by column (index = col * 4 + row); row r is rotated
// right by r columns, so row 0 is left untouched.
@[direct_array_access; inline]
fn inv_shift_rows(mut state [16]u8) {
    mut prev := [16]u8{}
    for i in 0 .. 16 {
        prev[i] = state[i]
    }
    for col in 0 .. 4 {
        for row in 1 .. 4 {
            // The byte landing in (row, col) comes from column col - row (mod 4).
            state[col * 4 + row] = prev[((col + 4 - row) % 4) * 4 + row]
        }
    }
}
171 
// mix_columns applies the AES MixColumns step to each 4-byte column of `state`.
// Every output byte mixes the column with the coefficients 2, 3, 1, 1 (rotated
// per row); 2*v is xtime(v) and 3*v is formed as xtime(v) ^ v.
@[direct_array_access; inline]
fn mix_columns(mut state [16]u8) {
    for base := 0; base < 16; base += 4 {
        a0 := state[base]
        a1 := state[base + 1]
        a2 := state[base + 2]
        a3 := state[base + 3]
        // Doubled values.
        d0 := xtime(a0)
        d1 := xtime(a1)
        d2 := xtime(a2)
        d3 := xtime(a3)
        // Tripled values.
        t0 := d0 ^ a0
        t1 := d1 ^ a1
        t2 := d2 ^ a2
        t3 := d3 ^ a3
        state[base] = d0 ^ t1 ^ a2 ^ a3
        state[base + 1] = a0 ^ d1 ^ t2 ^ a3
        state[base + 2] = a0 ^ a1 ^ d2 ^ t3
        state[base + 3] = t0 ^ a1 ^ a2 ^ d3
    }
}
190 
// inv_mix_columns applies the AES InvMixColumns step to each 4-byte column of
// `state`, using the coefficients 0x0e, 0x0b, 0x0d, 0x09 (rotated per row).
@[direct_array_access; inline]
fn inv_mix_columns(mut state [16]u8) {
    for base := 0; base < 16; base += 4 {
        a := state[base]
        b := state[base + 1]
        c := state[base + 2]
        d := state[base + 3]
        state[base] = gf_mul(a, 0x0e) ^ gf_mul(b, 0x0b) ^ gf_mul(c, 0x0d) ^ gf_mul(d, 0x09)
        state[base + 1] = gf_mul(a, 0x09) ^ gf_mul(b, 0x0e) ^ gf_mul(c, 0x0b) ^ gf_mul(d, 0x0d)
        state[base + 2] = gf_mul(a, 0x0d) ^ gf_mul(b, 0x09) ^ gf_mul(c, 0x0e) ^ gf_mul(d, 0x0b)
        state[base + 3] = gf_mul(a, 0x0b) ^ gf_mul(b, 0x0d) ^ gf_mul(c, 0x09) ^ gf_mul(d, 0x0e)
    }
}
205 
// encrypt_block_generic encrypts the first 16 bytes of `src` into the first
// 16 bytes of `dst` using the expanded key `xk`.
// The number of rounds is derived from `xk.len` (four words per round key).
// The block is processed in a local copy and written to `dst` only at the end.
@[direct_array_access]
fn encrypt_block_generic(xk []u32, mut dst []u8, src []u8) {
    _ = src[15] // early bounds check
    mut block := [16]u8{}
    for idx := 0; idx < 16; idx++ {
        block[idx] = src[idx]
    }
    last_round := xk.len / 4 - 1
    // Initial round key addition.
    add_round_key(mut block, xk, 0)
    // Full rounds 1 .. last_round - 1.
    mut r := 1
    for r < last_round {
        sub_bytes(mut block)
        shift_rows(mut block)
        mix_columns(mut block)
        add_round_key(mut block, xk, r)
        r++
    }
    // The final round has no MixColumns step.
    sub_bytes(mut block)
    shift_rows(mut block)
    add_round_key(mut block, xk, last_round)
    _ = dst[15] // early bounds check
    for idx := 0; idx < 16; idx++ {
        dst[idx] = block[idx]
    }
}
230 
// decrypt_block_generic decrypts the first 16 bytes of `src` into the first
// 16 bytes of `dst` using the expanded key `xk`.
// Round keys are consumed in the order they appear in `xk`, so `xk` is presumably
// the reversed (`dec`) schedule produced by expand_key_generic.
// The block is processed in a local copy and written to `dst` only at the end.
@[direct_array_access]
fn decrypt_block_generic(xk []u32, mut dst []u8, src []u8) {
    _ = src[15] // early bounds check
    mut block := [16]u8{}
    for idx := 0; idx < 16; idx++ {
        block[idx] = src[idx]
    }
    last_round := xk.len / 4 - 1
    // Initial round key addition.
    add_round_key(mut block, xk, 0)
    // Full rounds 1 .. last_round - 1.
    mut r := 1
    for r < last_round {
        inv_shift_rows(mut block)
        inv_sub_bytes(mut block)
        add_round_key(mut block, xk, r)
        inv_mix_columns(mut block)
        r++
    }
    // The final round has no InvMixColumns step.
    inv_shift_rows(mut block)
    inv_sub_bytes(mut block)
    add_round_key(mut block, xk, last_round)
    _ = dst[15] // early bounds check
    for idx := 0; idx < 16; idx++ {
        dst[idx] = block[idx]
    }
}
255 
// subw applies the AES S-box (sub_byte) to each of the four bytes of `w`
// independently, keeping every byte in its original position.
@[inline]
fn subw(w u32) u32 {
    b3 := u32(sub_byte(u8(w >> 24)))
    b2 := u32(sub_byte(u8(w >> 16)))
    b1 := u32(sub_byte(u8(w >> 8)))
    b0 := u32(sub_byte(u8(w)))
    return b3 << 24 | b2 << 16 | b1 << 8 | b0
}
261 
// rotw rotates the 32-bit word `w` left by 8 bits: the most significant byte
// moves to the least significant position.
@[inline]
fn rotw(w u32) u32 {
    top := w >> 24
    rest := w << 8
    return rest | top
}
267 
// expand_key_generic runs the AES key expansion. See FIPS-197, Figure 11.
// `enc` receives the encryption key schedule. If `dec` is non-empty it receives
// the same 4-word round keys in reverse order.
// The rcon[i] of FIPS-197 corresponds to pow_x[i-1] << 24 here.
// Panics when `key` holds fewer than 4 bytes, because no key word can be formed.
@[direct_array_access]
fn expand_key_generic(key []u8, mut enc []u32, mut dec []u32) {
    nk := key.len / 4
    if nk == 0 {
        // Without this guard the `% nk` below divides by zero and `enc[i - 1]`
        // reads before the start of `enc`.
        panic('crypto.aes: invalid key size ' + key.len.str())
    }
    // Encryption key setup: load the raw key as big-endian words.
    // `i < nk` already guarantees `4 * i < key.len`, so no extra check is needed.
    mut i := 0
    for i = 0; i < nk; i++ {
        enc[i] = binary.big_endian_u32(key[4 * i..])
    }
    for i < enc.len {
        mut t := enc[i - 1]
        if i % nk == 0 {
            t = subw(rotw(t)) ^ u32(pow_x[i / nk - 1]) << 24
        } else if nk > 6 && i % nk == 4 {
            // Extra substitution used only for keys longer than 6 words.
            t = subw(t)
        }
        enc[i] = enc[i - nk] ^ t
        i++
    }
    // Derive decryption key from encryption key.
    // Reverse the 4-word round key sets from enc to produce dec.
    // The byte-wise block path applies InvMixColumns separately during decryption.
    if dec.len == 0 {
        return
    }
    n := enc.len
    for i = 0; i < n; i += 4 {
        ei := n - i - 4
        for j in 0 .. 4 {
            dec[i + j] = enc[ei + j]
        }
    }
}
305

1	// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2	// Use of this source code is governed by an MIT license
3	// that can be found in the LICENSE file.
4	// This implementation is derived from the golang implementation
5	// which itself is derived in part from the reference
6	// ANSI C implementation, which carries the following notice:
7	//
8	// rijndael-alg-fst.c
9	//
10	// @version 3.0 (December 2000)
11	//
12	// Optimised ANSI C code for the Rijndael cipher (now AES)
13	//
14	// @author Vincent Rijmen <[email protected]>
15	// @author Antoon Bosselaers <[email protected]>
16	// @author Paulo Barreto <[email protected]>
17	//
18	// This code is hereby placed in the public domain.
19	//
20	// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
21	// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22	// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23	// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
24	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
27	// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28	// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
29	// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30	// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31	//
32	// See FIPS 197 for specification, and see Daemen and Rijmen's Rijndael submission
33	// for implementation details.
34	// https://csrc.nist.gov/csrc/media/publications/fips/197/final/documents/fips-197.pdf
35	// https://csrc.nist.gov/archive/aes/rijndael/Rijndael-ammended.pdf
36	module aes
37
38	import encoding.binary
39
// ct_mask_u8 turns the least significant bit of `bit` into a full-byte mask:
// 0x00 when the bit is clear, 0xff when it is set. All other bits are ignored.
@[inline]
fn ct_mask_u8(bit u8) u8 {
    lsb := int(bit & 1)
    // Two's complement negation: -0 stays 0x00, -1 truncates to 0xff.
    return u8(-lsb)
}
45
// xtime multiplies the field element `x` by the polynomial x (i.e. by 2) in GF(2^8).
// The shifted value is reduced with 0x1b when the top bit of `x` was set; the
// reduction is selected with a mask rather than a branch.
@[inline]
fn xtime(x u8) u8 {
    doubled := u8(u32(x) << 1)
    reduction := ct_mask_u8(x >> 7) & u8(0x1b)
    return doubled ^ reduction
}
51
// gf_mul returns the product of `x` and `y` in GF(2^8).
// It always runs eight shift-and-add steps and selects each partial product
// with a mask, so there are no data-dependent branches.
@[direct_array_access; inline]
fn gf_mul(x u8, y u8) u8 {
    mut multiplicand := x
    mut multiplier := y
    mut product := u8(0)
    for step := 0; step < 8; step++ {
        // Add the current multiple of `x` when the low multiplier bit is set.
        product ^= ct_mask_u8(multiplier) & multiplicand
        multiplicand = xtime(multiplicand)
        multiplier >>= 1
    }
    return product
}
65
// gf_square returns `x` multiplied by itself in GF(2^8).
@[inline]
fn gf_square(x u8) u8 {
    squared := gf_mul(x, x)
    return squared
}
71
// rotl8 rotates the byte `x` left by `n` bits.
// Callers in this file pass `n` between 1 and 6.
@[inline]
fn rotl8(x u8, n int) u8 {
    wide := u32(x)
    upper := wide << u32(n)
    lower := wide >> u32(8 - n)
    // Plain bitwise OR; an escaped `\|` is not valid V.
    return u8(upper | lower)
}
76
// gf_inverse raises `x` to the power 254 in GF(2^8), which is the multiplicative
// inverse for non-zero `x`; an input of 0 yields 0.
// 254 = 2 + 4 + 8 + 16 + 32 + 64 + 128, so the result is the product of the
// successive squarings of `x`.
@[inline]
fn gf_inverse(x u8) u8 {
    mut power := gf_square(x) // x^2
    mut acc := power
    for _ in 0 .. 6 {
        // power walks through x^4, x^8, ..., x^128.
        power = gf_square(power)
        acc = gf_mul(power, acc)
    }
    return acc
}
88
// sub_byte computes the AES S-box value of `x` without lookup tables:
// the field inverse of `x` followed by the affine transform
// b ^ rotl(b,1) ^ rotl(b,2) ^ rotl(b,3) ^ rotl(b,4) ^ 0x63.
@[inline]
fn sub_byte(x u8) u8 {
    b := gf_inverse(x)
    mut mixed := b ^ u8(0x63)
    for n in 1 .. 5 {
        mixed ^= rotl8(b, n)
    }
    return mixed
}
95
// inv_sub_byte computes the inverse AES S-box value of `x` without lookup tables:
// it first undoes the affine transform, then takes the field inverse.
@[inline]
fn inv_sub_byte(x u8) u8 {
    unmixed := rotl8(x, 1) ^ rotl8(x, 3) ^ rotl8(x, 6) ^ u8(0x05)
    return gf_inverse(unmixed)
}
101
// add_round_key XORs round key number `round` from the schedule `xk` into `state`.
// Each round key is four consecutive words of `xk`; a word covers one 4-byte
// column of `state`, most significant byte first.
@[direct_array_access; inline]
fn add_round_key(mut state [16]u8, xk []u32, round int) {
    first := round * 4
    for c in 0 .. 4 {
        w := xk[first + c]
        for r in 0 .. 4 {
            // r = 0 takes the top byte of the word, r = 3 the bottom byte.
            state[c * 4 + r] ^= u8(w >> u32(24 - 8 * r))
        }
    }
}
113
// sub_bytes replaces every byte of `state` with its S-box value (see sub_byte).
@[direct_array_access; inline]
fn sub_bytes(mut state [16]u8) {
    for idx := 0; idx < 16; idx++ {
        state[idx] = sub_byte(state[idx])
    }
}
120
// inv_sub_bytes replaces every byte of `state` with its inverse S-box value
// (see inv_sub_byte).
@[direct_array_access; inline]
fn inv_sub_bytes(mut state [16]u8) {
    for idx := 0; idx < 16; idx++ {
        state[idx] = inv_sub_byte(state[idx])
    }
}
127
// shift_rows applies the AES ShiftRows step in place.
// `state` is stored column by column (index = col * 4 + row); row r is rotated
// left by r columns, so row 0 is left untouched.
@[direct_array_access; inline]
fn shift_rows(mut state [16]u8) {
    mut prev := [16]u8{}
    for i in 0 .. 16 {
        prev[i] = state[i]
    }
    for col in 0 .. 4 {
        for row in 1 .. 4 {
            // The byte landing in (row, col) comes from column col + row.
            state[col * 4 + row] = prev[((col + row) % 4) * 4 + row]
        }
    }
}
149
// inv_shift_rows applies the AES InvShiftRows step in place.
// `state` is stored column by column (index = col * 4 + row); row r is rotated
// right by r columns, so row 0 is left untouched.
@[direct_array_access; inline]
fn inv_shift_rows(mut state [16]u8) {
    mut prev := [16]u8{}
    for i in 0 .. 16 {
        prev[i] = state[i]
    }
    for col in 0 .. 4 {
        for row in 1 .. 4 {
            // The byte landing in (row, col) comes from column col - row (mod 4).
            state[col * 4 + row] = prev[((col + 4 - row) % 4) * 4 + row]
        }
    }
}
171
// mix_columns applies the AES MixColumns step to each 4-byte column of `state`.
// Every output byte mixes the column with the coefficients 2, 3, 1, 1 (rotated
// per row); 2*v is xtime(v) and 3*v is formed as xtime(v) ^ v.
@[direct_array_access; inline]
fn mix_columns(mut state [16]u8) {
    for base := 0; base < 16; base += 4 {
        a0 := state[base]
        a1 := state[base + 1]
        a2 := state[base + 2]
        a3 := state[base + 3]
        // Doubled values.
        d0 := xtime(a0)
        d1 := xtime(a1)
        d2 := xtime(a2)
        d3 := xtime(a3)
        // Tripled values.
        t0 := d0 ^ a0
        t1 := d1 ^ a1
        t2 := d2 ^ a2
        t3 := d3 ^ a3
        state[base] = d0 ^ t1 ^ a2 ^ a3
        state[base + 1] = a0 ^ d1 ^ t2 ^ a3
        state[base + 2] = a0 ^ a1 ^ d2 ^ t3
        state[base + 3] = t0 ^ a1 ^ a2 ^ d3
    }
}
190
// inv_mix_columns applies the AES InvMixColumns step to each 4-byte column of
// `state`, using the coefficients 0x0e, 0x0b, 0x0d, 0x09 (rotated per row).
@[direct_array_access; inline]
fn inv_mix_columns(mut state [16]u8) {
    for base := 0; base < 16; base += 4 {
        a := state[base]
        b := state[base + 1]
        c := state[base + 2]
        d := state[base + 3]
        state[base] = gf_mul(a, 0x0e) ^ gf_mul(b, 0x0b) ^ gf_mul(c, 0x0d) ^ gf_mul(d, 0x09)
        state[base + 1] = gf_mul(a, 0x09) ^ gf_mul(b, 0x0e) ^ gf_mul(c, 0x0b) ^ gf_mul(d, 0x0d)
        state[base + 2] = gf_mul(a, 0x0d) ^ gf_mul(b, 0x09) ^ gf_mul(c, 0x0e) ^ gf_mul(d, 0x0b)
        state[base + 3] = gf_mul(a, 0x0b) ^ gf_mul(b, 0x0d) ^ gf_mul(c, 0x09) ^ gf_mul(d, 0x0e)
    }
}
205
// encrypt_block_generic encrypts the first 16 bytes of `src` into the first
// 16 bytes of `dst` using the expanded key `xk`.
// The number of rounds is derived from `xk.len` (four words per round key).
// The block is processed in a local copy and written to `dst` only at the end.
@[direct_array_access]
fn encrypt_block_generic(xk []u32, mut dst []u8, src []u8) {
    _ = src[15] // early bounds check
    mut block := [16]u8{}
    for idx := 0; idx < 16; idx++ {
        block[idx] = src[idx]
    }
    last_round := xk.len / 4 - 1
    // Initial round key addition.
    add_round_key(mut block, xk, 0)
    // Full rounds 1 .. last_round - 1.
    mut r := 1
    for r < last_round {
        sub_bytes(mut block)
        shift_rows(mut block)
        mix_columns(mut block)
        add_round_key(mut block, xk, r)
        r++
    }
    // The final round has no MixColumns step.
    sub_bytes(mut block)
    shift_rows(mut block)
    add_round_key(mut block, xk, last_round)
    _ = dst[15] // early bounds check
    for idx := 0; idx < 16; idx++ {
        dst[idx] = block[idx]
    }
}
230
// decrypt_block_generic decrypts the first 16 bytes of `src` into the first
// 16 bytes of `dst` using the expanded key `xk`.
// Round keys are consumed in the order they appear in `xk`, so `xk` is presumably
// the reversed (`dec`) schedule produced by expand_key_generic.
// The block is processed in a local copy and written to `dst` only at the end.
@[direct_array_access]
fn decrypt_block_generic(xk []u32, mut dst []u8, src []u8) {
    _ = src[15] // early bounds check
    mut block := [16]u8{}
    for idx := 0; idx < 16; idx++ {
        block[idx] = src[idx]
    }
    last_round := xk.len / 4 - 1
    // Initial round key addition.
    add_round_key(mut block, xk, 0)
    // Full rounds 1 .. last_round - 1.
    mut r := 1
    for r < last_round {
        inv_shift_rows(mut block)
        inv_sub_bytes(mut block)
        add_round_key(mut block, xk, r)
        inv_mix_columns(mut block)
        r++
    }
    // The final round has no InvMixColumns step.
    inv_shift_rows(mut block)
    inv_sub_bytes(mut block)
    add_round_key(mut block, xk, last_round)
    _ = dst[15] // early bounds check
    for idx := 0; idx < 16; idx++ {
        dst[idx] = block[idx]
    }
}
255
// subw applies the AES S-box (sub_byte) to each of the four bytes of `w`
// independently, keeping every byte in its original position.
@[inline]
fn subw(w u32) u32 {
    b3 := u32(sub_byte(u8(w >> 24)))
    b2 := u32(sub_byte(u8(w >> 16)))
    b1 := u32(sub_byte(u8(w >> 8)))
    b0 := u32(sub_byte(u8(w)))
    // Plain bitwise OR; an escaped `\|` is not valid V.
    return b3 << 24 | b2 << 16 | b1 << 8 | b0
}
261
// rotw rotates the 32-bit word `w` left by 8 bits: the most significant byte
// moves to the least significant position.
@[inline]
fn rotw(w u32) u32 {
    top := w >> 24
    rest := w << 8
    // Plain bitwise OR; an escaped `\|` is not valid V.
    return rest | top
}
267
// expand_key_generic runs the AES key expansion. See FIPS-197, Figure 11.
// `enc` receives the encryption key schedule. If `dec` is non-empty it receives
// the same 4-word round keys in reverse order.
// The rcon[i] of FIPS-197 corresponds to pow_x[i-1] << 24 here.
// Panics when `key` holds fewer than 4 bytes, because no key word can be formed.
@[direct_array_access]
fn expand_key_generic(key []u8, mut enc []u32, mut dec []u32) {
    nk := key.len / 4
    if nk == 0 {
        // Without this guard the `% nk` below divides by zero and `enc[i - 1]`
        // reads before the start of `enc`.
        panic('crypto.aes: invalid key size ' + key.len.str())
    }
    // Encryption key setup: load the raw key as big-endian words.
    // `i < nk` already guarantees `4 * i < key.len`, so no extra check is needed.
    mut i := 0
    for i = 0; i < nk; i++ {
        enc[i] = binary.big_endian_u32(key[4 * i..])
    }
    for i < enc.len {
        mut t := enc[i - 1]
        if i % nk == 0 {
            t = subw(rotw(t)) ^ u32(pow_x[i / nk - 1]) << 24
        } else if nk > 6 && i % nk == 4 {
            // Extra substitution used only for keys longer than 6 words.
            t = subw(t)
        }
        enc[i] = enc[i - nk] ^ t
        i++
    }
    // Derive decryption key from encryption key.
    // Reverse the 4-word round key sets from enc to produce dec.
    // The byte-wise block path applies InvMixColumns separately during decryption.
    if dec.len == 0 {
        return
    }
    n := enc.len
    for i = 0; i < n; i += 4 {
        ei := n - i - 4
        for j in 0 .. 4 {
            dec[i + j] = enc[ei + j]
        }
    }
}
305