| 1 | // Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | // This implementation is derived from the golang implementation |
| 5 | // which itself is derived in part from the reference |
| 6 | // ANSI C implementation, which carries the following notice: |
| 7 | // |
| 8 | // rijndael-alg-fst.c |
| 9 | // |
| 10 | // @version 3.0 (December 2000) |
| 11 | // |
| 12 | // Optimised ANSI C code for the Rijndael cipher (now AES) |
| 13 | // |
| 14 | // @author Vincent Rijmen <[email protected]> |
| 15 | // @author Antoon Bosselaers <[email protected]> |
| 16 | // @author Paulo Barreto <[email protected]> |
| 17 | // |
| 18 | // This code is hereby placed in the public domain. |
| 19 | // |
| 20 | // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS |
| 21 | // OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 22 | // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 23 | // ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE |
| 24 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 25 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 26 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
| 27 | // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
| 28 | // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
| 29 | // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
| 30 | // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | // |
| 32 | // See FIPS 197 for specification, and see Daemen and Rijmen's Rijndael submission |
| 33 | // for implementation details. |
| 34 | // https://csrc.nist.gov/csrc/media/publications/fips/197/final/documents/fips-197.pdf |
| 35 | // https://csrc.nist.gov/archive/aes/rijndael/Rijndael-ammended.pdf |
| 36 | module aes |
| 37 | |
| 38 | import encoding.binary |
| 39 | |
| 40 | // ct_mask_u8 expands the low bit of `bit` to either 0x00 or 0xff. |
| 41 | @[inline] |
| 42 | fn ct_mask_u8(bit u8) u8 { |
| 43 | return u8(~(int(bit & 1) - 1)) |
| 44 | } |
| 45 | |
| 46 | // xtime multiplies `x` by x in GF(2^8). |
| 47 | @[inline] |
| 48 | fn xtime(x u8) u8 { |
| 49 | return u8(u32(x) << 1) ^ (u8(0x1b) & ct_mask_u8(x >> 7)) |
| 50 | } |
| 51 | |
| 52 | // gf_mul multiplies `x` and `y` in GF(2^8) without data-dependent branches. |
| 53 | @[direct_array_access; inline] |
| 54 | fn gf_mul(x u8, y u8) u8 { |
| 55 | mut a := x |
| 56 | mut b := y |
| 57 | mut out := u8(0) |
| 58 | for _ in 0 .. 8 { |
| 59 | out ^= a & ct_mask_u8(b) |
| 60 | a = xtime(a) |
| 61 | b >>= 1 |
| 62 | } |
| 63 | return out |
| 64 | } |
| 65 | |
| 66 | // gf_square squares `x` in GF(2^8). |
| 67 | @[inline] |
| 68 | fn gf_square(x u8) u8 { |
| 69 | return gf_mul(x, x) |
| 70 | } |
| 71 | |
| 72 | @[inline] |
| 73 | fn rotl8(x u8, n int) u8 { |
| 74 | return u8((u32(x) << u32(n)) | (u32(x) >> u32(8 - n))) |
| 75 | } |
| 76 | |
| 77 | @[inline] |
| 78 | fn gf_inverse(x u8) u8 { |
| 79 | x2 := gf_square(x) |
| 80 | x4 := gf_square(x2) |
| 81 | x8 := gf_square(x4) |
| 82 | x16 := gf_square(x8) |
| 83 | x32 := gf_square(x16) |
| 84 | x64 := gf_square(x32) |
| 85 | x128 := gf_square(x64) |
| 86 | return gf_mul(gf_mul(gf_mul(gf_mul(gf_mul(gf_mul(x128, x64), x32), x16), x8), x4), x2) |
| 87 | } |
| 88 | |
| 89 | // sub_byte applies the AES S-box without lookup tables. |
| 90 | @[inline] |
| 91 | fn sub_byte(x u8) u8 { |
| 92 | inv := gf_inverse(x) |
| 93 | return inv ^ rotl8(inv, 1) ^ rotl8(inv, 2) ^ rotl8(inv, 3) ^ rotl8(inv, 4) ^ u8(0x63) |
| 94 | } |
| 95 | |
| 96 | // inv_sub_byte applies the inverse AES S-box without lookup tables. |
| 97 | @[inline] |
| 98 | fn inv_sub_byte(x u8) u8 { |
| 99 | return gf_inverse(rotl8(x, 1) ^ rotl8(x, 3) ^ rotl8(x, 6) ^ u8(0x05)) |
| 100 | } |
| 101 | |
| 102 | @[direct_array_access; inline] |
| 103 | fn add_round_key(mut state [16]u8, xk []u32, round int) { |
| 104 | for col in 0 .. 4 { |
| 105 | word := xk[round * 4 + col] |
| 106 | base := col * 4 |
| 107 | state[base + 0] ^= u8(word >> 24) |
| 108 | state[base + 1] ^= u8(word >> 16) |
| 109 | state[base + 2] ^= u8(word >> 8) |
| 110 | state[base + 3] ^= u8(word) |
| 111 | } |
| 112 | } |
| 113 | |
| 114 | @[direct_array_access; inline] |
| 115 | fn sub_bytes(mut state [16]u8) { |
| 116 | for i in 0 .. 16 { |
| 117 | state[i] = sub_byte(state[i]) |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | @[direct_array_access; inline] |
| 122 | fn inv_sub_bytes(mut state [16]u8) { |
| 123 | for i in 0 .. 16 { |
| 124 | state[i] = inv_sub_byte(state[i]) |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | @[direct_array_access; inline] |
| 129 | fn shift_rows(mut state [16]u8) { |
| 130 | t1 := state[1] |
| 131 | state[1] = state[5] |
| 132 | state[5] = state[9] |
| 133 | state[9] = state[13] |
| 134 | state[13] = t1 |
| 135 | |
| 136 | t2 := state[2] |
| 137 | t6 := state[6] |
| 138 | state[2] = state[10] |
| 139 | state[6] = state[14] |
| 140 | state[10] = t2 |
| 141 | state[14] = t6 |
| 142 | |
| 143 | t3 := state[3] |
| 144 | state[3] = state[15] |
| 145 | state[15] = state[11] |
| 146 | state[11] = state[7] |
| 147 | state[7] = t3 |
| 148 | } |
| 149 | |
| 150 | @[direct_array_access; inline] |
| 151 | fn inv_shift_rows(mut state [16]u8) { |
| 152 | t13 := state[13] |
| 153 | state[13] = state[9] |
| 154 | state[9] = state[5] |
| 155 | state[5] = state[1] |
| 156 | state[1] = t13 |
| 157 | |
| 158 | t2 := state[2] |
| 159 | t6 := state[6] |
| 160 | state[2] = state[10] |
| 161 | state[6] = state[14] |
| 162 | state[10] = t2 |
| 163 | state[14] = t6 |
| 164 | |
| 165 | t3 := state[3] |
| 166 | state[3] = state[7] |
| 167 | state[7] = state[11] |
| 168 | state[11] = state[15] |
| 169 | state[15] = t3 |
| 170 | } |
| 171 | |
| 172 | @[direct_array_access; inline] |
| 173 | fn mix_columns(mut state [16]u8) { |
| 174 | for col in 0 .. 4 { |
| 175 | base := col * 4 |
| 176 | s0 := state[base + 0] |
| 177 | s1 := state[base + 1] |
| 178 | s2 := state[base + 2] |
| 179 | s3 := state[base + 3] |
| 180 | m2s0 := xtime(s0) |
| 181 | m2s1 := xtime(s1) |
| 182 | m2s2 := xtime(s2) |
| 183 | m2s3 := xtime(s3) |
| 184 | state[base + 0] = m2s0 ^ (m2s1 ^ s1) ^ s2 ^ s3 |
| 185 | state[base + 1] = s0 ^ m2s1 ^ (m2s2 ^ s2) ^ s3 |
| 186 | state[base + 2] = s0 ^ s1 ^ m2s2 ^ (m2s3 ^ s3) |
| 187 | state[base + 3] = (m2s0 ^ s0) ^ s1 ^ s2 ^ m2s3 |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | @[direct_array_access; inline] |
| 192 | fn inv_mix_columns(mut state [16]u8) { |
| 193 | for col in 0 .. 4 { |
| 194 | base := col * 4 |
| 195 | s0 := state[base + 0] |
| 196 | s1 := state[base + 1] |
| 197 | s2 := state[base + 2] |
| 198 | s3 := state[base + 3] |
| 199 | state[base + 0] = gf_mul(s0, 14) ^ gf_mul(s1, 11) ^ gf_mul(s2, 13) ^ gf_mul(s3, 9) |
| 200 | state[base + 1] = gf_mul(s0, 9) ^ gf_mul(s1, 14) ^ gf_mul(s2, 11) ^ gf_mul(s3, 13) |
| 201 | state[base + 2] = gf_mul(s0, 13) ^ gf_mul(s1, 9) ^ gf_mul(s2, 14) ^ gf_mul(s3, 11) |
| 202 | state[base + 3] = gf_mul(s0, 11) ^ gf_mul(s1, 13) ^ gf_mul(s2, 9) ^ gf_mul(s3, 14) |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | // Encrypt one block from src into dst, using the expanded key xk. |
| 207 | @[direct_array_access] |
| 208 | fn encrypt_block_generic(xk []u32, mut dst []u8, src []u8) { |
| 209 | _ = src[15] // early bounds check |
| 210 | mut state := [16]u8{} |
| 211 | for i in 0 .. 16 { |
| 212 | state[i] = src[i] |
| 213 | } |
| 214 | nr := xk.len / 4 - 1 |
| 215 | add_round_key(mut state, xk, 0) |
| 216 | for round in 1 .. nr { |
| 217 | sub_bytes(mut state) |
| 218 | shift_rows(mut state) |
| 219 | mix_columns(mut state) |
| 220 | add_round_key(mut state, xk, round) |
| 221 | } |
| 222 | sub_bytes(mut state) |
| 223 | shift_rows(mut state) |
| 224 | add_round_key(mut state, xk, nr) |
| 225 | _ = dst[15] // early bounds check |
| 226 | for i in 0 .. 16 { |
| 227 | dst[i] = state[i] |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | // Decrypt one block from src into dst, using the expanded key xk. |
| 232 | @[direct_array_access] |
| 233 | fn decrypt_block_generic(xk []u32, mut dst []u8, src []u8) { |
| 234 | _ = src[15] // early bounds check |
| 235 | mut state := [16]u8{} |
| 236 | for i in 0 .. 16 { |
| 237 | state[i] = src[i] |
| 238 | } |
| 239 | nr := xk.len / 4 - 1 |
| 240 | add_round_key(mut state, xk, 0) |
| 241 | for round in 1 .. nr { |
| 242 | inv_shift_rows(mut state) |
| 243 | inv_sub_bytes(mut state) |
| 244 | add_round_key(mut state, xk, round) |
| 245 | inv_mix_columns(mut state) |
| 246 | } |
| 247 | inv_shift_rows(mut state) |
| 248 | inv_sub_bytes(mut state) |
| 249 | add_round_key(mut state, xk, nr) |
| 250 | _ = dst[15] // early bounds check |
| 251 | for i in 0 .. 16 { |
| 252 | dst[i] = state[i] |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | // Apply the AES S-box to each byte in w without lookup tables. |
| 257 | @[inline] |
| 258 | fn subw(w u32) u32 { |
| 259 | return u32(sub_byte(u8(w >> 24))) << 24 | u32(sub_byte(u8(w >> 16))) << 16 | u32(sub_byte(u8(w >> 8))) << 8 | u32(sub_byte(u8(w))) |
| 260 | } |
| 261 | |
| 262 | // Rotate |
| 263 | @[inline] |
| 264 | fn rotw(w u32) u32 { |
| 265 | return (w << 8) | (w >> 24) |
| 266 | } |
| 267 | |
| 268 | // Key expansion algorithm. See FIPS-197, Figure 11. |
| 269 | // Their rcon[i] is our powx[i-1] << 24. |
| 270 | @[direct_array_access] |
| 271 | fn expand_key_generic(key []u8, mut enc []u32, mut dec []u32) { |
| 272 | // Encryption key setup. |
| 273 | mut i := 0 |
| 274 | nk := key.len / 4 |
| 275 | for i = 0; i < nk; i++ { |
| 276 | if 4 * i >= key.len { |
| 277 | break |
| 278 | } |
| 279 | enc[i] = binary.big_endian_u32(key[4 * i..]) |
| 280 | } |
| 281 | for i < enc.len { |
| 282 | mut t := enc[i - 1] |
| 283 | if i % nk == 0 { |
| 284 | t = subw(rotw(t)) ^ u32(pow_x[i / nk - 1]) << 24 |
| 285 | } else if nk > 6 && i % nk == 4 { |
| 286 | t = subw(t) |
| 287 | } |
| 288 | enc[i] = enc[i - nk] ^ t |
| 289 | i++ |
| 290 | } |
| 291 | // Derive decryption key from encryption key. |
| 292 | // Reverse the 4-word round key sets from enc to produce dec. |
| 293 | // The byte-wise block path applies InvMixColumns separately during decryption. |
| 294 | if dec.len == 0 { |
| 295 | return |
| 296 | } |
| 297 | n := enc.len |
| 298 | for i = 0; i < n; i += 4 { |
| 299 | ei := n - i - 4 |
| 300 | for j in 0 .. 4 { |
| 301 | dec[i + j] = enc[ei + j] |
| 302 | } |
| 303 | } |
| 304 | } |
| 305 | |