From 383812bfbf3dc60322a71fa7ad12c420c415b074 Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Wed, 15 Apr 2026 02:40:39 +0300 Subject: [PATCH] crypto.aes: fix vulnerability to cache-timing attacks (fixes #23791) --- vlib/crypto/aes/block_generic.v | 300 +++++++++++++++++++-------- vlib/crypto/aes/block_generic_test.v | 48 +++++ vlib/crypto/aes/const.v | 8 +- 3 files changed, 258 insertions(+), 98 deletions(-) create mode 100644 vlib/crypto/aes/block_generic_test.v diff --git a/vlib/crypto/aes/block_generic.v b/vlib/crypto/aes/block_generic.v index b94ac8294..9ff3d3312 100644 --- a/vlib/crypto/aes/block_generic.v +++ b/vlib/crypto/aes/block_generic.v @@ -37,106 +37,226 @@ module aes import encoding.binary +// ct_mask_u8 expands the low bit of `bit` to either 0x00 or 0xff. +@[inline] +fn ct_mask_u8(bit u8) u8 { + return u8(~(int(bit & 1) - 1)) +} + +// xtime multiplies `x` by x in GF(2^8). +@[inline] +fn xtime(x u8) u8 { + return u8(u32(x) << 1) ^ (u8(0x1b) & ct_mask_u8(x >> 7)) +} + +// gf_mul multiplies `x` and `y` in GF(2^8) without data-dependent branches. +@[direct_array_access; inline] +fn gf_mul(x u8, y u8) u8 { + mut a := x + mut b := y + mut out := u8(0) + for _ in 0 .. 8 { + out ^= a & ct_mask_u8(b) + a = xtime(a) + b >>= 1 + } + return out +} + +// gf_square squares `x` in GF(2^8). +@[inline] +fn gf_square(x u8) u8 { + return gf_mul(x, x) +} + +@[inline] +fn rotl8(x u8, n int) u8 { + return u8((u32(x) << u32(n)) | (u32(x) >> u32(8 - n))) +} + +@[inline] +fn gf_inverse(x u8) u8 { + x2 := gf_square(x) + x4 := gf_square(x2) + x8 := gf_square(x4) + x16 := gf_square(x8) + x32 := gf_square(x16) + x64 := gf_square(x32) + x128 := gf_square(x64) + return gf_mul(gf_mul(gf_mul(gf_mul(gf_mul(gf_mul(x128, x64), x32), x16), x8), x4), x2) +} + +// sub_byte applies the AES S-box without lookup tables. +@[inline] +fn sub_byte(x u8) u8 { + inv := gf_inverse(x) + return inv ^ rotl8(inv, 1) ^ rotl8(inv, 2) ^ rotl8(inv, 3) ^ rotl8(inv, 4) ^ u8(0x63) +} + +// inv_sub_byte applies the inverse AES S-box without lookup tables. +@[inline] +fn inv_sub_byte(x u8) u8 { + return gf_inverse(rotl8(x, 1) ^ rotl8(x, 3) ^ rotl8(x, 6) ^ u8(0x05)) +} + +@[direct_array_access; inline] +fn add_round_key(mut state [16]u8, xk []u32, round int) { + for col in 0 .. 4 { + word := xk[round * 4 + col] + base := col * 4 + state[base + 0] ^= u8(word >> 24) + state[base + 1] ^= u8(word >> 16) + state[base + 2] ^= u8(word >> 8) + state[base + 3] ^= u8(word) + } +} + +@[direct_array_access; inline] +fn sub_bytes(mut state [16]u8) { + for i in 0 .. 16 { + state[i] = sub_byte(state[i]) + } +} + +@[direct_array_access; inline] +fn inv_sub_bytes(mut state [16]u8) { + for i in 0 .. 16 { + state[i] = inv_sub_byte(state[i]) + } +} + +@[direct_array_access; inline] +fn shift_rows(mut state [16]u8) { + t1 := state[1] + state[1] = state[5] + state[5] = state[9] + state[9] = state[13] + state[13] = t1 + + t2 := state[2] + t6 := state[6] + state[2] = state[10] + state[6] = state[14] + state[10] = t2 + state[14] = t6 + + t3 := state[3] + state[3] = state[15] + state[15] = state[11] + state[11] = state[7] + state[7] = t3 +} + +@[direct_array_access; inline] +fn inv_shift_rows(mut state [16]u8) { + t13 := state[13] + state[13] = state[9] + state[9] = state[5] + state[5] = state[1] + state[1] = t13 + + t2 := state[2] + t6 := state[6] + state[2] = state[10] + state[6] = state[14] + state[10] = t2 + state[14] = t6 + + t3 := state[3] + state[3] = state[7] + state[7] = state[11] + state[11] = state[15] + state[15] = t3 +} + +@[direct_array_access; inline] +fn mix_columns(mut state [16]u8) { + for col in 0 .. 4 { + base := col * 4 + s0 := state[base + 0] + s1 := state[base + 1] + s2 := state[base + 2] + s3 := state[base + 3] + m2s0 := xtime(s0) + m2s1 := xtime(s1) + m2s2 := xtime(s2) + m2s3 := xtime(s3) + state[base + 0] = m2s0 ^ (m2s1 ^ s1) ^ s2 ^ s3 + state[base + 1] = s0 ^ m2s1 ^ (m2s2 ^ s2) ^ s3 + state[base + 2] = s0 ^ s1 ^ m2s2 ^ (m2s3 ^ s3) + state[base + 3] = (m2s0 ^ s0) ^ s1 ^ s2 ^ m2s3 + } +} + +@[direct_array_access; inline] +fn inv_mix_columns(mut state [16]u8) { + for col in 0 .. 4 { + base := col * 4 + s0 := state[base + 0] + s1 := state[base + 1] + s2 := state[base + 2] + s3 := state[base + 3] + state[base + 0] = gf_mul(s0, 14) ^ gf_mul(s1, 11) ^ gf_mul(s2, 13) ^ gf_mul(s3, 9) + state[base + 1] = gf_mul(s0, 9) ^ gf_mul(s1, 14) ^ gf_mul(s2, 11) ^ gf_mul(s3, 13) + state[base + 2] = gf_mul(s0, 13) ^ gf_mul(s1, 9) ^ gf_mul(s2, 14) ^ gf_mul(s3, 11) + state[base + 3] = gf_mul(s0, 11) ^ gf_mul(s1, 13) ^ gf_mul(s2, 9) ^ gf_mul(s3, 14) + } +} + // Encrypt one block from src into dst, using the expanded key xk. @[direct_array_access] fn encrypt_block_generic(xk []u32, mut dst []u8, src []u8) { _ = src[15] // early bounds check - mut s0 := binary.big_endian_u32(src[..4]) - mut s1 := binary.big_endian_u32(src[4..8]) - mut s2 := binary.big_endian_u32(src[8..12]) - mut s3 := binary.big_endian_u32(src[12..16]) - // First round just XORs input with key. - s0 ^= xk[0] - s1 ^= xk[1] - s2 ^= xk[2] - s3 ^= xk[3] - // Middle rounds shuffle using tables. - // Number of rounds is set by length of expanded key. - nr := xk.len / 4 - 2 // - 2: one above, one more below - mut k := 4 - mut t0 := u32(0) - mut t1 := u32(0) - mut t2 := u32(0) - mut t3 := u32(0) - for _ in 0 .. nr { - t0 = xk[k + 0] ^ te0[u8(s0 >> 24)] ^ te1[u8(s1 >> 16)] ^ te2[u8(s2 >> 8)] ^ u32(te3[u8(s3)]) - t1 = xk[k + 1] ^ te0[u8(s1 >> 24)] ^ te1[u8(s2 >> 16)] ^ te2[u8(s3 >> 8)] ^ u32(te3[u8(s0)]) - t2 = xk[k + 2] ^ te0[u8(s2 >> 24)] ^ te1[u8(s3 >> 16)] ^ te2[u8(s0 >> 8)] ^ u32(te3[u8(s1)]) - t3 = xk[k + 3] ^ te0[u8(s3 >> 24)] ^ te1[u8(s0 >> 16)] ^ te2[u8(s1 >> 8)] ^ u32(te3[u8(s2)]) - k += 4 - s0 = t0 - s1 = t1 - s2 = t2 - s3 = t3 - } - // Last round uses s-box directly and XORs to produce output. - s0 = u32(s_box0[t0 >> 24]) << 24 | u32(s_box0[(t1 >> 16) & 0xff]) << 16 | u32(s_box0[(t2 >> 8) & 0xff]) << 8 | u32(s_box0[t3 & u32(0xff)]) - s1 = u32(s_box0[t1 >> 24]) << 24 | u32(s_box0[(t2 >> 16) & 0xff]) << 16 | u32(s_box0[(t3 >> 8) & 0xff]) << 8 | u32(s_box0[t0 & u32(0xff)]) - s2 = u32(s_box0[t2 >> 24]) << 24 | u32(s_box0[(t3 >> 16) & 0xff]) << 16 | u32(s_box0[(t0 >> 8) & 0xff]) << 8 | u32(s_box0[t1 & u32(0xff)]) - s3 = u32(s_box0[t3 >> 24]) << 24 | u32(s_box0[(t0 >> 16) & 0xff]) << 16 | u32(s_box0[(t1 >> 8) & 0xff]) << 8 | u32(s_box0[t2 & u32(0xff)]) - s0 ^= xk[k + 0] - s1 ^= xk[k + 1] - s2 ^= xk[k + 2] - s3 ^= xk[k + 3] - _ := dst[15] // early bounds check - binary.big_endian_put_u32(mut (*dst)[0..4], s0) - binary.big_endian_put_u32(mut (*dst)[4..8], s1) - binary.big_endian_put_u32(mut (*dst)[8..12], s2) - binary.big_endian_put_u32(mut (*dst)[12..16], s3) + mut state := [16]u8{} + for i in 0 .. 16 { + state[i] = src[i] + } + nr := xk.len / 4 - 1 + add_round_key(mut state, xk, 0) + for round in 1 .. nr { + sub_bytes(mut state) + shift_rows(mut state) + mix_columns(mut state) + add_round_key(mut state, xk, round) + } + sub_bytes(mut state) + shift_rows(mut state) + add_round_key(mut state, xk, nr) + _ = dst[15] // early bounds check + for i in 0 .. 16 { + dst[i] = state[i] + } } // Decrypt one block from src into dst, using the expanded key xk. @[direct_array_access] fn decrypt_block_generic(xk []u32, mut dst []u8, src []u8) { _ = src[15] // early bounds check - mut s0 := binary.big_endian_u32(src[0..4]) - mut s1 := binary.big_endian_u32(src[4..8]) - mut s2 := binary.big_endian_u32(src[8..12]) - mut s3 := binary.big_endian_u32(src[12..16]) - // First round just XORs input with key. - s0 ^= xk[0] - s1 ^= xk[1] - s2 ^= xk[2] - s3 ^= xk[3] - // Middle rounds shuffle using tables. - // Number of rounds is set by length of expanded key. - nr := xk.len / 4 - 2 // - 2: one above, one more below - mut k := 4 - mut t0 := u32(0) - mut t1 := u32(0) - mut t2 := u32(0) - mut t3 := u32(0) - for _ in 0 .. nr { - t0 = xk[k + 0] ^ td0[u8(s0 >> 24)] ^ td1[u8(s3 >> 16)] ^ td2[u8(s2 >> 8)] ^ u32(td3[u8(s1)]) - t1 = xk[k + 1] ^ td0[u8(s1 >> 24)] ^ td1[u8(s0 >> 16)] ^ td2[u8(s3 >> 8)] ^ u32(td3[u8(s2)]) - t2 = xk[k + 2] ^ td0[u8(s2 >> 24)] ^ td1[u8(s1 >> 16)] ^ td2[u8(s0 >> 8)] ^ u32(td3[u8(s3)]) - t3 = xk[k + 3] ^ td0[u8(s3 >> 24)] ^ td1[u8(s2 >> 16)] ^ td2[u8(s1 >> 8)] ^ u32(td3[u8(s0)]) - k += 4 - s0 = t0 - s1 = t1 - s2 = t2 - s3 = t3 - } - // Last round uses s-box directly and XORs to produce output. - s0 = u32(s_box1[t0 >> 24]) << 24 | u32(s_box1[(t3 >> 16) & 0xff]) << 16 | u32(s_box1[(t2 >> 8) & 0xff]) << 8 | u32(s_box1[t1 & u32(0xff)]) - s1 = u32(s_box1[t1 >> 24]) << 24 | u32(s_box1[(t0 >> 16) & 0xff]) << 16 | u32(s_box1[(t3 >> 8) & 0xff]) << 8 | u32(s_box1[t2 & u32(0xff)]) - s2 = u32(s_box1[t2 >> 24]) << 24 | u32(s_box1[(t1 >> 16) & 0xff]) << 16 | u32(s_box1[(t0 >> 8) & 0xff]) << 8 | u32(s_box1[t3 & u32(0xff)]) - s3 = u32(s_box1[t3 >> 24]) << 24 | u32(s_box1[(t2 >> 16) & 0xff]) << 16 | u32(s_box1[(t1 >> 8) & 0xff]) << 8 | u32(s_box1[t0 & u32(0xff)]) - s0 ^= xk[k + 0] - s1 ^= xk[k + 1] - s2 ^= xk[k + 2] - s3 ^= xk[k + 3] + mut state := [16]u8{} + for i in 0 .. 16 { + state[i] = src[i] + } + nr := xk.len / 4 - 1 + add_round_key(mut state, xk, 0) + for round in 1 .. nr { + inv_shift_rows(mut state) + inv_sub_bytes(mut state) + add_round_key(mut state, xk, round) + inv_mix_columns(mut state) + } + inv_shift_rows(mut state) + inv_sub_bytes(mut state) + add_round_key(mut state, xk, nr) _ = dst[15] // early bounds check - binary.big_endian_put_u32(mut (*dst)[..4], s0) - binary.big_endian_put_u32(mut (*dst)[4..8], s1) - binary.big_endian_put_u32(mut (*dst)[8..12], s2) - binary.big_endian_put_u32(mut (*dst)[12..16], s3) + for i in 0 .. 16 { + dst[i] = state[i] + } } -// Apply s_box0 to each byte in w. -@[direct_array_access; inline] +// Apply the AES S-box to each byte in w without lookup tables. +@[inline] fn subw(w u32) u32 { - return u32(s_box0[w >> 24]) << 24 | u32(s_box0[(w >> 16) & 0xff]) << 16 | u32(s_box0[(w >> 8) & 0xff]) << 8 | u32(s_box0[w & u32(0xff)]) + return u32(sub_byte(u8(w >> 24))) << 24 | u32(sub_byte(u8(w >> 16))) << 16 | u32(sub_byte(u8(w >> 8))) << 8 | u32(sub_byte(u8(w))) } // Rotate @@ -170,7 +290,7 @@ fn expand_key_generic(key []u8, mut enc []u32, mut dec []u32) { } // Derive decryption key from encryption key. // Reverse the 4-word round key sets from enc to produce dec. - // All sets but the first and last get the MixColumn transform applied. + // The byte-wise block path applies InvMixColumns separately during decryption. if dec.len == 0 { return } @@ -178,11 +298,7 @@ fn expand_key_generic(key []u8, mut enc []u32, mut dec []u32) { for i = 0; i < n; i += 4 { ei := n - i - 4 for j in 0 .. 4 { - mut x := enc[ei + j] - if i > 0 && i + 4 < n { - x = td0[s_box0[x >> 24]] ^ td1[s_box0[(x >> 16) & 0xff]] ^ td2[s_box0[(x >> 8) & 0xff]] ^ td3[s_box0[x & u32(0xff)]] - } - dec[i + j] = x + dec[i + j] = enc[ei + j] } } } diff --git a/vlib/crypto/aes/block_generic_test.v b/vlib/crypto/aes/block_generic_test.v new file mode 100644 index 000000000..9b3085c47 --- /dev/null +++ b/vlib/crypto/aes/block_generic_test.v @@ -0,0 +1,48 @@ +module aes + +import encoding.hex + +struct BlockVector { + key string + plaintext string + ciphertext string +} + +fn test_sbox_roundtrip() { + for i in 0 .. 256 { + b := u8(i) + assert inv_sub_byte(sub_byte(b)) == b + } +} + +fn test_aes_known_answer_vectors() { + test_cases := [ + BlockVector{ + key: '000102030405060708090a0b0c0d0e0f' + plaintext: '00112233445566778899aabbccddeeff' + ciphertext: '69c4e0d86a7b0430d8cdb78070b4c55a' + }, + BlockVector{ + key: '000102030405060708090a0b0c0d0e0f1011121314151617' + plaintext: '00112233445566778899aabbccddeeff' + ciphertext: 'dda97ca4864cdfe06eaf70a0ec0d7191' + }, + BlockVector{ + key: '000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f' + plaintext: '00112233445566778899aabbccddeeff' + ciphertext: '8ea2b7ca516745bfeafc49904b496089' + }, + ] + for tc in test_cases { + key := hex.decode(tc.key) or { panic(err) } + plaintext := hex.decode(tc.plaintext) or { panic(err) } + expected_ciphertext := hex.decode(tc.ciphertext) or { panic(err) } + block := new_cipher(key) + mut ciphertext := []u8{len: block_size} + block.encrypt(mut ciphertext, plaintext) + assert ciphertext == expected_ciphertext + mut decrypted := []u8{len: block_size} + block.decrypt(mut decrypted, ciphertext) + assert decrypted == plaintext + } +} diff --git a/vlib/crypto/aes/const.v b/vlib/crypto/aes/const.v index 53ae82057..0c1c2f68c 100644 --- a/vlib/crypto/aes/const.v +++ b/vlib/crypto/aes/const.v @@ -5,12 +5,8 @@ // Package aes implements AES encryption (formerly Rijndael), as defined in // U.S. Federal Information Processing Standards Publication 197. // -// The AES operations in this package are not implemented using constant-time algorithms. -// An exception is when running on systems with enabled hardware support for AES -// that makes these operations constant-time. Examples include amd64 systems using AES-NI -// extensions and s390x systems using Message-Security-Assist extensions. -// On such systems, when the result of NewCipher is passed to cipher.NewGCM, -// the GHASH operation used by GCM is also constant-time. +// The generic software implementation in this package avoids secret-dependent +// table lookups during both block operations and key expansion. module aes // This file contains AES constants - 8720 bytes of initialized data. -- 2.39.5