| 1 | // Copyright (c) 2024 blackshirt. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | // |
| 5 | module chacha20 |
| 6 | |
| 7 | import math.bits |
| 8 | import encoding.binary |
| 9 | |
// max_64bit_counter is the largest value the 64-bit internal counter of the
// original ChaCha20 variant can hold.
const max_64bit_counter = max_u64
// max_32bit_counter is the largest value the 32-bit internal counter of the
// standard IETF ChaCha20 variant can hold, widened to u64 so both limits share one type.
const max_32bit_counter = u64(max_u32)

// default_qround_nr is the default iteration count handed to State.qround.
// Each iteration performs one column round followed by one diagonal round.
const default_qround_nr = 10
| 17 | |
// Stream is the internal structure the main ChaCha20 algorithm operates on.
// It keeps the key and nonce/counter as 32-bit words, the variant flags, and
// a cache of quarter-round results that do not depend on the first state column.
@[noinit]
struct Stream {
mut:
	// underlying stream's key, stored as eight u32 words read little-endian
	// from the key bytes
	key [8]u32
	// underlying stream's nonce together with the internal counter:
	// - standard mode: word 0 is the counter, words 1..3 hold the nonce
	// - original mode: words 0..1 are the counter, words 2..3 hold the nonce
	nonce [4]u32

	// The mode (variant) of this ChaCha20 stream:
	// standard IETF variant or original (from DJ Bernstein) variant, set on creation.
	mode CipherMode = .standard
	// Flag that tells whether this stream was created from an extended (XChaCha20-sized) nonce.
	// NOTE(review): the original comment says this only makes sense for .standard mode,
	// yet the constructor also sets it for .original when a 64-bit counter is requested — confirm.
	extended bool
	// Flag that tells whether this stream has reached the counter limit
	overflow bool

	// precomp is true once the p* fields below hold valid cached values
	precomp bool
	// Cached quarter-round outputs for state columns 1, 2 and 3;
	// the numeric suffix is the state index the value belongs to.
	// vfmt off
	p1 u32 p5 u32 p9 u32 p13 u32
	p2 u32 p6 u32 p10 u32 p14 u32
	p3 u32 p7 u32 p11 u32 p15 u32
	// vfmt on
}
| 44 | |
// new_stream_with_options builds a ChaCha20 Stream from key, nonce and opt.
//
// The key must be key_size bytes long. The nonce length selects the variant:
// - nonce_size: standard IETF variant
// - x_nonce_size: extended construct; key and nonce are first run through
//   derive_xchacha20_key_nonce, and opt.use_64bit_counter switches it to the original variant
// - orig_nonce_size: original variant with a 64-bit counter
// Any other key or nonce length is reported as an error.
@[direct_array_access; inline]
fn new_stream_with_options(key []u8, nonce []u8, opt Options) !Stream {
	if key.len != key_size {
		return error('Bad key size provided')
	}

	// Start from the standard, non-extended defaults and adjust by nonce length.
	mut mode := CipherMode.standard
	mut extended := false
	if nonce.len == x_nonce_size {
		extended = true
		if opt.use_64bit_counter {
			mode = .original
		}
	} else if nonce.len == orig_nonce_size {
		mode = .original
	} else if nonce.len != nonce_size {
		return error('new_stream_with_options: unsupported nonce size')
	}

	// The extended construct works on a derived key and nonce,
	// every other form uses the caller's bytes directly.
	new_key, new_nonce := if extended {
		xkey, xnonce := derive_xchacha20_key_nonce(key, nonce, opt.use_64bit_counter)!
		xkey, xnonce
	} else {
		key, nonce
	}

	mut b := Stream{
		mode:     mode
		extended: extended
	}

	// Load the key as eight little-endian 32-bit words.
	for i in 0 .. 8 {
		b.key[i] = binary.little_endian_u32(new_key[i * 4..i * 4 + 4])
	}

	// The leading nonce words act as the block counter and start at zero:
	// one word in the standard variant, two words in the original variant.
	// The remaining words are filled from the nonce bytes.
	ctr_words := if mode == .standard { 1 } else { 2 }
	for i in 0 .. ctr_words {
		b.nonce[i] = 0
	}
	for i in ctr_words .. 4 {
		off := (i - ctr_words) * 4
		b.nonce[i] = binary.little_endian_u32(new_nonce[off..off + 4])
	}
	return b
}
| 114 | |
// reset wipes the stream's key, nonce/counter words and cached precomputed
// values, and clears the overflow and precomp flags.
// The variant selection (s.mode and s.extended) is left untouched.
@[unsafe]
fn (mut s Stream) reset() {
	// key is 8 u32 words (32 bytes), nonce is 4 u32 words (16 bytes)
	unsafe {
		_ := vmemset(&s.key, 0, 32)
		_ := vmemset(&s.nonce, 0, 16)
	}
	// Zeroing the nonce words also rewinds the counter, so an overflow latched
	// earlier no longer describes the stream; leaving it set would make every
	// later keystream_full call fail even though the counter is back at zero.
	s.overflow = false
	// the cached column values were derived from the wiped key/nonce
	s.precomp = false
	s.p1, s.p5, s.p9, s.p13 = u32(0), u32(0), u32(0), u32(0)
	s.p2, s.p6, s.p10, s.p14 = u32(0), u32(0), u32(0), u32(0)
	s.p3, s.p7, s.p11, s.p15 = u32(0), u32(0), u32(0), u32(0)
}
| 128 | |
// new_curr_state assembles the 16-word ChaCha20 state for the stream's
// current key, counter and nonce.
@[direct_array_access]
fn (s Stream) new_curr_state() State {
	// State layout (c=constant k=key b=blockcounter n=nonce):
	//
	//  0:cccccccc  1:cccccccc  2:cccccccc  3:cccccccc
	//  4:kkkkkkkk  5:kkkkkkkk  6:kkkkkkkk  7:kkkkkkkk
	//  8:kkkkkkkk  9:kkkkkkkk 10:kkkkkkkk 11:kkkkkkkk
	// 12:bbbbbbbb 13:nnnnnnnn 14:nnnnnnnn 15:nnnnnnnn
	mut st := State{}

	// words 0..3: the ChaCha20 constants
	st[0] = cc0
	st[1] = cc1
	st[2] = cc2
	st[3] = cc3

	// words 4..11: the key
	for i := 0; i < s.key.len; i++ {
		st[4 + i] = s.key[i]
	}

	// words 12..15: counter and nonce, exactly as stored in s.nonce
	for i := 0; i < s.nonce.len; i++ {
		st[12 + i] = s.nonce[i]
	}
	return st
}
| 155 | |
// keystream_full xors all of src with freshly generated key stream and
// writes the result into dst, advancing the internal counter by one per
// key stream block used (a trailing partial block uses a whole block).
//
// dst must be at least as long as src. An error is returned, before dst is
// touched, when the stream already overflowed, when dst is too short, or when
// the blocks needed for src would push the counter past its limit.
@[direct_array_access]
fn (mut s Stream) keystream_full(mut dst []u8, src []u8) ! {
	if s.overflow {
		return error('chacha20: keystream_full counter has reached the limit')
	}
	// direct_array_access disables bounds checks on the writes below,
	// so a short dst must be rejected explicitly.
	if dst.len < src.len {
		return error('chacha20: keystream_full dst is shorter than src')
	}
	// number of full blocks to be processed, and size of the trailing partial block
	nr_blocks := src.len / block_size
	tail_len := src.len % block_size
	// The partial block consumes a key stream block (and a counter step) too,
	// so it is part of the overflow check; otherwise the overflow would only
	// surface after dst had already been partially written.
	total_blocks := if tail_len != 0 { nr_blocks + 1 } else { nr_blocks }
	if s.check_ctr(u64(total_blocks)) {
		s.overflow = true
		return error('chacha20: internal counter overflow')
	}
	mut idx := 0
	// process the full block_size-d part of the message
	for i := 0; i < nr_blocks; i++ {
		// for every block_size-d chunk, generate one key stream block
		// and xor the chunk with it
		block := unsafe { src[i * block_size..(i + 1) * block_size] }
		ks := s.keystream()!
		for j, b in ks {
			dst[idx + j] = block[j] ^ b
		}
		// updates position
		idx += block_size
	}

	// process the remaining partial block
	if tail_len != 0 {
		last_block := unsafe { src[nr_blocks * block_size..] }
		// generate one more key stream block and use only its first tail_len bytes
		ks := s.keystream()!
		for i, b in last_block {
			dst[idx + i] = b ^ ks[i]
		}
	}
}
| 195 | |
// keystream generates and returns one block of key stream for the current
// counter value, then advances the internal counter by one.
// An error is returned when advancing the counter overflows it.
@[direct_array_access]
fn (mut s Stream) keystream() ![]u8 {
	// initial holds the input state, working is the copy the rounds mutate
	mut initial := s.new_curr_state()
	mut working := initial.clone()

	is_standard := s.mode == .standard
	if is_standard {
		// Columns 1..3 do not contain the counter word, so their first
		// column round is computed once and cached on the stream.
		if !s.precomp {
			s.precomp(initial)
		}
		// Column 0 holds the counter and is recomputed for every block.
		mut col0 := Quartet{initial[0], initial[4], initial[8], initial[12]}
		qround_on_quartet(mut col0)

		// First diagonal round, fed with the column-round results.
		qround_on_state_with_quartet(mut working, col0.e0, s.p5, s.p10, s.p15, 0, 5, 10, 15)
		qround_on_state_with_quartet(mut working, s.p1, s.p6, s.p11, col0.e3, 1, 6, 11, 12)
		qround_on_state_with_quartet(mut working, s.p2, s.p7, col0.e2, s.p13, 2, 7, 8, 13)
		qround_on_state_with_quartet(mut working, s.p3, col0.e1, s.p9, s.p14, 3, 4, 9, 14)
	}

	// The standard variant already went through one column+diagonal iteration
	// above, so nine remain; the original variant runs the full default count.
	rounds := if is_standard { 9 } else { default_qround_nr }
	working.qround(rounds)

	// Add the initial state words to the working state.
	// No xor-ing with a message happens here; that is left to the callers.
	for i, w in initial {
		working[i] += w
	}

	// advance the stream's internal counter
	s.inc_ctr(mut initial)!

	// serialize the working state, each word in little-endian byte order
	mut block := []u8{len: block_size}
	for i, word in working {
		off := i * 4
		block[off] = u8(word)
		block[off + 1] = u8(word >> 8)
		block[off + 2] = u8(word >> 16)
		block[off + 3] = u8(word >> 24)
	}
	return block
}
| 246 | |
// precomp runs one quarter round on each of the state columns 1, 2 and 3 of st,
// stores the results in the stream's p* fields and marks the cache as valid.
@[direct_array_access; inline]
fn (mut s Stream) precomp(st State) {
	// column 1: state words 1, 5, 9, 13
	mut col1 := Quartet{st[1], st[5], st[9], st[13]}
	qround_on_quartet(mut col1)
	s.p1, s.p5, s.p9, s.p13 = col1.e0, col1.e1, col1.e2, col1.e3

	// column 2: state words 2, 6, 10, 14
	mut col2 := Quartet{st[2], st[6], st[10], st[14]}
	qround_on_quartet(mut col2)
	s.p2, s.p6, s.p10, s.p14 = col2.e0, col2.e1, col2.e2, col2.e3

	// column 3: state words 3, 7, 11, 15
	mut col3 := Quartet{st[3], st[7], st[11], st[15]}
	qround_on_quartet(mut col3)
	s.p3, s.p7, s.p11, s.p15 = col3.e0, col3.e1, col3.e2, col3.e3

	s.precomp = true
}
| 275 | |
| 276 | // Handling of Stream's internal counter |
| 277 | // |
| 278 | |
// ctr returns the stream's current counter as a u64 value.
@[direct_array_access; inline]
fn (b Stream) ctr() u64 {
	if b.mode == .original {
		// original mode: 64-bit counter, low word in b.nonce[0], high word in b.nonce[1]
		return (u64(b.nonce[1]) << 32) | u64(b.nonce[0])
	}
	// standard mode: 32-bit counter kept in b.nonce[0]
	return u64(b.nonce[0])
}
| 294 | |
// set_ctr sets the stream's internal counter to ctr.
// In standard mode it panics when ctr does not fit the 32-bit counter.
@[direct_array_access; inline]
fn (mut b Stream) set_ctr(ctr u64) {
	if b.mode == .original {
		// split the 64-bit value over the two counter words, low word first
		b.nonce[0] = u32(ctr)
		b.nonce[1] = u32(ctr >> 32)
		return
	}
	// standard mode: reject a value beyond the 32-bit counter limit
	if ctr > max_32bit_counter {
		panic('set_ctr: counter value exceed the limit ')
	}
	b.nonce[0] = u32(ctr)
}
| 312 | |
// check_ctr reports whether adding value to the current counter would overflow.
// It returns true when the u64 addition wraps around or the sum exceeds the
// maximum counter of this stream variant.
@[inline]
fn (b Stream) check_ctr(value u64) bool {
	current := b.ctr()
	total := current + value
	// a wrapped u64 addition yields a sum smaller than one of its operands
	wrapped := total < current || total < value
	return wrapped || total > b.max_ctr()
}
| 325 | |
// inc_ctr advances the counter words of st by one and stores them back into
// the stream. When the counter wraps around, the stream is marked as
// overflowed, nothing is stored, and an error is returned.
@[direct_array_access]
fn (mut s Stream) inc_ctr(mut st State) ! {
	// both variants bump the low counter word first
	st[12] += 1

	if s.mode != .original {
		// standard variant: a single 32-bit counter word
		if st[12] == 0 {
			s.overflow = true
			return error('chacha20.keystream: overflow 32-bit counter')
		}
		s.nonce[0] = st[12]
		return
	}

	// original variant: carry into the high word when the low word wrapped
	if st[12] == 0 {
		st[13] += 1
		if st[13] == 0 {
			// the high word wrapped too, the 64-bit counter is exhausted
			s.overflow = true
			return error('chacha20.keystream: 64-bit counter reached')
		}
	}
	// store the counter
	s.nonce[0] = st[12]
	s.nonce[1] = st[13]
}
| 353 | |
// max_ctr returns the largest counter value allowed for this stream variant.
@[inline]
fn (b Stream) max_ctr() u64 {
	return if b.mode == .original { max_64bit_counter } else { max_32bit_counter }
}
| 362 | |
// State represents the running ChaCha20 state: sixteen 32-bit words (64 bytes).
pub type State = [16]u32
| 365 | |
// clone returns a new State holding the same words as s.
@[direct_array_access; inline]
pub fn (s State) clone() State {
	mut dup := State{}
	for idx, _ in s {
		dup[idx] = s[idx]
	}
	return dup
}
| 375 | |
// reset sets every word of this state to zero.
@[direct_array_access; inline]
pub fn (mut s State) reset() {
	// State is a fixed array of 16 words
	for idx := 0; idx < 16; idx++ {
		s[idx] = u32(0)
	}
}
| 383 | |
// qround runs nr iterations over the working state ws, where each iteration
// is one column round followed by one diagonal round.
// It is the caller's responsibility to provide the correct round number.
@[direct_array_access]
pub fn (mut ws State) qround(nr int) {
	for _ in 0 .. nr {
		// Column round: quarter round on each column of the 4x4 word matrix.
		//  0 |  1 |  2 |  3
		//  4 |  5 |  6 |  7
		//  8 |  9 | 10 | 11
		// 12 | 13 | 14 | 15
		for col in 0 .. 4 {
			qround_on_state(mut ws, col, col + 4, col + 8, col + 12)
		}

		// Diagonal round: row r is rotated left by r positions, which gives
		// the quartets (0,5,10,15), (1,6,11,12), (2,7,8,13) and (3,4,9,14).
		for col in 0 .. 4 {
			qround_on_state(mut ws, col, 4 + (col + 1) % 4, 8 + (col + 2) % 4, 12 + (col + 3) % 4)
		}
	}
}
| 410 | |
// qround_on_state_with_quartet first overwrites the state words at indices
// (a, b, c, d) with the values (q0, q1, q2, q3) and then runs a quarter round
// on those same four indices.
@[direct_array_access]
fn qround_on_state_with_quartet(mut s State, q0 u32, q1 u32, q2 u32, q3 u32, a int, b int, c int, d int) {
	// seed the four positions
	s[a] = q0
	s[b] = q1
	s[c] = q2
	s[d] = q3
	// mix them in place
	qround_on_state(mut s, a, b, c, d)
}
| 421 | |
// qround_on_state applies the ChaCha20 quarter round in place to the four
// state words selected by the indices a, b, c and d.
@[direct_array_access]
fn qround_on_state(mut s State, a int, b int, c int, d int) {
	// step 1: a += b; d = (d ^ a) <<< 16
	s[a] += s[b]
	s[d] = bits.rotate_left_32(s[d] ^ s[a], 16)

	// step 2: c += d; b = (b ^ c) <<< 12
	s[c] += s[d]
	s[b] = bits.rotate_left_32(s[b] ^ s[c], 12)

	// step 3: a += b; d = (d ^ a) <<< 8
	s[a] += s[b]
	s[d] = bits.rotate_left_32(s[d] ^ s[a], 8)

	// step 4: c += d; b = (b ^ c) <<< 7
	s[c] += s[d]
	s[b] = bits.rotate_left_32(s[b] ^ s[c], 7)
}
| 445 | |
// Quartet groups the four u32 values a quarter round operates on.
// In qround_on_quartet the fields e0, e1, e2, e3 take the roles a, b, c, d.
struct Quartet {
mut:
	e0 u32
	e1 u32
	e2 u32
	e3 u32
}
| 454 | |
// qround_on_quartet applies the ChaCha20 quarter round in place to Quartet q,
// with (e0, e1, e2, e3) acting as (a, b, c, d).
fn qround_on_quartet(mut q Quartet) {
	// step 1: a += b; d = (d ^ a) <<< 16
	q.e0 += q.e1
	q.e3 = bits.rotate_left_32(q.e3 ^ q.e0, 16)

	// step 2: c += d; b = (b ^ c) <<< 12
	q.e2 += q.e3
	q.e1 = bits.rotate_left_32(q.e1 ^ q.e2, 12)

	// step 3: a += b; d = (d ^ a) <<< 8
	q.e0 += q.e1
	q.e3 = bits.rotate_left_32(q.e3 ^ q.e0, 8)

	// step 4: c += d; b = (b ^ c) <<< 7
	q.e2 += q.e3
	q.e1 = bits.rotate_left_32(q.e1 ^ q.e2, 7)
}
| 477 | |