| 1 | // Copyright (c) 2024 blackshirt. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | // |
| 5 | // Poly1305 one-time message authentication code (MAC) module |
| 6 | module poly1305 |
| 7 | |
| 8 | import math |
| 9 | import math.unsigned |
| 10 | import encoding.binary |
| 11 | import crypto.internal.subtle |
| 12 | |
| 13 | // Constants defined in this module |
| 14 | // ------------------------------- |
| 15 | // block_size is the internal size of the Poly1305 block that is operates on |
| 16 | const block_size = 16 |
| 17 | // key_size is a 256-bit one-time key size for input to Poly1305 mac in bytes. |
| 18 | const key_size = 32 |
| 19 | // tag_size is the size of the output of the Poly1305 result, in bytes. |
| 20 | const tag_size = 16 |
| 21 | |
| 22 | // mask value for clamping low 64 bits of the r part, clearing 10 bits |
| 23 | const rmask0 = u64(0x0FFF_FFFC_0FFF_FFFF) |
| 24 | const not_rmask0 = ~rmask0 |
| 25 | // mask value for clamping high 64 bits of the r part, clearing 12 bits |
| 26 | const rmask1 = u64(0x0FFF_FFFC_0FFF_FFFC) |
| 27 | const not_rmask1 = ~rmask1 |
| 28 | |
| 29 | // mask value for low 2 bits of u64 value |
| 30 | const mask_low2bits = u64(0x0000_0000_0000_0003) |
| 31 | // mask value for high 62 bits of u64 value |
| 32 | const mask_high62bits = u64(0xFFFF_FFFF_FFFF_FFFC) |
| 33 | // mask value for high 60 bits of u64 value |
| 34 | const mask_high60bits = u64(0xFFFF_FFFF_FFFF_FFF0) |
| 35 | |
| 36 | // p is 130 bit of Poly1305 constant prime, ie 2^130-5 |
| 37 | // as defined in rfc, p = 3fffffffffffffffffffffffffffffffb |
| 38 | const p = Uint192{ |
| 39 | lo: u64(0xFFFF_FFFF_FFFF_FFFB) |
| 40 | mi: u64(0xFFFF_FFFF_FFFF_FFFF) |
| 41 | hi: u64(0x0000_0000_0000_0003) |
| 42 | } |
| 43 | |
| 44 | // Poly1305 mac instance |
| 45 | @[noinit] |
| 46 | pub struct Poly1305 { |
| 47 | mut: |
| 48 | // Poly1305 mac accepts 32 bytes (256 bits) of key input. |
| 49 | // This key is partitioned into two's 128 bits parts, r and s |
| 50 | // where r is clamped before stored and the s part is kept secret. |
| 51 | r unsigned.Uint128 |
| 52 | s unsigned.Uint128 |
| 53 | // Poly1305 accumulator |
| 54 | h Uint192 |
| 55 | // buffer |
| 56 | buffer []u8 = []u8{len: block_size} |
| 57 | leftover int |
| 58 | // The done flag tells us if the instance should not be used again. |
| 59 | // It's set to true after calling finish or reset on the instance. |
| 60 | done bool |
| 61 | } |
| 62 | |
| 63 | // create_tag generates 16 bytes tag, i.e., a one-time message authenticator code (mac) stored into out. |
| 64 | // It accepts the message bytes to be authenticated and the 32 bytes of the key. |
| 65 | // This is an oneshot function to create a tag and reset internal state after the call. |
| 66 | // For incremental updates, use the method based on Poly1305 mac instance. |
| 67 | pub fn create_tag(mut out []u8, msg []u8, key []u8) ! { |
| 68 | if out.len != tag_size { |
| 69 | return error('poly1305: bad out tag_size') |
| 70 | } |
| 71 | mut po := new(key)! |
| 72 | po.update(msg) |
| 73 | po.finish(mut out) |
| 74 | } |
| 75 | |
| 76 | // verify_tag verifies the tag is a valid message authentication code for the msg |
| 77 | // compared to the tag output from the calculated process. |
| 78 | // It returns `true` if two tags is matching, `false` otherwise. |
| 79 | pub fn verify_tag(tag []u8, msg []u8, key []u8) bool { |
| 80 | mut po := new(key) or { panic(err) } |
| 81 | mut out := []u8{len: tag_size} |
| 82 | po.update(msg) |
| 83 | po.finish(mut out) |
| 84 | return subtle.constant_time_compare(tag, out) == 1 |
| 85 | } |
| 86 | |
| 87 | // new creates a new Poly1305 mac instance from 32 bytes of key provided. |
| 88 | @[direct_array_access] |
| 89 | pub fn new(key []u8) !&Poly1305 { |
| 90 | if key.len != key_size { |
| 91 | return error('poly1305: bad key length') |
| 92 | } |
| 93 | // Read the r part of the key and clamp it. Clamping was done by clearing |
| 94 | // some bits of r before being used. The spec says the bits from 16 bytes of r, |
| 95 | // that are required to be clamped are: some odd index bytes, i.e., r[3], |
| 96 | // r[7], r[11], and r[15], are required to have their top four bits clear |
| 97 | // (be smaller than 16), and some even index bytes, i.e., r[4], r[8], and r[12], |
| 98 | // are required to have their bottom two bits clear (be divisible by 4), |
| 99 | // totally clearing 22 bits. In 128-bit little-endian format, the clamping |
| 100 | // mask value is 0x0ffffffc0ffffffc0ffffffc0fffffff. |
| 101 | // See the rmask0 and rmask1 constants above. |
| 102 | r := unsigned.Uint128{ |
| 103 | lo: binary.little_endian_u64(key[0..8]) & rmask0 |
| 104 | hi: binary.little_endian_u64(key[8..16]) & rmask1 |
| 105 | } |
| 106 | |
| 107 | // read s part from the rest bytes of key |
| 108 | s := unsigned.Uint128{ |
| 109 | lo: binary.little_endian_u64(key[16..24]) |
| 110 | hi: binary.little_endian_u64(key[24..32]) |
| 111 | } |
| 112 | |
| 113 | ctx := &Poly1305{ |
| 114 | r: r |
| 115 | s: s |
| 116 | } |
| 117 | return ctx |
| 118 | } |
| 119 | |
| 120 | // clone returns the clone of the current Poly1305 state |
| 121 | pub fn (po &Poly1305) clone() &Poly1305 { |
| 122 | poc := &Poly1305{ |
| 123 | r: po.r |
| 124 | s: po.s |
| 125 | h: po.h |
| 126 | buffer: po.buffer |
| 127 | leftover: po.leftover |
| 128 | done: po.done |
| 129 | } |
| 130 | return poc |
| 131 | } |
| 132 | |
| 133 | // update updates internal of Poly1305 state by message. |
| 134 | pub fn (mut po Poly1305) update(msg []u8) { |
| 135 | poly1305_update_block(mut po, msg) |
| 136 | } |
| 137 | |
| 138 | // verify verifies if the `tag` is a valid message authenticated code for current state of |
| 139 | // Poly1305 instance. Internally, it works on clone of the current instance. |
| 140 | pub fn (po Poly1305) verify(tag []u8) bool { |
| 141 | assert tag.len == tag_size |
| 142 | // we work on copy of current instance |
| 143 | mut ctx := po |
| 144 | mut out := []u8{len: tag_size} |
| 145 | if ctx.leftover > 0 { |
| 146 | poly1305_blocks(mut ctx, ctx.buffer[..ctx.leftover]) |
| 147 | } |
| 148 | finalize(mut out, mut ctx.h, ctx.s) |
| 149 | return subtle.constant_time_compare(tag[..], out) == 1 |
| 150 | } |
| 151 | |
| 152 | // finish finalizes the message authentication code calculation and stores the result into out. |
| 153 | // After calls this method, don't use the instance anymore to do most anything, but, |
| 154 | // you should reinitialize the instance with the new key with .`reinit` method instead. |
| 155 | pub fn (mut po Poly1305) finish(mut out []u8) { |
| 156 | if po.done { |
| 157 | panic('poly1305: has done, please reinit with the new key') |
| 158 | } |
| 159 | if po.leftover > 0 { |
| 160 | poly1305_blocks(mut po, po.buffer[..po.leftover]) |
| 161 | } |
| 162 | finalize(mut out, mut po.h, po.s) |
| 163 | // we reset instance to make its in bad unusable state. |
| 164 | po.reset() |
| 165 | } |
| 166 | |
| 167 | // reinit reinitializes Poly1305 mac instance by resetting internal fields, and |
| 168 | // then reinit instance with the new key. |
| 169 | pub fn (mut po Poly1305) reinit(key []u8) { |
| 170 | if key.len != key_size { |
| 171 | panic('bad key size') |
| 172 | } |
| 173 | // first, we reset the instance and than setup its again |
| 174 | po.reset() |
| 175 | po.r = unsigned.Uint128{ |
| 176 | lo: binary.little_endian_u64(key[0..8]) & rmask0 |
| 177 | hi: binary.little_endian_u64(key[8..16]) & rmask1 |
| 178 | } |
| 179 | po.s = unsigned.Uint128{ |
| 180 | lo: binary.little_endian_u64(key[16..24]) |
| 181 | hi: binary.little_endian_u64(key[24..32]) |
| 182 | } |
| 183 | // we set po.done to false, to make its usable again. |
| 184 | po.done = false |
| 185 | } |
| 186 | |
| 187 | // poly1305_update_block updates the internals of Poly1305 state instance by block of message. |
| 188 | fn poly1305_update_block(mut po Poly1305, msg []u8) { |
| 189 | if msg.len == 0 { |
| 190 | return |
| 191 | } |
| 192 | if po.done { |
| 193 | panic('poly1305: has done, please reinit with the new key') |
| 194 | } |
| 195 | |
| 196 | mut msglen := msg.len |
| 197 | mut idx := 0 |
| 198 | // handle leftover |
| 199 | if po.leftover > 0 { |
| 200 | want := math.min(block_size - po.leftover, msglen) |
| 201 | block := msg[idx..idx + want] |
| 202 | _ := copy(mut po.buffer[po.leftover..], block) |
| 203 | |
| 204 | msglen -= want |
| 205 | idx += want |
| 206 | po.leftover += want |
| 207 | |
| 208 | if po.leftover < block_size { |
| 209 | return |
| 210 | } |
| 211 | poly1305_blocks(mut po, po.buffer) |
| 212 | po.leftover = 0 |
| 213 | } |
| 214 | // process full blocks |
| 215 | if msglen >= block_size { |
| 216 | want := (msglen & ~(block_size - 1)) |
| 217 | mut block := unsafe { msg[idx..idx + want] } |
| 218 | poly1305_blocks(mut po, block) |
| 219 | idx += want |
| 220 | msglen -= want |
| 221 | } |
| 222 | // store leftover |
| 223 | if msglen > 0 { |
| 224 | po.leftover += copy(mut po.buffer[po.leftover..], msg[idx..]) |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | // reset zeroes the Poly1305 mac instance and puts it in an unusable state. |
| 229 | // You should reinit the instance with the new key instead to make it usable again. |
| 230 | fn (mut po Poly1305) reset() { |
| 231 | po.r = unsigned.uint128_zero |
| 232 | po.s = unsigned.uint128_zero |
| 233 | po.h = uint192_zero |
| 234 | po.leftover = 0 |
| 235 | unsafe { |
| 236 | po.buffer.reset() |
| 237 | } |
| 238 | // We set the done flag to true to prevent accidental calls |
| 239 | // to update or finish methods on the instance. |
| 240 | po.done = true |
| 241 | } |
| 242 | |
| 243 | // poly1305_blocks updates internal state of Poly1305 instance `po` with message `msg` |
| 244 | fn poly1305_blocks(mut po Poly1305, msg []u8) { |
| 245 | // nothing to do, just return |
| 246 | if msg.len == 0 { |
| 247 | return |
| 248 | } |
| 249 | // For correctness and clarity, we check whether r is properly clamped. |
| 250 | if po.r.lo & not_rmask0 != 0 && po.r.hi & not_rmask1 != 0 { |
| 251 | panic('poly1305: bad unclamped of r') |
| 252 | } |
| 253 | // We need the accumulator to be in correctly reduced form to make sure it is not overflowing. |
| 254 | // To be safe when used, only maximum of four low bits of the high part of the accumulator (h.hi) |
| 255 | // can be set, and the remaining high bits must not be set. |
| 256 | if po.h.hi & mask_high60bits != 0 { |
| 257 | panic('poly1305: h need to be reduced') |
| 258 | } |
| 259 | |
| 260 | // localize the thing |
| 261 | mut h := po.h |
| 262 | mut t := [4]u64{} |
| 263 | |
| 264 | // The main routine for updating internal poly1305 state with blocks of messages done with step: |
| 265 | // - chop messages into 16-byte blocks and read block as little-endian number; |
| 266 | // - add one bit beyond the number (its dependz on the size of the block); |
| 267 | // - add this number to the accumulator and then multiply the accumulator by "r". |
| 268 | // - perform partial reduction modulo p on the result by calling `poly1305_squeeze` function. |
| 269 | // - updates poly1305 accumulator with the new values |
| 270 | mut msglen := msg.len |
| 271 | mut idx := 0 |
| 272 | |
| 273 | for msglen > 0 { |
| 274 | // carry |
| 275 | mut c := u64(0) |
| 276 | if msglen >= block_size { |
| 277 | // Read the 16 bytes msg block as a little-endian number |
| 278 | // and stored into the 128 bits of Uint128 |
| 279 | block := msg[idx..idx + block_size] |
| 280 | m := unsigned.Uint128{ |
| 281 | lo: binary.little_endian_u64(block[0..8]) |
| 282 | hi: binary.little_endian_u64(block[8..16]) |
| 283 | } |
| 284 | // add msg block to accumulator, h += m |
| 285 | h, c = h.add_128_checked(m, 0) |
| 286 | |
| 287 | // // The rfc requires us to set a bit just above the message size, ie, |
| 288 | // add one bit beyond the number of octets. For a 16-byte block, |
| 289 | // this is equivalent to adding 2^128 to the number. |
| 290 | // so we can just add 1 to the high part of accumulator (h.hi += 1) |
| 291 | // h.hi has been checked above, so, its safe to assume its not overflow |
| 292 | h.hi += c + 1 |
| 293 | idx += block_size |
| 294 | msglen -= block_size |
| 295 | } else { |
| 296 | // The last one msg block might be shorter than 16 bytes long, |
| 297 | // pad it with zeros to align with block_size. |
| 298 | mut buf := []u8{len: block_size} |
| 299 | subtle.constant_time_copy(1, mut buf[..msglen], msg[idx..idx + msglen]) |
| 300 | |
| 301 | // set a bit above msg size. |
| 302 | buf[msglen] = u8(0x01) |
| 303 | // loads 16 bytes of message block |
| 304 | m := unsigned.Uint128{ |
| 305 | lo: binary.little_endian_u64(buf[0..8]) |
| 306 | hi: binary.little_endian_u64(buf[8..16]) |
| 307 | } |
| 308 | |
| 309 | // add this number to the accumulator, ie, h += m |
| 310 | h, c = h.add_128_checked(m, 0) |
| 311 | h.hi += c |
| 312 | idx += block_size |
| 313 | msglen -= block_size |
| 314 | } |
| 315 | |
| 316 | // perform h *= r and then do partial reduction modulo p to the output. |
| 317 | mul_h_by_r(mut t, mut h, po.r) |
| 318 | poly1305_squeeze(mut h, t) |
| 319 | } |
| 320 | |
| 321 | // updates internal accumulator |
| 322 | po.h = h |
| 323 | } |
| 324 | |
| 325 | // finalize finalizes the reduction of accumulator h, adds it with secret s, |
| 326 | // and then take 128 bits of h stored into out. |
| 327 | fn finalize(mut out []u8, mut ac Uint192, s unsigned.Uint128) { |
| 328 | assert out.len == tag_size |
| 329 | mut h := ac |
| 330 | // compute t = h - p = h - (2¹³⁰ - 5), and select h as the result if the |
| 331 | // subtraction underflows, and t otherwise. |
| 332 | t, b := h.sub_checked(p) |
| 333 | |
| 334 | // h = h if h < p else h - p |
| 335 | h.lo = select_64(b, h.lo, t.lo) |
| 336 | h.mi = select_64(b, h.mi, t.mi) |
| 337 | |
| 338 | // Finally, we compute tag = h + s mod 2¹²⁸ |
| 339 | // s is 128 bit of po.s, ie, Uint128 |
| 340 | tag, _ := h.add_128_checked(s, 0) |
| 341 | |
| 342 | // take only low 128 bit of x |
| 343 | binary.little_endian_put_u64(mut out[0..8], tag.lo) |
| 344 | binary.little_endian_put_u64(mut out[8..16], tag.mi) |
| 345 | } |
| 346 | |
| 347 | // mul_h_by_r multiplies accumulator h by r and stores the result into four of the 64 bit limbs |
| 348 | fn mul_h_by_r(mut t [4]u64, mut h Uint192, r unsigned.Uint128) { |
| 349 | // Let's multiply h by r, h *= r, and stores the result into raw 320 bits of xh and hb |
| 350 | // In properly clamped r and reduced h, hb.hi bits should not be set, ie, hb.hi == 0 |
| 351 | // see comments on mul_128_checked for details. |
| 352 | xh, hb := h.mul_128_checked(r) |
| 353 | |
| 354 | // check for high bits of the result is not overflowing 256 bits, so we can ignore |
| 355 | // high bit (hb.hi) of the Uint128 part, fifth 64 bits limb. |
| 356 | if hb.hi != 0 { |
| 357 | panic('poly1305: unexpected overflow, non-null 5th limb') |
| 358 | } |
| 359 | |
| 360 | // updates 4 64-bit limbs and ignore 5th limb |
| 361 | t[0] = xh.lo |
| 362 | t[1] = xh.mi |
| 363 | t[2] = xh.hi |
| 364 | t[3] = hb.lo |
| 365 | } |
| 366 | |
| 367 | // poly1305_squeeze reduces by doing partial reduction module p |
| 368 | // where t is result of previous h*r from mul_h_by_r calls. |
| 369 | fn poly1305_squeeze(mut h Uint192, t [4]u64) { |
| 370 | // we need to reduce 4 of 64 bit limbs in t modulo 2¹³⁰ - 5. |
| 371 | // we follow the go version, by splitting result at the 2¹³⁰ mark into h and cc, the carry. |
| 372 | mut ac := Uint192{ |
| 373 | lo: t[0] |
| 374 | mi: t[1] |
| 375 | hi: t[2] & mask_low2bits |
| 376 | } |
| 377 | mut cc := unsigned.uint128_new(t[2] & mask_high62bits, t[3]) |
| 378 | // reduction of general mersene prime, x = c * 2¹³⁰ + h = c * 5 + h (mod 2¹³⁰ - 5) |
| 379 | // because 2¹³⁰ = 5 (mod 2¹³⁰ - 5) |
| 380 | // here, we follow the go version |
| 381 | |
| 382 | mut c := u64(0) |
| 383 | ac, c = ac.add_128_checked(cc, c) |
| 384 | cc = shift_right_by2(mut cc) |
| 385 | |
| 386 | // once again |
| 387 | ac, c = ac.add_128_checked(cc, 0) |
| 388 | // updates accumulator |
| 389 | h = ac |
| 390 | } |
| 391 | |