| 1 | module edwards25519 |
| 2 | |
| 3 | import math.bits |
| 4 | import math.unsigned |
| 5 | import encoding.binary |
| 6 | import crypto.internal.subtle |
| 7 | |
| 8 | // embedded unsigned.Uint128 |
| 9 | struct Uint128 { |
| 10 | unsigned.Uint128 |
| 11 | } |
| 12 | |
| 13 | // Element represents an element of the edwards25519 GF(2^255-19). Note that this |
| 14 | // is not a cryptographically secure group, and should only be used to interact |
| 15 | // with edwards25519.Point coordinates. |
| 16 | // |
| 17 | // This type works similarly to math/big.Int, and all arguments and receivers |
| 18 | // are allowed to alias. |
| 19 | // |
| 20 | // The zero value is a valid zero element. |
| 21 | pub struct Element { |
| 22 | mut: |
| 23 | // An element t represents the integer |
| 24 | // t.l0 + t.l1*2^51 + t.l2*2^102 + t.l3*2^153 + t.l4*2^204 |
| 25 | // |
| 26 | // Between operations, all limbs are expected to be lower than 2^52. |
| 27 | l0 u64 |
| 28 | l1 u64 |
| 29 | l2 u64 |
| 30 | l3 u64 |
| 31 | l4 u64 |
| 32 | } |
| 33 | |
| 34 | const mask_low_51_bits = u64((1 << 51) - 1) |
| 35 | const fe_zero = Element{ |
| 36 | l0: 0 |
| 37 | l1: 0 |
| 38 | l2: 0 |
| 39 | l3: 0 |
| 40 | l4: 0 |
| 41 | } |
| 42 | const fe_one = Element{ |
| 43 | l0: 1 |
| 44 | l1: 0 |
| 45 | l2: 0 |
| 46 | l3: 0 |
| 47 | l4: 0 |
| 48 | } |
| 49 | // sqrt_m1 is 2^((p-1)/4), which squared is equal to -1 by Euler's Criterion. |
| 50 | const sqrt_m1 = Element{ |
| 51 | l0: 1718705420411056 |
| 52 | l1: 234908883556509 |
| 53 | l2: 2233514472574048 |
| 54 | l3: 2117202627021982 |
| 55 | l4: 765476049583133 |
| 56 | } |
| 57 | |
| 58 | // mul_64 returns a * b. |
| 59 | fn mul_64(a u64, b u64) Uint128 { |
| 60 | hi, lo := bits.mul_64(a, b) |
| 61 | return Uint128{ |
| 62 | lo: lo |
| 63 | hi: hi |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | // add_mul_64 returns v + a * b. |
| 68 | fn add_mul_64(v Uint128, a u64, b u64) Uint128 { |
| 69 | mut hi, lo := bits.mul_64(a, b) |
| 70 | low, carry := bits.add_64(lo, v.lo, 0) |
| 71 | hi, _ = bits.add_64(hi, v.hi, carry) |
| 72 | return Uint128{ |
| 73 | lo: low |
| 74 | hi: hi |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | // shift_right_by_51 returns a >> 51. a is assumed to be at most 115 bits. |
| 79 | fn shift_right_by_51(a Uint128) u64 { |
| 80 | return (a.hi << (64 - 51)) | (a.lo >> 51) |
| 81 | } |
| 82 | |
| 83 | fn fe_mul_generic(a Element, b Element) Element { |
| 84 | a0 := a.l0 |
| 85 | a1 := a.l1 |
| 86 | a2 := a.l2 |
| 87 | a3 := a.l3 |
| 88 | a4 := a.l4 |
| 89 | |
| 90 | b0 := b.l0 |
| 91 | b1 := b.l1 |
| 92 | b2 := b.l2 |
| 93 | b3 := b.l3 |
| 94 | b4 := b.l4 |
| 95 | |
| 96 | // Limb multiplication works like pen-and-paper columnar multiplication, but |
| 97 | // with 51-bit limbs instead of digits. |
| 98 | // |
| 99 | // a4 a3 a2 a1 a0 x |
| 100 | // b4 b3 b2 b1 b0 = |
| 101 | // ------------------------ |
| 102 | // a4b0 a3b0 a2b0 a1b0 a0b0 + |
| 103 | // a4b1 a3b1 a2b1 a1b1 a0b1 + |
| 104 | // a4b2 a3b2 a2b2 a1b2 a0b2 + |
| 105 | // a4b3 a3b3 a2b3 a1b3 a0b3 + |
| 106 | // a4b4 a3b4 a2b4 a1b4 a0b4 = |
| 107 | // ---------------------------------------------- |
| 108 | // r8 r7 r6 r5 r4 r3 r2 r1 r0 |
| 109 | // |
| 110 | // We can then use the reduction identity (a * 2²⁵⁵ + b = a * 19 + b) to |
| 111 | // reduce the limbs that would overflow 255 bits. r5 * 2²⁵⁵ becomes 19 * r5, |
| 112 | // r6 * 2³⁰⁶ becomes 19 * r6 * 2⁵¹, etc. |
| 113 | // |
| 114 | // Reduction can be carried out simultaneously to multiplication. For |
| 115 | // example, we do not compute r5: whenever the result of a multiplication |
| 116 | // belongs to r5, like a1b4, we multiply it by 19 and add the result to r0. |
| 117 | // |
| 118 | // a4b0 a3b0 a2b0 a1b0 a0b0 + |
| 119 | // a3b1 a2b1 a1b1 a0b1 19×a4b1 + |
| 120 | // a2b2 a1b2 a0b2 19×a4b2 19×a3b2 + |
| 121 | // a1b3 a0b3 19×a4b3 19×a3b3 19×a2b3 + |
| 122 | // a0b4 19×a4b4 19×a3b4 19×a2b4 19×a1b4 = |
| 123 | // -------------------------------------- |
| 124 | // r4 r3 r2 r1 r0 |
| 125 | // |
| 126 | // Finally we add up the columns into wide, overlapping limbs. |
| 127 | |
| 128 | a1_19 := a1 * 19 |
| 129 | a2_19 := a2 * 19 |
| 130 | a3_19 := a3 * 19 |
| 131 | a4_19 := a4 * 19 |
| 132 | |
| 133 | // r0 = a0×b0 + 19×(a1×b4 + a2×b3 + a3×b2 + a4×b1) |
| 134 | mut r0 := mul_64(a0, b0) |
| 135 | r0 = add_mul_64(r0, a1_19, b4) |
| 136 | r0 = add_mul_64(r0, a2_19, b3) |
| 137 | r0 = add_mul_64(r0, a3_19, b2) |
| 138 | r0 = add_mul_64(r0, a4_19, b1) |
| 139 | |
| 140 | // r1 = a0×b1 + a1×b0 + 19×(a2×b4 + a3×b3 + a4×b2) |
| 141 | mut r1 := mul_64(a0, b1) |
| 142 | r1 = add_mul_64(r1, a1, b0) |
| 143 | r1 = add_mul_64(r1, a2_19, b4) |
| 144 | r1 = add_mul_64(r1, a3_19, b3) |
| 145 | r1 = add_mul_64(r1, a4_19, b2) |
| 146 | |
| 147 | // r2 = a0×b2 + a1×b1 + a2×b0 + 19×(a3×b4 + a4×b3) |
| 148 | mut r2 := mul_64(a0, b2) |
| 149 | r2 = add_mul_64(r2, a1, b1) |
| 150 | r2 = add_mul_64(r2, a2, b0) |
| 151 | r2 = add_mul_64(r2, a3_19, b4) |
| 152 | r2 = add_mul_64(r2, a4_19, b3) |
| 153 | |
| 154 | // r3 = a0×b3 + a1×b2 + a2×b1 + a3×b0 + 19×a4×b4 |
| 155 | mut r3 := mul_64(a0, b3) |
| 156 | r3 = add_mul_64(r3, a1, b2) |
| 157 | r3 = add_mul_64(r3, a2, b1) |
| 158 | r3 = add_mul_64(r3, a3, b0) |
| 159 | r3 = add_mul_64(r3, a4_19, b4) |
| 160 | |
| 161 | // r4 = a0×b4 + a1×b3 + a2×b2 + a3×b1 + a4×b0 |
| 162 | mut r4 := mul_64(a0, b4) |
| 163 | r4 = add_mul_64(r4, a1, b3) |
| 164 | r4 = add_mul_64(r4, a2, b2) |
| 165 | r4 = add_mul_64(r4, a3, b1) |
| 166 | r4 = add_mul_64(r4, a4, b0) |
| 167 | |
| 168 | // After the multiplication, we need to reduce (carry) the five coefficients |
| 169 | // to obtain a result with limbs that are at most slightly larger than 2⁵¹, |
| 170 | // to respect the Element invariant. |
| 171 | // |
| 172 | // Overall, the reduction works the same as carryPropagate, except with |
| 173 | // wider inputs: we take the carry for each coefficient by shifting it right |
| 174 | // by 51, and add it to the limb above it. The top carry is multiplied by 19 |
| 175 | // according to the reduction identity and added to the lowest limb. |
| 176 | // |
| 177 | // The largest coefficient (r0) will be at most 111 bits, which guarantees |
| 178 | // that all carries are at most 111 - 51 = 60 bits, which fits in a u64. |
| 179 | // |
| 180 | // r0 = a0×b0 + 19×(a1×b4 + a2×b3 + a3×b2 + a4×b1) |
| 181 | // r0 < 2⁵²×2⁵² + 19×(2⁵²×2⁵² + 2⁵²×2⁵² + 2⁵²×2⁵² + 2⁵²×2⁵²) |
| 182 | // r0 < (1 + 19 × 4) × 2⁵² × 2⁵² |
| 183 | // r0 < 2⁷ × 2⁵² × 2⁵² |
| 184 | // r0 < 2¹¹¹ |
| 185 | // |
| 186 | // Moreover, the top coefficient (r4) is at most 107 bits, so c4 is at most |
| 187 | // 56 bits, and c4 * 19 is at most 61 bits, which again fits in a u64 and |
| 188 | // allows us to easily apply the reduction identity. |
| 189 | // |
| 190 | // r4 = a0×b4 + a1×b3 + a2×b2 + a3×b1 + a4×b0 |
| 191 | // r4 < 5 × 2⁵² × 2⁵² |
| 192 | // r4 < 2¹⁰⁷ |
| 193 | // |
| 194 | |
| 195 | c0 := shift_right_by_51(r0) |
| 196 | c1 := shift_right_by_51(r1) |
| 197 | c2 := shift_right_by_51(r2) |
| 198 | c3 := shift_right_by_51(r3) |
| 199 | c4 := shift_right_by_51(r4) |
| 200 | |
| 201 | rr0 := r0.lo & mask_low_51_bits + c4 * 19 |
| 202 | rr1 := r1.lo & mask_low_51_bits + c0 |
| 203 | rr2 := r2.lo & mask_low_51_bits + c1 |
| 204 | rr3 := r3.lo & mask_low_51_bits + c2 |
| 205 | rr4 := r4.lo & mask_low_51_bits + c3 |
| 206 | |
| 207 | // Now all coefficients fit into 64-bit registers but are still too large to |
| 208 | // be passed around as a Element. We therefore do one last carry chain, |
| 209 | // where the carries will be small enough to fit in the wiggle room above 2⁵¹. |
| 210 | mut v := Element{ |
| 211 | l0: rr0 |
| 212 | l1: rr1 |
| 213 | l2: rr2 |
| 214 | l3: rr3 |
| 215 | l4: rr4 |
| 216 | } |
| 217 | // v.carryPropagate() |
| 218 | // using `carry_propagate_generic()` instead |
| 219 | v = v.carry_propagate_generic() |
| 220 | return v |
| 221 | } |
| 222 | |
| 223 | // carry_propagate_generic brings the limbs below 52 bits by applying the reduction |
| 224 | // identity (a * 2²⁵⁵ + b = a * 19 + b) to the l4 carry. |
| 225 | fn (mut v Element) carry_propagate_generic() Element { |
| 226 | c0 := v.l0 >> 51 |
| 227 | c1 := v.l1 >> 51 |
| 228 | c2 := v.l2 >> 51 |
| 229 | c3 := v.l3 >> 51 |
| 230 | c4 := v.l4 >> 51 |
| 231 | |
| 232 | v.l0 = v.l0 & mask_low_51_bits + c4 * 19 |
| 233 | v.l1 = v.l1 & mask_low_51_bits + c0 |
| 234 | v.l2 = v.l2 & mask_low_51_bits + c1 |
| 235 | v.l3 = v.l3 & mask_low_51_bits + c2 |
| 236 | v.l4 = v.l4 & mask_low_51_bits + c3 |
| 237 | return v |
| 238 | } |
| 239 | |
| 240 | fn fe_square_generic(a Element) Element { |
| 241 | l0 := a.l0 |
| 242 | l1 := a.l1 |
| 243 | l2 := a.l2 |
| 244 | l3 := a.l3 |
| 245 | l4 := a.l4 |
| 246 | |
| 247 | // Squaring works precisely like multiplication above, but thanks to its |
| 248 | // symmetry we get to group a few terms together. |
| 249 | // |
| 250 | // l4 l3 l2 l1 l0 x |
| 251 | // l4 l3 l2 l1 l0 = |
| 252 | // ------------------------ |
| 253 | // l4l0 l3l0 l2l0 l1l0 l0l0 + |
| 254 | // l4l1 l3l1 l2l1 l1l1 l0l1 + |
| 255 | // l4l2 l3l2 l2l2 l1l2 l0l2 + |
| 256 | // l4l3 l3l3 l2l3 l1l3 l0l3 + |
| 257 | // l4l4 l3l4 l2l4 l1l4 l0l4 = |
| 258 | // ---------------------------------------------- |
| 259 | // r8 r7 r6 r5 r4 r3 r2 r1 r0 |
| 260 | // |
| 261 | // l4l0 l3l0 l2l0 l1l0 l0l0 + |
| 262 | // l3l1 l2l1 l1l1 l0l1 19×l4l1 + |
| 263 | // l2l2 l1l2 l0l2 19×l4l2 19×l3l2 + |
| 264 | // l1l3 l0l3 19×l4l3 19×l3l3 19×l2l3 + |
| 265 | // l0l4 19×l4l4 19×l3l4 19×l2l4 19×l1l4 = |
| 266 | // -------------------------------------- |
| 267 | // r4 r3 r2 r1 r0 |
| 268 | // |
| 269 | // With precomputed 2×, 19×, and 2×19× terms, we can compute each limb with |
| 270 | // only three mul_64 and four Add64, instead of five and eight. |
| 271 | |
| 272 | l0_2 := l0 * 2 |
| 273 | l1_2 := l1 * 2 |
| 274 | |
| 275 | l1_38 := l1 * 38 |
| 276 | l2_38 := l2 * 38 |
| 277 | l3_38 := l3 * 38 |
| 278 | |
| 279 | l3_19 := l3 * 19 |
| 280 | l4_19 := l4 * 19 |
| 281 | |
| 282 | // r0 = l0×l0 + 19×(l1×l4 + l2×l3 + l3×l2 + l4×l1) = l0×l0 + 19×2×(l1×l4 + l2×l3) |
| 283 | mut r0 := mul_64(l0, l0) |
| 284 | r0 = add_mul_64(r0, l1_38, l4) |
| 285 | r0 = add_mul_64(r0, l2_38, l3) |
| 286 | |
| 287 | // r1 = l0×l1 + l1×l0 + 19×(l2×l4 + l3×l3 + l4×l2) = 2×l0×l1 + 19×2×l2×l4 + 19×l3×l3 |
| 288 | mut r1 := mul_64(l0_2, l1) |
| 289 | r1 = add_mul_64(r1, l2_38, l4) |
| 290 | r1 = add_mul_64(r1, l3_19, l3) |
| 291 | |
| 292 | // r2 = l0×l2 + l1×l1 + l2×l0 + 19×(l3×l4 + l4×l3) = 2×l0×l2 + l1×l1 + 19×2×l3×l4 |
| 293 | mut r2 := mul_64(l0_2, l2) |
| 294 | r2 = add_mul_64(r2, l1, l1) |
| 295 | r2 = add_mul_64(r2, l3_38, l4) |
| 296 | |
| 297 | // r3 = l0×l3 + l1×l2 + l2×l1 + l3×l0 + 19×l4×l4 = 2×l0×l3 + 2×l1×l2 + 19×l4×l4 |
| 298 | mut r3 := mul_64(l0_2, l3) |
| 299 | r3 = add_mul_64(r3, l1_2, l2) |
| 300 | r3 = add_mul_64(r3, l4_19, l4) |
| 301 | |
| 302 | // r4 = l0×l4 + l1×l3 + l2×l2 + l3×l1 + l4×l0 = 2×l0×l4 + 2×l1×l3 + l2×l2 |
| 303 | mut r4 := mul_64(l0_2, l4) |
| 304 | r4 = add_mul_64(r4, l1_2, l3) |
| 305 | r4 = add_mul_64(r4, l2, l2) |
| 306 | |
| 307 | c0 := shift_right_by_51(r0) |
| 308 | c1 := shift_right_by_51(r1) |
| 309 | c2 := shift_right_by_51(r2) |
| 310 | c3 := shift_right_by_51(r3) |
| 311 | c4 := shift_right_by_51(r4) |
| 312 | |
| 313 | rr0 := r0.lo & mask_low_51_bits + c4 * 19 |
| 314 | rr1 := r1.lo & mask_low_51_bits + c0 |
| 315 | rr2 := r2.lo & mask_low_51_bits + c1 |
| 316 | rr3 := r3.lo & mask_low_51_bits + c2 |
| 317 | rr4 := r4.lo & mask_low_51_bits + c3 |
| 318 | |
| 319 | mut v := Element{ |
| 320 | l0: rr0 |
| 321 | l1: rr1 |
| 322 | l2: rr2 |
| 323 | l3: rr3 |
| 324 | l4: rr4 |
| 325 | } |
| 326 | v = v.carry_propagate_generic() |
| 327 | return v |
| 328 | } |
| 329 | |
| 330 | // zero sets v = 0, and returns v. |
| 331 | pub fn (mut v Element) zero() Element { |
| 332 | v = fe_zero |
| 333 | return v |
| 334 | } |
| 335 | |
| 336 | // one sets v = 1, and returns v. |
| 337 | pub fn (mut v Element) one() Element { |
| 338 | v = fe_one |
| 339 | return v |
| 340 | } |
| 341 | |
| 342 | // reduce reduces v modulo 2^255 - 19 and returns it. |
| 343 | pub fn (mut v Element) reduce() Element { |
| 344 | v = v.carry_propagate_generic() |
| 345 | |
| 346 | // After the light reduction we now have a edwards25519 element representation |
| 347 | // v < 2^255 + 2^13 * 19, but need v < 2^255 - 19. |
| 348 | |
| 349 | // If v >= 2^255 - 19, then v + 19 >= 2^255, which would overflow 2^255 - 1, |
| 350 | // generating a carry. That is, c will be 0 if v < 2^255 - 19, and 1 otherwise. |
| 351 | mut c := (v.l0 + 19) >> 51 |
| 352 | c = (v.l1 + c) >> 51 |
| 353 | c = (v.l2 + c) >> 51 |
| 354 | c = (v.l3 + c) >> 51 |
| 355 | c = (v.l4 + c) >> 51 |
| 356 | |
| 357 | // If v < 2^255 - 19 and c = 0, this will be a no-op. Otherwise, it's |
| 358 | // effectively applying the reduction identity to the carry. |
| 359 | v.l0 += 19 * c |
| 360 | |
| 361 | v.l1 += v.l0 >> 51 |
| 362 | v.l0 = v.l0 & mask_low_51_bits |
| 363 | v.l2 += v.l1 >> 51 |
| 364 | v.l1 = v.l1 & mask_low_51_bits |
| 365 | v.l3 += v.l2 >> 51 |
| 366 | v.l2 = v.l2 & mask_low_51_bits |
| 367 | v.l4 += v.l3 >> 51 |
| 368 | v.l3 = v.l3 & mask_low_51_bits |
| 369 | // no additional carry |
| 370 | v.l4 = v.l4 & mask_low_51_bits |
| 371 | |
| 372 | return v |
| 373 | } |
| 374 | |
| 375 | // add sets v = a + b, and returns v. |
| 376 | pub fn (mut v Element) add(a Element, b Element) Element { |
| 377 | v.l0 = a.l0 + b.l0 |
| 378 | v.l1 = a.l1 + b.l1 |
| 379 | v.l2 = a.l2 + b.l2 |
| 380 | v.l3 = a.l3 + b.l3 |
| 381 | v.l4 = a.l4 + b.l4 |
| 382 | // Using the generic implementation here is actually faster than the |
| 383 | // assembly. Probably because the body of this function is so simple that |
| 384 | // the compiler can figure out better optimizations by inlining the carry |
| 385 | // propagation. |
| 386 | return v.carry_propagate_generic() |
| 387 | } |
| 388 | |
| 389 | // subtract sets v = a - b, and returns v. |
| 390 | pub fn (mut v Element) subtract(a Element, b Element) Element { |
| 391 | // We first add 2 * p, to guarantee the subtraction won't underflow, and |
| 392 | // then subtract b (which can be up to 2^255 + 2^13 * 19). |
| 393 | v.l0 = (a.l0 + 0xFFFFFFFFFFFDA) - b.l0 |
| 394 | v.l1 = (a.l1 + 0xFFFFFFFFFFFFE) - b.l1 |
| 395 | v.l2 = (a.l2 + 0xFFFFFFFFFFFFE) - b.l2 |
| 396 | v.l3 = (a.l3 + 0xFFFFFFFFFFFFE) - b.l3 |
| 397 | v.l4 = (a.l4 + 0xFFFFFFFFFFFFE) - b.l4 |
| 398 | return v.carry_propagate_generic() |
| 399 | } |
| 400 | |
| 401 | // negate sets v = -a, and returns v. |
| 402 | pub fn (mut v Element) negate(a Element) Element { |
| 403 | return v.subtract(fe_zero, a) |
| 404 | } |
| 405 | |
| 406 | // invert sets v = 1/z mod p, and returns v. |
| 407 | // |
| 408 | // If z == 0, invert returns v = 0. |
| 409 | pub fn (mut v Element) invert(z Element) Element { |
| 410 | // Inversion is implemented as exponentiation with exponent p − 2. It uses the |
| 411 | // same sequence of 255 squarings and 11 multiplications as [Curve25519]. |
| 412 | mut z2 := Element{} |
| 413 | mut z9 := Element{} |
| 414 | mut z11 := Element{} |
| 415 | mut z2_5_0 := Element{} |
| 416 | mut z2_10_0 := Element{} |
| 417 | mut z2_20_0 := Element{} |
| 418 | mut z2_50_0 := Element{} |
| 419 | mut z2_100_0 := Element{} |
| 420 | mut t := Element{} |
| 421 | |
| 422 | z2.square(z) // 2 |
| 423 | t.square(z2) // 4 |
| 424 | t.square(t) // 8 |
| 425 | z9.multiply(t, z) // 9 |
| 426 | z11.multiply(z9, z2) // 11 |
| 427 | t.square(z11) // 22 |
| 428 | z2_5_0.multiply(t, z9) // 31 = 2^5 - 2^0 |
| 429 | |
| 430 | t.square(z2_5_0) // 2^6 - 2^1 |
| 431 | for i := 0; i < 4; i++ { |
| 432 | t.square(t) // 2^10 - 2^5 |
| 433 | } |
| 434 | z2_10_0.multiply(t, z2_5_0) // 2^10 - 2^0 |
| 435 | |
| 436 | t.square(z2_10_0) // 2^11 - 2^1 |
| 437 | for i := 0; i < 9; i++ { |
| 438 | t.square(t) // 2^20 - 2^10 |
| 439 | } |
| 440 | z2_20_0.multiply(t, z2_10_0) // 2^20 - 2^0 |
| 441 | |
| 442 | t.square(z2_20_0) // 2^21 - 2^1 |
| 443 | for i := 0; i < 19; i++ { |
| 444 | t.square(t) // 2^40 - 2^20 |
| 445 | } |
| 446 | t.multiply(t, z2_20_0) // 2^40 - 2^0 |
| 447 | |
| 448 | t.square(t) // 2^41 - 2^1 |
| 449 | for i := 0; i < 9; i++ { |
| 450 | t.square(t) // 2^50 - 2^10 |
| 451 | } |
| 452 | z2_50_0.multiply(t, z2_10_0) // 2^50 - 2^0 |
| 453 | |
| 454 | t.square(z2_50_0) // 2^51 - 2^1 |
| 455 | for i := 0; i < 49; i++ { |
| 456 | t.square(t) // 2^100 - 2^50 |
| 457 | } |
| 458 | z2_100_0.multiply(t, z2_50_0) // 2^100 - 2^0 |
| 459 | |
| 460 | t.square(z2_100_0) // 2^101 - 2^1 |
| 461 | for i := 0; i < 99; i++ { |
| 462 | t.square(t) // 2^200 - 2^100 |
| 463 | } |
| 464 | t.multiply(t, z2_100_0) // 2^200 - 2^0 |
| 465 | |
| 466 | t.square(t) // 2^201 - 2^1 |
| 467 | for i := 0; i < 49; i++ { |
| 468 | t.square(t) // 2^250 - 2^50 |
| 469 | } |
| 470 | t.multiply(t, z2_50_0) // 2^250 - 2^0 |
| 471 | |
| 472 | t.square(t) // 2^251 - 2^1 |
| 473 | t.square(t) // 2^252 - 2^2 |
| 474 | t.square(t) // 2^253 - 2^3 |
| 475 | t.square(t) // 2^254 - 2^4 |
| 476 | t.square(t) // 2^255 - 2^5 |
| 477 | |
| 478 | return v.multiply(t, z11) // 2^255 - 21 |
| 479 | } |
| 480 | |
| 481 | // square sets v = x * x, and returns v. |
| 482 | pub fn (mut v Element) square(x Element) Element { |
| 483 | v = fe_square_generic(x) |
| 484 | return v |
| 485 | } |
| 486 | |
| 487 | // multiply sets v = x * y, and returns v. |
| 488 | pub fn (mut v Element) multiply(x Element, y Element) Element { |
| 489 | v = fe_mul_generic(x, y) |
| 490 | return v |
| 491 | } |
| 492 | |
| 493 | // mul_51 returns lo + hi * 2⁵¹ = a * b. |
| 494 | fn mul_51(a u64, b u32) (u64, u64) { |
| 495 | mh, ml := bits.mul_64(a, u64(b)) |
| 496 | lo := ml & mask_low_51_bits |
| 497 | hi := (mh << 13) | (ml >> 51) |
| 498 | return lo, hi |
| 499 | } |
| 500 | |
| 501 | // pow_22523 set v = x^((p-5)/8), and returns v. (p-5)/8 is 2^252-3. |
| 502 | pub fn (mut v Element) pow_22523(x Element) Element { |
| 503 | mut t0, mut t1, mut t2 := Element{}, Element{}, Element{} |
| 504 | |
| 505 | t0.square(x) // x^2 |
| 506 | t1.square(t0) // x^4 |
| 507 | t1.square(t1) // x^8 |
| 508 | t1.multiply(x, t1) // x^9 |
| 509 | t0.multiply(t0, t1) // x^11 |
| 510 | t0.square(t0) // x^22 |
| 511 | t0.multiply(t1, t0) // x^31 |
| 512 | t1.square(t0) // x^62 |
| 513 | for i := 1; i < 5; i++ { // x^992 |
| 514 | t1.square(t1) |
| 515 | } |
| 516 | t0.multiply(t1, t0) // x^1023 -> 1023 = 2^10 - 1 |
| 517 | t1.square(t0) // 2^11 - 2 |
| 518 | for i := 1; i < 10; i++ { // 2^20 - 2^10 |
| 519 | t1.square(t1) |
| 520 | } |
| 521 | t1.multiply(t1, t0) // 2^20 - 1 |
| 522 | t2.square(t1) // 2^21 - 2 |
| 523 | for i := 1; i < 20; i++ { // 2^40 - 2^20 |
| 524 | t2.square(t2) |
| 525 | } |
| 526 | t1.multiply(t2, t1) // 2^40 - 1 |
| 527 | t1.square(t1) // 2^41 - 2 |
| 528 | for i := 1; i < 10; i++ { // 2^50 - 2^10 |
| 529 | t1.square(t1) |
| 530 | } |
| 531 | t0.multiply(t1, t0) // 2^50 - 1 |
| 532 | t1.square(t0) // 2^51 - 2 |
| 533 | for i := 1; i < 50; i++ { // 2^100 - 2^50 |
| 534 | t1.square(t1) |
| 535 | } |
| 536 | t1.multiply(t1, t0) // 2^100 - 1 |
| 537 | t2.square(t1) // 2^101 - 2 |
| 538 | for i := 1; i < 100; i++ { // 2^200 - 2^100 |
| 539 | t2.square(t2) |
| 540 | } |
| 541 | t1.multiply(t2, t1) // 2^200 - 1 |
| 542 | t1.square(t1) // 2^201 - 2 |
| 543 | for i := 1; i < 50; i++ { // 2^250 - 2^50 |
| 544 | t1.square(t1) |
| 545 | } |
| 546 | t0.multiply(t1, t0) // 2^250 - 1 |
| 547 | t0.square(t0) // 2^251 - 2 |
| 548 | t0.square(t0) // 2^252 - 4 |
| 549 | return v.multiply(t0, x) // 2^252 - 3 -> x^(2^252-3) |
| 550 | } |
| 551 | |
| 552 | // sqrt_ratio sets r to the non-negative square root of the ratio of u and v. |
| 553 | // |
| 554 | // If u/v is square, sqrt_ratio returns r and 1. If u/v is not square, sqrt_ratio |
| 555 | // sets r according to Section 4.3 of draft-irtf-cfrg-ristretto255-decaf448-00, |
| 556 | // and returns r and 0. |
| 557 | pub fn (mut r Element) sqrt_ratio(u Element, v Element) (Element, int) { |
| 558 | mut a, mut b := Element{}, Element{} |
| 559 | |
| 560 | // r = (u * v3) * (u * v7)^((p-5)/8) |
| 561 | v2 := a.square(v) |
| 562 | uv3 := b.multiply(u, b.multiply(v2, v)) |
| 563 | uv7 := a.multiply(uv3, a.square(v2)) |
| 564 | r.multiply(uv3, r.pow_22523(uv7)) |
| 565 | |
| 566 | mut check := a.multiply(v, a.square(r)) // check = v * r^2 |
| 567 | |
| 568 | mut uneg := b.negate(u) |
| 569 | correct_sign_sqrt := check.equal(u) |
| 570 | flipped_sign_sqrt := check.equal(uneg) |
| 571 | flipped_sign_sqrt_i := check.equal(uneg.multiply(uneg, sqrt_m1)) |
| 572 | |
| 573 | rprime := b.multiply(r, sqrt_m1) // r_prime = SQRT_M1 * r |
| 574 | // r = CT_selected(r_prime IF flipped_sign_sqrt | flipped_sign_sqrt_i ELSE r) |
| 575 | r.selected(rprime, r, flipped_sign_sqrt | flipped_sign_sqrt_i) |
| 576 | |
| 577 | r.absolute(r) // Choose the nonnegative square root. |
| 578 | return r, correct_sign_sqrt | flipped_sign_sqrt |
| 579 | } |
| 580 | |
| 581 | // mask_64_bits returns 0xffffffff if cond is 1, and 0 otherwise. |
| 582 | fn mask_64_bits(cond int) u64 { |
| 583 | // in go, `^` operates on bit mean NOT, flip bit |
| 584 | // in v, its a ~ bitwise NOT |
| 585 | return ~(u64(cond) - 1) |
| 586 | } |
| 587 | |
| 588 | // selected sets v to a if cond == 1, and to b if cond == 0. |
| 589 | pub fn (mut v Element) selected(a Element, b Element, cond int) Element { |
| 590 | // see above notes |
| 591 | m := mask_64_bits(cond) |
| 592 | v.l0 = (m & a.l0) | (~m & b.l0) |
| 593 | v.l1 = (m & a.l1) | (~m & b.l1) |
| 594 | v.l2 = (m & a.l2) | (~m & b.l2) |
| 595 | v.l3 = (m & a.l3) | (~m & b.l3) |
| 596 | v.l4 = (m & a.l4) | (~m & b.l4) |
| 597 | return v |
| 598 | } |
| 599 | |
| 600 | // is_negative returns 1 if v is negative, and 0 otherwise. |
| 601 | pub fn (mut v Element) is_negative() int { |
| 602 | return int(v.bytes()[0] & 1) |
| 603 | } |
| 604 | |
| 605 | // absolute sets v to |u|, and returns v. |
| 606 | pub fn (mut v Element) absolute(u Element) Element { |
| 607 | mut e := Element{} |
| 608 | mut uk := u |
| 609 | return v.selected(e.negate(uk), uk, uk.is_negative()) |
| 610 | } |
| 611 | |
| 612 | // set sets v = a, and returns v. |
| 613 | pub fn (mut v Element) set(a Element) Element { |
| 614 | v = a |
| 615 | return v |
| 616 | } |
| 617 | |
| 618 | // set_bytes sets v to x, where x is a 32-byte little-endian encoding. If x is |
| 619 | // not of the right length, SetUniformBytes returns an error, and the |
| 620 | // receiver is unchanged. |
| 621 | // |
| 622 | // Consistent with RFC 7748, the most significant bit (the high bit of the |
| 623 | // last byte) is ignored, and non-canonical values (2^255-19 through 2^255-1) |
| 624 | // are accepted. Note that this is laxer than specified by RFC 8032. |
| 625 | pub fn (mut v Element) set_bytes(x []u8) !Element { |
| 626 | if x.len != 32 { |
| 627 | return error('edwards25519: invalid edwards25519 element input size') |
| 628 | } |
| 629 | |
| 630 | // Bits 0:51 (bytes 0:8, bits 0:64, shift 0, mask 51). |
| 631 | v.l0 = binary.little_endian_u64(x[0..8]) |
| 632 | v.l0 &= mask_low_51_bits |
| 633 | // Bits 51:102 (bytes 6:14, bits 48:112, shift 3, mask 51). |
| 634 | v.l1 = binary.little_endian_u64(x[6..14]) >> 3 |
| 635 | v.l1 &= mask_low_51_bits |
| 636 | // Bits 102:153 (bytes 12:20, bits 96:160, shift 6, mask 51). |
| 637 | v.l2 = binary.little_endian_u64(x[12..20]) >> 6 |
| 638 | v.l2 &= mask_low_51_bits |
| 639 | // Bits 153:204 (bytes 19:27, bits 152:216, shift 1, mask 51). |
| 640 | v.l3 = binary.little_endian_u64(x[19..27]) >> 1 |
| 641 | v.l3 &= mask_low_51_bits |
| 642 | // Bits 204:251 (bytes 24:32, bits 192:256, shift 12, mask 51). |
| 643 | // Note: not bytes 25:33, shift 4, to avoid overread. |
| 644 | v.l4 = binary.little_endian_u64(x[24..32]) >> 12 |
| 645 | v.l4 &= mask_low_51_bits |
| 646 | |
| 647 | return v |
| 648 | } |
| 649 | |
| 650 | // bytes returns the canonical 32-byte little-endian encoding of v. |
| 651 | pub fn (mut v Element) bytes() []u8 { |
| 652 | // This function is outlined to make the allocations inline in the caller |
| 653 | // rather than happen on the heap. |
| 654 | // out := v.bytes_generic() |
| 655 | return v.bytes_generic() |
| 656 | } |
| 657 | |
| 658 | fn (mut v Element) bytes_generic() []u8 { |
| 659 | mut out := []u8{len: 32} |
| 660 | |
| 661 | v = v.reduce() |
| 662 | |
| 663 | mut buf := []u8{len: 8} |
| 664 | idxs := [v.l0, v.l1, v.l2, v.l3, v.l4] |
| 665 | for i, l in idxs { |
| 666 | bits_offset := i * 51 |
| 667 | binary.little_endian_put_u64(mut buf, l << u32(bits_offset % 8)) |
| 668 | for j, bb in buf { |
| 669 | off := bits_offset / 8 + j |
| 670 | if off >= out.len { |
| 671 | break |
| 672 | } |
| 673 | out[off] |= bb |
| 674 | } |
| 675 | } |
| 676 | |
| 677 | return out |
| 678 | } |
| 679 | |
| 680 | // equal returns 1 if v and u are equal, and 0 otherwise. |
| 681 | pub fn (mut v Element) equal(ue Element) int { |
| 682 | mut u := ue |
| 683 | sa := u.bytes() |
| 684 | sv := v.bytes() |
| 685 | return subtle.constant_time_compare(sa, sv) |
| 686 | } |
| 687 | |
| 688 | // swap swaps v and u if cond == 1 or leaves them unchanged if cond == 0, and returns v. |
| 689 | pub fn (mut v Element) swap(mut u Element, cond int) { |
| 690 | // mut u := ue |
| 691 | m := mask_64_bits(cond) |
| 692 | mut t := m & (v.l0 ^ u.l0) |
| 693 | v.l0 ^= t |
| 694 | u.l0 ^= t |
| 695 | t = m & (v.l1 ^ u.l1) |
| 696 | v.l1 ^= t |
| 697 | u.l1 ^= t |
| 698 | t = m & (v.l2 ^ u.l2) |
| 699 | v.l2 ^= t |
| 700 | u.l2 ^= t |
| 701 | t = m & (v.l3 ^ u.l3) |
| 702 | v.l3 ^= t |
| 703 | u.l3 ^= t |
| 704 | t = m & (v.l4 ^ u.l4) |
| 705 | v.l4 ^= t |
| 706 | u.l4 ^= t |
| 707 | } |
| 708 | |
| 709 | // mult_32 sets v = x * y, and returns v. |
| 710 | pub fn (mut v Element) mult_32(x Element, y u32) Element { |
| 711 | x0lo, x0hi := mul_51(x.l0, y) |
| 712 | x1lo, x1hi := mul_51(x.l1, y) |
| 713 | x2lo, x2hi := mul_51(x.l2, y) |
| 714 | x3lo, x3hi := mul_51(x.l3, y) |
| 715 | x4lo, x4hi := mul_51(x.l4, y) |
| 716 | v.l0 = x0lo + 19 * x4hi // carried over per the reduction identity |
| 717 | v.l1 = x1lo + x0hi |
| 718 | v.l2 = x2lo + x1hi |
| 719 | v.l3 = x3lo + x2hi |
| 720 | v.l4 = x4lo + x3hi |
| 721 | // The hi portions are going to be only 32 bits, plus any previous excess, |
| 722 | // so we can skip the carry propagation. |
| 723 | return v |
| 724 | } |
| 725 | |
| 726 | fn swap_endianness(mut buf []u8) []u8 { |
| 727 | for i := 0; i < buf.len / 2; i++ { |
| 728 | buf[i], buf[buf.len - i - 1] = buf[buf.len - i - 1], buf[i] |
| 729 | } |
| 730 | return buf |
| 731 | } |
| 732 | |