| 1 | // Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | module bits |
| 5 | |
| 6 | fn C._umul128(x u64, y u64, result_hi &u64) u64 |
| 7 | fn C._addcarry_u64(carry_in u8, a u64, b u64, out &u64) u8 |
| 8 | fn C._udiv128(hi u64, lo u64, y u64, rem &u64) u64 |
| 9 | |
| 10 | // mul_64 returns the 128-bit product of x and y: (hi, lo) = x * y |
| 11 | // with the product bits' upper half returned in hi and the lower |
| 12 | // half returned in lo. |
| 13 | // |
| 14 | // This function's execution time does not depend on the inputs. |
| 15 | @[inline] |
| 16 | pub fn mul_64(x u64, y u64) (u64, u64) { |
| 17 | mut hi := u64(0) |
| 18 | mut lo := u64(0) |
| 19 | $if msvc { |
| 20 | lo = C._umul128(x, y, &hi) |
| 21 | return hi, lo |
| 22 | } $else $if amd64 { |
| 23 | asm amd64 { |
| 24 | mulq rdx |
| 25 | ; =a (lo) |
| 26 | =d (hi) |
| 27 | ; a (x) |
| 28 | d (y) |
| 29 | ; cc |
| 30 | } |
| 31 | return hi, lo |
| 32 | } |
| 33 | // cross compile |
| 34 | return mul_64_default(x, y) |
| 35 | } |
| 36 | |
| 37 | // mul_add_64 returns the 128-bit result of x * y + z: (hi, lo) = x * y + z |
| 38 | // with the result bits' upper half returned in hi and the lower |
| 39 | // half returned in lo. |
| 40 | @[inline] |
| 41 | pub fn mul_add_64(x u64, y u64, z u64) (u64, u64) { |
| 42 | mut hi := u64(0) |
| 43 | mut lo := u64(0) |
| 44 | $if msvc { |
| 45 | lo = C._umul128(x, y, &hi) |
| 46 | carry := C._addcarry_u64(0, lo, z, &lo) |
| 47 | hi += carry |
| 48 | return hi, lo |
| 49 | } $else $if amd64 { |
| 50 | asm amd64 { |
| 51 | mulq rdx |
| 52 | addq rax, z |
| 53 | adcq rdx, 0 |
| 54 | ; =a (lo) |
| 55 | =d (hi) |
| 56 | ; a (x) |
| 57 | d (y) |
| 58 | r (z) |
| 59 | ; cc |
| 60 | } |
| 61 | return hi, lo |
| 62 | } |
| 63 | // cross compile |
| 64 | return mul_add_64_default(x, y, z) |
| 65 | } |
| 66 | |
| 67 | // div_64 returns the quotient and remainder of (hi, lo) divided by y: |
| 68 | // quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper |
| 69 | // half in parameter hi and the lower half in parameter lo. |
| 70 | // div_64 panics for y == 0 (division by zero) or y <= hi (quotient overflow). |
| 71 | @[inline] |
| 72 | pub fn div_64(hi u64, lo u64, y1 u64) (u64, u64) { |
| 73 | mut y := y1 |
| 74 | if y == 0 { |
| 75 | panic(divide_error) |
| 76 | } |
| 77 | if y <= hi { |
| 78 | panic(overflow_error) |
| 79 | } |
| 80 | |
| 81 | mut quo := u64(0) |
| 82 | mut rem := u64(0) |
| 83 | $if msvc { |
| 84 | quo = C._udiv128(hi, lo, y, &rem) |
| 85 | return quo, rem |
| 86 | } $else $if amd64 { |
| 87 | asm amd64 { |
| 88 | div y |
| 89 | ; =a (quo) |
| 90 | =d (rem) |
| 91 | ; d (hi) |
| 92 | a (lo) |
| 93 | r (y) |
| 94 | ; cc |
| 95 | } |
| 96 | return quo, rem |
| 97 | } |
| 98 | // cross compile |
| 99 | return div_64_default(hi, lo, y1) |
| 100 | } |
| 101 | |