| 1 | module big |
| 2 | |
| 3 | import math.bits |
| 4 | |
| 5 | // help routines for cleaner code but inline for performance |
| 6 | // quicker than BitField.set_bit |
| 7 | @[direct_array_access; inline] |
| 8 | fn bit_set(mut a []u64, n int) { |
| 9 | byte_offset := n / digit_bits |
| 10 | mask := u64(1) << u32(n % digit_bits) |
| 11 | $if debug { |
| 12 | assert a.len >= byte_offset |
| 13 | } |
| 14 | a[byte_offset] |= mask |
| 15 | } |
| 16 | |
| 17 | @[direct_array_access] |
| 18 | fn knuth_divide_array_by_array(operand_a []u64, operand_b []u64, mut quotient []u64, mut remainder []u64) { |
| 19 | m := operand_a.len - operand_b.len |
| 20 | n := operand_b.len |
| 21 | mut u := []u64{len: operand_a.len + 1} |
| 22 | mut v := []u64{len: n} |
| 23 | leading_zeros := bits.leading_zeros_64(operand_b.last()) - (64 - digit_bits) |
| 24 | |
| 25 | if leading_zeros > 0 { |
| 26 | mut carry := u64(0) |
| 27 | amount := digit_bits - leading_zeros |
| 28 | for i in 0 .. operand_a.len { |
| 29 | temp := (operand_a[i] << leading_zeros) | carry |
| 30 | u[i] = temp & max_digit |
| 31 | carry = operand_a[i] >> amount |
| 32 | } |
| 33 | u[operand_a.len] = carry |
| 34 | carry = 0 |
| 35 | for i in 0 .. operand_b.len { |
| 36 | temp := (operand_b[i] << leading_zeros) | carry |
| 37 | v[i] = temp & max_digit |
| 38 | carry = operand_b[i] >> amount |
| 39 | } |
| 40 | } else { |
| 41 | for i in 0 .. operand_a.len { |
| 42 | u[i] = operand_a[i] |
| 43 | } |
| 44 | for i in 0 .. operand_b.len { |
| 45 | v[i] = operand_b[i] |
| 46 | } |
| 47 | } |
| 48 | |
| 49 | if remainder.len >= (n + 1) { |
| 50 | remainder.trim(n + 1) |
| 51 | } else { |
| 52 | remainder = []u64{len: n + 1} |
| 53 | } |
| 54 | |
| 55 | v_n_1 := v[n - 1] |
| 56 | v_n_2 := v[n - 2] |
| 57 | for j := m; j >= 0; j-- { |
| 58 | u_j_n := u[j + n] |
| 59 | u_j_n_1 := u[j + n - 1] |
| 60 | u_j_n_2 := u[j + n - 2] |
| 61 | |
| 62 | mut qhat, mut rhat := bits.div_64(u_j_n >> (64 - digit_bits), |
| 63 | (u_j_n << digit_bits) | u_j_n_1, v_n_1) |
| 64 | mut x1, mut x2 := bits.mul_64(qhat, v_n_2) |
| 65 | x2 = x2 & max_digit |
| 66 | x1 = (x1 << (64 - digit_bits)) | (x2 >> digit_bits) |
| 67 | for greater_than(x1, x2, rhat, u_j_n_2) { |
| 68 | qhat-- |
| 69 | prev := rhat |
| 70 | rhat += v_n_1 |
| 71 | if rhat < prev { |
| 72 | break |
| 73 | } |
| 74 | x1, x2 = bits.mul_64(qhat, v_n_2) |
| 75 | x2 = x2 & max_digit |
| 76 | x1 = (x1 << (64 - digit_bits)) | (x2 >> digit_bits) |
| 77 | } |
| 78 | mut carry := u64(0) |
| 79 | for i in 0 .. n { |
| 80 | hi, lo := bits.mul_add_64(v[i], qhat, carry) |
| 81 | remainder[i] = lo & max_digit |
| 82 | carry = (hi << (64 - digit_bits)) | (lo >> digit_bits) |
| 83 | } |
| 84 | remainder[n] = carry |
| 85 | |
| 86 | mut borrow := u64(0) |
| 87 | for i in 0 .. n + 1 { |
| 88 | result := u[j + i] - remainder[i] - borrow |
| 89 | u[j + i] = result & max_digit |
| 90 | borrow = (result >> digit_bits) & 1 |
| 91 | } |
| 92 | if borrow == 1 { |
| 93 | qhat-- |
| 94 | carry = u64(0) |
| 95 | for i in 0 .. n { |
| 96 | sum := u[j + i] + v[i] + carry |
| 97 | u[j + i] = sum & max_digit |
| 98 | carry = sum >> digit_bits |
| 99 | } |
| 100 | } |
| 101 | quotient[j] = qhat |
| 102 | } |
| 103 | |
| 104 | remainder.delete_last() |
| 105 | if leading_zeros > 0 { |
| 106 | mut carry := u64(0) |
| 107 | max_leading_digit := (u64(1) << leading_zeros) - 1 |
| 108 | for i := n - 1; i >= 0; i-- { |
| 109 | current_limb := u[i] |
| 110 | remainder[i] = (current_limb >> leading_zeros) | carry |
| 111 | carry = (current_limb & max_leading_digit) << (digit_bits - leading_zeros) |
| 112 | } |
| 113 | } else { |
| 114 | for i in 0 .. n { |
| 115 | remainder[i] = u[i] |
| 116 | } |
| 117 | } |
| 118 | shrink_tail_zeros(mut quotient) |
| 119 | shrink_tail_zeros(mut remainder) |
| 120 | } |
| 121 | |
| 122 | @[inline] |
| 123 | fn greater_than(x1 u64, x2 u64, y1 u64, y2 u64) bool { |
| 124 | return x1 > y1 || (x1 == y1 && x2 > y2) |
| 125 | } |
| 126 | |