| 1 | // Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | module bits |
| 5 | |
| 6 | // See http://supertech.csail.mit.edu/papers/debruijn.pdf |
| 7 | const de_bruijn32 = u32(0x077CB531) |
| 8 | const de_bruijn32tab = [u8(0), 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, |
| 9 | 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]! |
| 10 | const de_bruijn64 = u64(0x03f79d71b4ca8b09) |
| 11 | const de_bruijn64tab = [u8(0), 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, 62, 47, |
| 12 | 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, 63, 55, 48, 27, 60, 41, 37, 16, 46, |
| 13 | 35, 44, 21, 52, 32, 23, 11, 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6]! |
| 14 | |
| 15 | const m0 = u64(0x5555555555555555) // 01010101 ... |
| 16 | |
| 17 | const m1 = u64(0x3333333333333333) // 00110011 ... |
| 18 | |
| 19 | const m2 = u64(0x0f0f0f0f0f0f0f0f) // 00001111 ... |
| 20 | |
| 21 | const m3 = u64(0x00ff00ff00ff00ff) // etc. |
| 22 | |
| 23 | const m4 = u64(0x0000ffff0000ffff) |
| 24 | |
| 25 | // --- LeadingZeros --- |
| 26 | // leading_zeros_8 returns the number of leading zero bits in x; the result is 8 for x == 0. |
| 27 | @[inline] |
| 28 | pub fn leading_zeros_8(x u8) int { |
| 29 | return leading_zeros_8_default(x) |
| 30 | } |
| 31 | |
| 32 | @[inline] |
| 33 | fn leading_zeros_8_default(x u8) int { |
| 34 | return 8 - len_8(x) |
| 35 | } |
| 36 | |
| 37 | // leading_zeros_16 returns the number of leading zero bits in x; the result is 16 for x == 0. |
| 38 | @[inline] |
| 39 | pub fn leading_zeros_16(x u16) int { |
| 40 | return leading_zeros_16_default(x) |
| 41 | } |
| 42 | |
| 43 | @[inline] |
| 44 | fn leading_zeros_16_default(x u16) int { |
| 45 | return 16 - len_16(x) |
| 46 | } |
| 47 | |
| 48 | // leading_zeros_32 returns the number of leading zero bits in x; the result is 32 for x == 0. |
| 49 | @[inline] |
| 50 | pub fn leading_zeros_32(x u32) int { |
| 51 | return leading_zeros_32_default(x) |
| 52 | } |
| 53 | |
| 54 | @[inline] |
| 55 | fn leading_zeros_32_default(x u32) int { |
| 56 | return 32 - len_32(x) |
| 57 | } |
| 58 | |
| 59 | // leading_zeros_64 returns the number of leading zero bits in x; the result is 64 for x == 0. |
| 60 | @[inline] |
| 61 | pub fn leading_zeros_64(x u64) int { |
| 62 | return leading_zeros_64_default(x) |
| 63 | } |
| 64 | |
| 65 | @[inline] |
| 66 | fn leading_zeros_64_default(x u64) int { |
| 67 | return 64 - len_64(x) |
| 68 | } |
| 69 | |
| 70 | // --- TrailingZeros --- |
| 71 | // trailing_zeros_8 returns the number of trailing zero bits in x; the result is 8 for x == 0. |
| 72 | @[inline] |
| 73 | pub fn trailing_zeros_8(x u8) int { |
| 74 | return trailing_zeros_8_default(x) |
| 75 | } |
| 76 | |
| 77 | @[direct_array_access; inline] |
| 78 | fn trailing_zeros_8_default(x u8) int { |
| 79 | return int(ntz_8_tab[x]) |
| 80 | } |
| 81 | |
| 82 | // trailing_zeros_16 returns the number of trailing zero bits in x; the result is 16 for x == 0. |
| 83 | @[inline] |
| 84 | pub fn trailing_zeros_16(x u16) int { |
| 85 | return trailing_zeros_16_default(x) |
| 86 | } |
| 87 | |
| 88 | @[direct_array_access; ignore_overflow; inline] |
| 89 | fn trailing_zeros_16_default(x u16) int { |
| 90 | if x == 0 { |
| 91 | return 16 |
| 92 | } |
| 93 | // see comment in trailing_zeros_64 |
| 94 | return int(de_bruijn32tab[u32(x & -x) * de_bruijn32 >> (32 - 5)]) |
| 95 | } |
| 96 | |
| 97 | // trailing_zeros_32 returns the number of trailing zero bits in x; the result is 32 for x == 0. |
| 98 | @[inline] |
| 99 | pub fn trailing_zeros_32(x u32) int { |
| 100 | return trailing_zeros_32_default(x) |
| 101 | } |
| 102 | |
| 103 | @[direct_array_access; ignore_overflow; inline] |
| 104 | fn trailing_zeros_32_default(x u32) int { |
| 105 | if x == 0 { |
| 106 | return 32 |
| 107 | } |
| 108 | // see comment in trailing_zeros_64 |
| 109 | return int(de_bruijn32tab[(x & -x) * de_bruijn32 >> (32 - 5)]) |
| 110 | } |
| 111 | |
| 112 | // trailing_zeros_64 returns the number of trailing zero bits in x; the result is 64 for x == 0. |
| 113 | @[inline] |
| 114 | pub fn trailing_zeros_64(x u64) int { |
| 115 | return trailing_zeros_64_default(x) |
| 116 | } |
| 117 | |
| 118 | @[direct_array_access; ignore_overflow; inline] |
| 119 | fn trailing_zeros_64_default(x u64) int { |
| 120 | if x == 0 { |
| 121 | return 64 |
| 122 | } |
| 123 | // If popcount is fast, replace code below with return popcount(^x & (x - 1)). |
| 124 | // |
| 125 | // x & -x leaves only the right-most bit set in the word. Let k be the |
| 126 | // index of that bit. Since only a single bit is set, the value is two |
| 127 | // to the power of k. Multiplying by a power of two is equivalent to |
| 128 | // left shifting, in this case by k bits. The de Bruijn (64 bit) constant |
| 129 | // is such that all six bit, consecutive substrings are distinct. |
| 130 | // Therefore, if we have a left shifted version of this constant we can |
| 131 | // find by how many bits it was shifted by looking at which six bit |
| 132 | // substring ended up at the top of the word. |
| 133 | // (Knuth, volume 4, section 7.3.1) |
| 134 | return int(de_bruijn64tab[int((x & -x) * de_bruijn64 >> (64 - 6))]) |
| 135 | } |
| 136 | |
| 137 | // ones_count_8 returns the number of one bits ("population count") in x. |
| 138 | @[inline] |
| 139 | pub fn ones_count_8(x u8) int { |
| 140 | return ones_count_8_default(x) |
| 141 | } |
| 142 | |
| 143 | @[direct_array_access; inline] |
| 144 | fn ones_count_8_default(x u8) int { |
| 145 | return int(pop_8_tab[x]) |
| 146 | } |
| 147 | |
| 148 | // ones_count_16 returns the number of one bits ("population count") in x. |
| 149 | @[inline] |
| 150 | pub fn ones_count_16(x u16) int { |
| 151 | return ones_count_16_default(x) |
| 152 | } |
| 153 | |
| 154 | @[direct_array_access; inline] |
| 155 | fn ones_count_16_default(x u16) int { |
| 156 | return int(pop_8_tab[x >> 8] + pop_8_tab[x & u16(0xff)]) |
| 157 | } |
| 158 | |
| 159 | // ones_count_32 returns the number of one bits ("population count") in x. |
| 160 | @[inline] |
| 161 | pub fn ones_count_32(x u32) int { |
| 162 | return ones_count_32_default(x) |
| 163 | } |
| 164 | |
| 165 | @[direct_array_access; inline] |
| 166 | fn ones_count_32_default(x u32) int { |
| 167 | return int(pop_8_tab[x >> 24] + pop_8_tab[(x >> 16) & 0xff] + pop_8_tab[(x >> 8) & 0xff] + |
| 168 | pop_8_tab[x & u32(0xff)]) |
| 169 | } |
| 170 | |
| 171 | // ones_count_64 returns the number of one bits ("population count") in x. |
| 172 | @[inline] |
| 173 | pub fn ones_count_64(x u64) int { |
| 174 | return ones_count_64_default(x) |
| 175 | } |
| 176 | |
| 177 | @[inline] |
| 178 | fn ones_count_64_default(x u64) int { |
| 179 | // Implementation: Parallel summing of adjacent bits. |
| 180 | // See "Hacker's Delight", Chap. 5: Counting Bits. |
| 181 | // The following pattern shows the general approach: |
| 182 | // |
| 183 | // x = x>>1&(m0&m) + x&(m0&m) |
| 184 | // x = x>>2&(m1&m) + x&(m1&m) |
| 185 | // x = x>>4&(m2&m) + x&(m2&m) |
| 186 | // x = x>>8&(m3&m) + x&(m3&m) |
| 187 | // x = x>>16&(m4&m) + x&(m4&m) |
| 188 | // x = x>>32&(m5&m) + x&(m5&m) |
| 189 | // return int(x) |
| 190 | // |
| 191 | // Masking (& operations) can be left away when there's no |
| 192 | // danger that a field's sum will carry over into the next |
| 193 | // field: Since the result cannot be > 64, 8 bits is enough |
| 194 | // and we can ignore the masks for the shifts by 8 and up. |
| 195 | // Per "Hacker's Delight", the first line can be simplified |
| 196 | // more, but it saves at best one instruction, so we leave |
| 197 | // it alone for clarity. |
| 198 | mut y := ((x >> u64(1)) & (m0 & max_u64)) + (x & (m0 & max_u64)) |
| 199 | y = ((y >> u64(2)) & (m1 & max_u64)) + (y & (m1 & max_u64)) |
| 200 | y = ((y >> 4) + y) & (m2 & max_u64) |
| 201 | y += y >> 8 |
| 202 | y += y >> 16 |
| 203 | y += y >> 32 |
| 204 | return int(y) & ((1 << 7) - 1) |
| 205 | } |
| 206 | |
| 207 | const n8 = u8(8) |
| 208 | const n16 = u16(16) |
| 209 | const n32 = u32(32) |
| 210 | const n64 = u64(64) |
| 211 | |
| 212 | // rotate_left_8 returns the value of x rotated left by (k mod 8) bits. |
| 213 | // To rotate x right by k bits, call rotate_left_8(x, -k). |
| 214 | // |
| 215 | // This function's execution time does not depend on the inputs. |
| 216 | @[inline] |
| 217 | pub fn rotate_left_8(x u8, k int) u8 { |
| 218 | s := u8(k) & (n8 - u8(1)) |
| 219 | return (x << s) | (x >> (n8 - s)) |
| 220 | } |
| 221 | |
| 222 | // rotate_left_16 returns the value of x rotated left by (k mod 16) bits. |
| 223 | // To rotate x right by k bits, call rotate_left_16(x, -k). |
| 224 | // |
| 225 | // This function's execution time does not depend on the inputs. |
| 226 | @[inline] |
| 227 | pub fn rotate_left_16(x u16, k int) u16 { |
| 228 | s := u16(k) & (n16 - u16(1)) |
| 229 | return (x << s) | (x >> (n16 - s)) |
| 230 | } |
| 231 | |
| 232 | // rotate_left_32 returns the value of x rotated left by (k mod 32) bits. |
| 233 | // To rotate x right by k bits, call rotate_left_32(x, -k). |
| 234 | // |
| 235 | // This function's execution time does not depend on the inputs. |
| 236 | @[inline] |
| 237 | pub fn rotate_left_32(x u32, k int) u32 { |
| 238 | s := u32(k) & (n32 - u32(1)) |
| 239 | return (x << s) | (x >> (n32 - s)) |
| 240 | } |
| 241 | |
| 242 | // rotate_left_64 returns the value of x rotated left by (k mod 64) bits. |
| 243 | // To rotate x right by k bits, call rotate_left_64(x, -k). |
| 244 | // |
| 245 | // This function's execution time does not depend on the inputs. |
| 246 | @[inline] |
| 247 | pub fn rotate_left_64(x u64, k int) u64 { |
| 248 | s := u64(k) & (n64 - u64(1)) |
| 249 | return (x << s) | (x >> (n64 - s)) |
| 250 | } |
| 251 | |
| 252 | // reverse_8 returns the value of x with its bits in reversed order. |
| 253 | @[direct_array_access; inline] |
| 254 | pub fn reverse_8(x u8) u8 { |
| 255 | return rev_8_tab[x] |
| 256 | } |
| 257 | |
| 258 | // reverse_16 returns the value of x with its bits in reversed order. |
| 259 | @[direct_array_access; inline] |
| 260 | pub fn reverse_16(x u16) u16 { |
| 261 | return u16(rev_8_tab[x >> 8]) | (u16(rev_8_tab[x & u16(0xff)]) << 8) |
| 262 | } |
| 263 | |
| 264 | // reverse_32 returns the value of x with its bits in reversed order. |
| 265 | @[inline] |
| 266 | pub fn reverse_32(x u32) u32 { |
| 267 | mut y := (((x >> u32(1)) & (m0 & max_u32)) | ((x & (m0 & max_u32)) << 1)) |
| 268 | y = (((y >> u32(2)) & (m1 & max_u32)) | ((y & (m1 & max_u32)) << u32(2))) |
| 269 | y = (((y >> u32(4)) & (m2 & max_u32)) | ((y & (m2 & max_u32)) << u32(4))) |
| 270 | return reverse_bytes_32(u32(y)) |
| 271 | } |
| 272 | |
| 273 | // reverse_64 returns the value of x with its bits in reversed order. |
| 274 | @[inline] |
| 275 | pub fn reverse_64(x u64) u64 { |
| 276 | mut y := (((x >> u64(1)) & (m0 & max_u64)) | ((x & (m0 & max_u64)) << 1)) |
| 277 | y = (((y >> u64(2)) & (m1 & max_u64)) | ((y & (m1 & max_u64)) << 2)) |
| 278 | y = (((y >> u64(4)) & (m2 & max_u64)) | ((y & (m2 & max_u64)) << 4)) |
| 279 | return reverse_bytes_64(y) |
| 280 | } |
| 281 | |
| 282 | // reverse_bytes_16 returns the value of x with its bytes in reversed order. |
| 283 | // |
| 284 | // This function's execution time does not depend on the inputs. |
| 285 | @[inline] |
| 286 | pub fn reverse_bytes_16(x u16) u16 { |
| 287 | return (x >> 8) | (x << 8) |
| 288 | } |
| 289 | |
| 290 | // reverse_bytes_32 returns the value of x with its bytes in reversed order. |
| 291 | // |
| 292 | // This function's execution time does not depend on the inputs. |
| 293 | @[inline] |
| 294 | pub fn reverse_bytes_32(x u32) u32 { |
| 295 | y := (((x >> u32(8)) & (m3 & max_u32)) | ((x & (m3 & max_u32)) << u32(8))) |
| 296 | return u32((y >> 16) | (y << 16)) |
| 297 | } |
| 298 | |
| 299 | // reverse_bytes_64 returns the value of x with its bytes in reversed order. |
| 300 | // |
| 301 | // This function's execution time does not depend on the inputs. |
| 302 | @[inline] |
| 303 | pub fn reverse_bytes_64(x u64) u64 { |
| 304 | mut y := (((x >> u64(8)) & (m3 & max_u64)) | ((x & (m3 & max_u64)) << u64(8))) |
| 305 | y = (((y >> u64(16)) & (m4 & max_u64)) | ((y & (m4 & max_u64)) << u64(16))) |
| 306 | return (y >> 32) | (y << 32) |
| 307 | } |
| 308 | |
| 309 | // len_8 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| 310 | @[direct_array_access] |
| 311 | pub fn len_8(x u8) int { |
| 312 | return int(len_8_tab[x]) |
| 313 | } |
| 314 | |
| 315 | // len_16 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| 316 | @[direct_array_access; ignore_overflow] |
| 317 | pub fn len_16(x u16) int { |
| 318 | mut y := x |
| 319 | mut n := 0 |
| 320 | if y >= 1 << 8 { |
| 321 | y >>= 8 |
| 322 | n = 8 |
| 323 | } |
| 324 | return n + int(len_8_tab[int(y)]) |
| 325 | } |
| 326 | |
| 327 | // len_32 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| 328 | @[direct_array_access; ignore_overflow] |
| 329 | pub fn len_32(x u32) int { |
| 330 | mut y := x |
| 331 | mut n := 0 |
| 332 | if y >= (1 << 16) { |
| 333 | y >>= 16 |
| 334 | n = 16 |
| 335 | } |
| 336 | if y >= (1 << 8) { |
| 337 | y >>= 8 |
| 338 | n += 8 |
| 339 | } |
| 340 | return n + int(len_8_tab[int(y)]) |
| 341 | } |
| 342 | |
| 343 | // len_64 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| 344 | @[direct_array_access] |
| 345 | pub fn len_64(x u64) int { |
| 346 | mut y := x |
| 347 | mut n := 0 |
| 348 | if y >= u64(1) << u64(32) { |
| 349 | y >>= 32 |
| 350 | n = 32 |
| 351 | } |
| 352 | if y >= u64(1) << u64(16) { |
| 353 | y >>= 16 |
| 354 | n += 16 |
| 355 | } |
| 356 | if y >= u64(1) << u64(8) { |
| 357 | y >>= 8 |
| 358 | n += 8 |
| 359 | } |
| 360 | return n + int(len_8_tab[int(y)]) |
| 361 | } |
| 362 | |
| 363 | // add_32 returns the sum with carry of x, y and carry: sum = x + y + carry. |
| 364 | // The carry input must be 0 or 1; otherwise the behavior is undefined. |
| 365 | // The carryOut output is guaranteed to be 0 or 1. |
| 366 | // This function's execution time does not depend on the inputs. |
| 367 | @[ignore_overflow] |
| 368 | pub fn add_32(x u32, y u32, carry u32) (u32, u32) { |
| 369 | sum64 := u64(x) + u64(y) + u64(carry) |
| 370 | sum := u32(sum64) |
| 371 | carry_out := u32(sum64 >> 32) |
| 372 | return sum, carry_out |
| 373 | } |
| 374 | |
| 375 | // add_64 returns the sum with carry of x, y and carry: sum = x + y + carry. |
| 376 | // The carry input must be 0 or 1; otherwise the behavior is undefined. |
| 377 | // The carryOut output is guaranteed to be 0 or 1. |
| 378 | // This function's execution time does not depend on the inputs. |
| 379 | @[ignore_overflow] |
| 380 | pub fn add_64(x u64, y u64, carry u64) (u64, u64) { |
| 381 | sum := x + y + carry |
| 382 | // The sum will overflow if both top bits are set (x & y) or if one of them |
| 383 | // is (x | y), and a carry from the lower place happened. If such a carry |
| 384 | // happens, the top bit will be 1 + 0 + 1 = 0 (&^ sum). |
| 385 | carry_out := ((x & y) | ((x | y) & ~sum)) >> 63 |
| 386 | return sum, carry_out |
| 387 | } |
| 388 | |
| 389 | // sub_32 returns the difference of x, y and borrow, diff = x - y - borrow. |
| 390 | // The borrow input must be 0 or 1; otherwise the behavior is undefined. |
| 391 | // The borrowOut output is guaranteed to be 0 or 1. |
| 392 | // This function's execution time does not depend on the inputs. |
| 393 | @[ignore_overflow] |
| 394 | pub fn sub_32(x u32, y u32, borrow u32) (u32, u32) { |
| 395 | diff := x - y - borrow |
| 396 | // The difference will underflow if the top bit of x is not set and the top |
| 397 | // bit of y is set (^x & y) or if they are the same (^(x ^ y)) and a borrow |
| 398 | // from the lower place happens. If that borrow happens, the result will be |
| 399 | // 1 - 1 - 1 = 0 - 0 - 1 = 1 (& diff). |
| 400 | borrow_out := ((~x & y) | (~(x ^ y) & diff)) >> 31 |
| 401 | return diff, borrow_out |
| 402 | } |
| 403 | |
| 404 | // sub_64 returns the difference of x, y and borrow: diff = x - y - borrow. |
| 405 | // The borrow input must be 0 or 1; otherwise the behavior is undefined. |
| 406 | // The borrowOut output is guaranteed to be 0 or 1. |
| 407 | // This function's execution time does not depend on the inputs. |
| 408 | @[ignore_overflow] |
| 409 | pub fn sub_64(x u64, y u64, borrow u64) (u64, u64) { |
| 410 | diff := x - y - borrow |
| 411 | // See Sub32 for the bit logic. |
| 412 | borrow_out := ((~x & y) | (~(x ^ y) & diff)) >> 63 |
| 413 | return diff, borrow_out |
| 414 | } |
| 415 | |
| 416 | @[markused] |
| 417 | pub const two32 = u64(0x100000000) |
| 418 | const mask32 = two32 - 1 |
| 419 | const overflow_error = 'Overflow Error' |
| 420 | const divide_error = 'Divide by Zero Error' |
| 421 | |
| 422 | // mul_32 returns the 64-bit product of x and y: (hi, lo) = x * y |
| 423 | // with the product bits' upper half returned in hi and the lower |
| 424 | // half returned in lo. |
| 425 | // |
| 426 | // This function's execution time does not depend on the inputs. |
| 427 | @[inline] |
| 428 | pub fn mul_32(x u32, y u32) (u32, u32) { |
| 429 | return mul_32_default(x, y) |
| 430 | } |
| 431 | |
| 432 | @[inline] |
| 433 | fn mul_32_default(x u32, y u32) (u32, u32) { |
| 434 | tmp := u64(x) * u64(y) |
| 435 | hi := u32(tmp >> 32) |
| 436 | lo := u32(tmp) |
| 437 | return hi, lo |
| 438 | } |
| 439 | |
| 440 | // mul_64 returns the 128-bit product of x and y: (hi, lo) = x * y |
| 441 | // with the product bits' upper half returned in hi and the lower |
| 442 | // half returned in lo. |
| 443 | // |
| 444 | // This function's execution time does not depend on the inputs. |
| 445 | @[inline] |
| 446 | pub fn mul_64(x u64, y u64) (u64, u64) { |
| 447 | return mul_64_default(x, y) |
| 448 | } |
| 449 | |
| 450 | fn mul_64_default(x u64, y u64) (u64, u64) { |
| 451 | x0 := x & mask32 |
| 452 | x1 := x >> 32 |
| 453 | y0 := y & mask32 |
| 454 | y1 := y >> 32 |
| 455 | w0 := x0 * y0 |
| 456 | t := x1 * y0 + (w0 >> 32) |
| 457 | mut w1 := t & mask32 |
| 458 | w2 := t >> 32 |
| 459 | w1 += x0 * y1 |
| 460 | hi := x1 * y1 + w2 + (w1 >> 32) |
| 461 | lo := x * y |
| 462 | return hi, lo |
| 463 | } |
| 464 | |
| 465 | // mul_add_32 returns the 64-bit result of x * y + z: (hi, lo) = x * y + z |
| 466 | // with the result bits' upper half returned in hi and the lower |
| 467 | // half returned in lo. |
| 468 | @[inline] |
| 469 | pub fn mul_add_32(x u32, y u32, z u32) (u32, u32) { |
| 470 | return mul_add_32_default(x, y, z) |
| 471 | } |
| 472 | |
| 473 | @[inline] |
| 474 | fn mul_add_32_default(x u32, y u32, z u32) (u32, u32) { |
| 475 | tmp := u64(x) * u64(y) + u64(z) |
| 476 | hi := u32(tmp >> 32) |
| 477 | lo := u32(tmp) |
| 478 | return hi, lo |
| 479 | } |
| 480 | |
| 481 | // mul_add_64 returns the 128-bit result of x * y + z: (hi, lo) = x * y + z |
| 482 | // with the result bits' upper half returned in hi and the lower |
| 483 | // half returned in lo. |
| 484 | @[inline] |
| 485 | pub fn mul_add_64(x u64, y u64, z u64) (u64, u64) { |
| 486 | return mul_add_64_default(x, y, z) |
| 487 | } |
| 488 | |
| 489 | @[inline] |
| 490 | fn mul_add_64_default(x u64, y u64, z u64) (u64, u64) { |
| 491 | h, l := mul_64(x, y) |
| 492 | lo := l + z |
| 493 | hi := h + u64(lo < l) |
| 494 | return hi, lo |
| 495 | } |
| 496 | |
| 497 | // div_32 returns the quotient and remainder of (hi, lo) divided by y: |
| 498 | // quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper |
| 499 | // half in parameter hi and the lower half in parameter lo. |
| 500 | // div_32 panics for y == 0 (division by zero) or y <= hi (quotient overflow). |
| 501 | @[inline] |
| 502 | pub fn div_32(hi u32, lo u32, y u32) (u32, u32) { |
| 503 | return div_32_default(hi, lo, y) |
| 504 | } |
| 505 | |
| 506 | fn div_32_default(hi u32, lo u32, y u32) (u32, u32) { |
| 507 | if y == 0 { |
| 508 | panic(divide_error) |
| 509 | } |
| 510 | if y <= hi { |
| 511 | panic(overflow_error) |
| 512 | } |
| 513 | z := (u64(hi) << 32) | u64(lo) |
| 514 | quo := u32(z / u64(y)) |
| 515 | rem := u32(z % u64(y)) |
| 516 | return quo, rem |
| 517 | } |
| 518 | |
| 519 | // div_64 returns the quotient and remainder of (hi, lo) divided by y: |
| 520 | // quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper |
| 521 | // half in parameter hi and the lower half in parameter lo. |
| 522 | // div_64 panics for y == 0 (division by zero). |
| 523 | // Also panics for y <= hi (quotient overflow). |
| 524 | @[inline] |
| 525 | pub fn div_64(hi u64, lo u64, y1 u64) (u64, u64) { |
| 526 | return div_64_default(hi, lo, y1) |
| 527 | } |
| 528 | |
| 529 | fn div_64_default(hi u64, lo u64, y1 u64) (u64, u64) { |
| 530 | mut y := y1 |
| 531 | if y == 0 { |
| 532 | panic(divide_error) |
| 533 | } |
| 534 | if y <= hi { |
| 535 | panic(overflow_error) |
| 536 | } |
| 537 | s := u32(leading_zeros_64(y)) |
| 538 | y <<= s |
| 539 | yn1 := y >> 32 |
| 540 | yn0 := y & mask32 |
| 541 | ss1 := (hi << s) |
| 542 | xxx := 64 - s |
| 543 | mut ss2 := lo >> xxx |
| 544 | if xxx == 64 { |
| 545 | // in Go, shifting right a u64 number, 64 times produces 0 *always*. |
| 546 | // See https://go.dev/ref/spec |
| 547 | // > The shift operators implement arithmetic shifts if the left operand |
| 548 | // > is a signed integer and logical shifts if it is an unsigned integer. |
| 549 | // > There is no upper limit on the shift count. |
| 550 | // > Shifts behave as if the left operand is shifted n times by 1 for a shift count of n. |
| 551 | // > As a result, x << 1 is the same as x*2 and x >> 1 is the same as x/2 |
| 552 | // > but truncated towards negative infinity. |
| 553 | // |
| 554 | // In V, that is currently left to whatever C is doing, which is apparently a NOP. |
| 555 | // This function was a direct port of https://cs.opensource.google/go/go/+/refs/tags/go1.17.6:src/math/bits/bits.go;l=512, |
| 556 | // so we have to use the Go behaviour. |
| 557 | // TODO: reconsider whether we need to adopt it for our shift ops, or just use function wrappers that do it. |
| 558 | ss2 = 0 |
| 559 | } |
| 560 | un32 := ss1 | ss2 |
| 561 | un10 := lo << s |
| 562 | un1 := un10 >> 32 |
| 563 | un0 := un10 & mask32 |
| 564 | mut q1 := un32 / yn1 |
| 565 | mut rhat := un32 - (q1 * yn1) |
| 566 | for q1 >= two32 || (q1 * yn0) > ((two32 * rhat) + un1) { |
| 567 | q1-- |
| 568 | rhat += yn1 |
| 569 | if rhat >= two32 { |
| 570 | break |
| 571 | } |
| 572 | } |
| 573 | un21 := (un32 * two32) + (un1 - (q1 * y)) |
| 574 | mut q0 := un21 / yn1 |
| 575 | rhat = un21 - q0 * yn1 |
| 576 | for q0 >= two32 || (q0 * yn0) > ((two32 * rhat) + un0) { |
| 577 | q0-- |
| 578 | rhat += yn1 |
| 579 | if rhat >= two32 { |
| 580 | break |
| 581 | } |
| 582 | } |
| 583 | qq := ((q1 * two32) + q0) |
| 584 | rr := ((un21 * two32) + un0 - (q0 * y)) >> s |
| 585 | return qq, rr |
| 586 | } |
| 587 | |
| 588 | // rem_32 returns the remainder of (hi, lo) divided by y. Rem32 panics |
| 589 | // for y == 0 (division by zero) but, unlike Div32, it doesn't panic |
| 590 | // on a quotient overflow. |
| 591 | @[inline] |
| 592 | pub fn rem_32(hi u32, lo u32, y u32) u32 { |
| 593 | if y == 0 { |
| 594 | panic(divide_error) |
| 595 | } |
| 596 | return u32(((u64(hi) << 32) | u64(lo)) % u64(y)) |
| 597 | } |
| 598 | |
| 599 | // rem_64 returns the remainder of (hi, lo) divided by y. Rem64 panics |
| 600 | // for y == 0 (division by zero) but, unlike div_64, it doesn't panic |
| 601 | // on a quotient overflow. |
| 602 | @[inline] |
| 603 | pub fn rem_64(hi u64, lo u64, y u64) u64 { |
| 604 | // We scale down hi so that hi < y, then use div_64 to compute the |
| 605 | // rem with the guarantee that it won't panic on quotient overflow. |
| 606 | // Given that |
| 607 | // hi ≡ hi%y (mod y) |
| 608 | // we have |
| 609 | // hi<<64 + lo ≡ (hi%y)<<64 + lo (mod y) |
| 610 | if y == 0 { |
| 611 | panic(divide_error) |
| 612 | } |
| 613 | _, rem := div_64(hi % y, lo, y) |
| 614 | return rem |
| 615 | } |
| 616 | |
| 617 | // normalize returns a normal number y and exponent exp |
| 618 | // satisfying x == y × 2**exp. It assumes x is finite and non-zero. |
| 619 | pub fn normalize(x f64) (f64, int) { |
| 620 | smallest_normal := 2.2250738585072014e-308 // 2**-1022 |
| 621 | if (if x > 0.0 { |
| 622 | x |
| 623 | } else { |
| 624 | -x |
| 625 | }) < smallest_normal { |
| 626 | return x * (u64(1) << u64(52)), -52 |
| 627 | } |
| 628 | return x, 0 |
| 629 | } |
| 630 | |