| 1 | module strconv |
| 2 | |
| 3 | /*============================================================================= |
| 4 | |
| 5 | f64 to string |
| 6 | |
| 7 | Copyright (c) 2019-2024 Dario Deledda. All rights reserved. |
| 8 | Use of this source code is governed by an MIT license |
| 9 | that can be found in the LICENSE file. |
| 10 | |
| 11 | This file contains the f64 to string functions |
| 12 | |
| 13 | These functions are based on the work of: |
| 14 | Publication:PLDI 2018: Proceedings of the 39th ACM SIGPLAN |
| 15 | Conference on Programming Language Design and ImplementationJune 2018 |
| 16 | Pages 270–282 https://doi.org/10.1145/3192366.3192369 |
| 17 | |
| 18 | inspired by the Go version here: |
| 19 | https://github.com/cespare/ryu/tree/ba56a33f39e3bbbfa409095d0f9ae168a595feea |
| 20 | |
| 21 | =============================================================================*/ |
| 22 | |
| 23 | @[direct_array_access] |
| 24 | fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string { |
| 25 | mut n_digit := if i_n_digit < 1 { 1 } else { i_n_digit + 1 } |
| 26 | pad_digit := i_pad_digit + 1 |
| 27 | mut out := d.m |
| 28 | mut d_exp := d.e |
| 29 | // mut out_len := decimal_len_64(out) |
| 30 | mut out_len := dec_digits(out) |
| 31 | out_len_original := out_len |
| 32 | |
| 33 | mut fw_zeros := 0 |
| 34 | if pad_digit > out_len { |
| 35 | fw_zeros = pad_digit - out_len |
| 36 | } |
| 37 | |
| 38 | mut buf := []u8{len: (out_len + 6 + 1 + 1 + fw_zeros)} // sign + mant_len + . + e + e_sign + exp_len(2) + \0} |
| 39 | mut i := 0 |
| 40 | |
| 41 | if neg { |
| 42 | buf[i] = `-` |
| 43 | i++ |
| 44 | } |
| 45 | |
| 46 | mut disp := 0 |
| 47 | if out_len <= 1 { |
| 48 | disp = 1 |
| 49 | } |
| 50 | |
| 51 | // rounding last used digit |
| 52 | if n_digit < out_len { |
| 53 | // println("out:[${out}]") |
| 54 | out += ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up |
| 55 | out /= ten_pow_table_64[out_len - n_digit] |
| 56 | // println("out1:[${out}] ${d.m / ten_pow_table_64[out_len - n_digit ]}") |
| 57 | // fix issue #22424 |
| 58 | out_div := d.m / ten_pow_table_64[out_len - n_digit] |
| 59 | if out_div < out && dec_digits(out_div) < dec_digits(out) { |
| 60 | // from `99` to `100`, will need d_exp+1 |
| 61 | d_exp++ |
| 62 | n_digit++ |
| 63 | } |
| 64 | |
| 65 | // println("cmp: ${d.m/ten_pow_table_64[out_len - n_digit ]} ${out/ten_pow_table_64[out_len - n_digit ]}") |
| 66 | |
| 67 | out_len = n_digit |
| 68 | // println("orig: ${out_len_original} new len: ${out_len} out:[${out}]") |
| 69 | } |
| 70 | |
| 71 | y := i + out_len |
| 72 | mut x := 0 |
| 73 | for x < (out_len - disp - 1) { |
| 74 | buf[y - x] = `0` + u8(out % 10) |
| 75 | out /= 10 |
| 76 | i++ |
| 77 | x++ |
| 78 | } |
| 79 | |
| 80 | // fix issue #22424 |
| 81 | // no decimal digits needed, end here |
| 82 | // if i_n_digit == 0 { |
| 83 | // unsafe { |
| 84 | // buf[i] = 0 |
| 85 | // return tos(&u8(&buf[0]), i) |
| 86 | // } |
| 87 | //} |
| 88 | |
| 89 | if out_len > 1 || fw_zeros > 0 { |
| 90 | buf[y - x] = `.` |
| 91 | i++ |
| 92 | } |
| 93 | x++ |
| 94 | |
| 95 | if y - x >= 0 { |
| 96 | buf[y - x] = `0` + u8(out % 10) |
| 97 | i++ |
| 98 | } |
| 99 | |
| 100 | for fw_zeros > 0 { |
| 101 | buf[i] = `0` |
| 102 | i++ |
| 103 | fw_zeros-- |
| 104 | } |
| 105 | |
| 106 | buf[i] = `e` |
| 107 | i++ |
| 108 | |
| 109 | mut exp := d_exp + out_len_original - 1 |
| 110 | if exp < 0 { |
| 111 | buf[i] = `-` |
| 112 | i++ |
| 113 | exp = -exp |
| 114 | } else { |
| 115 | buf[i] = `+` |
| 116 | i++ |
| 117 | } |
| 118 | |
| 119 | // Always print at least two digits to match strconv's formatting. |
| 120 | d2 := exp % 10 |
| 121 | exp /= 10 |
| 122 | d1 := exp % 10 |
| 123 | d0 := exp / 10 |
| 124 | if d0 > 0 { |
| 125 | buf[i] = `0` + u8(d0) |
| 126 | i++ |
| 127 | } |
| 128 | buf[i] = `0` + u8(d1) |
| 129 | i++ |
| 130 | buf[i] = `0` + u8(d2) |
| 131 | i++ |
| 132 | buf[i] = 0 |
| 133 | |
| 134 | return unsafe { |
| 135 | tos(memdup(&buf[0], i + 1), i) |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | @[ignore_overflow] |
| 140 | fn f64_to_decimal_exact_int(i_mant u64, exp u64) (Dec64, bool) { |
| 141 | mut d := Dec64{} |
| 142 | e := exp - bias64 |
| 143 | if e > mantbits64 { |
| 144 | return d, false |
| 145 | } |
| 146 | shift := mantbits64 - e |
| 147 | mant := i_mant | u64(0x0010_0000_0000_0000) // implicit 1 |
| 148 | // mant := i_mant | (1 << mantbits64) // implicit 1 |
| 149 | d.m = mant >> shift |
| 150 | if (d.m << shift) != mant { |
| 151 | return d, false |
| 152 | } |
| 153 | |
| 154 | for (d.m % 10) == 0 { |
| 155 | d.m /= 10 |
| 156 | d.e++ |
| 157 | } |
| 158 | return d, true |
| 159 | } |
| 160 | |
| 161 | fn f64_to_decimal(mant u64, exp u64) Dec64 { |
| 162 | mut e2 := 0 |
| 163 | mut m2 := u64(0) |
| 164 | if exp == 0 { |
| 165 | // We subtract 2 so that the bounds computation has |
| 166 | // 2 additional bits. |
| 167 | e2 = 1 - bias64 - int(mantbits64) - 2 |
| 168 | m2 = mant |
| 169 | } else { |
| 170 | e2 = int(exp) - bias64 - int(mantbits64) - 2 |
| 171 | m2 = (u64(1) << mantbits64) | mant |
| 172 | } |
| 173 | even := (m2 & 1) == 0 |
| 174 | accept_bounds := even |
| 175 | |
| 176 | // Step 2: Determine the interval of valid decimal representations. |
| 177 | mv := u64(4 * m2) |
| 178 | mm_shift := bool_to_u64(mant != 0 || exp <= 1) |
| 179 | |
| 180 | // Step 3: Convert to a decimal power base uing 128-bit arithmetic. |
| 181 | mut vr := u64(0) |
| 182 | mut vp := u64(0) |
| 183 | mut vm := u64(0) |
| 184 | mut e10 := 0 |
| 185 | mut vm_is_trailing_zeros := false |
| 186 | mut vr_is_trailing_zeros := false |
| 187 | |
| 188 | if e2 >= 0 { |
| 189 | // This expression is slightly faster than max(0, log10Pow2(e2) - 1). |
| 190 | q := log10_pow2(e2) - bool_to_u32(e2 > 3) |
| 191 | e10 = int(q) |
| 192 | k := pow5_inv_num_bits_64 + pow5_bits(int(q)) - 1 |
| 193 | i := -e2 + int(q) + k |
| 194 | |
| 195 | mul := *(&Uint128(&pow5_inv_split_64_x[q * 2])) |
| 196 | vr = mul_shift_64(u64(4) * m2, mul, i) |
| 197 | vp = mul_shift_64(u64(4) * m2 + u64(2), mul, i) |
| 198 | vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, i) |
| 199 | if q <= 21 { |
| 200 | // This should use q <= 22, but I think 21 is also safe. |
| 201 | // Smaller values may still be safe, but it's more |
| 202 | // difficult to reason about them. Only one of mp, mv, |
| 203 | // and mm can be a multiple of 5, if any. |
| 204 | if mv % 5 == 0 { |
| 205 | vr_is_trailing_zeros = multiple_of_power_of_five_64(mv, q) |
| 206 | } else if accept_bounds { |
| 207 | // Same as min(e2 + (^mm & 1), pow5Factor64(mm)) >= q |
| 208 | // <=> e2 + (^mm & 1) >= q && pow5Factor64(mm) >= q |
| 209 | // <=> true && pow5Factor64(mm) >= q, since e2 >= q. |
| 210 | vm_is_trailing_zeros = multiple_of_power_of_five_64(mv - 1 - mm_shift, q) |
| 211 | } else if multiple_of_power_of_five_64(mv + 2, q) { |
| 212 | vp-- |
| 213 | } |
| 214 | } |
| 215 | } else { |
| 216 | // This expression is slightly faster than max(0, log10Pow5(-e2) - 1). |
| 217 | q := log10_pow5(-e2) - bool_to_u32(-e2 > 1) |
| 218 | e10 = int(q) + e2 |
| 219 | i := -e2 - int(q) |
| 220 | k := pow5_bits(i) - pow5_num_bits_64 |
| 221 | j := int(q) - k |
| 222 | mul := *(&Uint128(&pow5_split_64_x[i * 2])) |
| 223 | vr = mul_shift_64(u64(4) * m2, mul, j) |
| 224 | vp = mul_shift_64(u64(4) * m2 + u64(2), mul, j) |
| 225 | vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, j) |
| 226 | if q <= 1 { |
| 227 | // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits. |
| 228 | // mv = 4 * m2, so it always has at least two trailing 0 bits. |
| 229 | vr_is_trailing_zeros = true |
| 230 | if accept_bounds { |
| 231 | // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1. |
| 232 | vm_is_trailing_zeros = (mm_shift == 1) |
| 233 | } else { |
| 234 | // mp = mv + 2, so it always has at least one trailing 0 bit. |
| 235 | vp-- |
| 236 | } |
| 237 | } else if q < 63 { // TODO(ulfjack/cespare): Use a tighter bound here. |
| 238 | // We need to compute min(ntz(mv), pow5Factor64(mv) - e2) >= q - 1 |
| 239 | // <=> ntz(mv) >= q - 1 && pow5Factor64(mv) - e2 >= q - 1 |
| 240 | // <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) |
| 241 | // <=> (mv & ((1 << (q - 1)) - 1)) == 0 |
| 242 | // We also need to make sure that the left shift does not overflow. |
| 243 | vr_is_trailing_zeros = multiple_of_power_of_two_64(mv, q - 1) |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | // Step 4: Find the shortest decimal representation |
| 248 | // in the interval of valid representations. |
| 249 | mut removed := 0 |
| 250 | mut last_removed_digit := u8(0) |
| 251 | mut out := u64(0) |
| 252 | // On average, we remove ~2 digits. |
| 253 | if vm_is_trailing_zeros || vr_is_trailing_zeros { |
| 254 | // General case, which happens rarely (~0.7%). |
| 255 | for { |
| 256 | vp_div_10 := vp / 10 |
| 257 | vm_div_10 := vm / 10 |
| 258 | if vp_div_10 <= vm_div_10 { |
| 259 | break |
| 260 | } |
| 261 | vm_mod_10 := vm % 10 |
| 262 | vr_div_10 := vr / 10 |
| 263 | vr_mod_10 := vr % 10 |
| 264 | vm_is_trailing_zeros = vm_is_trailing_zeros && vm_mod_10 == 0 |
| 265 | vr_is_trailing_zeros = vr_is_trailing_zeros && last_removed_digit == 0 |
| 266 | last_removed_digit = u8(vr_mod_10) |
| 267 | vr = vr_div_10 |
| 268 | vp = vp_div_10 |
| 269 | vm = vm_div_10 |
| 270 | removed++ |
| 271 | } |
| 272 | if vm_is_trailing_zeros { |
| 273 | for { |
| 274 | vm_div_10 := vm / 10 |
| 275 | vm_mod_10 := vm % 10 |
| 276 | if vm_mod_10 != 0 { |
| 277 | break |
| 278 | } |
| 279 | vp_div_10 := vp / 10 |
| 280 | vr_div_10 := vr / 10 |
| 281 | vr_mod_10 := vr % 10 |
| 282 | vr_is_trailing_zeros = vr_is_trailing_zeros && last_removed_digit == 0 |
| 283 | last_removed_digit = u8(vr_mod_10) |
| 284 | vr = vr_div_10 |
| 285 | vp = vp_div_10 |
| 286 | vm = vm_div_10 |
| 287 | removed++ |
| 288 | } |
| 289 | } |
| 290 | if vr_is_trailing_zeros && last_removed_digit == 5 && (vr % 2) == 0 { |
| 291 | // Round even if the exact number is .....50..0. |
| 292 | last_removed_digit = 4 |
| 293 | } |
| 294 | out = vr |
| 295 | // We need to take vr + 1 if vr is outside bounds |
| 296 | // or we need to round up. |
| 297 | if (vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5 { |
| 298 | out++ |
| 299 | } |
| 300 | } else { |
| 301 | // Specialized for the common case (~99.3%). |
| 302 | // Percentages below are relative to this. |
| 303 | mut round_up := false |
| 304 | for vp / 100 > vm / 100 { |
| 305 | // Optimization: remove two digits at a time (~86.2%). |
| 306 | round_up = (vr % 100) >= 50 |
| 307 | vr /= 100 |
| 308 | vp /= 100 |
| 309 | vm /= 100 |
| 310 | removed += 2 |
| 311 | } |
| 312 | // Loop iterations below (approximately), without optimization above: |
| 313 | // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02% |
| 314 | // Loop iterations below (approximately), with optimization above: |
| 315 | // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02% |
| 316 | for vp / 10 > vm / 10 { |
| 317 | round_up = (vr % 10) >= 5 |
| 318 | vr /= 10 |
| 319 | vp /= 10 |
| 320 | vm /= 10 |
| 321 | removed++ |
| 322 | } |
| 323 | // We need to take vr + 1 if vr is outside bounds |
| 324 | // or we need to round up. |
| 325 | out = vr + bool_to_u64(vr == vm || round_up) |
| 326 | } |
| 327 | |
| 328 | return Dec64{ |
| 329 | m: out |
| 330 | e: e10 + removed |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | //============================================================================= |
| 335 | // String Functions |
| 336 | //============================================================================= |
| 337 | |
| 338 | // f64_to_str returns `f` as a `string` in scientific notation with max `n_digit` digits after the dot. |
| 339 | pub fn f64_to_str(f f64, n_digit int) string { |
| 340 | mut u1 := Uf64{} |
| 341 | u1.f = f |
| 342 | u := unsafe { u1.u } |
| 343 | |
| 344 | neg := (u >> (mantbits64 + expbits64)) != 0 |
| 345 | mant := u & ((u64(1) << mantbits64) - u64(1)) |
| 346 | exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1)) |
| 347 | // println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}") |
| 348 | |
| 349 | // Exit early for easy cases. |
| 350 | if exp == maxexp64 || (exp == 0 && mant == 0) { |
| 351 | return get_string_special(neg, exp == 0, mant == 0) |
| 352 | } |
| 353 | |
| 354 | mut d, ok := f64_to_decimal_exact_int(mant, exp) |
| 355 | if !ok { |
| 356 | // println("to_decimal") |
| 357 | d = f64_to_decimal(mant, exp) |
| 358 | } |
| 359 | // println("${d.m} ${d.e}") |
| 360 | return d.get_string_64(neg, n_digit, 0) |
| 361 | } |
| 362 | |
| 363 | // f64_to_str returns `f` as a `string` in scientific notation with max `n_digit` digits after the dot. |
| 364 | pub fn f64_to_str_pad(f f64, n_digit int) string { |
| 365 | mut u1 := Uf64{} |
| 366 | u1.f = f |
| 367 | u := unsafe { u1.u } |
| 368 | |
| 369 | neg := (u >> (mantbits64 + expbits64)) != 0 |
| 370 | mant := u & ((u64(1) << mantbits64) - u64(1)) |
| 371 | exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1)) |
| 372 | // unsafe { println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016x}") } |
| 373 | |
| 374 | // Exit early for easy cases. |
| 375 | if exp == maxexp64 || (exp == 0 && mant == 0) { |
| 376 | return get_string_special(neg, exp == 0, mant == 0) |
| 377 | } |
| 378 | |
| 379 | mut d, ok := f64_to_decimal_exact_int(mant, exp) |
| 380 | if !ok { |
| 381 | // println("to_decimal") |
| 382 | d = f64_to_decimal(mant, exp) |
| 383 | } |
| 384 | // println("DEBUG: ${d.m} ${d.e}") |
| 385 | return d.get_string_64(neg, n_digit, n_digit) |
| 386 | } |
| 387 | |