| 1 | module strconv |
| 2 | |
| 3 | // Copyright (c) 2019-2024 Dario Deledda. All rights reserved. |
| 4 | // Use of this source code is governed by an MIT license |
| 5 | // that can be found in the LICENSE file. |
| 6 | // |
| 7 | // This file contains utilities for converting a string to a f64 variable. |
| 8 | // IEEE 754 standard is used. |
| 9 | // Know limitation: limited to 18 significant digits |
| 10 | // |
| 11 | // The code is inspired by: |
| 12 | // Grzegorz Kraszewski [email protected] |
| 13 | // URL: http://krashan.ppa.pl/articles/stringtofloat/ |
| 14 | // Original license: MIT |
| 15 | // 96 bit operation utilities |
| 16 | // |
| 17 | // Note: when u128 will be available, these function can be refactored. |
| 18 | |
| 19 | // f32 constants |
| 20 | pub const single_plus_zero = u32(0x0000_0000) |
| 21 | pub const single_minus_zero = u32(0x8000_0000) |
| 22 | pub const single_plus_infinity = u32(0x7F80_0000) |
| 23 | pub const single_minus_infinity = u32(0xFF80_0000) |
| 24 | |
| 25 | // f64 constants |
| 26 | pub const digits = 18 |
| 27 | pub const double_plus_zero = u64(0x0000000000000000) |
| 28 | pub const double_minus_zero = u64(0x8000000000000000) |
| 29 | pub const double_plus_infinity = u64(0x7FF0000000000000) |
| 30 | pub const double_minus_infinity = u64(0xFFF0000000000000) |
| 31 | |
| 32 | // char constants |
| 33 | pub const c_dpoint = `.` |
| 34 | pub const c_plus = `+` |
| 35 | pub const c_minus = `-` |
| 36 | pub const c_zero = `0` |
| 37 | pub const c_nine = `9` |
| 38 | pub const c_ten = u32(10) |
| 39 | |
| 40 | // right logical shift 96 bit |
| 41 | fn lsr96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) { |
| 42 | mut r0 := u32(0) |
| 43 | mut r1 := u32(0) |
| 44 | mut r2 := u32(0) |
| 45 | r0 = (s0 >> 1) | ((s1 & u32(1)) << 31) |
| 46 | r1 = (s1 >> 1) | ((s2 & u32(1)) << 31) |
| 47 | r2 = s2 >> 1 |
| 48 | return r2, r1, r0 |
| 49 | } |
| 50 | |
| 51 | // left logical shift 96 bit |
| 52 | fn lsl96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) { |
| 53 | mut r0 := u32(0) |
| 54 | mut r1 := u32(0) |
| 55 | mut r2 := u32(0) |
| 56 | r2 = (s2 << 1) | ((s1 & (u32(1) << 31)) >> 31) |
| 57 | r1 = (s1 << 1) | ((s0 & (u32(1) << 31)) >> 31) |
| 58 | r0 = s0 << 1 |
| 59 | return r2, r1, r0 |
| 60 | } |
| 61 | |
| 62 | // sum on 96 bit |
| 63 | fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) { |
| 64 | mut w := u64(0) |
| 65 | mut r0 := u32(0) |
| 66 | mut r1 := u32(0) |
| 67 | mut r2 := u32(0) |
| 68 | w = u64(s0) + u64(d0) |
| 69 | r0 = u32(w) |
| 70 | w >>= 32 |
| 71 | w += u64(s1) + u64(d1) |
| 72 | r1 = u32(w) |
| 73 | w >>= 32 |
| 74 | w += u64(s2) + u64(d2) |
| 75 | r2 = u32(w) |
| 76 | return r2, r1, r0 |
| 77 | } |
| 78 | |
| 79 | // subtraction on 96 bit |
| 80 | fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) { |
| 81 | mut w := u64(0) |
| 82 | mut r0 := u32(0) |
| 83 | mut r1 := u32(0) |
| 84 | mut r2 := u32(0) |
| 85 | w = u64(s0) - u64(d0) |
| 86 | r0 = u32(w) |
| 87 | w >>= 32 |
| 88 | w += u64(s1) - u64(d1) |
| 89 | r1 = u32(w) |
| 90 | w >>= 32 |
| 91 | w += u64(s2) - u64(d2) |
| 92 | r2 = u32(w) |
| 93 | return r2, r1, r0 |
| 94 | } |
| 95 | |
| 96 | // Utility functions |
| 97 | fn is_digit(x u8) bool { |
| 98 | return x >= c_zero && x <= c_nine |
| 99 | } |
| 100 | |
| 101 | fn is_space(x u8) bool { |
| 102 | return x == `\t` || x == `\n` || x == `\v` || x == `\f` || x == `\r` || x == ` ` |
| 103 | } |
| 104 | |
| 105 | fn is_exp(x u8) bool { |
| 106 | return x == `E` || x == `e` |
| 107 | } |
| 108 | |
| 109 | // Possible parser return values. |
| 110 | enum ParserState { |
| 111 | ok // parser finished OK |
| 112 | pzero // no digits or number is smaller than +-2^-1022 |
| 113 | mzero // number is negative, module smaller |
| 114 | pinf // number is higher than +HUGE_VAL |
| 115 | minf // number is lower than -HUGE_VAL |
| 116 | invalid_number // invalid number, used for '#@%^' for example |
| 117 | extra_char // extra char after number |
| 118 | } |
| 119 | |
| 120 | // parser tries to parse the given string into a number |
| 121 | // FIXME: need one char after the last char of the number |
| 122 | @[direct_array_access] |
| 123 | fn parser(s string) (ParserState, PrepNumber) { |
| 124 | mut digx := 0 |
| 125 | mut result := ParserState.ok |
| 126 | mut expneg := false |
| 127 | mut expexp := 0 |
| 128 | mut i := 0 |
| 129 | mut pn := PrepNumber{} |
| 130 | |
| 131 | // skip spaces |
| 132 | for i < s.len && s[i].is_space() { |
| 133 | i++ |
| 134 | } |
| 135 | |
| 136 | // check negatives |
| 137 | if s[i] == `-` { |
| 138 | pn.negative = true |
| 139 | i++ |
| 140 | } |
| 141 | |
| 142 | // positive sign ignore it |
| 143 | if s[i] == `+` { |
| 144 | i++ |
| 145 | } |
| 146 | |
| 147 | // read mantissa |
| 148 | for i < s.len && s[i].is_digit() { |
| 149 | if pn.mantissa == 0 && s[i] == c_zero { |
| 150 | i++ |
| 151 | continue |
| 152 | } |
| 153 | // println("${i} => ${s[i]}") |
| 154 | if digx < digits { |
| 155 | pn.mantissa *= 10 |
| 156 | pn.mantissa += u64(s[i] - c_zero) |
| 157 | digx++ |
| 158 | } else if pn.exponent < 2147483647 { |
| 159 | pn.exponent++ |
| 160 | } |
| 161 | i++ |
| 162 | } |
| 163 | |
| 164 | // read mantissa decimals |
| 165 | if i < s.len && s[i] == `.` { |
| 166 | i++ |
| 167 | for i < s.len && s[i].is_digit() { |
| 168 | if pn.mantissa == 0 && s[i] == c_zero { |
| 169 | pn.exponent-- |
| 170 | i++ |
| 171 | continue |
| 172 | } |
| 173 | if digx < digits { |
| 174 | pn.mantissa *= 10 |
| 175 | pn.mantissa += u64(s[i] - c_zero) |
| 176 | pn.exponent-- |
| 177 | digx++ |
| 178 | } |
| 179 | i++ |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | // read exponent |
| 184 | if i < s.len && (s[i] == `e` || s[i] == `E`) { |
| 185 | i++ |
| 186 | if i < s.len { |
| 187 | // esponent sign |
| 188 | if s[i] == c_plus { |
| 189 | i++ |
| 190 | } else if s[i] == c_minus { |
| 191 | expneg = true |
| 192 | i++ |
| 193 | } |
| 194 | |
| 195 | for i < s.len && s[i].is_digit() { |
| 196 | if expexp < 214748364 { |
| 197 | expexp *= 10 |
| 198 | expexp += int(s[i] - c_zero) |
| 199 | } |
| 200 | i++ |
| 201 | } |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | if expneg { |
| 206 | expexp = -expexp |
| 207 | } |
| 208 | pn.exponent += expexp |
| 209 | if pn.mantissa == 0 { |
| 210 | if pn.negative { |
| 211 | result = .mzero |
| 212 | } else { |
| 213 | result = .pzero |
| 214 | } |
| 215 | } else if pn.exponent > 309 { |
| 216 | if pn.negative { |
| 217 | result = .minf |
| 218 | } else { |
| 219 | result = .pinf |
| 220 | } |
| 221 | } else if pn.exponent < -328 { |
| 222 | if pn.negative { |
| 223 | result = .mzero |
| 224 | } else { |
| 225 | result = .pzero |
| 226 | } |
| 227 | } |
| 228 | if i == 0 && s.len > 0 { |
| 229 | return ParserState.invalid_number, pn |
| 230 | } |
| 231 | if i != s.len { |
| 232 | return ParserState.extra_char, pn |
| 233 | } |
| 234 | return result, pn |
| 235 | } |
| 236 | |
| 237 | // converter returns a u64 with the bit image of the f64 number |
| 238 | fn converter(mut pn PrepNumber) u64 { |
| 239 | mut binexp := 92 |
| 240 | // s0,s1,s2 are the parts of a 96-bit precision integer |
| 241 | mut s2 := u32(0) |
| 242 | mut s1 := u32(0) |
| 243 | mut s0 := u32(0) |
| 244 | // q0,q1,q2 are the parts of a 96-bit precision integer |
| 245 | mut q2 := u32(0) |
| 246 | mut q1 := u32(0) |
| 247 | mut q0 := u32(0) |
| 248 | // r0,r1,r2 are the parts of a 96-bit precision integer |
| 249 | mut r2 := u32(0) |
| 250 | mut r1 := u32(0) |
| 251 | mut r0 := u32(0) |
| 252 | |
| 253 | mask28 := u32(u64(0xF) << 28) |
| 254 | mut result := u64(0) |
| 255 | // working on 3 u32 to have 96 bit precision |
| 256 | s0 = u32(pn.mantissa & u64(0x00000000FFFFFFFF)) |
| 257 | s1 = u32(pn.mantissa >> 32) |
| 258 | s2 = u32(0) |
| 259 | if pn.mantissa == 0 && pn.exponent <= 0 { |
| 260 | return if pn.negative { double_minus_zero } else { double_plus_zero } |
| 261 | } |
| 262 | // so we take the decimal exponent off |
| 263 | for pn.exponent > 0 { |
| 264 | q2, q1, q0 = lsl96(s2, s1, s0) // q = s * 2 |
| 265 | r2, r1, r0 = lsl96(q2, q1, q0) // r = s * 4 <=> q * 2 |
| 266 | s2, s1, s0 = lsl96(r2, r1, r0) // s = s * 8 <=> r * 2 |
| 267 | s2, s1, s0 = add96(s2, s1, s0, q2, q1, q0) // s = (s * 8) + (s * 2) <=> s*10 |
| 268 | pn.exponent-- |
| 269 | for (s2 & mask28) != 0 { |
| 270 | q2, q1, q0 = lsr96(s2, s1, s0) |
| 271 | binexp++ |
| 272 | s2 = q2 |
| 273 | s1 = q1 |
| 274 | s0 = q0 |
| 275 | } |
| 276 | } |
| 277 | for pn.exponent < 0 { |
| 278 | for !((s2 & (u32(1) << 31)) != 0) { |
| 279 | q2, q1, q0 = lsl96(s2, s1, s0) |
| 280 | binexp-- |
| 281 | s2 = q2 |
| 282 | s1 = q1 |
| 283 | s0 = q0 |
| 284 | } |
| 285 | q2 = s2 / c_ten |
| 286 | r1 = s2 % c_ten |
| 287 | r2 = (s1 >> 8) | (r1 << 24) |
| 288 | q1 = r2 / c_ten |
| 289 | r1 = r2 % c_ten |
| 290 | r2 = ((s1 & u32(0xFF)) << 16) | (s0 >> 16) | (r1 << 24) |
| 291 | r0 = r2 / c_ten |
| 292 | r1 = r2 % c_ten |
| 293 | q1 = (q1 << 8) | ((r0 & u32(0x00FF0000)) >> 16) |
| 294 | q0 = r0 << 16 |
| 295 | r2 = (s0 & u32(0xFFFF)) | (r1 << 16) |
| 296 | q0 |= r2 / c_ten |
| 297 | s2 = q2 |
| 298 | s1 = q1 |
| 299 | s0 = q0 |
| 300 | pn.exponent++ |
| 301 | } |
| 302 | // C.printf(c"mantissa before normalization: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp) |
| 303 | // normalization, the 28 bit in s2 must the leftest one in the variable |
| 304 | if s2 != 0 || s1 != 0 || s0 != 0 { |
| 305 | for (s2 & mask28) == 0 { |
| 306 | q2, q1, q0 = lsl96(s2, s1, s0) |
| 307 | binexp-- |
| 308 | s2 = q2 |
| 309 | s1 = q1 |
| 310 | s0 = q0 |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | // Handle subnormal (denormalized) numbers - very small numbers near zero |
| 315 | // |
| 316 | // Normal floats have an implicit leading 1 bit in their mantissa (like 1.xxxxx). |
| 317 | // When numbers get too small (binexp < -1022), we can't represent them normally. |
| 318 | // Instead, we use subnormals: set exponent to 0 and shift the mantissa right, |
| 319 | // losing precision gradually. This prevents abrupt underflow to zero. |
| 320 | // |
| 321 | // Example: 1.23e-308 is smaller than the minimum normal float, so we: |
| 322 | // 1. Keep the normalized mantissa from s2 and s1 |
| 323 | // 2. Shift it right to "denormalize" it (the leading 1 moves into the mantissa) |
| 324 | // 3. Round correctly using the bits that were shifted out |
| 325 | // 4. Return with exponent = 0 (subnormal marker) |
| 326 | if binexp < -1022 && (s2 | s1) != 0 { |
| 327 | shift := -1022 - binexp |
| 328 | if shift > 60 { |
| 329 | return if pn.negative { double_minus_zero } else { double_plus_zero } |
| 330 | } |
| 331 | shifted := ((u64(s2) << 32) | u64(s1)) >> u32(shift) |
| 332 | q := (shifted >> 8) + |
| 333 | u64((shifted >> 7) & 1 != 0 && ((shifted & 0x7F) != 0 || (shifted >> 8) & 1 != 0)) |
| 334 | return (q & 0x000FFFFFFFFFFFFF) | (u64(pn.negative) << 63) |
| 335 | } |
| 336 | |
| 337 | // rounding if needed |
| 338 | /* |
| 339 | * "round half to even" algorithm |
| 340 | * Example for f32, just a reminder |
| 341 | * |
| 342 | * If bit 54 is 0, round down |
| 343 | * If bit 54 is 1 |
| 344 | * If any bit beyond bit 54 is 1, round up |
| 345 | * If all bits beyond bit 54 are 0 (meaning the number is halfway between two floating-point numbers) |
| 346 | * If bit 53 is 0, round down |
| 347 | * If bit 53 is 1, round up |
| 348 | */ |
| 349 | /* |
| 350 | test case 1 complete |
| 351 | s2=0x1FFFFFFF |
| 352 | s1=0xFFFFFF80 |
| 353 | s0=0x0 |
| 354 | */ |
| 355 | |
| 356 | /* |
| 357 | test case 1 check_round_bit |
| 358 | s2=0x18888888 |
| 359 | s1=0x88888880 |
| 360 | s0=0x0 |
| 361 | */ |
| 362 | |
| 363 | /* |
| 364 | test case check_round_bit + normalization |
| 365 | s2=0x18888888 |
| 366 | s1=0x88888F80 |
| 367 | s0=0x0 |
| 368 | */ |
| 369 | |
| 370 | // C.printf(c"mantissa before rounding: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp) |
| 371 | // s1 => 0xFFFFFFxx only F are represented |
| 372 | nbit := 7 |
| 373 | check_round_bit := u32(1) << u32(nbit) |
| 374 | check_round_mask := u32(0xFFFFFFFF) << u32(nbit) |
| 375 | if (s1 & check_round_bit) != 0 { |
| 376 | // C.printf(c"need round!! check mask: %08x\n", s1 & ~check_round_mask ) |
| 377 | if (s1 & ~check_round_mask) != 0 { |
| 378 | // C.printf(c"Add 1!\n") |
| 379 | s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0) |
| 380 | } else { |
| 381 | // C.printf(c"All 0!\n") |
| 382 | if (s1 & (check_round_bit << u32(1))) != 0 { |
| 383 | // C.printf(c"Add 1 form -1 bit control!\n") |
| 384 | s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0) |
| 385 | } |
| 386 | } |
| 387 | s1 = s1 & check_round_mask |
| 388 | s0 = u32(0) |
| 389 | // recheck normalization |
| 390 | if s2 & (mask28 << u32(1)) != 0 { |
| 391 | // C.printf(c"Renormalize!!\n") |
| 392 | q2, q1, q0 = lsr96(s2, s1, s0) |
| 393 | binexp++ |
| 394 | // dump(binexp) |
| 395 | s2 = q2 |
| 396 | s1 = q1 |
| 397 | s0 = q0 |
| 398 | } |
| 399 | } |
| 400 | // tmp := ( u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) |
| 401 | // C.printf(c"mantissa after rounding : %08x %08x %08x binexp: %d \n", s2,s1,s0,binexp) |
| 402 | // C.printf(c"Tmp result: %016x\n",tmp) |
| 403 | // end rounding |
| 404 | // offset the binary exponent IEEE 754 |
| 405 | binexp += 1023 |
| 406 | if binexp > 2046 { |
| 407 | if pn.negative { |
| 408 | result = double_minus_infinity |
| 409 | } else { |
| 410 | result = double_plus_infinity |
| 411 | } |
| 412 | } else if binexp < 1 { |
| 413 | // Should not reach here for subnormals anymore (handled earlier) |
| 414 | // This is now only for true zeros |
| 415 | if pn.negative { |
| 416 | result = double_minus_zero |
| 417 | } else { |
| 418 | result = double_plus_zero |
| 419 | } |
| 420 | } else if s2 != 0 { |
| 421 | mut q := u64(0) |
| 422 | binexs2 := u64(binexp) << 52 |
| 423 | q = (u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) | binexs2 |
| 424 | if pn.negative { |
| 425 | q |= (u64(1) << 63) |
| 426 | } |
| 427 | result = q |
| 428 | } |
| 429 | return result |
| 430 | } |
| 431 | |
| 432 | @[params] |
| 433 | pub struct AtoF64Param { |
| 434 | pub: |
| 435 | allow_extra_chars bool // allow extra characters after number |
| 436 | } |
| 437 | |
| 438 | // atof64 parses the string `s`, and if possible, converts it into a f64 number |
| 439 | pub fn atof64(s string, param AtoF64Param) !f64 { |
| 440 | if s.len == 0 { |
| 441 | return error('expected a number found an empty string') |
| 442 | } |
| 443 | mut res := Float64u{} |
| 444 | res_parsing, mut pn := parser(s) |
| 445 | match res_parsing { |
| 446 | .ok { |
| 447 | res.u = converter(mut pn) |
| 448 | } |
| 449 | .pzero { |
| 450 | res.u = double_plus_zero |
| 451 | } |
| 452 | .mzero { |
| 453 | res.u = double_minus_zero |
| 454 | } |
| 455 | .pinf { |
| 456 | res.u = double_plus_infinity |
| 457 | } |
| 458 | .minf { |
| 459 | res.u = double_minus_infinity |
| 460 | } |
| 461 | .extra_char { |
| 462 | if param.allow_extra_chars { |
| 463 | res.u = converter(mut pn) |
| 464 | } else { |
| 465 | return error('extra char after number') |
| 466 | } |
| 467 | } |
| 468 | .invalid_number { |
| 469 | return error('not a number') |
| 470 | } |
| 471 | } |
| 472 | |
| 473 | return unsafe { res.f } |
| 474 | } |
| 475 | |