v / vlib / strconv / atof.c.v
474 lines · 443 sloc · 11.09 KB · 4b7955afe528e233a43f8586b923529e6d28c391
Raw
1module strconv
2
3// Copyright (c) 2019-2024 Dario Deledda. All rights reserved.
4// Use of this source code is governed by an MIT license
5// that can be found in the LICENSE file.
6//
7// This file contains utilities for converting a string to a f64 variable.
8// IEEE 754 standard is used.
9// Know limitation: limited to 18 significant digits
10//
11// The code is inspired by:
12// Grzegorz Kraszewski [email protected]
13// URL: http://krashan.ppa.pl/articles/stringtofloat/
14// Original license: MIT
15// 96 bit operation utilities
16//
17// Note: when u128 will be available, these function can be refactored.
18
19// f32 constants
20pub const single_plus_zero = u32(0x0000_0000)
21pub const single_minus_zero = u32(0x8000_0000)
22pub const single_plus_infinity = u32(0x7F80_0000)
23pub const single_minus_infinity = u32(0xFF80_0000)
24
25// f64 constants
26pub const digits = 18
27pub const double_plus_zero = u64(0x0000000000000000)
28pub const double_minus_zero = u64(0x8000000000000000)
29pub const double_plus_infinity = u64(0x7FF0000000000000)
30pub const double_minus_infinity = u64(0xFFF0000000000000)
31
32// char constants
33pub const c_dpoint = `.`
34pub const c_plus = `+`
35pub const c_minus = `-`
36pub const c_zero = `0`
37pub const c_nine = `9`
38pub const c_ten = u32(10)
39
40// right logical shift 96 bit
41fn lsr96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) {
42 mut r0 := u32(0)
43 mut r1 := u32(0)
44 mut r2 := u32(0)
45 r0 = (s0 >> 1) | ((s1 & u32(1)) << 31)
46 r1 = (s1 >> 1) | ((s2 & u32(1)) << 31)
47 r2 = s2 >> 1
48 return r2, r1, r0
49}
50
51// left logical shift 96 bit
52fn lsl96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) {
53 mut r0 := u32(0)
54 mut r1 := u32(0)
55 mut r2 := u32(0)
56 r2 = (s2 << 1) | ((s1 & (u32(1) << 31)) >> 31)
57 r1 = (s1 << 1) | ((s0 & (u32(1) << 31)) >> 31)
58 r0 = s0 << 1
59 return r2, r1, r0
60}
61
62// sum on 96 bit
63fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) {
64 mut w := u64(0)
65 mut r0 := u32(0)
66 mut r1 := u32(0)
67 mut r2 := u32(0)
68 w = u64(s0) + u64(d0)
69 r0 = u32(w)
70 w >>= 32
71 w += u64(s1) + u64(d1)
72 r1 = u32(w)
73 w >>= 32
74 w += u64(s2) + u64(d2)
75 r2 = u32(w)
76 return r2, r1, r0
77}
78
79// subtraction on 96 bit
80fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) {
81 mut w := u64(0)
82 mut r0 := u32(0)
83 mut r1 := u32(0)
84 mut r2 := u32(0)
85 w = u64(s0) - u64(d0)
86 r0 = u32(w)
87 w >>= 32
88 w += u64(s1) - u64(d1)
89 r1 = u32(w)
90 w >>= 32
91 w += u64(s2) - u64(d2)
92 r2 = u32(w)
93 return r2, r1, r0
94}
95
96// Utility functions
97fn is_digit(x u8) bool {
98 return x >= c_zero && x <= c_nine
99}
100
101fn is_space(x u8) bool {
102 return x == `\t` || x == `\n` || x == `\v` || x == `\f` || x == `\r` || x == ` `
103}
104
105fn is_exp(x u8) bool {
106 return x == `E` || x == `e`
107}
108
109// Possible parser return values.
110enum ParserState {
111 ok // parser finished OK
112 pzero // no digits or number is smaller than +-2^-1022
113 mzero // number is negative, module smaller
114 pinf // number is higher than +HUGE_VAL
115 minf // number is lower than -HUGE_VAL
116 invalid_number // invalid number, used for '#@%^' for example
117 extra_char // extra char after number
118}
119
120// parser tries to parse the given string into a number
121// FIXME: need one char after the last char of the number
122@[direct_array_access]
123fn parser(s string) (ParserState, PrepNumber) {
124 mut digx := 0
125 mut result := ParserState.ok
126 mut expneg := false
127 mut expexp := 0
128 mut i := 0
129 mut pn := PrepNumber{}
130
131 // skip spaces
132 for i < s.len && s[i].is_space() {
133 i++
134 }
135
136 // check negatives
137 if s[i] == `-` {
138 pn.negative = true
139 i++
140 }
141
142 // positive sign ignore it
143 if s[i] == `+` {
144 i++
145 }
146
147 // read mantissa
148 for i < s.len && s[i].is_digit() {
149 if pn.mantissa == 0 && s[i] == c_zero {
150 i++
151 continue
152 }
153 // println("${i} => ${s[i]}")
154 if digx < digits {
155 pn.mantissa *= 10
156 pn.mantissa += u64(s[i] - c_zero)
157 digx++
158 } else if pn.exponent < 2147483647 {
159 pn.exponent++
160 }
161 i++
162 }
163
164 // read mantissa decimals
165 if i < s.len && s[i] == `.` {
166 i++
167 for i < s.len && s[i].is_digit() {
168 if pn.mantissa == 0 && s[i] == c_zero {
169 pn.exponent--
170 i++
171 continue
172 }
173 if digx < digits {
174 pn.mantissa *= 10
175 pn.mantissa += u64(s[i] - c_zero)
176 pn.exponent--
177 digx++
178 }
179 i++
180 }
181 }
182
183 // read exponent
184 if i < s.len && (s[i] == `e` || s[i] == `E`) {
185 i++
186 if i < s.len {
187 // esponent sign
188 if s[i] == c_plus {
189 i++
190 } else if s[i] == c_minus {
191 expneg = true
192 i++
193 }
194
195 for i < s.len && s[i].is_digit() {
196 if expexp < 214748364 {
197 expexp *= 10
198 expexp += int(s[i] - c_zero)
199 }
200 i++
201 }
202 }
203 }
204
205 if expneg {
206 expexp = -expexp
207 }
208 pn.exponent += expexp
209 if pn.mantissa == 0 {
210 if pn.negative {
211 result = .mzero
212 } else {
213 result = .pzero
214 }
215 } else if pn.exponent > 309 {
216 if pn.negative {
217 result = .minf
218 } else {
219 result = .pinf
220 }
221 } else if pn.exponent < -328 {
222 if pn.negative {
223 result = .mzero
224 } else {
225 result = .pzero
226 }
227 }
228 if i == 0 && s.len > 0 {
229 return ParserState.invalid_number, pn
230 }
231 if i != s.len {
232 return ParserState.extra_char, pn
233 }
234 return result, pn
235}
236
237// converter returns a u64 with the bit image of the f64 number
238fn converter(mut pn PrepNumber) u64 {
239 mut binexp := 92
240 // s0,s1,s2 are the parts of a 96-bit precision integer
241 mut s2 := u32(0)
242 mut s1 := u32(0)
243 mut s0 := u32(0)
244 // q0,q1,q2 are the parts of a 96-bit precision integer
245 mut q2 := u32(0)
246 mut q1 := u32(0)
247 mut q0 := u32(0)
248 // r0,r1,r2 are the parts of a 96-bit precision integer
249 mut r2 := u32(0)
250 mut r1 := u32(0)
251 mut r0 := u32(0)
252
253 mask28 := u32(u64(0xF) << 28)
254 mut result := u64(0)
255 // working on 3 u32 to have 96 bit precision
256 s0 = u32(pn.mantissa & u64(0x00000000FFFFFFFF))
257 s1 = u32(pn.mantissa >> 32)
258 s2 = u32(0)
259 if pn.mantissa == 0 && pn.exponent <= 0 {
260 return if pn.negative { double_minus_zero } else { double_plus_zero }
261 }
262 // so we take the decimal exponent off
263 for pn.exponent > 0 {
264 q2, q1, q0 = lsl96(s2, s1, s0) // q = s * 2
265 r2, r1, r0 = lsl96(q2, q1, q0) // r = s * 4 <=> q * 2
266 s2, s1, s0 = lsl96(r2, r1, r0) // s = s * 8 <=> r * 2
267 s2, s1, s0 = add96(s2, s1, s0, q2, q1, q0) // s = (s * 8) + (s * 2) <=> s*10
268 pn.exponent--
269 for (s2 & mask28) != 0 {
270 q2, q1, q0 = lsr96(s2, s1, s0)
271 binexp++
272 s2 = q2
273 s1 = q1
274 s0 = q0
275 }
276 }
277 for pn.exponent < 0 {
278 for !((s2 & (u32(1) << 31)) != 0) {
279 q2, q1, q0 = lsl96(s2, s1, s0)
280 binexp--
281 s2 = q2
282 s1 = q1
283 s0 = q0
284 }
285 q2 = s2 / c_ten
286 r1 = s2 % c_ten
287 r2 = (s1 >> 8) | (r1 << 24)
288 q1 = r2 / c_ten
289 r1 = r2 % c_ten
290 r2 = ((s1 & u32(0xFF)) << 16) | (s0 >> 16) | (r1 << 24)
291 r0 = r2 / c_ten
292 r1 = r2 % c_ten
293 q1 = (q1 << 8) | ((r0 & u32(0x00FF0000)) >> 16)
294 q0 = r0 << 16
295 r2 = (s0 & u32(0xFFFF)) | (r1 << 16)
296 q0 |= r2 / c_ten
297 s2 = q2
298 s1 = q1
299 s0 = q0
300 pn.exponent++
301 }
302 // C.printf(c"mantissa before normalization: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp)
303 // normalization, the 28 bit in s2 must the leftest one in the variable
304 if s2 != 0 || s1 != 0 || s0 != 0 {
305 for (s2 & mask28) == 0 {
306 q2, q1, q0 = lsl96(s2, s1, s0)
307 binexp--
308 s2 = q2
309 s1 = q1
310 s0 = q0
311 }
312 }
313
314 // Handle subnormal (denormalized) numbers - very small numbers near zero
315 //
316 // Normal floats have an implicit leading 1 bit in their mantissa (like 1.xxxxx).
317 // When numbers get too small (binexp < -1022), we can't represent them normally.
318 // Instead, we use subnormals: set exponent to 0 and shift the mantissa right,
319 // losing precision gradually. This prevents abrupt underflow to zero.
320 //
321 // Example: 1.23e-308 is smaller than the minimum normal float, so we:
322 // 1. Keep the normalized mantissa from s2 and s1
323 // 2. Shift it right to "denormalize" it (the leading 1 moves into the mantissa)
324 // 3. Round correctly using the bits that were shifted out
325 // 4. Return with exponent = 0 (subnormal marker)
326 if binexp < -1022 && (s2 | s1) != 0 {
327 shift := -1022 - binexp
328 if shift > 60 {
329 return if pn.negative { double_minus_zero } else { double_plus_zero }
330 }
331 shifted := ((u64(s2) << 32) | u64(s1)) >> u32(shift)
332 q := (shifted >> 8) +
333 u64((shifted >> 7) & 1 != 0 && ((shifted & 0x7F) != 0 || (shifted >> 8) & 1 != 0))
334 return (q & 0x000FFFFFFFFFFFFF) | (u64(pn.negative) << 63)
335 }
336
337 // rounding if needed
338 /*
339 * "round half to even" algorithm
340 * Example for f32, just a reminder
341 *
342 * If bit 54 is 0, round down
343 * If bit 54 is 1
344 * If any bit beyond bit 54 is 1, round up
345 * If all bits beyond bit 54 are 0 (meaning the number is halfway between two floating-point numbers)
346 * If bit 53 is 0, round down
347 * If bit 53 is 1, round up
348 */
349 /*
350 test case 1 complete
351 s2=0x1FFFFFFF
352 s1=0xFFFFFF80
353 s0=0x0
354 */
355
356 /*
357 test case 1 check_round_bit
358 s2=0x18888888
359 s1=0x88888880
360 s0=0x0
361 */
362
363 /*
364 test case check_round_bit + normalization
365 s2=0x18888888
366 s1=0x88888F80
367 s0=0x0
368 */
369
370 // C.printf(c"mantissa before rounding: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp)
371 // s1 => 0xFFFFFFxx only F are represented
372 nbit := 7
373 check_round_bit := u32(1) << u32(nbit)
374 check_round_mask := u32(0xFFFFFFFF) << u32(nbit)
375 if (s1 & check_round_bit) != 0 {
376 // C.printf(c"need round!! check mask: %08x\n", s1 & ~check_round_mask )
377 if (s1 & ~check_round_mask) != 0 {
378 // C.printf(c"Add 1!\n")
379 s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
380 } else {
381 // C.printf(c"All 0!\n")
382 if (s1 & (check_round_bit << u32(1))) != 0 {
383 // C.printf(c"Add 1 form -1 bit control!\n")
384 s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
385 }
386 }
387 s1 = s1 & check_round_mask
388 s0 = u32(0)
389 // recheck normalization
390 if s2 & (mask28 << u32(1)) != 0 {
391 // C.printf(c"Renormalize!!\n")
392 q2, q1, q0 = lsr96(s2, s1, s0)
393 binexp++
394 // dump(binexp)
395 s2 = q2
396 s1 = q1
397 s0 = q0
398 }
399 }
400 // tmp := ( u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8)
401 // C.printf(c"mantissa after rounding : %08x %08x %08x binexp: %d \n", s2,s1,s0,binexp)
402 // C.printf(c"Tmp result: %016x\n",tmp)
403 // end rounding
404 // offset the binary exponent IEEE 754
405 binexp += 1023
406 if binexp > 2046 {
407 if pn.negative {
408 result = double_minus_infinity
409 } else {
410 result = double_plus_infinity
411 }
412 } else if binexp < 1 {
413 // Should not reach here for subnormals anymore (handled earlier)
414 // This is now only for true zeros
415 if pn.negative {
416 result = double_minus_zero
417 } else {
418 result = double_plus_zero
419 }
420 } else if s2 != 0 {
421 mut q := u64(0)
422 binexs2 := u64(binexp) << 52
423 q = (u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) | binexs2
424 if pn.negative {
425 q |= (u64(1) << 63)
426 }
427 result = q
428 }
429 return result
430}
431
432@[params]
433pub struct AtoF64Param {
434pub:
435 allow_extra_chars bool // allow extra characters after number
436}
437
438// atof64 parses the string `s`, and if possible, converts it into a f64 number
439pub fn atof64(s string, param AtoF64Param) !f64 {
440 if s.len == 0 {
441 return error('expected a number found an empty string')
442 }
443 mut res := Float64u{}
444 res_parsing, mut pn := parser(s)
445 match res_parsing {
446 .ok {
447 res.u = converter(mut pn)
448 }
449 .pzero {
450 res.u = double_plus_zero
451 }
452 .mzero {
453 res.u = double_minus_zero
454 }
455 .pinf {
456 res.u = double_plus_infinity
457 }
458 .minf {
459 res.u = double_minus_infinity
460 }
461 .extra_char {
462 if param.allow_extra_chars {
463 res.u = converter(mut pn)
464 } else {
465 return error('extra char after number')
466 }
467 }
468 .invalid_number {
469 return error('not a number')
470 }
471 }
472
473 return unsafe { res.f }
474}
475