v / vlib / strconv / f64_str.c.v
386 lines · 344 sloc · 10.32 KB · e2e5cf8db56f3562c7baa735061690be936bdf3e
Raw
1module strconv
2
3/*=============================================================================
4
5f64 to string
6
7Copyright (c) 2019-2024 Dario Deledda. All rights reserved.
8Use of this source code is governed by an MIT license
9that can be found in the LICENSE file.
10
11This file contains the f64 to string functions
12
13These functions are based on the work of:
14Publication:PLDI 2018: Proceedings of the 39th ACM SIGPLAN
15Conference on Programming Language Design and ImplementationJune 2018
16Pages 270–282 https://doi.org/10.1145/3192366.3192369
17
18inspired by the Go version here:
19https://github.com/cespare/ryu/tree/ba56a33f39e3bbbfa409095d0f9ae168a595feea
20
21=============================================================================*/
22
23@[direct_array_access]
24fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
25 mut n_digit := if i_n_digit < 1 { 1 } else { i_n_digit + 1 }
26 pad_digit := i_pad_digit + 1
27 mut out := d.m
28 mut d_exp := d.e
29 // mut out_len := decimal_len_64(out)
30 mut out_len := dec_digits(out)
31 out_len_original := out_len
32
33 mut fw_zeros := 0
34 if pad_digit > out_len {
35 fw_zeros = pad_digit - out_len
36 }
37
38 mut buf := []u8{len: (out_len + 6 + 1 + 1 + fw_zeros)} // sign + mant_len + . + e + e_sign + exp_len(2) + \0}
39 mut i := 0
40
41 if neg {
42 buf[i] = `-`
43 i++
44 }
45
46 mut disp := 0
47 if out_len <= 1 {
48 disp = 1
49 }
50
51 // rounding last used digit
52 if n_digit < out_len {
53 // println("out:[${out}]")
54 out += ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up
55 out /= ten_pow_table_64[out_len - n_digit]
56 // println("out1:[${out}] ${d.m / ten_pow_table_64[out_len - n_digit ]}")
57 // fix issue #22424
58 out_div := d.m / ten_pow_table_64[out_len - n_digit]
59 if out_div < out && dec_digits(out_div) < dec_digits(out) {
60 // from `99` to `100`, will need d_exp+1
61 d_exp++
62 n_digit++
63 }
64
65 // println("cmp: ${d.m/ten_pow_table_64[out_len - n_digit ]} ${out/ten_pow_table_64[out_len - n_digit ]}")
66
67 out_len = n_digit
68 // println("orig: ${out_len_original} new len: ${out_len} out:[${out}]")
69 }
70
71 y := i + out_len
72 mut x := 0
73 for x < (out_len - disp - 1) {
74 buf[y - x] = `0` + u8(out % 10)
75 out /= 10
76 i++
77 x++
78 }
79
80 // fix issue #22424
81 // no decimal digits needed, end here
82 // if i_n_digit == 0 {
83 // unsafe {
84 // buf[i] = 0
85 // return tos(&u8(&buf[0]), i)
86 // }
87 //}
88
89 if out_len > 1 || fw_zeros > 0 {
90 buf[y - x] = `.`
91 i++
92 }
93 x++
94
95 if y - x >= 0 {
96 buf[y - x] = `0` + u8(out % 10)
97 i++
98 }
99
100 for fw_zeros > 0 {
101 buf[i] = `0`
102 i++
103 fw_zeros--
104 }
105
106 buf[i] = `e`
107 i++
108
109 mut exp := d_exp + out_len_original - 1
110 if exp < 0 {
111 buf[i] = `-`
112 i++
113 exp = -exp
114 } else {
115 buf[i] = `+`
116 i++
117 }
118
119 // Always print at least two digits to match strconv's formatting.
120 d2 := exp % 10
121 exp /= 10
122 d1 := exp % 10
123 d0 := exp / 10
124 if d0 > 0 {
125 buf[i] = `0` + u8(d0)
126 i++
127 }
128 buf[i] = `0` + u8(d1)
129 i++
130 buf[i] = `0` + u8(d2)
131 i++
132 buf[i] = 0
133
134 return unsafe {
135 tos(memdup(&buf[0], i + 1), i)
136 }
137}
138
139@[ignore_overflow]
140fn f64_to_decimal_exact_int(i_mant u64, exp u64) (Dec64, bool) {
141 mut d := Dec64{}
142 e := exp - bias64
143 if e > mantbits64 {
144 return d, false
145 }
146 shift := mantbits64 - e
147 mant := i_mant | u64(0x0010_0000_0000_0000) // implicit 1
148 // mant := i_mant | (1 << mantbits64) // implicit 1
149 d.m = mant >> shift
150 if (d.m << shift) != mant {
151 return d, false
152 }
153
154 for (d.m % 10) == 0 {
155 d.m /= 10
156 d.e++
157 }
158 return d, true
159}
160
161fn f64_to_decimal(mant u64, exp u64) Dec64 {
162 mut e2 := 0
163 mut m2 := u64(0)
164 if exp == 0 {
165 // We subtract 2 so that the bounds computation has
166 // 2 additional bits.
167 e2 = 1 - bias64 - int(mantbits64) - 2
168 m2 = mant
169 } else {
170 e2 = int(exp) - bias64 - int(mantbits64) - 2
171 m2 = (u64(1) << mantbits64) | mant
172 }
173 even := (m2 & 1) == 0
174 accept_bounds := even
175
176 // Step 2: Determine the interval of valid decimal representations.
177 mv := u64(4 * m2)
178 mm_shift := bool_to_u64(mant != 0 || exp <= 1)
179
180 // Step 3: Convert to a decimal power base uing 128-bit arithmetic.
181 mut vr := u64(0)
182 mut vp := u64(0)
183 mut vm := u64(0)
184 mut e10 := 0
185 mut vm_is_trailing_zeros := false
186 mut vr_is_trailing_zeros := false
187
188 if e2 >= 0 {
189 // This expression is slightly faster than max(0, log10Pow2(e2) - 1).
190 q := log10_pow2(e2) - bool_to_u32(e2 > 3)
191 e10 = int(q)
192 k := pow5_inv_num_bits_64 + pow5_bits(int(q)) - 1
193 i := -e2 + int(q) + k
194
195 mul := *(&Uint128(&pow5_inv_split_64_x[q * 2]))
196 vr = mul_shift_64(u64(4) * m2, mul, i)
197 vp = mul_shift_64(u64(4) * m2 + u64(2), mul, i)
198 vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, i)
199 if q <= 21 {
200 // This should use q <= 22, but I think 21 is also safe.
201 // Smaller values may still be safe, but it's more
202 // difficult to reason about them. Only one of mp, mv,
203 // and mm can be a multiple of 5, if any.
204 if mv % 5 == 0 {
205 vr_is_trailing_zeros = multiple_of_power_of_five_64(mv, q)
206 } else if accept_bounds {
207 // Same as min(e2 + (^mm & 1), pow5Factor64(mm)) >= q
208 // <=> e2 + (^mm & 1) >= q && pow5Factor64(mm) >= q
209 // <=> true && pow5Factor64(mm) >= q, since e2 >= q.
210 vm_is_trailing_zeros = multiple_of_power_of_five_64(mv - 1 - mm_shift, q)
211 } else if multiple_of_power_of_five_64(mv + 2, q) {
212 vp--
213 }
214 }
215 } else {
216 // This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
217 q := log10_pow5(-e2) - bool_to_u32(-e2 > 1)
218 e10 = int(q) + e2
219 i := -e2 - int(q)
220 k := pow5_bits(i) - pow5_num_bits_64
221 j := int(q) - k
222 mul := *(&Uint128(&pow5_split_64_x[i * 2]))
223 vr = mul_shift_64(u64(4) * m2, mul, j)
224 vp = mul_shift_64(u64(4) * m2 + u64(2), mul, j)
225 vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, j)
226 if q <= 1 {
227 // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
228 // mv = 4 * m2, so it always has at least two trailing 0 bits.
229 vr_is_trailing_zeros = true
230 if accept_bounds {
231 // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
232 vm_is_trailing_zeros = (mm_shift == 1)
233 } else {
234 // mp = mv + 2, so it always has at least one trailing 0 bit.
235 vp--
236 }
237 } else if q < 63 { // TODO(ulfjack/cespare): Use a tighter bound here.
238 // We need to compute min(ntz(mv), pow5Factor64(mv) - e2) >= q - 1
239 // <=> ntz(mv) >= q - 1 && pow5Factor64(mv) - e2 >= q - 1
240 // <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q)
241 // <=> (mv & ((1 << (q - 1)) - 1)) == 0
242 // We also need to make sure that the left shift does not overflow.
243 vr_is_trailing_zeros = multiple_of_power_of_two_64(mv, q - 1)
244 }
245 }
246
247 // Step 4: Find the shortest decimal representation
248 // in the interval of valid representations.
249 mut removed := 0
250 mut last_removed_digit := u8(0)
251 mut out := u64(0)
252 // On average, we remove ~2 digits.
253 if vm_is_trailing_zeros || vr_is_trailing_zeros {
254 // General case, which happens rarely (~0.7%).
255 for {
256 vp_div_10 := vp / 10
257 vm_div_10 := vm / 10
258 if vp_div_10 <= vm_div_10 {
259 break
260 }
261 vm_mod_10 := vm % 10
262 vr_div_10 := vr / 10
263 vr_mod_10 := vr % 10
264 vm_is_trailing_zeros = vm_is_trailing_zeros && vm_mod_10 == 0
265 vr_is_trailing_zeros = vr_is_trailing_zeros && last_removed_digit == 0
266 last_removed_digit = u8(vr_mod_10)
267 vr = vr_div_10
268 vp = vp_div_10
269 vm = vm_div_10
270 removed++
271 }
272 if vm_is_trailing_zeros {
273 for {
274 vm_div_10 := vm / 10
275 vm_mod_10 := vm % 10
276 if vm_mod_10 != 0 {
277 break
278 }
279 vp_div_10 := vp / 10
280 vr_div_10 := vr / 10
281 vr_mod_10 := vr % 10
282 vr_is_trailing_zeros = vr_is_trailing_zeros && last_removed_digit == 0
283 last_removed_digit = u8(vr_mod_10)
284 vr = vr_div_10
285 vp = vp_div_10
286 vm = vm_div_10
287 removed++
288 }
289 }
290 if vr_is_trailing_zeros && last_removed_digit == 5 && (vr % 2) == 0 {
291 // Round even if the exact number is .....50..0.
292 last_removed_digit = 4
293 }
294 out = vr
295 // We need to take vr + 1 if vr is outside bounds
296 // or we need to round up.
297 if (vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5 {
298 out++
299 }
300 } else {
301 // Specialized for the common case (~99.3%).
302 // Percentages below are relative to this.
303 mut round_up := false
304 for vp / 100 > vm / 100 {
305 // Optimization: remove two digits at a time (~86.2%).
306 round_up = (vr % 100) >= 50
307 vr /= 100
308 vp /= 100
309 vm /= 100
310 removed += 2
311 }
312 // Loop iterations below (approximately), without optimization above:
313 // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
314 // Loop iterations below (approximately), with optimization above:
315 // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
316 for vp / 10 > vm / 10 {
317 round_up = (vr % 10) >= 5
318 vr /= 10
319 vp /= 10
320 vm /= 10
321 removed++
322 }
323 // We need to take vr + 1 if vr is outside bounds
324 // or we need to round up.
325 out = vr + bool_to_u64(vr == vm || round_up)
326 }
327
328 return Dec64{
329 m: out
330 e: e10 + removed
331 }
332}
333
334//=============================================================================
335// String Functions
336//=============================================================================
337
338// f64_to_str returns `f` as a `string` in scientific notation with max `n_digit` digits after the dot.
339pub fn f64_to_str(f f64, n_digit int) string {
340 mut u1 := Uf64{}
341 u1.f = f
342 u := unsafe { u1.u }
343
344 neg := (u >> (mantbits64 + expbits64)) != 0
345 mant := u & ((u64(1) << mantbits64) - u64(1))
346 exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
347 // println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}")
348
349 // Exit early for easy cases.
350 if exp == maxexp64 || (exp == 0 && mant == 0) {
351 return get_string_special(neg, exp == 0, mant == 0)
352 }
353
354 mut d, ok := f64_to_decimal_exact_int(mant, exp)
355 if !ok {
356 // println("to_decimal")
357 d = f64_to_decimal(mant, exp)
358 }
359 // println("${d.m} ${d.e}")
360 return d.get_string_64(neg, n_digit, 0)
361}
362
363// f64_to_str returns `f` as a `string` in scientific notation with max `n_digit` digits after the dot.
364pub fn f64_to_str_pad(f f64, n_digit int) string {
365 mut u1 := Uf64{}
366 u1.f = f
367 u := unsafe { u1.u }
368
369 neg := (u >> (mantbits64 + expbits64)) != 0
370 mant := u & ((u64(1) << mantbits64) - u64(1))
371 exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
372 // unsafe { println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016x}") }
373
374 // Exit early for easy cases.
375 if exp == maxexp64 || (exp == 0 && mant == 0) {
376 return get_string_special(neg, exp == 0, mant == 0)
377 }
378
379 mut d, ok := f64_to_decimal_exact_int(mant, exp)
380 if !ok {
381 // println("to_decimal")
382 d = f64_to_decimal(mant, exp)
383 }
384 // println("DEBUG: ${d.m} ${d.e}")
385 return d.get_string_64(neg, n_digit, n_digit)
386}
387