v2 / vlib / builtin / string_interpolation.v
767 lines · 726 sloc · 18.18 KB · e2e5cf8db56f3562c7baa735061690be936bdf3e
Raw
1module builtin
2
3import strconv
4import strings
5
6// Copyright (c) 2019-2024 Dario Deledda. All rights reserved.
7// Use of this source code is governed by an MIT license
8// that can be found in the LICENSE file.
9
10// This file contains V functions for string interpolation
11
// StrIntpType enumerates every format type supported by string interpolation.
// The numeric value of a member is what gets stored in the low 5 bits of the
// packed format word, hence the limit of 32 types. The values are spelled out
// because precomputed format codes elsewhere in this file depend on them.
pub enum StrIntpType {
	si_no_str = 0 // no parameter, print only the fixed string
	si_c      = 1 // rune
	si_u8     = 2
	si_i8     = 3
	si_u16    = 4
	si_i16    = 5
	si_u32    = 6
	si_i32    = 7
	si_u64    = 8
	si_i64    = 9
	si_e32    = 10 // f32 data, `e` format
	si_e64    = 11 // f64 data, `e` format
	si_f32    = 12
	si_f64    = 13
	si_g32    = 14 // f32 data, `g` format
	si_g64    = 15 // f64 data, `g` format
	si_s      = 16 // string
	si_p      = 17 // pointer
	si_r      = 18 // repeat string
	si_vp     = 19 // V pointer
}
35
// str returns the short name of the data type handled by the format type `x`.
// The `e` and `g` float formats report the type of the data they consume
// (`f32` or `f64`), not the format letter.
pub fn (x StrIntpType) str() string {
	return match x {
		.si_no_str { 'no_str' }
		.si_c { 'c' }
		.si_u8 { 'u8' }
		.si_i8 { 'i8' }
		.si_u16 { 'u16' }
		.si_i16 { 'i16' }
		.si_u32 { 'u32' }
		.si_i32 { 'i32' }
		.si_u64 { 'u64' }
		.si_i64 { 'i64' }
		// f, g and e formats all operate on the same 32 bit float data
		.si_f32, .si_g32, .si_e32 { 'f32' }
		// f, g and e formats all operate on the same 64 bit float data
		.si_f64, .si_g64, .si_e64 { 'f64' }
		.si_s { 's' }
		.si_p { 'p' }
		.si_r { 'r' } // repeat string
		.si_vp { 'vp' }
	}
}
60
// StrIntpMem is the union that carries the value to interpolate inside a
// StrIntpData. Only the member matching the format type of the entry is read.
pub union StrIntpMem {
pub mut:
	// rune
	d_c u32
	// integers
	d_u8  u8
	d_i8  i8
	d_u16 u16
	d_i16 i16
	d_u32 u32
	d_i32 i32
	d_u64 u64
	d_i64 i64
	// floats
	d_f32 f32
	d_f64 f64
	// strings
	d_s string
	d_r string
	// pointers
	d_p  voidptr
	d_vp voidptr
}
80
// fabs32 returns `x` with the sign removed when `x` compares below zero;
// any other value is returned unchanged.
@[inline]
fn fabs32(x f32) f32 {
	if x < 0 {
		return -x
	}
	return x
}
85
// fabs64 returns `x` with the sign removed when `x` compares below zero;
// any other value is returned unchanged.
@[inline]
fn fabs64(x f64) f64 {
	if x < 0 {
		return -x
	}
	return x
}
90
// abs64 returns the magnitude of `x` as an unsigned 64 bit value.
// The negation is performed on the unsigned representation, so the minimum
// i64 value (whose magnitude does not fit in an i64) is handled without
// relying on signed overflow: it yields 9223372036854775808.
@[inline]
fn abs64(x i64) u64 {
	if x < 0 {
		// two's complement negate in the unsigned domain (well defined wrap-around)
		return u64(0) - u64(x)
	}
	return u64(x)
}
95
// u32/u64 bit compact format
//___    32      24      16       8
//___     |       |       |       |
//_3333333333222222222211111111110000000000
//_9876543210987654321098765432109876543210
//_nPPPPPPPPBBBBWWWWWWWWWWTDDDDDDDSUAA=====
// = data type    5 bit  max 32 data types
// A align        2 bit  Note: for now only 1 bit is used!
// U uppercase    1 bit  0 do nothing, 1 do to_upper()
// S sign         1 bit  show the sign if positive
// D decimals     7 bit  number of decimal digits to show
// T tail zeros   1 bit  1 remove tail zeros, 0 do nothing
// W width       10 bit  number of chars for padding and indentation
// B num base     4 bit  start from 2, 0 for base 10
// P pad char   1/8 bit  padding char (in the u32 format reduced to 1 bit, as a flag for `0` padding)
//              --------------
//     TOTAL:   39/32 bit
//---------------------------------------
114
// get_str_intp_u64_format packs the formatting options of one interpolation
// into the compact u64 format word.
// A positive `in_width` sets the alignment bit (right), only the magnitude of
// the width is stored. An `in_precision` of 987698 is stored as 0x7F.
pub fn get_str_intp_u64_format(fmt_type StrIntpType, in_width int, in_precision int, in_tail_zeros bool,
	in_sign bool, in_pad_ch u8, in_base int, in_upper_case bool) u64 {
	// bits 0..4: data type
	mut packed := u64(fmt_type) & 0x1F
	// bit 5: alignment, 0 .left 1 .right (two bits are reserved, only one is used for now)
	if in_width > 0 {
		packed |= u64(1) << 5
	}
	// bit 7: upper case
	if in_upper_case {
		packed |= u64(1) << 7
	}
	// bit 8: show the sign
	if in_sign {
		packed |= u64(1) << 8
	}
	// bits 9..15: precision
	precision_bits := if in_precision == 987698 { u64(0x7F) } else { u64(in_precision & 0x7F) }
	packed |= precision_bits << 9
	// bit 16: remove tail zeros
	if in_tail_zeros {
		packed |= u64(1) << 16
	}
	// bits 17..26: width magnitude
	packed |= (abs64(in_width) & 0x3FF) << 17
	// bits 27..30: number base
	packed |= u64(u32(in_base & 0xf) << 27)
	// bits 31..38: padding char
	packed |= u64(in_pad_ch) << 31
	return packed
}
132
// Bit flags stored in `StrIntpData.dyn_flags`.
// bit 0: the width must be read from `dyn_width`
const str_intp_has_dynamic_width = u8(1 << 0)
// bit 1: the precision must be read from `dyn_precision`
const str_intp_has_dynamic_precision = u8(1 << 1)
135
// get_str_intp_u32_format packs the formatting options of one interpolation
// into the compact u32 format word.
// A positive `in_width` sets the alignment bit (right), only the magnitude of
// the width is stored. An `in_precision` of 987698 is stored as 0x7F.
// Only the lowest bit of `in_pad_ch` is kept, as a flag in bit 31.
pub fn get_str_intp_u32_format(fmt_type StrIntpType, in_width int, in_precision int, in_tail_zeros bool,
	in_sign bool, in_pad_ch u8, in_base int, in_upper_case bool) u32 {
	// bits 0..4: data type
	mut packed := u32(fmt_type) & 0x1F
	// bit 5: alignment, 0 .left 1 .right (two bits are reserved, only one is used for now)
	if in_width > 0 {
		packed |= u32(1) << 5
	}
	// bit 7: upper case
	if in_upper_case {
		packed |= u32(1) << 7
	}
	// bit 8: show the sign
	if in_sign {
		packed |= u32(1) << 8
	}
	// bits 9..15: precision
	precision_bits := if in_precision == 987698 { u32(0x7F) } else { u32(in_precision & 0x7F) }
	packed |= precision_bits << 9
	// bit 16: remove tail zeros
	if in_tail_zeros {
		packed |= u32(1) << 16
	}
	// bits 17..26: width magnitude
	packed |= u32(abs64(in_width) & 0x3FF) << 17
	// bits 27..30: number base
	packed |= u32(in_base & 0xf) << 27
	// bit 31: padding flag
	packed |= u32(in_pad_ch & 1) << 31
	return packed
}
153
// process_str_intp_data decodes the packed format word in `data.fmt`, applies
// the dynamic width/precision overrides flagged in `data.dyn_flags`, and
// appends the formatted value stored in `data.d` to `sb`.
// Nothing is written when the format type is `.si_no_str`.
// An unknown format type writes the marker `***ERROR!***`.
@[manualfree]
fn (data &StrIntpData) process_str_intp_data(mut sb strings.Builder) {
	// unpack the compact u32 format word
	x := data.fmt
	typ := unsafe { StrIntpType(x & 0x1F) }
	mut align := int((x >> 5) & 0x01)
	upper_case := ((x >> 7) & 0x01) > 0
	sign := int((x >> 8) & 0x01)
	mut precision := int((x >> 9) & 0x7F)
	tail_zeros := ((x >> 16) & 0x01) > 0
	mut width := int(i16((x >> 17) & 0x3FF))
	mut base := int(x >> 27) & 0xF
	fmt_pad_ch := u8((x >> 31) & 0xFF)
	has_dynamic_width := (data.dyn_flags & str_intp_has_dynamic_width) != 0
	has_dynamic_precision := (data.dyn_flags & str_intp_has_dynamic_precision) != 0

	// no string interpolation is needed, nothing to write
	if typ == .si_no_str {
		return
	}

	// if width > 0 { println("${x.hex()} Type: ${x & 0x7F} Width: ${width} Precision: ${precision} align:${align}") }

	// manage base if any
	if base > 0 {
		base += 2 // we start from 2, 0 == base 10
	}
	// a dynamic width replaces the packed one; its sign selects the alignment
	if has_dynamic_width {
		width = data.dyn_width
		if width < 0 {
			width = -width
			align = 0
		} else if width > 0 {
			align = 1
		}
	}
	if has_dynamic_precision {
		precision = data.dyn_precision
	}

	// manage pad char, for now only `0` is allowed
	mut pad_ch := u8(` `)
	if fmt_pad_ch > 0 {
		// pad_ch = fmt_pad_ch
		pad_ch = `0`
	}

	len0_set := if width > 0 { width } else { -1 }
	// 0x7F in the packed precision field means "no precision given"
	len1_set := if has_dynamic_precision {
		if precision >= 0 { precision } else { -1 }
	} else if precision == 0x7F {
		-1
	} else {
		precision
	}
	sign_set := sign == 1

	mut bf := strconv.BF_param{
		pad_ch:       pad_ch     // padding char
		len0:         len0_set   // default len for whole the number or string
		len1:         len1_set   // number of decimal digits, if needed
		positive:     true       // mandatory: the sign of the number passed
		sign_flag:    sign_set   // flag for print sign as prefix in padding
		align:        .left      // alignment of the string
		rm_tail_zero: tail_zeros // remove the tail zeros from floats
	}

	// align
	if fmt_pad_ch == 0 || pad_ch == `0` {
		match align {
			0 { bf.align = .left }
			1 { bf.align = .right }
			// 2 { bf.align = .center }
			else { bf.align = .left }
		}
	} else {
		bf.align = .right
	}

	unsafe {
		// strings
		if typ == .si_s {
			if upper_case {
				s := data.d.d_s.to_upper()
				if width == 0 {
					sb.write_string(s)
				} else {
					strconv.format_str_sb(s, bf, mut sb)
				}
				s.free()
			} else {
				if width == 0 {
					sb.write_string(data.d.d_s)
				} else {
					strconv.format_str_sb(data.d.d_s, bf, mut sb)
				}
			}
			return
		}

		// repeated strings: the width is the repetition count
		if typ == .si_r {
			if width > 0 {
				if upper_case {
					s := data.d.d_s.to_upper()
					for _ in 0 .. width {
						sb.write_string(s)
					}
					s.free()
				} else {
					for _ in 0 .. width {
						sb.write_string(data.d.d_s)
					}
				}
			}
			return
		}

		// signed int
		if typ in [.si_i8, .si_i16, .si_i32, .si_i64] {
			mut d := data.d.d_i64
			if typ == .si_i8 {
				d = i64(data.d.d_i8)
			} else if typ == .si_i16 {
				d = i64(data.d.d_i16)
			} else if typ == .si_i32 {
				d = i64(data.d.d_i32)
			}

			if base == 0 {
				if d < 0 {
					bf.positive = false
				}
				// Format straight into the builder to avoid temporary `d.str()` allocations
				// for plain `${int}` interpolations.
				strconv.format_dec_sb(abs64(d), bf, mut sb)
			} else {
				// binary, we use 3 for binary
				if base == 3 {
					base = 2
				}
				mut absd, mut write_minus := d, false
				if d < 0 && pad_ch != ` ` {
					absd = -d
					write_minus = true
				}
				mut hx := strconv.format_int(absd, base)
				if upper_case {
					tmp := hx
					hx = hx.to_upper()
					tmp.free()
				}
				if write_minus {
					sb.write_u8(`-`)
					bf.len0-- // compensate for the `-` above
				}
				if width == 0 {
					sb.write_string(hx)
				} else {
					strconv.format_str_sb(hx, bf, mut sb)
				}
				hx.free()
			}
			return
		}

		// unsigned int
		if typ in [.si_u8, .si_u16, .si_u32, .si_u64] {
			mut d := data.d.d_u64
			if typ == .si_u8 {
				d = u64(data.d.d_u8)
			} else if typ == .si_u16 {
				d = u64(data.d.d_u16)
			} else if typ == .si_u32 {
				d = u64(data.d.d_u32)
			}
			if base == 0 {
				strconv.format_dec_sb(d, bf, mut sb)
			} else {
				// binary, we use 3 for binary
				if base == 3 {
					base = 2
				}
				mut hx := strconv.format_uint(d, base)
				if upper_case {
					tmp := hx
					hx = hx.to_upper()
					tmp.free()
				}
				if width == 0 {
					sb.write_string(hx)
				} else {
					strconv.format_str_sb(hx, bf, mut sb)
				}
				hx.free()
			}
			return
		}

		// pointers
		if typ == .si_p {
			// Read the pointer through its pointer union member first.
			// On 32-bit C compilers, initializing `.d_p` does not guarantee that
			// the upper half of `.d_u64` is zeroed.
			mut d := u64(data.d.d_p)
			base = 16 // TODO: **** decide the behaviour of this flag! ****
			if base == 0 {
				if width == 0 {
					d_str := d.str()
					sb.write_string(d_str)
					d_str.free()
					return
				}
				strconv.format_dec_sb(d, bf, mut sb)
			} else {
				mut hx := strconv.format_uint(d, base)
				if upper_case {
					tmp := hx
					hx = hx.to_upper()
					tmp.free()
				}
				if width == 0 {
					sb.write_string(hx)
				} else {
					strconv.format_str_sb(hx, bf, mut sb)
				}
				hx.free()
			}
			return
		}

		// default settings for floats
		mut use_default_str := false
		if width == 0 && precision == 0x7F {
			bf.len1 = 3
			use_default_str = true
		}
		if bf.len1 < 0 {
			bf.len1 = 3
		}

		match typ {
			// floating point
			.si_f32 {
				$if !nofloat ? {
					if use_default_str {
						mut f := data.d.d_f32.str()
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					} else {
						if data.d.d_f32 < 0 {
							bf.positive = false
						}
						mut f := strconv.format_fl(data.d.d_f32, bf)
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					}
				}
			}
			.si_f64 {
				$if !nofloat ? {
					if use_default_str {
						mut f := data.d.d_f64.str()
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					} else {
						if data.d.d_f64 < 0 {
							bf.positive = false
						}
						f_union := strconv.Float64u{
							f: data.d.d_f64
						}
						if f_union.u == strconv.double_minus_zero {
							bf.positive = false
						}

						mut f := strconv.format_fl(data.d.d_f64, bf)
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					}
				}
			}
			.si_g32 {
				if use_default_str {
					$if !nofloat ? {
						mut f := data.d.d_f32.strg()
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					}
				} else {
					// The zero/infinity constants of strconv are bit patterns, so they are
					// compared with the raw bits of the value (`d_u32` overlays `d_f32`
					// in the union) and not with the float value itself.
					f32_bits := data.d.d_u32
					// Manage +/-0
					if f32_bits == strconv.single_plus_zero {
						tmp_str := '0'
						strconv.format_str_sb(tmp_str, bf, mut sb)
						tmp_str.free()
						return
					}
					if f32_bits == strconv.single_minus_zero {
						tmp_str := '-0'
						strconv.format_str_sb(tmp_str, bf, mut sb)
						tmp_str.free()
						return
					}
					// Manage +/-INF
					if f32_bits == strconv.single_plus_infinity {
						mut tmp_str := '+inf'
						if upper_case {
							tmp_str = '+INF'
						}
						strconv.format_str_sb(tmp_str, bf, mut sb)
						tmp_str.free()
						// the value is fully written, do not format it a second time below
						return
					}
					if f32_bits == strconv.single_minus_infinity {
						mut tmp_str := '-inf'
						if upper_case {
							tmp_str = '-INF'
						}
						strconv.format_str_sb(tmp_str, bf, mut sb)
						tmp_str.free()
						// the value is fully written, do not format it a second time below
						return
					}

					if data.d.d_f32 < 0 {
						bf.positive = false
					}
					d := fabs32(data.d.d_f32)
					if d < 999_999.0 && d >= 0.00001 {
						mut f := strconv.format_fl(data.d.d_f32, bf)
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
						return
					}
					// NOTE: For 'g' and 'G' bf.len1 is the maximum number of significant digits.
					// Not like 'e' or 'E', which is the number of digits after the decimal point.
					bf.len1--
					mut f := strconv.format_es(data.d.d_f32, bf)
					if upper_case {
						tmp := f
						f = f.to_upper()
						tmp.free()
					}
					sb.write_string(f)
					f.free()
				}
			}
			.si_g64 {
				if use_default_str {
					$if !nofloat ? {
						mut f := data.d.d_f64.strg()
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					}
				} else {
					// The zero/infinity constants of strconv are bit patterns, so they are
					// compared with the raw bits of the value (`d_u64` overlays `d_f64`
					// in the union) and not with the float value itself.
					f64_bits := data.d.d_u64
					// Manage +/-0
					if f64_bits == strconv.double_plus_zero {
						tmp_str := '0'
						strconv.format_str_sb(tmp_str, bf, mut sb)
						tmp_str.free()
						return
					}
					if f64_bits == strconv.double_minus_zero {
						tmp_str := '-0'
						strconv.format_str_sb(tmp_str, bf, mut sb)
						tmp_str.free()
						return
					}
					// Manage +/-INF
					if f64_bits == strconv.double_plus_infinity {
						mut tmp_str := '+inf'
						if upper_case {
							tmp_str = '+INF'
						}
						strconv.format_str_sb(tmp_str, bf, mut sb)
						tmp_str.free()
						// the value is fully written, do not format it a second time below
						return
					}
					if f64_bits == strconv.double_minus_infinity {
						mut tmp_str := '-inf'
						if upper_case {
							tmp_str = '-INF'
						}
						strconv.format_str_sb(tmp_str, bf, mut sb)
						tmp_str.free()
						// the value is fully written, do not format it a second time below
						return
					}

					if data.d.d_f64 < 0 {
						bf.positive = false
					}
					d := fabs64(data.d.d_f64)
					if d < 999_999.0 && d >= 0.00001 {
						mut f := strconv.format_fl(data.d.d_f64, bf)
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
						return
					}
					// NOTE: For 'g' and 'G' bf.len1 is the maximum number of significant digits
					// Not like 'e' or 'E', which is the number of digits after the decimal point.
					bf.len1--
					mut f := strconv.format_es(data.d.d_f64, bf)
					if upper_case {
						tmp := f
						f = f.to_upper()
						tmp.free()
					}
					sb.write_string(f)
					f.free()
				}
			}
			.si_e32 {
				$if !nofloat ? {
					if use_default_str {
						mut f := data.d.d_f32.str()
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					} else {
						if data.d.d_f32 < 0 {
							bf.positive = false
						}
						mut f := strconv.format_es(data.d.d_f32, bf)
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					}
				}
			}
			.si_e64 {
				$if !nofloat ? {
					if use_default_str {
						mut f := data.d.d_f64.str()
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					} else {
						if data.d.d_f64 < 0 {
							bf.positive = false
						}
						mut f := strconv.format_es(data.d.d_f64, bf)
						if upper_case {
							tmp := f
							f = f.to_upper()
							tmp.free()
						}
						sb.write_string(f)
						f.free()
					}
				}
			}
			// runes
			.si_c {
				ss := utf32_to_str(data.d.d_c)
				sb.write_string(ss)
				ss.free()
			}
			// v pointers
			.si_vp {
				ss := u64(data.d.d_vp).hex()
				sb.write_string(ss)
				ss.free()
			}
			else {
				sb.write_string('***ERROR!***')
			}
		}
	}
}
677
// StrIntpCgenData is a storage struct used by cgen.
// All three fields are kept as strings.
pub struct StrIntpCgenData {
pub:
	str string
	fmt string
	d   string
}
685
// StrIntpData is a LOW LEVEL struct, passed to V from the generated C code.
// It describes one piece of an interpolated string: a fixed text followed by
// an optional formatted value.
pub struct StrIntpData {
pub:
	// fixed text written before the value
	str string
	// fmt u64 // expanded version for future use, 64 bit
	// packed format word; 0 means there is no value to format
	fmt u32
	// the value to format
	d StrIntpMem
	// width used when dyn_flags has the dynamic width bit set
	dyn_width int
	// precision used when dyn_flags has the dynamic precision bit set
	dyn_precision int
	dyn_flags     u8
}
697
// str_intp is the main entry point for string interpolation.
// It walks the `data_len` entries starting at `input_base`, appending for each
// one its fixed text and then its formatted value, and returns the result.
@[direct_array_access; manualfree]
pub fn str_intp(data_len int, input_base &StrIntpData) string {
	mut sb := strings.new_builder(64)
	for idx in 0 .. data_len {
		item := unsafe { &input_base[idx] }
		// avoid empty strings
		if item.str.len != 0 {
			sb.write_string(item.str)
		}
		// skip empty data
		if item.fmt == 0 {
			continue
		}
		item.process_str_intp_data(mut sb)
	}
	out := sb.str()
	unsafe { sb.free() }
	return out
}
717
// The consts here are utilities for the compiler's "auto_str_methods.v".
// They are used to substitute old _STR calls.
// Each one is a precomputed packed format word, written as C source text:
// 0xfe00 is the precision field set to 0x7F (no precision given), and the low
// byte is the StrIntpType value (0x10 si_s, 0x0e si_g32, 0x0f si_g64).
// FIXME: this const is not released from memory => use a precalculated string const for now.
// si_s_code = "0x" + int(StrIntpType.si_s).hex() // code for a simple string.
pub const si_s_code = '0xfe10'
pub const si_g32_code = '0xfe0e'
pub const si_g64_code = '0xfe0f'
725
// str_intp_sq returns the C source of a `str_intp` call that wraps the string
// expression `in_str` in single quotes.
@[inline]
pub fn str_intp_sq(in_str string) string {
	c_call := 'builtin__str_intp(2, _MOV((StrIntpData[]){{_S("\'"), ${si_s_code}, {.d_s = ${in_str}}, 0, 0, 0},{_S("\'"), 0, {0}, 0, 0, 0}}))'
	return c_call
}
730
// str_intp_rune returns the C source of a `str_intp` call that wraps the string
// expression `in_str` in backticks.
@[inline]
pub fn str_intp_rune(in_str string) string {
	c_call := 'builtin__str_intp(2, _MOV((StrIntpData[]){{_S("\`"), ${si_s_code}, {.d_s = ${in_str}}, 0, 0, 0},{_S("\`"), 0, {0}, 0, 0, 0}}))'
	return c_call
}
735
// str_intp_g32 returns the C source of a `str_intp` call that formats the f32
// expression `in_str` with the `g` format.
@[inline]
pub fn str_intp_g32(in_str string) string {
	c_call := 'builtin__str_intp(1, _MOV((StrIntpData[]){{_SLIT0, ${si_g32_code}, {.d_f32 = ${in_str} }, 0, 0, 0}}))'
	return c_call
}
740
// str_intp_g64 returns the C source of a `str_intp` call that formats the f64
// expression `in_str` with the `g` format.
@[inline]
pub fn str_intp_g64(in_str string) string {
	c_call := 'builtin__str_intp(1, _MOV((StrIntpData[]){{_SLIT0, ${si_g64_code}, {.d_f64 = ${in_str} }, 0, 0, 0}}))'
	return c_call
}
745
// str_intp_sub replaces the first `%%` of `base_str` with an interpolation of
// the string expression `in_str`, and returns the C source of the matching
// `str_intp` call. The text before `%%` becomes the fixed text of the first
// entry; any text after it becomes a second, value-less entry.
// When `base_str` has no `%%`, an error is printed and the program exits with code 1.
@[manualfree]
pub fn str_intp_sub(base_str string, in_str string) string {
	index := base_str.index('%%') or {
		eprintln('No string interpolation %% parameters')
		exit(1)
	}
	// return base_str[..index] + in_str + base_str[index+2..]
	unsafe {
		head := base_str[..index]
		has_tail := index + 2 < base_str.len
		if !has_tail {
			// `%%` closes the string: a single entry is enough
			single := 'builtin__str_intp(1, _MOV((StrIntpData[]){{_S("${head}"), ${si_s_code}, {.d_s = ${in_str} }, 0, 0, 0}}))'
			head.free()
			return single
		}
		tail := base_str[index + 2..]
		pair := 'builtin__str_intp(2, _MOV((StrIntpData[]){{_S("${head}"), ${si_s_code}, {.d_s = ${in_str} }, 0, 0, 0},{_S("${tail}"), 0, {0}, 0, 0, 0}}))'
		head.free()
		tail.free()
		return pair
	}
}
768