v2 / vlib / builtin / string.v
3252 lines · 3015 sloc · 81.38 KB · bdc2b4357bd8710471957b96a2c54796fa1c4e08
Raw
1// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module builtin
5
6import strconv
7import strings
8
9/*
10Note: A V string should be/is immutable from the point of view of
11 V user programs after it is first created. A V string is
12 also slightly larger than the equivalent C string because
13 the V string also has an integer length attached.
14
15 This tradeoff is made, since V strings are created just *once*,
16 but potentially used *many times* over their lifetime.
17
18 The V string implementation uses a struct, that has a .str field,
19 which points to a C style 0 terminated memory block. Although not
20 strictly necessary from the V point of view, that additional 0
21 is *very useful for C interoperability*.
22
23 The V string implementation also has an integer .len field,
24 containing the length of the .str field, excluding the
25 terminating 0 (just like the C's strlen(s) would do).
26
27 The 0 ending of .str, and the .len field, mean that in practice:
28 a) a V string s can be used very easily, wherever a
29 C string is needed, just by passing s.str,
30 without a need for further conversion/copying.
31
32 b) where strlen(s) is needed, you can just pass s.len,
33 without having to constantly recompute the length of s
34 *over and over again* like some C programs do. This is because
35 V strings are immutable and so their length does not change.
36
37 Ordinary V code *does not need* to be concerned with the
38 additional 0 in the .str field. The 0 *must* be put there by the
39 low level string creating functions inside this module.
40
41 Failing to do this will lead to programs that work most of the
42 time, when used with pure V functions, but fail in strange ways,
43 when used with modules using C functions (for example os and so on).
44*/
// string is the builtin V string type: a pointer to a block of bytes, plus the length of that block.
pub struct string {
pub:
	str &u8 = 0 // points to a C style 0 terminated string of bytes.
	len int // the length of the .str field, excluding the ending 0 byte. It is always equal to strlen(.str).
mut:
	// is_lit records the origin/state of the bytes behind .str (see the list of values below).
	is_lit int
	// NB string.is_lit is an enumeration of the following:
	// .is_lit == 0 => a fresh string, should be freed by autofree
	// .is_lit == 1 => a literal string from .rodata, should NOT be freed
	// .is_lit == -98761234 => already freed string, protects against double frees.
	// ---------> ^^^^^^^^^ calling free on these is a bug.
	// Any other value means that the string has been corrupted.
}
58
// runes decodes the string `s` into an array of its utf runes.
// Use it when you need random access to the individual runes.
@[direct_array_access]
pub fn (s string) runes() []rune {
	mut result := []rune{cap: s.len}
	mut pos := 0
	for pos < s.len {
		first := unsafe { s.str[pos] }
		width := utf8_char_len(first)
		if width <= 1 {
			// single byte sequence: the byte value is the rune itself
			result << first
			pos++
			continue
		}
		// clamp the slice end, so that a sequence cut short by the end of `s` stays in bounds
		stop := if pos + width <= s.len - 1 { pos + width } else { s.len }
		mut piece := unsafe { s[pos..stop] }
		result << piece.utf32_code()
		pos += width
	}
	return result
}
77
// graphemes splits the string `s` into its Unicode grapheme clusters,
// and returns them as an array of strings.
pub fn (s string) graphemes() []string {
	clusters := string_graphemes_impl(s)
	return clusters
}
82
// cstring_to_vstring returns a new V string, holding a *copy* of the C style
// 0 terminated string, pointed to by `s` (i.e. a `char*`).
// Prefer it over tos/tos2/tos3/tos4/tos5, unless you *do* understand their
// implications for memory management, -autofree and `@[manualfree]`.
// It will panic, if the pointer `s` is 0.
@[unsafe]
pub fn cstring_to_vstring(const_s &char) string {
	// `borrowed` still points to the caller's memory; only the clone is returned
	borrowed := unsafe { tos2(byteptr(const_s)) }
	owned := borrowed.clone()
	return owned
}
95
// tos_clone returns a new V string, holding a *copy* of the C style
// 0 terminated string, pointed to by `s`.
// It does the same as cstring_to_vstring; the only difference is that
// tos_clone expects `&u8`, while cstring_to_vstring expects `&char`.
// It will panic, if the pointer `s` is 0.
@[unsafe]
pub fn tos_clone(const_s &u8) string {
	// `borrowed` still points to the caller's memory; only the clone is returned
	borrowed := unsafe { tos2(const_s) }
	owned := borrowed.clone()
	return owned
}
105
// tos wraps the memory block pointed to by `s` in a V string of length `len`.
// Note: the memory block pointed by s is *reused, not copied*!
// The caller supplies `len`; it is stored as is, without being checked.
// It will panic, when the pointer `s` is 0.
// See also `tos_clone`.
@[unsafe]
pub fn tos(s &u8, len int) string {
	if s != 0 {
		return string{
			len: len
			str: unsafe { s }
		}
	}
	panic('tos(): nil string')
}
120
// tos2 wraps the C style 0 terminated block pointed to by `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with `vstrlen` first, so it is more costly than `tos`.
// It will panic, when the pointer `s` is 0.
// It is the same as `tos3`, but for &u8 pointers, avoiding callsite casts.
// See also `tos_clone`.
@[unsafe]
pub fn tos2(s &u8) string {
	if s == 0 {
		panic('tos2: nil string')
	}
	measured := unsafe { vstrlen(s) }
	return string{
		str: unsafe { s }
		len: measured
	}
}
137
// tos3 wraps the C style 0 terminated block pointed to by `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with `vstrlen_char` first, so it is more costly than `tos`.
// It will panic, when the pointer `s` is 0.
// It is the same as `tos2`, but for &char pointers, avoiding callsite casts.
// See also `tos_clone`.
@[unsafe]
pub fn tos3(s &char) string {
	if s == 0 {
		panic('tos3: nil string')
	}
	measured := unsafe { vstrlen_char(s) }
	return string{
		str: unsafe { &u8(s) }
		len: measured
	}
}
154
// tos4 wraps the C style 0 terminated block pointed to by `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with `vstrlen` first, so it is more costly than `tos`.
// It returns '', when given a 0 pointer `s`, it does NOT panic.
// It is the same as `tos5`, but for &u8 pointers, avoiding callsite casts.
// See also `tos_clone`.
@[unsafe]
pub fn tos4(s &u8) string {
	if s != 0 {
		measured := unsafe { vstrlen(s) }
		return string{
			str: unsafe { s }
			len: measured
		}
	}
	return ''
}
171
// tos5 wraps the C style 0 terminated block pointed to by `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with `vstrlen_char` first, so it is more costly than `tos`.
// It returns '', when given a 0 pointer `s`, it does NOT panic.
// It is the same as `tos4`, but for &char pointers, avoiding callsite casts.
// See also `tos_clone`.
@[unsafe]
pub fn tos5(s &char) string {
	if s != 0 {
		measured := unsafe { vstrlen_char(s) }
		return string{
			str: unsafe { &u8(s) }
			len: measured
		}
	}
	return ''
}
188
// vstring converts the C style 0 terminated string `bp` to a V string.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// Note: instead of `&u8(arr.data).vstring()`, do use `tos_clone(&u8(arr.data))`.
// Apart from that, the result is a normal V string, i.e. it would be freed by
// V's -autofree mechanism, when it is no longer used.
// See also `tos_clone`.
@[unsafe]
pub fn (bp &u8) vstring() string {
	measured := unsafe { vstrlen(bp) }
	return string{
		str: unsafe { bp }
		len: measured
	}
}
202
// vstring_with_len converts the C style 0 terminated string `bp` to a V string of length `len`.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// It has lower overhead than .vstring(), because the given `len` is used as is,
// instead of scanning for the terminating 0.
// See also `tos_clone`.
@[unsafe]
pub fn (bp &u8) vstring_with_len(len int) string {
	return string{
		is_lit: 0
		len:    len
		str:    unsafe { bp }
	}
}
216
// vstring converts the C style 0 terminated string `cp` to a V string.
// Note: the memory block pointed by `cp` is *reused, not copied*!
// Apart from that, the result is a normal V string, i.e. it would be freed by
// V's -autofree mechanism, when it is no longer used.
// Note: instead of `&u8(a.data).vstring()`, use `tos_clone(&u8(a.data))`.
// See also `tos_clone`.
@[unsafe]
pub fn (cp &char) vstring() string {
	measured := unsafe { vstrlen_char(cp) }
	return string{
		is_lit: 0
		len:    measured
		str:    &u8(cp)
	}
}
232
// vstring_with_len converts the C style 0 terminated string `cp` to a V string of length `len`.
// Note: the memory block pointed by `cp` is *reused, not copied*!
// It has lower overhead than .vstring(), because the given `len` is used as is,
// instead of scanning for the terminating 0.
// See also `tos_clone`.
@[unsafe]
pub fn (cp &char) vstring_with_len(len int) string {
	return string{
		is_lit: 0
		len:    len
		str:    &u8(cp)
	}
}
246
// vstring_literal converts the C style 0 terminated string `bp` to a V string.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// NB2: unlike vstring, vstring_literal marks the result as a literal
// (is_lit == 1), so it will not be freed by -autofree.
// That makes it suitable for readonly strings, C string literals etc,
// that the V program can read, but should not manage/free;
// for example `os.args` is implemented using it.
// See also `tos_clone`.
@[unsafe]
pub fn (bp &u8) vstring_literal() string {
	measured := unsafe { vstrlen(bp) }
	return string{
		is_lit: 1
		len:    measured
		str:    unsafe { bp }
	}
}
263
// vstring_literal_with_len converts the C style string `bp` to a V string of
// length `len`, that is marked as a literal (is_lit == 1).
// Note: the memory block pointed by `bp` is *reused, not copied*!
// It has lower overhead than .vstring_literal(), because the given `len` is
// used as is, instead of scanning for the terminating 0.
// See also `tos_clone`.
@[unsafe]
pub fn (bp &u8) vstring_literal_with_len(len int) string {
	return string{
		is_lit: 1
		len:    len
		str:    unsafe { bp }
	}
}
277
// vstring_literal converts the C style `char*` string `cp` to a V string,
// that is marked as a literal (is_lit == 1).
// Note: the memory block pointed by `cp` is *reused, not copied*!
// See also `byteptr.vstring_literal` for more details.
// See also `tos_clone`.
@[unsafe]
pub fn (cp &char) vstring_literal() string {
	measured := unsafe { vstrlen_char(cp) }
	return string{
		is_lit: 1
		len:    measured
		str:    &u8(cp)
	}
}
290
// vstring_literal_with_len converts the C style `char*` string `cp` to a V string
// of length `len`, that is marked as a literal (is_lit == 1).
// Note: the memory block pointed by `cp` is *reused, not copied*!
// It has lower overhead than .vstring_literal(), because the given `len` is
// used as is, instead of scanning for the terminating 0.
// See also `tos_clone`.
@[unsafe]
pub fn (cp &char) vstring_literal_with_len(len int) string {
	return string{
		is_lit: 1
		len:    len
		str:    &u8(cp)
	}
}
305
// len_utf8 returns the number of runes contained in the string `s`.
// It hops from one leading byte to the next, without decoding the runes.
pub fn (s string) len_utf8() int {
	mut count := 0
	mut pos := 0
	for pos < s.len {
		lead := unsafe { s.str[pos] }
		// 0xe5000000 is a packed table with 2 bits per entry, indexed by the top
		// 4 bits of the leading byte. Each entry is the number of bytes, that
		// follow the leading byte in its sequence (0, 1, 2 or 3).
		extra := (u32(0xe5000000) >> ((lead >> 3) & 0x1e)) & 3
		pos += int(extra + 1)
		count++
	}
	return count
}
316
// is_pure_ascii returns true, when every byte of the string is an ASCII character,
// i.e. when no byte is >= 0x80. An empty string is pure ASCII too.
// Note that UTF8 encodes ASCII characters in just 1 byte, while every byte
// of a longer sequence has its highest bit set:
// 1 byte: 0xxxxxxx
// 2 bytes: 110xxxxx 10xxxxxx
// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
@[direct_array_access]
pub fn (s string) is_pure_ascii() bool {
	for b in s {
		if b >= 0x80 {
			return false
		}
	}
	return true
}
332
// clone_static returns an independent copy of the given string.
// It should be used only in -autofree generated code.
@[inline]
fn (a string) clone_static() string {
	dup := a.clone()
	return dup
}
339
// option_clone_static returns an independent copy of the given string,
// wrapped in an option. It is meant for the case when the lhs is an option type.
// It should be used only in -autofree generated code.
@[inline]
fn (a string) option_clone_static() ?string {
	dup := a.clone()
	return ?string(dup)
}
346
// clone returns a copy of the V string `a`, stored in freshly allocated memory.
// For a string with len <= 0, it returns '' instead, without allocating.
pub fn (a string) clone() string {
	n := a.len
	if n <= 0 {
		return ''
	}
	// 1 extra byte for the terminating 0
	mut buf := unsafe { malloc_noscan(n + 1) }
	unsafe {
		vmemcpy(buf, a.str, n)
		buf[n] = 0
	}
	return string{
		str: unsafe { buf }
		len: n
	}
}
362
// replace_once replaces the first occurrence of `rep` with the string passed in `with`.
// When `rep` is not found, a copy of `s` is returned.
pub fn (s string) replace_once(rep string, with string) string {
	pos := s.index_(rep)
	if pos != -1 {
		// Same result as:
		//   s.substr(0, pos) + with + s.substr(pos + rep.len, s.len)
		// but `substr_unsafe` avoids the extra allocations for the two slices.
		// That is safe here, since `plus_two` copies the bytes of all its
		// operands into a new buffer with vmemcpy.
		return s.substr_unsafe(0, pos).plus_two(with, s.substr_unsafe(pos + rep.len, s.len))
	}
	return s.clone()
}
377
// replace_stack_buffer_size is the number of match offsets, that `replace`
// can keep on the stack, before it has to allocate them on the heap.
const replace_stack_buffer_size = 10

// replace replaces all occurrences of `rep` with the string passed in `with`.
// A copy of `s` is returned, when `s` or `rep` is empty, when `rep` is longer
// than `s`, or when `rep` does not occur in `s`.
@[direct_array_access; manualfree]
pub fn (s string) replace(rep string, with string) string {
	if s.len == 0 || rep.len == 0 || rep.len > s.len || !s.contains(rep) {
		return s.clone()
	}
	// No more than s.len / rep.len non overlapping matches fit in `s`.
	// Their offsets live on the stack, unless there can be too many of them.
	max_hits := s.len / rep.len
	on_heap := max_hits > replace_stack_buffer_size
	mut small_store := [replace_stack_buffer_size]int{}
	mut hits := unsafe { &small_store[0] }
	if on_heap {
		hits = unsafe { &int(malloc(int(sizeof(int)) * max_hits)) }
	}
	defer {
		if on_heap {
			unsafe { free(hits) }
		}
	}
	// Pass 1: record the offset of every match
	mut hits_len := 0
	mut cursor := 0
	for {
		cursor = s.index_after_(rep, cursor)
		if cursor == -1 {
			break
		}
		unsafe {
			hits[hits_len] = cursor
		}
		hits_len++
		cursor += rep.len
	}
	// Dont change the string if there's nothing to replace
	if hits_len == 0 {
		return s.clone()
	}
	// The number of matches is known now, so the result can be allocated just once
	out_len := s.len + hits_len * (with.len - rep.len)
	mut out := unsafe { malloc_noscan(out_len + 1) } // + 1 for the terminating 0
	// Pass 2: interleave the untouched pieces of `s` with copies of `with`
	mut w := 0 // write offset in `out`
	mut r := 0 // read offset in `s`
	for k in 0 .. hits_len {
		hit := unsafe { hits[k] }
		gap := hit - r
		unsafe {
			vmemcpy(&out[w], &s.str[r], gap)
			vmemcpy(&out[w + gap], &with.str[0], with.len)
		}
		w += gap + with.len
		r = hit + rep.len // continue reading after the replaced piece
	}
	if r < s.len {
		// copy what remains after the last match
		unsafe { vmemcpy(&out[w], &s.str[r], s.len - r) }
	}
	unsafe {
		out[out_len] = 0
		return tos(out, out_len)
	}
}
442
// RepIndex records a single match, that was found by `replace_each`.
struct RepIndex {
	idx     int // the offset in the source string, at which the match starts
	val_idx int // the index of the matched `rep` in `vals`; its `with` is at val_idx + 1
}
447
// replace_each replaces all occurrences of the string pairs given in `vals`.
// `vals` is a flat list of pairs: ['rep1', 'with1', 'rep2', 'with2', ...].
// The pairs are searched for in the given order, and text that was matched
// by an earlier pair, is not matched again by a later one.
// Pairs with an empty `rep` are ignored.
// A copy of `s` is returned, when `s` or `vals` is empty, or when nothing matches.
// When `vals` has an odd number of elements, a message is printed to stderr,
// and a copy of `s` is returned.
// Example: assert 'ABCD'.replace_each(['B','C/','C','D','D','C']) == 'AC/DC'
@[direct_array_access]
pub fn (s string) replace_each(vals []string) string {
	if s.len == 0 || vals.len == 0 {
		return s.clone()
	}
	if vals.len % 2 != 0 {
		eprintln('string.replace_each(): odd number of strings')
		return s.clone()
	}
	// `rep` - string to replace
	// `with` - string to replace with
	// Remember positions of all rep strings, and calculate the length
	// of the new string to do just one allocation.
	mut new_len := s.len
	mut idxs := []RepIndex{cap: 6}
	defer { unsafe { idxs.free() } }
	// `s_` is a private scratch copy of `s`, in which the matched bytes get overwritten
	s_ := s.clone()
	for rep_i := 0; rep_i < vals.len; rep_i += 2 {
		// vals: ['rep1, 'with1', 'rep2', 'with2']
		rep := vals[rep_i]
		with := vals[rep_i + 1]
		if rep.len == 0 {
			// An empty `rep` can never advance the search position below
			// (`idx += rep.len` would add 0), so the scan would not terminate.
			continue
		}
		// every pair is searched for from the start of the string
		mut idx := 0
		for {
			idx = s_.index_after_(rep, idx)
			if idx == -1 {
				break
			}
			// Overwrite the match with 0 bytes in the scratch copy,
			// so that the following pairs can not match the same text again.
			for i in 0 .. rep.len {
				unsafe {
					s_.str[idx + i] = 0
				}
			}
			// We need to remember both the position in the string,
			// and which rep/with pair it refers to.
			idxs << RepIndex{
				idx:     idx
				val_idx: rep_i
			}
			idx += rep.len
			new_len += with.len - rep.len
		}
	}

	// Dont change the string if there's nothing to replace
	if idxs.len == 0 {
		return s.clone()
	}
	// the matches were collected pair by pair; the fill loop needs them in string order
	idxs.sort(a.idx < b.idx)
	mut buf := unsafe { malloc_noscan(new_len + 1) } // add space for 0 terminator
	// Fill the new string
	mut idx_pos := 0
	mut cur_idx := idxs[idx_pos]
	mut buf_i := 0
	for i := 0; i < s.len; i++ {
		if i == cur_idx.idx {
			// Reached the location of rep, replace it with "with"
			rep := vals[cur_idx.val_idx]
			with := vals[cur_idx.val_idx + 1]
			for j in 0 .. with.len {
				unsafe {
					buf[buf_i] = with[j]
				}
				buf_i++
			}
			// Skip the length of rep, since we just replaced it with "with"
			i += rep.len - 1
			// Go to the next index
			idx_pos++
			if idx_pos < idxs.len {
				cur_idx = idxs[idx_pos]
			}
		} else {
			// Rep doesnt start here, just copy
			unsafe {
				buf[buf_i] = s.str[i]
			}
			buf_i++
		}
	}
	unsafe {
		buf[new_len] = 0
		return tos(buf, new_len)
	}
}
538
// format replaces positional placeholders like `{0}` and `{1}` in `s`
// with the corresponding values from `args`.
// Use `{{` and `}}` to output literal braces.
// A placeholder, whose index has no matching element in `args`, is copied unchanged.
// So is a `{`, that does not start a well formed placeholder.
@[direct_array_access]
pub fn (s string) format(args ...string) string {
	if s.len == 0 {
		return ''
	}
	mut sb := strings.new_builder(s.len)
	mut pos := 0
	for pos < s.len {
		cur := s[pos]
		has_next := pos + 1 < s.len
		// a doubled brace is the escape for a single literal brace
		if has_next && (cur == `{` || cur == `}`) && s[pos + 1] == cur {
			sb.write_byte(cur)
			pos += 2
			continue
		}
		if cur == `{` && has_next && s[pos + 1].is_digit() {
			// read the decimal argument index, giving up if it does not fit in an int
			mut arg_idx := 0
			mut fits := true
			mut stop := pos + 1
			for stop < s.len && s[stop].is_digit() {
				d := int(s[stop] - `0`)
				if arg_idx > (max_int - d) / 10 {
					fits = false
					break
				}
				arg_idx = arg_idx * 10 + d
				stop++
			}
			if fits && stop < s.len && s[stop] == `}` {
				if arg_idx < args.len {
					sb.write_string(args[arg_idx])
				} else {
					// no such argument: keep the placeholder text as it is
					sb.write_string(s[pos..stop + 1])
				}
				pos = stop + 1
				continue
			}
		}
		// everything else, including a `{` that does not start a placeholder, is copied
		sb.write_byte(cur)
		pos++
	}
	return sb.str()
}
597
// replace_char replaces all occurrences of the character `rep`, with `repeat` x the character passed in `with`.
// It panics when `repeat` is <= 0, unless the check is compiled out with `no_bounds_checking`.
// A copy of `s` is returned, when `s` is empty, or when `rep` does not occur in it.
// Example: assert '\tHello!'.replace_char(`\t`,` `,8) == '        Hello!'
@[direct_array_access]
pub fn (s string) replace_char(rep u8, with u8, repeat int) string {
	$if !no_bounds_checking {
		if repeat <= 0 {
			panic('string.replace_char(): tab length too short')
		}
	}
	if s.len == 0 {
		return s.clone()
	}
	// TODO: Allocating ints is expensive. Should be a stack array
	// - string.replace()
	mut hits := []int{cap: s.len >> 2}
	defer { unsafe { hits.free() } }
	// A single scan both detects and records the matches, so no contains() is needed
	for pos, b in s {
		if b == rep {
			hits << pos
		}
	}
	if hits.len == 0 {
		return s.clone()
	}
	// every match grows the result by (repeat - 1) bytes
	out_len := s.len + hits.len * (repeat - 1)
	mut out := unsafe { malloc_noscan(out_len + 1) } // + 1 for the terminating 0
	mut w := 0 // write offset in `out`
	mut r := 0 // read offset in `s`
	for hit in hits {
		// copy everything up to the character being replaced
		for r < hit {
			unsafe {
				out[w] = s[r]
			}
			w++
			r++
		}
		r = hit + 1 // skip the replaced character
		for k := 0; k < repeat; k++ {
			unsafe {
				out[w] = with
			}
			w++
		}
	}
	// copy what remains after the last match
	for r < s.len {
		unsafe {
			out[w] = s[r]
		}
		w++
		r++
	}
	unsafe {
		out[out_len] = 0
		return tos(out, out_len)
	}
}
657
// normalize_tabs replaces every tab character in `s` with `tab_len` spaces.
// It is a shortcut for `s.replace_char(`\t`, ` `, tab_len)`.
// Example: assert '\t\tpop rax\t; pop rax'.normalize_tabs(2) == '    pop rax  ; pop rax'
@[inline]
pub fn (s string) normalize_tabs(tab_len int) string {
	spaced := s.replace_char(`\t`, ` `, tab_len)
	return spaced
}
664
// expand_tabs replaces each tab character (\t) in `s` with as many spaces, as are
// needed to reach the next column that is a multiple of `tab_len`.
// The column counter restarts after every `\n` or `\r`.
// A copy of `s` is returned, when `tab_len` is <= 0.
// Example: assert 'AB\tHello!'.expand_tabs(4) == 'AB  Hello!'
pub fn (s string) expand_tabs(tab_len int) string {
	if tab_len <= 0 {
		return s.clone() // there is no meaningful tab stop to align to
	}
	mut sb := strings.new_builder(s.len)
	mut col := 0
	for r in s.runes_iterator() {
		if r == `\t` {
			pad := tab_len - (col % tab_len)
			sb.write_string(' '.repeat(pad))
			col += pad
			continue
		}
		sb.write_rune(r)
		// Every other rune counts as 1 column; wide Unicode characters are not accounted for.
		col = if r == `\n` || r == `\r` { 0 } else { col + 1 }
	}
	return sb.str()
}
692
// bool returns `true`, when the string is exactly 'true' or 't'.
// It returns `false` for everything else.
@[inline]
pub fn (s string) bool() bool {
	// TODO: t for pg, remove
	return match s {
		'true', 't' { true }
		else { false }
	}
}
698
// i8 returns the value of the string as i8 `'1'.i8() == i8(1)`.
// It returns 0, when `strconv.common_parse_int` reports an error.
@[inline]
pub fn (s string) i8() i8 {
	parsed := strconv.common_parse_int(s, 0, 8, false, false) or { 0 }
	return i8(parsed)
}
704
// i16 returns the value of the string as i16 `'1'.i16() == i16(1)`.
// It returns 0, when `strconv.common_parse_int` reports an error.
@[inline]
pub fn (s string) i16() i16 {
	parsed := strconv.common_parse_int(s, 0, 16, false, false) or { 0 }
	return i16(parsed)
}
710
// i32 returns the value of the string as i32 `'1'.i32() == i32(1)`.
// It returns 0, when `strconv.common_parse_int` reports an error.
@[inline]
pub fn (s string) i32() i32 {
	parsed := strconv.common_parse_int(s, 0, 32, false, false) or { 0 }
	return i32(parsed)
}
716
// int returns the value of the string as an integer `'1'.int() == 1`.
// The string is parsed with a bit size of 32.
// It returns 0, when `strconv.common_parse_int` reports an error.
@[inline]
pub fn (s string) int() int {
	parsed := strconv.common_parse_int(s, 0, 32, false, false) or { 0 }
	return int(parsed)
}
722
// i64 returns the value of the string as i64 `'1'.i64() == i64(1)`.
// It returns 0, when `strconv.common_parse_int` reports an error.
@[inline]
pub fn (s string) i64() i64 {
	parsed := strconv.common_parse_int(s, 0, 64, false, false) or { 0 }
	return parsed
}
728
// f32 returns the value of the string as f32 `'1.0'.f32() == f32(1)`.
// The string is parsed by `strconv.atof64` with `allow_extra_chars: true`,
// and the result is then narrowed to f32.
// It returns 0, when `strconv.atof64` reports an error.
@[inline]
pub fn (s string) f32() f32 {
	parsed := strconv.atof64(s, allow_extra_chars: true) or { 0 }
	return f32(parsed)
}
734
// f64 returns the value of the string as f64 `'1.0'.f64() == f64(1)`.
// The string is parsed by `strconv.atof64` with `allow_extra_chars: true`.
// It returns 0, when `strconv.atof64` reports an error.
@[inline]
pub fn (s string) f64() f64 {
	parsed := strconv.atof64(s, allow_extra_chars: true) or { 0 }
	return parsed
}
740
// u8_array returns the value of the hex/bin string as u8 array.
// hex string example: `'0x11223344ee'.u8_array() == [u8(0x11),0x22,0x33,0x44,0xee]`.
// bin string example: `'0b1101_1101'.u8_array() == [u8(0xdd)]`.
// Underscores in the string are stripped, and letters are lowercased first.
// Digits are left padded with '0' up to a whole number of bytes.
// An empty array is returned, when the `0x`/`0b` prefix is missing, when no
// digits follow it, or when any of them is not valid for the base.
pub fn (s string) u8_array() []u8 {
	mut digits := s.replace('_', '')
	if digits.len == 0 {
		return []u8{}
	}
	digits = digits.to_lower_ascii()
	// The hex and the binary forms differ only in these 3 settings:
	mut base := 0
	mut group := 0 // the number of digits, that make up 1 byte
	mut alphabet := ''
	if digits.starts_with('0x') {
		base = 16
		group = 2
		alphabet = '0123456789abcdef'
	} else if digits.starts_with('0b') {
		base = 2
		group = 8
		alphabet = '01'
	} else {
		return []u8{}
	}
	digits = digits[2..]
	if digits.len == 0 || !digits.contains_only(alphabet) {
		return []u8{}
	}
	// left pad with zeros, so that the digits split evenly into bytes
	leftover := digits.len % group
	if leftover != 0 {
		digits = '0'.repeat(group - leftover) + digits
	}
	mut res := []u8{len: digits.len / group}
	for k in 0 .. res.len {
		start := group * k
		res[k] = u8(digits[start..start + group].parse_uint(base, 8) or { 0 })
	}
	return res
}
793
// u8 returns the value of the string as u8 `'1'.u8() == u8(1)`.
// It returns 0, when `strconv.common_parse_uint` reports an error.
@[inline]
pub fn (s string) u8() u8 {
	parsed := strconv.common_parse_uint(s, 0, 8, false, false) or { 0 }
	return u8(parsed)
}
799
// u16 returns the value of the string as u16 `'1'.u16() == u16(1)`.
// It returns 0, when `strconv.common_parse_uint` reports an error.
@[inline]
pub fn (s string) u16() u16 {
	parsed := strconv.common_parse_uint(s, 0, 16, false, false) or { 0 }
	return u16(parsed)
}
805
// u32 returns the value of the string as u32 `'1'.u32() == u32(1)`.
// It returns 0, when `strconv.common_parse_uint` reports an error.
@[inline]
pub fn (s string) u32() u32 {
	parsed := strconv.common_parse_uint(s, 0, 32, false, false) or { 0 }
	return u32(parsed)
}
811
// u64 returns the value of the string as u64 `'1'.u64() == u64(1)`.
// It returns 0, when `strconv.common_parse_uint` reports an error.
@[inline]
pub fn (s string) u64() u64 {
	parsed := strconv.common_parse_uint(s, 0, 64, false, false) or { 0 }
	return parsed
}
817
// parse_uint is like `parse_int` but for unsigned numbers
//
// It forwards its arguments to `strconv.parse_uint`, and returns that result,
// or propagates its error. For more advanced features,
// consider calling `strconv.common_parse_uint` directly.
@[inline]
pub fn (s string) parse_uint(_base int, _bit_size int) !u64 {
	parsed := strconv.parse_uint(s, _base, _bit_size)!
	return parsed
}
827
// parse_int interprets a string s in the given base (0, 2 to 36) and
// bit size (0 to 64) and returns the corresponding value i.
//
// If the `_base` argument is 0, the true base is implied by the string's
// prefix: 2 for "0b", 8 for "0" or "0o", 16 for "0x", and 10 otherwise.
// Also, for argument base 0 only, underscore characters are permitted
// as defined by the Go syntax for integer literals.
//
// The `_bit_size` argument specifies the integer type
// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
// correspond to int, int8, int16, int32, and int64.
// If `_bit_size` is below 0 or above 64, an error is returned.
//
// It forwards its arguments to `strconv.parse_int`, and returns that result,
// or propagates its error. For more advanced features,
// consider calling `strconv.common_parse_int` directly.
@[inline]
pub fn (s string) parse_int(_base int, _bit_size int) !i64 {
	parsed := strconv.parse_int(s, _base, _bit_size)!
	return parsed
}
848
// == returns true, when `s` and `a` have the same length and the same bytes.
// A string with a nil .str is treated as empty: it is equal to another nil
// string, and to any string with a len of 0. That holds no matter on which
// side of the comparison the nil string is, and nil pointers are never
// passed to vmemcmp.
@[direct_array_access]
fn (s string) == (a string) bool {
	if s.str == 0 {
		// Nil string: equal only to another nil/empty string.
		// This can happen with zero-initialized struct fields in the native backend.
		return a.str == 0 || a.len == 0
	}
	if a.str == 0 {
		// The mirror of the case above: `s` is not nil here, so the two
		// are equal only when `s` is empty.
		return s.len == 0
	}
	if s.len != a.len {
		return false
	}
	unsafe {
		return vmemcmp(s.str, a.str, a.len) == 0
	}
}
863
// compare returns -1 if `s` < `a`, 0 if `s` == `a`, and 1 if `s` > `a`
// The strings are compared byte by byte. When one of them is a prefix of the
// other, the shorter one is the smaller one.
@[direct_array_access]
pub fn (s string) compare(a string) int {
	common := if s.len < a.len { s.len } else { a.len }
	mut i := 0
	for i < common {
		x := s[i]
		y := a[i]
		if x != y {
			// the first differing byte decides
			return if x < y { -1 } else { 1 }
		}
		i++
	}
	if s.len == a.len {
		return 0
	}
	return if s.len < a.len { -1 } else { 1 }
}
884
// < returns true, when `s` sorts before `a` in a byte by byte comparison.
// A proper prefix sorts before the longer string; equal strings give false.
@[direct_array_access]
fn (s string) < (a string) bool {
	mut i := 0
	for i < s.len {
		if i >= a.len {
			// `a` is a proper prefix of `s`, so `s` is the bigger one
			return false
		}
		if s[i] != a[i] {
			// the first differing byte decides
			return s[i] < a[i]
		}
		i++
	}
	// all the bytes of `s` matched; it is smaller only if `a` has more of them
	return s.len < a.len
}
899
// + returns a new string, that is the concatenation of `s` and `a`.
// The result is stored in a single freshly allocated buffer.
// An operand with a len <= 0 contributes nothing.
@[direct_array_access]
fn (s string) + (a string) string {
	left := if s.len > 0 { s.len } else { 0 }
	right := if a.len > 0 { a.len } else { 0 }
	total := left + right
	mut buf := unsafe { malloc_noscan(total + 1) }
	unsafe {
		if left > 0 {
			vmemcpy(buf, s.str, left)
		}
		if right > 0 {
			vmemcpy(buf + left, a.str, right)
		}
		buf[total] = 0 // the .len field is authoritative, but keep the result 0 terminated too
	}
	return string{
		str: unsafe { buf }
		len: total
	}
}
920
// string_plus_many concatenates several strings with a single allocation.
// `input_base` points to the first of `data_len` consecutive strings.
// Parts with a len <= 0 contribute nothing.
@[direct_array_access; markused]
fn string_plus_many(data_len int, input_base &string) string {
	// Pass 1: find the size of the result
	mut total := 0
	for k in 0 .. data_len {
		n := unsafe { input_base[k].len }
		if n > 0 {
			total += n
		}
	}
	mut buf := unsafe { malloc_noscan(total + 1) }
	// Pass 2: copy the parts one after the other
	mut w := 0
	for k in 0 .. data_len {
		piece := unsafe { input_base[k] }
		if piece.len > 0 {
			unsafe { vmemcpy(buf + w, piece.str, piece.len) }
			w += piece.len
		}
	}
	unsafe {
		buf[total] = 0 // the .len field is authoritative, but keep the result 0 terminated too
	}
	return string{
		str: unsafe { buf }
		len: total
	}
}
947
// plus_two returns the concatenation of `s`, `a` and `b`, stored in a single
// freshly allocated buffer. It is an optimization for `s + s2 + s3`
// (faster than string_plus(string_plus(s1, s2), s3)).
// An operand with a len <= 0 contributes nothing.
@[direct_array_access]
fn (s string) plus_two(a string, b string) string {
	n0 := if s.len > 0 { s.len } else { 0 }
	n1 := if a.len > 0 { a.len } else { 0 }
	n2 := if b.len > 0 { b.len } else { 0 }
	total := n1 + n2 + n0
	mut buf := unsafe { malloc_noscan(total + 1) }
	unsafe {
		if n0 > 0 {
			vmemcpy(buf, s.str, n0)
		}
		if n1 > 0 {
			vmemcpy(buf + n0, a.str, n1)
		}
		if n2 > 0 {
			vmemcpy(buf + n0 + n1, b.str, n2)
		}
		buf[total] = 0 // the .len field is authoritative, but keep the result 0 terminated too
	}
	return string{
		str: unsafe { buf }
		len: total
	}
}
973
// split_any splits the string to an array by any of the `delim` chars.
// If the delimiter string is empty then `.split()` is used.
// An empty string results in an empty array. Text after the last delimiter
// is added only when it is not empty.
// Example: assert "first row\nsecond row".split_any(" \n") == ['first', 'row', 'second', 'row']
@[direct_array_access]
pub fn (s string) split_any(delim string) []string {
	mut parts := []string{}
	unsafe { parts.flags.set(.noslices) }
	defer { unsafe { parts.flags.clear(.noslices) } }
	if s.len <= 0 {
		return parts
	}
	if delim.len <= 0 {
		// no delimiters given: fall back to the default split
		return s.split('')
	}
	mut start := 0 // where the current, not yet emitted part begins
	for pos, ch in s {
		mut is_delim := false
		for d in delim {
			if d == ch {
				is_delim = true
				break
			}
		}
		if is_delim {
			parts << s[start..pos]
			start = pos + 1
		}
	}
	if start < s.len {
		parts << s[start..]
	}
	return parts
}
1004
// rsplit_any splits the string to an array by any of the `delim` chars in reverse order.
// If the delimiter string is empty then `.rsplit()` is used.
// An empty string results in an empty array. Text before the first delimiter
// is added only when it is not empty.
// Example: assert "first row\nsecond row".rsplit_any(" \n") == ['row', 'second', 'row', 'first']
@[direct_array_access]
pub fn (s string) rsplit_any(delim string) []string {
	mut parts := []string{}
	unsafe { parts.flags.set(.noslices) }
	defer { unsafe { parts.flags.clear(.noslices) } }
	if s.len <= 0 {
		return parts
	}
	if delim.len <= 0 {
		// no delimiters given: fall back to the default rsplit
		return s.rsplit('')
	}
	mut stop := s.len // the exclusive end of the current, not yet emitted part
	for pos := s.len - 1; pos >= 0; pos-- {
		mut is_delim := false
		for d in delim {
			if s[pos] == d {
				is_delim = true
				break
			}
		}
		if is_delim {
			parts << s[pos + 1..stop]
			stop = pos
		}
	}
	if stop > 0 {
		parts << s[..stop]
	}
	return parts
}
1035
// split splits the string into an array of strings at the given delimiter.
// If `delim` is empty the string is split by its characters.
// It is a shortcut for `s.split_nth(delim, 0)`.
// Example: assert 'DEF'.split('') == ['D','E','F']
// Example: assert 'A B C'.split(' ') == ['A','B','C']
@[inline]
pub fn (s string) split(delim string) []string {
	parts := s.split_nth(delim, 0)
	return parts
}
1044
// rsplit breaks the string into an array of substrings, cutting at each occurrence of `delim`
// while scanning from the right, so the rightmost part comes first.
// When `delim` is empty, every byte of the string becomes its own element.
// Example: assert 'DEF'.rsplit('') == ['F','E','D']
// Example: assert 'A B C'.rsplit(' ') == ['C','B','A']
@[inline]
pub fn (s string) rsplit(delim string) []string {
	// A limit of 0 asks rsplit_nth() for all the parts.
	all_parts := s.rsplit_nth(delim, 0)
	return all_parts
}
1053
// split_once splits the string into a pair of strings at the first occurrence of `delim`.
// It returns `none` when the split does not produce exactly two parts
// (for example when `delim` does not occur in the string).
// Example:
// ```v
// path, ext := 'file.ts.dts'.split_once('.')?
// assert path == 'file'
// assert ext == 'ts.dts'
// ```
pub fn (s string) split_once(delim string) ?(string, string) {
	parts := s.split_nth(delim, 2)
	if parts.len == 2 {
		return parts[0], parts[1]
	}
	return none
}
1069
// rsplit_once splits the string into a pair of strings at the last occurrence of `delim`.
// NOTE: the pair is returned in reading order: the part to the left of the delimiter
// comes first, the part to the right of it comes second.
// It returns `none` when the split does not produce exactly two parts.
// Example:
// ```v
// path, ext := 'file.ts.dts'.rsplit_once('.')?
// assert path == 'file.ts'
// assert ext == 'dts'
// ```
pub fn (s string) rsplit_once(delim string) ?(string, string) {
	parts := s.rsplit_nth(delim, 2)
	if parts.len == 2 {
		// rsplit_nth() yields the right part first, so swap the two back.
		return parts[1], parts[0]
	}
	return none
}
1087
// split_n splits the string at occurrences of the `delim` substring and returns
// at most `n` parts. With `n` equal to 0, all the parts are returned.
// The last returned element holds the rest of the string, even when that rest
// still contains `delim`.
// It forwards to `split_nth`.
pub fn (s string) split_n(delim string, n int) []string {
	parts := s.split_nth(delim, n)
	return parts
}
1095
// split_nth splits the string at occurrences of the `delim` substring and returns
// at most `nth` parts. With `nth` equal to 0 (or negative), all the parts are returned.
// The last returned element holds the rest of the string, even when that rest
// still contains `delim`.
// An empty `delim` splits the string into its individual bytes.
@[direct_array_access]
pub fn (s string) split_nth(delim string, nth int) []string {
	mut parts := []string{}
	unsafe { parts.flags.set(.noslices) } // allow freeing of old data during <<
	defer { unsafe { parts.flags.clear(.noslices) } }
	limited := nth > 0
	if delim.len == 0 {
		// No delimiter: one element per byte, the last one takes the remainder.
		for pos := 0; pos < s.len; pos++ {
			if limited && parts.len == nth - 1 {
				parts << s[pos..]
				break
			}
			parts << s[pos].ascii_str()
		}
		return parts
	}
	// seg_start is the index where the part currently being collected begins.
	mut seg_start := 0
	if delim.len == 1 {
		sep := delim[0]
		for pos := 0; pos < s.len; pos++ {
			if s[pos] != sep {
				continue
			}
			if limited && parts.len == nth - 1 {
				break
			}
			parts << s.substr(seg_start, pos)
			seg_start = pos + 1
		}
	} else {
		mut pos := 0
		for pos + delim.len <= s.len {
			if unsafe { s.substr_unsafe(pos, pos + delim.len) } != delim {
				pos++
				continue
			}
			if limited && parts.len == nth - 1 {
				break
			}
			parts << s.substr(seg_start, pos)
			pos += delim.len
			seg_start = pos
		}
	}
	// Whatever is left after the last consumed delimiter becomes the final part.
	if !limited || parts.len < nth {
		parts << s[seg_start..]
	}
	return parts
}
1155
// rsplit_nth splits the string at occurrences of the `delim` substring, scanning from
// the right, so the parts are returned in reverse order.
// It returns at most `nth` parts. With `nth` equal to 0 (or negative), all the parts are returned.
// The last returned element holds the remaining left side of the string, even when
// that remainder still contains `delim`.
// An empty `delim` splits the string into its individual bytes.
@[direct_array_access]
pub fn (s string) rsplit_nth(delim string, nth int) []string {
	mut res := []string{}
	unsafe { res.flags.set(.noslices) } // allow freeing of old data during <<
	defer { unsafe { res.flags.clear(.noslices) } }
	match delim.len {
		0 {
			for i := s.len - 1; i >= 0; i-- {
				if nth > 0 && res.len == nth - 1 {
					res << s[..i + 1]
					break
				}
				res << s[i].ascii_str()
			}
		}
		1 {
			delim_byte := delim[0]
			mut rbound := s.len
			for i := s.len - 1; i >= 0; i-- {
				if s[i] == delim_byte {
					if nth > 0 && res.len == nth - 1 {
						break
					}
					res << s[i + 1..rbound]
					rbound = i
				}
			}
			if nth < 1 || res.len < nth {
				res << s[..rbound]
			}
		}
		else {
			// `i` is the exclusive end of the candidate delimiter `s[i - delim.len..i]`.
			// It starts at s.len, so that a delimiter at the very end of the string is seen,
			// and after a match it continues right at the start of the matched delimiter,
			// so that adjacent delimiters are all detected.
			mut rbound := s.len
			mut i := s.len
			for i >= delim.len {
				if unsafe { s.substr_unsafe(i - delim.len, i) } == delim {
					if nth > 0 && res.len == nth - 1 {
						break
					}
					res << s[i..rbound]
					i -= delim.len
					rbound = i
				} else {
					i--
				}
			}
			if nth < 1 || res.len < nth {
				res << s[..rbound]
			}
		}
	}

	return res
}
1212
// split_into_lines splits the string at line endings and returns the lines,
// with the line endings themselves removed.
// `\r` (MacOS), `\n` (POSIX), and `\r\n` (WinOS) line endings are all supported (including mixed line endings).
// NOTE: the algorithm is "greedy": a `\r` directly followed by `\n` is consumed as a
// single line ending, instead of being treated as two separate endings.
// An empty string yields an empty array, and a line ending at the very end of the string
// does not add a trailing empty line.
@[direct_array_access]
pub fn (s string) split_into_lines() []string {
	mut lines := []string{}
	if s.len == 0 {
		return lines
	}
	unsafe { lines.flags.set(.noslices) } // allow freeing of old data during <<
	defer { unsafe { lines.flags.clear(.noslices) } }
	mut line_start := 0
	mut pos := 0
	for pos < s.len {
		ch := s[pos]
		if ch != `\n` && ch != `\r` {
			pos++
			continue
		}
		lines << if line_start == pos { '' } else { s[line_start..pos] }
		pos++
		// Swallow the `\n` of a `\r\n` pair together with its `\r`.
		if ch == `\r` && pos < s.len && s[pos] == `\n` {
			pos++
		}
		line_start = pos
	}
	if line_start < s.len {
		lines << s[line_start..]
	}
	return lines
}
1248
// split_by_space splits the string at whitespace (any of ` `, `\n`, `\t`, `\v`, `\f`, `\r`).
// Empty pieces are dropped, so repeated, leading or trailing whitespace
// never produces empty elements in the result.
pub fn (s string) split_by_space() []string {
	mut words := []string{}
	unsafe { words.flags.set(.noslices) }
	defer { unsafe { words.flags.clear(.noslices) } }
	pieces := s.split_any(' \n\t\v\f\r')
	for piece in pieces {
		if piece == '' {
			continue
		}
		words << piece
	}
	return words
}
1262
// substr returns a newly allocated copy of the bytes of `s` from index `start`
// up to, but not including, index `_end`.
// Unless the program is compiled with `no_bounds_checking`, it panics when the
// requested range is not inside the string.
// Example: assert 'ABCD'.substr(1,3) == 'BC'
@[direct_array_access]
pub fn (s string) substr(start int, _end int) string {
	// WARNING: this is a temporary solution for bootstrapping:
	// both `max_i64` and `max_i32` are treated as "up to the end of the string".
	stop := if _end == max_i64 || _end == max_i32 { s.len } else { _end } // max_int
	$if !no_bounds_checking {
		out_of_range := start < 0 || stop < 0 || start > stop || start > s.len || stop > s.len
		if out_of_range {
			panic('substr(' + impl_i64_to_string(start) + ', ' + impl_i64_to_string(stop) +
				') out of bounds (len=' + impl_i64_to_string(s.len) + ') s=' + s)
		}
	}
	n := stop - start
	if n == s.len {
		// The whole string was requested.
		return s.clone()
	}
	mut piece := string{
		str: unsafe { malloc_noscan(n + 1) }
		len: n
	}
	unsafe {
		vmemcpy(piece.str, s.str + start, n)
		// keep the copy 0 terminated, for C interoperability
		piece.str[n] = 0
	}
	return piece
}
1289
// substr_unsafe works like substr(), but it does not allocate or copy anything:
// the result points directly into the memory of `s`.
// It does no bounds checking, and the returned view is only followed by a 0 byte
// when it reaches the end of `s`.
@[direct_array_access]
pub fn (s string) substr_unsafe(start int, _end int) string {
	stop := if _end == 2147483647 { s.len } else { _end } // max_int
	n := stop - start
	if n == s.len {
		return s
	}
	unsafe {
		return string{
			str: s.str + start
			len: n
		}
	}
}
1303
// substr_or returns `s.substr(start, end)` when the requested range lies inside the string,
// and `fallback` otherwise.
// Used by the native backend for `s[start..end] or { fallback }` expressions.
@[direct_array_access]
pub fn (s string) substr_or(start int, _end int, fallback string) string {
	stop := if _end == max_i64 || _end == max_i32 { s.len } else { _end }
	in_bounds := start >= 0 && start <= stop && stop <= s.len
	if in_bounds {
		return s.substr(start, stop)
	}
	return fallback
}
1314
// substr_with_check is the version of `substr()` that is used for `a[start..end] or {`.
// Instead of panicking, it returns an error when the requested range is not inside the string.
@[direct_array_access]
pub fn (s string) substr_with_check(start int, _end int) !string {
	// WARNING: this is a temporary solution for bootstrapping:
	// both `max_i64` and `max_i32` are treated as "up to the end of the string".
	stop := if _end == max_i64 || _end == max_i32 { s.len } else { _end } // max_int
	out_of_range := start < 0 || stop < 0 || start > stop || start > s.len || stop > s.len
	if out_of_range {
		return error('substr(' + impl_i64_to_string(start) + ', ' + impl_i64_to_string(stop) +
			') out of bounds (len=' + impl_i64_to_string(s.len) + ')')
	}
	n := stop - start
	if n == s.len {
		// The whole string was requested.
		return s.clone()
	}
	mut piece := string{
		str: unsafe { malloc_noscan(n + 1) }
		len: n
	}
	unsafe {
		vmemcpy(piece.str, s.str + start, n)
		// keep the copy 0 terminated, for C interoperability
		piece.str[n] = 0
	}
	return piece
}
1339
// substr_ni returns the string between the index positions `_start` and `_end`,
// where negative indexes count from the end of the string.
// Indexes outside of the string are clamped, and an inverted range yields '',
// so this function always returns a valid string.
@[direct_array_access]
pub fn (s string) substr_ni(_start int, _end int) string {
	mut from := _start
	if from < 0 {
		from += s.len
		if from < 0 {
			from = 0
		}
	}

	// WARNING: this is a temporary solution for bootstrapping:
	// both `max_i64` and `max_i32` are treated as "up to the end of the string".
	mut to := if _end == max_i64 || _end == max_i32 { s.len } else { _end }
	if to < 0 {
		to += s.len
		if to < 0 {
			to = 0
		}
	}
	if to > s.len {
		to = s.len
	}

	if from > s.len || to < from {
		return ''
	}

	// copy the selected bytes into a fresh, 0 terminated buffer
	n := to - from
	mut piece := string{
		str: unsafe { malloc_noscan(n + 1) }
		len: n
	}
	unsafe {
		vmemcpy(piece.str, s.str + from, n)
		piece.str[n] = 0
	}
	return piece
}
1383
// index_ returns the position of the first byte of the first occurrence of `p` in `s`.
// It returns `-1` when `p` can not be found, when `p` is empty or longer than `s`,
// or when either string has a `.str` pointer value of 0xFFFF or below.
// Needles longer than 2 bytes are searched with `index_kmp`.
@[direct_array_access]
pub fn (s string) index_(p string) int {
	if p.len > s.len || p.len == 0 || u64(s.str) <= 0xFFFF || u64(p.str) <= 0xFFFF {
		return -1
	}
	if p.len > 2 {
		return s.index_kmp(p)
	}
	// Only start positions that leave room for the whole needle are candidates.
	// That keeps every comparison inside the first s.len bytes of `s`, so a match
	// can never extend past the end of `s` (which matters for strings that are
	// views into bigger buffers, like the results of `substr_unsafe`).
	last_start := s.len - p.len
	for i := 0; i <= last_start; i++ {
		mut j := 0
		for j < p.len && unsafe { s.str[i + j] == p.str[j] } {
			j++
		}
		if j == p.len {
			return i
		}
	}
	return -1
}
1407
// index returns the position of the first byte of the first occurrence of `p` in `s`.
// It returns `none`, when `index_` reports that `p` was not found.
pub fn (s string) index(p string) ?int {
	pos := s.index_(p)
	if pos != -1 {
		return pos
	}
	return none
}
1417
// last_index returns the position of the first byte of the *last* occurrence of `needle` in `s`.
// It returns `none`, when `index_last_` reports that `needle` was not found.
@[inline]
pub fn (s string) last_index(needle string) ?int {
	pos := s.index_last_(needle)
	if pos != -1 {
		return pos
	}
	return none
}
1427
// kmp_stack_buffer_size is the longest needle, for which index_kmp can keep
// its prefix table in a fixed size local array, instead of allocating it.
const kmp_stack_buffer_size = 20

// index_kmp does a KMP (Knuth-Morris-Pratt) search inside the string `s` for the needle `p`.
// It returns the index of the first place in `s`, where `p` is found.
// It returns -1, when the needle `p` is not present in `s`.
@[direct_array_access; manualfree]
fn (s string) index_kmp(p string) int {
	if p.len > s.len {
		return -1
	}
	// The prefix table lives in the local array for short needles; longer needles
	// get a zeroed buffer from vcalloc, which is freed again when the function returns.
	needs_heap := p.len > kmp_stack_buffer_size
	mut stack_table := [kmp_stack_buffer_size]int{}
	mut table := unsafe { &stack_table[0] }
	if needs_heap {
		table = unsafe { &int(vcalloc(p.len * int(sizeof(int)))) }
	}
	defer {
		if needs_heap {
			unsafe { free(table) }
		}
	}
	// Phase 1: for every prefix p[..k + 1] of the needle, record the length of its
	// longest proper prefix, that is also a suffix of it.
	mut matched := 0
	for k := 1; k < p.len; k++ {
		for matched > 0 && unsafe { p.str[matched] != p.str[k] } {
			matched = unsafe { table[matched - 1] }
		}
		if unsafe { p.str[matched] == p.str[k] } {
			matched++
		}
		unsafe {
			table[k] = matched
		}
	}
	// Phase 2: scan the haystack once; on a mismatch fall back through the table,
	// instead of moving backwards in `s`.
	matched = 0
	for pos := 0; pos < s.len; pos++ {
		for matched > 0 && unsafe { p.str[matched] != s.str[pos] } {
			matched = unsafe { table[matched - 1] }
		}
		if unsafe { p.str[matched] == s.str[pos] } {
			matched++
		}
		if matched == p.len {
			return pos - p.len + 1
		}
	}
	return -1
}
1474
// index_any returns the position of the first byte of `s`, that is also present in `chars`.
// It returns -1, when `s` contains none of the bytes in `chars`.
pub fn (s string) index_any(chars string) int {
	for pos := 0; pos < s.len; pos++ {
		if chars.contains_u8(s[pos]) {
			return pos
		}
	}
	return -1
}
1486
// index_last_ returns the position of the last occurrence of the given string `p` in `s`.
// It returns -1, when `p` is empty, when it is longer than `s`, or when it is not found.
@[direct_array_access]
fn (s string) index_last_(p string) int {
	if p.len == 0 || p.len > s.len {
		return -1
	}
	// Try the candidate start positions from the rightmost one down to 0.
	for start := s.len - p.len; start >= 0; start-- {
		if unsafe { vmemcmp(s.str + start, p.str, p.len) } == 0 {
			return start
		}
	}
	return -1
}
1506
// index_after returns the position of the first occurrence of `p` in `s`,
// that begins at or after the index `start`.
// It returns `none` under the same conditions, for which `index_after_` returns -1.
@[direct_array_access]
pub fn (s string) index_after(p string, start int) ?int {
	idx := s.index_after_(p, start)
	if idx == -1 {
		return none
	}
	return idx
}

// index_after_ returns the position of the first occurrence of `p` in `s`,
// that begins at or after the index `start`. A negative `start` is treated as 0.
// It returns -1, when `p` is longer than `s`, when `start` is at or beyond
// the end of `s`, or when `p` is not found.
@[direct_array_access]
pub fn (s string) index_after_(p string, start int) int {
	if p.len > s.len || start >= s.len {
		return -1
	}
	// Only start positions that leave room for the whole needle are candidates.
	// That keeps every comparison inside the first s.len bytes of `s`, so a match
	// can never extend past the end of `s` (which matters for strings that are
	// views into bigger buffers, like the results of `substr_unsafe`).
	last_start := s.len - p.len
	mut i := if start < 0 { 0 } else { start }
	for i <= last_start {
		mut j := 0
		for j < p.len && unsafe { s.str[i + j] == p.str[j] } {
			j++
		}
		if j == p.len {
			return i
		}
		i++
	}
	return -1
}
1564
// index_u8 returns the index of the first occurrence of the byte `c` in the string.
// index_u8 returns -1 if the byte can not be found.
@[direct_array_access]
pub fn (s string) index_u8(c u8) int {
	for pos := 0; pos < s.len; pos++ {
		if s[pos] == c {
			return pos
		}
	}
	return -1
}
1576
// last_index_u8 returns the index of the last occurrence of the byte `c` in the string.
// last_index_u8 returns -1 if the byte can not be found.
@[direct_array_access; inline]
pub fn (s string) last_index_u8(c u8) int {
	// Walk backwards, until `c` is found, or until the index drops to -1.
	mut pos := s.len - 1
	for pos >= 0 && s[pos] != c {
		pos--
	}
	return pos
}
1587
// count returns the number of non-overlapping occurrences of `substr` in the string.
// count returns 0 if no `substr` could be found, and also when the string or `substr`
// is empty, or when `substr` is longer than the string.
@[direct_array_access]
pub fn (s string) count(substr string) int {
	if s.len == 0 || substr.len == 0 || substr.len > s.len {
		return 0
	}

	mut n := 0

	if substr.len == 1 {
		// A single byte needle only needs a simple scan over the bytes.
		target := substr[0]
		for letter in s {
			if letter == target {
				n++
			}
		}
		return n
	}

	// After every hit, continue searching right behind it, so that
	// overlapping occurrences are not counted twice.
	mut i := s.index_after_(substr, 0)
	for i != -1 {
		n++
		i = s.index_after_(substr, i + substr.len)
	}
	return n
}
1624
// contains_u8 returns `true` if the string contains the byte value `x`.
// See also: [`string.index_u8`](#string.index_u8) , to get the index of the byte as well.
pub fn (s string) contains_u8(x u8) bool {
	for pos := 0; pos < s.len; pos++ {
		if unsafe { s.str[pos] } == x {
			return true
		}
	}
	return false
}
1635
// contains returns `true` if the string contains `substr`.
// An empty `substr` is considered to be contained in every string.
// See also: [`string.index`](#string.index)
pub fn (s string) contains(substr string) bool {
	return match substr.len {
		0 { true }
		1 { s.contains_u8(unsafe { substr.str[0] }) }
		else { s.index_(substr) != -1 }
	}
}
1647
// contains_any returns `true` if the string contains any of the bytes in `chars`.
pub fn (s string) contains_any(chars string) bool {
	for k := 0; k < chars.len; k++ {
		wanted := unsafe { chars.str[k] }
		if s.contains_u8(wanted) {
			return true
		}
	}
	return false
}
1657
// contains_only returns `true`, if every byte of the string is one of the bytes in `chars`.
// It returns `false`, when `chars` is empty.
pub fn (s string) contains_only(chars string) bool {
	if chars.len == 0 {
		return false
	}
	for ch in s {
		if !chars.contains_u8(ch) {
			return false
		}
	}
	return true
}
1674
// contains_any_substr returns `true` if the string contains any of the strings in `substrs`.
// It also returns `true`, when `substrs` is empty.
pub fn (s string) contains_any_substr(substrs []string) bool {
	if substrs.len == 0 {
		return true
	}
	for k := 0; k < substrs.len; k++ {
		if s.contains(substrs[k]) {
			return true
		}
	}
	return false
}
1687
// starts_with returns `true` if the string starts with `p`.
// It returns `false`, when `p` is longer than the string, or when either string
// has a `.str` pointer value of 0xFFFF or below.
@[direct_array_access]
pub fn (s string) starts_with(p string) bool {
	if p.len > s.len || u64(s.str) <= 0xFFFF || u64(p.str) <= 0xFFFF {
		return false
	}
	return unsafe { vmemcmp(s.str, p.str, p.len) == 0 }
}
1698
// ends_with returns `true` if the string ends with `p`.
// It returns `false`, when `p` is longer than the string, or when either string
// has a `.str` pointer value of 0xFFFF or below.
@[direct_array_access]
pub fn (s string) ends_with(p string) bool {
	if p.len > s.len || u64(s.str) <= 0xFFFF || u64(p.str) <= 0xFFFF {
		return false
	}
	// compare the last p.len bytes of `s` with `p`
	tail_offset := s.len - p.len
	return unsafe { vmemcmp(s.str + tail_offset, p.str, p.len) == 0 }
}
1709
// to_lower_ascii returns a copy of the string, in which the bytes `A`..`Z` are
// replaced by `a`..`z`. All other bytes are copied unchanged.
// It is faster than `s.to_lower()`, but works only when the input
// string `s` is composed *entirely* from ASCII characters.
// Use `s.to_lower()` instead, if you are not sure.
@[direct_array_access]
pub fn (s string) to_lower_ascii() string {
	unsafe {
		mut out := malloc_noscan(s.len + 1)
		for i in 0 .. s.len {
			ch := s.str[i]
			// an ASCII lowercase letter is 32 above its uppercase counterpart
			out[i] = if ch >= `A` && ch <= `Z` { ch + 32 } else { ch }
		}
		out[s.len] = 0
		return tos(out, s.len)
	}
}
1729
// to_lower returns the string in all lowercase characters.
// Pure ASCII strings take the faster `to_lower_ascii` path; all other strings
// are converted rune by rune.
// Example: assert 'Hello V'.to_lower() == 'hello v'
@[direct_array_access]
pub fn (s string) to_lower() string {
	if s.is_pure_ascii() {
		return s.to_lower_ascii()
	}
	mut lowered := s.runes()
	for idx := 0; idx < lowered.len; idx++ {
		lowered[idx] = lowered[idx].to_lower()
	}
	return lowered.string()
}
1743
// is_lower returns `true`, if the string contains no uppercase ASCII letters.
// It returns `false` for an empty string, and for a string that starts with a digit.
// It only works when the input is composed entirely from ASCII characters.
// Example: assert 'hello developer'.is_lower() == true
@[direct_array_access]
pub fn (s string) is_lower() bool {
	if s == '' || s[0].is_digit() {
		return false
	}
	for ch in s {
		if ch >= `A` && ch <= `Z` {
			return false
		}
	}
	return true
}
1759
// to_upper_ascii returns a copy of the string, in which the bytes `a`..`z` are
// replaced by `A`..`Z`. All other bytes are copied unchanged.
// It is faster than `s.to_upper()`, but works only when the input
// string `s` is composed *entirely* from ASCII characters.
// Use `s.to_upper()` instead, if you are not sure.
@[direct_array_access]
pub fn (s string) to_upper_ascii() string {
	unsafe {
		mut out := malloc_noscan(s.len + 1)
		for i in 0 .. s.len {
			ch := s.str[i]
			// an ASCII uppercase letter is 32 below its lowercase counterpart
			out[i] = if ch >= `a` && ch <= `z` { ch - 32 } else { ch }
		}
		out[s.len] = 0
		return tos(out, s.len)
	}
}
1779
// to_upper returns the string in all uppercase characters.
// Pure ASCII strings take the faster `to_upper_ascii` path; all other strings
// are converted rune by rune.
// Example: assert 'Hello V'.to_upper() == 'HELLO V'
@[direct_array_access]
pub fn (s string) to_upper() string {
	if s.is_pure_ascii() {
		return s.to_upper_ascii()
	}
	mut raised := s.runes()
	for idx := 0; idx < raised.len; idx++ {
		raised[idx] = raised[idx].to_upper()
	}
	return raised.string()
}
1793
// is_upper returns `true` if the string has no lowercase ASCII letters,
// and has at least one uppercase ASCII letter.
// It only works when the input is composed entirely from ASCII characters.
// See also: [`byte.is_capital`](#byte.is_capital)
// Example: assert 'HELLO V'.is_upper() == true
@[direct_array_access]
pub fn (s string) is_upper() bool {
	if s == '' {
		return false
	}
	mut seen_upper := false
	for ch in s {
		if ch >= `a` && ch <= `z` {
			return false
		} else if ch >= `A` && ch <= `Z` {
			seen_upper = true
		}
	}
	return seen_upper
}
1815
// capitalize returns the string with its first character converted to uppercase.
// The rest of the string is left as it is.
// Example: assert 'hello'.capitalize() == 'Hello'
@[direct_array_access]
pub fn (s string) capitalize() string {
	match s.len {
		0 {
			return ''
		}
		1 {
			return s[0].ascii_str().to_upper()
		}
		else {
			// work on runes, so that a multi-byte first character stays intact
			runes := s.runes()
			head := runes[0].str().to_upper()
			tail := runes[1..].string()
			return head + tail
		}
	}
}
1834
// uncapitalize returns the string with its first character converted to lowercase.
// The rest of the string is left as it is.
// Example: assert 'Hello, Bob!'.uncapitalize() == 'hello, Bob!'
@[direct_array_access]
pub fn (s string) uncapitalize() string {
	match s.len {
		0 {
			return ''
		}
		1 {
			return s[0].ascii_str().to_lower()
		}
		else {
			// work on runes, so that a multi-byte first character stays intact
			runes := s.runes()
			head := runes[0].str().to_lower()
			tail := runes[1..].string()
			return head + tail
		}
	}
}
1853
// is_capital returns `true`, if the first byte of the string `s` is an uppercase
// ASCII letter, and none of the following bytes is one.
// Example: assert 'Hello'.is_capital() == true
// Example: assert 'HelloWorld'.is_capital() == false
@[direct_array_access]
pub fn (s string) is_capital() bool {
	if s.len == 0 {
		return false
	}
	if s[0] < `A` || s[0] > `Z` {
		return false
	}
	for pos := 1; pos < s.len; pos++ {
		ch := s[pos]
		if ch >= `A` && ch <= `Z` {
			return false
		}
	}
	return true
}
1870
// starts_with_capital returns `true`, if the first character in the string `s`,
// is a capital letter, no matter what the rest of the string looks like.
// It returns `false` for an empty string.
// Example: assert 'Hello'.starts_with_capital() == true
// Example: assert 'Hello. World.'.starts_with_capital() == true
@[direct_array_access]
pub fn (s string) starts_with_capital() bool {
	return s.len > 0 && s[0].is_capital()
}
1882
// title returns the string with each of its space separated words capitalized.
// Example: assert 'hello v developer'.title() == 'Hello V Developer'
pub fn (s string) title() string {
	words := s.split(' ')
	mut capitalized := []string{cap: words.len}
	for k := 0; k < words.len; k++ {
		capitalized << words[k].capitalize()
	}
	return capitalized.join(' ')
}
1894
// is_title returns true if every space separated word of the string passes `is_capital()`.
// Example: assert 'Hello V Developer'.is_title() == true
pub fn (s string) is_title() bool {
	words := s.split(' ')
	for k := 0; k < words.len; k++ {
		if !words[k].is_capital() {
			return false
		}
	}
	return true
}
1906
// find_between returns the part of the string, that is located between the first
// occurrence of `start`, and the first occurrence of `end` after it.
// It returns '', when `index_` can not find `start`, or can not find `end` behind it.
// Example: assert 'hey [man] how you doin'.find_between('[', ']') == 'man'
pub fn (s string) find_between(start string, end string) string {
	open_pos := s.index_(start)
	if open_pos == -1 {
		return ''
	}
	// Everything to the right of `start`; `end` is searched only in that part.
	rest := s[open_pos + start.len..]
	close_pos := rest.index_(end)
	if close_pos == -1 {
		return ''
	}
	return rest[..close_pos]
}
1922
// trim_space removes any of ` `, `\n`, `\t`, `\v`, `\f`, `\r` from both the start and the end of the string.
// Example: assert ' Hello V '.trim_space() == 'Hello V'
@[inline]
pub fn (s string) trim_space() string {
	whitespace := ' \n\t\v\f\r'
	return s.trim(whitespace)
}
1929
// trim_space_left removes any of ` `, `\n`, `\t`, `\v`, `\f`, `\r` from the start of the string.
// Example: assert ' Hello V '.trim_space_left() == 'Hello V '
@[inline]
pub fn (s string) trim_space_left() string {
	whitespace := ' \n\t\v\f\r'
	return s.trim_left(whitespace)
}
1936
// trim_space_right removes any of ` `, `\n`, `\t`, `\v`, `\f`, `\r` from the end of the string.
// Example: assert ' Hello V '.trim_space_right() == ' Hello V'
@[inline]
pub fn (s string) trim_space_right() string {
	whitespace := ' \n\t\v\f\r'
	return s.trim_right(whitespace)
}
1943
// trim removes any of the characters given in `cutset` from both the start and the end of the string.
// A pure ASCII `cutset` is matched byte by byte; any other `cutset` is matched rune by rune.
// When the string or `cutset` is empty, a copy of the string is returned.
// Example: assert ' ffHello V ffff'.trim(' f') == 'Hello V'
pub fn (s string) trim(cutset string) string {
	if s.len == 0 || cutset.len == 0 {
		return s.clone()
	}
	return if cutset.is_pure_ascii() {
		s.trim_chars(cutset, .trim_both)
	} else {
		s.trim_runes(cutset, .trim_both)
	}
}
1956
// trim_indexes gets the new start and end indices of a string, when any of the bytes
// given in `cutset` were stripped from the start and end of the string.
// The result should be used as an input to `substr()`.
// If the string is empty, or if it contains only the bytes in `cutset`,
// both values returned are zero.
// Example: left, right := '-hi-'.trim_indexes('-'); assert left == 1; assert right == 3
@[direct_array_access]
pub fn (s string) trim_indexes(cutset string) (int, int) {
	// Without this guard, an empty string would be indexed at positions 0 and -1.
	if s.len == 0 {
		return 0, 0
	}
	mut left := 0
	for left < s.len && cutset.contains_u8(s[left]) {
		left++
	}
	if left == s.len {
		// every byte of the string is in the cutset
		return 0, 0
	}
	// `right` is the exclusive end index; s[left] is not in the cutset, so the scan stops there at the latest
	mut right := s.len
	for right > left && cutset.contains_u8(s[right - 1]) {
		right--
	}
	return left, right
}
1986
// TrimMode selects which side(s) of a string are stripped by `trim_chars` and `trim_runes`.
enum TrimMode {
	trim_left // strip matching characters from the start of the string only
	trim_right // strip matching characters from the end of the string only
	trim_both // strip matching characters from both the start and the end
}
1992
// trim_chars returns a copy of `s`, with the bytes present in `cutset` removed
// from the side(s) of the string, selected by `mode`.
// It returns '', when nothing remains after the trimming.
// It compares single bytes, and its callers use it only for pure ASCII cutsets.
@[direct_array_access]
fn (s string) trim_chars(cutset string, mode TrimMode) string {
	// [left, right) is the range of bytes that is kept. Both scans are bounded by
	// each other, so no index outside of the string is ever read, even for an empty `s`.
	mut left := 0
	mut right := s.len
	if mode != .trim_right {
		for left < right && cutset.contains_u8(s[left]) {
			left++
		}
	}
	if mode != .trim_left {
		for right > left && cutset.contains_u8(s[right - 1]) {
			right--
		}
	}
	if left >= right {
		return ''
	}
	return s.substr(left, right)
}
2024
// trim_runes returns a copy of `s`, with the runes present in `cutset` removed
// from the side(s) of the string, selected by `mode`.
// It returns '', when nothing remains after the trimming.
// Both `s` and `cutset` are decoded to runes first, so multi-byte characters
// are matched as a whole.
@[direct_array_access]
fn (s string) trim_runes(cutset string, mode TrimMode) string {
	s_runes := s.runes()
	cs_runes := cutset.runes()
	// [left, right) is the range of runes that is kept. Both scans are bounded by
	// each other, so no index outside of `s_runes` is ever read, even for an empty `s`.
	mut left := 0
	mut right := s_runes.len
	if mode != .trim_right {
		for left < right && s_runes[left] in cs_runes {
			left++
		}
	}
	if mode != .trim_left {
		for right > left && s_runes[right - 1] in cs_runes {
			right--
		}
	}
	if left >= right {
		return ''
	}
	return s_runes[left..right].string()
}
2058
// trim_left removes any of the characters given in `cutset` from the start of the string.
// A pure ASCII `cutset` is matched byte by byte; any other `cutset` is matched rune by rune.
// When the string or `cutset` is empty, a copy of the string is returned.
// Example: assert 'd Hello V developer'.trim_left(' d') == 'Hello V developer'
@[direct_array_access]
pub fn (s string) trim_left(cutset string) string {
	if s.len == 0 || cutset.len == 0 {
		return s.clone()
	}
	return if cutset.is_pure_ascii() {
		s.trim_chars(cutset, .trim_left)
	} else {
		s.trim_runes(cutset, .trim_left)
	}
}
2072
// trim_right removes any of the characters given in `cutset` from the end of the string.
// Cutsets of one byte, or of two ASCII bytes, are handled by a direct scan;
// longer pure ASCII cutsets are matched byte by byte, and all others rune by rune.
// When the string or `cutset` is empty, a copy of the string is returned.
// Example: assert ' Hello V d'.trim_right(' d') == ' Hello V'
@[direct_array_access]
pub fn (s string) trim_right(cutset string) string {
	if s.len < 1 || cutset.len < 1 {
		return s.clone()
	}
	if cutset.len == 1 || (cutset.len == 2 && cutset.is_pure_ascii()) {
		// Fast path: at most two distinct bytes to compare with.
		// For a one byte cutset, `second` is the same byte as `first`.
		first := cutset[0]
		second := cutset[cutset.len - 1]
		// `keep` is the number of leading bytes that remain after the trimming.
		mut keep := s.len
		for keep > 0 && (s[keep - 1] == first || s[keep - 1] == second) {
			keep--
		}
		if keep == 0 {
			return ''
		}
		return s.substr(0, keep)
	}
	return if cutset.is_pure_ascii() {
		s.trim_chars(cutset, .trim_right)
	} else {
		s.trim_runes(cutset, .trim_right)
	}
}
2109
// trim_string_left removes one leading occurrence of `str` from the string.
// When the string does not start with `str`, a copy of the string is returned.
// Example: assert 'WorldHello V'.trim_string_left('World') == 'Hello V'
pub fn (s string) trim_string_left(str string) string {
	if !s.starts_with(str) {
		return s.clone()
	}
	return s[str.len..]
}
2118
// trim_string_right removes one trailing occurrence of `str` from the string.
// When the string does not end with `str`, a copy of the string is returned.
// Example: assert 'Hello VWorld'.trim_string_right('World') == 'Hello V'
pub fn (s string) trim_string_right(str string) string {
	if !s.ends_with(str) {
		return s.clone()
	}
	return s[..s.len - str.len]
}
2127
// compare_strings compares the strings `a` and `b`.
// It returns `-1` if `a < b`, `1` if `a > b`, and `0` otherwise.
pub fn compare_strings(a &string, b &string) int {
	if a < b {
		return -1
	}
	if a > b {
		return 1
	}
	return 0
}
2136
// compare_strings_by_len compares only the lengths of the strings `a` and `b`.
// It returns `-1` if `a.len < b.len`, `1` if `a.len > b.len`, and `0` otherwise.
fn compare_strings_by_len(a &string, b &string) int {
	if a.len < b.len {
		return -1
	}
	if a.len > b.len {
		return 1
	}
	return 0
}
2145
// compare_lower_strings returns the result of `compare_strings` for the
// lowercased versions of `a` and `b`, i.e. it compares them case insensitively.
fn compare_lower_strings(a &string, b &string) int {
	lowered_a := a.to_lower()
	lowered_b := b.to_lower()
	return compare_strings(&lowered_a, &lowered_b)
}
2152
// sort_ignore_case sorts the string array using case insensitive comparing.
// The ordering is delegated to `compare_lower_strings`, which lowercases
// both of its operands before comparing them.
@[inline]
pub fn (mut s []string) sort_ignore_case() {
	s.sort_with_compare(compare_lower_strings)
}
2158
// sort_by_len sorts the string array by each string's `.len` length.
// The ordering is delegated to `compare_strings_by_len`, which looks
// only at the lengths of the strings, not at their contents.
@[inline]
pub fn (mut s []string) sort_by_len() {
	s.sort_with_compare(compare_strings_by_len)
}
2164
// str returns a copy of the string, made with `clone()`.
@[inline]
pub fn (s string) str() string {
	duplicate := s.clone()
	return duplicate
}
2170
// at returns the byte at index `idx`.
// Unless the program is compiled with `no_bounds_checking`, it panics
// when `idx` is negative, or is not smaller than the length of the string.
// Example: assert 'ABC'.at(1) == u8(`B`)
fn (s string) at(idx int) u8 {
	$if !no_bounds_checking {
		in_range := idx >= 0 && idx < s.len
		if !in_range {
			panic_n2('string index out of range(idx,s.len):', idx, s.len)
		}
	}
	unsafe {
		return s.str[idx]
	}
}
2181
// at_i64 is the variant of `at()` for `i64` indexes.
// The index is compared as an `i64`, and only narrowed to `int` for the actual access.
@[markused]
fn (s string) at_i64(idx i64) u8 {
	$if !no_bounds_checking {
		if !(idx >= 0 && idx < i64(s.len)) {
			panic_n2('string index out of range(idx,s.len):', idx, s.len)
		}
	}
	return unsafe { s.str[int(idx)] }
}
2191
// at_u64 is the variant of `at()` for `u64` indexes.
// An unsigned index can not be negative, so only the upper bound is checked.
@[markused]
fn (s string) at_u64(idx u64) u8 {
	$if !no_bounds_checking {
		if !(idx < u64(s.len)) {
			msg := 'string index out of range(idx,s.len): ' + idx.str() + ', ' +
				impl_i64_to_string(s.len)
			panic(msg)
		}
	}
	return unsafe { s.str[int(idx)] }
}
2202
// at_ni returns the byte at the index that `v_ni_index(idx, s.len)` computes from `idx`.
// The resulting index is then checked by `at()`.
// NOTE(review): presumably `v_ni_index` maps a negative `idx` to an offset from the end
// of the string - confirm against its definition.
@[markused]
fn (s string) at_ni(idx int) u8 {
	return s.at(v_ni_index(idx, s.len))
}
2207
// at_with_check is the version of `at()` that is used in `a[i] or {`.
// It returns `none`, instead of panicking, when the index is out of range.
fn (s string) at_with_check(idx int) ?u8 {
	if idx >= 0 && idx < s.len {
		return unsafe { s.str[idx] }
	}
	return none
}
2218
// at_with_check_i64 is the variant of `at_with_check()` for `i64` indexes.
// It returns `none` when the index is out of range.
@[markused]
fn (s string) at_with_check_i64(idx i64) ?u8 {
	if idx >= 0 && idx < i64(s.len) {
		return unsafe { s.str[int(idx)] }
	}
	return none
}
2228
// at_with_check_u64 is the variant of `at_with_check()` for `u64` indexes.
// It returns `none` when the index is past the end of the string.
@[markused]
fn (s string) at_with_check_u64(idx u64) ?u8 {
	if idx < u64(s.len) {
		return unsafe { s.str[int(idx)] }
	}
	return none
}
2238
// at_with_check_ni returns the byte at the index that `v_ni_index(idx, s.len)` computes
// from `idx`, or `none` when `at_with_check()` rejects that index.
// NOTE(review): presumably `v_ni_index` maps a negative `idx` to an offset from the end
// of the string - confirm against its definition.
@[markused]
fn (s string) at_with_check_ni(idx int) ?u8 {
	return s.at_with_check(v_ni_index(idx, s.len))
}
2243
// is_oct returns `true` if the string is an octal value, i.e. an optional `+` or `-` sign,
// followed by the `0o` prefix, followed by at least one digit in the range 0-7.
@[direct_array_access]
pub fn (str string) is_oct() bool {
	if str.len == 0 {
		return false
	}
	// skip the optional sign
	mut pos := if str[0] == `-` || str[0] == `+` { 1 } else { 0 }
	// the prefix is mandatory, and at least one digit has to follow it
	if pos + 2 >= str.len || str[pos] != `0` || str[pos + 1] != `o` {
		return false
	}
	pos += 2
	for pos < str.len {
		if !str[pos].is_oct_digit() {
			return false
		}
		pos++
	}
	return true
}
2286
// is_bin returns `true` if the string is a binary value, i.e. an optional `+` or `-` sign,
// followed by the `0b` prefix, followed by at least one `0` or `1` digit.
@[direct_array_access]
pub fn (str string) is_bin() bool {
	if str.len == 0 {
		return false
	}
	// skip the optional sign
	mut pos := if str[0] == `-` || str[0] == `+` { 1 } else { 0 }
	// the prefix is mandatory, and at least one digit has to follow it
	if pos + 2 >= str.len || str[pos] != `0` || str[pos + 1] != `b` {
		return false
	}
	pos += 2
	for pos < str.len {
		if !str[pos].is_bin_digit() {
			return false
		}
		pos++
	}
	return true
}
2329
// is_hex returns `true` if the string is a hexadecimal value, i.e. an optional `+` or `-` sign,
// followed by the lower case `0x` prefix, followed by at least one digit in 0-9, a-f or A-F.
@[direct_array_access]
pub fn (str string) is_hex() bool {
	if str.len == 0 {
		return false
	}
	// skip the optional sign
	mut pos := if str[0] == `-` || str[0] == `+` { 1 } else { 0 }
	// the prefix is mandatory, and at least one digit has to follow it
	if pos + 2 >= str.len || str[pos] != `0` || str[pos + 1] != `x` {
		return false
	}
	pos += 2
	for pos < str.len {
		if !str[pos].is_hex_digit() {
			return false
		}
		pos++
	}
	return true
}
2376
// is_int returns `true` if the string is an integer value, i.e. an optional `+` or `-` sign,
// followed by at least one decimal digit, and nothing else.
@[direct_array_access]
pub fn (str string) is_int() bool {
	if str.len == 0 {
		return false
	}
	first := str[0]
	has_sign := first == `-` || first == `+`
	if !has_sign && !first.is_digit() {
		return false
	}
	// a lone sign is not a number
	if has_sign && str.len == 1 {
		return false
	}
	for pos := 1; pos < str.len; pos++ {
		if !str[pos].is_digit() {
			return false
		}
	}
	return true
}
2405
// is_space returns `true` if the byte is a white space character.
// The following list is considered white space characters: ` `, `\t`, `\n`, `\v`, `\f`, `\r`, 0x85, 0xa0
// Example: assert u8(` `).is_space() == true
@[inline]
pub fn (c u8) is_space() bool {
	// 9 .. 13 are `\t`, `\n`, `\v`, `\f` and `\r`; 32 is the plain space
	// 0x85 is NEXT LINE (NEL)
	// 0xa0 is NO-BREAK SPACE
	return (c >= 9 && c <= 13) || c == 32 || c == 0x85 || c == 0xa0
}
2415
// is_digit returns `true` if the byte is in range 0-9 and `false` otherwise.
// Example: assert u8(`9`).is_digit() == true
@[inline]
pub fn (c u8) is_digit() bool {
	return !(c < `0` || c > `9`)
}
2422
// is_hex_digit returns `true` if the byte is either in range 0-9, a-f or A-F and `false` otherwise.
// Example: assert u8(`F`).is_hex_digit() == true
@[inline]
pub fn (c u8) is_hex_digit() bool {
	if c >= `0` && c <= `9` {
		return true
	}
	return (c >= `a` && c <= `f`) || (c >= `A` && c <= `F`)
}
2429
// is_oct_digit returns `true` if the byte is in range 0-7 and `false` otherwise.
// Example: assert u8(`7`).is_oct_digit() == true
@[inline]
pub fn (c u8) is_oct_digit() bool {
	return !(c < `0` || c > `7`)
}
2436
// is_bin_digit returns `true` if the byte is a binary digit (0 or 1) and `false` otherwise.
// Example: assert u8(`0`).is_bin_digit() == true
@[inline]
pub fn (c u8) is_bin_digit() bool {
	return c >= `0` && c <= `1`
}
2443
// is_letter returns `true` if the byte is in range a-z or A-Z and `false` otherwise.
// Example: assert u8(`V`).is_letter() == true
@[inline]
pub fn (c u8) is_letter() bool {
	is_lower := c >= `a` && c <= `z`
	is_upper := c >= `A` && c <= `Z`
	return is_lower || is_upper
}
2450
// is_alnum returns `true` if the byte is in range a-z, A-Z, 0-9 and `false` otherwise.
// Example: assert u8(`V`).is_alnum() == true
@[inline]
pub fn (c u8) is_alnum() bool {
	return c.is_letter() || c.is_digit()
}
2457
// free allows for manually freeing the memory occupied by the string.
// It does nothing when compiled with `prealloc`, and it does not touch strings
// that have `is_lit == 1` or a 0 `.str` pointer.
// After a successful free, `.str` is nil, `.len` is 0 and `.is_lit` holds a marker value,
// so that a second call on the same string is reported instead of freeing again.
@[manualfree; unsafe]
pub fn (s &string) free() {
	$if prealloc {
		return
	}
	// -98761234 is the marker that this function stores in `is_lit` after freeing (see below)
	if s.is_lit == -98761234 {
		double_free_msg := unsafe { &u8(c'double string.free() detected\n') }
		double_free_msg_len := unsafe { vstrlen(double_free_msg) }
		$if freestanding {
			bare_eprint(double_free_msg, u64(double_free_msg_len))
		} $else {
			// NOTE(review): this writes the diagnostic to fd 1, while the freestanding
			// branch uses `bare_eprint` - confirm that fd 1 is intended here.
			_write_buf_to_fd(1, double_free_msg, double_free_msg_len)
		}
		return
	}
	// NOTE(review): `is_lit == 1` presumably marks string literals, that do not own
	// heap memory - confirm against the places that set `is_lit`.
	if s.is_lit == 1 || s.str == 0 {
		return
	}
	unsafe {
		// C.printf(c's: %x %s\n', s.str, s.str)
		free(s.str)
		s.str = nil
	}
	s.len = 0
	// remember that this string was freed already
	s.is_lit = -98761234
}
2485
// before returns the contents before the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// It is an alias of `all_before`.
// Example: assert '23:34:45.234'.before('.') == '23:34:45'
// Example: assert 'abcd'.before('.') == 'abcd'
// TODO: deprecate and remove either .before or .all_before
pub fn (s string) before(sub string) string {
	return s.all_before(sub)
}
2498
// all_before returns the contents before the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_before('.') == '23:34:45'
// Example: assert 'abcd'.all_before('.') == 'abcd'
pub fn (s string) all_before(sub string) string {
	// TODO: remove dup method
	found_at := s.index_(sub)
	if found_at != -1 {
		return s[..found_at]
	}
	return s.clone()
}
2511
// all_before_last returns the contents before the last occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_before_last(':') == '23:34'
// Example: assert 'abcd'.all_before_last('.') == 'abcd'
pub fn (s string) all_before_last(sub string) string {
	found_at := s.index_last_(sub)
	if found_at != -1 {
		return s[..found_at]
	}
	return s.clone()
}
2523
// all_after returns the contents after the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_after('.') == '234'
// Example: assert 'abcd'.all_after('z') == 'abcd'
pub fn (s string) all_after(sub string) string {
	found_at := s.index_(sub)
	if found_at != -1 {
		// continue right behind the match
		rest_start := found_at + sub.len
		return s[rest_start..]
	}
	return s.clone()
}
2535
// all_after_last returns the contents after the last occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_after_last(':') == '45.234'
// Example: assert 'abcd'.all_after_last('z') == 'abcd'
pub fn (s string) all_after_last(sub string) string {
	found_at := s.index_last_(sub)
	if found_at != -1 {
		// continue right behind the match
		rest_start := found_at + sub.len
		return s[rest_start..]
	}
	return s.clone()
}
2547
// all_after_first returns the contents after the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// It is an alias of `all_after`.
// Example: assert '23:34:45.234'.all_after_first(':') == '34:45.234'
// Example: assert 'abcd'.all_after_first('z') == 'abcd'
pub fn (s string) all_after_first(sub string) string {
	return s.all_after(sub)
}
2559
// after returns the contents after the last occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// It is an alias of `all_after_last`.
// Example: assert '23:34:45.234'.after(':') == '45.234'
// Example: assert 'abcd'.after('z') == 'abcd'
// TODO: deprecate either .all_after_last or .after
@[inline]
pub fn (s string) after(sub string) string {
	return s.all_after_last(sub)
}
2569
// after_char returns the contents after the first occurrence of the `sub` byte in the string.
// If the byte is not found, it returns the full input string.
// Example: assert '23:34:45.234'.after_char(`:`) == '34:45.234'
// Example: assert 'abcd'.after_char(`:`) == 'abcd'
pub fn (s string) after_char(sub u8) string {
	for idx, ch in s {
		if ch == sub {
			return s[idx + 1..]
		}
	}
	return s.clone()
}
2587
// join joins a string array into a string using `sep` separator.
// An empty array results in an empty string.
// Example: assert ['Hello','V'].join(' ') == 'Hello V'
pub fn (a []string) join(sep string) string {
	if a.len == 0 {
		return ''
	}
	// the total size is every element, plus one separator between each pair of neighbours
	mut total := sep.len * (a.len - 1)
	for item in a {
		total += item.len
	}
	// one more byte for the terminating 0
	mut joined := string{
		str: unsafe { malloc_noscan(total + 1) }
		len: total
	}
	mut offset := 0
	for n, item in a {
		// every element, except the first one, is preceded by the separator
		if n > 0 {
			unsafe { vmemcpy(voidptr(joined.str + offset), sep.str, sep.len) }
			offset += sep.len
		}
		unsafe { vmemcpy(voidptr(joined.str + offset), item.str, item.len) }
		offset += item.len
	}
	unsafe {
		joined.str[joined.len] = 0
	}
	return joined
}
2623
// join_lines joins a string array into a string using a `\n` newline delimiter.
// It is a shorthand for `s.join('\n')`, so no newline is added after the last element.
@[inline]
pub fn (s []string) join_lines() string {
	return s.join('\n')
}
2629
// reverse returns a reversed string.
// Note: the string is reversed byte by byte.
// Example: assert 'Hello V'.reverse() == 'V olleH'
@[direct_array_access]
pub fn (s string) reverse() string {
	if s.len == 1 || s.len == 0 {
		return s.clone()
	}
	mut res := string{
		str: unsafe { malloc_noscan(s.len + 1) }
		len: s.len
	}
	last := s.len - 1
	for i in 0 .. s.len {
		unsafe {
			res.str[i] = s[last - i]
		}
	}
	unsafe {
		res.str[res.len] = 0
	}
	return res
}
2651
// limit returns a portion of the string, starting at `0` and extending for at most `max` runes.
// When the string has `max` or less runes, a copy of the whole string is returned.
// A `max` that is 0 or negative, results in an empty string.
// Example: assert 'hello'.limit(2) == 'he'
// Example: assert 'hi'.limit(10) == 'hi'
pub fn (s string) limit(max int) string {
	// guard against negative values, that would otherwise reach the `u[0..max]` slice below
	if max <= 0 {
		return ''
	}
	// every rune takes at least one byte, so the rune count can not exceed `s.len`;
	// that allows skipping the allocation of the rune array for short strings
	if s.len <= max {
		return s.clone()
	}
	u := s.runes()
	if u.len <= max {
		return s.clone()
	}
	return u[0..max].string()
}
2662
// hash returns an integer hash of the string.
// The hash starts at 0, and for every byte it is multiplied by 31, before the byte is added.
// The arithmetic is done on `u32`, and the result is converted to `int` at the end.
pub fn (s string) hash() int {
	mut acc := u32(0)
	for ch in s {
		acc = acc * 31 + u32(ch)
	}
	return int(acc)
}
2673
// bytes returns the string converted to a byte array.
// The bytes are copied into a new array, that has the same length as the string.
pub fn (s string) bytes() []u8 {
	if s.len > 0 {
		mut copied := []u8{len: s.len}
		unsafe { vmemcpy(copied.data, s.str, s.len) }
		return copied
	}
	return []
}
2683
// repeat returns a new string with `count` number of copies of the string it was called on.
// A `count` of 0 or less results in an empty string.
@[direct_array_access]
pub fn (s string) repeat(count int) string {
	if count <= 0 {
		return ''
	}
	if count == 1 {
		return s.clone()
	}
	total := s.len * count
	// one more byte for the terminating 0
	mut buf := unsafe { malloc_noscan(total + 1) }
	mut offset := 0
	for _ in 0 .. count {
		unsafe {
			vmemcpy(buf + offset, s.str, s.len)
		}
		offset += s.len
	}
	unsafe {
		buf[total] = 0
	}
	return unsafe { buf.vstring_with_len(total) }
}
2704
// fields returns a string array of the string split on runs of ` `, `\t` and `\n` .
// Leading and trailing separators do not produce empty elements.
// Example: assert '\t\tv = v'.fields() == ['v', '=', 'v']
// Example: assert ' sss ssss'.fields() == ['sss', 'ssss']
pub fn (s string) fields() []string {
	mut res := []string{}
	unsafe { res.flags.set(.noslices) }
	defer { unsafe { res.flags.clear(.noslices) } }
	// index of the first byte of the current word, or -1 while between words
	mut start := -1
	for i, c in s {
		if c in [32, 9, 10] {
			if start >= 0 {
				// a separator right after a word ends that word
				res << s[start..i]
				start = -1
			}
		} else if start < 0 {
			start = i
		}
	}
	if start >= 0 {
		// collect the remainder word at the end
		res << s[start..s.len]
	}
	return res
}
2740
// strip_margin allows multi-line strings to be formatted in a way that removes white-space
// before a delimiter. By default `|` is used.
// Note: the delimiter has to be a byte at this time. That means surrounding
// the value in ``.
// It is a shorthand for `s.strip_margin_custom(`|`)`.
//
// See also: string.trim_indent()
//
// Example:
// ```v
// st := 'Hello there,
//       |   this is a string,
//       |   Everything before the first | is removed'.strip_margin()
//
// assert st == 'Hello there,
//    this is a string,
//    Everything before the first | is removed'
// ```
@[inline]
pub fn (s string) strip_margin() string {
	return s.strip_margin_custom(`|`)
}
2762
// strip_margin_custom does the same as `strip_margin` but will use `del` as delimiter instead of `|`
// A white-space `del` is rejected: a warning is printed with `println`, and `|` is used instead.
// After every line break, all the bytes up to and including the next delimiter are dropped.
// NOTE(review): the scan for the delimiter does not stop at the next line break, so a line
// without a delimiter is dropped, together with everything up to the next delimiter - confirm
// that this is intended.
@[direct_array_access]
pub fn (s string) strip_margin_custom(del u8) string {
	mut sep := del
	if sep.is_space() {
		println('Warning: `strip_margin` cannot use white-space as a delimiter')
		println(' Defaulting to `|`')
		sep = `|`
	}
	// don't know how much space the resulting string will be, but the max it
	// can be is this big
	mut ret := unsafe { malloc_noscan(s.len + 1) }
	// number of bytes written to `ret` so far
	mut count := 0
	for i := 0; i < s.len; i++ {
		if s[i] in [10, 13] {
			// keep the line break itself (10 is `\n`, 13 is `\r`)
			unsafe {
				ret[count] = s[i]
			}
			count++
			// CRLF
			if s[i] == 13 && i < s.len - 1 && s[i + 1] == 10 {
				unsafe {
					ret[count] = s[i + 1]
				}
				count++
				i++
			}
			// skip the margin; `i` ends on the delimiter, which the `i++` of the
			// outer loop then steps over, or at `s.len` when there is no delimiter left
			for s[i] != sep {
				i++
				if i >= s.len {
					break
				}
			}
		} else {
			unsafe {
				ret[count] = s[i]
			}
			count++
		}
	}
	unsafe {
		ret[count] = 0
		return ret.vstring_with_len(count)
	}
}
2808
// trim_indent detects a common minimal indent of all the input lines,
// removes it from every line and also removes the first and the last
// lines if they are blank (notice difference blank vs empty).
//
// Note that blank lines do not affect the detected indent level,
// and that they are kept as they are in the result.
//
// In case if there are non-blank lines with no leading whitespace characters
// (no indent at all) then the common indent is 0, and therefore this function
// doesn't change the indentation.
//
// Example:
// ```v
// st := '
//      Hello there,
//      this is a string,
//      all the leading indents are removed
//      and also the first and the last lines if they are blank
// '.trim_indent()
//
// assert st == 'Hello there,
// this is a string,
// all the leading indents are removed
// and also the first and the last lines if they are blank'
// ```
pub fn (s string) trim_indent() string {
	mut lines := s.split_into_lines()

	// the common indent is the smallest indent among the non-blank lines
	mut common := int(max_int)
	for l in lines {
		if !l.is_blank() {
			width := l.indent_width()
			if width < common {
				common = width
			}
		}
	}

	// drop a blank first line, then a blank last line
	if lines.len > 0 && lines.first().is_blank() {
		lines = unsafe { lines[1..] }
	}
	if lines.len > 0 && lines.last().is_blank() {
		lines = unsafe { lines[..lines.len - 1] }
	}

	mut out := []string{cap: lines.len}
	for l in lines {
		if l.is_blank() {
			out << l
		} else {
			out << l[common..]
		}
	}
	return out.join('\n')
}
2870
// indent_width returns the number of white space bytes (as defined by `u8.is_space()`),
// at the beginning of the string.
// Note: it returns 0 for a string, that is empty or contains only white space.
// Example: assert ' v'.indent_width() == 2
// Example: assert '\t\tv'.indent_width() == 2
pub fn (s string) indent_width() int {
	mut pos := 0
	for pos < s.len {
		if !s[pos].is_space() {
			return pos
		}
		pos++
	}
	return 0
}
2883
// is_blank returns true if the string is empty or contains only white-space.
// Example: assert ' '.is_blank()
// Example: assert '\t'.is_blank()
// Example: assert 'v'.is_blank() == false
pub fn (s string) is_blank() bool {
	// an empty string never enters the loop, and is thus blank too
	for pos := 0; pos < s.len; pos++ {
		if !s[pos].is_space() {
			return false
		}
	}
	return true
}
2901
// match_glob matches the string, with a Unix shell-style wildcard pattern.
// Note: wildcard patterns are NOT the same as regular expressions.
// They are much simpler, and do not allow backtracking, captures, etc.
// The special characters used in shell-style wildcards are:
// `*` - matches everything
// `?` - matches any single character
// `[seq]` - matches any of the characters in the sequence
// `[^seq]` - matches any character that is NOT in the sequence
// Any other character in `pattern`, is matched 1:1 to the corresponding
// character in `name`, including / and \.
// You can wrap the meta-characters in brackets too, i.e. `[?]` matches `?`
// in the string, and `[*]` matches `*` in the string.
// The whole of `name` has to be matched by the whole of `pattern`.
// The matching is done byte by byte.
// Example: assert 'ABCD'.match_glob('AB*')
// Example: assert 'ABCD'.match_glob('*D')
// Example: assert 'ABCD'.match_glob('*B*')
// Example: assert !'ABCD'.match_glob('AB')
@[direct_array_access]
pub fn (name string) match_glob(pattern string) bool {
	// Initial port based on https://research.swtch.com/glob.go
	// See also https://research.swtch.com/glob
	// px and nx are the current positions in `pattern` and in `name`
	mut px := 0
	mut nx := 0
	// next_px and next_nx are the positions to restart from, after a mismatch
	// that follows a `*`; next_nx == 0 means that no `*` was seen yet
	mut next_px := 0
	mut next_nx := 0
	plen := pattern.len
	nlen := name.len
	for px < plen || nx < nlen {
		if px < plen {
			c := pattern[px]
			match c {
				`?` {
					// single-character wildcard
					if nx < nlen {
						px++
						nx++
						continue
					}
				}
				`*` {
					// zero-or-more-character wildcard
					// Try to match at nx.
					// If that doesn't work out, restart at nx+1 next.
					next_px = px
					next_nx = nx + 1
					px++
					continue
				}
				`[` {
					if nx < nlen {
						wanted_c := name[nx]
						mut is_inverted := false
						mut inner_match := false
						mut inner_idx := px + 1
						// a `^` right after the `[` inverts the set
						if inner_idx < plen && pattern[inner_idx] == `^` {
							is_inverted = true
							inner_idx++
						}
						// scan up to the closing `]`, or to the end of the pattern
						for ; inner_idx < plen && pattern[inner_idx] != `]`; inner_idx++ {
							if pattern[inner_idx] == wanted_c {
								inner_match = true
							}
						}
						// `inner_idx < plen` is false, when the closing `]` is missing;
						// such a set is treated as a mismatch
						if inner_idx < plen && ((inner_match && !is_inverted)
							|| (!inner_match && is_inverted)) {
							px = inner_idx + 1
							nx++
							continue
						}
					}
				}
				else {
					// an ordinary character
					if nx < nlen && name[nx] == c {
						px++
						nx++
						continue
					}
				}
			}
		}
		// Reached on a mismatch, or when `pattern` ended before `name` did
		if 0 < next_nx && next_nx <= nlen {
			// A mismatch, try restarting:
			px = next_px
			nx = next_nx
			continue
		}
		return false
	}
	// Matched all of `pattern` to all of `name`
	return true
}
2993
// is_ascii returns true if all characters belong to the US-ASCII set ([` `..`~`])
// Note: bytes below ` ` (like `\t` and `\n`) are rejected too. An empty string passes the check.
@[direct_array_access; inline]
pub fn (s string) is_ascii() bool {
	for ch in s {
		if !(ch >= u8(` `) && ch <= u8(`~`)) {
			return false
		}
	}
	return true
}
3004
// is_identifier checks if a string is a valid identifier (starts with letter/underscore, followed by letters, digits, or underscores)
// Only the ASCII letters a-z and A-Z are accepted. An empty string is not an identifier.
@[direct_array_access]
pub fn (s string) is_identifier() bool {
	// digits are allowed everywhere, except in the first position
	if s.len == 0 || s[0].is_digit() {
		return false
	}
	for ch in s {
		if !ch.is_alnum() && ch != `_` {
			return false
		}
	}
	return true
}
3022
// camel_to_snake converts the string from camelCase to snake_case.
// An underscore is inserted at the word boundaries, and the upper case
// ASCII letters are converted to lower case.
// Example: assert 'Abcd'.camel_to_snake() == 'abcd'
// Example: assert 'aaBB'.camel_to_snake() == 'aa_bb'
// Example: assert 'BBaa'.camel_to_snake() == 'bb_aa'
// Example: assert 'HTTPServer'.camel_to_snake() == 'http_server'
// Example: assert 'HTTP2Server'.camel_to_snake() == 'http2_server'
// Example: assert 'XML2JSON'.camel_to_snake() == 'xml_2_json'
@[direct_array_access]
pub fn (s string) camel_to_snake() string {
	if s.len == 0 {
		return ''
	}
	if s.len == 1 {
		return s.to_lower_ascii()
	}
	// at most one `_` is inserted before each byte, thus the result
	// can not need more than 2 * s.len bytes, plus the terminating 0
	mut b := unsafe { malloc_noscan(2 * s.len + 1) }
	// Rather than checking whether the iterator variable is > 1 inside the loop,
	// handle the first two chars separately to reduce load.
	// pos is the index of the next byte to be written to `b`
	mut pos := 2
	mut prev_is_upper := false
	mut prev_inserted_boundary := false
	unsafe {
		if s[0].is_capital() {
			// adding 32 converts an upper case ASCII letter to lower case
			b[0] = s[0] + 32
			b[1] = if s[1].is_capital() {
				prev_is_upper = true
				s[1] + 32
			} else {
				s[1]
			}
		} else {
			b[0] = s[0]
			if s[1].is_capital() {
				prev_is_upper = true
				// a lower-upper-lower start, like `aBc`, gets its boundary right away
				if s[0] != `_` && s.len > 2 && !s[2].is_capital() {
					b[1] = `_`
					b[2] = s[1] + 32
					pos = 3
				} else {
					b[1] = s[1] + 32
				}
			} else {
				b[1] = s[1]
			}
		}
	}
	for i := 2; i < s.len; i++ {
		mut has_boundary_before_upper := false
		c := s[i]
		c_is_upper := c.is_capital()
		c_is_number := c.is_digit()
		next_is_lower := i + 1 < s.len && s[i + 1].is_letter() && !s[i + 1].is_capital()
		next2_is_lower := i + 2 < s.len && s[i + 2].is_letter() && !s[i + 2].is_capital()
		// Cases: `XML2JSON == xml_2_json` || `HTTP2Server == http2_server`
		skip_digit := c_is_number && prev_is_upper && !next_is_lower && next2_is_lower
		// Cases: `HTTPServer == http_server` || `getHTTPSUrl == get_https_url`
		if c_is_upper && prev_is_upper && i >= 2 && s[i - 2].is_capital() && next_is_lower
			&& c != `_` {
			unsafe {
				// never write two underscores in a row
				if b[pos - 1] != `_` {
					b[pos] = `_`
					pos++
				}
			}
			has_boundary_before_upper = true
		}
		// Cases: `aBcd == a_bcd` || `ABcd == ab_cd`
		// TODO: remove this workaround for v2's parser
		// vfmt off
		if ((c_is_upper && !prev_is_upper) ||
			(!c_is_upper && prev_is_upper && s[i - 2].is_capital() && !prev_inserted_boundary && !skip_digit))
			&& c != `_` {
			unsafe {
				// never write two underscores in a row
				if b[pos - 1] != `_` {
					b[pos] = `_`
					pos++
				}
			}
		}
		// vfmt on
		lower_c := if c_is_upper { c + 32 } else { c }
		unsafe {
			b[pos] = lower_c
		}
		// remember the state of this byte for the next iteration
		prev_is_upper = c_is_upper
		prev_inserted_boundary = has_boundary_before_upper
		pos++
	}
	unsafe {
		b[pos] = 0
	}
	return unsafe { tos(b, pos) }
}
3116
// snake_to_camel converts the string from snake_case to camelCase.
// All the underscores are removed. The first letter, and every letter that
// follows an underscore, is converted to upper case, if it is in the range a-z.
// All the other bytes are copied as they are.
// Example: assert 'abcd'.snake_to_camel() == 'Abcd'
// Example: assert 'ab_cd'.snake_to_camel() == 'AbCd'
// Example: assert '_abcd'.snake_to_camel() == 'Abcd'
// Example: assert '_abcd_'.snake_to_camel() == 'Abcd'
// Example: assert 'a'.snake_to_camel() == 'A'
@[direct_array_access]
pub fn (s string) snake_to_camel() string {
	if s.len == 0 {
		return ''
	}
	// Note: strings with a single byte go through the same loop as the longer ones,
	// so that `a` becomes `A`, and `_` becomes an empty string.
	mut need_upper := true
	// the result can not be longer than the input; one more byte for the terminating 0
	mut b := unsafe { malloc_noscan(s.len + 1) }
	mut i := 0
	for c in s {
		if c == `_` {
			// the underscore is dropped, and the next byte starts a new word
			need_upper = true
			continue
		}
		unsafe {
			// subtracting 32 converts a lower case ASCII letter to upper case
			b[i] = if need_upper && c >= `a` && c <= `z` { c - 32 } else { c }
		}
		i++
		need_upper = false
	}
	unsafe {
		b[i] = 0
	}
	return unsafe { tos(b, i) }
}
3156
// WrapConfig holds the optional parameters of `string.wrap()`.
@[params]
pub struct WrapConfig {
pub:
	width int    = 80   // the maximum width of a line, before it is wrapped
	end   string = '\n' // the line break, that is written between the wrapped lines
}
3163
// wrap wraps the string `s` when each line exceeds the width specified in `width`
// (default value is 80), and will use `end` (default value is '\n') as a line break.
// The string is split into words with `fields()`, and the words of a line are joined
// by a single space. The width is measured in bytes (`.len`). A word is never split,
// so a word that is longer than `width`, results in a line that is longer too.
// A `width` of 0 or less, and a string without words, both result in an empty string.
// Example: assert 'Hello, my name is Carl and I am a delivery'.wrap(width: 20) == 'Hello, my name is\nCarl and I am a\ndelivery'
pub fn (s string) wrap(config WrapConfig) string {
	if config.width <= 0 {
		return ''
	}
	words := s.fields()
	if words.len == 0 {
		return ''
	}
	mut sb := strings.new_builder(s.len)
	// the space that is still free on the current line
	mut remaining := 0
	for n, word in words {
		if n > 0 && word.len + 1 <= remaining {
			// the word, and the space before it, still fit on the current line
			sb.write_string(' ')
			sb.write_string(word)
			remaining -= word.len + 1
			continue
		}
		// start a new line; the very first word does not need a line break before it
		if n > 0 {
			sb.write_string(config.end)
		}
		sb.write_string(word)
		remaining = config.width - word.len
	}
	return sb.str()
}
3192
// hex returns a string with the hexadecimal representation of the bytes of the string `s`.
// Every byte is represented by two lower case hexadecimal digits.
pub fn (s string) hex() string {
	if s.len == 0 {
		return ''
	}
	return unsafe { data_to_hex_string(s.str, s.len) }
}
3200
// data_to_hex_string returns the hexadecimal representation of the `len` bytes, that start at `data`.
// Every byte results in two lower case hexadecimal digits, the high nibble first.
// The caller has to make sure, that `data` points to at least `len` readable bytes.
@[unsafe]
fn data_to_hex_string(data &u8, len int) string {
	// two digits per byte, plus the terminating 0
	mut out := malloc_noscan(u64(len) * 2 + 1)
	mut written := 0
	for idx in 0 .. len {
		cur := data[idx]
		hi := cur >> 4
		lo := cur & 0xF
		// `W` is `a` - 10, so the nibbles 10 .. 15 are mapped to `a` .. `f`
		out[written] = if hi < 10 { hi + `0` } else { hi + `W` }
		written++
		out[written] = if lo < 10 { lo + `0` } else { lo + `W` }
		written++
	}
	out[written] = 0
	return tos(out, written)
}
3216
// RunesIterator holds the state of an iteration over the runes of a string.
// It is created by `string.runes_iterator()`, and advanced by its `next()` method.
pub struct RunesIterator {
mut:
	s string // the string that is iterated over
	i int    // the byte index in `s`, at which the next rune starts
}
3222
// runes_iterator creates an iterator over all the runes in the given string `s`.
// It can be used in `for r in s.runes_iterator() {`, as a direct substitute to
// calling .runes(): `for r in s.runes() {`, which needs an intermediate allocation
// of an array.
// The returned iterator starts at the first byte of `s`.
pub fn (s string) runes_iterator() RunesIterator {
	return RunesIterator{
		s: s
		i: 0
	}
}
3233
// next is the method that will be called for each iteration in `for r in s.runes_iterator() {`.
// It returns `none`, once the end of the string is reached.
// The length of a rune is determined by passing its first byte to `utf8_char_len`.
// When the string ends in the middle of a rune, only the remaining bytes are decoded.
pub fn (mut ri RunesIterator) next() ?rune {
	if ri.i >= ri.s.len {
		return none
	}
	first := unsafe { ri.s.str[ri.i] }
	char_len := utf8_char_len(first)
	if char_len == 1 {
		// a single byte is returned as it is
		ri.i++
		return first
	}
	start := &u8(unsafe { &ri.s.str[ri.i] })
	// do not read past the end of the string
	remaining := ri.s.len - ri.i
	len := if remaining > char_len { char_len } else { remaining }
	ri.i += char_len
	if char_len > 4 {
		return 0
	}
	return rune(impl_utf8_to_utf32(start, len))
}
3253