Gitly


1 /*
2 regex 1.0 alpha
3 
4 Copyright (c) 2019-2024 Dario Deledda. All rights reserved.
5 Use of this source code is governed by an MIT license
6 that can be found in the LICENSE file.
7 */
8 module regex
9 
10 import strings
11 
12 /******************************************************************************
13 *
14 * Inits
15 *
16 ******************************************************************************/
// regex_base compiles `pattern` and returns the resulting regex object (`RE`)
// together with the compile status `re_err` and the related position
// `err_pos`, both exactly as reported by `impl_compile`.
pub fn regex_base(pattern string) (RE, int, int) {
    // Every buffer is sized from the pattern length: the program and the
    // char class list can not outgrow the pattern, and neither the number of
    // groups nor their nesting depth can exceed half of its length.
    half_len := pattern.len >> 1
    mut re := RE{
        prog:             []Token{len: pattern.len + 1}
        cc:               []CharClass{len: pattern.len}
        group_csave_flag: false // continuous group saving stays disabled
        group_max_nested: half_len
        group_max:        half_len
        group_stack:      []int{len: half_len, init: -1}
        group_data:       []int{len: half_len, init: -1}
    }

    re_err, err_pos := re.impl_compile(pattern)
    return re, re_err, err_pos
}
34 
35 /******************************************************************************
36 *
37 * Utilities
38 *
39 ******************************************************************************/
// get_group_bounds_by_name returns the `start, end` values stored for the
// group called `group_name`, or `-1, -1` when no group has that name.
pub fn (re &RE) get_group_bounds_by_name(group_name string) (int, int) {
    if group_name !in re.group_map {
        return -1, -1
    }
    // group_map values are one above the group slot; each group owns two
    // consecutive entries of `groups`: start first, then end.
    slot := (re.group_map[group_name] - 1) * 2
    return re.groups[slot], re.groups[slot + 1]
}
50 
// get_group_by_name returns the text of the group called `group_name` as a
// slice of `in_txt`. An empty string is returned when the name is unknown or
// when the stored bounds do not describe a non-empty span.
pub fn (re &RE) get_group_by_name(in_txt string, group_name string) string {
    if group_name !in re.group_map {
        return ''
    }
    // group_map values are one above the group slot in `groups`
    slot := (re.group_map[group_name] - 1) * 2
    first := re.groups[slot]
    last := re.groups[slot + 1]
    if first < 0 || last <= first {
        return ''
    }
    return in_txt[first..last]
}
63 
// get_group_by_id returns the text of group number `group_id` as a slice of
// `in_txt`. An empty string is returned when `group_id` is negative or beyond
// the stored groups, or when the stored bounds do not describe a non-empty
// span.
pub fn (re &RE) get_group_by_id(in_txt string, group_id int) string {
    // Negative ids are rejected too: they would index before the start of `groups`.
    if group_id < 0 || group_id >= (re.groups.len >> 1) {
        return ''
    }
    // each group owns two consecutive entries of `groups`: start, then end
    index := group_id * 2
    start := re.groups[index]
    end := re.groups[index + 1]
    if start >= 0 && end > start {
        return in_txt[start..end]
    }
    return ''
}
76 
// get_group_bounds_by_id returns the `start, end` values stored for group
// number `group_id`, or `-1, -1` when `group_id` is negative, not below
// `group_count`, or has no pair of entries in `groups`.
pub fn (re &RE) get_group_bounds_by_id(group_id int) (int, int) {
    if group_id < 0 || group_id >= re.group_count {
        return -1, -1
    }
    index := group_id * 2
    // `group_count` and `groups` are separate fields: make sure the pair
    // really exists before reading it.
    if index + 1 >= re.groups.len {
        return -1, -1
    }
    return re.groups[index], re.groups[index + 1]
}
85 
// Re_group holds the boundaries of one group, as returned by `get_group_list`.
// Both fields default to -1.
pub struct Re_group {
pub:
    start int = -1 // start index of the group
    end   int = -1 // end index of the group
}
91 
// get_group_list returns one Re_group per pair of entries in `re.groups`.
// A pair with a non-negative start and an end greater than the start is
// copied into the result; every other entry keeps the default Re_group
// (start and end at -1).
pub fn (re &RE) get_group_list() []Re_group {
    // all the entries start out as the default Re_group
    mut list := []Re_group{len: re.groups.len >> 1}

    for pos := 0; pos < re.groups.len; pos += 2 {
        first := re.groups[pos]
        if first < 0 {
            continue
        }
        last := re.groups[pos + 1]
        if last > first {
            list[pos >> 1] = Re_group{
                start: first
                end:   last
            }
        }
    }
    return list
}
119 
120 /******************************************************************************
121 *
122 * Matchers
123 *
124 ******************************************************************************/
// match_string matches the pattern against `in_txt` and returns the
// `start, end` pair of the match. The end is never greater than `in_txt.len`.
// When a match is found but `check_anchors` rejects it, `no_match_found, 0`
// is returned; any other result of `match_base` is passed through.
@[direct_array_access]
pub fn (re &RE) match_string(in_txt string) (int, int) {
    unsafe {
        first, raw_last := re.match_base(in_txt.str, in_txt.len + 1)
        // match_base is handed one extra position: pull the end back inside the text
        last := if raw_last > in_txt.len { in_txt.len } else { raw_last }

        if first < 0 || last < first {
            return first, last
        }
        anchors_ok, _ := re.check_anchors(in_txt, first, last)
        if !anchors_ok {
            return no_match_found, 0
        }
        return first, last
    }
}
144 
// matches_string reports whether the pattern matches `in_txt`: it is true
// unless `match_string` returns `no_match_found` as start.
pub fn (re &RE) matches_string(in_txt string) bool {
    first, _ := re.match_string(in_txt)
    if first == no_match_found {
        return false
    }
    return true
}
150 
151 /******************************************************************************
152 *
153 * Finders
154 *
155 ******************************************************************************/
// check_anchors validates the match `start..end` of `in_txt` against the
// `f_ms` (`^`) and `f_me` (`$`) flags of the regex.
// It returns two booleans: the first one tells if the match is acceptable,
// the second one tells the caller that scanning further positions is
// pointless (callers receive it as `stop_scan`).
@[direct_array_access; inline]
fn (re &RE) check_anchors(in_txt string, start int, end int) (bool, bool) {
    needs_start := (re.flag & f_ms) != 0
    needs_end := (re.flag & f_me) != 0

    // `^` means start of the source string.
    if needs_start && start > 0 {
        return false, true
    }
    // nothing to verify for `$` when it is absent or the match reaches the end
    if !needs_end || end >= in_txt.len {
        return true, false
    }
    // `$` also accepts a match that stops right before a newline.
    if in_txt[end] in new_line_list {
        return true, false
    }
    // When `^` is also present, scanning forward can never recover.
    return false, needs_start
}
172 
173 /*
174 // find internal implementation HERE for reference do not remove!!
175 @[direct_array_access]
176 fn (mut re RE) find_imp(in_txt string) (int,int) {
177     old_flag := re.flag
178     re.flag |= f_src  // enable search mode
179 
180     start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
181     //print("Find [${start},${end}] '${in_txt[start..end]}'")
182     if end > in_txt.len {
183         end = in_txt.len
184     }
185     re.flag = old_flag
186 
187     if start >= 0 && end > start {
188         return start, end
189     }
190     return no_match_found, 0
191 }
192 */
193 
// find tries to find the first match in the input string.
// Every start position from 0 to `in_txt.len` included is tried in order.
// It returns the `start, end` indexes of the match relative to `in_txt`,
// or `-1, -1` when nothing matches. On success the stored group bounds are
// made relative to `in_txt` as well.
@[direct_array_access]
pub fn (mut re RE) find(in_txt string) (int, int) {
    mut i := 0
    for i <= in_txt.len {
        mut s := -1
        mut e := -1
        unsafe {
            s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1)
        }
        if s < 0 || e < s {
            i++
            continue
        }

        abs_start := i + s
        // match_base is handed one position more than the text holds; clamp
        // the end as match_string does, so it is always a valid slice bound.
        abs_end := if i + e > in_txt.len { in_txt.len } else { i + e }

        ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end)
        if !ok {
            if stop_scan {
                break
            }
            i++
            continue
        }

        // Group bounds are relative to the scanned position: shift them.
        // Negative entries mark groups without bounds and must stay
        // negative, otherwise they would turn into the bogus index i-1.
        for gi in 0 .. re.groups.len {
            if re.groups[gi] >= 0 {
                re.groups[gi] += i
            }
        }
        return abs_start, abs_end
    }
    return -1, -1
}
235 
// find_from tries to find the first match in the input string, starting the
// scan at index `start`.
// It returns the `start, end` indexes of the match relative to `in_txt`,
// or `-1, -1` when `start` is negative or nothing matches. On success the
// stored group bounds are made relative to `in_txt` as well.
@[direct_array_access]
pub fn (mut re RE) find_from(in_txt string, start int) (int, int) {
    if start < 0 {
        return -1, -1
    }
    // NOTE(review): the flag is saved and restored as in the original code,
    // nothing visible in this function changes it - confirm before removing.
    old_flag := re.flag

    mut i := start
    for i <= in_txt.len {
        mut s := -1
        mut e := -1
        unsafe {
            // view on the tail of in_txt starting at i, no copy involved
            tmp_str := tos(in_txt.str + i, in_txt.len - i)
            s, e = re.match_string(tmp_str)
        }
        if s < 0 || e < s {
            i++
            continue
        }

        abs_start := i + s
        abs_end := i + e
        // match_string validated the anchors against the tail only,
        // check them again with the positions relative to the whole text
        ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end)
        if !ok {
            if stop_scan {
                break
            }
            i++
            continue
        }

        re.flag = old_flag
        // Group bounds are relative to the scanned tail: shift them.
        // Negative entries mark groups without bounds and must stay
        // negative, otherwise they would turn into the bogus index i-1.
        for gi in 0 .. re.groups.len {
            if re.groups[gi] >= 0 {
                re.groups[gi] += i
            }
        }
        return abs_start, abs_end
    }
    re.flag = old_flag
    return -1, -1
}
285 
// find_all finds all the non overlapping occurrences of the match pattern and
// returns a flat list with the start and end index of each match.
// The returned indexes are never greater than `in_txt.len`.
//
// Usage:
// ```v
// blurb := 'foobar boo steelbar toolbox foot tooooot'
// mut re := regex.regex_opt('f|t[eo]+')?
// res := re.find_all(blurb) // [0, 3, 12, 15, 20, 23, 28, 31, 33, 39]
// ```
@[direct_array_access]
pub fn (mut re RE) find_all(in_txt string) []int {
    mut i := 0
    mut res := []int{}

    for i <= in_txt.len {
        mut s := -1
        mut e := -1
        unsafe {
            s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
        }
        if s < 0 || e < s {
            i++
            continue
        }

        abs_start := i + s
        // match_base is handed one position more than the text holds; clamp
        // the end as match_string and find_all_str do, so callers can slice
        // in_txt with it safely.
        abs_end := if i + e > in_txt.len { in_txt.len } else { i + e }

        ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end)
        if !ok {
            if stop_scan {
                break
            }
            i++
            continue
        }

        res << abs_start
        res << abs_end
        // restart right after the match; an empty match moves on by one
        // position so that the scan always makes progress
        if e > s {
            i += e
        } else {
            i++
        }
    }
    return res
}
343 
// split returns the sections of `in_txt` found around the matches of the
// regex; when nothing matches the whole text is the only section.
//
// Usage:
// ```v
// blurb := 'foobar boo steelbar toolbox foot tooooot'
// mut re := regex.regex_opt('f|t[eo]+')?
// res := re.split(blurb) // ['bar boo s', 'lbar ', 'lbox ', 't ', 't']
// ```
pub fn (mut re RE) split(in_txt string) []string {
    pos := re.find_all(in_txt)
    if pos.len == 0 {
        return [in_txt]
    }

    // pos is a flat list of start/end pairs: one section before each match
    // plus the one after the last match
    mut sections := []string{cap: pos.len / 2 + 1}
    mut prev_end := 0
    for k := 0; k < pos.len; k += 2 {
        sections << in_txt[prev_end..pos[k]]
        prev_end = pos[k + 1]
    }
    sections << in_txt[prev_end..]
    return sections
}
370 
// find_all_str finds all the non overlapping occurrences of the match
// pattern and returns the matched texts as a list of strings.
@[direct_array_access]
pub fn (mut re RE) find_all_str(in_txt string) []string {
    mut i := 0
    mut res := []string{}

    for i <= in_txt.len {
        mut s := -1
        mut e := -1
        unsafe {
            s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
        }
        if s < 0 || e < s {
            i++
            continue
        }

        ok, stop_scan := re.check_anchors(in_txt, i + s, i + e)
        if !ok {
            if stop_scan {
                break
            }
            i++
            continue
        }

        // the end of the match can not go past the text left after i
        rest_len := in_txt.len - i
        tmp_e := if e > rest_len { rest_len } else { e }
        res << in_txt[i + s..i + tmp_e]

        // restart right after the match; an empty match moves on by one
        if e > s {
            i += e
        } else {
            i++
        }
    }
    return res
}
423 
424 /******************************************************************************
425 *
426 * Replacers
427 *
428 ******************************************************************************/
// replace_simple returns a copy of `in_txt` where every match found by
// `find_all` is replaced with `repl`. `in_txt` itself is returned when
// there are no matches.
pub fn (mut re RE) replace_simple(in_txt string, repl string) string {
    pos := re.find_all(in_txt)
    if pos.len == 0 {
        return in_txt
    }

    // a builder avoids re-copying the whole result for every match
    mut res := strings.new_builder(in_txt.len)
    mut copy_from := 0
    // pos is a flat list of start/end pairs
    for k := 0; k < pos.len; k += 2 {
        res.write_string(in_txt[copy_from..pos[k]])
        res.write_string(repl)
        copy_from = pos[k + 1]
    }
    // text after the last match, if any is left
    if copy_from < in_txt.len {
        res.write_string(in_txt[copy_from..])
    }
    return res.str()
}
452 
// FnReplace is the type of the callback used by `replace_by_fn` for custom replacements.
// re      the regex that found the match
// in_txt  source text
// start   index of the start of the match in in_txt
// end     index of the end   of the match in in_txt
// the match is in in_txt[start..end]
// the returned string is written in place of the match
pub type FnReplace = fn (re RE, in_txt string, start int, end int) string
459 
// replace_by_fn returns a copy of `in_txt` where each match is replaced with
// the string returned by the `repl_fn` callback for that match.
// The scan stops at the first position where `find_from` finds no
// non-empty match; the text left from there on is copied unchanged.
pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
    mut out := strings.new_builder(in_txt.len)
    // end of the last replaced match, also the position the next search starts from
    mut cursor := 0

    for cursor < in_txt.len {
        s, e := re.find_from(in_txt, cursor)
        if s < 0 || e <= s {
            break
        }
        // text between the previous match and this one
        if cursor < s {
            out.write_string(in_txt[cursor..s])
        }
        out.write_string(repl_fn(re, in_txt, s, e))
        cursor = e
    }

    if cursor < in_txt.len {
        out.write_string(in_txt[cursor..])
    }
    return out.str()
}
500 
// parsed_replace_string expands the group references of the replace string
// `repl`: a backslash followed by one digit is replaced with the text of
// the group with that id, taken from `in_txt` through `get_group_by_id`.
// Any other backslash is kept as it is.
fn (re &RE) parsed_replace_string(in_txt string, repl string) string {
    pieces := repl.split('\\')
    // the text before the first backslash is copied verbatim
    mut out := pieces[0]
    for piece in pieces[1..] {
        is_group_ref := piece.len > 0 && piece[0] >= `0` && piece[0] <= `9`
        if is_group_ref {
            // only the first digit is the group id, the rest is plain text
            group_id := int(piece[0] - `0`)
            out += re.get_group_by_id(in_txt, group_id) + piece[1..]
        } else {
            // not a group reference: restore the backslash removed by split
            out += '\\' + piece
        }
    }
    return out
}
520 
// replace returns a copy of `in_txt` where each match is replaced with
// `repl_str`; group references in `repl_str` are expanded for every match
// by `parsed_replace_string`.
// The scan stops at the first position where `find_from` finds no
// non-empty match; the text left from there on is copied unchanged.
pub fn (mut re RE) replace(in_txt string, repl_str string) string {
    mut out := strings.new_builder(in_txt.len)
    // end of the last replaced match, also the position the next search starts from
    mut cursor := 0

    for cursor < in_txt.len {
        s, e := re.find_from(in_txt, cursor)
        if s < 0 || e <= s {
            break
        }
        // text between the previous match and this one
        if cursor < s {
            out.write_string(in_txt[cursor..s])
        }
        out.write_string(re.parsed_replace_string(in_txt, repl_str))
        cursor = e
    }

    if cursor < in_txt.len {
        out.write_string(in_txt[cursor..])
    }
    return out.str()
}
563 
// replace_n returns a copy of `in_txt` where at most `count` matches are
// replaced with the `repl_str` string:
// if count is > 0 the first `count` matches are replaced,
// if count is < 0 the last `-count` matches are replaced,
// if count is 0 nothing is replaced and `in_txt` is returned.
pub fn (mut re RE) replace_n(in_txt string, repl_str string, count int) string {
    mut picked := re.find_all(in_txt)
    if count == 0 {
        return in_txt
    }

    // picked is a flat list of start/end pairs, so `count` matches take
    // `count * 2` entries; the `#[..]` slice is used to limit their number
    if count > 0 {
        picked = unsafe { picked#[..count * 2] }
    } else {
        picked = unsafe { picked#[count * 2..] }
    }

    mut out := strings.new_builder(in_txt.len)
    mut copy_from := 0
    for k := 0; k < picked.len; k += 2 {
        out.write_string(in_txt[copy_from..picked[k]])
        out.write_string(repl_str)
        copy_from = picked[k + 1]
    }
    out.write_string(in_txt[copy_from..])

    return out.str()
}
597

1	/*
2	regex 1.0 alpha
3
4	Copyright (c) 2019-2024 Dario Deledda. All rights reserved.
5	Use of this source code is governed by an MIT license
6	that can be found in the LICENSE file.
7	*/
8	module regex
9
10	import strings
11
12	/******************************************************************************
13	*
14	* Inits
15	*
16	******************************************************************************/
// regex_base compiles `pattern` and returns the resulting regex object (`RE`)
// together with the compile status `re_err` and the related position
// `err_pos`, both exactly as reported by `impl_compile`.
pub fn regex_base(pattern string) (RE, int, int) {
    // Every buffer is sized from the pattern length: the program and the
    // char class list can not outgrow the pattern, and neither the number of
    // groups nor their nesting depth can exceed half of its length.
    half_len := pattern.len >> 1
    mut re := RE{
        prog:             []Token{len: pattern.len + 1}
        cc:               []CharClass{len: pattern.len}
        group_csave_flag: false // continuous group saving stays disabled
        group_max_nested: half_len
        group_max:        half_len
        group_stack:      []int{len: half_len, init: -1}
        group_data:       []int{len: half_len, init: -1}
    }

    re_err, err_pos := re.impl_compile(pattern)
    return re, re_err, err_pos
}
34
35	/******************************************************************************
36	*
37	* Utilities
38	*
39	******************************************************************************/
// get_group_bounds_by_name returns the `start, end` values stored for the
// group called `group_name`, or `-1, -1` when no group has that name.
pub fn (re &RE) get_group_bounds_by_name(group_name string) (int, int) {
    if group_name !in re.group_map {
        return -1, -1
    }
    // group_map values are one above the group slot; each group owns two
    // consecutive entries of `groups`: start first, then end.
    slot := (re.group_map[group_name] - 1) * 2
    return re.groups[slot], re.groups[slot + 1]
}
50
// get_group_by_name returns the text of the group called `group_name` as a
// slice of `in_txt`. An empty string is returned when the name is unknown or
// when the stored bounds do not describe a non-empty span.
pub fn (re &RE) get_group_by_name(in_txt string, group_name string) string {
    if group_name !in re.group_map {
        return ''
    }
    // group_map values are one above the group slot in `groups`
    slot := (re.group_map[group_name] - 1) * 2
    first := re.groups[slot]
    last := re.groups[slot + 1]
    if first < 0 || last <= first {
        return ''
    }
    return in_txt[first..last]
}
63
// get_group_by_id returns the text of group number `group_id` as a slice of
// `in_txt`. An empty string is returned when `group_id` is negative or beyond
// the stored groups, or when the stored bounds do not describe a non-empty
// span.
pub fn (re &RE) get_group_by_id(in_txt string, group_id int) string {
    // Negative ids are rejected too: they would index before the start of `groups`.
    if group_id < 0 || group_id >= (re.groups.len >> 1) {
        return ''
    }
    // each group owns two consecutive entries of `groups`: start, then end
    index := group_id * 2
    start := re.groups[index]
    end := re.groups[index + 1]
    if start >= 0 && end > start {
        return in_txt[start..end]
    }
    return ''
}
76
// get_group_bounds_by_id returns the `start, end` values stored for group
// number `group_id`, or `-1, -1` when `group_id` is negative, not below
// `group_count`, or has no pair of entries in `groups`.
pub fn (re &RE) get_group_bounds_by_id(group_id int) (int, int) {
    if group_id < 0 || group_id >= re.group_count {
        return -1, -1
    }
    index := group_id * 2
    // `group_count` and `groups` are separate fields: make sure the pair
    // really exists before reading it.
    if index + 1 >= re.groups.len {
        return -1, -1
    }
    return re.groups[index], re.groups[index + 1]
}
85
// Re_group holds the boundaries of one group, as returned by `get_group_list`.
// Both fields default to -1.
pub struct Re_group {
pub:
start int = -1 // start index of the group
end int = -1 // end index of the group
}
91
// get_group_list returns one Re_group per pair of entries in `re.groups`.
// A pair with a non-negative start and an end greater than the start is
// copied into the result; every other entry keeps the default Re_group
// (start and end at -1).
pub fn (re &RE) get_group_list() []Re_group {
    // all the entries start out as the default Re_group
    mut list := []Re_group{len: re.groups.len >> 1}

    for pos := 0; pos < re.groups.len; pos += 2 {
        first := re.groups[pos]
        if first < 0 {
            continue
        }
        last := re.groups[pos + 1]
        if last > first {
            list[pos >> 1] = Re_group{
                start: first
                end:   last
            }
        }
    }
    return list
}
119
120	/******************************************************************************
121	*
122	* Matchers
123	*
124	******************************************************************************/
// match_string matches the pattern against `in_txt` and returns the
// `start, end` pair of the match. The end is never greater than `in_txt.len`.
// When a match is found but `check_anchors` rejects it, `no_match_found, 0`
// is returned; any other result of `match_base` is passed through.
@[direct_array_access]
pub fn (re &RE) match_string(in_txt string) (int, int) {
    unsafe {
        first, raw_last := re.match_base(in_txt.str, in_txt.len + 1)
        // match_base is handed one extra position: pull the end back inside the text
        last := if raw_last > in_txt.len { in_txt.len } else { raw_last }

        if first < 0 || last < first {
            return first, last
        }
        anchors_ok, _ := re.check_anchors(in_txt, first, last)
        if !anchors_ok {
            return no_match_found, 0
        }
        return first, last
    }
}
144
// matches_string reports whether the pattern matches `in_txt`: it is true
// unless `match_string` returns `no_match_found` as start.
pub fn (re &RE) matches_string(in_txt string) bool {
    first, _ := re.match_string(in_txt)
    if first == no_match_found {
        return false
    }
    return true
}
150
151	/******************************************************************************
152	*
153	* Finders
154	*
155	******************************************************************************/
// check_anchors validates the match `start..end` of `in_txt` against the
// `f_ms` (`^`) and `f_me` (`$`) flags of the regex.
// It returns two booleans: the first one tells if the match is acceptable,
// the second one tells the caller that scanning further positions is
// pointless (callers receive it as `stop_scan`).
@[direct_array_access; inline]
fn (re &RE) check_anchors(in_txt string, start int, end int) (bool, bool) {
    needs_start := (re.flag & f_ms) != 0
    needs_end := (re.flag & f_me) != 0

    // `^` means start of the source string.
    if needs_start && start > 0 {
        return false, true
    }
    // nothing to verify for `$` when it is absent or the match reaches the end
    if !needs_end || end >= in_txt.len {
        return true, false
    }
    // `$` also accepts a match that stops right before a newline.
    if in_txt[end] in new_line_list {
        return true, false
    }
    // When `^` is also present, scanning forward can never recover.
    return false, needs_start
}
172
173	/*
174	// find internal implementation HERE for reference do not remove!!
175	@[direct_array_access]
176	fn (mut re RE) find_imp(in_txt string) (int,int) {
177	old_flag := re.flag
178	re.flag |= f_src // enable search mode
179
180	start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
181	//print("Find [${start},${end}] '${in_txt[start..end]}'")
182	if end > in_txt.len {
183	end = in_txt.len
184	}
185	re.flag = old_flag
186
187	if start >= 0 && end > start {
188	return start, end
189	}
190	return no_match_found, 0
191	}
192	*/
193
// find tries to find the first match in the input string.
// Every start position from 0 to `in_txt.len` included is tried in order.
// It returns the `start, end` indexes of the match relative to `in_txt`,
// or `-1, -1` when nothing matches. On success the stored group bounds are
// made relative to `in_txt` as well.
@[direct_array_access]
pub fn (mut re RE) find(in_txt string) (int, int) {
    mut i := 0
    for i <= in_txt.len {
        mut s := -1
        mut e := -1
        unsafe {
            s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1)
        }
        if s < 0 || e < s {
            i++
            continue
        }

        abs_start := i + s
        // match_base is handed one position more than the text holds; clamp
        // the end as match_string does, so it is always a valid slice bound.
        abs_end := if i + e > in_txt.len { in_txt.len } else { i + e }

        ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end)
        if !ok {
            if stop_scan {
                break
            }
            i++
            continue
        }

        // Group bounds are relative to the scanned position: shift them.
        // Negative entries mark groups without bounds and must stay
        // negative, otherwise they would turn into the bogus index i-1.
        for gi in 0 .. re.groups.len {
            if re.groups[gi] >= 0 {
                re.groups[gi] += i
            }
        }
        return abs_start, abs_end
    }
    return -1, -1
}
235
// find_from tries to find the first match in the input string, starting the
// scan at index `start`.
// It returns the `start, end` indexes of the match relative to `in_txt`,
// or `-1, -1` when `start` is negative or nothing matches. On success the
// stored group bounds are made relative to `in_txt` as well.
@[direct_array_access]
pub fn (mut re RE) find_from(in_txt string, start int) (int, int) {
    if start < 0 {
        return -1, -1
    }
    // NOTE(review): the flag is saved and restored as in the original code,
    // nothing visible in this function changes it - confirm before removing.
    old_flag := re.flag

    mut i := start
    for i <= in_txt.len {
        mut s := -1
        mut e := -1
        unsafe {
            // view on the tail of in_txt starting at i, no copy involved
            tmp_str := tos(in_txt.str + i, in_txt.len - i)
            s, e = re.match_string(tmp_str)
        }
        if s < 0 || e < s {
            i++
            continue
        }

        abs_start := i + s
        abs_end := i + e
        // match_string validated the anchors against the tail only,
        // check them again with the positions relative to the whole text
        ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end)
        if !ok {
            if stop_scan {
                break
            }
            i++
            continue
        }

        re.flag = old_flag
        // Group bounds are relative to the scanned tail: shift them.
        // Negative entries mark groups without bounds and must stay
        // negative, otherwise they would turn into the bogus index i-1.
        for gi in 0 .. re.groups.len {
            if re.groups[gi] >= 0 {
                re.groups[gi] += i
            }
        }
        return abs_start, abs_end
    }
    re.flag = old_flag
    return -1, -1
}
285
// find_all finds all the non overlapping occurrences of the match pattern and
// returns a flat list with the start and end index of each match.
// The returned indexes are never greater than `in_txt.len`.
//
// Usage:
// ```v
// blurb := 'foobar boo steelbar toolbox foot tooooot'
// mut re := regex.regex_opt('f|t[eo]+')?
// res := re.find_all(blurb) // [0, 3, 12, 15, 20, 23, 28, 31, 33, 39]
// ```
@[direct_array_access]
pub fn (mut re RE) find_all(in_txt string) []int {
    mut i := 0
    mut res := []int{}

    for i <= in_txt.len {
        mut s := -1
        mut e := -1
        unsafe {
            s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
        }
        if s < 0 || e < s {
            i++
            continue
        }

        abs_start := i + s
        // match_base is handed one position more than the text holds; clamp
        // the end as match_string and find_all_str do, so callers can slice
        // in_txt with it safely.
        abs_end := if i + e > in_txt.len { in_txt.len } else { i + e }

        ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end)
        if !ok {
            if stop_scan {
                break
            }
            i++
            continue
        }

        res << abs_start
        res << abs_end
        // restart right after the match; an empty match moves on by one
        // position so that the scan always makes progress
        if e > s {
            i += e
        } else {
            i++
        }
    }
    return res
}
343
// split returns the sections of `in_txt` found around the matches of the
// regex; when nothing matches the whole text is the only section.
//
// Usage:
// ```v
// blurb := 'foobar boo steelbar toolbox foot tooooot'
// mut re := regex.regex_opt('f|t[eo]+')?
// res := re.split(blurb) // ['bar boo s', 'lbar ', 'lbox ', 't ', 't']
// ```
pub fn (mut re RE) split(in_txt string) []string {
    pos := re.find_all(in_txt)
    if pos.len == 0 {
        return [in_txt]
    }

    // pos is a flat list of start/end pairs: one section before each match
    // plus the one after the last match
    mut sections := []string{cap: pos.len / 2 + 1}
    mut prev_end := 0
    for k := 0; k < pos.len; k += 2 {
        sections << in_txt[prev_end..pos[k]]
        prev_end = pos[k + 1]
    }
    sections << in_txt[prev_end..]
    return sections
}
370
// find_all_str finds all the non overlapping occurrences of the match
// pattern and returns the matched texts as a list of strings.
@[direct_array_access]
pub fn (mut re RE) find_all_str(in_txt string) []string {
    mut i := 0
    mut res := []string{}

    for i <= in_txt.len {
        mut s := -1
        mut e := -1
        unsafe {
            s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
        }
        if s < 0 || e < s {
            i++
            continue
        }

        ok, stop_scan := re.check_anchors(in_txt, i + s, i + e)
        if !ok {
            if stop_scan {
                break
            }
            i++
            continue
        }

        // the end of the match can not go past the text left after i
        rest_len := in_txt.len - i
        tmp_e := if e > rest_len { rest_len } else { e }
        res << in_txt[i + s..i + tmp_e]

        // restart right after the match; an empty match moves on by one
        if e > s {
            i += e
        } else {
            i++
        }
    }
    return res
}
423
424	/******************************************************************************
425	*
426	* Replacers
427	*
428	******************************************************************************/
// replace_simple returns a copy of `in_txt` where every match found by
// `find_all` is replaced with `repl`. `in_txt` itself is returned when
// there are no matches.
pub fn (mut re RE) replace_simple(in_txt string, repl string) string {
    pos := re.find_all(in_txt)
    if pos.len == 0 {
        return in_txt
    }

    // a builder avoids re-copying the whole result for every match
    mut res := strings.new_builder(in_txt.len)
    mut copy_from := 0
    // pos is a flat list of start/end pairs
    for k := 0; k < pos.len; k += 2 {
        res.write_string(in_txt[copy_from..pos[k]])
        res.write_string(repl)
        copy_from = pos[k + 1]
    }
    // text after the last match, if any is left
    if copy_from < in_txt.len {
        res.write_string(in_txt[copy_from..])
    }
    return res.str()
}
452
// FnReplace is the type of the callback used by `replace_by_fn` for custom replacements.
// re      the regex that found the match
// in_txt  source text
// start   index of the start of the match in in_txt
// end     index of the end   of the match in in_txt
// the match is in in_txt[start..end]
// the returned string is written in place of the match
pub type FnReplace = fn (re RE, in_txt string, start int, end int) string
459
// replace_by_fn returns a copy of `in_txt` where each match is replaced with
// the string returned by the `repl_fn` callback for that match.
// The scan stops at the first position where `find_from` finds no
// non-empty match; the text left from there on is copied unchanged.
pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
    mut out := strings.new_builder(in_txt.len)
    // end of the last replaced match, also the position the next search starts from
    mut cursor := 0

    for cursor < in_txt.len {
        s, e := re.find_from(in_txt, cursor)
        if s < 0 || e <= s {
            break
        }
        // text between the previous match and this one
        if cursor < s {
            out.write_string(in_txt[cursor..s])
        }
        out.write_string(repl_fn(re, in_txt, s, e))
        cursor = e
    }

    if cursor < in_txt.len {
        out.write_string(in_txt[cursor..])
    }
    return out.str()
}
500
// parsed_replace_string expands the group references of the replace string
// `repl`: a backslash followed by one digit is replaced with the text of
// the group with that id, taken from `in_txt` through `get_group_by_id`.
// Any other backslash is kept as it is.
fn (re &RE) parsed_replace_string(in_txt string, repl string) string {
    pieces := repl.split('\\')
    // the text before the first backslash is copied verbatim
    mut out := pieces[0]
    for piece in pieces[1..] {
        is_group_ref := piece.len > 0 && piece[0] >= `0` && piece[0] <= `9`
        if is_group_ref {
            // only the first digit is the group id, the rest is plain text
            group_id := int(piece[0] - `0`)
            out += re.get_group_by_id(in_txt, group_id) + piece[1..]
        } else {
            // not a group reference: restore the backslash removed by split
            out += '\\' + piece
        }
    }
    return out
}
520
// replace returns a copy of `in_txt` where each match is replaced with
// `repl_str`; group references in `repl_str` are expanded for every match
// by `parsed_replace_string`.
// The scan stops at the first position where `find_from` finds no
// non-empty match; the text left from there on is copied unchanged.
pub fn (mut re RE) replace(in_txt string, repl_str string) string {
    mut out := strings.new_builder(in_txt.len)
    // end of the last replaced match, also the position the next search starts from
    mut cursor := 0

    for cursor < in_txt.len {
        s, e := re.find_from(in_txt, cursor)
        if s < 0 || e <= s {
            break
        }
        // text between the previous match and this one
        if cursor < s {
            out.write_string(in_txt[cursor..s])
        }
        out.write_string(re.parsed_replace_string(in_txt, repl_str))
        cursor = e
    }

    if cursor < in_txt.len {
        out.write_string(in_txt[cursor..])
    }
    return out.str()
}
563
// replace_n returns a copy of `in_txt` where at most `count` matches are
// replaced with the `repl_str` string:
// if count is > 0 the first `count` matches are replaced,
// if count is < 0 the last `-count` matches are replaced,
// if count is 0 nothing is replaced and `in_txt` is returned.
pub fn (mut re RE) replace_n(in_txt string, repl_str string, count int) string {
    mut picked := re.find_all(in_txt)
    if count == 0 {
        return in_txt
    }

    // picked is a flat list of start/end pairs, so `count` matches take
    // `count * 2` entries; the `#[..]` slice is used to limit their number
    if count > 0 {
        picked = unsafe { picked#[..count * 2] }
    } else {
        picked = unsafe { picked#[count * 2..] }
    }

    mut out := strings.new_builder(in_txt.len)
    mut copy_from := 0
    for k := 0; k < picked.len; k += 2 {
        out.write_string(in_txt[copy_from..picked[k]])
        out.write_string(repl_str)
        copy_from = picked[k + 1]
    }
    out.write_string(in_txt[copy_from..])

    return out.str()
}
597