Gitly


1 /*
2 regex_test.v
3 
4 Copyright (c) 2026 Dario Deledda. All rights reserved.
5 Use of this source code is governed by an MIT license
6 that can be found in the LICENSE file.
7 */
8 import regex.pcre
9 
// main is the program entry point: it prints a banner, calls every test group
// defined in this file one after another, and prints a success line at the end.
// The success line is only meaningful if every group is actually invoked, so
// each `test_*` function of this file is listed here.
fn main() {
    println('Running pcre tests...\n')

    test_regex()
    test_complex_quantifiers()
    test_range_quantifiers()
    test_anchors()
    test_word_boundaries()
    test_flags()
    test_named_groups()
    test_non_capturing_groups()

    // New features tests
    test_find_all()
    test_find_from()
    test_replace()
    // test_stress_vm()

    // Remaining groups defined further down in this file
    test_non_greedy_quantifiers()
    test_compatibility_layer()
    test_hex_escapes()
    test_duplicate_named_groups()
    test_invalid_quantifier_ranges()
    test_find_all_utf8_safety()

    println('\nAll tests passed!')
}
30 
31 // --- New Feature Tests ---
32 
// test_find_all drives find_all() through tst_find_all with four scenarios:
// plain extraction, a subject without any match, a start-anchored pattern and
// a subject in which candidate matches would overlap.
fn test_find_all() {
    println('\n--- Testing find_all() ---')

    digits := r'\d+'
    nothing := []string{}

    // Plain extraction of every run of digits / word characters.
    tst_find_all(digits, '123 abc 456', ['123', '456'])
    tst_find_all(r'\w+', 'hi there', ['hi', 'there'])

    // A subject without digits yields an empty result list.
    tst_find_all(digits, 'no numbers', nothing)

    // Patterns that match the empty string (e.g. \b) are intentionally not
    // asserted: the outcome depends on how the engine treats empty matches.
    // Per the original author's note, the VM advances the index when a match
    // has length 0 to avoid an infinite loop, and \b would match at 0, 3, 4
    // and 7 in '123 abc', each time returning an empty string.
    // tst_find_all(r'\b', '123 abc', ['', '', '', '']) // implementation detail

    // A pattern anchored at the start is expected to match only once.
    tst_find_all(r'^\w+', 'word word word', ['word'])

    // Non-overlapping scan: "ana" is found at index 1 of "banana"; the search
    // then resumes at index 4 ("na"), so the overlapping second "ana" is skipped.
    tst_find_all(r'ana', 'banana', ['ana'])
}
57 
// test_find_from checks find_from() against the subject 'test test test' for a
// series of start offsets. The three arrays below are read column-wise: for the
// i-th scenario the search starts at offsets[i] and is expected to report a
// match at positions[i] with the text texts[i] ('none' / -1 meaning no match).
fn test_find_from() {
    println('\n--- Testing find_from() ---')

    haystack := 'test test test'

    // Scenario order:
    //   0  -> finds the first occurrence
    //   1  -> skips the first, finds the second
    //   6  -> finds the third
    //   11 -> too close to the end, finds nothing
    //   50 -> out of bounds, finds nothing
    //   5  -> starts exactly at a match position
    offsets := [0, 1, 6, 11, 50, 5]
    positions := [0, 5, 10, -1, -1, 5]
    texts := ['test', 'test', 'test', 'none', 'none', 'test']

    for i, offset in offsets {
        tst_find_from(r'test', haystack, offset, positions[i], texts[i])
    }
}
81 
// test_replace runs replace() through tst_replace for a table of scenarios.
// Each row is [pattern, subject, replacement, expected result].
fn test_replace() {
    println('\n--- Testing replace() ---')

    // Rows, in order:
    //   1. simple replacement of a run of digits
    //   2. group substitution with $1 / $2
    //   3. only the FIRST occurrence is expected to be replaced
    //      (the original note says replace() is based on find())
    //   4. alternation of a start-anchored and an end-anchored class
    //   5. reference to a non-existent group ($9) is expected to expand to nothing
    rows := [
        [r'\d+', 'abc 123 def', 'NUM', 'abc NUM def'],
        [r'(\w+), (\w+)', 'Doe, John', '$2 $1', 'John Doe'],
        [r'a', 'bananas', 'o', 'bonanas'],
        [r'(^[#.]+)|([#.]+$)', r'_#abc.#_ab#', '*', '_#abc.#_ab*'],
        [r'(\d+)', '123', 'Num: $9', 'Num: '],
    ]

    for row in rows {
        tst_replace(row[0], row[1], row[2], row[3])
    }
}
102 
103 /*
104 fn test_stress_vm() {
105     println('\n--- Testing VM Stability (Stress Test) ---')
106     // Recursive engines often crash on patterns like (a*)* or very long strings
107     // if not carefully managed. The VM should handle this via heap stack.
108     
109     long_text := 'a'.repeat(2000)
110     tst_find(r'a+', long_text, long_text)
111     
112     println('  [Pass] Long string match')
113     
114     // Backtracking stress
115     // Pattern: (a+)+b matching aaaaa....a (fails)
116     // This forces extensive backtracking.
117     short_text := 'a'.repeat(25)
118     mut r := pcre.compile(r'(a+)+b') or { panic(err) }
119     r.max_stack_depth = 4000 // increase the stack depth for this test
120     res := r.find(short_text)
121     assert res == none
122     println('  [Pass] Backtracking stress test')
123 }
124 */
125 
126 // --- Existing Tests ---
127 
// test_flags checks the inline flags (?i), (?m) and (?s) through tst_find.
// Every row is [pattern, subject, expected match text]; 'none' means that no
// match is expected.
fn test_flags() {
    println('\n--- Testing Flags ((?i), (?m), (?s)) ---')

    // 1. Case-insensitive (?i); the third row also covers a character class.
    case_insensitive := [
        ['(?i)cat', 'Cat', 'Cat'],
        ['(?i)CAT', 'cat', 'cat'],
        ['(?i)[a-z]+', 'UPPER', 'UPPER'],
        ['(?i)x', 'X', 'X'],
        ['(?i)x', 'y', 'none'],
    ]

    // Flag in the middle of a pattern: it applies to the tokens after it only,
    // so the leading 'c' stays case-sensitive.
    mid_pattern := [
        ['c(?i)at', 'cAT', 'cAT'],
        ['c(?i)at', 'Cat', 'none'],
    ]

    // 2. Multiline (?m): ^ and $ also match at line starts / ends. Without
    // the flag they only match at the start / end of the whole string.
    multiline := [
        ['(?m)^line2', 'line1\nline2', 'line2'],
        ['^line2', 'line1\nline2', 'none'],
        ['(?m)line1$', 'line1\nline2', 'line1'],
        ['line1$', 'line1\nline2', 'none'],
    ]

    // 3. Dot-all (?s): '.' matches a newline; by default it does not.
    dot_all := [
        ['(?s)a.b', 'a\nb', 'a\nb'],
        ['a.b', 'a\nb', 'none'],
    ]

    // 4. Combined flags (?im), followed by the negative cases.
    combined_and_negative := [
        ['(?im)^line2', 'LINE1\nLINE2', 'LINE2'],
        ['(?i)cat', 'dog', 'none'],
        ['(?m)^line2', 'line1 line2', 'none'], // not at the start of a line
    ]

    for group in [case_insensitive, mid_pattern, multiline, dot_all, combined_and_negative] {
        for row in group {
            tst_find(row[0], row[1], row[2])
        }
    }
}
163 
// test_word_boundaries checks \b (word boundary) and \B (non-word boundary)
// through tst_find. Every row is [pattern, subject, expected match text];
// 'none' means that no match is expected.
fn test_word_boundaries() {
    println('\n--- Testing Word Boundaries (\\b and \\B) ---')

    // 1. \b before, after and on both sides of a word, plus punctuation neighbours.
    boundary := [
        ['\\bcat', 'cat', 'cat'],
        ['\\bcat', 'concat', 'none'],
        ['\\bcat', 'catapult', 'cat'],
        ['cat\\b', 'cat', 'cat'],
        ['cat\\b', 'concat', 'cat'],
        ['cat\\b', 'catapult', 'none'],
        ['\\bcat\\b', 'cat', 'cat'],
        ['\\bcat\\b', 'a cat is here', 'cat'],
        ['\\bcat\\b', 'concat', 'none'],
        ['\\bcat\\b', 'catapult', 'none'],
        ['\\btest\\b', 'test.', 'test'],
        ['\\btest\\b', '(test)', 'test'],
    ]

    // 2. \B requires that the position is NOT a word boundary.
    non_boundary := [
        ['a\\B', 'ab', 'a'],
        ['a\\B', 'a.', 'none'],
        ['\\Bcat', 'concat', 'cat'],
        ['\\Bcat', 'cat', 'none'],
        ['cat\\B', 'catapult', 'cat'],
    ]

    // Negative cases: no word at all, and a single-letter subject for \B.
    negative := [
        ['\\b\\w+\\b', '... ...', 'none'],
        ['\\B', 'a', 'none'],
    ]

    for group in [boundary, non_boundary, negative] {
        for row in group {
            tst_find(row[0], row[1], row[2])
        }
    }
}
195 
// test_anchors checks the ^ and $ anchors through tst_find. Every row is
// [pattern, subject, expected match text]; 'none' means that no match is
// expected and '' means a zero-width match.
fn test_anchors() {
    println('\n--- Testing Anchors (^ and $) ---')

    positive := [
        // 1. Start of string (^)
        ['^abc', 'abc', 'abc'],
        ['^abc', 'abcdef', 'abc'],
        ['^abc', 'abc abc', 'abc'],
        ['^\\d+', '123 text', '123'],
        // 2. End of string ($)
        ['xyz$', 'xyz', 'xyz'],
        ['xyz$', 'abcxyz', 'xyz'],
        ['\\d+$', 'text 123', '123'],
        // 3. Both anchors
        ['^hello$', 'hello', 'hello'],
        // 4. Zero-width matches
        ['^', 'abc', ''],
        ['$', 'abc', ''],
        // 5. Anchors combined with alternation
        ['^a|b$', 'apple', 'a'],
        ['^a|b$', 'blob', 'b'],
        // 6. Anchors combined with a repeated group
        ['^(abc)+$', 'abcabc', 'abcabc'],
    ]

    // Subjects on which the anchored patterns must fail.
    negative := [
        ['^abc', 'xyzabc', 'none'],
        ['^\\d+', 'text 123', 'none'],
        ['xyz$', 'xyzabc', 'none'],
        ['\\d+$', '123 text', 'none'],
        ['^hello$', 'hello world', 'none'],
        ['^hello$', 'say hello', 'none'],
        ['^a|b$', 'cba', 'none'],
        ['^(abc)+$', 'abcabcx', 'none'],
        ['^$', 'a', 'none'],
    ]

    for group in [positive, negative] {
        for row in group {
            tst_find(row[0], row[1], row[2])
        }
    }
}
235 
// test_regex covers the basic engine features: simple quantifiers, predefined
// and custom character classes, alternation, Unicode subjects, fullmatch(),
// negative cases and patterns that must fail to compile.
// Rows are [pattern, subject, expected match text]; 'none' means no match.
fn test_regex() {
    // Small local runners so that each section is just a title and a table.
    find_rows := fn (rows [][]string) {
        for row in rows {
            tst_find(row[0], row[1], row[2])
        }
    }
    fullmatch_rows := fn (rows [][]string) {
        for row in rows {
            tst_fullmatch(row[0], row[1], row[2])
        }
    }

    println('\n--- Testing Basic Features ---')
    find_rows([
        ['a?b', 'ab', 'ab'],
        ['a?b', 'b', 'b'],
        ['a+b', 'aaab', 'aaab'],
        ['a*b', 'b', 'b'],
        ['\\d+', '123 abc', '123'],
    ])

    println('\n--- Testing Character Classes ---')
    find_rows([
        ['\\w+', 'word1_ and', 'word1_'],
        ['\\W+', ' and', ' '],
        ['\\s+', '          start', '          '],
        ['\\d{3}-\\d{4}', 'call 555-1234 now', '555-1234'],
        ['\\D+', 'call 555', 'call '],
        ['\\a+', 'lowercase', 'lowercase'],
        ['\\A+', 'UPPER', 'UPPER'],
    ])

    println('\n--- Testing Alternation (|) ---')
    find_rows([
        ['cat|dog', 'the dog says meow', 'dog'],
        ['a(b|c)d', 'acd', 'acd'],
        ['apple|apply', 'I want to apply', 'apply'],
    ])

    println('\n--- Testing Custom Character Classes ([...]) ---')
    find_rows([
        ['[aeiou]', 'hello world', 'e'],
        ['gr[ae]y', 'the color grey', 'grey'],
        ['[^aeiou]+', 'rhythm', 'rhythm'],
        ['[a-z]+', 'lowercase123', 'lowercase'],
        ['[a-zA-Z0-9_]+', 'word_1_with_everything', 'word_1_with_everything'],
    ])

    println('\n--- Testing Unicode ---')
    find_rows([
        ['日本語', 'Text containing 日本語.', '日本語'],
        ['h.llo', 'héllo wørld', 'héllo'],
        ['(é)+', 'cafééé', 'ééé'],
        ['😀+', 'Happy 😀😀 day', '😀😀'],
    ])

    println('\n--- Testing fullmatch() ---')
    fullmatch_rows([
        [r'\d+', '12345', '12345'],
        ['(?s).*', 'Any content including 😀', 'Any content including 😀'],
    ])

    // Negative cases for the basic features (no section title is printed).
    find_rows([
        ['abc', 'ab', 'none'],
        ['abc', 'acb', 'none'],
        ['a+b', 'b', 'none'],
        ['\\d+', 'abc', 'none'],
        ['\\D+', '123', 'none'],
        ['\\w+', '@#$', 'none'],
        ['\\s+', 'Text', 'none'],
        ['\\a+', 'UPPERCASE', 'none'],
        ['\\A+', 'lowercase', 'none'],
        ['cat|dog', 'bird', 'none'],
        ['[0-9]', 'a', 'none'],
        ['[^0-9]', '1', 'none'],
    ])
    fullmatch_rows([
        [r'\d+', '12345abc', 'none'],
        [r'\d+', 'abc12345', 'none'],
    ])

    println('\n--- Testing Compilation Errors ---')
    for broken in ['a++', '[a-z', 'a|'] {
        tst_compile_error(broken)
    }
}
296 
// test_complex_quantifiers checks +, * and ? on single characters, groups,
// alternations and character classes through tst_find. Every row is
// [pattern, subject, expected match text]; 'none' means no match is expected.
fn test_complex_quantifiers() {
    println('\n--- Testing Complex Quantifiers (+, *, ?) ---')

    positive := [
        // one-or-more
        ['a+', 'aaaaa', 'aaaaa'],
        ['a+b', 'aaaaab', 'aaaaab'],
        // zero-or-more
        ['x*y', 'y', 'y'],
        ['x*y', 'xy', 'xy'],
        ['x*y', 'xxxy', 'xxxy'],
        // optional
        ['colou?r', 'color', 'color'],
        ['colou?r', 'colour', 'colour'],
        ['x?y', 'xy', 'xy'],
        ['x?y', 'y', 'y'],
        // quantified groups
        ['(ab)+', 'ababab', 'ababab'],
        ['(ha)+', 'hahaha!', 'hahaha'],
        // quantified alternations
        ['(cat|dog)+', 'catdogcat', 'catdogcat'],
        ['(a|b)+', 'abaabbba', 'abaabbba'],
        // quantified character classes; the last one matches the empty string
        ['[0-9]+', 'Order 12345', '12345'],
        ['[a-z]*', '123', ''],
    ]

    negative := [
        ['a+', '', 'none'],
        ['a+', 'b', 'none'],
        ['a+b', 'aaac', 'none'],
        ['x?y', 'x', 'none'],
        ['(ab)+', 'ac', 'none'],
        ['[0-9]+', 'abc', 'none'],
    ]

    for group in [positive, negative] {
        for row in group {
            tst_find(row[0], row[1], row[2])
        }
    }
}
329 
// test_range_quantifiers checks the {n}, {n,}, {,n} and {m,n} quantifiers
// through tst_find. Every row is [pattern, subject, expected match text];
// 'none' means no match is expected.
fn test_range_quantifiers() {
    println('\n--- Testing Range Quantifiers {m,n} ---')

    positive := [
        // exact count
        ['a{3}', 'aaa', 'aaa'],
        ['a{3}', 'aaaa', 'aaa'],
        // lower bound only
        ['a{2,}', 'aa', 'aa'],
        ['a{2,}', 'aaaaa', 'aaaaa'],
        // upper bound only
        ['a{,3}', 'aaaa', 'aaa'],
        ['a{,3}', 'aa', 'aa'],
        ['a{,3}', '', ''],
        // both bounds
        ['a{2,4}', 'aa', 'aa'],
        ['a{2,4}', 'aaa', 'aaa'],
        ['a{2,4}', 'aaaa', 'aaaa'],
        ['a{2,4}', 'aaaaa', 'aaaa'],
        // ranges on classes inside a longer pattern
        [r'\d{2,4}-\w{2}', '123-ab', '123-ab'],
        [r'\d{2,4}-\w{2}', '12345-ab', '2345-ab'],
    ]

    negative := [
        ['a{3}', 'aa', 'none'],
        ['a{2,}', 'a', 'none'],
        ['a{2,4}', 'a', 'none'],
        [r'\d{2,4}-\w{2}', '1-ab', 'none'],
        [r'\d{2,4}-\w{2}', '123-a', 'none'],
    ]

    for group in [positive, negative] {
        for row in group {
            tst_find(row[0], row[1], row[2])
        }
    }
}
358 
// test_named_groups checks (?P<name>...) groups: positional access through
// `groups`, lookup through group_by_name(), nested named groups, a mix of named
// and unnamed groups, consecutive named groups and two negative cases.
fn test_named_groups() {
    println('\n--- Testing Named Groups ---')

    // Two named groups side by side. An unknown name is expected to yield ''.
    date_re := pcre.compile('(?P<year>\\d{4})-(?P<month>\\d{2})') or { panic(err) }
    date_hit := date_re.find('Date: 2025-01') or { panic('Match not found') }

    assert date_hit.groups[0] == '2025'
    assert date_hit.groups[1] == '01'

    date_names := ['year', 'month', 'missing']
    date_values := ['2025', '01', '']
    for i, name in date_names {
        assert date_re.group_by_name(date_hit, name) == date_values[i]
    }

    // A named group nested inside another named group.
    entry_re := pcre.compile('(?P<entry>key: (?P<val>\\d+))') or { panic(err) }
    entry_hit := entry_re.find('List [ key: 99 ]') or { panic('Match not found') }

    entry := entry_re.group_by_name(entry_hit, 'entry')
    val := entry_re.group_by_name(entry_hit, 'val')
    println('Nested: entry="${entry}", val="${val}"')
    assert entry == 'key: 99'
    assert val == '99'

    // One named and one unnamed group in the same pattern.
    price_re := pcre.compile('(?P<key>\\w+): (\\d+)') or { panic(err) }
    price_hit := price_re.find('Price: 100') or { panic('Match not found') }

    assert price_hit.groups[0] == 'Price'
    assert price_hit.groups[1] == '100'
    assert price_re.group_by_name(price_hit, 'key') == 'Price'

    // Three consecutive named groups, each capturing the letter it is named after.
    abc_re := pcre.compile('(?P<a>a)(?P<b>b)(?P<c>c)') or { panic(err) }
    abc_hit := abc_re.find('abc') or { panic('Match not found') }
    for letter in ['a', 'b', 'c'] {
        assert abc_re.group_by_name(abc_hit, letter) == letter
    }

    // --- Negative Tests (Named Groups) ---
    // The year needs four digits, so this subject must not match.
    if _ := date_re.find('Date: 99-01') {
        assert false, 'Should not match'
    } else {
        println('Found: none (Expected: none)')
    }
    tst_find('(?P<id>\\d+)', 'abc', 'none')
}
409 
// test_non_capturing_groups verifies that (?:...) groups take part in matching
// but do not show up in the captured groups. The three arrays are read
// column-wise: patterns[i] is applied to subjects[i], is expected to match the
// whole subject, and is expected to capture exactly captures[i].
fn test_non_capturing_groups() {
    println('\n--- Testing Non-Capturing Groups ---')

    patterns := ['(?:a|b)c', '(a)(?:b)(c)', '(a(?:b)c)', '(?:header): (\\d+)']
    subjects := ['ac', 'abc', 'abc', 'header: 123']
    captures := [[]string{}, ['a', 'c'], ['abc'], ['123']]

    for i, pattern in patterns {
        tst_find_with_groups(pattern, subjects[i], subjects[i], captures[i])
    }

    // --- Negative Tests (Non-Capturing Groups) ---
    for row in [['(?:a|b)c', 'dc', 'none'], ['(?:a)b', 'c', 'none']] {
        tst_find(row[0], row[1], row[2])
    }
}
425 
426 // --- Helper Functions ---
427 
// tst_find compiles `pattern`, runs find() on `text` and hands the outcome to
// check_result together with `expected` (the expected match text, or 'none').
// A compile error is printed and reported as a failed assertion.
fn tst_find(pattern string, text string, expected string) {
    print('[find] Pattern: "${pattern}", Text: "${text}" -> ')
    compiled := pcre.compile(pattern) or {
        println('Compile error: ${err}')
        assert false, 'Unexpected compile error: ${err}'
        return
    }
    check_result(compiled.find(text), expected)
}
438 
// tst_find_all compiles `pattern` (panicking on a compile error), collects the
// text of every match that find_all() reports for `text`, prints the list and
// asserts that it equals `expected`.
fn tst_find_all(pattern string, text string, expected []string) {
    print('[find_all] Pattern: "${pattern}", Text: "${text}" -> ')
    compiled := pcre.compile(pattern) or { panic(err) }

    // Keep only the matched text of each result, in the order reported.
    found_texts := compiled.find_all(text).map(it.text)

    println('Found: ${found_texts}')
    assert found_texts == expected
}
452 
// tst_find_from compiles `pattern` (panicking on a compile error) and calls
// find_from() on `text` with the offset `start`. When a match is found, its
// text and start position must equal `expected_text` and `expected_pos`; when
// nothing is found, `expected_text` must be the sentinel 'none'.
fn tst_find_from(pattern string, text string, start int, expected_pos int, expected_text string) {
    print('[find_from] Pattern: "${pattern}", Start: ${start} -> ')
    compiled := pcre.compile(pattern) or { panic(err) }

    if hit := compiled.find_from(text, start) {
        println('Found: "${hit.text}" at ${hit.start}')
        assert hit.text == expected_text
        assert hit.start == expected_pos
    } else {
        println('Found: none')
        assert expected_text == 'none'
    }
}
467 
// tst_replace compiles `pattern` (panicking on a compile error), applies
// replace() to `text` with the replacement `repl`, prints the outcome and
// asserts that it equals `expected`.
fn tst_replace(pattern string, text string, repl string, expected string) {
    print('[replace] Pattern: "${pattern}", Repl: "${repl}" -> ')
    compiled := pcre.compile(pattern) or { panic(err) }
    replaced := compiled.replace(text, repl)
    println('Result: "${replaced}"')
    assert replaced == expected
}
475 
// tst_fullmatch compiles `pattern`, runs fullmatch() on `text` and hands the
// outcome to check_result together with `expected` (the expected match text,
// or 'none'). A compile error is printed and reported as a failed assertion.
fn tst_fullmatch(pattern string, text string, expected string) {
    print('[fullmatch] Pattern: "${pattern}", Text: "${text}" -> ')
    compiled := pcre.compile(pattern) or {
        println('Compile error: ${err}')
        assert false, 'Unexpected compile error: ${err}'
        return
    }
    check_result(compiled.fullmatch(text), expected)
}
486 
// check_result prints and verifies the outcome of a single search.
// When `match_res` holds a match, its text must equal `expected`; when it is
// none, `expected` must be the sentinel string 'none'.
fn check_result(match_res ?pcre.Match, expected string) {
    if hit := match_res {
        println('Found: "${hit.text}" (Expected: "${expected}")')
        assert hit.text == expected
    } else {
        println('Found: none (Expected: "${expected}")')
        assert expected == 'none'
    }
}
496 
// tst_find_with_groups compiles `pattern`, runs find() on `text` and asserts
// that a match exists, that its text equals `expected_match` and that its
// captured groups equal `expected_groups`. A compile error or a missing match
// is printed and reported as a failed assertion.
fn tst_find_with_groups(pattern string, text string, expected_match string, expected_groups []string) {
    print('[find+groups] Pattern: "${pattern}", Text: "${text}" -> ')
    compiled := pcre.compile(pattern) or {
        println('Compile error: ${err}')
        assert false, 'Unexpected compile error: ${err}'
        return
    }
    hit := compiled.find(text) or {
        println('Found: none')
        assert false // a match was required here
        return
    }
    println('Found: "${hit.text}", Groups: ${hit.groups}')
    assert hit.text == expected_match
    assert hit.groups == expected_groups
}
514 
// tst_compile_error asserts that `pattern` is rejected by pcre.compile().
// The reported error is printed when compilation fails as expected; an
// unexpected successful compilation is printed and fails the assertion.
fn tst_compile_error(pattern string) {
    print('[compile_error] Pattern: "${pattern}" -> ')
    if _ := pcre.compile(pattern) {
        println('Error: Did not get a compilation error!')
        assert false
    } else {
        println('Caught expected error: ${err}')
    }
}
524 
// test_non_greedy_quantifiers checks the lazy quantifiers *?, +?, ?? and
// {m,n}? through tst_find, contrasting them with their greedy counterparts.
// Every row is [pattern, subject, expected match text].
fn test_non_greedy_quantifiers() {
    println('\n--- Testing Non-Greedy Quantifiers (*?, +?, ??, {m,n}?) ---')

    core := [
        // 1. Lazy star: stops at the first closing '>'; the greedy form runs
        //    to the last '>'.
        [r'<.*?>', '<div>content</div>', '<div>'],
        [r'<.*>', '<div>content</div>', '<div>content</div>'],
        // 2. Lazy plus: a single 'a' suffices; with a trailing 'b' it has to
        //    expand over every 'a' (backtracking).
        [r'a+?', 'aaaaa', 'a'],
        [r'a+?b', 'aaab', 'aaab'],
        // 3. Lazy question mark: prefers zero occurrences; in context it first
        //    tries to skip 'u' and only consumes it when 'r' cannot match.
        [r'a??', 'a', ''],
        [r'colou??r', 'color', 'color'],
        [r'colou??r', 'colour', 'colour'],
        // 4. Lazy range: takes the minimum (2 digits); greedy takes the maximum (5).
        [r'\d{2,5}?', '123456789', '12'],
        [r'\d{2,5}', '123456789', '12345'],
        // 5. Real-world case (user report): escaped characters plus a lazy
        //    capture group must match only the first '$t(...)'.
        [r'\$t\((.*?)\)', r'$t(common.hello) dear $t(common.name)', r'$t(common.hello)'],
    ]

    edge := [
        // find() returns the first valid match, so the lazy scan stops at the first 'y'.
        [r'x.*?y', 'x123y456y', 'x123y'],
        // With a start anchor, .*? expands lazily until it reaches 'b'.
        [r'^.*?b', '123b', '123b'],
        // A lazy optional must still backtrack when the only 'a' is needed by
        // the final 'a' of the pattern; the greedy form is listed for contrast.
        [r'a?a', 'a', 'a'],
        [r'a??a', 'a', 'a'],
    ]

    for group in [core, edge] {
        for row in group {
            tst_find(row[0], row[1], row[2])
        }
    }
}
574 
// test_compatibility_layer exercises the compatibility API used in this file:
// new_regex(), match_str(), get() and get_all(). The trailing `0` arguments
// passed to new_regex() and match_str() stand in for option flags that,
// according to the original notes, are ignored.
fn test_compatibility_layer() {
    re := pcre.new_regex(r'(\w+)\s+(\d+)', 0) or {
        assert false, 'new_regex failed to compile: ${err}'
        return
    }
    subject := 'item 42 ignored item 99'

    // Searching from index 0 must find "item 42".
    first := re.match_str(subject, 0, 0) or {
        assert false, 'match_str failed to find match'
        return
    }

    // get(0) is the full match, get(1) and get(2) are the two capture groups.
    wanted := ['item 42', 'item', '42']
    for idx, want in wanted {
        got := first.get(idx) or { '' }
        assert got == want
    }

    // One past the last group: get() must report none.
    if _ := first.get(3) {
        assert false, 'get(3) should return none for 2 groups'
    }

    // get_all() returns the full match followed by every capture group.
    captures := first.get_all()
    assert captures.len == 3
    for idx, want in wanted {
        assert captures[idx] == want
    }

    // Searching after "item 42" (7 characters long) must find "item 99".
    second := re.match_str(subject, 7, 0) or {
        assert false, 'match_str failed to find second match from offset'
        return
    }
    second_full := second.get(0) or { '' }
    assert second_full == 'item 99'
    second_number := second.get(2) or { '' }
    assert second_number == '99'

    // Searching from the very end of the subject must find nothing.
    if _ := re.match_str(subject, subject.len, 0) {
        assert false, 'match_str should return none when no match is found'
    }
}
638 
// test_hex_escapes checks the \xHH (two hex digits) and \XHHHH (four hex
// digits) escapes through tst_find, and verifies that malformed escapes are
// rejected at compile time. Rows are [pattern, subject, expected match text].
fn test_hex_escapes() {
    valid := [
        // \xHH: 0x41 = 'A', 0x61 = 'a', 0x20 = space
        [r'\x41', 'ABC', 'A'],
        [r'\x61', 'abc', 'a'],
        [r'\x41+', 'AAAB', 'AAA'],
        [r'\x20\x41', ' A test', ' A'],
        // \XHHHH: U+0041 = 'A', U+0061 = 'a', U+03B1 = 'α'
        [r'\X0041', 'ABC', 'A'],
        [r'\X0061', 'abc', 'a'],
        [r'\X03B1', 'αβγ', 'α'],
        // Several escapes in a row spelling "Hello"
        [r'\x48\x65\x6C\x6C\x6F', 'Hello World', 'Hello'],
    ]
    for row in valid {
        tst_find(row[0], row[1], row[2])
    }

    // Malformed escapes: one digit only, non-hex characters, three digits only.
    for broken in [r'\x4', r'\xGG', r'\X004'] {
        tst_compile_error(broken)
    }
}
659 
// test_duplicate_named_groups verifies that using the same group name twice is
// a compile error, while two distinct names compile and can be looked up by name.
fn test_duplicate_named_groups() {
    // The name 'id' appears twice: compilation must fail.
    tst_compile_error(r'(?P<id>\d+)-(?P<id>\w+)')

    // Distinct names 'a' and 'b' are accepted.
    distinct := pcre.compile(r'(?P<a>\d+)-(?P<b>\w+)') or {
        assert false, 'Should compile: ${err}'
        return
    }
    if hit := distinct.find('12-abc') {
        assert distinct.group_by_name(hit, 'a') == '12'
        assert distinct.group_by_name(hit, 'b') == 'abc'
    } else {
        assert false, 'Should match'
    }
}
675 
// test_invalid_quantifier_ranges verifies that a range quantifier whose minimum
// exceeds its maximum is a compile error, and that the degenerate range {0,0}
// compiles and lets the rest of the pattern match.
fn test_invalid_quantifier_ranges() {
    // min > max must be rejected.
    for reversed in [r'a{3,1}', r'a{5,2}'] {
        tst_compile_error(reversed)
    }

    // {0,0} allows no 'a' at all, so 'a{0,0}b' must match a lone 'b'.
    zero_times := pcre.compile(r'a{0,0}b') or {
        assert false, 'Should compile: ${err}'
        return
    }
    if hit := zero_times.find('b') {
        assert hit.text == 'b'
    } else {
        assert false, 'Should match'
    }
}
692 
// test_find_all_utf8_safety checks that find_all() with a pattern that can
// match the empty string never reports a match boundary inside a multi-byte
// UTF-8 sequence, and that it terminates on a subject containing an emoji.
fn test_find_all_utf8_safety() {
    sample := 'aé' // 'é' is 2 bytes (0xC3 0xA9)
    sample_bytes := sample.bytes() // computed once, not once per match

    r := pcre.compile(r'x*') or { panic(err) }
    matches := r.find_all(sample)
    // Without this, an empty result list would make the loop below pass vacuously.
    assert matches.len > 0, 'find_all returned no matches for an empty-matching pattern'

    for m in matches {
        // A UTF-8 continuation byte has the bit pattern 10xxxxxx; a boundary
        // must never sit on one. Positions equal to the subject length are
        // the end of the string and need no check.
        if m.start < sample_bytes.len {
            assert (sample_bytes[m.start] & 0xC0) != 0x80, 'Misaligned match start at ${m.start}'
        }
        // The end is derived from the start plus the byte length of the matched text.
        stop := m.start + m.text.len
        if stop < sample_bytes.len {
            assert (sample_bytes[stop] & 0xC0) != 0x80, 'Misaligned match end at ${stop}'
        }
    }

    // find_all should not infinite-loop on emoji
    r2 := pcre.compile(r'y*') or { panic(err) }
    matches2 := r2.find_all('😀!')
    assert matches2.len > 0
}
710