From 775fd657b38becbbbec2cf71f7d91618b2dabab2 Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Thu, 26 Feb 2026 20:21:43 +0300 Subject: [PATCH] regex: fix module ignores ^ and $ (fixes #22119) --- vlib/regex/regex_anchor_test.v | 21 +++++- vlib/regex/regex_util.v | 113 ++++++++++++++++++++------------- 2 files changed, 86 insertions(+), 48 deletions(-) diff --git a/vlib/regex/regex_anchor_test.v b/vlib/regex/regex_anchor_test.v index 6064fc3c8..891a0b6cf 100644 --- a/vlib/regex/regex_anchor_test.v +++ b/vlib/regex/regex_anchor_test.v @@ -21,9 +21,24 @@ fn test_anchor_both() { assert end == -1 } -fn test_anchor_find_all_str_multiline() { +fn test_anchor_both_find_multiline() { text := 'TITLE\n\nThis is a test.' mut re := regex.regex_opt(r'^\w+$') or { panic(err) } - assert re.find_all(text) == [0, 5] - assert re.find_all_str(text) == ['TITLE'] + start, end := re.find(text) + assert start == 0 + assert end == 5 +} + +fn test_anchor_both_find_all_multiline() { + text := 'TITLE\n\nThis is a test.' + mut re := regex.regex_opt(r'^\w+$') or { panic(err) } + res := re.find_all(text) + assert res == [0, 5] +} + +fn test_anchor_both_find_all_str_multiline() { + text := 'TITLE\n\nThis is a test.' + mut re := regex.regex_opt(r'^\w+$') or { panic(err) } + res := re.find_all_str(text) + assert res == ['TITLE'] } diff --git a/vlib/regex/regex_util.v b/vlib/regex/regex_util.v index 5d21db002..cccde9bf0 100644 --- a/vlib/regex/regex_util.v +++ b/vlib/regex/regex_util.v @@ -158,6 +158,23 @@ pub fn (re &RE) matches_string(in_txt string) bool { * Finders * ******************************************************************************/ +@[direct_array_access; inline] +fn (re &RE) check_anchors(in_txt string, start int, end int) (bool, bool) { + // `^` means start of the source string. + if (re.flag & f_ms) != 0 && start > 0 { + return false, true + } + // `$` means end of the source string, or right before a newline. + if (re.flag & f_me) != 0 && end < in_txt.len { + if in_txt[end] in new_line_list { + return true, false + } + // When `^` is also present, scanning forward can never recover. + return false, (re.flag & f_ms) != 0 + } + return true, false +} + /* // find internal implementation HERE for reference do not remove!! @[direct_array_access] @@ -195,6 +212,16 @@ pub fn (mut re RE) find(in_txt string) (int, int) { s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1) if s >= 0 && e > s { + abs_start := i + s + abs_end := i + e + ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end) + if !ok { + if stop_scan { + break + } + i++ + continue + } // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]") // re.flag = old_flag mut gi := 0 @@ -202,15 +229,7 @@ pub fn (mut re RE) find(in_txt string) (int, int) { re.groups[gi] += i gi++ } - // when ^ (f_ms) is used, it must match on beginning of string - if (re.flag & f_ms) != 0 && s > 0 { - break - } - // when $ (f_me) is used, it must match on ending of string - if (re.flag & f_me) != 0 && i + e < in_txt.len { - break - } - return i + s, i + e + return abs_start, abs_end } i++ } @@ -243,6 +262,16 @@ pub fn (mut re RE) find_from(in_txt string, start int) (int, int) { // s,e = re.find_imp(in_txt[i..]) //------------------------ if s >= 0 && e > s { + abs_start := i + s + abs_end := i + e + ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end) + if !ok { + if stop_scan { + break + } + i++ + continue + } // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]") re.flag = old_flag mut gi := 0 @@ -250,7 +279,7 @@ pub fn (mut re RE) find_from(in_txt string, start int) (int, int) { re.groups[gi] += i gi++ } - return i + s, i + e + return abs_start, abs_end } else { i++ } @@ -282,27 +311,24 @@ pub fn (mut re RE) find_all(in_txt string) []int { // tmp_str := in_txt[i..] // tmp_str := tos(in_txt.str + i, in_txt.len - i) // println("Check: [${tmp_str}]") - s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i) - - if s >= 0 && e > s { - match_start := i + s - match_end := i + e - // when ^ (f_ms) is used, it must match at the beginning of input - if (re.flag & f_ms) != 0 && match_start > 0 { - break - } - // when $ (f_me) is used, it must match at the end of input or before a new line - if (re.flag & f_me) != 0 && match_end < in_txt.len { - if in_txt[match_end] !in new_line_list { - i++ - continue + s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i) + + if s >= 0 && e > s { + abs_start := i + s + abs_end := i + e + ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end) + if !ok { + if stop_scan { + break + } + i++ + continue } + res << abs_start + res << abs_end + i += e + continue } - res << match_start - res << match_end - i += e - continue - } /* if e > 0 { i += e @@ -359,24 +385,21 @@ pub fn (mut re RE) find_all_str(in_txt string) []string { // tmp_str := in_txt[i..] // tmp_str := tos(in_txt.str + i, in_txt.len - i) // println("Check: [${tmp_str}]") - s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i) - - if s >= 0 && e > s { - match_start := i + s - match_end := i + e - // when ^ (f_ms) is used, it must match at the beginning of input - if (re.flag & f_ms) != 0 && match_start > 0 { - break - } - // when $ (f_me) is used, it must match at the end of input or before a new line - if (re.flag & f_me) != 0 && match_end < in_txt.len { - if in_txt[match_end] !in new_line_list { - i++ + s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i) + + if s >= 0 && e > s { + abs_start := i + s + abs_end := i + e + ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end) + if !ok { + if stop_scan { + break + } + i++ continue } - } - tmp_str := tos(in_txt.str + i, in_txt.len - i) - mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e } + tmp_str := tos(in_txt.str + i, in_txt.len - i) + mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e } // println("Found: ${s}:${e} [${tmp_str[s..e]}]") res << tmp_str[s..tmp_e] i += e -- 2.39.5