From 7dca3d91f8b54e78fe10ffb51e96c8d5e239f7c8 Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Wed, 25 Mar 2026 22:44:49 +0300 Subject: [PATCH] regex: fix matching failure (fixes #16880) --- vlib/regex/regex.v | 3 +++ vlib/regex/regex_test.v | 43 +++++++++++++++++++++++++++++++++++++---- vlib/regex/regex_util.v | 43 ++++++++++++++++++++++------------------- 3 files changed, 65 insertions(+), 24 deletions(-) diff --git a/vlib/regex/regex.v b/vlib/regex/regex.v index 3f245f887..df094c8fa 100644 --- a/vlib/regex/regex.v +++ b/vlib/regex/regex.v @@ -2753,6 +2753,9 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) { // println("Check end of text!") // Check the results + if ist == ist_prog_end && state.first_match < 0 { + return state.i, state.i + } if state.match_index >= 0 { if state.group_index < 0 { if re.prog[state.pc].ist == ist_prog_end { diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index b692aaaca..5b8ca7f3a 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -399,8 +399,8 @@ const find_all_test_suite = [ Test_find_all{ "ab", r"[^\n]*", - [0, 2], - ['ab'] + [0, 2, 2, 2], + ['ab', ''] }, Test_find_all{ "ab", @@ -411,8 +411,20 @@ const find_all_test_suite = [ Test_find_all{ "ab", r"([^\n]|a)*", - [0, 2], - ['ab'] + [0, 2, 2, 2], + ['ab', ''] + }, + Test_find_all{ + "", + r"a*", + [0, 0], + [''] + }, + Test_find_all{ + "b", + r"a*", + [0, 0, 1, 1], + ['', ''] } ] @@ -711,6 +723,29 @@ fn test_regex() { } } +fn test_zero_length_find_matches() { + mut re := regex.regex_opt(r'a*') or { panic(err) } + start_1, end_1 := re.match_string('') + assert start_1 == 0 + assert end_1 == 0 + start_2, end_2 := re.match_string('b') + assert start_2 == 0 + assert end_2 == 0 + start_3, end_3 := re.find('') + assert start_3 == 0 + assert end_3 == 0 + start_4, end_4 := re.find('b') + assert start_4 == 0 + assert end_4 == 0 + start_5, end_5 := re.find_from('b', 1) + assert start_5 == 1 + assert end_5 == 1 + assert re.find_all('') == [0, 0] + assert re.find_all('b') == [0, 0, 1, 1] + assert re.find_all_str('') == [''] + assert re.find_all_str('b') == ['', ''] +} + // test regex_base function fn test_regex_func() { query := r'\d\dabcd' diff --git a/vlib/regex/regex_util.v b/vlib/regex/regex_util.v index 901a85749..5b74a96c8 100644 --- a/vlib/regex/regex_util.v +++ b/vlib/regex/regex_util.v @@ -131,17 +131,12 @@ pub fn (re &RE) match_string(in_txt string) (int, int) { end = in_txt.len } - if start >= 0 && end > start { - if (re.flag & f_ms) != 0 && start > 0 { - return no_match_found, 0 - } - if (re.flag & f_me) != 0 && end < in_txt.len { - if in_txt[end] in new_line_list { - return start, end - } - return no_match_found, 0 + if start >= 0 && end >= start { + ok, _ := re.check_anchors(in_txt, start, end) + if ok { + return start, end } - return start, end + return no_match_found, 0 } return start, end } @@ -203,7 +198,7 @@ pub fn (mut re RE) find(in_txt string) (int, int) { // re.flag |= f_src // enable search mode mut i := 0 - for i < in_txt.len { + for i <= in_txt.len { mut s := -1 mut e := -1 unsafe { @@ -211,7 +206,7 @@ pub fn (mut re RE) find(in_txt string) (int, int) { // println("Check: [${tmp_str}]") s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1) - if s >= 0 && e > s { + if s >= 0 && e >= s { abs_start := i + s abs_end := i + e ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end) @@ -248,7 +243,7 @@ pub fn (mut re RE) find_from(in_txt string, start int) (int, int) { if i < 0 { return -1, -1 } - for i < in_txt.len { + for i <= in_txt.len { //--- speed references --- mut s := -1 @@ -261,7 +256,7 @@ pub fn (mut re RE) find_from(in_txt string, start int) (int, int) { //------------------------ // s,e = re.find_imp(in_txt[i..]) //------------------------ - if s >= 0 && e > s { + if s >= 0 && e >= s { abs_start := i + s abs_end := i + e ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end) @@ -304,7 +299,7 @@ pub fn (mut re RE) find_all(in_txt string) []int { mut i := 0 mut res := []int{} - for i < in_txt.len { + for i <= in_txt.len { mut s := -1 mut e := -1 unsafe { @@ -313,7 +308,7 @@ pub fn (mut re RE) find_all(in_txt string) []int { // println("Check: [${tmp_str}]") s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i) - if s >= 0 && e > s { + if s >= 0 && e >= s { abs_start := i + s abs_end := i + e ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end) @@ -326,7 +321,11 @@ pub fn (mut re RE) find_all(in_txt string) []int { } res << abs_start res << abs_end - i += e + if e > s { + i += e + } else { + i++ + } continue } /* @@ -378,7 +377,7 @@ pub fn (mut re RE) find_all_str(in_txt string) []string { mut i := 0 mut res := []string{} - for i < in_txt.len { + for i <= in_txt.len { mut s := -1 mut e := -1 unsafe { @@ -387,7 +386,7 @@ pub fn (mut re RE) find_all_str(in_txt string) []string { // println("Check: [${tmp_str}]") s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i) - if s >= 0 && e > s { + if s >= 0 && e >= s { abs_start := i + s abs_end := i + e ok, stop_scan := re.check_anchors(in_txt, abs_start, abs_end) @@ -402,7 +401,11 @@ pub fn (mut re RE) find_all_str(in_txt string) []string { mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e } // println("Found: ${s}:${e} [${tmp_str[s..e]}]") res << tmp_str[s..tmp_e] - i += e + if e > s { + i += e + } else { + i++ + } continue } } -- 2.39.5