// Source: v/vlib/regex/regex_test.v (1012 lines, 908 sloc, 27.63 KB)
// Revision: 962a98f48e62ff5e0ee449c8bd251fc5de285373
1import regex
2import rand
3import strings
4
// debug gates the progress and diagnostic println calls made by the tests in this file.
const debug = true // true for debug println
6
7/******************************************************************************
8*
9* Test section
10*
11******************************************************************************/
// TestItem is one match/find case: a source text, a query, and the start/end
// indexes the regex engine is expected to report for that pair.
// The fields are filled positionally by the tables below, so their order matters.
struct TestItem {
	src string // text the compiled query is run against
	q string // regex query to compile
	s int // expected start index; negative in the no-match cases
	e int // expected end index
}
18
19// vfmt off
// match_test_suite is the main match/find table. Each entry gives the source
// text, the query and the start/end indexes the engine must return.
// Entries whose expected start is > 0 are exercised through `find`, all the
// others through `match_string` (see test_regex).
const match_test_suite = [
	// minus in CC
	TestItem{"d.def",r"abc.\.[\w\-]{,100}",-1,0},
	TestItem{"abc12345.asd",r"abc.\.[\w\-]{,100}",-1,4},
	TestItem{"abca.exe",r"abc.\.[\w\-]{,100}",0,8},
	TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,16},
	TestItem{"abcdefGHK",r"[a-f]+\A+",0,9},
	TestItem{"ab-cd-efGHK",r"[a-f\-g]+\A+",0,11},

	// base OR
	TestItem{"a",r"a|b",0,1},
	TestItem{"a",r"b|a",0,1},
	TestItem{"b",r"a|b",0,1},
	TestItem{"b",r"b|a",0,1},
	TestItem{"c",r"b|a",-1,0},

	// test base
	TestItem{"[ciao]",r"(.)ciao(.)",0,6},
	TestItem{"[ciao] da me",r"(.)ciao(.)",0,6},

	// positive
	TestItem{"this is a good.",r"this",0,4},
	TestItem{"this is a good.",r"good",10,14},
	TestItem{"this is a good.",r"go+d",10,14},
	TestItem{"this is a good.",r"g[oae]+d",10,14},
	TestItem{"this is a goed.",r"g[oae]+d",10,14},
	TestItem{"this is a good.",r"g[oae]*d",10,14},
	TestItem{"this is a goaezd.",r"g[ea-cm-z]*d",10,16},
	TestItem{"this is a good.",r"this (\w+) a",0,9},
	TestItem{"this is a good.",r"this( \w+){2} g",0,11},
	TestItem{"this is a good.",r"( ?\w+){,1}",0,4},
	TestItem{"this is a good.",r"( ?\w+)+",0,14},
	TestItem{"this is a good.",r"this( \w+)+",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,2}",0,7},
	TestItem{"this is a good sample.",r"( ?\w+){,3}",0,9},
	TestItem{"this is a good sample.",r"( ?\w+){,4}",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,5}",0,21},
	TestItem{"this is a good sample.",r"( ?\w+){2,3}",0,9},
	TestItem{"this is a good sample.",r"(\s?\w+){2,3}",0,9},
	TestItem{"this these those.",r"(th[ei]se?\s|\.)+",0,11},
	TestItem{"this these those ",r"(th[eio]se? ?)+",0,17},
	TestItem{"this these those ",r"(th[eio]se? )+",0,17},
	TestItem{"this,these,those. over",r"(th[eio]se?[,. ])+",0,17},
	TestItem{"soday,this,these,those. over",r".+(th[eio]se?[,. ])+",0,23},

	TestItem{"cpapaz",r"(c(pa)+z)",0,6},
	TestItem{"this is a cpapaz over",r"(c(pa)+z)",10,16},
	TestItem{"this is a cpapapez over",r"(c(p[ae])+z)",10,18},
	// The three e-mail style sources below were restored from scrape residue;
	// their lengths agree with the expected end indexes (17, 18, 14).
	TestItem{"test@post.pip.com",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,17},
	TestItem{"test1@post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",0,18},
	TestItem{"pippo@pera.com ",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,14},
	TestItem{"adce aabe",r"(a(ab)+)|(a(dc)+)e",0,4},
	TestItem{"zadce aabe",r"(a(ab)+)|(a(dc)+)e",1,5},
	TestItem{"abbz accz addz.",r"c|(d)|e|(ab+)",0,3},
	TestItem{"this those these ciao",r"((t[hieo]+se?)\s*)+",0,17},
	TestItem{"this ciao",r"((t[hieo]+se?)\s*)+",0,5},
	TestItem{"this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"1234this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}$",9,25},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"123cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",3,20},

	TestItem{"this is a good sample.",r".*i(\w)+",0,4},
	TestItem{"soday,this,these,those. over",r".*,(th[eio]se?[,. ])+",0,23},
	TestItem{"soday,this,these,thesa.thesi over",r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+",0,29},
	TestItem{"cpapaz ole. pippo,",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",0,17},
	TestItem{"cpapaz ole. pippo, 852",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"123cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
	TestItem{"...cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},

	TestItem{"cpapaz ole. pippo,",r".*c.+ole.*pi",0,14},
	TestItem{"cpapaz ole. pipipo,",r".*c.+ole.*p([ip])+o",0,18},
	TestItem{"cpapaz ole. pipipo",r"^.*c.+ol?e.*p([ip])+o$",0,18},
	TestItem{"abbb",r"ab{2,3}?",0,3},
	TestItem{" pippo pera",r"\s(.*)pe(.*)",0,11},
	TestItem{" abb",r"\s(.*)",0,4},

	TestItem{"/home/us_er/pippo/info-01.txt", r"(/?[-\w_]+)*\.txt$",0,29},

	// negative
	TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0},
	TestItem{"this is a good.",r"thes",-1,2},
	TestItem{"test1post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",-1,9},
	TestItem{"this cpapaz adce",r"(c(pa)+z)(\s[\a]+){2}",-1,0},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0},

	// check unicode
	TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r".*a [Ⅰ-Ⅵ ]+",0,34},
	TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r"[Ⅰ-Ⅴ\s]+",3,23},

	// new edge cases
	TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",-1,8},
	TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",0,8},
	TestItem{"123456789", r"^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$",0,9},
	TestItem{"12345678", r"^\d{8}$",0,8},
	TestItem{"12345678", r"^\d{7}$",-1,0},
	TestItem{"12345678", r"^\d{9}$",-1,8},

	TestItem{"eth", r"(oth)|(eth)",0,3},
	TestItem{"et", r"(oth)|(eth)",-1,2},
	TestItem{"et", r".*(oth)|(eth)",-1,2},
	TestItem{"peoth", r".*(ith)|(eth)",-1,5},

	TestItem{"poth", r"(eth)|(oth)",1,4},
	TestItem{"poth", r"(oth)|(eth)",1,4},
	TestItem{"poth", r".(oth)|(eth)$",0,4},
	TestItem{"poth", r"^.(oth)|(eth)$",0,4},
	TestItem{"poth", r"^\w+$",0,4},

	// test dot_char
	TestItem{"8-11 l: qllllqllklhlvtl", r"^(\d+)-(\d+) ([a-z]): (.*)$",0,23},
	TestItem{"accccb deer", r"^a(.*)b d(.+)r",0,11},
	TestItem{"accccb deer", r"^a(.*)b d(.+)",0,11},
	TestItem{"accccb deer", r"^(.*)$",0,11},
	TestItem{"accccb deer", r"^a(.*)b d(.+)p",-1,11},
	TestItem{"##.#....#.##.####...#.##", r".{18}[.#]",0,19},
	TestItem{"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.", r'.*#[.#]{4}##[.#]{4}##[.#]{4}###',0,49},

	// test backslash chars
	TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},
	TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28},
	TestItem{"p_p", r"\w+",0,3},
	TestItem{"p_é", r"\w+",0,2},

	// Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()])
	TestItem{"1*1", r"(\d+)([*])(\d+)",0,3},
	TestItem{"+1*1", r"^(\d+)([*])(\d+)",-1,0},
	TestItem{"*1*1", r"(?:^|[*])(\d+)([*])(\d+)",0,4},
	TestItem{"*1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{")1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{"(1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",0,5},
	TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
	TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},

	// particular groups
	TestItem{"ababababac", r"ab(.*)(ac)",0,10},

	// backslash on finish string
	TestItem{"a", r"\S+",0,1},
	TestItem{"aaaa", r"\S+",0,4},
	TestItem{"aaaa ", r"\S+",0,4},

	// multiple dot char
	TestItem{"aba", r"a*(b*)*a",0,3},
	TestItem{"/*x*/", r"/\**(.*)\**/",0,5},
	TestItem{"/*x*/", r"/*(.*)*/",0,5},

	// test last IST check
	TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(.*)",0,26},
	TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(.*)",0,26},
	TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(\w*)",0,26},
	TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(\w*)",0,26},

	// test \S+ vs [^\s]+
	TestItem{"ab.c", r"\S+\.",0,3},
	TestItem{"ab.c", r"[^\s]+\.",0,3},
	TestItem{"ab.c", r"\S*\.",0,3},
	TestItem{"ab.c", r"[^\s]*\.",0,3},
	TestItem{"ab c", r"[\S]+\s",0,3},
	TestItem{"ab c", r"[^\s]+\s",0,3},

	// test last char classes neg class
	TestItem{"/a/", r"^/a/[^/]+$", -1,3},
	TestItem{"/a/b",r"^/a/[^/]+$", 0,4},

	// test `\0` as terminator
	TestItem{"abc", "^abc\0$", -1,3},
	TestItem{"abc\0", "^abc\0$", 0,4},

	// test has `\0` chars
	TestItem{"abcxyz", "^abc\0xyz$", -1,3},
	TestItem{"abc\0xyz", "^abc\0xyz$", 0,7},

	// test hex byte chars
	TestItem{"abc_xyz", r"abc\x5Fxyz", 0,7},
	TestItem{"abc_xyz", r"^abc\x5fxyz$", 0,7},
	TestItem{"abcAxyz", r"^abc\x41xyz$", 0,7},
	TestItem{"abcAAxyz", r"^abc\x41+xyz$", 0,8},
	TestItem{"abcALxyz", r"^abc\x41\x4Cxyz$", 0,8},
	TestItem{"abcAAxyz", r"^abc\X4141xyz$", 0,8},
	TestItem{"abcALxyz", r"^abc\X414cxyz$", 0,8},
	TestItem{"abcALxyz", r"^abc\X414Cxyz$", 0,8},
	TestItem{"abcBxyz", r"^abc\x41+xyz$", -1,3},

	// test anchor
	TestItem{"abc", r"^abc$",0,3},
	TestItem{"abc", r"^abc+$",0,3},
	TestItem{"abcd", r"^abc+$",-1,0},
]
212
// TestItemRe is one replace case: the query `q` is applied to `src`, matches
// are substituted with `rep`, and the outcome is compared with `r`.
// The fields are filled positionally by the replace tables.
struct TestItemRe {
	src string // input text
	q string // regex query to compile
	rep string // replacement text handed to replace / replace_simple
	r string // expected text after the replacement
}
219
// match_test_suite_replace lists the cases run through `re.replace`:
// source text, query, replacement string and expected result.
const match_test_suite_replace = [
	// replace tests
	TestItemRe{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		"CIAO",
		"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		"CIAO",
		"CIAO is a good day and CIAO will be for sure."
	},
	// `\0` in the replacement: the expected text shows it expanding to what
	// the query's only group `(a\w)` captured ("ay").
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0] ",
		"Tod[ay] is a good d[ay] and tomorrow will be for sure."
	},
	// the same group reference used twice in one replacement
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0_\0] ",
		"Tod[ay_ay] is a good d[ay_ay] and tomorrow will be for sure."
	},
	// `\1` has no matching group in this query; the expected text shows it
	// contributing nothing to the output.
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0\1] ",
		"Tod[ay] is a good d[ay] and tomorrow will be for sure."
	},
]
253
// match_test_suite_replace_simple lists the cases run through
// `re.replace_simple`; the replacement strings here are plain text.
const match_test_suite_replace_simple = [
	// replace tests
	TestItemRe{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		"CIAO",
		"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		"CIAO",
		"CIAO is a good day and CIAO will be for sure."
	},
]
269
// TestItemCGroup is one capture-group case for `match_string`.
// The fields are filled positionally by cgroups_test_suite.
struct TestItemCGroup {
	src string // text the compiled query is run against
	q string // regex query to compile
	s int // expected start index of the match
	e int // expected end index of the match
	// Expected groups. The layout depends on `cgn`:
	// - cgn not empty: compared with re.group_csave, i.e.
	//   [number of records, id_group_0, start_0, end_0, id_group_1, start_1, end_1, ...]
	// - cgn empty: compared element by element with re.groups
	cg []int
	cgn map[string]int // group name -> expected group id; checked as re.group_map[name] - 1
}
278
// cgroups_test_suite holds the capture-group cases. The entries with a
// non-empty name map are run with `group_csave_flag` enabled and checked
// against `re.group_csave`; the others are checked against `re.groups`.
const cgroups_test_suite = [
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+[\.|/])+",0,42,
		[7, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42],
		{'format':int(0),'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
		[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
		//[8, 0, 0, 4, 1, 7, 10, 1, 11, 15, 1, 16, 21, 1, 22, 27, 1, 28, 36, 1, 37, 41, 1, 42, 46],
		{'format':int(0),'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://([\w_]+\.)+",0,16,
		[3, 0, 0, 4, 1, 7, 11, 1, 11, 16],
		{'format':int(0)}
	},
	// no named groups below: `cg` is compared directly with re.groups
	TestItemCGroup{
		"acc +13 pippo",
		r"(\w+)\s(.)([0-9]+) \w+",0,13,
		[0, 3, 4, 5, 5, 7],
		map[string]int{}
	},
	TestItemCGroup{
		"acc +13",
		r"(\w+)\s(.)([0-9]+)",0,7,
		[0, 3, 4, 5, 5, 7],
		map[string]int{}
	},
	TestItemCGroup{
		"ababababac",
		r"ab(.*)(ac)",0,10,
		[2, 8, 8, 10],
		map[string]int{}
	},
]
318
// Test_find_all is one case for `find_all` / `find_all_str`.
// The fields are filled positionally by find_all_test_suite.
struct Test_find_all {
	src string // text to search
	q string // regex query to compile
	res []int // [0,4,5,6...] expected find_all result: consecutive start/end index pairs
	res_str []string // ['find0','find1'...] expected find_all_str result
}
325
// find_all_test_suite lists the cases for `find_all` and `find_all_str`:
// source text, query, expected start/end index pairs and expected substrings.
const find_all_test_suite = [
	Test_find_all{
		"abcd 1234 efgh 1234 ghkl1234 ab34546df",
		r"\d+",
		[5, 9, 15, 19, 24, 28, 31, 36],
		['1234', '1234', '1234', '34546']
	},
	Test_find_all{
		"abcd 1234 efgh 1234 ghkl1234 ab34546df",
		r"\a+",
		[0, 4, 10, 14, 20, 24, 29, 31, 36, 38],
		['abcd', 'efgh', 'ghkl', 'ab', 'df']
	},
	Test_find_all{
		"oggi pippo è andato a casa di pluto ed ha trovato pippo",
		r"p[iplut]+o",
		[5, 10, 31, 36, 51, 56],
		['pippo', 'pluto', 'pippo']
	},
	Test_find_all{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		[5, 10, 31, 39, 54, 65],
		['pibao', 'pbababao', 'pibabababao']
	},
	Test_find_all{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		[0, 5, 24, 32],
		['Today', 'tomorrow']
	},
	Test_find_all{
		"pera\nurl = https://github.com/dario/pig.html\npippo",
		r"url *= *https?://[\w./]+",
		[5, 44],
		['url = https://github.com/dario/pig.html']
	},
	// the query ends with a real newline character appended to the raw string
	Test_find_all{
		"pera\nurl = https://github.com/dario/pig.html\npippo",
		r"url *= *https?://.*"+'\n',
		[5, 45],
		['url = https://github.com/dario/pig.html\n']
	},
	Test_find_all{
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
		r"#[.#]{4}##[.#]{4}##[.#]{4}###",
		[29, 49],
		['#....###...##...####']
	},
	Test_find_all{
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
		r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
		[0, 49],
		['#.#......##.#..#..##........##....###...##...####']
	},
	Test_find_all{
		"1234 Aa dddd Aaf 12334 Aa opopo Aaf",
		r"Aa.+Aaf",
		[5, 16, 23, 35],
		['Aa dddd Aaf', 'Aa opopo Aaf']
	},
	Test_find_all{
		"@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo",
		r"@for.+@endfor",
		[0, 22, 23, 50, 63, 80, 89, 117],
		['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor']
	},
	Test_find_all{
		"+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++",
		r"\+{3}.*\+{3}",
		[0, 11, 18, 32, 33, 44],
		['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++']
	},
	// zero-length matches: the expected results include empty matches
	Test_find_all{
		"ab",
		r"[^\n]*",
		[0, 2, 2, 2],
		['ab', '']
	},
	Test_find_all{
		"ab",
		r"([^\n]*)",
		[0, 2],
		['ab']
	},
	Test_find_all{
		"ab",
		r"([^\n]|a)*",
		[0, 2, 2, 2],
		['ab', '']
	},
	Test_find_all{
		"",
		r"a*",
		[0, 0],
		['']
	},
	Test_find_all{
		"b",
		r"a*",
		[0, 0, 1, 1],
		['', '']
	}
]
430
431
// Test_split is one case for `split`.
// The fields are filled positionally by split_test_suite.
struct Test_split {
	src string // text to split
	q string // regex query used as the separator
	res []string // ['abc','def',...] expected pieces, empty strings included
}
437
// split_test_suite lists the cases for `split`: source text, separator query
// and the expected pieces. The expected lists show that a match at the very
// start or end of the text yields an empty leading or trailing piece.
const split_test_suite = [
	Test_split{'abcd 1234 efgh 1234 ghkl1234 ab34546df', r'\d+', ['abcd ', ' efgh ', ' ghkl',
		' ab', 'df']},
	Test_split{'abcd 1234 efgh 1234 ghkl1234 ab34546df', r'\a+', ['', ' 1234 ', ' 1234 ', '1234 ',
		'34546', '']},
	Test_split{'oggi pippo è andato a casa di pluto ed ha trovato pippo', r'p[iplut]+o', [
		'oggi ', ' è andato a casa di ', ' ed ha trovato ', '']},
	Test_split{'oggi pibao è andato a casa di pbababao ed ha trovato pibabababao', r'(pi?(ba)+o)', [
		'oggi ', ' è andato a casa di ', ' ed ha trovato ', '']},
	Test_split{'Today is a good day and tomorrow will be for sure.', r'[Tt]o\w+', [
		'', ' is a good day and ', ' will be for sure.']},
	Test_split{'pera\nurl = https://github.com/dario/pig.html\npippo', r'url *= *https?://[\w./]+', [
		'pera\n', '\npippo']},
	Test_split{'pera\nurl = https://github.com/dario/pig.html\npippo', r'url *= *https?://.*' +
		'\n', ['pera\n', 'pippo']},
	Test_split{'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.', r'#[.#]{4}##[.#]{4}##[.#]{4}###', [
		'#.#......##.#..#..##........#', '##.......#.....#..#......#...#........###.#..#.']},
	Test_split{'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.', r'.*#[.#]{4}##[.#]{4}##[.#]{4}###', [
		'', '##.......#.....#..#......#...#........###.#..#.']},
	Test_split{'1234 Aa dddd Aaf 12334 Aa opopo Aaf', r'Aa.+Aaf', ['1234 ', ' 12334 ', '']},
	Test_split{'@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo', r'@for.+@endfor', [
		'', ' ', ' altro testo ', ' uno due ', ' pippo']},
	Test_split{'+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++', r'\+{3}.*\+{3}', [
		'', '\n elvo ', ' ', '']},
	// no match at all: the whole text comes back as a single piece
	Test_split{'foobar', r'\d', ['foobar']},
	Test_split{'1234', r'\d+', ['', '']},
	Test_split{'a-', r'-', ['a', '']},
	Test_split{'-a', r'-', ['', 'a']},
]
467// vfmt on
468
// test_regex runs every table-driven suite of this file through the regex
// module, in this order: capture groups, find_all, split, replace,
// replace_simple and finally match/find. A mismatch is reported on the
// console and fails the test through `assert false`.
fn test_regex() {
	// check capturing groups
	for c, to in cgroups_test_suite {
		// debug print
		if debug {
			println('${c} [${to.src}] [q${to.q}] (${to.s}, ${to.e})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		// named groups are verified through the continuous-save buffer
		if to.cgn.len > 0 {
			re.group_csave_flag = true
			// re.group_csave = [-1].repeat(3*20+1)
			if debug {
				println('continuous save')
			}
		} else {
			if debug {
				println('NO continuous save')
			}
		}

		start, end := re.match_string(to.src)

		mut tmp_str := ''
		if start >= 0 && end > start {
			tmp_str = to.src[start..end]
		}

		if start != to.s || end != to.e {
			println('#${c} [${to.src}] q[${to.q}] res[${tmp_str}] base:[${to.s},${to.e}] ${start}, ${end}')
			eprintln('ERROR!')
			assert false
			continue
		}

		// check cgroups
		if to.cgn.len > 0 {
			// Read the saved record count only when the buffer has one:
			// indexing an empty group_csave inside the error message would
			// itself fail with an out-of-range access.
			found_len := if re.group_csave.len > 0 { re.group_csave[0] } else { 0 }
			if re.group_csave.len == 0 || found_len != to.cg[0] {
				eprintln('Capturing group len error! found: ${found_len} true ground: ${to.cg[0]}')
				assert false
				continue
			}

			// check captured groups, walking the (id, start, end) records backwards
			mut ln := re.group_csave[0] * 3
			for ln > 0 {
				if re.group_csave[ln] != to.cg[ln] {
					eprintln('Capturing group failed on ${ln} item!')
					assert false
				}
				ln--
			}

			// check named captured groups
			for k in to.cgn.keys() {
				if to.cgn[k] != (re.group_map[k] - 1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
					eprintln('Named capturing group error! [${k}]')
					assert false
					continue
				}
			}
		} else {
			// check normal captured groups
			if re.groups.len != to.cg.len {
				eprintln("Capture group count doesn't match:")
				eprintln('true ground: ${to.cg}')
				eprintln('elaborated : ${re.groups}')
				assert false
				// do not index to.cg with positions it may not have
				continue
			}
			for ln := 0; ln < re.groups.len; ln++ {
				if re.groups[ln] != to.cg[ln] {
					eprintln("Capture group doesn't match:")
					eprintln('true ground: ${to.cg}')
					eprintln('elaborated : ${re.groups}')
					assert false
				}
			}
		}
	}

	// check find_all
	for c, to in find_all_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] (${to.res}, ${to.res_str})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		re.reset()
		res := re.find_all(to.src)
		if res != to.res {
			eprintln('err: find_all !!')
			if debug {
				println('#${c} exp: ${to.res} calculated: ${res}')
			}
			assert false
		}

		res_str := re.find_all_str(to.src)
		if res_str != to.res_str {
			eprintln('err: find_all_str !!')
			if debug {
				println('#${c} exp: ${to.res_str} calculated: ${res_str}')
			}
			assert false
		}
	}

	// check split
	for c, to in split_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] (${to.res})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		re.reset()
		res := re.split(to.src)
		if res != to.res {
			eprintln('err: split !!')
			if debug {
				println('#${c} exp: ${to.res} calculated: ${res}')
			}
			assert false
		}
	}

	// check replace
	for c, to in match_test_suite_replace {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.r}')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		res := re.replace(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check replace simple
	for c, to in match_test_suite_replace_simple {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.r}')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		res := re.replace_simple(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check match and find
	for c, to in match_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.s} ${to.e}')
		}

		// test the find: only entries that expect a match after index 0
		if to.s > 0 {
			mut re := regex.regex_opt(to.q) or {
				eprintln('err: ${err}')
				assert false
				continue
			}
			// q_str := re.get_query()
			// eprintln("Query: ${q_str}")
			start, end := re.find(to.src)

			if start != to.s || end != to.e {
				err_str := re.get_parse_error_string(start)
				eprintln('ERROR : ${err_str} start: ${start} end: ${end}')
				assert false
			} else {
				// tmp_str := text[start..end]
				// println("found in [${start}, ${end}] => [${tmp_str}]")
				assert true
			}
			continue
		}

		// test the match
		mut re := regex.new()
		// re.debug = true

		re.compile_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}
		// println("#${c} [${to.src}] q[${to.q}]")
		start, end := re.match_string(to.src)

		mut tmp_str := ''
		if start >= 0 && end > start {
			tmp_str = to.src[start..end]
		}

		if start != to.s || end != to.e {
			eprintln('#${c} [${to.src}] q[${to.q}] res[${tmp_str}] ${start}, ${end}')
			eprintln('ERROR!')
			assert false
			continue
		}

		// test the match predicate
		if to.s >= 0 {
			assert re.matches_string(to.src)
		} else {
			assert !re.matches_string(to.src)
		}

		// rerun on a copy of the source to test consistency
		tmp_str1 := to.src.clone()
		start1, end1 := re.match_string(tmp_str1)
		if start1 != start || end1 != end {
			eprintln('two run ERROR!!')
			assert false
			continue
		}
	}

	if debug {
		println('DONE!')
	}
}
725
// test_zero_length_find_matches pins the indexes reported for zero-length
// matches: the query `a*` is run over an empty text and over 'b', which
// contains no `a` at all.
fn test_zero_length_find_matches() {
	mut star_re := regex.regex_opt(r'a*') or { panic(err) }

	// match_string
	match_empty_s, match_empty_e := star_re.match_string('')
	assert match_empty_s == 0
	assert match_empty_e == 0
	match_b_s, match_b_e := star_re.match_string('b')
	assert match_b_s == 0
	assert match_b_e == 0

	// find
	find_empty_s, find_empty_e := star_re.find('')
	assert find_empty_s == 0
	assert find_empty_e == 0
	find_b_s, find_b_e := star_re.find('b')
	assert find_b_s == 0
	assert find_b_e == 0

	// find_from, starting at the end of the text
	from_s, from_e := star_re.find_from('b', 1)
	assert from_s == 1
	assert from_e == 1

	// find_all / find_all_str
	assert star_re.find_all('') == [0, 0]
	assert star_re.find_all('b') == [0, 0, 1, 1]
	assert star_re.find_all_str('') == ['']
	assert star_re.find_all_str('b') == ['', '']
}
748
// test_case_insensitive_flag sets `regex.f_ci` on four compiled queries and
// checks the outcome for a literal, a char class, a negated char class and
// the `\a` class.
fn test_case_insensitive_flag() {
	// plain literal
	mut literal_re := regex.regex_opt(r'hello') or { panic(err) }
	literal_re.flag = literal_re.flag | regex.f_ci
	lit_start, lit_end := literal_re.match_string('HeLLo')
	assert lit_start == 0
	assert lit_end == 5

	// upper-case range against mixed-case text
	mut range_re := regex.regex_opt(r'^[A-Z]+$') or { panic(err) }
	range_re.flag = range_re.flag | regex.f_ci
	range_start, range_end := range_re.match_string('abcXYZ')
	assert range_start == 0
	assert range_end == 6

	// negated class: 'A' must be rejected by [^a]
	mut not_a_re := regex.regex_opt(r'^[^a]+$') or { panic(err) }
	not_a_re.flag = not_a_re.flag | regex.f_ci
	not_a_start, _ := not_a_re.match_string('A')
	assert not_a_start == -1

	// `\a` class against mixed-case text
	mut alpha_re := regex.regex_opt(r'^\a+$') or { panic(err) }
	alpha_re.flag = alpha_re.flag | regex.f_ci
	alpha_start, alpha_end := alpha_re.match_string('AbC')
	assert alpha_start == 0
	assert alpha_end == 3
}
773
// test_regex_func compiles a query through `regex.regex_base`, which returns
// the regex together with an error code and an error position, and then
// checks a full match on a sample text.
fn test_regex_func() {
	pattern := r'\d\dabcd'
	sample := '78abcd'
	mut re, code, pos := regex.regex_base(pattern)

	// compilation failed: report where and why, then fail
	if code != regex.compile_ok {
		eprintln('Error in query string in pos ${pos}')
		eprintln('Error: ${re.get_parse_error_string(code)}')
		assert false
		return
	}

	first, last := re.match_string(sample)
	assert first == 0 && last == 6
}
788
// my_repl_1 is a callback for `replace_by_fn`: it prints the match range
// with the text of group 0 and returns 'a' followed by that text upper-cased.
fn my_repl_1(re regex.RE, in_txt string, start int, end int) string {
	captured := re.get_group_by_id(in_txt, 0)
	println('[${start}, ${end}] => ${captured}')
	shouted := captured.to_upper()
	return 'a' + shouted
}
794
// test_regex_func_replace1 runs `replace_by_fn` with the my_repl_1 callback
// on every `a(b+)` match and checks the rewritten text.
fn test_regex_func_replace1() {
	mut re := regex.regex_opt(r'a(b+)') or { panic(err) }
	rewritten := re.replace_by_fn('abbabbbabbbbaabba', my_repl_1)

	assert rewritten == 'aBBaBBBaBBBBaaBBa'
}
803
// my_repl is a callback for `replace_by_fn`: for each of the groups 0, 1 and
// 2 it takes the first byte of the captured text and appends 'X', then
// returns the three pieces joined in group order.
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
	mut joined := ''
	for group_id in 0 .. 3 {
		joined += re.get_group_by_id(in_txt, group_id)[0..1] + 'X'
	}
	return joined
}
810
// test_regex_func_replace checks `replace_by_fn` with the my_repl callback on
// a three-line input. Each line is the same JSON-like fragment followed by a
// randomly sized prefix of a filler sentence; the fragment must collapse to
// 'cXTX,X' while the filler is left untouched.
fn test_regex_func_replace() {
	filler := "E il primo dei tre regni dell'Oltretomba cristiano visitato da Dante nel corso del viaggio, con la guida di Virgilio."
	fragment := r'"content": "They dont necessarily flag "you will be buying these shares on margin!"", "channel_id"'
	mut re := regex.regex_opt(r'"(content":\s+")(.*)(, "channel_id")') or { panic(err) }

	mut input := ''
	mut expected := ''

	for _ in 0 .. 3 {
		// between 10 and 29 leading bytes of the filler, then a newline
		cut := int(10 + rand.u32() % 20)
		tail := filler[0..cut] + '\n'
		input += fragment + tail
		expected += 'cXTX,X' + tail
	}

	result := re.replace_by_fn(input, my_repl)
	if debug {
		eprintln(result)
		eprintln(expected)
	}
	assert result == expected
}
834
// test_regex_replace_n checks `replace_n`, which limits how many matches are
// replaced: a count of 0 leaves the text untouched, a positive count replaces
// that many matches starting from the left, a negative count that many
// starting from the right.
//
// This function was previously spelled `rest_regex_replace_n`; without the
// `test_` prefix it was not discovered as a test and therefore never ran.
fn test_regex_replace_n() {
	s := 'dario 1234 pepep 23454 pera'
	query := r'\d+'

	mut re := regex.regex_opt(query) or { panic(err) }

	// count == 0: nothing is replaced
	assert re.replace_n(s, '[repl]', 0) == 'dario 1234 pepep 23454 pera'
	// a single replacement, from the right and from the left
	assert re.replace_n(s, '[repl]', -1) == 'dario 1234 pepep [repl] pera'
	assert re.replace_n(s, '[repl]', 1) == 'dario [repl] pepep 23454 pera'
	// counts equal to or larger than the two available matches
	assert re.replace_n(s, '[repl]', 2) == 'dario [repl] pepep [repl] pera'
	assert re.replace_n(s, '[repl]', -2) == 'dario [repl] pepep [repl] pera'
	assert re.replace_n(s, '[repl]', 3) == 'dario [repl] pepep [repl] pera'
	assert re.replace_n(s, '[repl]', -3) == 'dario [repl] pepep [repl] pera'

	// mut res := re.replace_n(s, "[repl]", -1)
	// println("source: ${s}")
	// println("res : ${res}")
}
853
// test quantifier wrong sequences
// Every query below must be rejected by the compiler with
// `regex.err_syntax_error` (see test_quantifier_sequences).
const test_quantifier_sequences_list = [
	r'+{3}.*+{3}',
	r'+{3}.*?{3}',
	r'+{3}.**{3}',
	r'+{3}.*\+{3}*',
	r'+{3}.*\+{3}+',
	r'+{3}.*\+{3}??',
	r'+{3}.*\+{3}{4}',
]
864
// test_quantifier_sequences compiles every query of
// test_quantifier_sequences_list and requires the compiler to answer with
// `regex.err_syntax_error`; any other code is printed before the assert fails.
fn test_quantifier_sequences() {
	for pattern in test_quantifier_sequences_list {
		// only the error code matters: the regex and the error position are discarded
		_, re_err, _ := regex.regex_base(pattern)
		if re_err != regex.err_syntax_error {
			eprintln('pattern: ${pattern} => ${re_err}')
		}
		assert re_err == regex.err_syntax_error
	}
}
874
// test group index in find
// Test_find_groups is one case for the group indexes left in `re.groups`
// after a call to `find`. The fields are filled positionally by
// find_groups_test_suite.
struct Test_find_groups {
	src string // text to search
	q string // regex query to compile
	s int // start index
	e int // end index
	res []int // groups indexes
}
883
884// vfmt off
// find_groups_test_suite lists the cases for test_groups_in_find: source
// text, query, expected start/end of the match and the expected content of
// `re.groups` (one start/end pair per group).
const find_groups_test_suite = [
	Test_find_groups{
		"aabbbccccdd",
		r"(b+)(c+)",
		2,
		9,
		[2, 5, 5, 9],
	},
	Test_find_groups{
		"aabbbccccdd",
		r"(a+).*(c+)",
		0,
		9,
		[0, 2, 5, 9],
	},
	// nested groups: the outer group comes first in the expected list
	Test_find_groups{
		"aabbbccccdd",
		r"((b+).*)(d+)",
		2,
		11,
		[2, 9, 2, 5, 9, 11],
	},
]
908// vfmt on
909
// test_groups_in_find runs `find` for every case of find_groups_test_suite
// and compares the match range and the resulting `re.groups` with the
// expected values.
fn test_groups_in_find() {
	for tc in find_groups_test_suite {
		mut re := regex.regex_opt(tc.q) or { panic(err) }
		start, end := re.find(tc.src)
		// Debug print do not remove!!
		/*
		println("---------")
		println("src_text:[${tc.src}]")
		println("query :[${tc.q}]")
		println("[${start}, ${end}]")
		println(re.groups)
		mut gi := 0
		for gi < re.groups.len {
			if re.groups[gi] >= 0 {
				println('${gi / 2} :[${tc.src[re.groups[gi]..re.groups[gi + 1]]}]')
			}
			gi += 2
		}
		*/
		// check
		assert start == tc.s
		assert end == tc.e
		assert re.groups == tc.res
	}
}
937
// err_query_list holds queries that `regex.regex_base` must refuse to
// compile (see test_errors).
const err_query_list = [
	r'([a]|[b])*',
]
941
// test_errors compiles every query of err_query_list and requires each of
// them to come back with a code other than `regex.compile_ok`.
fn test_errors() {
	mut rejected := 0
	for bad_query in err_query_list {
		_, code, _ := regex.regex_base(bad_query)
		if code == regex.compile_ok {
			continue
		}
		rejected++
	}
	assert rejected == err_query_list.len
}
952
// test_long_query builds two very long queries from a random 32768-byte
// string and checks that each one matches that string up to its end:
// first one group per byte, side by side; then one group per byte, nested.
fn test_long_query() {
	test_len := 32768
	base_string := rand.string(test_len)
	mut buf := strings.new_builder(test_len * 3)

	// test 1: "(c0)(c1)(c2)..."
	for i in 0 .. base_string.len {
		buf.write_u8(`(`)
		buf.write_u8(base_string[i])
		buf.write_u8(`)`)
	}
	flat_query := buf.str()

	// println(base_string)
	// println(flat_query)

	mut flat_re := regex.regex_opt(flat_query) or { panic(err) }
	flat_start, flat_end := flat_re.match_string(base_string)
	// println("${flat_start}, ${flat_end}")
	assert flat_start >= 0 && flat_end == base_string.len

	// test 2: "(c0(c1(c2..." followed by one ")" per opened group
	buf.clear()
	for i in 0 .. base_string.len {
		buf.write_u8(`(`)
		buf.write_u8(base_string[i])
	}
	for _ in 0 .. base_string.len {
		buf.write_u8(`)`)
	}
	nested_query := buf.str()

	mut nested_re := regex.regex_opt(nested_query) or { panic(err) }
	nested_start, nested_end := nested_re.match_string(base_string)
	// println("${nested_start}, ${nested_end}")
	assert nested_start >= 0 && nested_end == base_string.len
}
990
// Test_negation_group is one case for test_negation_groups.
// The fields are filled positionally by negation_groups.
struct Test_negation_group {
	src string // text handed to match_string
	res bool // true when a match is expected (start index >= 0)
}
995
// negation_groups lists the inputs for the query `(?!auto)\w+le` used by
// test_negation_groups, each with the expected match / no-match outcome.
const negation_groups = [
	Test_negation_group{'automobile', false},
	Test_negation_group{'botomobile', true},
	Test_negation_group{'auto_caravan', false},
	Test_negation_group{'moto_mobile', true},
	Test_negation_group{'pippole', true},
	Test_negation_group{'boring test', false},
]
1004
// test_negation_groups compiles a query that starts with the negation group
// `(?!auto)` and checks, for every entry of negation_groups, whether
// `match_string` reports a match (start index >= 0) as expected.
fn test_negation_groups() {
	// never reassigned, so it does not need to be `mut`
	query := r'(?!auto)\w+le'
	mut re := regex.regex_opt(query) or { panic(err) }
	for test in negation_groups {
		// only the start index decides match / no match
		start, _ := re.match_string(test.src)
		assert (start >= 0) == test.res
	}
}
1013