| 1 | import regex |
| 2 | import rand |
| 3 | import strings |
| 4 | |
// debug gates the verbose `println`/`eprintln` tracing emitted by the tests in this file.
const debug = true // true for debug println
| 6 | |
| 7 | /****************************************************************************** |
| 8 | * |
| 9 | * Test section |
| 10 | * |
| 11 | ******************************************************************************/ |
// TestItem is one match/find expectation: the query `q` is compiled and run
// against `src`, and the returned indexes must equal `s` and `e`.
// Entries are written positionally, so the field order must not change.
struct TestItem {
	src string // text the query is run against
	q string // regex query to compile
	s int // expected start index (-1 in the entries that must not match)
	e int // expected end index
}
| 18 | |
| 19 | // vfmt off |
// match_test_suite lists the match/find expectations consumed by `test_regex`.
// Each entry is `TestItem{src, query, expected_start, expected_end}`.
// Entries with a start above 0 are checked with `find`; all the others are
// checked with `match_string`.
const match_test_suite = [
	// minus in CC
	TestItem{"d.def",r"abc.\.[\w\-]{,100}",-1,0},
	TestItem{"abc12345.asd",r"abc.\.[\w\-]{,100}",-1,4},
	TestItem{"abca.exe",r"abc.\.[\w\-]{,100}",0,8},
	TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,16},
	TestItem{"abcdefGHK",r"[a-f]+\A+",0,9},
	TestItem{"ab-cd-efGHK",r"[a-f\-g]+\A+",0,11},

	// base OR
	TestItem{"a",r"a|b",0,1},
	TestItem{"a",r"b|a",0,1},
	TestItem{"b",r"a|b",0,1},
	TestItem{"b",r"b|a",0,1},
	TestItem{"c",r"b|a",-1,0},

	// test base
	TestItem{"[ciao]",r"(.)ciao(.)",0,6},
	TestItem{"[ciao] da me",r"(.)ciao(.)",0,6},

	// positive
	TestItem{"this is a good.",r"this",0,4},
	TestItem{"this is a good.",r"good",10,14},
	TestItem{"this is a good.",r"go+d",10,14},
	TestItem{"this is a good.",r"g[oae]+d",10,14},
	TestItem{"this is a goed.",r"g[oae]+d",10,14},
	TestItem{"this is a good.",r"g[oae]*d",10,14},
	TestItem{"this is a goaezd.",r"g[ea-cm-z]*d",10,16},
	TestItem{"this is a good.",r"this (\w+) a",0,9},
	TestItem{"this is a good.",r"this( \w+){2} g",0,11},
	TestItem{"this is a good.",r"( ?\w+){,1}",0,4},
	TestItem{"this is a good.",r"( ?\w+)+",0,14},
	TestItem{"this is a good.",r"this( \w+)+",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,2}",0,7},
	TestItem{"this is a good sample.",r"( ?\w+){,3}",0,9},
	TestItem{"this is a good sample.",r"( ?\w+){,4}",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,5}",0,21},
	TestItem{"this is a good sample.",r"( ?\w+){2,3}",0,9},
	TestItem{"this is a good sample.",r"(\s?\w+){2,3}",0,9},
	TestItem{"this these those.",r"(th[ei]se?\s|\.)+",0,11},
	TestItem{"this these those ",r"(th[eio]se? ?)+",0,17},
	TestItem{"this these those ",r"(th[eio]se? )+",0,17},
	TestItem{"this,these,those. over",r"(th[eio]se?[,. ])+",0,17},
	TestItem{"soday,this,these,those. over",r".+(th[eio]se?[,. ])+",0,23},

	TestItem{"cpapaz",r"(c(pa)+z)",0,6},
	TestItem{"this is a cpapaz over",r"(c(pa)+z)",10,16},
	TestItem{"this is a cpapapez over",r"(c(p[ae])+z)",10,18},
	// The three address-like sources below had been replaced by an
	// "[email protected]" scraper placeholder; they are restored so that
	// their lengths agree with the expected end indexes (17, 18 and 14).
	TestItem{"test@post.pip.com",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,17},
	TestItem{"test1@post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",0,18},
	TestItem{"pippo@pera.com ",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,14},
	TestItem{"adce aabe",r"(a(ab)+)|(a(dc)+)e",0,4},
	TestItem{"zadce aabe",r"(a(ab)+)|(a(dc)+)e",1,5},
	TestItem{"abbz accz addz.",r"c|(d)|e|(ab+)",0,3},
	TestItem{"this those these ciao",r"((t[hieo]+se?)\s*)+",0,17},
	TestItem{"this ciao",r"((t[hieo]+se?)\s*)+",0,5},
	TestItem{"this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"1234this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}$",9,25},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"123cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",3,20},

	TestItem{"this is a good sample.",r".*i(\w)+",0,4},
	TestItem{"soday,this,these,those. over",r".*,(th[eio]se?[,. ])+",0,23},
	TestItem{"soday,this,these,thesa.thesi over",r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+",0,29},
	TestItem{"cpapaz ole. pippo,",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",0,17},
	TestItem{"cpapaz ole. pippo, 852",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"123cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
	TestItem{"...cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},

	TestItem{"cpapaz ole. pippo,",r".*c.+ole.*pi",0,14},
	TestItem{"cpapaz ole. pipipo,",r".*c.+ole.*p([ip])+o",0,18},
	TestItem{"cpapaz ole. pipipo",r"^.*c.+ol?e.*p([ip])+o$",0,18},
	TestItem{"abbb",r"ab{2,3}?",0,3},
	TestItem{" pippo pera",r"\s(.*)pe(.*)",0,11},
	TestItem{" abb",r"\s(.*)",0,4},

	TestItem{"/home/us_er/pippo/info-01.txt", r"(/?[-\w_]+)*\.txt$",0,29},

	// negative
	TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0},
	TestItem{"this is a good.",r"thes",-1,2},
	TestItem{"test1post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",-1,9},
	TestItem{"this cpapaz adce",r"(c(pa)+z)(\s[\a]+){2}",-1,0},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0},

	// check unicode
	TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r".*a [Ⅰ-Ⅵ ]+",0,34},
	TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r"[Ⅰ-Ⅴ\s]+",3,23},

	// new edge cases
	TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",-1,8},
	TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",0,8},
	TestItem{"123456789", r"^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$",0,9},
	TestItem{"12345678", r"^\d{8}$",0,8},
	TestItem{"12345678", r"^\d{7}$",-1,0},
	TestItem{"12345678", r"^\d{9}$",-1,8},

	TestItem{"eth", r"(oth)|(eth)",0,3},
	TestItem{"et", r"(oth)|(eth)",-1,2},
	TestItem{"et", r".*(oth)|(eth)",-1,2},
	TestItem{"peoth", r".*(ith)|(eth)",-1,5},

	TestItem{"poth", r"(eth)|(oth)",1,4},
	TestItem{"poth", r"(oth)|(eth)",1,4},
	TestItem{"poth", r".(oth)|(eth)$",0,4},
	TestItem{"poth", r"^.(oth)|(eth)$",0,4},
	TestItem{"poth", r"^\w+$",0,4},

	// test dot_char
	TestItem{"8-11 l: qllllqllklhlvtl", r"^(\d+)-(\d+) ([a-z]): (.*)$",0,23},
	TestItem{"accccb deer", r"^a(.*)b d(.+)r",0,11},
	TestItem{"accccb deer", r"^a(.*)b d(.+)",0,11},
	TestItem{"accccb deer", r"^(.*)$",0,11},
	TestItem{"accccb deer", r"^a(.*)b d(.+)p",-1,11},
	TestItem{"##.#....#.##.####...#.##", r".{18}[.#]",0,19},
	TestItem{"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.", r'.*#[.#]{4}##[.#]{4}##[.#]{4}###',0,49},

	// test backslash chars
	TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},
	TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28},
	TestItem{"p_p", r"\w+",0,3},
	TestItem{"p_é", r"\w+",0,2},

	// Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()])
	TestItem{"1*1", r"(\d+)([*])(\d+)",0,3},
	TestItem{"+1*1", r"^(\d+)([*])(\d+)",-1,0},
	TestItem{"*1*1", r"(?:^|[*])(\d+)([*])(\d+)",0,4},
	TestItem{"*1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{")1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{"(1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",0,5},
	TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
	TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},

	// particular groups
	TestItem{"ababababac", r"ab(.*)(ac)",0,10},

	// backslash on finish string
	TestItem{"a", r"\S+",0,1},
	TestItem{"aaaa", r"\S+",0,4},
	TestItem{"aaaa ", r"\S+",0,4},

	// multiple dot char
	TestItem{"aba", r"a*(b*)*a",0,3},
	TestItem{"/*x*/", r"/\**(.*)\**/",0,5},
	TestItem{"/*x*/", r"/*(.*)*/",0,5},

	// test last IST check
	TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(.*)",0,26},
	TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(.*)",0,26},
	TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(\w*)",0,26},
	TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(\w*)",0,26},

	// test \S+ vs [^\s]+
	TestItem{"ab.c", r"\S+\.",0,3},
	TestItem{"ab.c", r"[^\s]+\.",0,3},
	TestItem{"ab.c", r"\S*\.",0,3},
	TestItem{"ab.c", r"[^\s]*\.",0,3},
	TestItem{"ab c", r"[\S]+\s",0,3},
	TestItem{"ab c", r"[^\s]+\s",0,3},

	// test a negated char class as the last token
	TestItem{"/a/", r"^/a/[^/]+$", -1,3},
	TestItem{"/a/b",r"^/a/[^/]+$", 0,4},

	// test `\0` as terminator
	TestItem{"abc", "^abc\0$", -1,3},
	TestItem{"abc\0", "^abc\0$", 0,4},

	// test has `\0` chars
	TestItem{"abcxyz", "^abc\0xyz$", -1,3},
	TestItem{"abc\0xyz", "^abc\0xyz$", 0,7},

	// test hex byte chars
	TestItem{"abc_xyz", r"abc\x5Fxyz", 0,7},
	TestItem{"abc_xyz", r"^abc\x5fxyz$", 0,7},
	TestItem{"abcAxyz", r"^abc\x41xyz$", 0,7},
	TestItem{"abcAAxyz", r"^abc\x41+xyz$", 0,8},
	TestItem{"abcALxyz", r"^abc\x41\x4Cxyz$", 0,8},
	TestItem{"abcAAxyz", r"^abc\X4141xyz$", 0,8},
	TestItem{"abcALxyz", r"^abc\X414cxyz$", 0,8},
	TestItem{"abcALxyz", r"^abc\X414Cxyz$", 0,8},
	TestItem{"abcBxyz", r"^abc\x41+xyz$", -1,3},

	// test anchor
	TestItem{"abc", r"^abc$",0,3},
	TestItem{"abc", r"^abc+$",0,3},
	TestItem{"abcd", r"^abc+$",-1,0},
]
| 212 | |
// TestItemRe is one replace expectation: replacing every match of `q` in
// `src` with `rep` must produce `r`.
// Entries are written positionally, so the field order must not change.
struct TestItemRe {
	src string // input text
	q string // regex query to compile
	rep string // replacement string handed to `replace` / `replace_simple`
	r string // expected text after the replacement
}
| 219 | |
// match_test_suite_replace holds the cases that `test_regex` runs through
// `re.replace`; several replacement strings contain `\0` / `\1` references.
const match_test_suite_replace = [
	// replace tests
	TestItemRe{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		"CIAO",
		"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		"CIAO",
		"CIAO is a good day and CIAO will be for sure."
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0] ",
		"Tod[ay] is a good d[ay] and tomorrow will be for sure."
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0_\0] ",
		"Tod[ay_ay] is a good d[ay_ay] and tomorrow will be for sure."
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0\1] ",
		"Tod[ay] is a good d[ay] and tomorrow will be for sure."
	},
]
| 253 | |
// match_test_suite_replace_simple holds the cases that `test_regex` runs
// through `re.replace_simple`; the replacement strings are plain text.
const match_test_suite_replace_simple = [
	// replace tests
	TestItemRe{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		"CIAO",
		"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		"CIAO",
		"CIAO is a good day and CIAO will be for sure."
	},
]
| 269 | |
// TestItemCGroup is one capturing-group expectation used by `test_regex`.
// When `cgn` is non-empty the test enables `group_csave_flag` and compares
// `cg` with `re.group_csave`; otherwise `cg` is compared with `re.groups`.
// Entries are written positionally, so the field order must not change.
struct TestItemCGroup {
	src string // text the query is matched against
	q string // regex query to compile
	s int // expected start index returned by `match_string`
	e int // expected end index returned by `match_string`
	cg []int // [number of items (3*# item), id_group_0, start_0, end_0, id_group_1, start1, start2,... ]
	cgn map[string]int // expected named groups: name -> group index (checked against `re.group_map[name] - 1`)
}
| 278 | |
// cgroups_test_suite lists the capturing-group cases checked first by
// `test_regex`. The first three entries declare named groups; the last three
// use an empty map.
const cgroups_test_suite = [
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+[\.|/])+",0,42,
		[7, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42],
		{'format':int(0),'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
		[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46]
		//[8, 0, 0, 4, 1, 7, 10, 1, 11, 15, 1, 16, 21, 1, 22, 27, 1, 28, 36, 1, 37, 41, 1, 42, 46],
		{'format':int(0),'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://([\w_]+\.)+",0,16,
		[3, 0, 0, 4, 1, 7, 11, 1, 11, 16],
		{'format':int(0)}
	},
	TestItemCGroup{
		"acc +13 pippo",
		r"(\w+)\s(.)([0-9]+) \w+",0,13,
		[0, 3, 4, 5, 5, 7],
		map[string]int{}
	},
	TestItemCGroup{
		"acc +13",
		r"(\w+)\s(.)([0-9]+)",0,7,
		[0, 3, 4, 5, 5, 7],
		map[string]int{}
	},
	TestItemCGroup{
		"ababababac",
		r"ab(.*)(ac)",0,10,
		[2, 8, 8, 10],
		map[string]int{}
	},
]
| 318 | |
// Test_find_all is one `find_all` / `find_all_str` expectation: running `q`
// over `src` must return `res` from `find_all` and `res_str` from
// `find_all_str`.
// Entries are written positionally, so the field order must not change.
struct Test_find_all {
	src string // text to scan
	q string // regex query to compile
	res []int // [0,4,5,6...]
	res_str []string // ['find0','find1'...]
}
| 325 | |
// find_all_test_suite lists the cases `test_regex` runs through `find_all`
// and `find_all_str`. In each entry `res` holds two integers per string in
// `res_str`.
const find_all_test_suite = [
	Test_find_all{
		"abcd 1234 efgh 1234 ghkl1234 ab34546df",
		r"\d+",
		[5, 9, 15, 19, 24, 28, 31, 36],
		['1234', '1234', '1234', '34546']
	},
	Test_find_all{
		"abcd 1234 efgh 1234 ghkl1234 ab34546df",
		r"\a+",
		[0, 4, 10, 14, 20, 24, 29, 31, 36, 38],
		['abcd', 'efgh', 'ghkl', 'ab', 'df']
	},
	Test_find_all{
		"oggi pippo è andato a casa di pluto ed ha trovato pippo",
		r"p[iplut]+o",
		[5, 10, 31, 36, 51, 56],
		['pippo', 'pluto', 'pippo']
	},
	Test_find_all{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		[5, 10, 31, 39, 54, 65],
		['pibao', 'pbababao', 'pibabababao']
	},
	Test_find_all{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		[0, 5, 24, 32],
		['Today', 'tomorrow']
	},
	Test_find_all{
		"pera\nurl = https://github.com/dario/pig.html\npippo",
		r"url *= *https?://[\w./]+",
		[5, 44],
		['url = https://github.com/dario/pig.html']
	},
	Test_find_all{
		"pera\nurl = https://github.com/dario/pig.html\npippo",
		r"url *= *https?://.*"+'\n',
		[5, 45],
		['url = https://github.com/dario/pig.html\n']
	},
	Test_find_all{
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
		r"#[.#]{4}##[.#]{4}##[.#]{4}###",
		[29, 49],
		['#....###...##...####']
	},
	Test_find_all{
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
		r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
		[0, 49],
		['#.#......##.#..#..##........##....###...##...####']
	},
	Test_find_all{
		"1234 Aa dddd Aaf 12334 Aa opopo Aaf",
		r"Aa.+Aaf",
		[5, 16, 23, 35],
		['Aa dddd Aaf', 'Aa opopo Aaf']
	},
	Test_find_all{
		"@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo",
		r"@for.+@endfor",
		[0, 22, 23, 50, 63, 80, 89, 117],
		['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor']
	},
	Test_find_all{
		"+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++",
		r"\+{3}.*\+{3}",
		[0, 11, 18, 32, 33, 44],
		['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++']
	},
	// the remaining entries cover patterns that can match the empty string
	Test_find_all{
		"ab",
		r"[^\n]*",
		[0, 2, 2, 2],
		['ab', '']
	},
	Test_find_all{
		"ab",
		r"([^\n]*)",
		[0, 2],
		['ab']
	},
	Test_find_all{
		"ab",
		r"([^\n]|a)*",
		[0, 2, 2, 2],
		['ab', '']
	},
	Test_find_all{
		"",
		r"a*",
		[0, 0],
		['']
	},
	Test_find_all{
		"b",
		r"a*",
		[0, 0, 1, 1],
		['', '']
	}
]
| 430 | |
| 431 | |
// Test_split is one `split` expectation: splitting `src` on the matches of
// `q` must return exactly `res`.
// Entries are written positionally, so the field order must not change.
struct Test_split {
	src string // text to split
	q string // regex query to compile
	res []string // ['abc','def',...]
}
| 437 | |
// split_test_suite lists the cases `test_regex` runs through `re.split`.
// The expected arrays include empty strings where a match sits at the very
// start or end of the source.
const split_test_suite = [
	Test_split{'abcd 1234 efgh 1234 ghkl1234 ab34546df', r'\d+', ['abcd ', ' efgh ', ' ghkl',
		' ab', 'df']},
	Test_split{'abcd 1234 efgh 1234 ghkl1234 ab34546df', r'\a+', ['', ' 1234 ', ' 1234 ', '1234 ',
		'34546', '']},
	Test_split{'oggi pippo è andato a casa di pluto ed ha trovato pippo', r'p[iplut]+o', [
		'oggi ', ' è andato a casa di ', ' ed ha trovato ', '']},
	Test_split{'oggi pibao è andato a casa di pbababao ed ha trovato pibabababao', r'(pi?(ba)+o)', [
		'oggi ', ' è andato a casa di ', ' ed ha trovato ', '']},
	Test_split{'Today is a good day and tomorrow will be for sure.', r'[Tt]o\w+', [
		'', ' is a good day and ', ' will be for sure.']},
	Test_split{'pera\nurl = https://github.com/dario/pig.html\npippo', r'url *= *https?://[\w./]+', [
		'pera\n', '\npippo']},
	Test_split{'pera\nurl = https://github.com/dario/pig.html\npippo', r'url *= *https?://.*' +
		'\n', ['pera\n', 'pippo']},
	Test_split{'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.', r'#[.#]{4}##[.#]{4}##[.#]{4}###', [
		'#.#......##.#..#..##........#', '##.......#.....#..#......#...#........###.#..#.']},
	Test_split{'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.', r'.*#[.#]{4}##[.#]{4}##[.#]{4}###', [
		'', '##.......#.....#..#......#...#........###.#..#.']},
	Test_split{'1234 Aa dddd Aaf 12334 Aa opopo Aaf', r'Aa.+Aaf', ['1234 ', ' 12334 ', '']},
	Test_split{'@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo', r'@for.+@endfor', [
		'', ' ', ' altro testo ', ' uno due ', ' pippo']},
	Test_split{'+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++', r'\+{3}.*\+{3}', [
		'', '\n elvo ', ' ', '']},
	Test_split{'foobar', r'\d', ['foobar']},
	Test_split{'1234', r'\d+', ['', '']},
	Test_split{'a-', r'-', ['a', '']},
	Test_split{'-a', r'-', ['', 'a']},
]
| 467 | // vfmt on |
| 468 | |
// test_regex runs every table-driven suite of this file in sequence:
// capturing groups, find_all, split, replace, replace_simple and finally
// match/find. Each failing case prints its context and then asserts false;
// the `continue` after an assert moves on to the next case in case asserts
// are configured not to abort the test.
fn test_regex() {
	// check capturing groups
	for c, to in cgroups_test_suite {
		// debug print
		if debug {
			println('${c} [${to.src}] [q${to.q}] (${to.s}, ${to.e})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		// cases with named groups are checked through the continuous-save buffer
		if to.cgn.len > 0 {
			re.group_csave_flag = true
			// re.group_csave = [-1].repeat(3*20+1)
			if debug {
				println('continuous save')
			}
		} else {
			if debug {
				println('NO continuous save')
			}
		}

		start, end := re.match_string(to.src)

		mut tmp_str := ''
		if start >= 0 && end > start {
			tmp_str = to.src[start..end]
		}

		if start != to.s || end != to.e {
			println('#${c} [${to.src}] q[${to.q}] res[${tmp_str}] base:[${to.s},${to.e}] ${start}, ${end}')
			eprintln('ERROR!')
			assert false
			continue
		}

		// check cgroups
		if to.cgn.len > 0 {
			if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
				// group_csave may be empty here, so never index it unguarded
				found := if re.group_csave.len > 0 { re.group_csave[0].str() } else { 'none' }
				eprintln('Capturing group len error! found: ${found} true ground: ${to.cg[0]}')
				assert false
				continue
			}

			// check captured groups, walking the buffer backwards down to index 1
			mut ln := re.group_csave[0] * 3
			for ln > 0 {
				if re.group_csave[ln] != to.cg[ln] {
					eprintln('Capturing group failed on ${ln} item!')
					assert false
				}
				ln--
			}

			// check named captured groups
			for k in to.cgn.keys() {
				if to.cgn[k] != (re.group_map[k] - 1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
					eprintln('Named capturing group error! [${k}]')
					assert false
					continue
				}
			}
		} else {
			// check normal captured groups
			if re.groups.len != to.cg.len {
				eprintln("Capture group doesn't match:")
				eprintln('true ground: ${to.cg}')
				eprintln('elaborated : ${re.groups}')
				assert false
				// the lengths differ: skip the element-wise check, which could index past `to.cg`
				continue
			}
			for ln := 0; ln < re.groups.len; ln++ {
				if re.groups[ln] != to.cg[ln] {
					eprintln("Capture group doesn't match:")
					eprintln('true ground: ${to.cg}')
					eprintln('elaborated : ${re.groups}')
					assert false
				}
			}
		}
	}

	// check find_all
	for c, to in find_all_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] (${to.res}, ${to.res_str})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		re.reset()
		res := re.find_all(to.src)
		if res != to.res {
			eprintln('err: find_all !!')
			if debug {
				println('#${c} exp: ${to.res} calculated: ${res}')
			}
			assert false
		}

		res_str := re.find_all_str(to.src)
		if res_str != to.res_str {
			eprintln('err: find_all_str !!')
			if debug {
				println('#${c} exp: ${to.res_str} calculated: ${res_str}')
			}
			assert false
		}
	}

	// check split
	for c, to in split_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] (${to.res})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		re.reset()
		res := re.split(to.src)
		if res != to.res {
			eprintln('err: split !!')
			if debug {
				println('#${c} exp: ${to.res} calculated: ${res}')
			}
			assert false
		}
	}

	// check replace
	for c, to in match_test_suite_replace {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.r}')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		res := re.replace(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check replace simple
	for c, to in match_test_suite_replace_simple {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.r}')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		res := re.replace_simple(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check match and find
	for c, to in match_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.s} ${to.e}')
		}

		// test the find: only entries whose expected start is past index 0
		if to.s > 0 {
			mut re := regex.regex_opt(to.q) or {
				eprintln('err: ${err}')
				assert false
				continue
			}
			// q_str := re.get_query()
			// eprintln("Query: ${q_str}")
			start, end := re.find(to.src)

			if start != to.s || end != to.e {
				err_str := re.get_parse_error_string(start)
				eprintln('ERROR : ${err_str} start: ${start} end: ${end}')
				assert false
			} else {
				// tmp_str := text[start..end]
				// println("found in [${start}, ${end}] => [${tmp_str}]")
				assert true
			}
			continue
		}

		// test the match
		mut re := regex.new()
		// re.debug = true

		re.compile_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}
		// println("#${c} [${to.src}] q[${to.q}]")
		start, end := re.match_string(to.src)

		mut tmp_str := ''
		if start >= 0 && end > start {
			tmp_str = to.src[start..end]
		}

		if start != to.s || end != to.e {
			eprintln('#${c} [${to.src}] q[${to.q}] res[${tmp_str}] ${start}, ${end}')
			eprintln('ERROR!')
			assert false
			continue
		}

		// test the match predicate
		if to.s >= 0 {
			assert re.matches_string(to.src)
		} else {
			assert !re.matches_string(to.src)
		}

		// rerun on a copy of the source to test consistency
		tmp_str1 := to.src.clone()
		start1, end1 := re.match_string(tmp_str1)
		if start1 != start || end1 != end {
			eprintln('two run ERROR!!')
			assert false
			continue
		}
	}

	if debug {
		println('DONE!')
	}
}
| 725 | |
// test_zero_length_find_matches pins the empty-match results of `a*` for
// match_string, find, find_from, find_all and find_all_str on '' and 'b'.
fn test_zero_length_find_matches() {
	mut star_re := regex.regex_opt(r'a*') or { panic(err) }

	// match_string: an empty match at index 0 for both inputs
	m_empty_s, m_empty_e := star_re.match_string('')
	assert m_empty_s == 0
	assert m_empty_e == 0
	m_b_s, m_b_e := star_re.match_string('b')
	assert m_b_s == 0
	assert m_b_e == 0

	// find: same expectation as match_string
	f_empty_s, f_empty_e := star_re.find('')
	assert f_empty_s == 0
	assert f_empty_e == 0
	f_b_s, f_b_e := star_re.find('b')
	assert f_b_s == 0
	assert f_b_e == 0

	// find_from index 1 of 'b': an empty match at index 1
	from_s, from_e := star_re.find_from('b', 1)
	assert from_s == 1
	assert from_e == 1

	// find_all / find_all_str report each empty match once
	spans_empty := star_re.find_all('')
	assert spans_empty == [0, 0]
	spans_b := star_re.find_all('b')
	assert spans_b == [0, 0, 1, 1]
	strs_empty := star_re.find_all_str('')
	assert strs_empty == ['']
	strs_b := star_re.find_all_str('b')
	assert strs_b == ['', '']
}
| 748 | |
// test_case_insensitive_flag sets `regex.f_ci` on four independent patterns
// (literal, char class, negated char class, `\a` class) and checks the
// result of `match_string` for each.
fn test_case_insensitive_flag() {
	// plain literal against mixed-case input
	mut literal_re := regex.regex_opt(r'hello') or { panic(err) }
	literal_re.flag |= regex.f_ci
	lit_s, lit_e := literal_re.match_string('HeLLo')
	assert lit_s == 0
	assert lit_e == 5

	// upper-case range against mixed-case input
	mut range_re := regex.regex_opt(r'^[A-Z]+$') or { panic(err) }
	range_re.flag |= regex.f_ci
	range_s, range_e := range_re.match_string('abcXYZ')
	assert range_s == 0
	assert range_e == 6

	// negated class: 'A' must be rejected by [^a]
	mut negated_re := regex.regex_opt(r'^[^a]+$') or { panic(err) }
	negated_re.flag |= regex.f_ci
	neg_s, _ := negated_re.match_string('A')
	assert neg_s == -1

	// `\a` class against mixed-case input
	mut alpha_re := regex.regex_opt(r'^\a+$') or { panic(err) }
	alpha_re.flag |= regex.f_ci
	alpha_s, alpha_e := alpha_re.match_string('AbC')
	assert alpha_s == 0
	assert alpha_e == 3
}
| 773 | |
// test_regex_func compiles a query through `regex.regex_base` and, when the
// returned status is `regex.compile_ok`, checks that it matches the whole
// sample string; any other status is reported and fails the test.
fn test_regex_func() {
	pattern := r'\d\dabcd'
	sample := '78abcd'
	mut re, status, fail_pos := regex.regex_base(pattern)
	if status != regex.compile_ok {
		eprintln('Error in query string in pos ${fail_pos}')
		eprintln('Error: ${re.get_parse_error_string(status)}')
		assert false
		return
	}
	first, last := re.match_string(sample)
	assert first == 0 && last == 6
}
| 788 | |
// my_repl_1 is a `replace_by_fn` callback: it prints the match span together
// with the group of id 0, and returns 'a' followed by that group upper-cased.
fn my_repl_1(re regex.RE, in_txt string, start int, end int) string {
	group0 := re.get_group_by_id(in_txt, 0)
	println('[${start}, ${end}] => ${group0}')
	upper := group0.to_upper()
	return 'a${upper}'
}
| 794 | |
// test_regex_func_replace1 runs `replace_by_fn` with `my_repl_1` and checks
// that every run of b's following an 'a' comes back upper-cased.
fn test_regex_func_replace1() {
	mut re := regex.regex_opt(r'a(b+)') or { panic(err) }
	replaced := re.replace_by_fn('abbabbbabbbbaabba', my_repl_1)
	assert replaced == 'aBBaBBBaBBBBaaBBa'
}
| 803 | |
// my_repl is a `replace_by_fn` callback: for the groups with id 0, 1 and 2 it
// takes the first byte of each group, appends 'X', and concatenates the three
// pieces in order.
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
	mut out := ''
	for group_id in 0 .. 3 {
		// slice [0..1] keeps only the first byte of the captured text
		out += re.get_group_by_id(in_txt, group_id)[0..1] + 'X'
	}
	return out
}
| 810 | |
// test_regex_func_replace builds three lines, each made of the same record
// followed by a random-length prefix of a filler sentence, and checks that
// `replace_by_fn` with `my_repl` rewrites every record to 'cXTX,X' while
// leaving the filler untouched.
fn test_regex_func_replace() {
	filler := "E il primo dei tre regni dell'Oltretomba cristiano visitato da Dante nel corso del viaggio, con la guida di Virgilio."
	record := r'"content": "They dont necessarily flag "you will be buying these shares on margin!"", "channel_id"'
	mut re := regex.regex_opt(r'"(content":\s+")(.*)(, "channel_id")') or { panic(err) }

	mut input_lines := []string{}
	mut expected_lines := []string{}
	for _ in 0 .. 3 {
		// prefix length in [10, 29]
		cut := int(10 + rand.u32() % 20)
		tail := filler[0..cut] + '\n'
		input_lines << record + tail
		expected_lines << 'cXTX,X' + tail
	}
	input := input_lines.join('')
	expected := expected_lines.join('')

	result := re.replace_by_fn(input, my_repl)
	if debug {
		eprintln(result)
		eprintln(expected)
	}
	assert result == expected
}
| 834 | |
// rest_regex_replace_n checks `replace_n` on a text holding two numbers: per
// the expected strings, a count of 0 changes nothing, 1 replaces only the
// first number, -1 only the last one, and counts of magnitude 2 or 3 replace
// both.
// NOTE(review): the `rest_` prefix (rather than `test_`) presumably keeps the
// test runner from picking this function up - confirm whether that is intended.
fn rest_regex_replace_n() {
	s := 'dario 1234 pepep 23454 pera'
	mut re := regex.regex_opt(r'\d+') or { panic(err) }

	// counts[i] is the count argument, expected[i] the text it must produce
	counts := [0, -1, 1, 2, -2, 3, -3]
	expected := [
		'dario 1234 pepep 23454 pera',
		'dario 1234 pepep [repl] pera',
		'dario [repl] pepep 23454 pera',
		'dario [repl] pepep [repl] pera',
		'dario [repl] pepep [repl] pera',
		'dario [repl] pepep [repl] pera',
		'dario [repl] pepep [repl] pera',
	]
	for i, count in counts {
		assert re.replace_n(s, '[repl]', count) == expected[i]
	}
}
| 853 | |
// test quantifier wrong sequences
// Every pattern below is expected to be rejected by `regex.regex_base` with
// `regex.err_syntax_error` (see `test_quantifier_sequences`).
const test_quantifier_sequences_list = [
	r'+{3}.*+{3}',
	r'+{3}.*?{3}',
	r'+{3}.**{3}',
	r'+{3}.*\+{3}*',
	r'+{3}.*\+{3}+',
	r'+{3}.*\+{3}??',
	r'+{3}.*\+{3}{4}',
]
| 864 | |
// test_quantifier_sequences checks that every pattern in
// test_quantifier_sequences_list is rejected by regex.regex_base with
// regex.err_syntax_error.
fn test_quantifier_sequences() {
	for pattern in test_quantifier_sequences_list {
		// only the error code matters here; the first and third return values are discarded
		_, re_err, _ := regex.regex_base(pattern)
		if re_err != regex.err_syntax_error {
			// name the offending pattern before the assert below fails
			eprintln('pattern: ${pattern} => ${re_err}')
		}
		assert re_err == regex.err_syntax_error
	}
}
| 874 | |
// Test_find_groups is one case for test_groups_in_find: a source text, a query,
// and the match span and group indexes expected after calling RE.find.
struct Test_find_groups {
	src string // text that is searched
	q string // regex query, compiled with regex.regex_opt
	s int // expected start index returned by find
	e int // expected end index returned by find
	res []int // expected content of re.groups: flattened start, end index pairs, one pair per group
}
| 883 | |
// vfmt off
// Cases for test_groups_in_find. Each entry lists, in order: source text,
// query, expected match start, expected match end, expected re.groups.
const find_groups_test_suite = [
	// two adjacent groups
	Test_find_groups{
		"aabbbccccdd",
		r"(b+)(c+)",
		2,
		9,
		[2, 5, 5, 9],
	},
	// two groups separated by an ungrouped `.*`
	Test_find_groups{
		"aabbbccccdd",
		r"(a+).*(c+)",
		0,
		9,
		[0, 2, 5, 9],
	},
	// nested groups: the expected pairs list the outer group, then the inner one, then the last group
	Test_find_groups{
		"aabbbccccdd",
		r"((b+).*)(d+)",
		2,
		11,
		[2, 9, 2, 5, 9, 11],
	},
]
// vfmt on
| 909 | |
// test_groups_in_find runs RE.find for every case in find_groups_test_suite and
// compares the returned span and the resulting re.groups with the expected values.
fn test_groups_in_find() {
	for tc in find_groups_test_suite {
		mut re := regex.regex_opt(tc.q) or { panic(err) }
		match_start, match_end := re.find(tc.src)
		// Debug print do not remove!!
		/*
		println("---------")
		println("src_text:[${tc.src}]")
		println("query :[${tc.q}]")
		println("[${match_start}, ${match_end}]")
		println(re.groups)
		mut gi := 0
		for gi < re.groups.len {
			if re.groups[gi] >= 0 {
				println('${gi / 2} :[${tc.src[re.groups[gi]..re.groups[gi + 1]]}]')
			}
			gi += 2
		}
		*/
		// the span first, then the group indexes
		assert match_start == tc.s
		assert match_end == tc.e
		assert re.groups == tc.res
	}
}
| 937 | |
// Queries that test_errors expects regex.regex_base to reject, i.e. to return
// an error code different from regex.compile_ok.
const err_query_list = [
	r'([a]|[b])*',
]
| 941 | |
// test_errors compiles every query in err_query_list with regex.regex_base and
// requires that none of them ends with regex.compile_ok.
fn test_errors() {
	mut rejected := 0
	for pattern in err_query_list {
		_, status, _ := regex.regex_base(pattern)
		if status == regex.compile_ok {
			// compiled cleanly: not counted, so the final assert will fail
			continue
		}
		rejected++
	}
	assert rejected == err_query_list.len
}
| 952 | |
// test_long_query builds two very long queries from a random 32768 character
// string and checks that each of them matches that whole string:
//   1. every character in its own group:        (a)(b)(c)...
//   2. every character opening a nested group:  (a(b(c...)))
fn test_long_query() {
	char_count := 32768
	text := rand.string(char_count)
	mut sb := strings.new_builder(char_count * 3)

	// query 1: one closed group per character
	for i in 0 .. text.len {
		sb.write_u8(`(`)
		sb.write_u8(text[i])
		sb.write_u8(`)`)
	}
	flat_query := sb.str()

	mut re := regex.regex_opt(flat_query) or { panic(err) }
	mut start, mut end := re.match_string(text)
	// println("${start}, ${end}")
	assert start >= 0
	assert end == text.len

	// query 2: all groups are opened first, then closed together at the end
	sb.clear()
	for i in 0 .. text.len {
		sb.write_u8(`(`)
		sb.write_u8(text[i])
	}
	sb.write_string(')'.repeat(text.len))
	nested_query := sb.str()

	re = regex.regex_opt(nested_query) or { panic(err) }
	start, end = re.match_string(text)
	// println("${start}, ${end}")
	assert start >= 0
	assert end == text.len
}
| 990 | |
// Test_negation_group is one case for test_negation_groups.
struct Test_negation_group {
	src string // text passed to RE.match_string
	res bool // true when match_string is expected to return a start index >= 0
}
| 995 | |
// Cases for test_negation_groups, which matches each source against the
// query r'(?!auto)\w+le'; the bool is the expected "matched" outcome.
const negation_groups = [
	Test_negation_group{'automobile', false},
	Test_negation_group{'botomobile', true},
	Test_negation_group{'auto_caravan', false},
	Test_negation_group{'moto_mobile', true},
	Test_negation_group{'pippole', true},
	Test_negation_group{'boring test', false},
]
| 1004 | |
// test_negation_groups matches every source in negation_groups against the
// query r'(?!auto)\w+le' and checks that "a match was found" (start >= 0)
// agrees with the expected flag of the case.
fn test_negation_groups() {
	query := r'(?!auto)\w+le'
	mut re := regex.regex_opt(query) or { panic(err) }
	for test in negation_groups {
		// only the start index is needed to tell a match from a miss
		start, _ := re.match_string(test.src)
		matched := start >= 0
		if matched != test.res {
			// name the failing case before the assert below fails
			eprintln('src: ${test.src} => matched: ${matched}, expected: ${test.res}')
		}
		assert matched == test.res
	}
}
| 1013 | |