| 1 | // vtest build: !sanitized_job? |
| 2 | module scanner |
| 3 | |
| 4 | import v.token |
| 5 | import v.pref |
| 6 | |
// scan_kinds scans `text` with a plain scanner in `.skip_comments` mode and
// returns the kind of every token produced before the `.eof` token.
fn scan_kinds(text string) []token.Kind {
	mut s := new_plain_scanner(text, .skip_comments, &pref.Preferences{})
	mut kinds := []token.Kind{}
	// Priming read: fetch one token ahead so the loop condition can test for eof.
	mut current := s.text_scan()
	for current.kind != .eof {
		kinds << current.kind
		current = s.text_scan()
	}
	return kinds
}
| 19 | |
// scan_tokens scans `text` with a plain scanner in `.parse_comments` mode and
// returns every token produced before the `.eof` token.
fn scan_tokens(text string) []token.Token {
	mut s := new_plain_scanner(text, .parse_comments, &pref.Preferences{})
	mut collected := []token.Token{}
	// Priming read: fetch one token ahead so the loop condition can test for eof.
	mut current := s.text_scan()
	for current.kind != .eof {
		collected << current
		current = s.text_scan()
	}
	return collected
}
| 32 | |
// test_scan checks the token kinds produced for a simple call expression.
fn test_scan() {
	kinds := scan_kinds('println(2 + 3)')
	// A single array comparison covers both the length and every element.
	assert kinds == [token.Kind.name, .lpar, .number, .plus, .number, .rpar]
}
| 43 | |
// test_number_constant_input_format checks that hexadecimal, binary and
// decimal integer literals are read with the expected values.
fn test_number_constant_input_format() {
	hex_value := 0xa0
	assert hex_value == 0xa0

	binary_value := 0b1001
	assert binary_value == 9

	decimal_value := 1000000
	assert decimal_value == 1000000
}
| 52 | |
// test_float_conversion_and_reading checks float literals written with
// exponents (upper/lower case `e`, explicit `+`, leading zeros) and the
// conversion of such a value to int.
fn test_float_conversion_and_reading() {
	scaled := 23000000e-3
	assert int(scaled) == 23000

	negative_product := 1.2E3 * -1e-1
	assert negative_product == -120.0

	positive_product := 1.2E3 * 1e-1
	// Declared between the product and its assert, and checked last.
	plain := 55.0
	assert positive_product == 120.0

	// Equivalent spellings of the same exponent must compare equal.
	assert 1.23e+10 == 1.23e10
	assert 1.23e+10 == 1.23e0010
	assert (-1.23e+10) == (1.23e0010 * -1.0)
	assert plain == 55.0
}
| 66 | |
// test_float_without_fraction checks that a float literal is scanned as one
// `.number` token in a declaration, in a return list and as a call argument.
fn test_float_without_fraction() {
	declaration := scan_kinds('x := 10.0')
	assert declaration == [token.Kind.name, .decl_assign, .number]

	return_list := scan_kinds('return 3.0, 4.0')
	assert return_list == [token.Kind.key_return, .number, .comma, .number]

	call := scan_kinds('fun(5.0)')
	assert call == [token.Kind.name, .lpar, .number, .rpar]
}
| 86 | |
// test_reference_bools checks that `&&` between two bool literals is scanned
// as a single `.and` token.
fn test_reference_bools() {
	kinds := scan_kinds('true && false')
	assert kinds == [token.Kind.key_true, .and, .key_false]
}
| 94 | |
// test_reference_var checks that `&foo` is scanned as `.amp` followed by `.name`.
fn test_reference_var() {
	kinds := scan_kinds('&foo')
	assert kinds == [token.Kind.amp, .name]
}
| 101 | |
// test_array_of_references checks the token kinds produced for `[]&foo`.
fn test_array_of_references() {
	kinds := scan_kinds('[]&foo')
	assert kinds == [token.Kind.lsbr, .rsbr, .amp, .name]
}
| 110 | |
// test_ref_array_of_references checks the token kinds produced for `&[]&foo`.
fn test_ref_array_of_references() {
	kinds := scan_kinds('&[]&foo')
	assert kinds == [token.Kind.amp, .lsbr, .rsbr, .amp, .name]
}
| 120 | |
// test_ref_ref_foo checks that a leading `&&` before a name is scanned as two
// separate `.amp` tokens rather than one `.and` token.
fn test_ref_ref_foo() {
	kinds := scan_kinds('&&foo')
	assert kinds == [token.Kind.amp, .amp, .name]
}
| 128 | |
// test_array_of_ref_ref_foo checks that `&&` after `[]` is scanned as two
// separate `.amp` tokens.
fn test_array_of_ref_ref_foo() {
	kinds := scan_kinds('[]&&foo')
	assert kinds == [token.Kind.lsbr, .rsbr, .amp, .amp, .name]
}
| 138 | |
// test_ref_ref_array_ref_ref_foo checks that both `&&` pairs in `&&[]&&foo`
// are scanned as separate `.amp` tokens.
fn test_ref_ref_array_ref_ref_foo() {
	kinds := scan_kinds('&&[]&&foo')
	assert kinds == [token.Kind.amp, .amp, .lsbr, .rsbr, .amp, .amp, .name]
}
| 150 | |
// test_escape_rune checks escape sequences inside rune (backtick) literals in
// two ways: first through rune literals written directly in this file, then by
// feeding raw source text to `scan_tokens` and inspecting the first token.
fn test_escape_rune() {
	// Hex, 16-bit unicode and 32-bit unicode escapes for `a`,
	// written as literals in this file.
	assert `\x61` == `a`
	assert `\u0061` == `a`
	assert `\U00000061` == `a`

	// Octal escape for `a`, then a multi-byte character spelled out
	// byte by byte in hex and in octal.
	assert `\141` == `a`
	assert `\xe2\x98\x85` == `★`
	assert `\342\230\205` == `★`

	// The following lines test the scanner module directly,
	// even before it is compiled into the v executable.

	// SINGLE CHAR ESCAPES
	// SINGLE CHAR APOSTROPHE
	// The input has no backslash; the expected literal is the escaped form.
	mut result := scan_tokens(r"`'`")
	assert result[0].kind == .chartoken
	assert result[0].lit == r"\'"

	// SINGLE CHAR BACKTICK
	result = scan_tokens(r'`\``')
	assert result[0].kind == .chartoken
	assert result[0].lit == r'\`'

	// SINGLE CHAR SLASH
	result = scan_tokens(r'`\\`')
	assert result[0].kind == .chartoken
	assert result[0].lit == r'\\'

	// SINGLE CHAR 16-bit UNICODE ESCAPE
	result = scan_tokens(r'`\u2605`')
	assert result[0].kind == .chartoken
	assert result[0].lit == r'★'

	// SINGLE CHAR 32-bit UNICODE ESCAPE
	result = scan_tokens(r'`\U00002605`')
	assert result[0].kind == .chartoken
	assert result[0].lit == r'★'

	// SINGLE CHAR ESCAPED ASCII
	result = scan_tokens(r'`\x61`')
	assert result[0].kind == .chartoken
	assert result[0].lit == r'a'

	// SINGLE CHAR INCORRECT ESCAPE
	// result = scan_tokens(r'`\x61\x61`') // should always result in an error

	// SINGLE CHAR MULTI-BYTE UTF-8 (hex)
	// Only the literal is checked here, not the token kind.
	result = scan_tokens(r'`\xe2\x98\x85`')
	assert result[0].lit == r'★'

	// SINGLE CHAR MULTI-BYTE UTF-8 (octal)
	// Only the literal is checked here, not the token kind.
	result = scan_tokens(r'`\342\230\205`')
	assert result[0].lit == r'★'
}
| 205 | |
// test_escape_string checks escape sequences inside string literals in two
// ways: first through string literals written directly in this file, then by
// feeding raw source text to `scan_tokens` and inspecting the first token.
fn test_escape_string() {
	// These lines work if the v compiler is working.
	assert '\x61' == 'a'
	assert '\x62' == 'b'
	assert '\u0061' == 'a'
	assert '\U00000061' == 'a'
	assert '\141' == 'a'
	assert '\xe2\x98\x85' == '★'
	assert '\342\230\205' == '★'

	// The following lines test the scanner module directly,
	// even before it is compiled into the v executable.

	// STRING ESCAPES =================
	// STRING APOSTROPHE
	// Escaped quote, backtick and backslash are expected to stay escaped
	// in the token literal.
	mut result := scan_tokens(r"'\''")
	assert result[0].kind == .string
	assert result[0].lit == r"\'"

	// STRING BACKTICK
	result = scan_tokens(r"'\`'")
	assert result[0].kind == .string
	assert result[0].lit == r'\`'

	// STRING SLASH
	result = scan_tokens(r"'\\'")
	assert result[0].kind == .string
	assert result[0].lit == r'\\'

	// STRING 16-bit UNICODE ESCAPE
	// Checked alone and surrounded by plain characters.
	result = scan_tokens(r"'\u2605'")
	assert result[0].kind == .string
	assert result[0].lit == r'★'
	result = scan_tokens(r"'H\u2605H'")
	assert result[0].kind == .string
	assert result[0].lit == r'H★H'

	// STRING 32-bit UNICODE ESCAPE
	// Checked alone and surrounded by plain characters.
	result = scan_tokens(r"'\U00002605'")
	assert result[0].kind == .string
	assert result[0].lit == r'★'
	result = scan_tokens(r"'H\U00002605H'")
	assert result[0].kind == .string
	assert result[0].lit == r'H★H'

	// STRING ESCAPED ASCII
	result = scan_tokens(r"'\x61'")
	assert result[0].kind == .string
	assert result[0].lit == r'a'

	// STRING ESCAPED EXTENDED ASCII
	// (should not be converted to unicode)
	// Only the two hex digits after `\x` form the escape (byte 0xe2);
	// the remaining digits `9885` are expected as plain characters.
	result = scan_tokens(r"'\xe29885'")
	assert result[0].kind == .string
	assert result[0].lit.bytes() == [u8(0xe2), `9`, `8`, `8`, `5`]

	// MIX STRING ESCAPES with UTF-16 escapes
	// Both orders are checked: hex escape first, then unicode escape first.
	result = scan_tokens(r"'\x61\u2605'")
	assert result[0].kind == .string
	assert result[0].lit == r'a★'
	result = scan_tokens(r"'\u2605\x61'")
	assert result[0].kind == .string
	assert result[0].lit == r'★a'

	// MIX STRING ESCAPES with UTF-16 escapes with offset
	// "With offset" here means the escapes are preceded by the plain text `x `.
	result = scan_tokens(r"'x \x61\u2605\x61'")
	assert result[0].kind == .string
	assert result[0].lit == r'x a★a'
	result = scan_tokens(r"'x \u2605\x61\u2605'")
	assert result[0].kind == .string
	assert result[0].lit == r'x ★a★'

	// MIX STRING ESCAPES with UTF-32 escapes
	result = scan_tokens(r"'\x61\U00002605'")
	assert result[0].kind == .string
	assert result[0].lit == r'a★'
	result = scan_tokens(r"'\U00002605\x61'")
	assert result[0].kind == .string
	assert result[0].lit == r'★a'

	// MIX STRING ESCAPES with UTF-32 escapes with offset
	result = scan_tokens(r"'x \x61\U00002605\x61'")
	assert result[0].kind == .string
	assert result[0].lit == r'x a★a'
	result = scan_tokens(r"'x \U00002605\x61\U00002605'")
	assert result[0].kind == .string
	assert result[0].lit == r'x ★a★'

	// MIX STRING ESCAPES with UTF-16 and UTF-32 escapes
	result = scan_tokens(r"'\u2605\x61\U00002605'")
	assert result[0].kind == .string
	assert result[0].lit == r'★a★'
	result = scan_tokens(r"'\U00002605\x61\u2605'")
	assert result[0].kind == .string
	assert result[0].lit == r'★a★'

	// MIX STRING ESCAPES with UTF-16 and UTF-32 escapes with offset
	result = scan_tokens(r"'x \x61\U00002605\x61\u2605'")
	assert result[0].kind == .string
	assert result[0].lit == r'x a★a★'
	result = scan_tokens(r"'x \x61\u2605\x61\U00002605'")
	assert result[0].kind == .string
	assert result[0].lit == r'x a★a★'

	// SHOULD RESULT IN ERRORS
	// The inputs below are kept commented out; they are not executed by this test.
	// result = scan_tokens(r'`\x61\x61`') // should always result in an error
	// result = scan_tokens(r"'\x'") // should always result in an error
	// result = scan_tokens(r'`hello`') // should always result in an error
}
| 315 | |
// assert_str_interpolation_works scans `text` to the end and asserts that:
// - the largest length `str_helper_tokens` reached after any scan step equals `mlen`,
// - the scanner recorded no errors,
// - `str_helper_tokens` is empty once scanning has finished.
fn assert_str_interpolation_works(mlen int, text string) {
	mut s := new_plain_scanner(text, .skip_comments, &pref.Preferences{})
	mut peak := 0
	for {
		kind := s.text_scan().kind
		// Sample the helper buffer after every token, including the eof token.
		pending := s.str_helper_tokens.len
		if pending > peak {
			peak = pending
		}
		if kind == .eof {
			break
		}
	}
	assert peak == mlen
	assert s.errors.len == 0
	assert s.str_helper_tokens.len == 0
}
| 332 | |
// Checks that the peak size of the scanner's `str_helper_tokens` stays at 3 for
// an interpolation containing nested strings, whether the snippet appears once,
// is repeated, or is wrapped in many braces, and stays at 0 for braces alone.
fn test_string_interpolation_with_nested_string_does_not_grow_str_helper_tokens_too_much() {
	snippet := " s := 'x \${if true { '{' } else { '}' }} y' "
	opening := '{'.repeat(100)
	closing := '}'.repeat(100)

	assert_str_interpolation_works(3, snippet)
	assert_str_interpolation_works(3, snippet + snippet + snippet)
	assert_str_interpolation_works(3, opening + snippet + closing)
	assert_str_interpolation_works(0, opening + closing)
}
| 340 | |
// test_dollar_sign_is_literal_without_braces checks that `$` not followed by
// `{` is kept as plain text, for both single- and double-quoted strings: the
// whole string must come back as one `.string` token with the `$` in its literal.
fn test_dollar_sign_is_literal_without_braces() {
	sources := ["'a$b'", '"a$b"']
	for source in sources {
		tokens := scan_tokens(source)
		assert tokens.len == 1
		only := tokens[0]
		assert only.kind == .string
		assert only.lit == 'a$b'
	}
}
| 352 | |
// test_comment_string checks that a single line comment is scanned as a
// `.comment` token whose literal starts with the byte 0x01.
fn test_comment_string() {
	tokens := scan_tokens('// single line comment will get an \\x01 prepended')
	first := tokens[0]
	assert first.kind == .comment
	assert first.lit[0] == u8(1) // \x01
	// Further comment forms, currently not exercised:
	// result = scan_tokens('/// doc comment will keep third / at beginning')
	// result = scan_tokens('/* block comment will be stripped of whitespace */')
	// result = scan_tokens('a := 0 // line end comment also gets \\x01 prepended')
}
| 361 | |