// vtest build: !sanitized_job? module scanner import v.token import v.pref fn scan_kinds(text string) []token.Kind { mut scanner := new_plain_scanner(text, .skip_comments, &pref.Preferences{}) mut token_kinds := []token.Kind{} for { tok := scanner.text_scan() if tok.kind == .eof { break } token_kinds << tok.kind } return token_kinds } fn scan_tokens(text string) []token.Token { mut scanner := new_plain_scanner(text, .parse_comments, &pref.Preferences{}) mut tokens := []token.Token{} for { tok := scanner.text_scan() if tok.kind == .eof { break } tokens << tok } return tokens } fn test_scan() { token_kinds := scan_kinds('println(2 + 3)') assert token_kinds.len == 6 assert token_kinds[0] == .name assert token_kinds[1] == .lpar assert token_kinds[2] == .number assert token_kinds[3] == .plus assert token_kinds[4] == .number assert token_kinds[5] == .rpar } fn test_number_constant_input_format() { mut c := 0xa0 assert c == 0xa0 c = 0b1001 assert c == 9 c = 1000000 assert c == 1000000 } fn test_float_conversion_and_reading() { d := 23000000e-3 assert int(d) == 23000 mut e := 1.2E3 * -1e-1 assert e == -120.0 e = 1.2E3 * 1e-1 x := 55.0 assert e == 120.0 assert 1.23e+10 == 1.23e10 assert 1.23e+10 == 1.23e0010 assert (-1.23e+10) == (1.23e0010 * -1.0) assert x == 55.0 } fn test_float_without_fraction() { mut result := scan_kinds('x := 10.0') assert result.len == 3 assert result[0] == .name assert result[1] == .decl_assign assert result[2] == .number result = scan_kinds('return 3.0, 4.0') assert result.len == 4 assert result[0] == .key_return assert result[1] == .number assert result[2] == .comma assert result[3] == .number result = scan_kinds('fun(5.0)') assert result.len == 4 assert result[0] == .name assert result[1] == .lpar assert result[2] == .number assert result[3] == .rpar } fn test_reference_bools() { result := scan_kinds('true && false') assert result.len == 3 assert result[0] == .key_true assert result[1] == .and assert result[2] == .key_false } fn test_reference_var() { result := scan_kinds('&foo') assert result.len == 2 assert result[0] == .amp assert result[1] == .name } fn test_array_of_references() { result := scan_kinds('[]&foo') assert result.len == 4 assert result[0] == .lsbr assert result[1] == .rsbr assert result[2] == .amp assert result[3] == .name } fn test_ref_array_of_references() { result := scan_kinds('&[]&foo') assert result.len == 5 assert result[0] == .amp assert result[1] == .lsbr assert result[2] == .rsbr assert result[3] == .amp assert result[4] == .name } fn test_ref_ref_foo() { result := scan_kinds('&&foo') assert result.len == 3 assert result[0] == .amp assert result[1] == .amp assert result[2] == .name } fn test_array_of_ref_ref_foo() { result := scan_kinds('[]&&foo') assert result.len == 5 assert result[0] == .lsbr assert result[1] == .rsbr assert result[2] == .amp assert result[3] == .amp assert result[4] == .name } fn test_ref_ref_array_ref_ref_foo() { result := scan_kinds('&&[]&&foo') assert result.len == 7 assert result[0] == .amp assert result[1] == .amp assert result[2] == .lsbr assert result[3] == .rsbr assert result[4] == .amp assert result[5] == .amp assert result[6] == .name } fn test_escape_rune() { assert `\x61` == `a` assert `\u0061` == `a` assert `\U00000061` == `a` assert `\141` == `a` assert `\xe2\x98\x85` == `★` assert `\342\230\205` == `★` // the following lines test the scanner module // even before it is compiled into the v executable // SINGLE CHAR ESCAPES // SINGLE CHAR APOSTROPHE mut result := scan_tokens(r"`'`") assert result[0].kind == .chartoken assert result[0].lit == r"\'" // SINGLE CHAR BACKTICK result = scan_tokens(r'`\``') assert result[0].kind == .chartoken assert result[0].lit == r'\`' // SINGLE CHAR SLASH result = scan_tokens(r'`\\`') assert result[0].kind == .chartoken assert result[0].lit == r'\\' // SINGLE CHAR 16-bit UNICODE ESCAPE result = scan_tokens(r'`\u2605`') assert result[0].kind == .chartoken assert result[0].lit == r'★' // SINGLE CHAR 32-bit UNICODE ESCAPE result = scan_tokens(r'`\U00002605`') assert result[0].kind == .chartoken assert result[0].lit == r'★' // SINGLE CHAR ESCAPED ASCII result = scan_tokens(r'`\x61`') assert result[0].kind == .chartoken assert result[0].lit == r'a' // SINGLE CHAR INCORRECT ESCAPE // result = scan_tokens(r'`\x61\x61`') // should always result in an error // SINGLE CHAR MULTI-BYTE UTF-8 (hex) result = scan_tokens(r'`\xe2\x98\x85`') assert result[0].lit == r'★' // SINGLE CHAR MULTI-BYTE UTF-8 (octal) result = scan_tokens(r'`\342\230\205`') assert result[0].lit == r'★' } fn test_escape_string() { // these lines work if the v compiler is working assert '\x61' == 'a' assert '\x62' == 'b' assert '\u0061' == 'a' assert '\U00000061' == 'a' assert '\141' == 'a' assert '\xe2\x98\x85' == '★' assert '\342\230\205' == '★' // the following lines test the scanner module // even before it is compiled into the v executable // STRING ESCAPES ================= // STRING APOSTROPHE mut result := scan_tokens(r"'\''") assert result[0].kind == .string assert result[0].lit == r"\'" // STRING BACKTICK result = scan_tokens(r"'\`'") assert result[0].kind == .string assert result[0].lit == r'\`' // STRING SLASH result = scan_tokens(r"'\\'") assert result[0].kind == .string assert result[0].lit == r'\\' // STRING 16-bit UNICODE ESCAPE result = scan_tokens(r"'\u2605'") assert result[0].kind == .string assert result[0].lit == r'★' result = scan_tokens(r"'H\u2605H'") assert result[0].kind == .string assert result[0].lit == r'H★H' // STRING 32-bit UNICODE ESCAPE result = scan_tokens(r"'\U00002605'") assert result[0].kind == .string assert result[0].lit == r'★' result = scan_tokens(r"'H\U00002605H'") assert result[0].kind == .string assert result[0].lit == r'H★H' // STRING ESCAPED ASCII result = scan_tokens(r"'\x61'") assert result[0].kind == .string assert result[0].lit == r'a' // STRING ESCAPED EXTENDED ASCII // (should not be converted to unicode) result = scan_tokens(r"'\xe29885'") assert result[0].kind == .string assert result[0].lit.bytes() == [u8(0xe2), `9`, `8`, `8`, `5`] // MIX STRING ESCAPES with UTF-16 escapes result = scan_tokens(r"'\x61\u2605'") assert result[0].kind == .string assert result[0].lit == r'a★' result = scan_tokens(r"'\u2605\x61'") assert result[0].kind == .string assert result[0].lit == r'★a' // MIX STRING ESCAPES with UTF-16 escapes with offset result = scan_tokens(r"'x \x61\u2605\x61'") assert result[0].kind == .string assert result[0].lit == r'x a★a' result = scan_tokens(r"'x \u2605\x61\u2605'") assert result[0].kind == .string assert result[0].lit == r'x ★a★' // MIX STRING ESCAPES with UTF-32 escapes result = scan_tokens(r"'\x61\U00002605'") assert result[0].kind == .string assert result[0].lit == r'a★' result = scan_tokens(r"'\U00002605\x61'") assert result[0].kind == .string assert result[0].lit == r'★a' // MIX STRING ESCAPES with UTF-32 escapes with offset result = scan_tokens(r"'x \x61\U00002605\x61'") assert result[0].kind == .string assert result[0].lit == r'x a★a' result = scan_tokens(r"'x \U00002605\x61\U00002605'") assert result[0].kind == .string assert result[0].lit == r'x ★a★' // MIX STRING ESCAPES with UTF-16 and UTF-32 escapes result = scan_tokens(r"'\u2605\x61\U00002605'") assert result[0].kind == .string assert result[0].lit == r'★a★' result = scan_tokens(r"'\U00002605\x61\u2605'") assert result[0].kind == .string assert result[0].lit == r'★a★' // MIX STRING ESCAPES with UTF-16 and UTF-32 escapes with offset result = scan_tokens(r"'x \x61\U00002605\x61\u2605'") assert result[0].kind == .string assert result[0].lit == r'x a★a★' result = scan_tokens(r"'x \x61\u2605\x61\U00002605'") assert result[0].kind == .string assert result[0].lit == r'x a★a★' // SHOULD RESULT IN ERRORS // result = scan_tokens(r'`\x61\x61`') // should always result in an error // result = scan_tokens(r"'\x'") // should always result in an error // result = scan_tokens(r'`hello`') // should always result in an error } fn assert_str_interpolation_works(mlen int, text string) { mut max_len := 0 mut scanner := new_plain_scanner(text, .skip_comments, &pref.Preferences{}) for { tok := scanner.text_scan() if scanner.str_helper_tokens.len > max_len { max_len = scanner.str_helper_tokens.len } if tok.kind == .eof { break } } assert max_len == mlen assert scanner.errors.len == 0 assert scanner.str_helper_tokens.len == 0 } fn test_string_interpolation_with_nested_string_does_not_grow_str_helper_tokens_too_much() { sinterpolation := " s := 'x \${if true { '{' } else { '}' }} y' " assert_str_interpolation_works(3, sinterpolation) assert_str_interpolation_works(3, sinterpolation + sinterpolation + sinterpolation) assert_str_interpolation_works(3, '{'.repeat(100) + sinterpolation + '}'.repeat(100)) assert_str_interpolation_works(0, '{'.repeat(100) + '}'.repeat(100)) } fn test_dollar_sign_is_literal_without_braces() { mut result := scan_tokens("'a$b'") assert result.len == 1 assert result[0].kind == .string assert result[0].lit == 'a$b' result = scan_tokens('"a$b"') assert result.len == 1 assert result[0].kind == .string assert result[0].lit == 'a$b' } fn test_comment_string() { mut result := scan_tokens('// single line comment will get an \\x01 prepended') assert result[0].kind == .comment assert result[0].lit[0] == u8(1) // \x01 // result = scan_tokens('/// doc comment will keep third / at beginning') // result = scan_tokens('/* block comment will be stripped of whitespace */') // result = scan_tokens('a := 0 // line end comment also gets \\x01 prepended') }