v2 / vlib / v / scanner / scanner_test.v
360 lines · 313 sloc · 9.69 KB · 046d870cb46a2a276fbe02a1d4f43b3af0c00a23
Raw
1// vtest build: !sanitized_job?
2module scanner
3
4import v.token
5import v.pref
6
// scan_kinds runs a plain scanner (created in `.skip_comments` mode, with
// default preferences) over `text` and collects the kind of every token
// returned before the `.eof` token, in the order they were scanned.
fn scan_kinds(text string) []token.Kind {
	mut s := new_plain_scanner(text, .skip_comments, &pref.Preferences{})
	mut kinds := []token.Kind{}
	mut current := s.text_scan()
	// The `.eof` token itself is never stored.
	for current.kind != .eof {
		kinds << current.kind
		current = s.text_scan()
	}
	return kinds
}
19
// scan_tokens runs a plain scanner (created in `.parse_comments` mode, with
// default preferences) over `text` and collects every full token returned
// before the `.eof` token, in the order they were scanned.
fn scan_tokens(text string) []token.Token {
	mut s := new_plain_scanner(text, .parse_comments, &pref.Preferences{})
	mut collected := []token.Token{}
	mut current := s.text_scan()
	// The `.eof` token itself is never stored.
	for current.kind != .eof {
		collected << current
		current = s.text_scan()
	}
	return collected
}
32
// test_scan checks the exact sequence of token kinds produced for a simple
// call expression with a binary addition as its argument.
fn test_scan() {
	kinds := scan_kinds('println(2 + 3)')
	assert kinds == [token.Kind.name, .lpar, .number, .plus, .number, .rpar]
}
43
// test_number_constant_input_format checks that hexadecimal, binary and
// decimal integer literals are read with the expected values.
fn test_number_constant_input_format() {
	hex_value := 0xa0
	assert hex_value == 0xa0
	bin_value := 0b1001
	assert bin_value == 9
	dec_value := 1000000
	assert dec_value == 1000000
}
52
// test_float_conversion_and_reading checks float literals written with
// exponents (upper/lower case `e`, explicit `+`, leading zeros in the
// exponent) as well as a float-to-int conversion.
fn test_float_conversion_and_reading() {
	scaled := 23000000e-3
	assert int(scaled) == 23000
	negative_product := 1.2E3 * -1e-1
	assert negative_product == -120.0
	positive_product := 1.2E3 * 1e-1
	// Declared right after an exponent literal, checked at the very end.
	plain := 55.0
	assert positive_product == 120.0
	assert 1.23e+10 == 1.23e10
	assert 1.23e+10 == 1.23e0010
	assert (-1.23e+10) == (1.23e0010 * -1.0)
	assert plain == 55.0
}
66
// test_float_without_fraction scans float literals in three contexts (a
// declaration, a multi-value return and a call argument) and expects each
// literal to appear as exactly one `.number` token.
fn test_float_without_fraction() {
	assert scan_kinds('x := 10.0') == [token.Kind.name, .decl_assign, .number]
	assert scan_kinds('return 3.0, 4.0') == [token.Kind.key_return, .number, .comma, .number]
	assert scan_kinds('fun(5.0)') == [token.Kind.name, .lpar, .number, .rpar]
}
86
// test_reference_bools expects `&&` between two boolean keywords to be
// scanned as a single `.and` token.
fn test_reference_bools() {
	kinds := scan_kinds('true && false')
	assert kinds == [token.Kind.key_true, .and, .key_false]
}
94
// test_reference_var expects `&foo` to be scanned as `.amp` followed by a name.
fn test_reference_var() {
	kinds := scan_kinds('&foo')
	assert kinds == [token.Kind.amp, .name]
}
101
// test_array_of_references expects `[]&foo` to be scanned as the two square
// brackets, one `.amp` and a name.
fn test_array_of_references() {
	kinds := scan_kinds('[]&foo')
	assert kinds == [token.Kind.lsbr, .rsbr, .amp, .name]
}
110
// test_ref_array_of_references expects `&[]&foo` to be scanned with a
// separate `.amp` token on each side of the square brackets.
fn test_ref_array_of_references() {
	kinds := scan_kinds('&[]&foo')
	assert kinds == [token.Kind.amp, .lsbr, .rsbr, .amp, .name]
}
120
// test_ref_ref_foo expects a leading `&&` directly before a name to be
// scanned as two `.amp` tokens rather than one `.and`.
fn test_ref_ref_foo() {
	kinds := scan_kinds('&&foo')
	assert kinds == [token.Kind.amp, .amp, .name]
}
128
// test_array_of_ref_ref_foo expects the `&&` following `[]` to be scanned as
// two `.amp` tokens.
fn test_array_of_ref_ref_foo() {
	kinds := scan_kinds('[]&&foo')
	assert kinds == [token.Kind.lsbr, .rsbr, .amp, .amp, .name]
}
138
// test_ref_ref_array_ref_ref_foo expects both `&&` groups in `&&[]&&foo` to
// be scanned as pairs of `.amp` tokens.
fn test_ref_ref_array_ref_ref_foo() {
	kinds := scan_kinds('&&[]&&foo')
	assert kinds == [token.Kind.amp, .amp, .lsbr, .rsbr, .amp, .amp, .name]
}
150
// test_escape_rune verifies rune escape sequences in two ways: first through
// rune literals in this file, then by handing backtick-quoted sources to
// scan_tokens and inspecting the first token that comes back.
fn test_escape_rune() {
	// Literals resolved by whichever compiler builds this test.
	assert `\x61` == `a`
	assert `\u0061` == `a`
	assert `\U00000061` == `a`

	assert `\141` == `a`
	assert `\xe2\x98\x85` == `★`
	assert `\342\230\205` == `★`

	// Everything below exercises the scanner module directly,
	// even before it is compiled into the v executable.

	// Apostrophe: the source has it unescaped, the literal is expected
	// to come back with a leading backslash.
	apostrophe := scan_tokens(r"`'`")[0]
	assert apostrophe.kind == .chartoken
	assert apostrophe.lit == r"\'"

	// Escaped backtick.
	backtick := scan_tokens(r'`\``')[0]
	assert backtick.kind == .chartoken
	assert backtick.lit == r'\`'

	// Escaped backslash.
	backslash := scan_tokens(r'`\\`')[0]
	assert backslash.kind == .chartoken
	assert backslash.lit == r'\\'

	// 16-bit unicode escape.
	unicode16 := scan_tokens(r'`\u2605`')[0]
	assert unicode16.kind == .chartoken
	assert unicode16.lit == r'★'

	// 32-bit unicode escape.
	unicode32 := scan_tokens(r'`\U00002605`')[0]
	assert unicode32.kind == .chartoken
	assert unicode32.lit == r'★'

	// Hex-escaped ASCII.
	ascii_hex := scan_tokens(r'`\x61`')[0]
	assert ascii_hex.kind == .chartoken
	assert ascii_hex.lit == r'a'

	// Incorrect escape, intentionally not scanned here:
	// scan_tokens(r'`\x61\x61`') should always result in an error.

	// Multi-byte UTF-8 written as hex escapes (only the literal is checked).
	utf8_hex := scan_tokens(r'`\xe2\x98\x85`')[0]
	assert utf8_hex.lit == r'★'

	// Multi-byte UTF-8 written as octal escapes (only the literal is checked).
	utf8_octal := scan_tokens(r'`\342\230\205`')[0]
	assert utf8_octal.lit == r'★'
}
205
// test_escape_string verifies string escape sequences in two ways: first
// through string literals in this file, then by handing quoted sources to
// scan_tokens and comparing the first token's kind and literal.
fn test_escape_string() {
	// Literals resolved by whichever compiler builds this test;
	// these hold if the v compiler is working.
	assert '\x61' == 'a'
	assert '\x62' == 'b'
	assert '\u0061' == 'a'
	assert '\U00000061' == 'a'
	assert '\141' == 'a'
	assert '\xe2\x98\x85' == '★'
	assert '\342\230\205' == '★'

	// Everything below exercises the scanner module directly,
	// even before it is compiled into the v executable.

	// Each row is [scanned source, expected literal].
	single_escape_cases := [
		// apostrophe, backtick, backslash
		[r"'\''", r"\'"],
		[r"'\`'", r'\`'],
		[r"'\\'", r'\\'],
		// 16-bit unicode escape, alone and embedded
		[r"'\u2605'", r'★'],
		[r"'H\u2605H'", r'H★H'],
		// 32-bit unicode escape, alone and embedded
		[r"'\U00002605'", r'★'],
		[r"'H\U00002605H'", r'H★H'],
		// hex-escaped ASCII
		[r"'\x61'", r'a'],
	]
	for row in single_escape_cases {
		scanned := scan_tokens(row[0])[0]
		assert scanned.kind == .string
		assert scanned.lit == row[1]
	}

	// Escaped extended ASCII (should not be converted to unicode):
	// only the two hex digits after `\x` form the byte, the rest stays text.
	extended := scan_tokens(r"'\xe29885'")[0]
	assert extended.kind == .string
	assert extended.lit.bytes() == [u8(0xe2), `9`, `8`, `8`, `5`]

	// Each row is [scanned source, expected literal].
	mixed_escape_cases := [
		// hex escapes mixed with UTF-16 escapes
		[r"'\x61\u2605'", r'a★'],
		[r"'\u2605\x61'", r'★a'],
		// ... with an offset
		[r"'x \x61\u2605\x61'", r'x a★a'],
		[r"'x \u2605\x61\u2605'", r'x ★a★'],
		// hex escapes mixed with UTF-32 escapes
		[r"'\x61\U00002605'", r'a★'],
		[r"'\U00002605\x61'", r'★a'],
		// ... with an offset
		[r"'x \x61\U00002605\x61'", r'x a★a'],
		[r"'x \U00002605\x61\U00002605'", r'x ★a★'],
		// UTF-16 and UTF-32 escapes together
		[r"'\u2605\x61\U00002605'", r'★a★'],
		[r"'\U00002605\x61\u2605'", r'★a★'],
		// ... with an offset
		[r"'x \x61\U00002605\x61\u2605'", r'x a★a★'],
		[r"'x \x61\u2605\x61\U00002605'", r'x a★a★'],
	]
	for row in mixed_escape_cases {
		scanned := scan_tokens(row[0])[0]
		assert scanned.kind == .string
		assert scanned.lit == row[1]
	}

	// Inputs that should always result in an error, intentionally not scanned:
	// scan_tokens(r'`\x61\x61`')
	// scan_tokens(r"'\x'")
	// scan_tokens(r'`hello`')
}
315
// assert_str_interpolation_works scans `text` to the end while tracking the
// largest length reached by the scanner's `str_helper_tokens`. It then
// asserts that this peak equals `mlen`, that the scanner recorded no
// errors, and that `str_helper_tokens` is empty once scanning is done.
fn assert_str_interpolation_works(mlen int, text string) {
	mut s := new_plain_scanner(text, .skip_comments, &pref.Preferences{})
	mut peak := 0
	mut reached_eof := false
	for !reached_eof {
		reached_eof = s.text_scan().kind == .eof
		// Sampled after every scan, including the one that yields `.eof`.
		pending := s.str_helper_tokens.len
		if pending > peak {
			peak = pending
		}
	}
	assert peak == mlen
	assert s.errors.len == 0
	assert s.str_helper_tokens.len == 0
}
332
// Checks, via assert_str_interpolation_works, that an interpolation
// containing nested string literals is scanned with an expected peak of 3,
// whether the snippet appears once, three times in a row, or wrapped in 100
// unmatched-looking braces, and that plain braces alone give a peak of 0.
fn test_string_interpolation_with_nested_string_does_not_grow_str_helper_tokens_too_much() {
	snippet := " s := 'x \${if true { '{' } else { '}' }} y' "
	open_braces := '{'.repeat(100)
	close_braces := '}'.repeat(100)
	assert_str_interpolation_works(3, snippet)
	assert_str_interpolation_works(3, snippet.repeat(3))
	assert_str_interpolation_works(3, open_braces + snippet + close_braces)
	assert_str_interpolation_works(0, open_braces + close_braces)
}
340
// test_dollar_sign_is_literal_without_braces expects a `$` that is not
// followed by `{` to stay inside a single `.string` token, for both
// single-quoted and double-quoted sources.
fn test_dollar_sign_is_literal_without_braces() {
	for source in ["'a$b'", '"a$b"'] {
		tokens := scan_tokens(source)
		assert tokens.len == 1
		only := tokens[0]
		assert only.kind == .string
		assert only.lit == 'a$b'
	}
}
352
// test_comment_string expects a single line comment to be scanned as a
// `.comment` token whose literal starts with the byte 0x01.
fn test_comment_string() {
	first := scan_tokens('// single line comment will get an \\x01 prepended')[0]
	assert first.kind == .comment
	assert first.lit[0] == u8(1) // \x01
	// Cases not exercised yet:
	// scan_tokens('/// doc comment will keep third / at beginning')
	// scan_tokens('/* block comment will be stripped of whitespace */')
	// scan_tokens('a := 0 // line end comment also gets \\x01 prepended')
}
361