v / cmd / tools / vreduce.v
468 lines · 442 sloc · 14.88 KB · e2e5cf8db56f3562c7baa735061690be936bdf3e
Raw
1import os
2import v.vmod
3import flag
4import time
5import math
6import log
7
8const version = '0.0.2'
9const default_command = '${os.quoted_path(@VEXE)} -no-skip-unused' // Command used to compile the program, using -no-skip-unused to ease the reducing
10const default_error_msg = 'C compilation error' // the pattern to reproduce
11// Temporary files
12const tmp_folder = os.join_path(os.vtmp_dir(), 'vreduce')
13
14fn main() {
15 log.use_stdout()
16 mut fp := flag.new_flag_parser(os.args)
17 fp.skip_executable()
18 fp.application('v reduce path/to/file_to_reduce.v')
19 fp.description('This tool will reduce the code file and try to make the smallest one it can that reproduces the error when the command is executed')
20 fp.version(version)
21
22 error_msg := fp.string('error_msg', `m`, default_error_msg,
23 'the error message you want to reproduce, default: \'${default_error_msg}\'')
24 mut command := fp.string('command', `c`, default_command,
25 'the command used to try to reproduce the error, default: \'${default_command}\', will replace PATH with the path of the folder where it is run')
26 copy_project := fp.bool('cp', `p`, false,
27 'if used v reduce will copy the whole folder of the project')
28 timeout := fp.int('to', `t`, 0, 'sets a timeout for the command, default=0 : no timeout')
29 do_fmt := fp.bool('fmt', `w`, false, 'enable v fmt for the output (rpdc_file_name.v)')
30 file_paths := fp.finalize() or {
31 eprintln(err)
32 println(fp.usage())
33 return
34 }
35
36 if file_paths.len != 2 {
37 println(fp.usage())
38 exit(0)
39 }
40
41 mut file_path := file_paths[1]
42 if file_path == '' || !os.exists(file_path) {
43 log.error('You need to specify a valid file to reduce.')
44 if file_path != '' {
45 log.error('Path `${file_path}` is not a valid .v file.')
46 }
47 println(fp.usage())
48 exit(1)
49 }
50
51 log.info("Starting to reduce the file: '${file_path}'\n with command: `${command}`,\n trying to reproduce: `${error_msg}`")
52
53 if do_fmt {
54 log.info('Will do `v fmt -w rpdc_file_name.v` after the reduction.')
55 } else {
56 log.info('Will NOT do `v fmt -w rpdc_file_name.v` (use the `--fmt` or `-w` flag to enable it)')
57 }
58
59 content := os.read_file(file_path)!
60 show_code_stats(content, label: 'Original code size')
61
62 // copy project
63 if os.exists(tmp_folder) {
64 os.rmdir_all(tmp_folder)!
65 }
66 os.mkdir(tmp_folder)!
67 if copy_project {
68 mut vmod_cacher := vmod.new_mod_file_cacher()
69 project_folder := vmod_cacher.get_by_file(file_path).vmod_folder
70 os.cp_all('${project_folder}/.', tmp_folder + '/', true)!
71 // the path of the target file from the project folder
72 file_path = os.walk_ext(project_folder, os.file_name(file_path))[0] or {
73 panic('File not found in the project folder')
74 }
75 file_path = file_path[project_folder.len + 1..] // will remove the / too
76 }
77 full_file_path := '${tmp_folder}/${file_path}'
78 if command == default_command {
79 command = '${default_command} ${full_file_path}'
80 } else {
81 command = command.replace('PATH', '${tmp_folder}/')
82 }
83
84 // start tests
85 tmp_code := create_code(parse(content))
86 warn_on_false(string_reproduces(tmp_code, error_msg, command, full_file_path, true, timeout),
87 'string_reproduces', @LOCATION)
88 show_code_stats(tmp_code, label: 'Code size without comments')
89
90 // reduce the code
91 reduce_scope(content, error_msg, command, do_fmt, full_file_path, timeout)
92
93 // cleanse
94 if os.exists(tmp_folder) {
95 os.rmdir_all(tmp_folder)!
96 }
97}
98
99// Return true if the command ran on the file produces the pattern
100fn string_reproduces(file_content string, pattern string, command string, file_path string, debug bool, timeout int) bool {
101 if !os.exists(tmp_folder) {
102 os.mkdir(tmp_folder) or { panic(err) }
103 }
104 os.write_file(file_path, file_content) or { panic(err) }
105 mut output := ''
106 if timeout == 0 {
107 res := os.execute(command)
108 output = res.output
109 } else {
110 split := command.split(' ')
111 mut prog := os.new_process(split[0])
112 prog.use_pgroup = true
113 prog.set_args(split[1..])
114 prog.set_redirect_stdio()
115 prog.run()
116 mut sw := time.new_stopwatch()
117 sw.start()
118 for prog.is_alive() {
119 // check if there is any input from the user (it does not block, if there is not):
120 time.sleep(1 * time.millisecond)
121 mut b := true
122 for b {
123 b = false
124 if oline := prog.pipe_read(.stdout) {
125 if oline != '' {
126 output += oline
127 b = true
128 }
129 }
130 if eline := prog.pipe_read(.stderr) {
131 if eline != '' {
132 output += eline
133 b = true
134 }
135 }
136 if sw.elapsed().seconds() > f32(timeout) {
137 break
138 }
139 }
140 if sw.elapsed().seconds() > f32(timeout) {
141 if debug {
142 println('Timeout')
143 }
144 prog.signal_pgkill()
145 prog.close()
146 prog.wait()
147 return false
148 }
149 }
150 prog.signal_pgkill()
151 prog.close()
152 prog.wait()
153 }
154 if output.contains(pattern) {
155 // println('reproduces')
156 return true
157 } else {
158 // println('does not reproduce')
159 if debug {
160 println(output)
161 println('executed command: ${command}')
162 }
163 return false
164 }
165}
166
167type Elem = string | Scope
168
169@[heap]
170struct Scope {
171mut:
172 fn_scope bool // contains a function (string: signature{}, children: function body)
173 ignored bool // is the scope ignored when creating the file
174 tmp_ignored bool // used when testing if it can be ignored in the file
175 children []Elem // code blocks (strings & children scope
176}
177
178// Parse a V file and create a scope tree to represent it
179fn parse(file_content string) Scope { // The parser is surely incomplete for the V syntax, but should work for most of the cases, if not, please open an issue or submit a PR
180 mut stack := []&Scope{} // add the last parent to the stack
181 stack << &Scope{}
182 mut top := stack[0] // stores stack[stack.len-1] (the element on the top of the stack)
183 mut scope_level := 0 // Counts the scope depth of the current position in the file_content
184 mut i := 0 // index of the current char in the file_content
185 mut current_string := ''
186 for i < file_content.len {
187 top = stack[stack.len - 1] // the element on the top of the stack
188 if file_content[i] == `/` && file_content[i + 1] == `/` {
189 for file_content[i] != `\n` { // comment -> skip until newline
190 i++
191 }
192 } else if file_content[i] == `\n` && file_content[i - 1] == `\n` {
193 i++ // remove excess newlines
194 } else if file_content[i] == `\t` {
195 i++ // remove tabs for easier processing
196 } else if file_content[i] == `f` && file_content[i + 1] == `n` && file_content[i + 2] == ` ` && file_content[i - 1] or {
197 `\n`
198 } == `\n` {
199 top.children << current_string
200 // no increase in scope because not handled with {}
201 current_string = ''
202 top.children << &Scope{
203 fn_scope: true
204 }
205 stack << &(top.children[top.children.len - 1] as Scope)
206 current_string += file_content[i].ascii_str() // f
207 i++
208 current_string += file_content[i].ascii_str() // n
209 i++
210 } else if file_content[i] == `/` && file_content[i + 1] == `*` {
211 i++
212 i++
213 i++
214 for !(file_content[i - 1] == `*` && file_content[i] == `/`) { // multiline comment -> skip next multiline end sequence
215 i++
216 }
217 i++
218 } else if file_content[i] == `\`` && file_content[i - 1] != `\\` {
219 current_string += file_content[i].ascii_str()
220 i++
221 for file_content[i] != `\``
222 || (file_content[i - 1] == `\\` && file_content[i - 2] != `\\`) { // string -> skip until next `
223 current_string += file_content[i].ascii_str()
224 i++
225 }
226 current_string += file_content[i].ascii_str() // `
227 i++
228 } else if file_content[i] == `'` {
229 current_string += file_content[i].ascii_str() // '
230 i++
231 for file_content[i] != `'`
232 || (file_content[i - 1] == `\\` && file_content[i - 2] != `\\`) { // string -> skip until next '
233 current_string += file_content[i].ascii_str()
234 i++
235 }
236 current_string += file_content[i].ascii_str() // '
237 i++
238 } else if file_content[i] == `"` {
239 current_string += file_content[i].ascii_str() // "
240 i++
241 for file_content[i] != `"`
242 || (file_content[i - 1] == `\\` && file_content[i - 2] != `\\`) { // string -> skip until next "
243 current_string += file_content[i].ascii_str()
244 i++
245 }
246 current_string += file_content[i].ascii_str() // "
247 i++
248 } else if file_content[i] == `{` {
249 current_string += file_content[i].ascii_str()
250 i++
251 top.children << current_string
252 scope_level += 1
253 current_string = ''
254 top.children << &Scope{}
255 stack << &(top.children[top.children.len - 1] as Scope)
256 } else if file_content[i] == `}` {
257 scope_level -= 1
258 assert scope_level >= 0, 'The scopes are not well detected ${stack[0]}'
259 if current_string != '' {
260 top.children << current_string
261 }
262 if stack.last().children == [] {
263 stack[stack.len - 2].children.delete(stack[stack.len - 2].children.len - 1) // delete the empty scope (the last children because top of the stack)
264 }
265 stack.pop()
266 top = stack[stack.len - 1]
267 current_string = ''
268 current_string += file_content[i].ascii_str() // }
269 i++
270 if scope_level == 0 && stack.len == 2 { // the function and the body scope
271 top.children << current_string
272 stack.pop()
273 top = stack[stack.len - 1]
274 current_string = ''
275 }
276 } else {
277 current_string += file_content[i].ascii_str()
278 i++
279 }
280 // nothing here: to avoid complexity, no need to predict what happened before in the ifs, everything will be handled properly by the ifs
281 }
282 top = stack[stack.len - 1]
283 top.children << current_string // last part of the file
284 warn_on_false(scope_level == 0, 'scope_level == 0 /* the scopes are not well detected*/',
285 @LOCATION)
286 warn_on_false(stack.len == 1, 'stack.len == 1 /* the stack should only have the body scope */',
287 @LOCATION)
288 return *stack[0]
289}
290
291// Create the file from a scope tree
292fn create_code(sc Scope) string {
293 mut output_code := ''
294 mut stack := []Elem{}
295 stack << sc
296 for stack.len > 0 {
297 item := stack.pop()
298 if item is Scope {
299 if !item.ignored && !item.tmp_ignored {
300 stack << item.children.reverse() // to traverse the tree in the good order
301 } else {
302 }
303 } else if item is string { // string
304 output_code += item
305 } else {
306 panic('Should never happen')
307 }
308 }
309 return output_code
310}
311
312// Reduces the code contained in the scope tree and writes the reduced code to `rpdc_file_name.v`
313fn reduce_scope(content string, error_msg string, command string, do_fmt bool, file_path string, timeout int) {
314 mut sc := parse('') // will get filled in the start of the loop
315 log.info('Cleaning the scopes')
316 mut text_code := content
317 mut outer_modified_smth := true
318 rpdc_file_path := 'rpdc_${os.file_name(file_path)#[..-2]}.v'
319 for outer_modified_smth {
320 sc = parse(text_code)
321 outer_modified_smth = false
322 mut modified_smth := true // was a modification successful in reducing the code in the last iteration
323 for modified_smth { // as long as there are successful modifications
324 modified_smth = false
325 log.info('NEXT ITERATION, loop 1')
326 mut stack := []&Elem{}
327 for i in 0 .. sc.children.len {
328 stack << &sc.children[i]
329 }
330 mut item_nb := 0
331 for stack.len > 0 { // traverse the tree and disable (ignore) scopes that are not needed for reproduction
332 mut item := stack.pop()
333 item_nb += 1
334 eprint('\ritem n: ${item_nb}')
335 if mut item is Scope {
336 if !item.ignored {
337 item.tmp_ignored = true // try to ignore it
338 code := create_code(sc)
339 item.tmp_ignored = false // dont need it anymore
340 if string_reproduces(code, error_msg, command, file_path, false, timeout) {
341 item.ignored = true
342 modified_smth = true
343 outer_modified_smth = true
344 println('')
345 show_code_stats(code)
346 } else { // if can remove it, no need to go through its children
347 for i in 0 .. item.children.len {
348 stack.insert(0, &item.children[i]) // breadth first search
349 }
350 }
351 }
352 }
353 }
354 println('')
355
356 text_code = create_code(sc)
357 os.write_file(rpdc_file_path, text_code) or { panic(err) }
358 if do_fmt {
359 vfmt_file(rpdc_file_path)
360 }
361 println('The WIP reduced code is now in ${rpdc_file_path}')
362 }
363
364 log.info('Processing remaining lines')
365 split_code := text_code.split_into_lines() // dont forget to add back the \n
366 // Create the binary tree of the lines
367 depth := int(math.log2(split_code.len)) + 1
368 mut c := 0
369 mut line_stack := []&Scope{}
370 line_stack << &Scope{}
371 for c < split_code.len {
372 l1 := line_stack.len
373 if l1 <= depth { // or equal because of the first node
374 if line_stack[l1 - 1].children.len < 2 {
375 line_stack[l1 - 1].children << &Scope{}
376 l2 := line_stack[l1 - 1].children.len
377 line_stack << &(line_stack[l1 - 1].children[l2 - 1] as Scope)
378 } else {
379 line_stack.pop()
380 }
381 } else {
382 if line_stack[l1 - 1].children.len != 0 { // if there is already a string
383 line_stack.pop()
384 } else {
385 line_stack[l1 - 1].children << split_code[c] + '\n' // the \n were removed by the split
386 c++
387 line_stack.pop() // already a string
388 }
389 }
390 }
391
392 // Traverse the tree and prune the useless lines / line groups for the reproduction
393 mut line_tree := *line_stack[0]
394 warn_on_false(string_reproduces(create_code(line_tree), error_msg, command, file_path,
395 true, timeout), 'string_reproduces', @LOCATION) // should be the same
396 log.info('Pruning the lines/line groups')
397 modified_smth = true
398 for modified_smth {
399 modified_smth = false
400 log.info('NEXT ITERATION, loop 2')
401 mut stack := []&Elem{}
402 for i in 0 .. line_tree.children.len {
403 stack << &line_tree.children[i]
404 }
405 mut item_nb := 0
406 for stack.len > 0 { // traverse the binary tree (of the lines)
407 mut item := stack.pop()
408 item_nb += 1
409 eprint('\ritem n: ${item_nb}')
410 if mut item is Scope {
411 if !item.ignored {
412 item.tmp_ignored = true
413 code := create_code(line_tree)
414 item.tmp_ignored = false // dont need it anymore
415 if string_reproduces(code, error_msg, command, file_path, false, timeout) {
416 item.ignored = true
417 modified_smth = true
418 outer_modified_smth = true
419 println('')
420 show_code_stats(code)
421 } else { // if can remove it, can remove its children
422 for i in 0 .. item.children.len {
423 stack << &item.children[i]
424 }
425 }
426 }
427 }
428 }
429 println('')
430 text_code = create_code(line_tree)
431 os.write_file(rpdc_file_path, text_code) or { panic(err) }
432 if do_fmt {
433 vfmt_file(rpdc_file_path)
434 }
435 println('The WIP reduced code is now in ${rpdc_file_path}')
436 }
437 }
438
439 warn_on_false(string_reproduces(text_code, error_msg, command, file_path, true, timeout),
440 'string_reproduces', @LOCATION)
441 os.write_file(rpdc_file_path, text_code) or { panic(err) }
442 if do_fmt {
443 vfmt_file(rpdc_file_path)
444 }
445 println('The reduced code is now in ${rpdc_file_path}')
446}
447
448fn vfmt_file(rpdc_file_path string) {
449 os.execute('${os.quoted_path(@VEXE)} fmt -w ${rpdc_file_path}')
450 final_content := os.read_file(rpdc_file_path) or { panic(err) }
451 show_code_stats(final_content, label: 'Code size after formatting')
452}
453
454@[params]
455struct ShowParams {
456 label string = 'Code size'
457}
458
459fn show_code_stats(code string, params ShowParams) {
460 lines := code.split_into_lines()
461 log.info('${params.label}: ${code.len} chars, ${lines.len} lines.')
462}
463
464fn warn_on_false(res bool, what string, loc string) {
465 if !res {
466 log.warn('${what} is false, at ${loc}; see output above')
467 }
468}
469