v / bench / bench_string_dedup.v
121 lines · 104 sloc · 3.35 KB · e517dd9059bd568ce41dfd7e3820a9305de789b0
Raw
1// Benchmark comparison of four string deduplication methods in V: basic array, pre-allocated array, map, and set
2module main
3
4import time
5import datatypes
6
7// Method 1: Using basic array (no pre-allocation)
8struct Context1 {
9mut:
10 used_str []string
11}
12
13fn (mut c Context1) add_used(str string) {
14 if str !in c.used_str {
15 c.used_str << str
16 }
17}
18
19// Method 2: Using pre-allocated array with capacity
20struct Context2 {
21mut:
22 used_str []string
23}
24
25fn (mut c Context2) add_used(str string) {
26 if str !in c.used_str {
27 c.used_str << str
28 }
29}
30
31// Method 3: Using map
32struct Context3 {
33mut:
34 used_str map[string]bool
35}
36
37fn (mut c Context3) add_used(str string) {
38 c.used_str[str] = true
39}
40
41// Method 4: Using set
42struct Context4 {
43mut:
44 used_str datatypes.Set[string]
45}
46
47fn (mut c Context4) add_used(str string) {
48 c.used_str.add(str)
49}
50
51// Generate random test strings
52fn generate_test_strings(count int, duplicate_ratio f64) []string {
53 mut strs := []string{cap: count}
54 unique_count := int(f64(count) * (1.0 - duplicate_ratio))
55 // First generate a batch of unique strings
56 for i in 0 .. unique_count {
57 strs << 'str_${i}_${time.ticks()}' // Add timestamp to reduce duplication rate
58 }
59 // The remaining part uses duplicate strings
60 for i in 0 .. (count - unique_count) {
61 strs << strs[i % unique_count] // Cycle through the first half of strings to create duplicates
62 }
63 return strs
64}
65
66fn main() {
67 num_strs := 10000 // Total number of strings
68 duplicate_ratio := 0.3 // Duplicate string ratio (30%)
69 test_strs := generate_test_strings(num_strs, duplicate_ratio)
70 println('Generated test strings: ${test_strs.len} (approximately ${int(duplicate_ratio * 100)}% are duplicates)')
71
72 // Test method 1: basic array (no pre-allocation)
73 mut ctx1 := Context1{}
74 sw1 := time.new_stopwatch()
75 for str in test_strs {
76 ctx1.add_used(str)
77 }
78 time1 := sw1.elapsed().milliseconds()
79 println('Method 1 (basic array) - Time: ${time1}ms, Final unique strings: ${ctx1.used_str.len}')
80
81 // Test method 2: pre-allocated array
82 mut ctx2 := Context2{
83 used_str: []string{cap: num_strs} // Pre-allocate capacity to avoid reallocations
84 }
85 sw2 := time.new_stopwatch()
86 for str in test_strs {
87 ctx2.add_used(str)
88 }
89 time2 := sw2.elapsed().milliseconds()
90 println('Method 2 (pre-allocated array) - Time: ${time2}ms, Final unique strings: ${ctx2.used_str.len}')
91
92 // Test method 3: map
93 mut ctx3 := Context3{}
94 sw3 := time.new_stopwatch()
95 for str in test_strs {
96 ctx3.add_used(str)
97 }
98 time3 := sw3.elapsed().milliseconds()
99 println('Method 3 (map) - Time: ${time3}ms, Final unique strings: ${ctx3.used_str.len}')
100
101 // Test method 4: set
102 mut ctx4 := Context4{}
103 sw4 := time.new_stopwatch()
104 for str in test_strs {
105 ctx4.add_used(str)
106 }
107 time4 := sw4.elapsed().milliseconds()
108 println('Method 4 (set) - Time: ${time4}ms, Final unique strings: ${ctx4.used_str.size()}')
109
110 // Performance comparison
111 println('\nPerformance comparison:')
112 println('Method 2 (pre-allocated array) is ${f64(time1) / f64(time2):.2f} times faster than method 1 (basic array)')
113 println('Method 3 (map) is ${f64(time1) / f64(time3):.2f} times faster than method 1 (basic array)')
114 println('Method 4 (set) is ${f64(time1) / f64(time4):.2f} times faster than method 1 (basic array)')
115
116 if time3 < time4 {
117 println('Map is slightly faster than set, difference: ${time4 - time3}ms')
118 } else {
119 println('Set is slightly faster than map, difference: ${time3 - time4}ms')
120 }
121}
122