Gitly


1 // Benchmark comparison of four string deduplication methods in V: basic array, pre-allocated array, map, and set
2 module main
3 
4 import time
5 import datatypes
6 
7 // Method 1: Using basic array (no pre-allocation)
8 struct Context1 {
9 mut:
10     used_str []string
11 }
12 
13 fn (mut c Context1) add_used(str string) {
14     if str !in c.used_str {
15         c.used_str << str
16     }
17 }
18 
19 // Method 2: Using pre-allocated array with capacity
20 struct Context2 {
21 mut:
22     used_str []string
23 }
24 
25 fn (mut c Context2) add_used(str string) {
26     if str !in c.used_str {
27         c.used_str << str
28     }
29 }
30 
31 // Method 3: Using map
32 struct Context3 {
33 mut:
34     used_str map[string]bool
35 }
36 
37 fn (mut c Context3) add_used(str string) {
38     c.used_str[str] = true
39 }
40 
41 // Method 4: Using set
42 struct Context4 {
43 mut:
44     used_str datatypes.Set[string]
45 }
46 
47 fn (mut c Context4) add_used(str string) {
48     c.used_str.add(str)
49 }
50 
51 // Generate random test strings
52 fn generate_test_strings(count int, duplicate_ratio f64) []string {
53     mut strs := []string{cap: count}
54     unique_count := int(f64(count) * (1.0 - duplicate_ratio))
55     // First generate a batch of unique strings
56     for i in 0 .. unique_count {
57         strs << 'str_${i}_${time.ticks()}' // Add timestamp to reduce duplication rate
58     }
59     // The remaining part uses duplicate strings
60     for i in 0 .. (count - unique_count) {
61         strs << strs[i % unique_count] // Cycle through the first half of strings to create duplicates
62     }
63     return strs
64 }
65 
66 fn main() {
67     num_strs := 10000 // Total number of strings
68     duplicate_ratio := 0.3 // Duplicate string ratio (30%)
69     test_strs := generate_test_strings(num_strs, duplicate_ratio)
70     println('Generated test strings: ${test_strs.len} (approximately ${int(duplicate_ratio * 100)}% are duplicates)')
71 
72     // Test method 1: basic array (no pre-allocation)
73     mut ctx1 := Context1{}
74     sw1 := time.new_stopwatch()
75     for str in test_strs {
76         ctx1.add_used(str)
77     }
78     time1 := sw1.elapsed().milliseconds()
79     println('Method 1 (basic array) - Time: ${time1}ms, Final unique strings: ${ctx1.used_str.len}')
80 
81     // Test method 2: pre-allocated array
82     mut ctx2 := Context2{
83         used_str: []string{cap: num_strs} // Pre-allocate capacity to avoid reallocations
84     }
85     sw2 := time.new_stopwatch()
86     for str in test_strs {
87         ctx2.add_used(str)
88     }
89     time2 := sw2.elapsed().milliseconds()
90     println('Method 2 (pre-allocated array) - Time: ${time2}ms, Final unique strings: ${ctx2.used_str.len}')
91 
92     // Test method 3: map
93     mut ctx3 := Context3{}
94     sw3 := time.new_stopwatch()
95     for str in test_strs {
96         ctx3.add_used(str)
97     }
98     time3 := sw3.elapsed().milliseconds()
99     println('Method 3 (map) - Time: ${time3}ms, Final unique strings: ${ctx3.used_str.len}')
100 
101     // Test method 4: set
102     mut ctx4 := Context4{}
103     sw4 := time.new_stopwatch()
104     for str in test_strs {
105         ctx4.add_used(str)
106     }
107     time4 := sw4.elapsed().milliseconds()
108     println('Method 4 (set) - Time: ${time4}ms, Final unique strings: ${ctx4.used_str.size()}')
109 
110     // Performance comparison
111     println('\nPerformance comparison:')
112     println('Method 2 (pre-allocated array) is ${f64(time1) / f64(time2):.2f} times faster than method 1 (basic array)')
113     println('Method 3 (map) is ${f64(time1) / f64(time3):.2f} times faster than method 1 (basic array)')
114     println('Method 4 (set) is ${f64(time1) / f64(time4):.2f} times faster than method 1 (basic array)')
115 
116     if time3 < time4 {
117         println('Map is slightly faster than set, difference: ${time4 - time3}ms')
118     } else {
119         println('Set is slightly faster than map, difference: ${time3 - time4}ms')
120     }
121 }
122

1	// Benchmark comparison of four string deduplication methods in V: basic array, pre-allocated array, map, and set
2	module main
3
4	import time
5	import datatypes
6
7	// Method 1: Using basic array (no pre-allocation)
8	struct Context1 {
9	mut:
10	used_str []string
11	}
12
13	fn (mut c Context1) add_used(str string) {
14	if str !in c.used_str {
15	c.used_str << str
16	}
17	}
18
19	// Method 2: Using pre-allocated array with capacity
20	struct Context2 {
21	mut:
22	used_str []string
23	}
24
25	fn (mut c Context2) add_used(str string) {
26	if str !in c.used_str {
27	c.used_str << str
28	}
29	}
30
31	// Method 3: Using map
32	struct Context3 {
33	mut:
34	used_str map[string]bool
35	}
36
37	fn (mut c Context3) add_used(str string) {
38	c.used_str[str] = true
39	}
40
41	// Method 4: Using set
42	struct Context4 {
43	mut:
44	used_str datatypes.Set[string]
45	}
46
47	fn (mut c Context4) add_used(str string) {
48	c.used_str.add(str)
49	}
50
51	// Generate random test strings
52	fn generate_test_strings(count int, duplicate_ratio f64) []string {
53	mut strs := []string{cap: count}
54	unique_count := int(f64(count) * (1.0 - duplicate_ratio))
55	// First generate a batch of unique strings
56	for i in 0 .. unique_count {
57	strs << 'str_${i}_${time.ticks()}' // Add timestamp to reduce duplication rate
58	}
59	// The remaining part uses duplicate strings
60	for i in 0 .. (count - unique_count) {
61	strs << strs[i % unique_count] // Cycle through the first half of strings to create duplicates
62	}
63	return strs
64	}
65
66	fn main() {
67	num_strs := 10000 // Total number of strings
68	duplicate_ratio := 0.3 // Duplicate string ratio (30%)
69	test_strs := generate_test_strings(num_strs, duplicate_ratio)
70	println('Generated test strings: ${test_strs.len} (approximately ${int(duplicate_ratio * 100)}% are duplicates)')
71
72	// Test method 1: basic array (no pre-allocation)
73	mut ctx1 := Context1{}
74	sw1 := time.new_stopwatch()
75	for str in test_strs {
76	ctx1.add_used(str)
77	}
78	time1 := sw1.elapsed().milliseconds()
79	println('Method 1 (basic array) - Time: ${time1}ms, Final unique strings: ${ctx1.used_str.len}')
80
81	// Test method 2: pre-allocated array
82	mut ctx2 := Context2{
83	used_str: []string{cap: num_strs} // Pre-allocate capacity to avoid reallocations
84	}
85	sw2 := time.new_stopwatch()
86	for str in test_strs {
87	ctx2.add_used(str)
88	}
89	time2 := sw2.elapsed().milliseconds()
90	println('Method 2 (pre-allocated array) - Time: ${time2}ms, Final unique strings: ${ctx2.used_str.len}')
91
92	// Test method 3: map
93	mut ctx3 := Context3{}
94	sw3 := time.new_stopwatch()
95	for str in test_strs {
96	ctx3.add_used(str)
97	}
98	time3 := sw3.elapsed().milliseconds()
99	println('Method 3 (map) - Time: ${time3}ms, Final unique strings: ${ctx3.used_str.len}')
100
101	// Test method 4: set
102	mut ctx4 := Context4{}
103	sw4 := time.new_stopwatch()
104	for str in test_strs {
105	ctx4.add_used(str)
106	}
107	time4 := sw4.elapsed().milliseconds()
108	println('Method 4 (set) - Time: ${time4}ms, Final unique strings: ${ctx4.used_str.size()}')
109
110	// Performance comparison
111	println('\nPerformance comparison:')
112	println('Method 2 (pre-allocated array) is ${f64(time1) / f64(time2):.2f} times faster than method 1 (basic array)')
113	println('Method 3 (map) is ${f64(time1) / f64(time3):.2f} times faster than method 1 (basic array)')
114	println('Method 4 (set) is ${f64(time1) / f64(time4):.2f} times faster than method 1 (basic array)')
115
116	if time3 < time4 {
117	println('Map is slightly faster than set, difference: ${time4 - time3}ms')
118	} else {
119	println('Set is slightly faster than map, difference: ${time3 - time4}ms')
120	}
121	}
122