| 1 | // Benchmark comparison of four string deduplication methods in V: basic array, pre-allocated array, map, and set |
| 2 | module main |
| 3 | |
| 4 | import time |
| 5 | import datatypes |
| 6 | |
| 7 | // Method 1: Using basic array (no pre-allocation) |
| 8 | struct Context1 { |
| 9 | mut: |
| 10 | used_str []string |
| 11 | } |
| 12 | |
| 13 | fn (mut c Context1) add_used(str string) { |
| 14 | if str !in c.used_str { |
| 15 | c.used_str << str |
| 16 | } |
| 17 | } |
| 18 | |
| 19 | // Method 2: Using pre-allocated array with capacity |
| 20 | struct Context2 { |
| 21 | mut: |
| 22 | used_str []string |
| 23 | } |
| 24 | |
| 25 | fn (mut c Context2) add_used(str string) { |
| 26 | if str !in c.used_str { |
| 27 | c.used_str << str |
| 28 | } |
| 29 | } |
| 30 | |
| 31 | // Method 3: Using map |
| 32 | struct Context3 { |
| 33 | mut: |
| 34 | used_str map[string]bool |
| 35 | } |
| 36 | |
| 37 | fn (mut c Context3) add_used(str string) { |
| 38 | c.used_str[str] = true |
| 39 | } |
| 40 | |
| 41 | // Method 4: Using set |
| 42 | struct Context4 { |
| 43 | mut: |
| 44 | used_str datatypes.Set[string] |
| 45 | } |
| 46 | |
| 47 | fn (mut c Context4) add_used(str string) { |
| 48 | c.used_str.add(str) |
| 49 | } |
| 50 | |
| 51 | // Generate random test strings |
| 52 | fn generate_test_strings(count int, duplicate_ratio f64) []string { |
| 53 | mut strs := []string{cap: count} |
| 54 | unique_count := int(f64(count) * (1.0 - duplicate_ratio)) |
| 55 | // First generate a batch of unique strings |
| 56 | for i in 0 .. unique_count { |
| 57 | strs << 'str_${i}_${time.ticks()}' // Add timestamp to reduce duplication rate |
| 58 | } |
| 59 | // The remaining part uses duplicate strings |
| 60 | for i in 0 .. (count - unique_count) { |
| 61 | strs << strs[i % unique_count] // Cycle through the first half of strings to create duplicates |
| 62 | } |
| 63 | return strs |
| 64 | } |
| 65 | |
| 66 | fn main() { |
| 67 | num_strs := 10000 // Total number of strings |
| 68 | duplicate_ratio := 0.3 // Duplicate string ratio (30%) |
| 69 | test_strs := generate_test_strings(num_strs, duplicate_ratio) |
| 70 | println('Generated test strings: ${test_strs.len} (approximately ${int(duplicate_ratio * 100)}% are duplicates)') |
| 71 | |
| 72 | // Test method 1: basic array (no pre-allocation) |
| 73 | mut ctx1 := Context1{} |
| 74 | sw1 := time.new_stopwatch() |
| 75 | for str in test_strs { |
| 76 | ctx1.add_used(str) |
| 77 | } |
| 78 | time1 := sw1.elapsed().milliseconds() |
| 79 | println('Method 1 (basic array) - Time: ${time1}ms, Final unique strings: ${ctx1.used_str.len}') |
| 80 | |
| 81 | // Test method 2: pre-allocated array |
| 82 | mut ctx2 := Context2{ |
| 83 | used_str: []string{cap: num_strs} // Pre-allocate capacity to avoid reallocations |
| 84 | } |
| 85 | sw2 := time.new_stopwatch() |
| 86 | for str in test_strs { |
| 87 | ctx2.add_used(str) |
| 88 | } |
| 89 | time2 := sw2.elapsed().milliseconds() |
| 90 | println('Method 2 (pre-allocated array) - Time: ${time2}ms, Final unique strings: ${ctx2.used_str.len}') |
| 91 | |
| 92 | // Test method 3: map |
| 93 | mut ctx3 := Context3{} |
| 94 | sw3 := time.new_stopwatch() |
| 95 | for str in test_strs { |
| 96 | ctx3.add_used(str) |
| 97 | } |
| 98 | time3 := sw3.elapsed().milliseconds() |
| 99 | println('Method 3 (map) - Time: ${time3}ms, Final unique strings: ${ctx3.used_str.len}') |
| 100 | |
| 101 | // Test method 4: set |
| 102 | mut ctx4 := Context4{} |
| 103 | sw4 := time.new_stopwatch() |
| 104 | for str in test_strs { |
| 105 | ctx4.add_used(str) |
| 106 | } |
| 107 | time4 := sw4.elapsed().milliseconds() |
| 108 | println('Method 4 (set) - Time: ${time4}ms, Final unique strings: ${ctx4.used_str.size()}') |
| 109 | |
| 110 | // Performance comparison |
| 111 | println('\nPerformance comparison:') |
| 112 | println('Method 2 (pre-allocated array) is ${f64(time1) / f64(time2):.2f} times faster than method 1 (basic array)') |
| 113 | println('Method 3 (map) is ${f64(time1) / f64(time3):.2f} times faster than method 1 (basic array)') |
| 114 | println('Method 4 (set) is ${f64(time1) / f64(time4):.2f} times faster than method 1 (basic array)') |
| 115 | |
| 116 | if time3 < time4 { |
| 117 | println('Map is slightly faster than set, difference: ${time4 - time3}ms') |
| 118 | } else { |
| 119 | println('Set is slightly faster than map, difference: ${time3 - time4}ms') |
| 120 | } |
| 121 | } |
| 122 | |