From e517dd9059bd568ce41dfd7e3820a9305de789b0 Mon Sep 17 00:00:00 2001
From: kbkpbot
Date: Sun, 4 Jan 2026 15:55:30 +0800
Subject: [PATCH] bench: add benchmark for string dedup (#26254)

---
 bench/bench_string_dedup.v | 159 +++++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 bench/bench_string_dedup.v

diff --git a/bench/bench_string_dedup.v b/bench/bench_string_dedup.v
new file mode 100644
index 000000000..72ccd237b
--- /dev/null
+++ b/bench/bench_string_dedup.v
@@ -0,0 +1,159 @@
+// Benchmark comparison of four string deduplication methods in V: basic array, pre-allocated array, map, and set
+module main
+
+import time
+import datatypes
+
+// Method 1: Using basic array (no pre-allocation).
+// Membership is checked with a linear scan of the array.
+struct Context1 {
+mut:
+	used_str []string
+}
+
+// add_used appends `str` to the array unless it is already present.
+fn (mut c Context1) add_used(str string) {
+	if str !in c.used_str {
+		c.used_str << str
+	}
+}
+
+// Method 2: Using pre-allocated array with capacity.
+// Same logic as Context1; the caller creates `used_str` with `cap:` set.
+struct Context2 {
+mut:
+	used_str []string
+}
+
+// add_used appends `str` to the array unless it is already present.
+fn (mut c Context2) add_used(str string) {
+	if str !in c.used_str {
+		c.used_str << str
+	}
+}
+
+// Method 3: Using map (keys are the strings, values are always `true`)
+struct Context3 {
+mut:
+	used_str map[string]bool
+}
+
+// add_used records `str` as a map key; repeated keys overwrite the same entry.
+fn (mut c Context3) add_used(str string) {
+	c.used_str[str] = true
+}
+
+// Method 4: Using set
+struct Context4 {
+mut:
+	used_str datatypes.Set[string]
+}
+
+// add_used adds `str` to the set.
+fn (mut c Context4) add_used(str string) {
+	c.used_str.add(str)
+}
+
+// generate_test_strings returns `count` strings. About
+// `count * (1.0 - duplicate_ratio)` of them are unique; the rest repeat them.
+// The unique count is clamped to 1..count so that an out-of-range
+// `duplicate_ratio` (>= 1.0 or negative) cannot cause a modulo by zero or
+// produce more than `count` strings.
+fn generate_test_strings(count int, duplicate_ratio f64) []string {
+	if count <= 0 {
+		return []string{}
+	}
+	mut unique_count := int(f64(count) * (1.0 - duplicate_ratio))
+	if unique_count < 1 {
+		unique_count = 1
+	} else if unique_count > count {
+		unique_count = count
+	}
+	mut strs := []string{cap: count}
+	// First generate a batch of unique strings
+	for i in 0 .. unique_count {
+		strs << 'str_${i}_${time.ticks()}' // Timestamp suffix; the index already keeps each string distinct
+	}
+	// The remaining part cycles through the unique strings to create duplicates
+	for i in 0 .. (count - unique_count) {
+		strs << strs[i % unique_count]
+	}
+	return strs
+}
+
+// to_ms converts a duration to fractional milliseconds, so that runs
+// shorter than one millisecond are not reported as 0.
+fn to_ms(d time.Duration) f64 {
+	return f64(d.nanoseconds()) / 1000000.0
+}
+
+// speedup returns how many times faster `candidate` is than `baseline`.
+// It returns 0.0 when `candidate` is not positive, instead of dividing by zero.
+fn speedup(baseline time.Duration, candidate time.Duration) f64 {
+	if candidate.nanoseconds() <= 0 {
+		return 0.0
+	}
+	return f64(baseline.nanoseconds()) / f64(candidate.nanoseconds())
+}
+
+// main times the four methods on the same input and prints a comparison.
+fn main() {
+	num_strs := 10000 // Total number of strings
+	duplicate_ratio := 0.3 // Duplicate string ratio (30%)
+	test_strs := generate_test_strings(num_strs, duplicate_ratio)
+	println('Generated test strings: ${test_strs.len} (approximately ${int(duplicate_ratio * 100)}% are duplicates)')
+
+	// Test method 1: basic array (no pre-allocation)
+	mut ctx1 := Context1{}
+	sw1 := time.new_stopwatch()
+	for str in test_strs {
+		ctx1.add_used(str)
+	}
+	time1 := sw1.elapsed()
+	println('Method 1 (basic array) - Time: ${to_ms(time1):.3f}ms, Final unique strings: ${ctx1.used_str.len}')
+
+	// Test method 2: pre-allocated array
+	mut ctx2 := Context2{
+		used_str: []string{cap: num_strs} // Pre-allocate capacity to avoid reallocations
+	}
+	sw2 := time.new_stopwatch()
+	for str in test_strs {
+		ctx2.add_used(str)
+	}
+	time2 := sw2.elapsed()
+	println('Method 2 (pre-allocated array) - Time: ${to_ms(time2):.3f}ms, Final unique strings: ${ctx2.used_str.len}')
+
+	// Test method 3: map
+	mut ctx3 := Context3{}
+	sw3 := time.new_stopwatch()
+	for str in test_strs {
+		ctx3.add_used(str)
+	}
+	time3 := sw3.elapsed()
+	println('Method 3 (map) - Time: ${to_ms(time3):.3f}ms, Final unique strings: ${ctx3.used_str.len}')
+
+	// Test method 4: set
+	mut ctx4 := Context4{}
+	sw4 := time.new_stopwatch()
+	for str in test_strs {
+		ctx4.add_used(str)
+	}
+	time4 := sw4.elapsed()
+	println('Method 4 (set) - Time: ${to_ms(time4):.3f}ms, Final unique strings: ${ctx4.used_str.size()}')
+
+	// Performance comparison
+	println('\nPerformance comparison:')
+	println('Method 2 (pre-allocated array) is ${speedup(time1, time2):.2f} times faster than method 1 (basic array)')
+	println('Method 3 (map) is ${speedup(time1, time3):.2f} times faster than method 1 (basic array)')
+	println('Method 4 (set) is ${speedup(time1, time4):.2f} times faster than method 1 (basic array)')
+
+	map_ms := to_ms(time3)
+	set_ms := to_ms(time4)
+	if map_ms < set_ms {
+		println('Map is slightly faster than set, difference: ${set_ms - map_ms:.3f}ms')
+	} else if set_ms < map_ms {
+		println('Set is slightly faster than map, difference: ${map_ms - set_ms:.3f}ms')
+	} else {
+		println('Map and set took the same time')
+	}
+}
-- 
2.39.5