// Benchmark comparison of four string deduplication methods in V: basic array, pre-allocated array, map, and set
module main

import time
import datatypes

// Method 1: plain dynamic array, created empty with no capacity reserved up front
struct Context1 {
mut:
	// Distinct strings recorded so far, in the order they were first added.
	used_str []string
}

// add_used records `str` in `used_str` unless an equal string is already stored there.
// Membership is checked with the array `in` operator before appending.
fn (mut c Context1) add_used(str string) {
	if str in c.used_str {
		// Already recorded: nothing to do.
		return
	}
	c.used_str << str
}

// Method 2: dynamic array whose capacity is reserved by the caller when the context is built
// (the struct itself is identical to the basic-array variant; only its initialisation differs)
struct Context2 {
mut:
	// Distinct strings recorded so far, in the order they were first added.
	used_str []string
}

// add_used records `str` in `used_str` unless an equal string is already stored there.
// Membership is checked with the array `in` operator before appending.
fn (mut c Context2) add_used(str string) {
	if str in c.used_str {
		// Already recorded: nothing to do.
		return
	}
	c.used_str << str
}

// Method 3: Using map
// The strings are stored as map keys; the bool value is always `true` and only marks presence.
struct Context3 {
mut:
	// Keys are the recorded strings; every stored value is `true`.
	used_str map[string]bool
}

// add_used marks `str` as used by assigning `true` under that key.
// Assigning an existing key again leaves the key count unchanged, so no explicit
// membership check is performed here.
fn (mut c Context3) add_used(str string) {
	c.used_str[str] = true
}

// Method 4: Using set
// De-duplication is delegated entirely to `datatypes.Set`.
struct Context4 {
mut:
	// Set of recorded strings.
	used_str datatypes.Set[string]
}

// add_used forwards `str` to the set's `add` method.
// NOTE(review): relies on `datatypes.Set.add` ignoring elements that are already
// present — confirm against the `datatypes` module documentation.
fn (mut c Context4) add_used(str string) {
	c.used_str.add(str)
}

// generate_test_strings builds the benchmark input: `count` strings of which roughly
// `duplicate_ratio` (expected in the range 0.0 .. 1.0) are repeats of earlier entries.
//
// Layout of the result:
//   - first `unique_count` entries: distinct strings of the form `str_<index>_<ticks>`
//   - remaining entries: copies of the distinct entries, cycled in order
//
// Edge cases:
//   - `count <= 0` returns an empty array.
//   - The number of distinct strings is clamped to `1 ..= count`, so a ratio >= 1.0
//     (which would otherwise make the cycling step divide by zero) yields one distinct
//     string repeated, and a negative ratio never produces more than `count` strings.
fn generate_test_strings(count int, duplicate_ratio f64) []string {
	if count <= 0 {
		return []string{}
	}
	mut unique_count := int(f64(count) * (1.0 - duplicate_ratio))
	if unique_count < 1 {
		unique_count = 1
	} else if unique_count > count {
		unique_count = count
	}
	mut strs := []string{cap: count}
	// The index alone already makes every string distinct; the tick value is only a
	// per-run suffix, so it is read once instead of on every iteration.
	stamp := time.ticks()
	for i in 0 .. unique_count {
		strs << 'str_${i}_${stamp}'
	}
	// Fill the rest by cycling through the distinct strings generated above.
	for i in 0 .. (count - unique_count) {
		strs << strs[i % unique_count]
	}
	return strs
}

// print_speedup reports how many times faster a method ran compared with the
// basic-array baseline. When the measured time is zero (below timer resolution)
// the ratio is undefined, so a note is printed instead of `inf`/`nan`.
fn print_speedup(label string, baseline_ms f64, other_ms f64) {
	if other_ms <= 0.0 {
		println('${label}: elapsed time too small to measure, speed-up vs method 1 (basic array) is undefined')
		return
	}
	println('${label} is ${baseline_ms / other_ms:.2f} times faster than method 1 (basic array)')
}

// main runs the four de-duplication strategies over the same generated input,
// prints the elapsed time and resulting unique count for each, then compares them.
// Times are captured in microseconds and shown as fractional milliseconds so that
// the fast map/set runs do not round down to 0 ms.
fn main() {
	num_strs := 10000 // Total number of strings
	duplicate_ratio := 0.3 // Duplicate string ratio (30%)
	test_strs := generate_test_strings(num_strs, duplicate_ratio)
	println('Generated test strings: ${test_strs.len} (approximately ${int(duplicate_ratio * 100)}% are duplicates)')

	// Test method 1: basic array (no pre-allocation)
	mut ctx1 := Context1{}
	sw1 := time.new_stopwatch()
	for str in test_strs {
		ctx1.add_used(str)
	}
	time1 := f64(sw1.elapsed().microseconds()) / 1000.0
	println('Method 1 (basic array) - Time: ${time1:.3f}ms, Final unique strings: ${ctx1.used_str.len}')

	// Test method 2: pre-allocated array
	mut ctx2 := Context2{
		used_str: []string{cap: num_strs} // Pre-allocate capacity to avoid reallocations
	}
	sw2 := time.new_stopwatch()
	for str in test_strs {
		ctx2.add_used(str)
	}
	time2 := f64(sw2.elapsed().microseconds()) / 1000.0
	println('Method 2 (pre-allocated array) - Time: ${time2:.3f}ms, Final unique strings: ${ctx2.used_str.len}')

	// Test method 3: map
	mut ctx3 := Context3{}
	sw3 := time.new_stopwatch()
	for str in test_strs {
		ctx3.add_used(str)
	}
	time3 := f64(sw3.elapsed().microseconds()) / 1000.0
	println('Method 3 (map) - Time: ${time3:.3f}ms, Final unique strings: ${ctx3.used_str.len}')

	// Test method 4: set
	mut ctx4 := Context4{}
	sw4 := time.new_stopwatch()
	for str in test_strs {
		ctx4.add_used(str)
	}
	time4 := f64(sw4.elapsed().microseconds()) / 1000.0
	println('Method 4 (set) - Time: ${time4:.3f}ms, Final unique strings: ${ctx4.used_str.size()}')

	// Performance comparison
	println('\nPerformance comparison:')
	print_speedup('Method 2 (pre-allocated array)', time1, time2)
	print_speedup('Method 3 (map)', time1, time3)
	print_speedup('Method 4 (set)', time1, time4)

	// Report a tie explicitly instead of attributing it to either container.
	if time3 < time4 {
		println('Map is slightly faster than set, difference: ${time4 - time3:.3f}ms')
	} else if time4 < time3 {
		println('Set is slightly faster than map, difference: ${time3 - time4:.3f}ms')
	} else {
		println('Map and set took the same measured time')
	}
}