From c72dde20d0a435069398220abddd0071c118dc77 Mon Sep 17 00:00:00 2001
From: skeris <49439433+skeris@users.noreply.github.com>
Date: Mon, 23 Sep 2024 13:56:00 +0300
Subject: [PATCH] vlib: add a go like `x.benchmark` module, that estimates automatically how many iterations are needed, to get a statistically significant result (#22215)

---
 vlib/x/benchmark/benchmark.v      | 254 ++++++++++++++++++++++++++++++
 vlib/x/benchmark/benchmark_test.v | 169 ++++++++++++++++++++
 2 files changed, 423 insertions(+)
 create mode 100644 vlib/x/benchmark/benchmark.v
 create mode 100644 vlib/x/benchmark/benchmark_test.v

diff --git a/vlib/x/benchmark/benchmark.v b/vlib/x/benchmark/benchmark.v
new file mode 100644
index 000000000..2d334b6c0
--- /dev/null
+++ b/vlib/x/benchmark/benchmark.v
@@ -0,0 +1,254 @@
+module benchmark
+
+import time
+import math
+import sync
+
+// Benchmark represents all significant data for benchmarking. Provides a clear way of getting the result in a convenient way by exported methods
+@[noinit]
+pub struct Benchmark {
+pub mut:
+	n                i64 // Number of iterations. Set explicitly or computed from expected time of benchmarking
+	bench_func       fn () ! @[required] // function for benchmarking
+	bench_time       time.Duration   // expected (goal) duration of the benchmark; setup fills it from BenchmarkDefaults.duration
+	is_parallel      bool            // if true every bench_func run is spawned concurrently
+	benchmark_result BenchmarkResult // accumulator of benchmark metrics
+	timer_on         bool            // inner flag of time recording
+	start_time       time.Time       // start timestamp of timer
+	duration         time.Duration   // elapsed time, accumulated every time the timer is stopped
+	failed           bool            // flag of bench_func failure. set when a sequential bench_func run returned an error
+	start_memory     usize           // gc_memory_use() value taken when the timer was started
+	start_allocs     usize           // gc_heap_usage().bytes_since_gc value taken when the timer was started
+}
+
+// BenchmarkDefaults is params struct for providing parameters of benchmarking to setup function
+// - n - number of iterations. set if you know how many runs of function you need. 
+//   if you don't know how many you need - set 0
+// - duration - by default 1s. expected duration of all benchmark runs. must be set to 0 if is_parallel == true, setup rejects the combination
+// - is_parallel - if true, every bench_func run is spawned concurrently
+@[params]
+pub struct BenchmarkDefaults {
+pub:
+	duration    time.Duration = time.second
+	is_parallel bool
+	n           i64
+}
+
+// setup - constructor of benchmark. returns an error for a `nil` bench_func
+// and for a parallel benchmark that is combined with a positive duration
+// - bench_func - function to benchmark. required, if you have no function - you don't need benchmark
+// - params - structure of benchmark parameters
+pub fn setup(bench_func fn () !, params BenchmarkDefaults) !Benchmark {
+	if bench_func == unsafe { nil } {
+		return error('Benchmark function cannot be `nil`')
+	}
+
+	if params.duration > 0 && params.is_parallel {
+		return error('can not predict number of parallel iterations')
+	}
+
+	return Benchmark{
+		n: params.n
+		bench_func: bench_func
+		bench_time: params.duration
+		is_parallel: params.is_parallel
+	}
+}
+
+// run - start benchmarking, record the result in benchmark_result and print it
+// run benchmark n times, or duration time
+pub fn (mut b Benchmark) run() {
+	// run bench_func one time for heat up processor cache and get elapsed time for n prediction
+	b.run_n(1)
+
+	// a failed warm-up counts as the single run; fall through so the result is recorded and printed once
+	if b.failed {
+		b.n = 1
+	}
+
+	// if n is provided we should run exactly n times. but 1 time we already run
+	if b.n > 1 {
+		b.run_n(b.n - 1)
+	}
+
+	// if n is zero then we should run bench_func enough times to reach the expected duration
+	if b.n == 0 {
+		b.n = 1
+		// if one of runs failed - bench_func is not valid
+		// but 1e9 times of evaluation is too much
+		// so we need to repeat prediction-execution process while elapsed time is less than expected time
+		for !b.failed && b.duration < b.bench_time && b.n < 1000000000 {
+			// we need predict new amount of executions to estimate expected time
+			n := b.predict_n()
+
+			// the prediction is used as the number of additional runs, so run it and add it to the total
+			b.run_n(n)
+			b.n += n
+		}
+	}
+
+	// if n is provided, duration will be calculated. otherwise n will
+	b.benchmark_result.n = b.n
+	b.benchmark_result.t = b.duration
+
+	// regardless of how the result is used later (sent by api, sent to chat, processed, logged, etc), we print it
+	b.benchmark_result.print()
+}
+
+// run_n - run bench_func n times and accumulate the measurements
+fn (mut b Benchmark) run_n(n i64) {
+	// clear memory for avoid GC influence
+	gc_collect()
+
+	// reset and start timer for get elapsed time
+	b.reset_timer()
+	b.start_timer()
+
+	// unwrap function from struct field
+	mut f := b.bench_func
+
+	if !b.is_parallel {
+		// run n times sequentially
+		for i := i64(0); i < n; i++ {
+			f() or {
+				// on failure: set failed flag, do not count the runs that did not succeed, print err,
+				// stop the timer (otherwise timer_on would stay set after a failure) and stop execution
+				b.failed = true
+				b.n -= n - i
+				eprintln('Error: ${err}')
+				b.stop_timer()
+				return
+			}
+		}
+	}
+
+	// spawn n workers, wait until all of them are spawned, then release them at once
+	if b.is_parallel {
+		// WaitGroup that holds the workers back until all of them are spawned
+		mut spawnwg := sync.new_waitgroup()
+		spawnwg.add(int(n))
+		// WaitGroup for wait of end of execution
+		mut workwg := sync.new_waitgroup()
+		workwg.add(int(n))
+
+		for i := i64(0); i < n; i++ {
+			spawn run_in_one_time(mut workwg, mut spawnwg, f)
+			spawnwg.done()
+		}
+		workwg.wait()
+	}
+
+	// stop timer and collect data
+	b.stop_timer()
+}
+
+// run_in_one_time - wait until spawnwg is released, run f once (its error is dropped) and report to workwg
+fn run_in_one_time(mut workwg sync.WaitGroup, mut spawnwg sync.WaitGroup, f fn () !) {
+	defer {
+		workwg.done()
+	}
+	spawnwg.wait()
+	f() or { return } // TODO: add error handling
+}
+
+// predict_n - predict number of executions needed to reach the goal duration
+// based on the iterations and the elapsed time measured so far
+fn (mut b Benchmark) predict_n() i64 {
+	mut goal_ns := b.bench_time.nanoseconds()
+	prev_iters := b.n
+	// get elapsed time in nanoseconds
+	mut prev_ns := b.duration.nanoseconds()
+
+	// to avoid division by zero
+	if prev_ns <= 0 {
+		prev_ns = 1
+	}
+
+	// multiply first, so the integer division does not truncate a small ratio to 0
+	// NOTE(review): goal_ns * prev_iters can exceed the i64 range for very long goals or huge n - confirm limits
+	mut n := goal_ns * prev_iters
+	n = n / prev_ns
+	// grow at least in 1.2
+	n += n / 5
+
+	// to not grow too fast
+	n = math.min(n, 100 * b.n)
+	// to grow at least on 1
+	n = math.max(n, b.n + 1)
+	// to avoid run more than 1e9 times
+	n = math.min(n, 1000000000)
+
+	return n
+}
+
+// reset_timer - restart a running timer: refresh start time and start memory data
+fn (mut b Benchmark) reset_timer() {
+	// only a running timer is restarted, a stopped one is left untouched
+	if b.timer_on {
+		b.start_time = time.now()
+		b.start_memory = gc_memory_use()
+		b.start_allocs = gc_heap_usage().bytes_since_gc
+	}
+}
+
+// start_timer - register start time and start measures of memory
+fn (mut b Benchmark) start_timer() {
+	// you do not need to start timer that already started
+	if !b.timer_on {
+		b.start_time = time.now()
+		b.start_memory = gc_memory_use()
+		b.start_allocs = gc_heap_usage().bytes_since_gc
+		b.timer_on = true
+	}
+}
+
+// stop_timer - accumulate benchmark data (elapsed time, memory growth, heap usage) and stop the timer
+fn (mut b Benchmark) stop_timer() {
+	if b.timer_on {
+		b.duration += time.since(b.start_time)
+		// usize deltas would wrap around if a counter shrank (presumably possible after a GC cycle): clamp at 0
+		mem_now := gc_memory_use()
+		allocs_now := gc_heap_usage().bytes_since_gc
+		b.benchmark_result.mem += if mem_now > b.start_memory { mem_now - b.start_memory } else { usize(0) }
+		b.benchmark_result.allocs += if allocs_now > b.start_allocs { allocs_now - b.start_allocs } else { usize(0) }
+		b.timer_on = false
+	}
+}
+
+// BenchmarkResult - struct to 
+//   represent result of benchmark
+struct BenchmarkResult {
+pub mut:
+	n      i64           // iterations count
+	t      time.Duration // elapsed time
+	mem    usize         // accumulated growth of gc_memory_use() over the timed runs
+	allocs usize         // accumulated growth of gc_heap_usage().bytes_since_gc over the timed runs
+}
+
+// per_op - divide an accumulated total by the iterations count. returns 0 when nothing was run
+fn (r BenchmarkResult) per_op(total i64) i64 {
+	if r.n <= 0 {
+		return 0
+	}
+	return total / r.n
+}
+
+// ns_per_op - elapsed time in nanoseconds per iteration
+fn (r BenchmarkResult) ns_per_op() i64 {
+	return r.per_op(r.t.nanoseconds())
+}
+
+// allocs_per_op - heap usage per iteration
+fn (r BenchmarkResult) allocs_per_op() i64 {
+	return r.per_op(i64(r.allocs))
+}
+
+// alloced_bytes_per_op - memory usage per iteration
+fn (r BenchmarkResult) alloced_bytes_per_op() i64 {
+	return r.per_op(i64(r.mem))
+}
+
+// print - all measurements
+fn (r BenchmarkResult) print() {
+	println('Iterations: ${r.n}\t\tTotal Duration: ${r.t}\tns/op: ${r.ns_per_op()}\tB/op: ${r.alloced_bytes_per_op()}\tallocs/op: ${r.allocs_per_op()}')
+}
diff --git a/vlib/x/benchmark/benchmark_test.v b/vlib/x/benchmark/benchmark_test.v
new file mode 100644
index 000000000..539389805
--- /dev/null
+++ b/vlib/x/benchmark/benchmark_test.v
@@ -0,0 +1,169 @@
+module benchmark
+
+import time
+
+// if n == 0, n predict == 1
+fn test_predict_n_zero() {
+	mut b := Benchmark{
+		n: 0
+		duration: 0
+		bench_time: time.second
+		bench_func: fn () ! {}
+	}
+	expected := 1
+	// without previous iterations the prediction must still grow by one run
+	assert b.predict_n() == expected
+}
+
+// n can't be more than 1000000000, no matter how many iterations ran before
+fn test_predict_n_limit() {
+	mut b := Benchmark{
+		n: 10000000000
+		duration: 0
+		bench_time: time.second
+		bench_func: fn () ! {}
+	}
+	expected := 1000000000
+	assert b.predict_n() == expected
+}
+
+// test prediction for slow bench function
+fn test_slow_fn() {
+	mut b := Benchmark{
+		duration: time.second
+		bench_func: fn () ! {}
+	}
+	assert b.predict_n() == 1
+}
+
+// if bench_func cause error set failed true, n = 1
+fn test_fn_with_error() {
+	f := fn () ! {
+		return error('error')
+	}
+	mut bench := setup(f) or {
+		assert false, 'Error creating benchmark: ${err}'
+		return
+	}
+
+	bench.run()
+
+	assert bench.failed == true
+	assert bench.benchmark_result.n == 1
+}
+
+fn test_n_must_be_over_1() {
+	f := fn () ! {
+		mut i := 0
+		i++
+	}
+	mut bench := setup(f) or {
+		assert false, 'Error creating benchmark: ${err}'
+		return
+	}
+
+	bench.run()
+
+	assert bench.benchmark_result.n > 1
+}
+
+fn test_n() {
+	f := fn () ! {
+		mut i := 0
+		i++
+	}
+	mut bench := setup(f, BenchmarkDefaults{
+		n: 1000
+	}) or {
+		assert false, 'Error creating benchmark: ${err}'
+		return
+	}
+
+	bench.run()
+
+	assert bench.benchmark_result.n == 1000
+}
+
+fn test_max_bench_time() {
+	f := fn () ! {
+		time.sleep(500 * time.millisecond)
+	}
+	mut bench := setup(f) or {
+		assert false, 'Error creating benchmark: ${err}'
+		return
+	}
+
+	bench.run()
+
+	assert bench.benchmark_result.n == 3
+	assert bench.benchmark_result.t >= time.second
+}
+
+fn test_performance() {
+	scheduler := [func_1, func_2, func_3]
+	expected := [false, false, false]
+	mut actual := []bool{}
+
+	for i in scheduler {
+		mut bench := setup(i) or {
+			assert false, 'Error creating benchmark: ${err}'
+			return
+		}
+
+		bench.run()
+		actual << bench.failed
+	}
+
+	assert expected.len == actual.len
+	for i := 0; i < expected.len; i++ {
+		assert expected[i] == actual[i]
+	}
+}
+
+fn func_1() ! {
+	mut arr := []int{}
+	appender(mut arr)
+	assert arr.len == 10
+}
+
+fn appender(mut arr []int) {
+	if arr.len == 10 {
+		return
+	}
+	arr << 1
+	appender(mut arr)
+}
+
+fn func_2() ! {
+	target := 2
+	arr := [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+	mut left := 0
+	mut right := arr.len - 1
+
+	for left <= right {
+		mid := left + (right - left) / 2
+		if arr[mid] == target {
+			return
+		}
+		if arr[mid] < target {
+			left = mid + 1
+		}
+		if arr[mid] > target {
+			right = mid - 1
+		}
+	}
+	return
+}
+
+fn func_3() ! {
+	mut arr := [10, 2, 13, 4, 5, 16, 7, 1, 9, 20]
+
+	for i := 0; i < arr.len - 1; i++ {
+		for j := 0; j < arr.len - i - 1; j++ {
+			if arr[j] > arr[j + 1] {
+				arr[j], arr[j + 1] = arr[j + 1], arr[j]
+			}
+		}
+	}
+}
-- 
2.39.5