| 1 | module benchmark |
| 2 | |
| 3 | import time |
| 4 | import math |
| 5 | import sync |
| 6 | |
| 7 | // Benchmark represent all significant data for benchmarking. Provide clear way for getting result in convinient way by exported methods |
| 8 | @[noinit] |
| 9 | pub struct Benchmark { |
| 10 | pub mut: |
| 11 | n i64 // Number of iterations. Set explicitly or computed from expected time of benchmarking |
| 12 | bench_func fn () ! @[required] // function for benchmarking |
| 13 | bench_time time.Duration // benchmark duration |
| 14 | is_parallel bool // if true every bench_func run in separate coroutine |
| 15 | benchmark_result BenchmarkResult // accumulator of benchmark metrics |
| 16 | timer_on bool // inner flag of time recording |
| 17 | start_time time.Time // start timestamp of timer |
| 18 | duration time.Duration // expected time of benchmark process |
| 19 | failed bool // flag of bench_func failure. true if one of bench_func run failed |
| 20 | start_memory usize // memory status on start benchmark |
| 21 | start_allocs usize // size of object allocated on heap |
| 22 | } |
| 23 | |
| 24 | // BenchmarkDefaults is params struct for providing parameters of benchmarking to setup function |
| 25 | // - n - number of iterations. set if you know how many runs of function you need. if you don't know how many you need - set 0 |
| 26 | // - duration - by default 1s. expecting duration of all benchmark runs. doesn't work if is_parallel == true |
| 27 | // - is_parallel - if true, every bench_func run in separate coroutine |
| 28 | @[params] |
| 29 | pub struct BenchmarkDefaults { |
| 30 | pub: |
| 31 | duration time.Duration = time.second |
| 32 | is_parallel bool |
| 33 | n i64 |
| 34 | } |
| 35 | |
| 36 | // Benchmark.new - constructor of benchmark |
| 37 | // arguments: |
| 38 | // - bench_func - function to benchmark. required, if you have no function - you don't need benchmark |
| 39 | // - params - structure of benchmark parameters |
| 40 | pub fn setup(bench_func fn () !, params BenchmarkDefaults) !Benchmark { |
| 41 | if bench_func == unsafe { nil } { |
| 42 | return error('Benchmark function cannot be `nil`') |
| 43 | } |
| 44 | |
| 45 | if params.duration > 0 && params.is_parallel { |
| 46 | return error('can not predict number of parallel iterations') |
| 47 | } |
| 48 | |
| 49 | return Benchmark{ |
| 50 | n: params.n |
| 51 | bench_func: bench_func |
| 52 | bench_time: params.duration |
| 53 | is_parallel: params.is_parallel |
| 54 | } |
| 55 | } |
| 56 | |
| 57 | // run_benchmark - function for start benchmarking |
| 58 | // run benchmark n times, or duration time |
| 59 | pub fn (mut b Benchmark) run() { |
| 60 | // run bench_func one time for heat up processor cache and get elapsed time for n prediction |
| 61 | b.run_n(1) |
| 62 | |
| 63 | // if one iteration failed no need to do more |
| 64 | if b.failed { |
| 65 | b.n = 1 |
| 66 | // show failed result. bad result is steel result |
| 67 | b.benchmark_result.print() |
| 68 | } |
| 69 | |
| 70 | // if n is provided we should run exactly n times. but 1 time we already run |
| 71 | if b.n > 1 { |
| 72 | b.run_n(b.n - 1) |
| 73 | } |
| 74 | |
| 75 | // if n is zero then we should run bench_func enough time for estimate duration time of execution |
| 76 | if b.n == 0 { |
| 77 | b.n = 1 |
| 78 | // if one of runs failed - bench_func is not valid |
| 79 | // but 1e9 times of evaluation is too much |
| 80 | // so we need to repeat prediction-execition process while elapsed time less then expected time |
| 81 | for !b.failed && b.duration < b.bench_time && b.n < 1000000000 { |
| 82 | // we need predict new amount of executions to estimate expected time |
| 83 | n := b.predict_n() |
| 84 | |
| 85 | // later we predict how many runs we need yet. so we run predicted times |
| 86 | b.run_n(n) |
| 87 | b.n += n |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | // if n is provided, duration will be calculated. otherwise n will |
| 92 | b.benchmark_result.n = b.n |
| 93 | b.benchmark_result.t = b.duration |
| 94 | |
| 95 | // despite of the way of usage of benchmark result(send py api, send to chat, process, logging, etc), we print it |
| 96 | b.benchmark_result.print() |
| 97 | } |
| 98 | |
| 99 | // run_n - run bench_func n times |
| 100 | fn (mut b Benchmark) run_n(n i64) { |
| 101 | // clear memory for avoid GC influence |
| 102 | gc_collect() |
| 103 | |
| 104 | // reset and start timer for get elapsed time |
| 105 | b.reset_timer() |
| 106 | b.start_timer() |
| 107 | |
| 108 | // unwrap function from struct field |
| 109 | mut f := b.bench_func |
| 110 | |
| 111 | if !b.is_parallel { |
| 112 | // run n times consistently |
| 113 | for i := i64(0); i < n; i++ { |
| 114 | f() or { |
| 115 | // if one execution failed print err, set failed flag and stop execution |
| 116 | b.failed = true |
| 117 | // workaround for consider unsuccesful runs |
| 118 | b.n -= n - i |
| 119 | eprintln('Error: ${err}') |
| 120 | return |
| 121 | } |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | // spawn n coroutines, wait end of spawning and unpause all coroutines |
| 126 | if b.is_parallel { |
| 127 | // WaitGroup for spawn and pause enough coroutines |
| 128 | mut spawnwg := sync.new_waitgroup() |
| 129 | spawnwg.add(int(n)) |
| 130 | // WaitGroup for wait of end of execution |
| 131 | mut workwg := sync.new_waitgroup() |
| 132 | workwg.add(int(n)) |
| 133 | |
| 134 | for i := i64(0); i < n; i++ { |
| 135 | spawn run_in_one_time(mut workwg, mut spawnwg, f) |
| 136 | spawnwg.done() |
| 137 | } |
| 138 | workwg.wait() |
| 139 | } |
| 140 | |
| 141 | // stop timer and collect data |
| 142 | b.stop_timer() |
| 143 | } |
| 144 | |
| 145 | fn run_in_one_time(mut workwg sync.WaitGroup, mut spawnwg sync.WaitGroup, f fn () !) { |
| 146 | defer { |
| 147 | workwg.done() |
| 148 | } |
| 149 | spawnwg.wait() |
| 150 | f() or { return } // TODO: add error handling |
| 151 | } |
| 152 | |
| 153 | // predict_n - predict number of executions to estimate duration |
| 154 | // based on previous values |
| 155 | fn (mut b Benchmark) predict_n() i64 { |
| 156 | // goal duration in nanoseconds |
| 157 | mut goal_ns := b.bench_time.nanoseconds() |
| 158 | // get number of previous iterations |
| 159 | prev_iters := b.n |
| 160 | // get elapsed time in nanoseconds |
| 161 | mut prev_ns := b.duration.nanoseconds() |
| 162 | |
| 163 | // to avoid division by zero |
| 164 | if prev_ns <= 0 { |
| 165 | prev_ns = 1 |
| 166 | } |
| 167 | |
| 168 | // multiple first to avoid division with less then 0 result |
| 169 | mut n := goal_ns * prev_iters |
| 170 | n = n / prev_ns |
| 171 | // grow at least in 1.2 |
| 172 | n += n / 5 |
| 173 | |
| 174 | // to not grow to fast |
| 175 | n = math.min(n, 100 * b.n) |
| 176 | // to grow at least on 1 |
| 177 | n = math.max(n, b.n + 1) |
| 178 | // to avoid run more then 1e9 times |
| 179 | n = math.min(n, 1000000000) |
| 180 | |
| 181 | return n |
| 182 | } |
| 183 | |
| 184 | // reset_timer - clear timer and reset memory start data |
| 185 | fn (mut b Benchmark) reset_timer() { |
| 186 | // if timer_on we should restart it |
| 187 | if b.timer_on { |
| 188 | b.start_time = time.now() |
| 189 | b.start_memory = gc_memory_use() |
| 190 | b.start_allocs = gc_heap_usage().bytes_since_gc |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | // starttimer - register start measures of memory |
| 195 | fn (mut b Benchmark) start_timer() { |
| 196 | // you do not need to start timer that already started |
| 197 | if !b.timer_on { |
| 198 | b.start_time = time.now() |
| 199 | b.start_memory = gc_memory_use() |
| 200 | b.start_allocs = gc_heap_usage().bytes_since_gc |
| 201 | b.timer_on = true |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | // stop_timer - accumulate menchmark data |
| 206 | fn (mut b Benchmark) stop_timer() { |
| 207 | if b.timer_on { |
| 208 | // accumulate delta time of execution |
| 209 | b.duration += time.since(b.start_time) |
| 210 | // accumulate memory growth |
| 211 | b.benchmark_result.mem += gc_memory_use() - b.start_memory |
| 212 | // accumulate heap usage |
| 213 | b.benchmark_result.allocs += gc_heap_usage().bytes_since_gc - b.start_allocs |
| 214 | b.timer_on = false |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | // BenchmarkResult - struct for represent result of benchmark |
| 219 | struct BenchmarkResult { |
| 220 | pub mut: |
| 221 | n i64 // iterations count |
| 222 | t time.Duration // elapsed time |
| 223 | mem usize // all allocated memory |
| 224 | allocs usize // heap allocated memory |
| 225 | } |
| 226 | |
| 227 | // ns_per_op - elapsed time in nanoseconds per iteration |
| 228 | fn (r BenchmarkResult) ns_per_op() i64 { |
| 229 | if r.n <= 0 { |
| 230 | return 0 |
| 231 | } |
| 232 | return r.t.nanoseconds() / i64(r.n) |
| 233 | } |
| 234 | |
| 235 | // allocs_per_op - heap usage per iteration |
| 236 | fn (r BenchmarkResult) allocs_per_op() i64 { |
| 237 | if r.n <= 0 { |
| 238 | return 0 |
| 239 | } |
| 240 | return i64(r.allocs) / i64(r.n) |
| 241 | } |
| 242 | |
| 243 | // alloced_bytes_per_op - memory usage per iteration |
| 244 | fn (r BenchmarkResult) alloced_bytes_per_op() i64 { |
| 245 | if r.n <= 0 { |
| 246 | return 0 |
| 247 | } |
| 248 | return i64(r.mem) / i64(r.n) |
| 249 | } |
| 250 | |
| 251 | // print - all measurements |
| 252 | fn (r BenchmarkResult) print() { |
| 253 | println('Iterations: ${r.n:10}\t\tTotal Duration: ${r.t:10}\tns/op: ${r.ns_per_op():10}\tB/op: ${r.alloced_bytes_per_op():6}\tallocs/op: ${r.allocs_per_op():6}') |
| 254 | } |
| 255 | |