| 1 | module main |
| 2 | |
| 3 | // Note: this benchmark is preferable to be compiled with: `v -prod -cg -gc boehm bench_euclid.v` |
| 4 | import math.big |
| 5 | import benchmark |
| 6 | import os |
| 7 | import v.tests.bench.math_big_gcd.prime { |
| 8 | DataI, |
| 9 | PrimeCfg, |
| 10 | PrimeSet, |
| 11 | } |
| 12 | |
| 13 | interface TestDataI { |
| 14 | r big.Integer |
| 15 | aa big.Integer |
| 16 | bb big.Integer |
| 17 | } |
| 18 | |
| 19 | type GCDSet = PrimeSet |
| 20 | type Clocks = map[string]benchmark.Benchmark |
| 21 | |
| 22 | const empty_set = GCDSet{'1', '1', '1'} |
| 23 | const with_dots = false |
| 24 | |
| 25 | fn main() { |
| 26 | fp := os.join_path(@VEXEROOT, prime.toml_path) |
| 27 | if !prime_file_exists(fp) { |
| 28 | panic('expected file |${fp}| - not found.') |
| 29 | } |
| 30 | |
| 31 | mut clocks := Clocks(map[string]benchmark.Benchmark{}) |
| 32 | |
| 33 | for algo in [ |
| 34 | 'gcd', |
| 35 | 'gcd_euclid', |
| 36 | 'gcd_binary', |
| 37 | //'u32binary', |
| 38 | //'u64binary' |
| 39 | ] { |
| 40 | clocks[algo] = benchmark.new_benchmark() |
| 41 | } |
| 42 | |
| 43 | // any test-prime-set needs to pass this predicate |
| 44 | // before used in the benchmark. |
| 45 | // |
| 46 | predicate_fn := fn (ps PrimeSet) bool { |
| 47 | cast_bi := bi_from_decimal_string |
| 48 | |
| 49 | r := cast_bi(ps.r) |
| 50 | aa := r * cast_bi(ps.a) |
| 51 | bb := r * cast_bi(ps.b) |
| 52 | gcd := aa.gcd(bb) |
| 53 | |
| 54 | return if [ |
| 55 | gcd != big.one_int, |
| 56 | gcd == bb.gcd(aa), |
| 57 | gcd == r, |
| 58 | aa != bb, |
| 59 | (aa % gcd) == big.zero_int, |
| 60 | (bb % gcd) == big.zero_int, |
| 61 | ].all(it == true) |
| 62 | { |
| 63 | true |
| 64 | } else { |
| 65 | false |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | cfgs := [ |
| 70 | // root-prime a-prime b-prime |
| 71 | ['s.3', 'xs.3', 's'], |
| 72 | ['s.10', 's.10', 'm.all'], |
| 73 | ['m.9', 's.10', 'xs'], |
| 74 | ['l.40', 'xs.10', 's.5'], |
| 75 | ['ml.20', 'm', 's.15'], |
| 76 | ['xl', 's.10', 'l.15'], |
| 77 | ['xxl', 'l.10', 'xl'], |
| 78 | ['l.30', 'm.10', 'xxl'], |
| 79 | ['xxl', 'xl', 'm.15'], |
| 80 | ['mega', 'xl.6', 's'], |
| 81 | ['xxl', 'xxxl.10', 'm.30'], |
| 82 | ['mega', 'xxxl', 'mega'], |
| 83 | ['giga.5', 'mega', 'giga'], |
| 84 | ['crazy', 'mega', 'giga'], |
| 85 | ['s', 'biggest', 'crazy'], |
| 86 | ['biggest', 'crazy', 'giga'], |
| 87 | ].map(PrimeCfg{it[0], it[1], it[2]}) |
| 88 | |
| 89 | println('\n${cfgs.len} x Tests') |
| 90 | |
| 91 | for i, prime_cfg in cfgs { |
| 92 | println('\n#-${i + 1}(Stack) "${prime_cfg}"') |
| 93 | bench_euclid_vs_binary(prime_cfg, false, predicate_fn, mut clocks) |
| 94 | |
| 95 | // just-to-be-sure, but makes no difference in this test. |
| 96 | // println('\n#-${i + 1}(Heap) "$prime_cfg"') |
| 97 | // bench_euclid_vs_binary(prime_cfg, true, predicate_fn, mut clocks) |
| 98 | } |
| 99 | println('') |
| 100 | for _, mut clock in clocks { |
| 101 | clock.stop() |
| 102 | } |
| 103 | |
| 104 | println(clocks['euclid'].total_message('both algorithms ')) |
| 105 | |
| 106 | msg := [ |
| 107 | 'Seems to me, that big.Integer.gcd_euclid in big.Integer.gcd() performs better on ', |
| 108 | 'very-small-integers up to 8-byte/u64. The tests #-1..5 show this.', |
| 109 | 'The big.Integer.gcd_binary algo seems to be perform better, the larger the numbers/buffers get.', |
| 110 | 'On my machine, I see consistent gains between ~10-30-percent with :', |
| 111 | '\n', |
| 112 | 'v -prod -cg -gc boehm bench_euclid.v', |
| 113 | '\n', |
| 114 | 'This test covers multiplied primes up-to a length of 300-char-digits in ', |
| 115 | 'a decimal-string. This equals (188-byte) == 47 x u32-values.', |
| 116 | 'edit/change primes in ${prime.toml_path}', |
| 117 | 'Improvements & critique are welcome : \n', |
| 118 | 'https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/', |
| 119 | ].join('\n') |
| 120 | |
| 121 | println(msg) |
| 122 | } |
| 123 | |
| 124 | fn run_benchmark(data []DataI, heap bool, mut clocks Clocks) bool { |
| 125 | mut testdata := []TestDataI{} |
| 126 | for elem in data { |
| 127 | // if elem is StackData || elem is HeapData{ |
| 128 | // testdata << elem |
| 129 | // } |
| 130 | // TODO: this reads strange |
| 131 | if elem is StackData { |
| 132 | testdata << elem |
| 133 | } |
| 134 | if elem is HeapData { |
| 135 | testdata << elem |
| 136 | } |
| 137 | } |
| 138 | // some statistics |
| 139 | // |
| 140 | mut tmp := []int{cap: testdata.len * 3} |
| 141 | for set in testdata { |
| 142 | for prime in [set.r, set.aa, set.bb] { |
| 143 | bi, _ := prime.bytes() |
| 144 | tmp << bi_buffer_len(bi) |
| 145 | } |
| 146 | } |
| 147 | tmp.sort() |
| 148 | min_byte := tmp.first() * 4 |
| 149 | max_byte := tmp.last() * 4 |
| 150 | mut buffer_space := 0 |
| 151 | for tmp.len != 0 { |
| 152 | buffer_space += tmp.pop() |
| 153 | } |
| 154 | |
| 155 | // trying to balance prime-size and item-count |
| 156 | // minimum rounds is 100-times |
| 157 | // |
| 158 | mut rounds := 2000 / ((3 * buffer_space) / testdata.len) |
| 159 | rounds = if rounds < 100 { 100 } else { rounds } |
| 160 | |
| 161 | ratio := (buffer_space * 4) / (testdata.len * 3) |
| 162 | |
| 163 | msg := [ |
| 164 | 'avg-${ratio}-byte/Prime, ${min_byte}-byte < Prime < ${max_byte}-byte \n', |
| 165 | '~${buffer_space * 4 / 1024}-Kb-', |
| 166 | if heap { 'Heap' } else { 'Stack' }, |
| 167 | '-space for ${testdata.len}-items x ', |
| 168 | '${rounds}-rounds', |
| 169 | ].join('') |
| 170 | println(msg) |
| 171 | |
| 172 | mut cycles := 0 |
| 173 | for algo, mut clock in clocks { |
| 174 | cycles = 0 |
| 175 | clock.step() |
| 176 | for cycles < rounds { |
| 177 | for set in testdata { |
| 178 | match algo { |
| 179 | 'gcd' { |
| 180 | if set.r != set.aa.gcd(set.bb) { |
| 181 | eprintln('${algo} failed ?') |
| 182 | clock.fail() |
| 183 | break |
| 184 | } |
| 185 | } |
| 186 | 'gcd_euclid' { |
| 187 | if set.r != set.aa.gcd_euclid(set.bb) { |
| 188 | eprintln('${algo} failed ?') |
| 189 | clock.fail() |
| 190 | break |
| 191 | } |
| 192 | } |
| 193 | 'gcd_binary' { |
| 194 | if set.r != set.aa.gcd_binary(set.bb) { |
| 195 | eprintln('${algo} failed ?') |
| 196 | clock.fail() |
| 197 | break |
| 198 | } |
| 199 | } |
| 200 | else { |
| 201 | eprintln('unknown algo was "${algo}"') |
| 202 | continue |
| 203 | } |
| 204 | } |
| 205 | } // eo-for over testdata |
| 206 | if with_dots { |
| 207 | print('.') |
| 208 | } |
| 209 | cycles += 1 |
| 210 | } // eo-for cycles |
| 211 | if with_dots { |
| 212 | println('') |
| 213 | } |
| 214 | clock.ok() |
| 215 | clock.measure(algo) |
| 216 | } // eo-for-loop over algo, clock |
| 217 | |
| 218 | return true |
| 219 | } |
| 220 | |
| 221 | fn bench_euclid_vs_binary(test_config PrimeCfg, heap bool, predicate_fn fn (ps PrimeSet) bool, mut clocks Clocks) bool { |
| 222 | testprimes := prime.random_set(test_config) or { panic(err) } |
| 223 | |
| 224 | // validate the test-data |
| 225 | // |
| 226 | gcd_primes := testprimes.map(prepare_and_test_gcd(it, predicate_fn)).filter(it != empty_set) |
| 227 | |
| 228 | // just to make sure all generated primes are sane |
| 229 | // |
| 230 | assert gcd_primes.len == testprimes.len |
| 231 | |
| 232 | // casting the decimal-strings into big.Integers |
| 233 | // here to avoid measuring string-parsing-cycles |
| 234 | // during later testing. |
| 235 | // |
| 236 | mut casted_sets := if heap { gcd_primes.map(unsafe { |
| 237 | DataI(&PrimeSet(&it)).cast[HeapData]() |
| 238 | }) } else { gcd_primes.map(unsafe { |
| 239 | DataI(&PrimeSet(&it)).cast[StackData]() |
| 240 | }) } |
| 241 | |
| 242 | // ready use the primes in the benchmark |
| 243 | // |
| 244 | return run_benchmark(casted_sets, heap, mut clocks) |
| 245 | } |
| 246 | |
| 247 | fn prepare_and_test_gcd(primeset PrimeSet, test fn (ps PrimeSet) bool) GCDSet { |
| 248 | if !primeset.predicate(test) { |
| 249 | eprintln('? Corrupt Testdata was ?') |
| 250 | dump(primeset) |
| 251 | return empty_set // {'1', '1', '1'} |
| 252 | } |
| 253 | |
| 254 | cast_bi := bi_from_decimal_string |
| 255 | |
| 256 | r := cast_bi(primeset.r) |
| 257 | aa := cast_bi(primeset.a) * r |
| 258 | bb := cast_bi(primeset.b) * r |
| 259 | gcd := aa.gcd(bb) |
| 260 | |
| 261 | return GCDSet{'${gcd}', '${aa}', '${bb}'} |
| 262 | } |
| 263 | |
| 264 | fn prime_file_exists(path string) bool { |
| 265 | return os.is_readable(path) |
| 266 | } |
| 267 | |
| 268 | // bi_from_decimal_string converts a string-of-decimals into |
| 269 | // a math.big.Integer using the big.Integers 'from_radix-fn' |
| 270 | // |
| 271 | pub fn bi_from_decimal_string(s string) big.Integer { |
| 272 | return big.integer_from_radix(s, u32(10)) or { |
| 273 | msg := [ |
| 274 | 'Cannot convert prime from decimal-string.', |
| 275 | 'prime was : "${s}"\n', |
| 276 | ].join('\n') |
| 277 | panic(msg) |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | // need the bi.digits.len - during test only - to calculate |
| 282 | // the size of big.Integers-buffer |
| 283 | // |
| 284 | fn bi_buffer_len(input []u8) int { |
| 285 | if input.len == 0 { |
| 286 | return 0 |
| 287 | } |
| 288 | // pad input |
| 289 | mut padded_input := []u8{len: ((input.len + 3) & ~0x3) - input.len, cap: (input.len + 3) & ~0x3, init: 0x0} |
| 290 | padded_input << input |
| 291 | mut digits := []u32{len: padded_input.len / 4} |
| 292 | // combine every 4 bytes into a u32 and insert into n.digits |
| 293 | for i := 0; i < padded_input.len; i += 4 { |
| 294 | x3 := u32(padded_input[i]) |
| 295 | x2 := u32(padded_input[i + 1]) |
| 296 | x1 := u32(padded_input[i + 2]) |
| 297 | x0 := u32(padded_input[i + 3]) |
| 298 | val := (x3 << 24) | (x2 << 16) | (x1 << 8) | x0 |
| 299 | digits[(padded_input.len - i) / 4 - 1] = val |
| 300 | } |
| 301 | return digits.len |
| 302 | } |
| 303 | |
| 304 | @[heap] |
| 305 | pub struct HeapData { |
| 306 | pub mut: |
| 307 | r big.Integer |
| 308 | aa big.Integer |
| 309 | bb big.Integer |
| 310 | } |
| 311 | |
| 312 | pub fn (hd HeapData) to_primeset() PrimeSet { |
| 313 | return PrimeSet{ |
| 314 | r: '${hd.r}' |
| 315 | a: '${hd.aa}' |
| 316 | b: '${hd.bb}' |
| 317 | } |
| 318 | } |
| 319 | |
| 320 | pub fn (hd HeapData) from_primeset(p PrimeSet) DataI { |
| 321 | return DataI(HeapData{ |
| 322 | r: bi_from_decimal_string(p.r) |
| 323 | aa: bi_from_decimal_string(p.a) |
| 324 | bb: bi_from_decimal_string(p.b) |
| 325 | }) |
| 326 | } |
| 327 | |
| 328 | pub struct StackData { |
| 329 | pub mut: |
| 330 | r big.Integer |
| 331 | aa big.Integer |
| 332 | bb big.Integer |
| 333 | } |
| 334 | |
| 335 | pub fn (sd StackData) to_primeset() PrimeSet { |
| 336 | return PrimeSet{ |
| 337 | r: '${sd.r}' |
| 338 | a: '${sd.aa}' |
| 339 | b: '${sd.bb}' |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | pub fn (sd StackData) from_primeset(p PrimeSet) DataI { |
| 344 | return DataI(StackData{ |
| 345 | r: bi_from_decimal_string(p.r) |
| 346 | aa: bi_from_decimal_string(p.a) |
| 347 | bb: bi_from_decimal_string(p.b) |
| 348 | }) |
| 349 | } |
| 350 | |