Gitly


1 module stats
2 
3 import math
4 
5 // freq calculates the Measure of Occurrence
6 // Frequency of a given number
7 // Based on
8 // https://www.mathsisfun.com/data/frequency-distribution.html
9 pub fn freq[T](data []T, val T) int {
10     if data.len == 0 {
11         return 0
12     }
13     mut count := 0
14     for v in data {
15         if v == val {
16             count++
17         }
18     }
19     return count
20 }
21 
22 // mean calculates the average
23 // of the given input array, sum(data)/data.len
24 // Based on
25 // https://www.mathsisfun.com/data/central-measures.html
26 pub fn mean[T](data []T) T {
27     if data.len == 0 {
28         return T(0)
29     }
30     mut sum := T(0)
31     for v in data {
32         sum += v
33     }
34     return T(sum / data.len)
35 }
36 
37 // geometric_mean calculates the central tendency
38 // of the given input array, product(data)**1/data.len
39 // Based on
40 // https://www.mathsisfun.com/numbers/geometric-mean.html
41 pub fn geometric_mean[T](data []T) T {
42     if data.len == 0 {
43         return T(0)
44     }
45     mut sum := T(1)
46     for v in data {
47         sum *= v
48     }
49     $if T is f64 {
50         return math.pow(sum, f64(1.0) / data.len)
51     } $else {
52         // use f32 for f32/int/...
53         return T(math.powf(f32(sum), f32(1.0) / data.len))
54     }
55 }
56 
57 // harmonic_mean calculates the reciprocal of the average of reciprocals
58 // of the given input array
59 // Based on
60 // https://www.mathsisfun.com/numbers/harmonic-mean.html
61 pub fn harmonic_mean[T](data []T) T {
62     if data.len == 0 {
63         return T(0)
64     }
65     $if T is f64 {
66         mut sum := f64(0)
67         for v in data {
68             sum += f64(1.0) / v
69         }
70         return f64(f64(data.len) / sum)
71     } $else {
72         // use f32 for f32/int/...
73         mut sum := f32(0)
74         for v in data {
75             sum += f32(1.0) / f32(v)
76         }
77         return T(f32(data.len) / sum)
78     }
79 }
80 
81 // median returns the middlemost value of the given input array ( input array is assumed to be sorted )
82 // Based on
83 // https://www.mathsisfun.com/data/central-measures.html
84 pub fn median[T](sorted_data []T) T {
85     if sorted_data.len == 0 {
86         return T(0)
87     }
88     if sorted_data.len % 2 == 0 {
89         mid := (sorted_data.len / 2) - 1
90         return (sorted_data[mid] + sorted_data[mid + 1]) / T(2)
91     } else {
92         return sorted_data[((sorted_data.len - 1) / 2)]
93     }
94 }
95 
96 // mode calculates the highest occurring value of the given input array
97 // Based on
98 // https://www.mathsisfun.com/data/central-measures.html
99 pub fn mode[T](data []T) T {
100     if data.len == 0 {
101         return T(0)
102     }
103     mut freqs := []int{}
104     for v in data {
105         freqs << freq(data, v)
106     }
107     mut max := 0
108     for i := 0; i < freqs.len; i++ {
109         if freqs[i] > freqs[max] {
110             max = i
111         }
112     }
113     return data[max]
114 }
115 
116 // rms, Root Mean Square, calculates the sqrt of the mean of the squares of the given input array
117 // Based on
118 // https://en.wikipedia.org/wiki/Root_mean_square
119 pub fn rms[T](data []T) T {
120     if data.len == 0 {
121         return T(0)
122     }
123 
124     $if T is f64 {
125         mut sum := f64(0)
126         for v in data {
127             sum += math.pow(v, 2)
128         }
129         return math.sqrt(sum / data.len)
130     } $else {
131         // use f32 for f32/int/...
132         mut sum := f32(0)
133         for v in data {
134             sum += math.powf(f32(v), 2)
135         }
136         return T(math.sqrtf(sum / data.len))
137     }
138 }
139 
140 // population_variance is the Measure of Dispersion / Spread
141 // of the given input array
142 // Based on
143 // https://www.mathsisfun.com/data/standard-deviation.html
144 @[inline]
145 pub fn population_variance[T](data []T) T {
146     if data.len == 0 {
147         return T(0)
148     }
149     data_mean := mean[T](data)
150     return population_variance_mean[T](data, data_mean)
151 }
152 
153 // population_variance_mean is the Measure of Dispersion / Spread
154 // of the given input array, with the provided mean
155 // Based on
156 // https://www.mathsisfun.com/data/standard-deviation.html
157 pub fn population_variance_mean[T](data []T, mean T) T {
158     if data.len == 0 {
159         return T(0)
160     }
161 
162     mut sum := T(0)
163     for v in data {
164         sum += T((v - mean) * (v - mean))
165     }
166     return T(sum / data.len)
167 }
168 
169 // sample_variance calculates the spread of dataset around the mean
170 // Based on
171 // https://www.mathsisfun.com/data/standard-deviation.html
172 @[inline]
173 pub fn sample_variance[T](data []T) T {
174     if data.len == 0 {
175         return T(0)
176     }
177     data_mean := mean[T](data)
178     return sample_variance_mean[T](data, data_mean)
179 }
180 
181 // sample_variance calculates the spread of dataset around the provided mean
182 // Based on
183 // https://www.mathsisfun.com/data/standard-deviation.html
184 pub fn sample_variance_mean[T](data []T, mean T) T {
185     if data.len == 0 {
186         return T(0)
187     }
188     mut sum := T(0)
189     for v in data {
190         sum += T((v - mean) * (v - mean))
191     }
192     return T(sum / (data.len - 1))
193 }
194 
195 // population_stddev calculates how spread out the dataset is
196 // Based on
197 // https://www.mathsisfun.com/data/standard-deviation.html
198 @[inline]
199 pub fn population_stddev[T](data []T) T {
200     if data.len == 0 {
201         return T(0)
202     }
203     $if T is f64 {
204         return math.sqrt(population_variance[T](data))
205     } $else {
206         return T(math.sqrtf(f32(population_variance[T](data))))
207     }
208 }
209 
210 // population_stddev_mean calculates how spread out the dataset is, with the provide mean
211 // Based on
212 // https://www.mathsisfun.com/data/standard-deviation.html
213 @[inline]
214 pub fn population_stddev_mean[T](data []T, mean T) T {
215     if data.len == 0 {
216         return T(0)
217     }
218     $if T is f64 {
219         return math.sqrt(population_variance_mean[T](data, mean))
220     } $else {
221         return T(math.sqrtf(f32(population_variance_mean[T](data, mean))))
222     }
223 }
224 
225 // Measure of Dispersion / Spread
226 // Sample Standard Deviation of the given input array
227 // Based on
228 // https://www.mathsisfun.com/data/standard-deviation.html
229 @[inline]
230 pub fn sample_stddev[T](data []T) T {
231     if data.len == 0 {
232         return T(0)
233     }
234     $if T is f64 {
235         return math.sqrt(sample_variance[T](data))
236     } $else {
237         return T(math.sqrtf(f32(sample_variance[T](data))))
238     }
239 }
240 
241 // Measure of Dispersion / Spread
242 // Sample Standard Deviation of the given input array
243 // Based on
244 // https://www.mathsisfun.com/data/standard-deviation.html
245 @[inline]
246 pub fn sample_stddev_mean[T](data []T, mean T) T {
247     if data.len == 0 {
248         return T(0)
249     }
250     $if T is f64 {
251         return math.sqrt(sample_variance_mean[T](data, mean))
252     } $else {
253         return T(math.sqrtf(sample_variance_mean[T](data, mean)))
254     }
255 }
256 
257 // absdev calculates the average distance between each data point and the mean
258 // Based on
259 // https://en.wikipedia.org/wiki/Average_absolute_deviation
260 @[inline]
261 pub fn absdev[T](data []T) T {
262     if data.len == 0 {
263         return T(0)
264     }
265     data_mean := mean[T](data)
266     return absdev_mean[T](data, data_mean)
267 }
268 
269 // absdev_mean calculates the average distance between each data point and the provided mean
270 // Based on
271 // https://en.wikipedia.org/wiki/Average_absolute_deviation
272 pub fn absdev_mean[T](data []T, mean T) T {
273     if data.len == 0 {
274         return T(0)
275     }
276     mut sum := T(0)
277     for v in data {
278         sum += math.abs(v - mean)
279     }
280     return T(sum / data.len)
281 }
282 
283 // tts, Sum of squares, calculates the sum over all squared differences between values and overall mean
284 @[inline]
285 pub fn tss[T](data []T) T {
286     if data.len == 0 {
287         return T(0)
288     }
289     data_mean := mean[T](data)
290     return tss_mean[T](data, data_mean)
291 }
292 
293 // tts_mean, Sum of squares, calculates the sum over all squared differences between values and the provided mean
294 pub fn tss_mean[T](data []T, mean T) T {
295     if data.len == 0 {
296         return T(0)
297     }
298     mut tss := T(0)
299     for v in data {
300         tss += T((v - mean) * (v - mean))
301     }
302     return tss
303 }
304 
305 // min finds the minimum value from the dataset
306 pub fn min[T](data []T) T {
307     if data.len == 0 {
308         return T(0)
309     }
310     mut min := data[0]
311     for v in data {
312         if v < min {
313             min = v
314         }
315     }
316     return min
317 }
318 
319 // max finds the maximum value from the dataset
320 pub fn max[T](data []T) T {
321     if data.len == 0 {
322         return T(0)
323     }
324     mut max := data[0]
325     for v in data {
326         if v > max {
327             max = v
328         }
329     }
330     return max
331 }
332 
333 // minmax finds the minimum and maximum value from the dataset
334 pub fn minmax[T](data []T) (T, T) {
335     if data.len == 0 {
336         return T(0), T(0)
337     }
338     mut max := data[0]
339     mut min := data[0]
340     for v in data[1..] {
341         if v > max {
342             max = v
343         }
344         if v < min {
345             min = v
346         }
347     }
348     return min, max
349 }
350 
351 // min_index finds the first index of the minimum value
352 pub fn min_index[T](data []T) int {
353     if data.len == 0 {
354         return 0
355     }
356     mut min := data[0]
357     mut min_index := 0
358     for i, v in data {
359         if v < min {
360             min = v
361             min_index = i
362         }
363     }
364     return min_index
365 }
366 
367 // max_index finds the first index of the maximum value
368 pub fn max_index[T](data []T) int {
369     if data.len == 0 {
370         return 0
371     }
372     mut max := data[0]
373     mut max_index := 0
374     for i, v in data {
375         if v > max {
376             max = v
377             max_index = i
378         }
379     }
380     return max_index
381 }
382 
383 // minmax_index finds the first index of the minimum and maximum value
384 pub fn minmax_index[T](data []T) (int, int) {
385     if data.len == 0 {
386         return 0, 0
387     }
388     mut min := data[0]
389     mut max := data[0]
390     mut min_index := 0
391     mut max_index := 0
392     for i, v in data {
393         if v < min {
394             min = v
395             min_index = i
396         }
397         if v > max {
398             max = v
399             max_index = i
400         }
401     }
402     return min_index, max_index
403 }
404 
405 // range calculates the difference between the min and max
406 // Range ( Maximum - Minimum ) of the given input array
407 // Based on
408 // https://www.mathsisfun.com/data/range.html
409 pub fn range[T](data []T) T {
410     if data.len == 0 {
411         return T(0)
412     }
413     min, max := minmax[T](data)
414     return max - min
415 }
416 
417 // covariance calculates directional association between datasets
418 // positive value denotes variables move in same direction and negative denotes variables move in opposite directions
419 @[inline]
420 pub fn covariance[T](data1 []T, data2 []T) T {
421     mean1 := mean[T](data1)
422     mean2 := mean[T](data2)
423     return covariance_mean[T](data1, data2, mean1, mean2)
424 }
425 
426 // covariance_mean computes the covariance of a dataset with means provided
427 // the recurrence relation
428 pub fn covariance_mean[T](data1 []T, data2 []T, mean1 T, mean2 T) T {
429     n := int(math.min(data1.len, data2.len))
430     if n == 0 {
431         return T(0)
432     }
433     mut covariance := T(0)
434     for i in 0 .. n {
435         delta1 := data1[i] - mean1
436         delta2 := data2[i] - mean2
437         covariance += T((delta1 * delta2 - covariance) / (T(i) + T(1)))
438     }
439     return covariance
440 }
441 
442 // lag1_autocorrelation_mean calculates the correlation between values that are one time period apart
443 // of a dataset, based on the mean
444 @[inline]
445 pub fn lag1_autocorrelation[T](data []T) T {
446     data_mean := mean[T](data)
447     return lag1_autocorrelation_mean[T](data, data_mean)
448 }
449 
450 // lag1_autocorrelation_mean calculates the correlation between values that are one time period apart
451 // of a dataset, using
452 // the recurrence relation
453 pub fn lag1_autocorrelation_mean[T](data []T, mean T) T {
454     if data.len == 0 {
455         return T(0)
456     }
457     mut q := T(0)
458     mut v := (data[0] * mean) - (data[0] * mean)
459     for i := 1; i < data.len; i++ {
460         delta0 := data[i - 1] - mean
461         delta1 := data[i] - mean
462         d01 := delta0 * delta1
463         d11 := delta1 * delta1
464         ti1 := T(i) + T(1)
465         q += T((d01 - q) / ti1)
466         v += T((d11 - v) / ti1)
467     }
468     return T(q / v)
469 }
470 
471 // kurtosis calculates the measure of the 'tailedness' of the data by finding mean and standard of deviation
472 @[inline]
473 pub fn kurtosis[T](data []T) T {
474     data_mean := mean[T](data)
475     sd := population_stddev_mean[T](data, data_mean)
476     return kurtosis_mean_stddev[T](data, data_mean, sd)
477 }
478 
479 // kurtosis_mean_stddev calculates the measure of the 'tailedness' of the data
480 // using the fourth moment the deviations, normalized by the sd
481 pub fn kurtosis_mean_stddev[T](data []T, mean T, sd T) T {
482     if data.len == 0 {
483         return T(0)
484     }
485     mut avg := T(0) // find the fourth moment the deviations, normalized by the sd
486     /*
487     we use a recurrence relation to stably update a running value so
488          * there aren't any large sums that can overflow
489     */
490     for i, v in data {
491         x := (v - mean) / sd
492         x4 := x * x * x * x
493         ti1 := (T(i) + T(1))
494         avg += T((x4 - avg) / ti1)
495     }
496     return avg - T(3)
497 }
498 
499 // skew calculates the mean and standard of deviation to find the skew from the data
500 @[inline]
501 pub fn skew[T](data []T) T {
502     data_mean := mean[T](data)
503     sd := population_stddev_mean[T](data, data_mean)
504     return skew_mean_stddev[T](data, data_mean, sd)
505 }
506 
507 // skew_mean_stddev calculates the skewness of data
508 pub fn skew_mean_stddev[T](data []T, mean T, sd T) T {
509     if data.len == 0 {
510         return T(0)
511     }
512     mut skew := T(0) // find the sum of the cubed deviations, normalized by the sd.
513     /*
514     we use a recurrence relation to stably update a running value so
515          * there aren't any large sums that can overflow
516     */
517     for i, v in data {
518         x := (v - mean) / sd
519         x3 := x * x * x
520         skew += T((x3 - skew) / (T(i) + T(1)))
521     }
522     return skew
523 }
524 
525 // quantile calculates quantile points
526 // for more reference
527 // https://en.wikipedia.org/wiki/Quantile
528 pub fn quantile[T](sorted_data []T, f T) !T {
529     if sorted_data.len == 0 {
530         return T(0)
531     }
532     index := f * (sorted_data.len - 1)
533     lhs := int(index)
534     if lhs < 0 || lhs >= sorted_data.len {
535         return error('index out of range')
536     } else if lhs == sorted_data.len - 1 {
537         return sorted_data[lhs]
538     } else {
539         if lhs >= sorted_data.len - 1 {
540             return error('index out of range')
541         }
542         delta := index - T(lhs)
543         return T((1 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)])
544     }
545 }
546

1	module stats
2
3	import math
4
5	// freq calculates the Measure of Occurrence
6	// Frequency of a given number
7	// Based on
8	// https://www.mathsisfun.com/data/frequency-distribution.html
9	pub fn freq[T](data []T, val T) int {
10	if data.len == 0 {
11	return 0
12	}
13	mut count := 0
14	for v in data {
15	if v == val {
16	count++
17	}
18	}
19	return count
20	}
21
22	// mean calculates the average
23	// of the given input array, sum(data)/data.len
24	// Based on
25	// https://www.mathsisfun.com/data/central-measures.html
26	pub fn mean[T](data []T) T {
27	if data.len == 0 {
28	return T(0)
29	}
30	mut sum := T(0)
31	for v in data {
32	sum += v
33	}
34	return T(sum / data.len)
35	}
36
37	// geometric_mean calculates the central tendency
38	// of the given input array, product(data)1/data.len
39	// Based on
40	// https://www.mathsisfun.com/numbers/geometric-mean.html
41	pub fn geometric_mean[T](data []T) T {
42	if data.len == 0 {
43	return T(0)
44	}
45	mut sum := T(1)
46	for v in data {
47	sum *= v
48	}
49	$if T is f64 {
50	return math.pow(sum, f64(1.0) / data.len)
51	} $else {
52	// use f32 for f32/int/...
53	return T(math.powf(f32(sum), f32(1.0) / data.len))
54	}
55	}
56
57	// harmonic_mean calculates the reciprocal of the average of reciprocals
58	// of the given input array
59	// Based on
60	// https://www.mathsisfun.com/numbers/harmonic-mean.html
61	pub fn harmonic_mean[T](data []T) T {
62	if data.len == 0 {
63	return T(0)
64	}
65	$if T is f64 {
66	mut sum := f64(0)
67	for v in data {
68	sum += f64(1.0) / v
69	}
70	return f64(f64(data.len) / sum)
71	} $else {
72	// use f32 for f32/int/...
73	mut sum := f32(0)
74	for v in data {
75	sum += f32(1.0) / f32(v)
76	}
77	return T(f32(data.len) / sum)
78	}
79	}
80
81	// median returns the middlemost value of the given input array ( input array is assumed to be sorted )
82	// Based on
83	// https://www.mathsisfun.com/data/central-measures.html
84	pub fn median[T](sorted_data []T) T {
85	if sorted_data.len == 0 {
86	return T(0)
87	}
88	if sorted_data.len % 2 == 0 {
89	mid := (sorted_data.len / 2) - 1
90	return (sorted_data[mid] + sorted_data[mid + 1]) / T(2)
91	} else {
92	return sorted_data[((sorted_data.len - 1) / 2)]
93	}
94	}
95
96	// mode calculates the highest occurring value of the given input array
97	// Based on
98	// https://www.mathsisfun.com/data/central-measures.html
99	pub fn mode[T](data []T) T {
100	if data.len == 0 {
101	return T(0)
102	}
103	mut freqs := []int{}
104	for v in data {
105	freqs << freq(data, v)
106	}
107	mut max := 0
108	for i := 0; i < freqs.len; i++ {
109	if freqs[i] > freqs[max] {
110	max = i
111	}
112	}
113	return data[max]
114	}
115
116	// rms, Root Mean Square, calculates the sqrt of the mean of the squares of the given input array
117	// Based on
118	// https://en.wikipedia.org/wiki/Root_mean_square
119	pub fn rms[T](data []T) T {
120	if data.len == 0 {
121	return T(0)
122	}
123
124	$if T is f64 {
125	mut sum := f64(0)
126	for v in data {
127	sum += math.pow(v, 2)
128	}
129	return math.sqrt(sum / data.len)
130	} $else {
131	// use f32 for f32/int/...
132	mut sum := f32(0)
133	for v in data {
134	sum += math.powf(f32(v), 2)
135	}
136	return T(math.sqrtf(sum / data.len))
137	}
138	}
139
140	// population_variance is the Measure of Dispersion / Spread
141	// of the given input array
142	// Based on
143	// https://www.mathsisfun.com/data/standard-deviation.html
144	@[inline]
145	pub fn population_variance[T](data []T) T {
146	if data.len == 0 {
147	return T(0)
148	}
149	data_mean := mean[T](data)
150	return population_variance_mean[T](data, data_mean)
151	}
152
153	// population_variance_mean is the Measure of Dispersion / Spread
154	// of the given input array, with the provided mean
155	// Based on
156	// https://www.mathsisfun.com/data/standard-deviation.html
157	pub fn population_variance_mean[T](data []T, mean T) T {
158	if data.len == 0 {
159	return T(0)
160	}
161
162	mut sum := T(0)
163	for v in data {
164	sum += T((v - mean) * (v - mean))
165	}
166	return T(sum / data.len)
167	}
168
169	// sample_variance calculates the spread of dataset around the mean
170	// Based on
171	// https://www.mathsisfun.com/data/standard-deviation.html
172	@[inline]
173	pub fn sample_variance[T](data []T) T {
174	if data.len == 0 {
175	return T(0)
176	}
177	data_mean := mean[T](data)
178	return sample_variance_mean[T](data, data_mean)
179	}
180
181	// sample_variance calculates the spread of dataset around the provided mean
182	// Based on
183	// https://www.mathsisfun.com/data/standard-deviation.html
184	pub fn sample_variance_mean[T](data []T, mean T) T {
185	if data.len == 0 {
186	return T(0)
187	}
188	mut sum := T(0)
189	for v in data {
190	sum += T((v - mean) * (v - mean))
191	}
192	return T(sum / (data.len - 1))
193	}
194
195	// population_stddev calculates how spread out the dataset is
196	// Based on
197	// https://www.mathsisfun.com/data/standard-deviation.html
198	@[inline]
199	pub fn population_stddev[T](data []T) T {
200	if data.len == 0 {
201	return T(0)
202	}
203	$if T is f64 {
204	return math.sqrt(population_variance[T](data))
205	} $else {
206	return T(math.sqrtf(f32(population_variance[T](data))))
207	}
208	}
209
210	// population_stddev_mean calculates how spread out the dataset is, with the provide mean
211	// Based on
212	// https://www.mathsisfun.com/data/standard-deviation.html
213	@[inline]
214	pub fn population_stddev_mean[T](data []T, mean T) T {
215	if data.len == 0 {
216	return T(0)
217	}
218	$if T is f64 {
219	return math.sqrt(population_variance_mean[T](data, mean))
220	} $else {
221	return T(math.sqrtf(f32(population_variance_mean[T](data, mean))))
222	}
223	}
224
225	// Measure of Dispersion / Spread
226	// Sample Standard Deviation of the given input array
227	// Based on
228	// https://www.mathsisfun.com/data/standard-deviation.html
229	@[inline]
230	pub fn sample_stddev[T](data []T) T {
231	if data.len == 0 {
232	return T(0)
233	}
234	$if T is f64 {
235	return math.sqrt(sample_variance[T](data))
236	} $else {
237	return T(math.sqrtf(f32(sample_variance[T](data))))
238	}
239	}
240
241	// Measure of Dispersion / Spread
242	// Sample Standard Deviation of the given input array
243	// Based on
244	// https://www.mathsisfun.com/data/standard-deviation.html
245	@[inline]
246	pub fn sample_stddev_mean[T](data []T, mean T) T {
247	if data.len == 0 {
248	return T(0)
249	}
250	$if T is f64 {
251	return math.sqrt(sample_variance_mean[T](data, mean))
252	} $else {
253	return T(math.sqrtf(sample_variance_mean[T](data, mean)))
254	}
255	}
256
257	// absdev calculates the average distance between each data point and the mean
258	// Based on
259	// https://en.wikipedia.org/wiki/Average_absolute_deviation
260	@[inline]
261	pub fn absdev[T](data []T) T {
262	if data.len == 0 {
263	return T(0)
264	}
265	data_mean := mean[T](data)
266	return absdev_mean[T](data, data_mean)
267	}
268
269	// absdev_mean calculates the average distance between each data point and the provided mean
270	// Based on
271	// https://en.wikipedia.org/wiki/Average_absolute_deviation
272	pub fn absdev_mean[T](data []T, mean T) T {
273	if data.len == 0 {
274	return T(0)
275	}
276	mut sum := T(0)
277	for v in data {
278	sum += math.abs(v - mean)
279	}
280	return T(sum / data.len)
281	}
282
283	// tts, Sum of squares, calculates the sum over all squared differences between values and overall mean
284	@[inline]
285	pub fn tss[T](data []T) T {
286	if data.len == 0 {
287	return T(0)
288	}
289	data_mean := mean[T](data)
290	return tss_mean[T](data, data_mean)
291	}
292
293	// tts_mean, Sum of squares, calculates the sum over all squared differences between values and the provided mean
294	pub fn tss_mean[T](data []T, mean T) T {
295	if data.len == 0 {
296	return T(0)
297	}
298	mut tss := T(0)
299	for v in data {
300	tss += T((v - mean) * (v - mean))
301	}
302	return tss
303	}
304
305	// min finds the minimum value from the dataset
306	pub fn min[T](data []T) T {
307	if data.len == 0 {
308	return T(0)
309	}
310	mut min := data[0]
311	for v in data {
312	if v < min {
313	min = v
314	}
315	}
316	return min
317	}
318
319	// max finds the maximum value from the dataset
320	pub fn max[T](data []T) T {
321	if data.len == 0 {
322	return T(0)
323	}
324	mut max := data[0]
325	for v in data {
326	if v > max {
327	max = v
328	}
329	}
330	return max
331	}
332
333	// minmax finds the minimum and maximum value from the dataset
334	pub fn minmax[T](data []T) (T, T) {
335	if data.len == 0 {
336	return T(0), T(0)
337	}
338	mut max := data[0]
339	mut min := data[0]
340	for v in data[1..] {
341	if v > max {
342	max = v
343	}
344	if v < min {
345	min = v
346	}
347	}
348	return min, max
349	}
350
351	// min_index finds the first index of the minimum value
352	pub fn min_index[T](data []T) int {
353	if data.len == 0 {
354	return 0
355	}
356	mut min := data[0]
357	mut min_index := 0
358	for i, v in data {
359	if v < min {
360	min = v
361	min_index = i
362	}
363	}
364	return min_index
365	}
366
367	// max_index finds the first index of the maximum value
368	pub fn max_index[T](data []T) int {
369	if data.len == 0 {
370	return 0
371	}
372	mut max := data[0]
373	mut max_index := 0
374	for i, v in data {
375	if v > max {
376	max = v
377	max_index = i
378	}
379	}
380	return max_index
381	}
382
383	// minmax_index finds the first index of the minimum and maximum value
384	pub fn minmax_index[T](data []T) (int, int) {
385	if data.len == 0 {
386	return 0, 0
387	}
388	mut min := data[0]
389	mut max := data[0]
390	mut min_index := 0
391	mut max_index := 0
392	for i, v in data {
393	if v < min {
394	min = v
395	min_index = i
396	}
397	if v > max {
398	max = v
399	max_index = i
400	}
401	}
402	return min_index, max_index
403	}
404
405	// range calculates the difference between the min and max
406	// Range ( Maximum - Minimum ) of the given input array
407	// Based on
408	// https://www.mathsisfun.com/data/range.html
409	pub fn range[T](data []T) T {
410	if data.len == 0 {
411	return T(0)
412	}
413	min, max := minmax[T](data)
414	return max - min
415	}
416
417	// covariance calculates directional association between datasets
418	// positive value denotes variables move in same direction and negative denotes variables move in opposite directions
419	@[inline]
420	pub fn covariance[T](data1 []T, data2 []T) T {
421	mean1 := mean[T](data1)
422	mean2 := mean[T](data2)
423	return covariance_mean[T](data1, data2, mean1, mean2)
424	}
425
426	// covariance_mean computes the covariance of a dataset with means provided
427	// the recurrence relation
428	pub fn covariance_mean[T](data1 []T, data2 []T, mean1 T, mean2 T) T {
429	n := int(math.min(data1.len, data2.len))
430	if n == 0 {
431	return T(0)
432	}
433	mut covariance := T(0)
434	for i in 0 .. n {
435	delta1 := data1[i] - mean1
436	delta2 := data2[i] - mean2
437	covariance += T((delta1 * delta2 - covariance) / (T(i) + T(1)))
438	}
439	return covariance
440	}
441
442	// lag1_autocorrelation_mean calculates the correlation between values that are one time period apart
443	// of a dataset, based on the mean
444	@[inline]
445	pub fn lag1_autocorrelation[T](data []T) T {
446	data_mean := mean[T](data)
447	return lag1_autocorrelation_mean[T](data, data_mean)
448	}
449
450	// lag1_autocorrelation_mean calculates the correlation between values that are one time period apart
451	// of a dataset, using
452	// the recurrence relation
453	pub fn lag1_autocorrelation_mean[T](data []T, mean T) T {
454	if data.len == 0 {
455	return T(0)
456	}
457	mut q := T(0)
458	mut v := (data[0] * mean) - (data[0] * mean)
459	for i := 1; i < data.len; i++ {
460	delta0 := data[i - 1] - mean
461	delta1 := data[i] - mean
462	d01 := delta0 * delta1
463	d11 := delta1 * delta1
464	ti1 := T(i) + T(1)
465	q += T((d01 - q) / ti1)
466	v += T((d11 - v) / ti1)
467	}
468	return T(q / v)
469	}
470
471	// kurtosis calculates the measure of the 'tailedness' of the data by finding mean and standard of deviation
472	@[inline]
473	pub fn kurtosis[T](data []T) T {
474	data_mean := mean[T](data)
475	sd := population_stddev_mean[T](data, data_mean)
476	return kurtosis_mean_stddev[T](data, data_mean, sd)
477	}
478
479	// kurtosis_mean_stddev calculates the measure of the 'tailedness' of the data
480	// using the fourth moment the deviations, normalized by the sd
481	pub fn kurtosis_mean_stddev[T](data []T, mean T, sd T) T {
482	if data.len == 0 {
483	return T(0)
484	}
485	mut avg := T(0) // find the fourth moment the deviations, normalized by the sd
486	/*
487	we use a recurrence relation to stably update a running value so
488	* there aren't any large sums that can overflow
489	*/
490	for i, v in data {
491	x := (v - mean) / sd
492	x4 := x * x * x * x
493	ti1 := (T(i) + T(1))
494	avg += T((x4 - avg) / ti1)
495	}
496	return avg - T(3)
497	}
498
499	// skew calculates the mean and standard of deviation to find the skew from the data
500	@[inline]
501	pub fn skew[T](data []T) T {
502	data_mean := mean[T](data)
503	sd := population_stddev_mean[T](data, data_mean)
504	return skew_mean_stddev[T](data, data_mean, sd)
505	}
506
507	// skew_mean_stddev calculates the skewness of data
508	pub fn skew_mean_stddev[T](data []T, mean T, sd T) T {
509	if data.len == 0 {
510	return T(0)
511	}
512	mut skew := T(0) // find the sum of the cubed deviations, normalized by the sd.
513	/*
514	we use a recurrence relation to stably update a running value so
515	* there aren't any large sums that can overflow
516	*/
517	for i, v in data {
518	x := (v - mean) / sd
519	x3 := x * x * x
520	skew += T((x3 - skew) / (T(i) + T(1)))
521	}
522	return skew
523	}
524
525	// quantile calculates quantile points
526	// for more reference
527	// https://en.wikipedia.org/wiki/Quantile
528	pub fn quantile[T](sorted_data []T, f T) !T {
529	if sorted_data.len == 0 {
530	return T(0)
531	}
532	index := f * (sorted_data.len - 1)
533	lhs := int(index)
534	if lhs < 0 \|\| lhs >= sorted_data.len {
535	return error('index out of range')
536	} else if lhs == sorted_data.len - 1 {
537	return sorted_data[lhs]
538	} else {
539	if lhs >= sorted_data.len - 1 {
540	return error('index out of range')
541	}
542	delta := index - T(lhs)
543	return T((1 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)])
544	}
545	}
546