(index<- ) ./libextra/stats.rs
1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 use sort;
12 use std::cmp;
13 use std::hashmap;
14 use std::io;
15 use std::num;
16
17 // NB: this can probably be rewritten in terms of num::Num
18 // to be less f64-specific.
19
/// Simple descriptive statistics over a univariate set of numeric samples.
pub trait Stats {

    /// Sum of all the samples.
    fn sum(self) -> f64;

    /// Smallest value among the samples.
    fn min(self) -> f64;

    /// Largest value among the samples.
    fn max(self) -> f64;

    /// Arithmetic mean (average) of the samples: the sum divided by the sample count.
    ///
    /// See: https://en.wikipedia.org/wiki/Arithmetic_mean
    fn mean(self) -> f64;

    /// Median of the samples: the value separating the lower half of the samples from the
    /// higher half. Equal to `self.percentile(50.0)`.
    ///
    /// See: https://en.wikipedia.org/wiki/Median
    fn median(self) -> f64;

    /// Variance of the samples: the bias-corrected mean of the squared differences between each
    /// sample and the sample mean. This is the _sample variance_, not the population variance
    /// (which is assumed to be unknown): the sum of squares is divided by `(n-1)` rather than
    /// `n`, which corrects the `(n-1)/n` bias a population-variance calculation would introduce.
    ///
    /// See: https://en.wikipedia.org/wiki/Variance
    fn var(self) -> f64;

    /// Standard deviation: the square root of the sample variance.
    ///
    /// Note: this is not a robust statistic for non-normal distributions. Prefer
    /// `median_abs_dev` for unknown distributions.
    ///
    /// See: https://en.wikipedia.org/wiki/Standard_deviation
    fn std_dev(self) -> f64;

    /// Standard deviation expressed as a percentage of the mean. See `std_dev` and `mean`.
    ///
    /// Note: this is not a robust statistic for non-normal distributions. Prefer
    /// `median_abs_dev_pct` for unknown distributions.
    fn std_dev_pct(self) -> f64;

    /// Scaled median of the absolute deviations of each sample from the sample median. This is
    /// a robust (distribution-agnostic) estimator of sample variability; use it in preference
    /// to `std_dev` when the sample cannot be assumed to be normally distributed. The result is
    /// scaled by the constant `1.4826` so that it can serve as a consistent estimator of the
    /// standard deviation.
    ///
    /// See: http://en.wikipedia.org/wiki/Median_absolute_deviation
    fn median_abs_dev(self) -> f64;

    /// Median absolute deviation expressed as a percentage of the median. See `median_abs_dev`
    /// and `median`.
    fn median_abs_dev_pct(self) -> f64;

    /// Percentile: the value below which `pct` percent of the values in `self` fall. For
    /// example, percentile(95.0) returns the value `v` such that 95% of the samples `s` in
    /// `self` satisfy `s <= v`.
    ///
    /// Calculated by linear interpolation between closest ranks.
    ///
    /// See: http://en.wikipedia.org/wiki/Percentile
    fn percentile(self, pct: f64) -> f64;

    /// Quartiles of the sample: the three values that split the sample into four equal groups,
    /// each holding 1/4 of the data. The middle value is the median. See `median` and
    /// `percentile`. This may compute the 3 quartiles more efficiently than 3 separate calls to
    /// `percentile`, but is otherwise equivalent.
    ///
    /// See also: https://en.wikipedia.org/wiki/Quartile
    fn quartiles(self) -> (f64,f64,f64);

    /// Inter-quartile range: the difference between the 25th percentile (1st quartile) and the
    /// 75th percentile (3rd quartile). See `quartiles`.
    ///
    /// See also: https://en.wikipedia.org/wiki/Interquartile_range
    fn iqr(self) -> f64;
}
101
102 /// Extracted collection of all the summary statistics of a sample set.
103 #[deriving(Clone, Eq)]
104 struct Summary {
105 sum: f64,
106 min: f64,
107 max: f64,
108 mean: f64,
109 median: f64,
110 var: f64,
111 std_dev: f64,
112 std_dev_pct: f64,
113 median_abs_dev: f64,
114 median_abs_dev_pct: f64,
115 quartiles: (f64,f64,f64),
116 iqr: f64,
117 }
118
119 impl Summary {
120
121 /// Construct a new summary of a sample set.
122 pub fn new(samples: &[f64]) -> Summary {
123 Summary {
124 sum: samples.sum(),
125 min: samples.min(),
126 max: samples.max(),
127 mean: samples.mean(),
128 median: samples.median(),
129 var: samples.var(),
130 std_dev: samples.std_dev(),
131 std_dev_pct: samples.std_dev_pct(),
132 median_abs_dev: samples.median_abs_dev(),
133 median_abs_dev_pct: samples.median_abs_dev_pct(),
134 quartiles: samples.quartiles(),
135 iqr: samples.iqr()
136 }
137 }
138 }
139
140 impl<'self> Stats for &'self [f64] {
141
142 fn sum(self) -> f64 {
143 self.iter().fold(0.0, |p,q| p + *q)
144 }
145
146 fn min(self) -> f64 {
147 assert!(self.len() != 0);
148 self.iter().fold(self[0], |p,q| cmp::min(p, *q))
149 }
150
151 fn max(self) -> f64 {
152 assert!(self.len() != 0);
153 self.iter().fold(self[0], |p,q| cmp::max(p, *q))
154 }
155
156 fn mean(self) -> f64 {
157 assert!(self.len() != 0);
158 self.sum() / (self.len() as f64)
159 }
160
161 fn median(self) -> f64 {
162 self.percentile(50.0)
163 }
164
165 fn var(self) -> f64 {
166 if self.len() < 2 {
167 0.0
168 } else {
169 let mean = self.mean();
170 let mut v = 0.0;
171 for s in self.iter() {
172 let x = *s - mean;
173 v += x*x;
174 }
175 // NB: this is _supposed to be_ len-1, not len. If you
176 // change it back to len, you will be calculating a
177 // population variance, not a sample variance.
178 v/((self.len()-1) as f64)
179 }
180 }
181
182 fn std_dev(self) -> f64 {
183 self.var().sqrt()
184 }
185
186 fn std_dev_pct(self) -> f64 {
187 (self.std_dev() / self.mean()) * 100.0
188 }
189
190 fn median_abs_dev(self) -> f64 {
191 let med = self.median();
192 let abs_devs = self.map(|&v| num::abs(med - v));
193 // This constant is derived by smarter statistics brains than me, but it is
194 // consistent with how R and other packages treat the MAD.
195 abs_devs.median() * 1.4826
196 }
197
198 fn median_abs_dev_pct(self) -> f64 {
199 (self.median_abs_dev() / self.median()) * 100.0
200 }
201
202 fn percentile(self, pct: f64) -> f64 {
203 let mut tmp = self.to_owned();
204 sort::tim_sort(tmp);
205 percentile_of_sorted(tmp, pct)
206 }
207
208 fn quartiles(self) -> (f64,f64,f64) {
209 let mut tmp = self.to_owned();
210 sort::tim_sort(tmp);
211 let a = percentile_of_sorted(tmp, 25.0);
212 let b = percentile_of_sorted(tmp, 50.0);
213 let c = percentile_of_sorted(tmp, 75.0);
214 (a,b,c)
215 }
216
217 fn iqr(self) -> f64 {
218 let (a,_,c) = self.quartiles();
219 c - a
220 }
221 }
222
223
224 // Helper function: extract a value representing the `pct` percentile of a sorted sample-set, using
225 // linear interpolation. If samples are not sorted, return nonsensical value.
226 fn percentile_of_sorted(sorted_samples: &[f64],
227 pct: f64) -> f64 {
228 assert!(sorted_samples.len() != 0);
229 if sorted_samples.len() == 1 {
230 return sorted_samples[0];
231 }
232 assert!(0.0 <= pct);
233 assert!(pct <= 100.0);
234 if pct == 100.0 {
235 return sorted_samples[sorted_samples.len() - 1];
236 }
237 let rank = (pct / 100.0) * ((sorted_samples.len() - 1) as f64);
238 let lrank = rank.floor();
239 let d = rank - lrank;
240 let n = lrank as uint;
241 let lo = sorted_samples[n];
242 let hi = sorted_samples[n+1];
243 lo + (hi - lo) * d
244 }
245
246
247 /// Winsorize a set of samples, replacing values above the `100-pct` percentile and below the `pct`
248 /// percentile with those percentiles themselves. This is a way of minimizing the effect of
249 /// outliers, at the cost of biasing the sample. It differs from trimming in that it does not
250 /// change the number of samples, just changes the values of those that are outliers.
251 ///
252 /// See: http://en.wikipedia.org/wiki/Winsorising
253 pub fn winsorize(samples: &mut [f64], pct: f64) {
254 let mut tmp = samples.to_owned();
255 sort::tim_sort(tmp);
256 let lo = percentile_of_sorted(tmp, pct);
257 let hi = percentile_of_sorted(tmp, 100.0-pct);
258 for samp in samples.mut_iter() {
259 if *samp > hi {
260 *samp = hi
261 } else if *samp < lo {
262 *samp = lo
263 }
264 }
265 }
266
267 /// Render writes the min, max and quartiles of the provided `Summary` to the provided `Writer`.
268 pub fn write_5_number_summary(w: @io::Writer, s: &Summary) {
269 let (q1,q2,q3) = s.quartiles;
270 w.write_str(fmt!("(min=%f, q1=%f, med=%f, q3=%f, max=%f)",
271 s.min as float,
272 q1 as float,
273 q2 as float,
274 q3 as float,
275 s.max as float));
276 }
277
278 /// Render a boxplot to the provided writer. The boxplot shows the min, max and quartiles of the
279 /// provided `Summary` (thus includes the mean) and is scaled to display within the range of the
280 /// nearest multiple-of-a-power-of-ten above and below the min and max of possible values, and
281 /// target `width_hint` characters of display (though it will be wider if necessary).
282 ///
283 /// As an example, the summary with 5-number-summary `(min=15, q1=17, med=20, q3=24, max=31)` might
284 /// display as:
285 ///
286 /// ~~~~
287 /// 10 | [--****#******----------] | 40
288 /// ~~~~
289
290 pub fn write_boxplot(w: @io::Writer, s: &Summary, width_hint: uint) {
291
292 let (q1,q2,q3) = s.quartiles;
293
294 // the .abs() handles the case where numbers are negative
295 let lomag = (10.0_f64).pow(&(s.min.abs().log10().floor()));
296 let himag = (10.0_f64).pow(&(s.max.abs().log10().floor()));
297
298 // need to consider when the limit is zero
299 let lo = if lomag == 0.0 {
300 0.0
301 } else {
302 (s.min / lomag).floor() * lomag
303 };
304
305 let hi = if himag == 0.0 {
306 0.0
307 } else {
308 (s.max / himag).ceil() * himag
309 };
310
311 let range = hi - lo;
312
313 let lostr = lo.to_str();
314 let histr = hi.to_str();
315
316 let overhead_width = lostr.len() + histr.len() + 4;
317 let range_width = width_hint - overhead_width;;
318 let char_step = range / (range_width as f64);
319
320 w.write_str(lostr);
321 w.write_char(' ');
322 w.write_char('|');
323
324 let mut c = 0;
325 let mut v = lo;
326
327 while c < range_width && v < s.min {
328 w.write_char(' ');
329 v += char_step;
330 c += 1;
331 }
332 w.write_char('[');
333 c += 1;
334 while c < range_width && v < q1 {
335 w.write_char('-');
336 v += char_step;
337 c += 1;
338 }
339 while c < range_width && v < q2 {
340 w.write_char('*');
341 v += char_step;
342 c += 1;
343 }
344 w.write_char('#');
345 c += 1;
346 while c < range_width && v < q3 {
347 w.write_char('*');
348 v += char_step;
349 c += 1;
350 }
351 while c < range_width && v < s.max {
352 w.write_char('-');
353 v += char_step;
354 c += 1;
355 }
356 w.write_char(']');
357 while c < range_width {
358 w.write_char(' ');
359 v += char_step;
360 c += 1;
361 }
362
363 w.write_char('|');
364 w.write_char(' ');
365 w.write_str(histr);
366 }
367
368 /// Returns a HashMap with the number of occurrences of every element in the
369 /// sequence that the iterator exposes.
370 pub fn freq_count<T: Iterator<U>, U: Eq+Hash>(mut iter: T) -> hashmap::HashMap<U, uint> {
371 let mut map: hashmap::HashMap<U,uint> = hashmap::HashMap::new();
372 for elem in iter {
373 map.insert_or_update_with(elem, 1, |_, count| *count += 1);
374 }
375 map
376 }
377
378 // Test vectors generated from R, using the script src/etc/stat-test-vectors.r.
379
380 #[cfg(test)]
381 mod tests {
382
383 use stats::Stats;
384 use stats::Summary;
385 use stats::write_5_number_summary;
386 use stats::write_boxplot;
387 use std::io;
388
389 fn check(samples: &[f64], summ: &Summary) {
390
391 let summ2 = Summary::new(samples);
392
393 let w = io::stdout();
394 w.write_char('\n');
395 write_5_number_summary(w, &summ2);
396 w.write_char('\n');
397 write_boxplot(w, &summ2, 50);
398 w.write_char('\n');
399
400 assert_eq!(summ.sum, summ2.sum);
401 assert_eq!(summ.min, summ2.min);
402 assert_eq!(summ.max, summ2.max);
403 assert_eq!(summ.mean, summ2.mean);
404 assert_eq!(summ.median, summ2.median);
405
406 // We needed a few more digits to get exact equality on these
407 // but they're within float epsilon, which is 1.0e-6.
408 assert_approx_eq!(summ.var, summ2.var);
409 assert_approx_eq!(summ.std_dev, summ2.std_dev);
410 assert_approx_eq!(summ.std_dev_pct, summ2.std_dev_pct);
411 assert_approx_eq!(summ.median_abs_dev, summ2.median_abs_dev);
412 assert_approx_eq!(summ.median_abs_dev_pct, summ2.median_abs_dev_pct);
413
414 assert_eq!(summ.quartiles, summ2.quartiles);
415 assert_eq!(summ.iqr, summ2.iqr);
416 }
417
418 #[test]
419 fn test_norm2() {
420 let val = &[
421 958.0000000000,
422 924.0000000000,
423 ];
424 let summ = &Summary {
425 sum: 1882.0000000000,
426 min: 924.0000000000,
427 max: 958.0000000000,
428 mean: 941.0000000000,
429 median: 941.0000000000,
430 var: 578.0000000000,
431 std_dev: 24.0416305603,
432 std_dev_pct: 2.5549022912,
433 median_abs_dev: 25.2042000000,
434 median_abs_dev_pct: 2.6784484591,
435 quartiles: (932.5000000000,941.0000000000,949.5000000000),
436 iqr: 17.0000000000,
437 };
438 check(val, summ);
439 }
440 #[test]
441 fn test_norm10narrow() {
442 let val = &[
443 966.0000000000,
444 985.0000000000,
445 1110.0000000000,
446 848.0000000000,
447 821.0000000000,
448 975.0000000000,
449 962.0000000000,
450 1157.0000000000,
451 1217.0000000000,
452 955.0000000000,
453 ];
454 let summ = &Summary {
455 sum: 9996.0000000000,
456 min: 821.0000000000,
457 max: 1217.0000000000,
458 mean: 999.6000000000,
459 median: 970.5000000000,
460 var: 16050.7111111111,
461 std_dev: 126.6914010938,
462 std_dev_pct: 12.6742097933,
463 median_abs_dev: 102.2994000000,
464 median_abs_dev_pct: 10.5408964451,
465 quartiles: (956.7500000000,970.5000000000,1078.7500000000),
466 iqr: 122.0000000000,
467 };
468 check(val, summ);
469 }
470 #[test]
471 fn test_norm10medium() {
472 let val = &[
473 954.0000000000,
474 1064.0000000000,
475 855.0000000000,
476 1000.0000000000,
477 743.0000000000,
478 1084.0000000000,
479 704.0000000000,
480 1023.0000000000,
481 357.0000000000,
482 869.0000000000,
483 ];
484 let summ = &Summary {
485 sum: 8653.0000000000,
486 min: 357.0000000000,
487 max: 1084.0000000000,
488 mean: 865.3000000000,
489 median: 911.5000000000,
490 var: 48628.4555555556,
491 std_dev: 220.5186059170,
492 std_dev_pct: 25.4846418487,
493 median_abs_dev: 195.7032000000,
494 median_abs_dev_pct: 21.4704552935,
495 quartiles: (771.0000000000,911.5000000000,1017.2500000000),
496 iqr: 246.2500000000,
497 };
498 check(val, summ);
499 }
500 #[test]
501 fn test_norm10wide() {
502 let val = &[
503 505.0000000000,
504 497.0000000000,
505 1591.0000000000,
506 887.0000000000,
507 1026.0000000000,
508 136.0000000000,
509 1580.0000000000,
510 940.0000000000,
511 754.0000000000,
512 1433.0000000000,
513 ];
514 let summ = &Summary {
515 sum: 9349.0000000000,
516 min: 136.0000000000,
517 max: 1591.0000000000,
518 mean: 934.9000000000,
519 median: 913.5000000000,
520 var: 239208.9888888889,
521 std_dev: 489.0899599142,
522 std_dev_pct: 52.3146817750,
523 median_abs_dev: 611.5725000000,
524 median_abs_dev_pct: 66.9482758621,
525 quartiles: (567.2500000000,913.5000000000,1331.2500000000),
526 iqr: 764.0000000000,
527 };
528 check(val, summ);
529 }
530 #[test]
531 fn test_norm25verynarrow() {
532 let val = &[
533 991.0000000000,
534 1018.0000000000,
535 998.0000000000,
536 1013.0000000000,
537 974.0000000000,
538 1007.0000000000,
539 1014.0000000000,
540 999.0000000000,
541 1011.0000000000,
542 978.0000000000,
543 985.0000000000,
544 999.0000000000,
545 983.0000000000,
546 982.0000000000,
547 1015.0000000000,
548 1002.0000000000,
549 977.0000000000,
550 948.0000000000,
551 1040.0000000000,
552 974.0000000000,
553 996.0000000000,
554 989.0000000000,
555 1015.0000000000,
556 994.0000000000,
557 1024.0000000000,
558 ];
559 let summ = &Summary {
560 sum: 24926.0000000000,
561 min: 948.0000000000,
562 max: 1040.0000000000,
563 mean: 997.0400000000,
564 median: 998.0000000000,
565 var: 393.2066666667,
566 std_dev: 19.8294393937,
567 std_dev_pct: 1.9888308788,
568 median_abs_dev: 22.2390000000,
569 median_abs_dev_pct: 2.2283567134,
570 quartiles: (983.0000000000,998.0000000000,1013.0000000000),
571 iqr: 30.0000000000,
572 };
573 check(val, summ);
574 }
575 #[test]
576 fn test_exp10a() {
577 let val = &[
578 23.0000000000,
579 11.0000000000,
580 2.0000000000,
581 57.0000000000,
582 4.0000000000,
583 12.0000000000,
584 5.0000000000,
585 29.0000000000,
586 3.0000000000,
587 21.0000000000,
588 ];
589 let summ = &Summary {
590 sum: 167.0000000000,
591 min: 2.0000000000,
592 max: 57.0000000000,
593 mean: 16.7000000000,
594 median: 11.5000000000,
595 var: 287.7888888889,
596 std_dev: 16.9643416875,
597 std_dev_pct: 101.5828843560,
598 median_abs_dev: 13.3434000000,
599 median_abs_dev_pct: 116.0295652174,
600 quartiles: (4.2500000000,11.5000000000,22.5000000000),
601 iqr: 18.2500000000,
602 };
603 check(val, summ);
604 }
605 #[test]
606 fn test_exp10b() {
607 let val = &[
608 24.0000000000,
609 17.0000000000,
610 6.0000000000,
611 38.0000000000,
612 25.0000000000,
613 7.0000000000,
614 51.0000000000,
615 2.0000000000,
616 61.0000000000,
617 32.0000000000,
618 ];
619 let summ = &Summary {
620 sum: 263.0000000000,
621 min: 2.0000000000,
622 max: 61.0000000000,
623 mean: 26.3000000000,
624 median: 24.5000000000,
625 var: 383.5666666667,
626 std_dev: 19.5848580967,
627 std_dev_pct: 74.4671410520,
628 median_abs_dev: 22.9803000000,
629 median_abs_dev_pct: 93.7971428571,
630 quartiles: (9.5000000000,24.5000000000,36.5000000000),
631 iqr: 27.0000000000,
632 };
633 check(val, summ);
634 }
635 #[test]
636 fn test_exp10c() {
637 let val = &[
638 71.0000000000,
639 2.0000000000,
640 32.0000000000,
641 1.0000000000,
642 6.0000000000,
643 28.0000000000,
644 13.0000000000,
645 37.0000000000,
646 16.0000000000,
647 36.0000000000,
648 ];
649 let summ = &Summary {
650 sum: 242.0000000000,
651 min: 1.0000000000,
652 max: 71.0000000000,
653 mean: 24.2000000000,
654 median: 22.0000000000,
655 var: 458.1777777778,
656 std_dev: 21.4050876611,
657 std_dev_pct: 88.4507754589,
658 median_abs_dev: 21.4977000000,
659 median_abs_dev_pct: 97.7168181818,
660 quartiles: (7.7500000000,22.0000000000,35.0000000000),
661 iqr: 27.2500000000,
662 };
663 check(val, summ);
664 }
665 #[test]
666 fn test_exp25() {
667 let val = &[
668 3.0000000000,
669 24.0000000000,
670 1.0000000000,
671 19.0000000000,
672 7.0000000000,
673 5.0000000000,
674 30.0000000000,
675 39.0000000000,
676 31.0000000000,
677 13.0000000000,
678 25.0000000000,
679 48.0000000000,
680 1.0000000000,
681 6.0000000000,
682 42.0000000000,
683 63.0000000000,
684 2.0000000000,
685 12.0000000000,
686 108.0000000000,
687 26.0000000000,
688 1.0000000000,
689 7.0000000000,
690 44.0000000000,
691 25.0000000000,
692 11.0000000000,
693 ];
694 let summ = &Summary {
695 sum: 593.0000000000,
696 min: 1.0000000000,
697 max: 108.0000000000,
698 mean: 23.7200000000,
699 median: 19.0000000000,
700 var: 601.0433333333,
701 std_dev: 24.5161851301,
702 std_dev_pct: 103.3565983562,
703 median_abs_dev: 19.2738000000,
704 median_abs_dev_pct: 101.4410526316,
705 quartiles: (6.0000000000,19.0000000000,31.0000000000),
706 iqr: 25.0000000000,
707 };
708 check(val, summ);
709 }
710 #[test]
711 fn test_binom25() {
712 let val = &[
713 18.0000000000,
714 17.0000000000,
715 27.0000000000,
716 15.0000000000,
717 21.0000000000,
718 25.0000000000,
719 17.0000000000,
720 24.0000000000,
721 25.0000000000,
722 24.0000000000,
723 26.0000000000,
724 26.0000000000,
725 23.0000000000,
726 15.0000000000,
727 23.0000000000,
728 17.0000000000,
729 18.0000000000,
730 18.0000000000,
731 21.0000000000,
732 16.0000000000,
733 15.0000000000,
734 31.0000000000,
735 20.0000000000,
736 17.0000000000,
737 15.0000000000,
738 ];
739 let summ = &Summary {
740 sum: 514.0000000000,
741 min: 15.0000000000,
742 max: 31.0000000000,
743 mean: 20.5600000000,
744 median: 20.0000000000,
745 var: 20.8400000000,
746 std_dev: 4.5650848842,
747 std_dev_pct: 22.2037202539,
748 median_abs_dev: 5.9304000000,
749 median_abs_dev_pct: 29.6520000000,
750 quartiles: (17.0000000000,20.0000000000,24.0000000000),
751 iqr: 7.0000000000,
752 };
753 check(val, summ);
754 }
755 #[test]
756 fn test_pois25lambda30() {
757 let val = &[
758 27.0000000000,
759 33.0000000000,
760 34.0000000000,
761 34.0000000000,
762 24.0000000000,
763 39.0000000000,
764 28.0000000000,
765 27.0000000000,
766 31.0000000000,
767 28.0000000000,
768 38.0000000000,
769 21.0000000000,
770 33.0000000000,
771 36.0000000000,
772 29.0000000000,
773 37.0000000000,
774 32.0000000000,
775 34.0000000000,
776 31.0000000000,
777 39.0000000000,
778 25.0000000000,
779 31.0000000000,
780 32.0000000000,
781 40.0000000000,
782 24.0000000000,
783 ];
784 let summ = &Summary {
785 sum: 787.0000000000,
786 min: 21.0000000000,
787 max: 40.0000000000,
788 mean: 31.4800000000,
789 median: 32.0000000000,
790 var: 26.5933333333,
791 std_dev: 5.1568724372,
792 std_dev_pct: 16.3814245145,
793 median_abs_dev: 5.9304000000,
794 median_abs_dev_pct: 18.5325000000,
795 quartiles: (28.0000000000,32.0000000000,34.0000000000),
796 iqr: 6.0000000000,
797 };
798 check(val, summ);
799 }
800 #[test]
801 fn test_pois25lambda40() {
802 let val = &[
803 42.0000000000,
804 50.0000000000,
805 42.0000000000,
806 46.0000000000,
807 34.0000000000,
808 45.0000000000,
809 34.0000000000,
810 49.0000000000,
811 39.0000000000,
812 28.0000000000,
813 40.0000000000,
814 35.0000000000,
815 37.0000000000,
816 39.0000000000,
817 46.0000000000,
818 44.0000000000,
819 32.0000000000,
820 45.0000000000,
821 42.0000000000,
822 37.0000000000,
823 48.0000000000,
824 42.0000000000,
825 33.0000000000,
826 42.0000000000,
827 48.0000000000,
828 ];
829 let summ = &Summary {
830 sum: 1019.0000000000,
831 min: 28.0000000000,
832 max: 50.0000000000,
833 mean: 40.7600000000,
834 median: 42.0000000000,
835 var: 34.4400000000,
836 std_dev: 5.8685603004,
837 std_dev_pct: 14.3978417577,
838 median_abs_dev: 5.9304000000,
839 median_abs_dev_pct: 14.1200000000,
840 quartiles: (37.0000000000,42.0000000000,45.0000000000),
841 iqr: 8.0000000000,
842 };
843 check(val, summ);
844 }
845 #[test]
846 fn test_pois25lambda50() {
847 let val = &[
848 45.0000000000,
849 43.0000000000,
850 44.0000000000,
851 61.0000000000,
852 51.0000000000,
853 53.0000000000,
854 59.0000000000,
855 52.0000000000,
856 49.0000000000,
857 51.0000000000,
858 51.0000000000,
859 50.0000000000,
860 49.0000000000,
861 56.0000000000,
862 42.0000000000,
863 52.0000000000,
864 51.0000000000,
865 43.0000000000,
866 48.0000000000,
867 48.0000000000,
868 50.0000000000,
869 42.0000000000,
870 43.0000000000,
871 42.0000000000,
872 60.0000000000,
873 ];
874 let summ = &Summary {
875 sum: 1235.0000000000,
876 min: 42.0000000000,
877 max: 61.0000000000,
878 mean: 49.4000000000,
879 median: 50.0000000000,
880 var: 31.6666666667,
881 std_dev: 5.6273143387,
882 std_dev_pct: 11.3913245723,
883 median_abs_dev: 4.4478000000,
884 median_abs_dev_pct: 8.8956000000,
885 quartiles: (44.0000000000,50.0000000000,52.0000000000),
886 iqr: 8.0000000000,
887 };
888 check(val, summ);
889 }
890 #[test]
891 fn test_unif25() {
892 let val = &[
893 99.0000000000,
894 55.0000000000,
895 92.0000000000,
896 79.0000000000,
897 14.0000000000,
898 2.0000000000,
899 33.0000000000,
900 49.0000000000,
901 3.0000000000,
902 32.0000000000,
903 84.0000000000,
904 59.0000000000,
905 22.0000000000,
906 86.0000000000,
907 76.0000000000,
908 31.0000000000,
909 29.0000000000,
910 11.0000000000,
911 41.0000000000,
912 53.0000000000,
913 45.0000000000,
914 44.0000000000,
915 98.0000000000,
916 98.0000000000,
917 7.0000000000,
918 ];
919 let summ = &Summary {
920 sum: 1242.0000000000,
921 min: 2.0000000000,
922 max: 99.0000000000,
923 mean: 49.6800000000,
924 median: 45.0000000000,
925 var: 1015.6433333333,
926 std_dev: 31.8691595957,
927 std_dev_pct: 64.1488719719,
928 median_abs_dev: 45.9606000000,
929 median_abs_dev_pct: 102.1346666667,
930 quartiles: (29.0000000000,45.0000000000,79.0000000000),
931 iqr: 50.0000000000,
932 };
933 check(val, summ);
934 }
935
936 #[test]
937 fn test_boxplot_nonpositive() {
938 fn t(s: &Summary, expected: ~str) {
939 let out = do io::with_str_writer |w| {
940 write_boxplot(w, s, 30)
941 };
942
943 assert_eq!(out, expected);
944 }
945
946 t(&Summary::new([-2.0, -1.0]), ~"-2 |[------******#*****---]| -1");
947 t(&Summary::new([0.0, 2.0]), ~"0 |[-------*****#*******---]| 2");
948 t(&Summary::new([-2.0, 0.0]), ~"-2 |[------******#******---]| 0");
949
950 }
951
952 }
libextra/stats.rs:252:50-252:50 -fn- definition:
/// See: http://en.wikipedia.org/wiki/Winsorising
pub fn winsorize(samples: &mut [f64], pct: f64) {
references:-libextra/test.rs:
1082: stats::winsorize(samples, 5.0);
1074: stats::winsorize(samples, 5.0);
libextra/stats.rs:103:23-103:23 -struct- definition:
#[deriving(Clone, Eq)]
struct Summary {
references:-103: #[deriving(Clone, Eq)]
103: #[deriving(Clone, Eq)]
103: #[deriving(Clone, Eq)]
268: pub fn write_5_number_summary(w: @io::Writer, s: &Summary) {
103: #[deriving(Clone, Eq)]
103: #[deriving(Clone, Eq)]
103: #[deriving(Clone, Eq)]
119: impl Summary {
122: pub fn new(samples: &[f64]) -> Summary {
103: #[deriving(Clone, Eq)]
123: Summary {
103: #[deriving(Clone, Eq)]
103: #[deriving(Clone, Eq)]
290: pub fn write_boxplot(w: @io::Writer, s: &Summary, width_hint: uint) {
103: #[deriving(Clone, Eq)]
103: #[deriving(Clone, Eq)]
libextra/test.rs:
1050: pub fn auto_bench(&mut self, f: &fn(&mut BenchHarness)) -> stats::Summary {
296: ns_iter_summ: stats::Summary,
libextra/stats.rs:20:94-20:94 -trait- definition:
/// Trait that provides simple descriptive statistics on a univariate set of numeric samples.
pub trait Stats {
references:-140: impl<'self> Stats for &'self [f64] {
libextra/stats.rs:225:78-225:78 -fn- definition:
// linear interpolation. If samples are not sorted, return nonsensical value.
fn percentile_of_sorted(sorted_samples: &[f64],
references:-205: percentile_of_sorted(tmp, pct)
212: let b = percentile_of_sorted(tmp, 50.0);
257: let hi = percentile_of_sorted(tmp, 100.0-pct);
256: let lo = percentile_of_sorted(tmp, pct);
211: let a = percentile_of_sorted(tmp, 25.0);
213: let c = percentile_of_sorted(tmp, 75.0);