34 #ifndef OPENMS_MATH_STATISTICS_STATISTICFUNCTIONS_H
35 #define OPENMS_MATH_STATISTICS_STATISTICFUNCTIONS_H
41 #include <boost/accumulators/accumulators.hpp>
42 #include <boost/accumulators/statistics/covariance.hpp>
43 #include <boost/accumulators/statistics/mean.hpp>
44 #include <boost/accumulators/statistics/stats.hpp>
45 #include <boost/accumulators/statistics/variance.hpp>
46 #include <boost/accumulators/statistics/variates/covariate.hpp>
47 #include <boost/function/function_base.hpp>
48 #include <boost/lambda/casts.hpp>
49 #include <boost/lambda/lambda.hpp>
56 using std::iterator_traits;
/**
  @brief Parameter-list fragment of checkIteratorsNotNULL().

  The cross-reference index at the end of this listing places
  "static void checkIteratorsNotNULL(IteratorType begin, IteratorType end)"
  on listing line 71 and describes it as a helper "checking if two iterators
  are not equal". The line carrying the function name and the whole body are
  missing from this excerpt, so the reaction to a failed check cannot be read
  off here.
*/
70 template <
typename IteratorType>
// begin / end: the two iterators the helper compares (per the index entry)
72 IteratorType begin, IteratorType end)
/**
  @brief Parameter-list fragment of checkIteratorsEqual().

  According to the cross-reference index at the end of this listing the
  definition "static void checkIteratorsEqual(IteratorType begin, IteratorType end)"
  sits on listing line 87 and is a helper "checking if two iterators are equal".
  Name line and body are not part of this excerpt.
*/
86 template <
typename IteratorType>
// begin / end: the two iterators the helper compares (per the index entry)
88 IteratorType begin, IteratorType end)
/**
  @brief Fragment of checkIteratorsAreValid().

  The cross-reference index at the end of this listing gives the signature
  "static void checkIteratorsAreValid(IteratorType1 begin_b, IteratorType1 end_b,
  IteratorType2 begin_a, IteratorType2 end_a)" (listing line 103) and the
  description "checking if an iterator and a co-iterator both have a next element".
  The function-name line and the statement executed when the condition below
  holds are not shown.
*/
102 template <
typename IteratorType1,
typename IteratorType2>
// note the order: the first iterator pair is named *_b, the second *_a
104 IteratorType1 begin_b, IteratorType1 end_b,
105 IteratorType2 begin_a, IteratorType2 end_a)
// true when the first range still has an element while the second is exhausted;
// NOTE(review): presumably raises the "Invalid range exception" listed in the
// index - the statement itself is not visible here, confirm against the full file
107 if(begin_b != end_b && begin_a == end_a)
/**
  @brief Calculates the sum of a range of values.

  @param begin Iterator to the first element of the range
  @param end Past-the-end iterator of the range
  @return All elements of [begin, end) added up as double; 0.0 for an empty range
*/
template <typename IteratorType>
static double sum(IteratorType begin, IteratorType end)
{
  // the 0.0 seed makes std::accumulate add in double precision, whatever the element type is
  return std::accumulate(begin, end, 0.0);
}
/**
  @brief Calculates the mean of a range of values.

  @param begin Iterator to the first element of the range
  @param end Past-the-end iterator of the range
  @return sum(begin, end) divided by the number of elements in the range

  Listing lines 132-133 (between the signature and the return) are missing
  from this excerpt.
*/
130 template <
typename IteratorType>
131 static double mean(IteratorType begin, IteratorType end)
// NOTE(review): for begin == end this is 0.0 / 0 unless a guard on the lines
// not shown rejects empty ranges first - confirm against the full file
134 return sum(begin, end) / std::distance(begin, end);
/**
  @brief Calculates the median of a range of values.

  @param begin Iterator to the first element of the range
  @param end Past-the-end iterator of the range
  @param sorted Defaults to false; presumably tells the function that the range
         is already in ascending order (the condition using it is not shown)
  @return For the two-iterator branch: the arithmetic mean of two neighbouring
          middle elements. The return of the single-iterator branch is not shown.

  Several lines (branch conditions, braces, the last return) are missing from
  this excerpt.
*/
148 template <
typename IteratorType>
149 static double median(IteratorType begin, IteratorType end,
bool sorted =
false)
// sorts the caller's range in place, i.e. the input is modified;
// NOTE(review): presumably only executed when !sorted - the guard is not visible
154 std::sort(begin, end);
157 Size size = std::distance(begin, end);
// it1 -> element at index size/2 - 1, it2 -> its successor (index size/2);
// these are the two central elements when size is even
160 IteratorType it1 = begin;
161 std::advance(it1, size / 2 - 1);
162 IteratorType it2 = it1;
163 std::advance(it2, 1);
164 return (*it1 + *it2) / 2.0;
// it -> element at index (size - 1) / 2, the central element when size is odd
168 IteratorType it = begin;
169 std::advance(it, (size - 1) / 2);
/**
  @brief Fragment of quantile1st(): median of the lower part of the range.

  The cross-reference index at the end of this listing gives the signature
  "static double quantile1st(IteratorType begin, IteratorType end, bool sorted=false)"
  (listing line 188). The function-name line and the condition that selects
  between the two returns below are missing from this excerpt.

  @param begin Iterator to the first element; "begin + k" is used below, so the
         iterator type has to support that expression
  @param end Past-the-end iterator of the range
  @param sorted Defaults to false
*/
187 template <
typename IteratorType>
189 IteratorType begin, IteratorType end,
bool sorted =
false)
// sorts the caller's range in place;
// NOTE(review): presumably only executed when !sorted - the guard is not visible
195 std::sort(begin, end);
198 Size size = std::distance(begin, end);
// median over [begin, begin + size/2 - 1); "true" tells median() that the data is already sorted
201 return median(begin, begin + (size/2)-1,
true);
// median over [begin, begin + size/2)
203 return median(begin, begin + (size/2),
true);
/**
  @brief Fragment of quantile3rd(): median of the upper part of the range.

  The cross-reference index at the end of this listing gives the signature
  "static double quantile3rd(IteratorType begin, IteratorType end, bool sorted=false)"
  (listing line 220). The function-name line is missing from this excerpt.

  @param begin Iterator to the first element; "begin + k" is used below, so the
         iterator type has to support that expression
  @param end Past-the-end iterator of the range
  @param sorted Defaults to false
*/
219 template <
typename IteratorType>
221 IteratorType begin, IteratorType end,
bool sorted =
false)
// sorts the caller's range in place;
// NOTE(review): presumably only executed when !sorted - the guard is not visible
226 std::sort(begin, end);
229 Size size = std::distance(begin, end);
// median over [begin + size/2 + 1, end); "true" tells median() that the data is already sorted
230 return median(begin + (size/2)+1, end,
true);
/**
  @brief Fragment of variance(): squared deviations from the mean, divided by (n - 1).

  The cross-reference index at the end of this listing gives the signature
  "static double variance(IteratorType begin, IteratorType end, double mean=...)"
  (listing line 241). The function-name line, the declaration and update of the
  accumulator "sum" and the body of the "if" are missing from this excerpt.

  @param begin Iterator to the first element of the range
  @param end Past-the-end iterator of the range
  @param mean Mean of the range; the default numeric_limits<double>::max()
         acts as a "not supplied" marker (see the comparison below)
*/
240 template <
typename IteratorType>
242 IteratorType begin, IteratorType end,
243 double mean = std::numeric_limits<double>::max())
// detects the default marker value;
// NOTE(review): presumably the mean is computed from the range in that case - the body is not shown
247 if (
mean == std::numeric_limits<double>::max())
251 for (IteratorType iter=begin; iter!=end; ++iter)
// deviation of the current element from the mean
253 double diff = *iter -
mean;
// divisor is (number of elements - 1);
// NOTE(review): a one-element range makes the divisor 0 unless lines not shown prevent it
256 return sum / (std::distance(begin, end)-1);
/**
  @brief Parameter-list fragment of sd().

  The cross-reference index at the end of this listing gives the signature
  "static double sd(IteratorType begin, IteratorType end, double mean=...)"
  (listing line 267) with the description "Calculates the standard deviation
  of a range of values". Name line and body are missing from this excerpt.
*/
266 template <
typename IteratorType>
268 IteratorType begin, IteratorType end,
// same default value for mean as in the variance() and absdev() parameter lists
269 double mean = std::numeric_limits<double>::max())
/**
  @brief Fragment of absdev().

  The cross-reference index at the end of this listing gives the signature
  "static double absdev(IteratorType begin, IteratorType end, double mean=...)"
  (listing line 283) with the description "Calculates the absolute deviation
  of a range of values". The function-name line, the accumulator "sum", the
  loop body and the body of the "if" are missing from this excerpt.

  @param begin Iterator to the first element of the range
  @param end Past-the-end iterator of the range
  @param mean Mean of the range; the default numeric_limits<double>::max()
         acts as a "not supplied" marker (see the comparison below)
*/
282 template <
typename IteratorType>
284 IteratorType begin, IteratorType end,
285 double mean = std::numeric_limits<double>::max())
// detects the default marker value;
// NOTE(review): presumably the mean is computed from the range in that case - the body is not shown
289 if (
mean == std::numeric_limits<double>::max())
// visits every element; what is accumulated per element is not visible here
293 for (IteratorType iter=begin; iter!=end; ++iter)
// accumulated value divided by the number of elements (not n - 1 as in variance())
297 return sum / std::distance(begin, end);
/**
  @brief Fragment of covariance().

  The cross-reference index at the end of this listing gives the signature
  "static double covariance(IteratorType1 begin_a, IteratorType1 end_a,
  IteratorType2 begin_b, IteratorType2 end_b)" (listing line 310) with the
  description "Calculates the covariance of two ranges of values". The
  function-name line, the definitions of sum / mean_a / mean_b and the return
  statement are missing from this excerpt.
*/
309 template <
typename IteratorType1,
typename IteratorType2>
311 IteratorType1 begin_a, IteratorType1 end_a,
312 IteratorType2 begin_b, IteratorType2 end_b)
320 IteratorType1 iter_a = begin_a;
321 IteratorType2 iter_b = begin_b;
// both iterators advance together; only the end of range a terminates the loop
322 for (; iter_a != end_a; ++iter_a, ++iter_b)
// product of the two deviations from the respective means
326 sum += (*iter_a - mean_a) * (*iter_b - mean_b);
// element count taken from range a; how it enters the result is not shown
330 Size n = std::distance(begin_a, end_a);
/**
  @brief Fragment of meanSquareError().

  The cross-reference index at the end of this listing gives the signature
  "static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a,
  IteratorType2 begin_b, IteratorType2 end_b)" (listing line 347). The
  function-name line, the accumulation and the return statement are missing
  from this excerpt.
*/
346 template <
typename IteratorType1,
typename IteratorType2>
348 IteratorType1 begin_a, IteratorType1 end_a,
349 IteratorType2 begin_b, IteratorType2 end_b)
// element count taken from range a (SignedSize is ptrdiff_t per the index)
354 SignedSize dist = std::distance(begin_a, end_a);
356 IteratorType1 iter_a = begin_a;
357 IteratorType2 iter_b = begin_b;
// both iterators advance together; only the end of range a terminates the loop
358 for (; iter_a != end_a; ++iter_a, ++iter_b)
// element-wise difference a - b; NOTE(review): presumably squared and summed on lines not shown
363 double tmp(*iter_a - *iter_b);
/**
  @brief Fragment of classificationRate().

  The cross-reference index at the end of this listing gives the signature
  "static double classificationRate(IteratorType1 begin_a, IteratorType1 end_a,
  IteratorType2 begin_b, IteratorType2 end_b)" (listing line 382) with the
  description "Calculates the classification rate for the values in
  [begin_a, end_a) and [begin_b, end_b)". The function-name line and all
  handling of the counter "correct" are missing from this excerpt.

  @return correct / (number of elements in range a)
*/
381 template <
typename IteratorType1,
typename IteratorType2>
383 IteratorType1 begin_a, IteratorType1 end_a,
384 IteratorType2 begin_b, IteratorType2 end_b)
// element count taken from range a
389 SignedSize dist = std::distance(begin_a, end_a);
391 IteratorType1 iter_a = begin_a;
392 IteratorType2 iter_b = begin_b;
// both iterators advance together; only the end of range a terminates the loop
393 for (; iter_a != end_a; ++iter_a, ++iter_b)
// true when the two values fall on different sides of zero (0 counts as non-negative);
// NOTE(review): the effect on "correct" is on lines not shown
397 if ((*iter_a < 0 && *iter_b >= 0) || (*iter_a >= 0 && *iter_b < 0))
// cast to double so the division is not an integer division
406 return double(correct) / dist;
/**
  @brief Fragment of matthewsCorrelationCoefficient().

  The cross-reference index at the end of this listing gives the signature
  "static double matthewsCorrelationCoefficient(IteratorType1 begin_a,
  IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)"
  (listing line 422). The function-name line, the declarations of
  tp / tn / fp / fn and the statement inside each branch are missing from
  this excerpt.

  @return (tp*tn - fp*fn) / sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
*/
421 template <
typename IteratorType1,
typename IteratorType2>
423 IteratorType1 begin_a, IteratorType1 end_a,
424 IteratorType2 begin_b, IteratorType2 end_b)
433 IteratorType1 iter_a = begin_a;
434 IteratorType2 iter_b = begin_b;
// both iterators advance together; only the end of range a terminates the loop
435 for (; iter_a != end_a; ++iter_a, ++iter_b)
// the four branches cover all sign combinations of (a, b), with 0 treated as
// non-negative; which counter each branch updates is not visible here
440 if (*iter_a < 0 && *iter_b >= 0)
444 else if (*iter_a < 0 && *iter_b < 0)
448 else if (*iter_a >= 0 && *iter_b >= 0)
452 else if (*iter_a >= 0 && *iter_b < 0)
// NOTE(review): the square root is 0 as soon as one of the four sums is 0,
// which makes this a division by zero unless lines not shown handle it
460 return (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
/**
  @brief Fragment of pearsonCorrelationCoefficient().

  The cross-reference index at the end of this listing names the function
  "static double pearsonCorrelationCoefficient(IteratorType1 begin_a,
  IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)"
  (listing line 475). The function-name line and listing lines 478-482,
  493-495 and 501-503 are missing from this excerpt.

  @return sum((a - avg_a) * (b - avg_b)) / sqrt(sum((a - avg_a)^2) * sum((b - avg_b)^2))
*/
474 template <
typename IteratorType1,
typename IteratorType2>
476 IteratorType1 begin_a, IteratorType1 end_a,
477 IteratorType2 begin_b, IteratorType2 end_b)
// element count taken from range a; it is also used as the divisor for the
// average of range b, so both ranges are treated as having this length
483 SignedSize dist = std::distance(begin_a, end_a);
484 double avg_a = std::accumulate(begin_a, end_a, 0.0) / dist;
485 double avg_b = std::accumulate(begin_b, end_b, 0.0) / dist;
487 double numerator = 0;
488 double denominator_a = 0;
489 double denominator_b = 0;
490 IteratorType1 iter_a = begin_a;
491 IteratorType2 iter_b = begin_b;
// both iterators advance together; only the end of range a terminates the loop
492 for (; iter_a != end_a; ++iter_a, ++iter_b)
// deviations of the current pair from the respective averages
496 double temp_a = *iter_a - avg_a;
497 double temp_b = *iter_b - avg_b;
// cross product and the two squared deviations
498 numerator += (temp_a * temp_b);
499 denominator_a += (temp_a * temp_a);
500 denominator_b += (temp_b * temp_b);
// NOTE(review): if one range is constant its squared-deviation sum is 0 and
// this divides by zero, unless lines not shown handle that case
504 return numerator / sqrt(denominator_a * denominator_b);
/**
  @brief Fragment of computeRank().

  The cross-reference index at the end of this listing gives the signature
  "static void computeRank(std::vector< Value > &w)" (listing line 509) with
  the description "Replaces the elements in vector w by their ranks". The
  function-name line, the declarations of i, z and rank, the outer loop header
  and the branch structure around the tie handling are missing from this excerpt.
*/
508 template <
typename Value>
// index of the last element;
// NOTE(review): for an empty w this is 0 - 1, which wraps around if Size is an
// unsigned type - confirm whether callers or lines not shown exclude that case
514 Size n = (w.size() - 1);
// pairs of (original position, value)
516 std::vector<std::pair<Size, Value> > w_idx;
517 for (
Size j = 0; j < w.size(); ++j)
519 w_idx.push_back(std::make_pair(j, w[j]));
// orders the pairs by ascending value (member "second"); the comparator is a
// Boost.Lambda expression: _1.second < _2.second
522 std::sort(w_idx.begin(), w_idx.end(),
523 boost::lambda::ret<bool>((&boost::lambda::_1->*& std::pair<Size, Value>::second) <
524 (&boost::lambda::_2->*& std::pair<Size, Value>::second)));
// neighbour differs by more than a relative tolerance of 1e-7 -> no tie at position i
529 if (fabs(w_idx[i + 1].second - w_idx[i].second) > 0.0000001 * fabs(w_idx[i + 1].second))
// untied element: rank is its 1-based position in the sorted order
531 w_idx[i].second = Value(i + 1);
// advances z past all following elements that equal element i within the same tolerance
537 for (z = i + 1; (z <= n) && fabs(w_idx[z].second - w_idx[i].second) <= 0.0000001 * fabs(w_idx[z].second); ++z)
// mean of the 1-based ranks i+1 ... z shared by the tied elements
541 rank = 0.5 * (i + z + 1);
// every member of the tie run [i, z-1] gets that mean rank
543 for (
Size v = i; v <= z - 1; ++v)
545 w_idx[v].second = rank;
// last sorted element receives rank n + 1; the condition guarding this line is not shown
551 w_idx[n].second = Value(n + 1);
// writes each rank back to the position its value originally had in w
553 for (
Size j = 0; j < w.size(); ++j)
555 w[w_idx[j].first] = w_idx[j].second;
/**
  @brief Fragment of rankCorrelationCoefficient().

  The cross-reference index at the end of this listing names the function
  "static double rankCorrelationCoefficient(IteratorType1 begin_a,
  IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)"
  (listing line 571). The function-name line, listing lines 593-600 (between
  copying the values and computing mu) and the statement executed when a
  squared sum is zero are missing from this excerpt.

  @return sum((d - mu) * (m - mu)) / (sqrt(sum((d - mu)^2)) * sqrt(sum((m - mu)^2)))
          over the entries d of ranks_data and m of ranks_model
*/
570 template <
typename IteratorType1,
typename IteratorType2>
572 IteratorType1 begin_a, IteratorType1 end_a,
573 IteratorType2 begin_b, IteratorType2 end_b)
// element count taken from range a; used to pre-size both work vectors
579 SignedSize dist = std::distance(begin_a, end_a);
580 std::vector<double> ranks_data;
581 ranks_data.reserve(dist);
582 std::vector<double> ranks_model;
583 ranks_model.reserve(dist);
584 IteratorType1 iter_a = begin_a;
585 IteratorType2 iter_b = begin_b;
// both iterators advance together; only the end of range a terminates the loop
586 for (; iter_a != end_a; ++iter_a, ++iter_b)
// range a is copied into ranks_model, range b into ranks_data
591 ranks_model.push_back(*iter_a);
592 ranks_data.push_back(*iter_b);
// NOTE(review): the copied values are presumably converted to ranks (computeRank)
// on the lines not shown - confirm against the full file.
// (size + 1) / 2 is the mean of the ranks 1 ... size
601 double mu =
double(ranks_data.size() + 1) / 2.;
605 double sum_model_data = 0;
606 double sqsum_data = 0;
607 double sqsum_model = 0;
// loop counter is Int (int per the index) while dist is SignedSize (ptrdiff_t)
609 for (
Int i = 0; i < dist; ++i)
// cross product and squared deviations from mu
611 sum_model_data += (ranks_data[i] - mu) * (ranks_model[i] - mu);
612 sqsum_data += (ranks_data[i] - mu) * (ranks_data[i] - mu);
613 sqsum_model += (ranks_model[i] - mu) * (ranks_model[i] - mu);
// catches a zero denominator before the division below; the value produced in that case is not shown
617 if (!sqsum_data || !sqsum_model)
622 return sum_model_data / (sqrt(sqsum_data) * sqrt(sqsum_model));
628 #endif // OPENMS_MATH_STATISTICS_STATISTICFUNCTIONS_H
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b) ...
Definition: StatisticFunctions.h:347
static double variance(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the variance of a range of values.
Definition: StatisticFunctions.h:241
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:118
static void checkIteratorsAreValid(IteratorType1 begin_b, IteratorType1 end_b, IteratorType2 begin_a, IteratorType2 end_a)
Helper function checking if an iterator and a co-iterator both have a next element.
Definition: StatisticFunctions.h:103
static double quantile1st(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the first quartile (median of the lower part) of a range of values.
Definition: StatisticFunctions.h:188
static void computeRank(std::vector< Value > &w)
Replaces the elements in vector w by their ranks.
Definition: StatisticFunctions.h:509
static double covariance(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the covariance of two ranges of values.
Definition: StatisticFunctions.h:310
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:128
static void checkIteratorsEqual(IteratorType begin, IteratorType end)
Helper function checking if two iterators are equal.
Definition: StatisticFunctions.h:87
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:131
static double sd(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the standard deviation of a range of values.
Definition: StatisticFunctions.h:267
static double matthewsCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Matthews correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b).
Definition: StatisticFunctions.h:422
static double absdev(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the absolute deviation of a range of values.
Definition: StatisticFunctions.h:283
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b).
Definition: StatisticFunctions.h:475
static void checkIteratorsNotNULL(IteratorType begin, IteratorType end)
Helper function checking if two iterators are not equal.
Definition: StatisticFunctions.h:71
static double quantile3rd(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the third quartile (median of the upper part) of a range of values.
Definition: StatisticFunctions.h:220
static double median(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the median of a range of values.
Definition: StatisticFunctions.h:149
Invalid range exception.
Definition: Exception.h:286
static double rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b).
Definition: StatisticFunctions.h:571
static double classificationRate(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the classification rate for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:382
int Int
Signed integer type.
Definition: Types.h:96