From: Arnaud Giersch Date: Tue, 25 Jan 2011 18:36:31 +0000 (+0100) Subject: Use online algorithms for statistics. X-Git-Tag: v0.1~188^2~5 X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/loba.git/commitdiff_plain/df659008699fecbb4fe12bed55e6fb5beafbaaa4?ds=inline;hp=-c Use online algorithms for statistics. --- df659008699fecbb4fe12bed55e6fb5beafbaaa4 diff --git a/Makefile b/Makefile index 2b0df87..2760531 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,6 @@ SRC.loba := main.cpp \ neighbor.cpp \ options.cpp \ process.cpp \ - statistics.cpp \ version.cpp SRC.simple_async := simple_async.cpp diff --git a/main.cpp b/main.cpp index 4e8864c..67af3fe 100644 --- a/main.cpp +++ b/main.cpp @@ -88,7 +88,7 @@ static void check_for_lost_load() #define PR_STATS(descr, st) \ INFO5("| %.*s: %g / %g / %g", 39, \ descr " total/avg./stddev. at exit.........................", \ - st.get_sum(), st.get_avg(), st.get_stddev()) + st.get_sum(), st.get_mean(), st.get_stddev()) int main(int argc, char* argv[]) { diff --git a/statistics.cpp b/statistics.cpp deleted file mode 100644 index 77f415f..0000000 --- a/statistics.cpp +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include - -#include "statistics.h" - -statistics::statistics() - : up2date(false) -{ -} - -void statistics::reset() -{ - values.clear(); - up2date = false; -} - -void statistics::push(double val) -{ - values.push_back(val); - up2date = false; -} - -size_t statistics::get_count() -{ - return values.size(); -} - -double statistics::get_sum() -{ - update(); - return sum; -} - -double statistics::get_avg() -{ - update(); - return avg; -} - -double statistics::get_stddev() -{ - update(); - return stddev; -} - -void statistics::update() -{ - if (up2date) - return; - - if (values.empty()) { - sum = 0.0; - avg = stddev = 0.0 / 0.0; - } - - using std::tr1::bind; - using std::tr1::placeholders::_1; - - unsigned n = values.size(); - sum = std::accumulate(values.begin(), values.end(), 0.0); - avg = sum / n; - - 
std::vector<double> diff(values); - std::transform(diff.begin(), diff.end(), diff.begin(), - bind(std::minus<double>(), _1, avg)); - double epsilon = std::accumulate(diff.begin(), diff.end(), 0.0); - double square_sum = std::inner_product(diff.begin(), diff.end(), - diff.begin(), 0.0); - double variance = (square_sum - (epsilon * epsilon) / n) / n; - stddev = sqrt(variance); - - up2date = true; -} diff --git a/statistics.h b/statistics.h index 2978747..7e3fb2e 100644 --- a/statistics.h +++ b/statistics.h @@ -1,29 +1,37 @@ #ifndef STATISTICS_H #define STATISTICS_H +#include <cmath> #include <vector> class statistics { public: - statistics(); - - void reset(); - void push(double value); - - size_t get_count(); - double get_sum(); - double get_avg(); - double get_stddev(); + statistics() + : count(0) + , sum(0.0) + , mean(0.0) + , sqdiff_sum(0.0) + { } + + void push(double x) { + double delta = x - mean; + ++count; + sum += x; + mean = sum / count; + sqdiff_sum += delta * (x - mean); + } + + unsigned get_count() const { return count; } + double get_sum() const { return sum; } + double get_mean() const { return mean; } + double get_variance() const { return sqdiff_sum / count; } + double get_stddev() const { return sqrt(get_variance()); } private: - bool up2date; - - std::vector<double> values; - double sum; - double avg; - double stddev; - - void update(); + int count; + double sum; // sum of x_i + double mean; // mean of x_i + double sqdiff_sum; // sum of (x_i - mean)^2 }; #endif // !STATISTICS_H