From df659008699fecbb4fe12bed55e6fb5beafbaaa4 Mon Sep 17 00:00:00 2001
From: Arnaud Giersch <arnaud.giersch@iut-bm.univ-fcomte.fr>
Date: Tue, 25 Jan 2011 19:36:31 +0100
Subject: [PATCH] Use online algorithms for statistics.

---
 Makefile       |  1 -
 main.cpp       |  2 +-
 statistics.cpp | 75 --------------------------------------------------
 statistics.h   | 42 ++++++++++++++++------------
 4 files changed, 26 insertions(+), 94 deletions(-)
 delete mode 100644 statistics.cpp

diff --git a/Makefile b/Makefile
index 2b0df87..2760531 100644
--- a/Makefile
+++ b/Makefile
@@ -38,7 +38,6 @@ SRC.loba := main.cpp		\
 	neighbor.cpp		\
 	options.cpp		\
 	process.cpp		\
-	statistics.cpp		\
 	version.cpp
 
 SRC.simple_async := simple_async.cpp
diff --git a/main.cpp b/main.cpp
index 4e8864c..67af3fe 100644
--- a/main.cpp
+++ b/main.cpp
@@ -88,7 +88,7 @@ static void check_for_lost_load()
 #define PR_STATS(descr, st)                                             \
     INFO5("| %.*s: %g / %g / %g", 39,                                   \
           descr " total/avg./stddev. at exit.........................", \
-          st.get_sum(), st.get_avg(), st.get_stddev())
+          st.get_sum(), st.get_mean(), st.get_stddev())
 
 int main(int argc, char* argv[])
 {
diff --git a/statistics.cpp b/statistics.cpp
deleted file mode 100644
index 77f415f..0000000
--- a/statistics.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <algorithm>
-#include <cmath>
-#include <tr1/functional>
-#include <numeric>
-
-#include "statistics.h"
-
-statistics::statistics()
-    : up2date(false)
-{
-}
-
-void statistics::reset()
-{
-    values.clear();
-    up2date = false;
-}
-
-void statistics::push(double val)
-{
-    values.push_back(val);
-    up2date = false;
-}
-
-size_t statistics::get_count()
-{
-    return values.size();
-}
-
-double statistics::get_sum()
-{
-    update();
-    return sum;
-}
-
-double statistics::get_avg()
-{
-    update();
-    return avg;
-}
-
-double statistics::get_stddev()
-{
-    update();
-    return stddev;
-}
-
-void statistics::update()
-{
-    if (up2date)
-        return;
-
-    if (values.empty()) {
-        sum = 0.0;
-        avg = stddev = 0.0 / 0.0;
-    }
-
-    using std::tr1::bind;
-    using std::tr1::placeholders::_1;
-
-    unsigned n = values.size();
-    sum = std::accumulate(values.begin(), values.end(), 0.0);
-    avg = sum / n;
-
-    std::vector<double> diff(values);
-    std::transform(diff.begin(), diff.end(), diff.begin(),
-                   bind(std::minus<double>(), _1, avg));
-    double epsilon = std::accumulate(diff.begin(), diff.end(), 0.0);
-    double square_sum = std::inner_product(diff.begin(), diff.end(),
-                                           diff.begin(), 0.0);
-    double variance = (square_sum - (epsilon * epsilon) / n) / n;
-    stddev = sqrt(variance);
-
-    up2date = true;
-}
diff --git a/statistics.h b/statistics.h
index 2978747..7e3fb2e 100644
--- a/statistics.h
+++ b/statistics.h
@@ -1,29 +1,37 @@
 #ifndef STATISTICS_H
 #define STATISTICS_H
 
+#include <cmath>
 #include <vector>
 
 class statistics {
 public:
-    statistics();
-
-    void reset();
-    void push(double value);
-
-    size_t get_count();
-    double get_sum();
-    double get_avg();
-    double get_stddev();
+    statistics()                // start empty: zero samples, every accumulator at zero
+        : count(0)
+        , sum(0.0)
+        , mean(0.0)
+        , sqdiff_sum(0.0)
+    { }
+
+    void push(double x) {       // fold one sample into the accumulators; the sample itself is not stored
+        double delta = x - mean;            // deviation of x from the mean as it was before this sample
+        ++count;
+        sum += x;
+        mean = sum / count;                 // mean now includes x
+        sqdiff_sum += delta * (x - mean);   // (x - old mean) * (x - new mean): incremental update of the squared-deviation sum
+    }
+
+    unsigned get_count() const  { return count;                } // samples pushed so far; NOTE(review): member is int, converted to unsigned here
+    double get_sum() const      { return sum;                  } // sum of all pushed samples
+    double get_mean() const     { return mean;                 } // mean as of the last push; 0.0 if nothing was pushed
+    double get_variance() const { return sqdiff_sum / count;   } // divides by count (not count - 1); NOTE(review): no guard for count == 0
+    double get_stddev() const   { return sqrt(get_variance()); } // square root of get_variance(); NOTE(review): unqualified sqrt, <cmath> only guarantees std::sqrt
 
 private:
-    bool up2date;
-
-    std::vector<double> values;
-    double sum;
-    double avg;
-    double stddev;
-
-    void update();
+    int count;                  // number of samples pushed so far
+    double sum;                 // running sum of x_i
+    double mean;                // running mean of x_i, recomputed as sum / count on every push
+    double sqdiff_sum;          // running sum of (x_i - mean)^2, maintained incrementally by push()
 };
 
 #endif // !STATISTICS_H
-- 
2.39.5