AND Private Git Repository - loba.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Add statistics about convergence (see parameter -%).
author: Arnaud Giersch <arnaud.giersch@iut-bm.univ-fcomte.fr>
Mon, 3 Oct 2011 15:35:25 +0000 (17:35 +0200)
committer: Arnaud Giersch <arnaud.giersch@iut-bm.univ-fcomte.fr>
Mon, 3 Oct 2011 15:39:36 +0000 (17:39 +0200)
main.cpp
options.cpp
options.h
process.cpp
process.h

index 9ad4b53aacf49fb72d12864d86e7d99174e28986..12f1ae245140db23fa59c415f148edd5e34c7893 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -61,6 +61,7 @@ namespace {
     statistics ctrl_recv_amount;
     statistics ctrl_send_count;
     statistics ctrl_recv_count;
     statistics ctrl_recv_amount;
     statistics ctrl_send_count;
     statistics ctrl_recv_count;
+    statistics convergence;
 
 }
 
 
 }
 
@@ -90,6 +91,9 @@ static int simulation_main(int argc, char* argv[])
         ctrl_recv_amount.push(proc->get_ctrl_recv_amount());
         ctrl_send_count.push(proc->get_ctrl_send_count());
         ctrl_recv_count.push(proc->get_ctrl_recv_count());
         ctrl_recv_amount.push(proc->get_ctrl_recv_amount());
         ctrl_send_count.push(proc->get_ctrl_send_count());
         ctrl_recv_count.push(proc->get_ctrl_recv_count());
+        double c = proc->get_convergence();
+        if (c != -1)
+            convergence.push(c);
 
         // Synchronization barrier...
         // The goal is to circumvent a limitation in SimGrid (at least
 
         // Synchronization barrier...
         // The goal is to circumvent a limitation in SimGrid (at least
@@ -347,6 +351,10 @@ int main(int argc, char* argv[])
                  "percent of the load average");
         PR_VALUE("Data transfer amount", "%g %s", transfer_amount,
                  "times the total amount of data");
                  "percent of the load average");
         PR_VALUE("Data transfer amount", "%g %s", transfer_amount,
                  "times the total amount of data");
+        PR_VALUE("Number of hosts that converged", "%u / %u",
+                 convergence.get_count(), loads.get_count());
+        PR_VALUE("Date of first host convergence", "%g", convergence.get_min());
+        PR_VALUE("Date of last host convergence", "%g", convergence.get_max());
         XBT_INFO("`----");
 
     }
         XBT_INFO("`----");
 
     }
index 80274639e362c7e78f450aa4612a452a24e30504..01ae4470bc092cfe0e37b95efa35697690f46655 100644 (file)
@@ -76,6 +76,7 @@ namespace opt {
     double comp_time_delay = 0.0;               // fixme: find better defaults
 
     // Parameters for the end of the simulation
     double comp_time_delay = 0.0;               // fixme: find better defaults
 
     // Parameters for the end of the simulation
+    double avg_load_ratio = 0.0;
     unsigned lb_maxiter = 0;
     unsigned comp_maxiter = 0;
     double time_limit = 0;
     unsigned lb_maxiter = 0;
     unsigned comp_maxiter = 0;
     double time_limit = 0;
@@ -231,9 +232,13 @@ bool opt::parse_args(int* argc, char* argv[])
     int c;
     opterr = 0;
     while ((c = getopt(*argc, argv,
     int c;
     opterr = 0;
     while ((c = getopt(*argc, argv,
+                       "%:"
                        "a:bc:C:d:D:eEhi:I:k:l:L:m:M:N:r:Rs:S:t:T:vVx:X:Z"))
            != -1) {
         switch (c) {
                        "a:bc:C:d:D:eEhi:I:k:l:L:m:M:N:r:Rs:S:t:T:vVx:X:Z"))
            != -1) {
         switch (c) {
+        case '%':
+            PARSE_ARG(opt::avg_load_ratio);
+            break;
         case 'a':
             opt::loba_algo = optarg;
             result = opt_helper::nol_find_prefix(opt::loba_algorithms,
         case 'a':
             opt::loba_algo = optarg;
             result = opt_helper::nol_find_prefix(opt::loba_algorithms,
@@ -443,6 +448,7 @@ void opt::print()
           h.val_or_string(lb_maxiter, "no limit"));
     DESCR("maximum number of comp. iterations", "%s",
           h.val_or_string(comp_maxiter, "no limit"));
           h.val_or_string(lb_maxiter, "no limit"));
     DESCR("maximum number of comp. iterations", "%s",
           h.val_or_string(comp_maxiter, "no limit"));
+    DESCR("convergence is assumed within (\%)", "%g", opt::avg_load_ratio);
     DESCR("time limit", "%s", h.val_or_string(time_limit, "no limit"));
     DESCR("exit on close", "%s", h.on_off(exit_on_close));
     XBT_INFO("`----");
     DESCR("time limit", "%s", h.val_or_string(time_limit, "no limit"));
     DESCR("exit on close", "%s", h.on_off(exit_on_close));
     XBT_INFO("`----");
@@ -562,6 +568,9 @@ void opt::usage()
     std::clog << o("-I value")
               << "maximum number of comp. iterations, 0 for no limit"
               << " [" << opt::comp_maxiter << "]\n";
     std::clog << o("-I value")
               << "maximum number of comp. iterations, 0 for no limit"
               << " [" << opt::comp_maxiter << "]\n";
+    std::clog << o("-% value")
+              << "percent of the load average to assume the convergence"
+              << " [" << opt::avg_load_ratio << "]\n";
     std::clog << o("-t value")
               << "time limit (simulated time), 0 for no limit"
               << " [" << opt::time_limit << "]\n";
     std::clog << o("-t value")
               << "time limit (simulated time), 0 for no limit"
               << " [" << opt::time_limit << "]\n";
index ef7a73e6ce379d9915ee7e213a59db267487a4cd..264a566f25564831680df3027fc28ff9c5213180 100644 (file)
--- a/options.h
+++ b/options.h
@@ -59,6 +59,7 @@ namespace opt {
     extern double comp_time_delay;
 
     // Parameters for the end of the simulation
     extern double comp_time_delay;
 
     // Parameters for the end of the simulation
+    extern double avg_load_ratio;
     extern unsigned lb_maxiter;
     extern unsigned comp_maxiter;
     extern double time_limit;
     extern unsigned lb_maxiter;
     extern unsigned comp_maxiter;
     extern double time_limit;
index 07372dc0515f763e8f16729d4437c6b94c55df5d..76524fddaec59b27d08c71c2d492a9383e1f15f9 100644 (file)
@@ -20,6 +20,9 @@ double process::total_load_init = 0.0;
 double process::total_load_running = 0.0;
 double process::total_load_exit = 0.0;
 
 double process::total_load_running = 0.0;
 double process::total_load_exit = 0.0;
 
+int process::process_counter = 0;
+double process::total_load_average;
+
 namespace {
 
     void sleep_until_date(double& date, double duration)
 namespace {
 
     void sleep_until_date(double& date, double duration)
@@ -54,12 +57,20 @@ process::process(int argc, char* argv[])
         rev_neigh.insert(std::make_pair(host, ptr));
     }
 
         rev_neigh.insert(std::make_pair(host, ptr));
     }
 
+    // Note: with the current version of SimGrid, there should be no
+    // race condition when updating the global variables.
+
     prev_load_broadcast = -1;   // force sending of load on first send_all()
     expected_load = real_load;
     total_load_running += real_load;
     total_load_init += real_load;
     received_load = 0.0;
 
     prev_load_broadcast = -1;   // force sending of load on first send_all()
     expected_load = real_load;
     total_load_running += real_load;
     total_load_init += real_load;
     received_load = 0.0;
 
+    convergence = -1.0;
+
+    process_counter++;
+    total_load_average = total_load_running / process_counter;
+
     ctrl_close_pending = data_close_pending = neigh.size();
     close_received = false;
     finalizing = false;
     ctrl_close_pending = data_close_pending = neigh.size();
     close_received = false;
     finalizing = false;
@@ -95,6 +106,12 @@ process::~process()
         return;
     XBT_INFO("Final load after %d:%d:%d iterations: %g",
              lb_iter, comp_iter, all_comp_iter, real_load);
         return;
     XBT_INFO("Final load after %d:%d:%d iterations: %g",
              lb_iter, comp_iter, all_comp_iter, real_load);
+    if (convergence >= 0.0)
+        XBT_INFO("Convergence within %g%% was achieved at time %g",
+                 opt::avg_load_ratio, convergence);
+    else
+        XBT_INFO("Convergence within %g%% was not achieved",
+                 opt::avg_load_ratio);
     XBT_VERB("Expected load was: %g", expected_load);
     XBT_VERB("Total computation for this process: %g", get_comp_amount());
     print_loads(true, xbt_log_priority_debug);
     XBT_VERB("Expected load was: %g", expected_load);
     XBT_VERB("Total computation for this process: %g", get_comp_amount());
     print_loads(true, xbt_log_priority_debug);
@@ -199,6 +216,22 @@ void process::compute_loop()
         if (real_load == 0.0)
             continue;
 
         if (real_load == 0.0)
             continue;
 
+        double load_ratio =
+            100.0 * std::fabs(real_load / total_load_average - 1.0);
+        if (convergence >= 0.0) {
+            if (load_ratio > opt::avg_load_ratio) {
+                XBT_VERB("current load has diverged: %g (%.4g%%)",
+                         real_load, load_ratio);
+                convergence = -1.0;
+            }
+        } else {
+            if (load_ratio <= opt::avg_load_ratio) {
+                XBT_VERB("current load has converged: %g (%.4g%%)",
+                         real_load, load_ratio);
+                convergence = MSG_get_clock();
+            }
+        }
+
         // compute
         ++comp_iter;
         double flops = opt::comp_cost(real_load);
         // compute
         ++comp_iter;
         double flops = opt::comp_cost(real_load);
index 9ea0521ba08e5283f565e85d8e95ac046889a0b1..ff8b7517d5b3d062881388859ec53d4fc4f60882 100644 (file)
--- a/process.h
+++ b/process.h
@@ -44,6 +44,7 @@ public:
     double get_ctrl_recv_amount() const    { return acc.ctrl_recv.amount; }
     unsigned get_ctrl_send_count() const   { return acc.ctrl_send.count;  }
     unsigned get_ctrl_recv_count() const   { return acc.ctrl_recv.count;  }
     double get_ctrl_recv_amount() const    { return acc.ctrl_recv.amount; }
     unsigned get_ctrl_send_count() const   { return acc.ctrl_send.count;  }
     unsigned get_ctrl_recv_count() const   { return acc.ctrl_recv.count;  }
+    double get_convergence() const         { return convergence;          }
 
     int run();
 
 
     int run();
 
@@ -84,6 +85,9 @@ private:
     static double total_load_running; // sum of loads while running
     static double total_load_exit; // sum of process loads at exit
 
     static double total_load_running; // sum of loads while running
     static double total_load_exit; // sum of process loads at exit
 
+    static int process_counter;
+    static double total_load_average;
+
     typedef MAP_TEMPLATE<m_host_t, neighbor*> rev_neigh_type;
     neigh_type neigh;           // list of neighbors (do not alter
                                 // after construction!)
     typedef MAP_TEMPLATE<m_host_t, neighbor*> rev_neigh_type;
     neigh_type neigh;           // list of neighbors (do not alter
                                 // after construction!)
@@ -108,6 +112,8 @@ private:
     double expected_load;       // expected load in bookkeeping mode
     double received_load;       // load received from neighbors
 
     double expected_load;       // expected load in bookkeeping mode
     double received_load;       // load received from neighbors
 
+    double convergence;         // date when convergence was achieved, or -1.0
+
     mutex_t mutex;              // synchronization between threads
     condition_t cond;
 
     mutex_t mutex;              // synchronization between threads
     condition_t cond;