statistics ctrl_recv_amount;
statistics ctrl_send_count;
statistics ctrl_recv_count;
+ statistics convergence;
}
ctrl_recv_amount.push(proc->get_ctrl_recv_amount());
ctrl_send_count.push(proc->get_ctrl_send_count());
ctrl_recv_count.push(proc->get_ctrl_recv_count());
+ double c = proc->get_convergence(); // convergence date of this process, or -1.0
+ if (c != -1) // only processes holding a convergence date are counted
+ convergence.push(c);
// Synchronization barrier...
// The goal is to circumvent a limitation in SimGrid (at least
"percent of the load average");
PR_VALUE("Data transfer amount", "%g %s", transfer_amount,
"times the total amount of data");
+ PR_VALUE("Number of hosts that converged", "%u / %u",
+ convergence.get_count(), loads.get_count());
+ PR_VALUE("Date of first host convergence", "%g", convergence.get_min());
+ PR_VALUE("Date of last host convergence", "%g", convergence.get_max());
XBT_INFO("`----");
}
double comp_time_delay = 0.0; // fixme: find better defaults
// Parameters for the end of the simulation
+ double avg_load_ratio = 0.0;
unsigned lb_maxiter = 0;
unsigned comp_maxiter = 0;
double time_limit = 0;
int c;
opterr = 0;
while ((c = getopt(*argc, argv,
+ "%:"
"a:bc:C:d:D:eEhi:I:k:l:L:m:M:N:r:Rs:S:t:T:vVx:X:Z"))
!= -1) {
switch (c) {
+ case '%':
+ PARSE_ARG(opt::avg_load_ratio);
+ break;
case 'a':
opt::loba_algo = optarg;
result = opt_helper::nol_find_prefix(opt::loba_algorithms,
h.val_or_string(lb_maxiter, "no limit"));
DESCR("maximum number of comp. iterations", "%s",
h.val_or_string(comp_maxiter, "no limit"));
+ // Convergence threshold, expressed as a percentage of the load average.
+ // A plain '%' is used: "\%" is not a standard C++ escape sequence.
+ DESCR("convergence is assumed within (%)", "%g", opt::avg_load_ratio);
DESCR("time limit", "%s", h.val_or_string(time_limit, "no limit"));
DESCR("exit on close", "%s", h.on_off(exit_on_close));
XBT_INFO("`----");
std::clog << o("-I value")
<< "maximum number of comp. iterations, 0 for no limit"
<< " [" << opt::comp_maxiter << "]\n";
+ std::clog << o("-% value")
+ << "percent of the load average to assume the convergence"
+ << " [" << opt::avg_load_ratio << "]\n";
std::clog << o("-t value")
<< "time limit (simulated time), 0 for no limit"
<< " [" << opt::time_limit << "]\n";
extern double comp_time_delay;
// Parameters for the end of the simulation
+ extern double avg_load_ratio;
extern unsigned lb_maxiter;
extern unsigned comp_maxiter;
extern double time_limit;
double process::total_load_running = 0.0;
double process::total_load_exit = 0.0;
+int process::process_counter = 0;
+double process::total_load_average;
+
namespace {
void sleep_until_date(double& date, double duration)
rev_neigh.insert(std::make_pair(host, ptr));
}
+ // Note: with the current version of SimGrid, there should be no race
+ // condition when updating the global variables.
+
prev_load_broadcast = -1; // force sending of load on first send_all()
expected_load = real_load;
total_load_running += real_load;
total_load_init += real_load;
received_load = 0.0;
+ convergence = -1.0; // no convergence date recorded yet
+
+ process_counter++; // one more process contributes to the average
+ total_load_average = total_load_running / process_counter; // mean of the loads registered so far
+
ctrl_close_pending = data_close_pending = neigh.size();
close_received = false;
finalizing = false;
return;
XBT_INFO("Final load after %d:%d:%d iterations: %g",
lb_iter, comp_iter, all_comp_iter, real_load);
+ if (convergence >= 0.0)
+ XBT_INFO("Convergence within %g%% was achieved at time %g",
+ opt::avg_load_ratio, convergence);
+ else
+ XBT_INFO("Convergence within %g%% was not achieved",
+ opt::avg_load_ratio);
XBT_VERB("Expected load was: %g", expected_load);
XBT_VERB("Total computation for this process: %g", get_comp_amount());
print_loads(true, xbt_log_priority_debug);
if (real_load == 0.0)
continue;
+ double load_ratio = // relative gap between real_load and the average, in percent
+ 100.0 * std::fabs(real_load / total_load_average - 1.0);
+ if (convergence >= 0.0) { // a convergence date is currently recorded
+ if (load_ratio > opt::avg_load_ratio) {
+ XBT_VERB("current load has diverged: %g (%.4g%%)",
+ real_load, load_ratio);
+ convergence = -1.0; // drop the date: the load left the threshold
+ }
+ } else { // no convergence date recorded
+ if (load_ratio <= opt::avg_load_ratio) {
+ XBT_VERB("current load has converged: %g (%.4g%%)",
+ real_load, load_ratio);
+ convergence = MSG_get_clock(); // keep the current clock value as the date
+ }
+ }
+
// compute
++comp_iter;
double flops = opt::comp_cost(real_load);
double get_ctrl_recv_amount() const { return acc.ctrl_recv.amount; }
unsigned get_ctrl_send_count() const { return acc.ctrl_send.count; }
unsigned get_ctrl_recv_count() const { return acc.ctrl_recv.count; }
+ double get_convergence() const { return convergence; }
int run();
static double total_load_running; // sum of loads while running
static double total_load_exit; // sum of process loads at exit
+ static int process_counter; // number of processes counted in the average
+ static double total_load_average; // total_load_running / process_counter
+
typedef MAP_TEMPLATE<m_host_t, neighbor*> rev_neigh_type;
neigh_type neigh; // list of neighbors (do not alter
// after construction!)
double expected_load; // expected load in bookkeeping mode
double received_load; // load received from neighbors
+ double convergence; // date when convergence was achieved, or -1.0
+
mutex_t mutex; // synchronization between threads
condition_t cond;