From: Arnaud Giersch Date: Tue, 18 Jan 2011 21:51:47 +0000 (+0100) Subject: There is a bug... X-Git-Tag: v0.1~188^2~29 X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/loba.git/commitdiff_plain/23529d55ccbfd0a851fdc86708320811ddda8ec4?ds=inline;hp=52cf3df3d9c14e871452621545b4ba7b8259bf0c There is a bug... --- diff --git a/TODO b/TODO index 30586a5..3152ad9 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,16 @@ * segfault with ./loba cluster1000.xml -N64 -a fairstrategy + this is a deadlock occurring when: + - a process is in the finalize stage; + - all processes but one are blocked on receive; + - the process that is still running owns all the remaining load, + and sends it all to the finalizing process, and then goes in + blocking receive. + The finalizing process receives the load, and blocks again, + waiting for a close message. + All processes are then blocked, and no one is able to see that + there is no more load in the system! + * verify bookkeeping version. * add several metrics diff --git a/process.cpp b/process.cpp index 95ba1f9..0544b21 100644 --- a/process.cpp +++ b/process.cpp @@ -145,6 +145,8 @@ int process::run() opt::load_ratio_threshold) { VERB0("No more load to balance in system."); break; + } else { + DEBUG1("still %g load to balance, continuing...", total_load_running); } } VERB0("Going to finalize..."); @@ -264,6 +266,7 @@ void process::receive(bool wait) message* msg; m_host_t from; + DEBUG1("%sblocking receive", "\0non-" + !wait); while (may_receive() && comm.recv(msg, from, wait)) { switch (msg->get_type()) { case message::INFO: {