From 23529d55ccbfd0a851fdc86708320811ddda8ec4 Mon Sep 17 00:00:00 2001 From: Arnaud Giersch Date: Tue, 18 Jan 2011 22:51:47 +0100 Subject: [PATCH 1/1] There is a bug... --- TODO | 11 +++++++++++ process.cpp | 3 +++ 2 files changed, 14 insertions(+) diff --git a/TODO b/TODO index 30586a5..3152ad9 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,16 @@ * segfault with ./loba cluster1000.xml -N64 -a fairstrategy + this is a deadlock occurring when: + - a process is in the finalize stage; + - all processes but one are blocked on receive; + - the process that is still running owns all the remaining load, + and sends it all to the finalizing process, and then goes in + blocking receive. + The finalizing process receives the load, and blocks again, + waiting for a close message. + All processes are then blocked, and no one is able to see that + there is no more load in the system! + * verify bookkeeping version. * add several metrics diff --git a/process.cpp b/process.cpp index 95ba1f9..0544b21 100644 --- a/process.cpp +++ b/process.cpp @@ -145,6 +145,8 @@ int process::run() opt::load_ratio_threshold) { VERB0("No more load to balance in system."); break; + } else { + DEBUG1("still %g load to balance, continuing...", total_load_running); } } VERB0("Going to finalize..."); @@ -264,6 +266,7 @@ void process::receive(bool wait) message* msg; m_host_t from; + DEBUG1("%sblocking receive", "\0non-" + !wait); while (may_receive() && comm.recv(msg, from, wait)) { switch (msg->get_type()) { case message::INFO: { -- 2.39.5