Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
better handling of ActivityImpl::wait_for with dying actors
author: SUTER Frederic <frederic.suter@cc.in2p3.fr>
Wed, 2 Feb 2022 10:03:13 +0000 (11:03 +0100)
committer: SUTER Frederic <frederic.suter@cc.in2p3.fr>
Wed, 2 Feb 2022 10:03:13 +0000 (11:03 +0100)
examples/c/actor-exiting/actor-exiting.tesh
examples/cpp/actor-exiting/s4u-actor-exiting.tesh
src/kernel/activity/ActivityImpl.cpp
src/kernel/activity/CommImpl.cpp

index 9ca7685..0c05238 100644 (file)
@@ -8,6 +8,6 @@ $ ${bindir:=.}/c-actor-exiting ${platfdir}/small_platform.xml "--log=root.fmt:[%
 > [  3.000000] (maestro@) Oops! Deadlock or code not perfectly clean.
 > [  3.000000] (maestro@) 1 actors are still running, waiting for something.
 > [  3.000000] (maestro@) Legend of the following listing: "Actor <pid> (<name>@<host>): <status>"
-> [  3.000000] (maestro@) Actor 3 (C@Ginette): waiting for synchronization activity 0xdeadbeef () in state WAITING to finish
+> [  3.000000] (maestro@) Actor 3 (C@Ginette): waiting for communication activity 0xdeadbeef () in state WAITING to finish
 > [  3.000000] (C@Ginette) I was killed!
 > [  3.000000] (C@Ginette) The backtrace would be displayed here if --log=no_loc would not have been passed
index c5a6742..c46a118 100644 (file)
@@ -12,7 +12,7 @@ $ ${bindir:=.}/s4u-actor-exiting ${platfdir}/small_platform.xml "--log=root.fmt:
 > [  3.000000] (maestro@) Oops! Deadlock or code not perfectly clean.
 > [  3.000000] (maestro@) 1 actors are still running, waiting for something.
 > [  3.000000] (maestro@) Legend of the following listing: "Actor <pid> (<name>@<host>): <status>"
-> [  3.000000] (maestro@) Actor 3 (C@Ginette): waiting for synchronization activity 0xdeadbeef () in state WAITING to finish
+> [  3.000000] (maestro@) Actor 3 (C@Ginette): waiting for communication activity 0xdeadbeef () in state WAITING to finish
 > [  3.000000] (C@Ginette) I was killed!
 > [  3.000000] (C@Ginette) The backtrace would be displayed here if --log=no_loc would not have been passed
 > [  3.000000] (maestro@) Actor C terminates now
index 3840cc8..c95beb4 100644 (file)
@@ -118,6 +118,8 @@ void ActivityImpl::wait_for(actor::ActorImpl* issuer, double timeout)
   if (state_ != State::WAITING && state_ != State::RUNNING) {
     finish();
   } else {
+    /* we need a sleep action (even when the timeout is infinite) to be notified of host failures */
+    /* Comms handle that a bit differently of the other activities */
     auto* comm = dynamic_cast<CommImpl*>(this);
     if (comm != nullptr) {
       resource::Action* sleep = issuer->get_host()->get_cpu()->sleep(timeout);
@@ -127,18 +129,18 @@ void ActivityImpl::wait_for(actor::ActorImpl* issuer, double timeout)
         comm->src_timeout_ = sleep;
       else
         comm->dst_timeout_ = sleep;
+    } else {
+      RawImplPtr synchro(new RawImpl([this, issuer]() {
+        this->unregister_simcall(&issuer->simcall_);
+        issuer->waiting_synchro_ = nullptr;
+        issuer->exception_       = nullptr;
+        auto* observer           = dynamic_cast<kernel::actor::ActivityWaitSimcall*>(issuer->simcall_.observer_);
+        xbt_assert(observer != nullptr);
+        observer->set_result(true);
+      }));
+      synchro->set_host(issuer->get_host()).set_timeout(timeout).start();
+      synchro->register_simcall(&issuer->simcall_);
     }
-    /* we need a sleep action (even when the timeout is infinite) to be notified of host failures */
-    RawImplPtr synchro(new RawImpl([this, issuer]() {
-      this->unregister_simcall(&issuer->simcall_);
-      issuer->waiting_synchro_ = nullptr;
-      issuer->exception_       = nullptr;
-      auto* observer           = dynamic_cast<kernel::actor::ActivityWaitSimcall*>(issuer->simcall_.observer_);
-      xbt_assert(observer != nullptr);
-      observer->set_result(true);
-    }));
-    synchro->set_host(issuer->get_host()).set_timeout(timeout).start();
-    synchro->register_simcall(&issuer->simcall_);
   }
 }
 
index 44a6e80..53537a5 100644 (file)
@@ -599,8 +599,11 @@ void CommImpl::finish()
     if (not simcall->issuer_->get_host()->is_on()) {
       simcall->issuer_->context_->set_wannadie();
     } else {
-      set_exception(simcall->issuer_);
-      simcall->issuer_->simcall_answer();
+      // Do not answer to dying actors
+      if (not simcall->issuer_->context_->wannadie()) {
+        set_exception(simcall->issuer_);
+        simcall->issuer_->simcall_answer();
+      }
     }
 
     simcall->issuer_->waiting_synchro_ = nullptr;