From e599303392d7cf5795927906bc44619ab60dfcaf Mon Sep 17 00:00:00 2001 From: Martin Quinson Date: Sun, 10 May 2020 13:47:44 +0200 Subject: [PATCH] Better fix for FG#50 - The previous solution was not allowing the suspended actor to be resumed properly - Instead of going for a complex story where we create an activity on which to suspend the actor, simply unschedule it on suspend(), and explicitely reschedule it on resume() - This is much easier since we don't have to save and restore the simcall of the suspended actor, even if the price to pay is the explicit simix_global->actors_to_run.push_back(this); in resume() of which I'm not a big fan. Fixes https://framagit.org/simgrid/simgrid/-/issues/50 --- src/kernel/actor/ActorImpl.cpp | 25 +++++++++---------- teshsuite/s4u/actor-suspend/actor-suspend.cpp | 14 +++++++---- .../s4u/actor-suspend/actor-suspend.tesh | 8 +++--- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/kernel/actor/ActorImpl.cpp b/src/kernel/actor/ActorImpl.cpp index 258ce94f9a..4e0162f49e 100644 --- a/src/kernel/actor/ActorImpl.cpp +++ b/src/kernel/actor/ActorImpl.cpp @@ -295,9 +295,11 @@ void ActorImpl::yield() XBT_DEBUG("Hey! I'm suspended."); xbt_assert(exception_ == nullptr, "Gasp! This exception may be lost by subsequent calls."); - suspended_ = false; - suspend(); - yield(); // Yield back to maestro without proceeding with my execution. I'll get resumed at some point + + if (waiting_synchro_ != nullptr) // Not sure of when this will happen. Maybe when suspending early in the SR when a + waiting_synchro_->suspend(); // waiting_synchro was terminated + + yield(); // Yield back to maestro without proceeding with my execution. I'll get rescheduled by resume() } if (exception_ != nullptr) { @@ -366,15 +368,10 @@ void ActorImpl::suspend() suspended_ = true; - /* If the suspended actor is waiting on a sync, suspend its synchronization. */ - if (waiting_synchro_ == nullptr) { - auto exec = new activity::ExecImpl(); - exec->set_name("suspend").set_host(host_).set_flops_amount(0.0).start(); - waiting_synchro_ = activity::ExecImplPtr(exec); - - waiting_synchro_->simcalls_.push_back(&simcall_); - } - waiting_synchro_->suspend(); + /* If the suspended actor is waiting on a sync, suspend its synchronization. + * Otherwise, it will suspend itself when scheduled, ie, very soon. */ + if (waiting_synchro_ != nullptr) + waiting_synchro_->suspend(); } void ActorImpl::resume() @@ -390,9 +387,11 @@ void ActorImpl::resume() return; suspended_ = false; - /* resume the synchronization that was blocking the resumed actor. */ + /* resume the activity that was blocking the resumed actor. */ if (waiting_synchro_) waiting_synchro_->resume(); + else // Reschedule the actor if it was forcefully unscheduled in yield() + simix_global->actors_to_run.push_back(this); XBT_OUT(); } diff --git a/teshsuite/s4u/actor-suspend/actor-suspend.cpp b/teshsuite/s4u/actor-suspend/actor-suspend.cpp index 60c7e51c13..a75cc86ed5 100644 --- a/teshsuite/s4u/actor-suspend/actor-suspend.cpp +++ b/teshsuite/s4u/actor-suspend/actor-suspend.cpp @@ -22,11 +22,9 @@ public: void operator()() { XBT_INFO("Starting."); - simgrid::s4u::Mailbox* mailbox = simgrid::s4u::Mailbox::by_name("receiver"); - void* data = (void*)2; - data = mailbox->get(); - xbt_die("get() has returned (even though it shouldn't!) with a %s message", - (data == nullptr ? "null" : "non-null")); + auto mailbox = simgrid::s4u::Mailbox::by_name("receiver"); + int data = *(int*)mailbox->get(); + XBT_INFO("Got %d at the end", data); } }; @@ -46,6 +44,12 @@ public: XBT_INFO("Sleeping 10 sec..."); simgrid::s4u::this_actor::sleep_for(10); + + XBT_INFO("Sending a message to the receiver..."); + auto mailbox = simgrid::s4u::Mailbox::by_name("receiver"); + int data = 42; + mailbox->put(&data, 4); + XBT_INFO("Done!"); } }; diff --git a/teshsuite/s4u/actor-suspend/actor-suspend.tesh b/teshsuite/s4u/actor-suspend/actor-suspend.tesh index 8ee18977d7..a6ae097966 100644 --- a/teshsuite/s4u/actor-suspend/actor-suspend.tesh +++ b/teshsuite/s4u/actor-suspend/actor-suspend.tesh @@ -3,8 +3,6 @@ $ ./actor-suspend ${platfdir}/small_platform.xml --log=no_loc > [Tremblay:Receiver:(2) 0.000000] [mwe/INFO] Starting. > [Tremblay:Suspender:(1) 0.000000] [mwe/INFO] Resume the receiver... > [Tremblay:Suspender:(1) 0.000000] [mwe/INFO] Sleeping 10 sec... -> [Tremblay:Suspender:(1) 10.000000] [mwe/INFO] Done! -> [10.000000] [simix_kernel/CRITICAL] Oops! Deadlock or code not perfectly clean. -> [10.000000] [simix_kernel/INFO] 1 actors are still running, waiting for something. -> [10.000000] [simix_kernel/INFO] Legend of the following listing: "Actor (@): " -> [10.000000] [simix_kernel/INFO] Actor 2 (Receiver@Tremblay): waiting for execution activity 0xdeadbeef (suspend) in state 3 to finish +> [Tremblay:Suspender:(1) 10.000000] [mwe/INFO] Sending a message to the receiver... +> [Tremblay:Receiver:(2) 10.000195] [mwe/INFO] Got 42 at the end +> [Tremblay:Suspender:(1) 10.000195] [mwe/INFO] Done! -- 2.20.1