From: Martin Quinson Date: Sat, 9 May 2020 23:56:20 +0000 (+0200) Subject: Ensure that suspending an actor running later in the current scheduling round actuall... X-Git-Tag: v3.26~616 X-Git-Url: http://bilbo.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/25459e0d62df4c763113f6e525707444fbfc4b7e Ensure that suspending an actor running later in the current scheduling round actually works Fixes https://framagit.org/simgrid/simgrid/-/issues/50 --- diff --git a/ChangeLog b/ChangeLog index e302c5e9fe..118d39172f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -42,6 +42,7 @@ Fixed bugs (FG#.. -> FramaGit bugs; FG!.. -> FG merge requests) - FG#41: Add sg_actor_create C interface - FG#43: xbt::random needs some care - FG#48: The Impossible Did Happen (yet again) + - FG#50: Suspending an actor executed at the current timestamp fails - FG!24: Documentation and fix for xbt/random - FG!35: Add a modeling hint for parallel links in doc - FG!36: [xbt/random] Read/Write the state of the RNG diff --git a/src/kernel/actor/ActorImpl.cpp b/src/kernel/actor/ActorImpl.cpp index 59ec1c59e6..84c85e98fa 100644 --- a/src/kernel/actor/ActorImpl.cpp +++ b/src/kernel/actor/ActorImpl.cpp @@ -297,6 +297,7 @@ void ActorImpl::yield() xbt_assert(exception_ == nullptr, "Gasp! This exception may be lost by subsequent calls."); suspended_ = false; suspend(); + yield(); // Yield back to maestro without proceeding with my execution. 
I'll get resumed at some point } if (exception_ != nullptr) { diff --git a/teshsuite/s4u/CMakeLists.txt b/teshsuite/s4u/CMakeLists.txt index 03487a5e8b..a19f6d3696 100644 --- a/teshsuite/s4u/CMakeLists.txt +++ b/teshsuite/s4u/CMakeLists.txt @@ -1,4 +1,4 @@ -foreach(x actor actor-autorestart +foreach(x actor actor-autorestart actor-suspend activity-lifecycle comm-get-sender comm-pt2pt wait-any-for cloud-interrupt-migration cloud-sharing cloud-two-execs @@ -24,7 +24,7 @@ set_property(TARGET activity-lifecycle APPEND PROPERTY INCLUDE_DIRECTORIES "${IN ## Add the tests. ## Some need to be run with all factories, some don't need tesh to run -foreach(x actor actor-autorestart +foreach(x actor actor-autorestart actor-suspend activity-lifecycle comm-get-sender wait-any-for cloud-interrupt-migration cloud-two-execs concurrent_rw) set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.tesh) diff --git a/teshsuite/s4u/actor-suspend/actor-suspend.cpp b/teshsuite/s4u/actor-suspend/actor-suspend.cpp new file mode 100644 index 0000000000..f87fadba5d --- /dev/null +++ b/teshsuite/s4u/actor-suspend/actor-suspend.cpp @@ -0,0 +1,67 @@ +/* Copyright (c) 2020. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +// This is the MWE of https://framagit.org/simgrid/simgrid/-/issues/50 +// The problem was occuring when suspending an actor that will be executed later in the same scheduling round + +#include +#include +#include +#include +#include + +XBT_LOG_NEW_DEFAULT_CATEGORY(mwe, "Minimum Working Example"); + +simgrid::s4u::ActorPtr receiver; + +class Receiver { + +public: + void operator()() + { + XBT_INFO("Starting."); + simgrid::s4u::Mailbox* mailbox = simgrid::s4u::Mailbox::by_name("receiver"); + void* data = (void*)2; + data = mailbox->get(); + xbt_die("get() has returned (even though it shouldn't!) 
with a %s message", + (data == nullptr ? "null" : "non-null")); + } +}; + +class Suspender { + +public: + void operator()() + { + + // If we sleep a bit here, this MWE works because the suspender is not trying to suspend someone executed later in + // the same scheduling round simgrid::s4u::this_actor::sleep_for(0.01); + + XBT_INFO("Suspend the receiver..."); + receiver->suspend(); + XBT_INFO("Resume the receiver..."); + receiver->resume(); + + XBT_INFO("Sleeping 10 sec..."); + simgrid::s4u::this_actor::sleep_for(10); + XBT_INFO("Done!"); + } +}; + +int main(int argc, char** argv) +{ + + simgrid::s4u::Engine* engine = new simgrid::s4u::Engine(&argc, argv); + + engine->load_platform(argv[1]); + simgrid::s4u::Host* host = simgrid::s4u::Host::by_name("Tremblay"); + + auto suspender = simgrid::s4u::Actor::create("Suspender", host, Suspender()); + receiver = simgrid::s4u::Actor::create("Receiver", host, Receiver()); + + engine->run(); + + return 0; +} diff --git a/teshsuite/s4u/actor-suspend/actor-suspend.tesh b/teshsuite/s4u/actor-suspend/actor-suspend.tesh new file mode 100644 index 0000000000..8ee18977d7 --- /dev/null +++ b/teshsuite/s4u/actor-suspend/actor-suspend.tesh @@ -0,0 +1,10 @@ +$ ./actor-suspend ${platfdir}/small_platform.xml --log=no_loc +> [Tremblay:Suspender:(1) 0.000000] [mwe/INFO] Suspend the receiver... +> [Tremblay:Receiver:(2) 0.000000] [mwe/INFO] Starting. +> [Tremblay:Suspender:(1) 0.000000] [mwe/INFO] Resume the receiver... +> [Tremblay:Suspender:(1) 0.000000] [mwe/INFO] Sleeping 10 sec... +> [Tremblay:Suspender:(1) 10.000000] [mwe/INFO] Done! +> [10.000000] [simix_kernel/CRITICAL] Oops! Deadlock or code not perfectly clean. +> [10.000000] [simix_kernel/INFO] 1 actors are still running, waiting for something. +> [10.000000] [simix_kernel/INFO] Legend of the following listing: "Actor <pid> (<name>@<host>): <status>" +> [10.000000] [simix_kernel/INFO] Actor 2 (Receiver@Tremblay): waiting for execution activity 0xdeadbeef (suspend) in state 3 to finish