X-Git-Url: http://bilbo.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/2a6c5ce52acb820ea198f6c97cfbe0060219ed4f..f5f38e5b9cdacbf1ba1705bb7ed592b59f8e8712:/examples/cpp/exec-failure/s4u-exec-failure.cpp diff --git a/examples/cpp/exec-failure/s4u-exec-failure.cpp b/examples/cpp/exec-failure/s4u-exec-failure.cpp index f2f9b7e436..4a4063bda6 100644 --- a/examples/cpp/exec-failure/s4u-exec-failure.cpp +++ b/examples/cpp/exec-failure/s4u-exec-failure.cpp @@ -1,82 +1,105 @@ -/* Copyright (c) 2021. The SimGrid Team. All rights reserved. */ +/* Copyright (c) 2021-2023. The SimGrid Team. All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ -/* This example shows how to serialize a set of communications going through a link +/* This example shows how to survive host failure exceptions that occur when a host is turned off. * - * As for the other asynchronous examples, the sender initiates all the messages it wants to send and - * pack the resulting simgrid::s4u::CommPtr objects in a vector. - * At the same time, the receiver starts receiving all messages asynchronously. Without serialization, - * all messages would be received at the same timestamp in the receiver. + * The actors do not get notified when the host on which they run is turned off: they are just terminated + * in this case, and their ``on_exit()`` callback gets executed. * - * However, as they will be serialized in a link of the platform, the messages arrive 2 by 2. - * - * The sender then blocks until all ongoing communication terminate, using simgrid::s4u::Comm::wait_all() + * For remote executions on failing hosts however, any blocking operation such as ``exec`` or ``wait`` will + * raise an exception that you can catch and react to, as illustrated in this example. 
*/ #include +#include "simgrid/kernel/ProfileBuilder.hpp" XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_exec_failure, "Messages specific for this s4u example"); namespace sg4 = simgrid::s4u; -static void dispatcher(sg4::Host* host1, sg4::Host* host2) +static void dispatcher(std::vector const& hosts) { std::vector pending_execs; - XBT_INFO("Initiating asynchronous exec on %s", host1->get_cname()); - auto exec1 = sg4::this_actor::exec_init(20)->set_host(host1); - pending_execs.push_back(exec1); - exec1->start(); - XBT_INFO("Initiating asynchronous exec on %s", host2->get_cname()); - auto exec2 = sg4::this_actor::exec_init(20)->set_host(host2); - pending_execs.push_back(exec2); - exec2->start(); - - XBT_INFO("Calling wait_any.."); - long index; + for (auto* host: hosts) { + XBT_INFO("Initiating asynchronous exec on %s", host->get_cname()); + // Computing 20 flops on an host which speed is 1f takes 20 seconds (when it does not fail) + auto exec = sg4::this_actor::exec_init(20)->set_host(host); + pending_execs.push_back(exec); + exec->start(); + } + + XBT_INFO("---------------------------------"); + XBT_INFO("Wait on the first exec, which host is turned off at t=10 by the another actor."); try { - index = sg4::Exec::wait_any(pending_execs); - XBT_INFO("Wait any returned index %ld (exec on %s)", index, pending_execs.at(index)->get_host()->get_cname()); - } catch (simgrid::HostFailureException& e) { - XBT_INFO("Dispatcher has experienced a host failure exception, so it knows that something went wrong"); - XBT_INFO("Now it needs to figure out which of the two execs failed by looking at their state"); + pending_execs[0]->wait(); + xbt_assert("This wait was not supposed to succeed."); + } catch (const simgrid::HostFailureException&) { + XBT_INFO("Dispatcher has experienced a host failure exception, so it knows that something went wrong."); } - XBT_INFO("Exec on %s has state: %s", pending_execs[0]->get_host()->get_cname(), pending_execs[0]->get_state_str()); - XBT_INFO("Exec on %s has 
state: %s", pending_execs[1]->get_host()->get_cname(), pending_execs[1]->get_state_str()); + XBT_INFO("State of each exec:"); + for (auto const& exec : pending_execs) + XBT_INFO(" Exec on %s has state: %s", exec->get_host()->get_cname(), exec->get_state_str()); + XBT_INFO("---------------------------------"); + XBT_INFO("Wait on the second exec, which host is turned off at t=12 by the state profile."); try { pending_execs[1]->wait(); - } catch (simgrid::HostFailureException& e) { - XBT_INFO("Waiting on a FAILED exec raises an exception: '%s'", e.what()); + xbt_assert("This wait was not supposed to succeed."); + } catch (const simgrid::HostFailureException&) { + XBT_INFO("Dispatcher has experienced a host failure exception, so it knows that something went wrong."); } - pending_execs.pop_back(); - XBT_INFO("Wait for remaining exec, just to be nice"); - index = simgrid::s4u::Exec::wait_any(pending_execs); - XBT_INFO("Dispatcher ends"); + XBT_INFO("State of each exec:"); + for (auto const& exec : pending_execs) + XBT_INFO(" Exec on %s has state: %s", exec->get_host()->get_cname(), exec->get_state_str()); + + XBT_INFO("---------------------------------"); + XBT_INFO("Wait on the third exec, which should succeed."); + try { + pending_execs[2]->wait(); + XBT_INFO("No exception occured."); + } catch (const simgrid::HostFailureException&) { + xbt_assert("This wait was not supposed to fail."); + } + XBT_INFO("State of each exec:"); + for (auto const& exec : pending_execs) + XBT_INFO(" Exec on %s has state: %s", exec->get_host()->get_cname(), exec->get_state_str()); } static void host_killer(sg4::Host* to_kill) { - XBT_INFO("HostKiller sleeping 10 seconds..."); sg4::this_actor::sleep_for(10.0); - XBT_INFO("HostKiller turning off host %s", to_kill->get_cname()); + XBT_INFO("HostKiller turns off the host '%s'.", to_kill->get_cname()); to_kill->turn_off(); - XBT_INFO("HostKiller ends"); } int main(int argc, char** argv) { - sg4::Engine engine(&argc, argv); - auto* zone = 
sg4::create_full_zone("AS0"); - auto* host1 = zone->create_host("Host1", "1f"); - auto* host2 = zone->create_host("Host2", "1f"); + auto* zone = sg4::create_full_zone("world"); + std::vector hosts; + for (const auto* name : {"Host1", "Host2", "Host3"}) { + auto* host = zone->create_host(name, "1f"); + hosts.push_back(host); + } + /* Attaching a state profile (i.e., a list of events changing the on/off state of the resource) to Host2 (hosts[1]). + * The syntax of the profile (second parameter) is a list of: "date state\n" + * The R"( )" thing is the C++ way of writing multiline strings, including literal newlines. + * You'd have the same behavior by using "12 0\n20 1\n" instead. + * So here, the host is turned off at t=12 and back on at t=20. + * The last parameter is the period of that profile, meaning that it loops after 30 seconds. + */ + hosts[1]->set_state_profile(simgrid::kernel::profile::ProfileBuilder::from_string("profile name", R"( +12 0 +20 1 +)", 30)); + zone->seal(); - sg4::Actor::create("Dispatcher", host1, dispatcher, host1, host2); - sg4::Actor::create("HostKiller", host1, host_killer, host2)->daemonize(); + sg4::Actor::create("Dispatcher", hosts[2], dispatcher, hosts); + sg4::Actor::create("HostKiller", hosts[2], host_killer, hosts[0]); engine.run();