int todo; // Number of tasks still to execute; global so that main() can initialize it and workers can decrement it
sg4::Mailbox* mailbox; // Mailbox shared by master and workers; kept global to reduce the number of simcalls during actor reboot
-static void master()
+XBT_ATTRIB_NORETURN static void master()
{
double comp_size = 1e6;
long comm_size = 1e6;
- XBT_INFO("Master booting");
- sg4::Actor::self()->daemonize();
- sg4::this_actor::on_exit(
- [](bool forcefully) { XBT_INFO("Master dying %s.", forcefully ? "forcefully" : "peacefully"); });
+ bool rebooting = sg4::Actor::self()->get_restart_count() > 0;
+
+ XBT_INFO("Master %s", rebooting ? "rebooting" : "booting");
+ if (not rebooting) // Starting for the first time
+ sg4::this_actor::on_exit(
+ [](bool forcefully) { XBT_INFO("Master dying %s.", forcefully ? "forcefully" : "peacefully"); });
while (true) { // This is a daemon
xbt_assert(sg4::Engine::get_clock() < cfg_deadline,
"Failed to run all tasks in less than %d seconds. Is this an infinite loop?", (int)cfg_deadline);
- auto* payload = new double(comp_size);
+ auto payload = std::make_unique<double>(comp_size);
try {
XBT_INFO("Try to send a message");
- mailbox->put(payload, comm_size, 10.0);
+ mailbox->put(payload.get(), comm_size, 10.0);
+ payload.release();
} catch (const simgrid::TimeoutException&) {
- delete payload;
XBT_INFO("Timeouted while sending a task");
} catch (const simgrid::NetworkFailureException&) {
- delete payload;
XBT_INFO("Got a NetworkFailureException. Wait a second before starting again.");
sg4::this_actor::sleep_for(1);
}
static void worker(int id)
{
- XBT_INFO("Worker booting");
- sg4::this_actor::on_exit(
- [id](bool forcefully) { XBT_INFO("worker %d dying %s.", id, forcefully ? "forcefully" : "peacefully"); });
+ bool rebooting = sg4::Actor::self()->get_restart_count() > 0;
+
+ XBT_INFO("Worker %s", rebooting ? "rebooting" : "booting");
+ if (not rebooting) // Starting for the first time
+ sg4::this_actor::on_exit(
+ [id](bool forcefully) { XBT_INFO("worker %d dying %s.", id, forcefully ? "forcefully" : "peacefully"); });
while (todo > 0) {
xbt_assert(sg4::Engine::get_clock() < cfg_deadline,
auto payload = mailbox->get_unique<double>(10);
xbt_assert(payload != nullptr, "mailbox->get() failed");
double comp_size = *payload;
- if (comp_size < 0) { /* - Exit when -1.0 is received */
- XBT_INFO("I'm done. See you!");
- break;
- }
- /* - Otherwise, process the task */
+
XBT_INFO("Start execution...");
sg4::this_actor::execute(comp_size);
XBT_INFO("Execution complete.");
todo--;
} catch (const simgrid::TimeoutException&) {
XBT_INFO("Timeouted while getting a task.");
-
} catch (const simgrid::NetworkFailureException&) {
XBT_INFO("Got a NetworkFailureException. Wait a second before starting again.");
sg4::this_actor::sleep_for(1);
{
sg4::Engine e(&argc, argv);
- XBT_INFO("host count: %d ", (int)cfg_host_count);
-
auto* rootzone = sg4::create_full_zone("root");
sg4::Host* main; // First host created, where the master will stay
std::vector<sg4::Host*> worker_hosts;
+
+ xbt_assert(cfg_host_count > 2, "You need at least 2 workers (i.e., 3 hosts) or the master will be auto-killed when "
+ "the only worker gets killed.");
for (int i = 0; i < cfg_host_count; i++) {
auto hostname = std::string("lilibeth ") + std::to_string(i);
auto* host = rootzone->create_host(hostname, 1e9);
}
}
rootzone->seal();
- sg4::Engine::get_instance()->on_platform_created(); // FIXME this should not be necessary
- sg4::Actor::create("master", main, master)->set_auto_restart(true);
+ sg4::Actor::create("master", main, master)->daemonize()->set_auto_restart(true);
int id = 0;
- for (auto* h : worker_hosts)
- sg4::Actor::create("worker", h, worker, id++)->set_auto_restart(true);
+ for (auto* h : worker_hosts) {
+ sg4::Actor::create("worker", h, worker, id)->set_auto_restart(true);
+ id++;
+ }
todo = cfg_task_count;
xbt_assert(todo > 0, "Please give more than %d tasks to run", todo);
mailbox = sg4::Mailbox::by_name("mailbox");
- xbt_assert(cfg_host_count > 2, "You need at least 2 workers (i.e., 3 hosts) or the master will be auto-killed when "
- "the only worker gets killed.");
e.run();