1 /* Copyright (c) 2017-2023. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 /* Parallel activities are convenient abstractions of parallel computational kernels that span over several machines.
7 * To create a new one, you have to provide several things:
8 * - a vector of hosts on which the activity will execute
9 * - a vector of values, the amount of computation for each of the hosts (in flops)
10 * - a matrix of values, the amount of communication between each pair of hosts (in bytes)
 * Each of these operations will be processed at the same relative speed.
13 * This means that at some point in time, all sub-executions and all sub-communications will be at 20% of completion.
14 * Also, they will all complete at the exact same time.
 * This is obviously a simplistic abstraction, but it is very handy in a large number of situations.
 * Please note that you must have the LV07 platform model enabled to use such constructs.
 */
21 #include <simgrid/s4u.hpp>
23 XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_ptask, "Messages specific for this s4u example");
24 namespace sg4 = simgrid::s4u;
28 /* Retrieve the list of all hosts as an array of hosts */
29 auto hosts = sg4::Engine::get_instance()->get_all_hosts();
30 size_t hosts_count = hosts.size();
32 std::vector<double> computation_amounts;
33 std::vector<double> communication_amounts;
35 /* ------[ test 1 ]----------------- */
36 XBT_INFO("First, build a classical parallel activity, with 1 Gflop to execute on each node, "
37 "and 10MB to exchange between each pair");
39 computation_amounts.assign(hosts_count, 1e9 /*1Gflop*/);
40 communication_amounts.assign(hosts_count * hosts_count, 0);
41 for (size_t i = 0; i < hosts_count; i++)
42 for (size_t j = i + 1; j < hosts_count; j++)
43 communication_amounts[i * hosts_count + j] = 1e7; // 10 MB
45 sg4::this_actor::parallel_execute(hosts, computation_amounts, communication_amounts);
47 /* ------[ test 2 ]----------------- */
48 XBT_INFO("We can do the same with a timeout of 10 seconds enabled.");
49 computation_amounts.assign(hosts_count, 1e9 /*1Gflop*/);
50 communication_amounts.assign(hosts_count * hosts_count, 0);
51 for (size_t i = 0; i < hosts_count; i++)
52 for (size_t j = i + 1; j < hosts_count; j++)
53 communication_amounts[i * hosts_count + j] = 1e7; // 10 MB
55 sg4::ExecPtr activity = sg4::this_actor::exec_init(hosts, computation_amounts, communication_amounts);
57 activity->wait_for(10.0 /* timeout (in seconds)*/);
58 xbt_die("Woops, this did not timeout as expected... Please report that bug.");
59 } catch (const simgrid::TimeoutException&) {
60 XBT_INFO("Caught the expected timeout exception.");
64 /* ------[ test 3 ]----------------- */
65 XBT_INFO("Then, build a parallel activity involving only computations (of different amounts) and no communication");
66 computation_amounts = {3e8, 6e8, 1e9}; // 300Mflop, 600Mflop, 1Gflop
67 communication_amounts.clear(); // no comm
68 sg4::this_actor::parallel_execute(hosts, computation_amounts, communication_amounts);
70 /* ------[ test 4 ]----------------- */
71 XBT_INFO("Then, build a parallel activity with no computation nor communication (synchro only)");
72 computation_amounts.clear();
73 communication_amounts.clear();
74 sg4::this_actor::parallel_execute(hosts, computation_amounts, communication_amounts);
76 /* ------[ test 5 ]----------------- */
77 XBT_INFO("Then, Monitor the execution of a parallel activity");
78 computation_amounts.assign(hosts_count, 1e6 /*1Mflop*/);
79 communication_amounts = {0, 1e6, 0, 0, 0, 1e6, 1e6, 0, 0};
80 activity = sg4::this_actor::exec_init(hosts, computation_amounts, communication_amounts);
83 while (not activity->test()) {
84 XBT_INFO("Remaining flop ratio: %.0f%%", 100 * activity->get_remaining_ratio());
85 sg4::this_actor::sleep_for(5);
89 /* ------[ test 6 ]----------------- */
90 XBT_INFO("Finally, simulate a malleable task (a parallel execution that gets reconfigured after its start).");
91 XBT_INFO(" - Start a regular parallel execution, with both comm and computation");
92 computation_amounts.assign(hosts_count, 1e6 /*1Mflop*/);
93 communication_amounts = {0, 1e6, 0, 0, 1e6, 0, 1e6, 0, 0};
94 activity = sg4::this_actor::exec_init(hosts, computation_amounts, communication_amounts);
97 sg4::this_actor::sleep_for(10);
98 double remaining_ratio = activity->get_remaining_ratio();
99 XBT_INFO(" - After 10 seconds, %.2f%% remains to be done. Change it from 3 hosts to 2 hosts only.",
100 remaining_ratio * 100);
101 XBT_INFO(" Let's first suspend the task.");
104 XBT_INFO(" - Now, simulate the reconfiguration (modeled as a comm from the removed host to the remaining ones).");
105 std::vector<double> rescheduling_comp{0, 0, 0};
106 std::vector<double> rescheduling_comm{0, 0, 0, 0, 0, 0, 25000, 25000, 0};
107 sg4::this_actor::parallel_execute(hosts, rescheduling_comp, rescheduling_comm);
109 XBT_INFO(" - Now, let's cancel the old task and create a new task with modified comm and computation vectors:");
110 XBT_INFO(" What was already done is removed, and the load of the removed host is shared between remaining ones.");
111 for (int i = 0; i < 2; i++) {
112 // remove what we've done so far, for both comm and compute load
113 computation_amounts[i] *= remaining_ratio;
114 communication_amounts[i] *= remaining_ratio;
115 // The work from 1 must be shared between 2 remaining ones. 1/2=50% of extra work for each
116 computation_amounts[i] *= 1.5;
117 communication_amounts[i] *= 1.5;
120 computation_amounts.resize(2);
121 double remaining_comm = communication_amounts[1];
122 communication_amounts = {0, remaining_comm, remaining_comm, 0}; // Resizing a linearized matrix is hairly
125 activity = sg4::this_actor::exec_init(hosts, computation_amounts, communication_amounts);
127 XBT_INFO(" - Done, let's wait for the task completion");
130 XBT_INFO("Goodbye now!");
133 int main(int argc, char* argv[])
135 sg4::Engine e(&argc, argv);
137 xbt_assert(argc == 2, "Usage: %s <platform file>", argv[0]);
139 e.load_platform(argv[1]);
140 sg4::Actor::create("test", e.host_by_name("MyHost1"), runner);
143 XBT_INFO("Simulation done.");