1 /* Copyright (c) 2010-2023. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
/* This example shows how to build a torus cluster with multi-core hosts.
 *
 * Each leaf in the torus is a StarZone, composed of several CPUs.
 *
 * Each actor runs on a specific CPU. One sender broadcasts a message to all receivers.
 */
13 #include "simgrid/s4u.hpp"
14 namespace sg4 = simgrid::s4u;
16 XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_torus_multicpu, "Messages specific for this s4u example");
19 long msg_size = 1e6; /* message size in bytes */
20 std::vector<sg4::Host*> hosts_;
23 explicit Sender(const std::vector<sg4::Host*>& hosts) : hosts_{hosts} {}
24 void operator()() const
26 /* Vector in which we store all ongoing communications */
27 sg4::ActivitySet pending_comms;
29 /* Make a vector of the mailboxes to use */
30 std::vector<sg4::Mailbox*> mboxes;
32 /* Start dispatching 1 message to all receivers */
33 std::string msg_content = "Hello, I'm alive and running on " + sg4::this_actor::get_host()->get_name();
34 for (const auto* host : hosts_) {
35 /* Copy the data we send: the 'msg_content' variable is not a stable storage location.
36 * It will be destroyed when this actor leaves the loop, ie before the receiver gets it */
37 auto* payload = new std::string(msg_content);
39 /* Create a communication representing the ongoing communication, and store it in pending_comms */
40 auto* mbox = sg4::Mailbox::by_name(host->get_name());
41 mboxes.push_back(mbox);
42 sg4::CommPtr comm = mbox->put_async(payload, msg_size);
43 pending_comms.push(comm);
46 XBT_INFO("Done dispatching all messages");
48 /* Now that all message exchanges were initiated, wait for their completion in one single call */
49 pending_comms.wait_all();
51 XBT_INFO("Goodbye now!");
55 /* Receiver actor: wait for 1 message on the mailbox identified by the hostname */
58 void operator()() const
60 auto* mbox = sg4::Mailbox::by_name(sg4::this_actor::get_host()->get_name());
61 auto received = mbox->get_unique<std::string>();
62 XBT_INFO("I got a '%s'.", received->c_str());
66 /*************************************************************************************************/
/**
 * @brief Callback to set a cluster leaf/element
 *
 * In our example, each leaf is a StarZone, composed of 8 CPUs.
 * Each CPU is modeled as a host, connected to the outer world through a high-speed PCI link.
 * Obs.: CPU0 is the gateway for this zone
 *
 * / / \ \<-- 100Gbs, 10us link (1 link UP and 1 link DOWN for full-duplex)
 *
 * @param zone Cluster netzone being created (useful to create the hosts/links inside it)
 * @param coord Coordinates in the cluster
 * @param id Internal identifier in the cluster (for information)
 * @return netpoint, gateway: the netpoint to the StarZone and CPU0 as gateway
 */
90 static std::pair<simgrid::kernel::routing::NetPoint*, simgrid::kernel::routing::NetPoint*>
91 create_hostzone(const sg4::NetZone* zone, const std::vector<unsigned long>& /*coord*/, unsigned long id)
93 constexpr int num_cpus = 8; //!< Number of CPUs in the zone
94 constexpr double speed = 1e9; //!< Speed of each CPU
95 constexpr double link_bw = 100e9; //!< Link bw connecting the CPU
96 constexpr double link_lat = 1e-9; //!< Link latency
98 std::string hostname = "host" + std::to_string(id);
99 /* create the StarZone */
100 auto* host_zone = sg4::create_star_zone(hostname);
101 /* setting my Torus parent zone */
102 host_zone->set_parent(zone);
104 simgrid::kernel::routing::NetPoint* gateway = nullptr;
106 for (int i = 0; i < num_cpus; i++) {
107 std::string cpu_name = hostname + "-cpu" + std::to_string(i);
108 const sg4::Host* host = host_zone->create_host(cpu_name, speed);
109 /* the first CPU is the gateway */
111 gateway = host->get_netpoint();
112 /* create split-duplex link */
113 auto* link = host_zone->create_split_duplex_link("link-" + cpu_name, link_bw)->set_latency(link_lat);
114 /* connecting CPU to outer world */
115 host_zone->add_route(host, nullptr, {{link, sg4::LinkInRoute::Direction::UP}}, true);
117 /* seal newly created netzone */
119 return std::make_pair(host_zone->get_netpoint(), gateway);
122 /*************************************************************************************************/
/**
 * @brief Callback to create limiter link (1Gbs) for each netpoint
 *
 * The coord parameter depends on the cluster being created:
 * - Torus: Direct translation of the Torus' dimensions, e.g. (0, 0, 0) for a 3-D Torus
 * - Fat-Tree: A pair (level in the tree, id), e.g. (0, 0) for first leaf in the tree and (1,0) for the first switch at
 *   level 1.
 * - Dragonfly: a tuple (group, chassis, blades/routers, nodes), e.g. (0, 0, 0, 0) for first node in the cluster. To
 *   identify the router inside a (group, chassis, blade), we use MAX_UINT in the last parameter (e.g. 0, 0, 0,
 *   MAX_UINT).
 *
 * @param zone Cluster netzone being created (useful to create the hosts/links inside it)
 * @param coord Coordinates in the cluster
 * @param id Internal identifier in the cluster (for information)
 * @return Limiter link
 */
139 static sg4::Link* create_limiter(sg4::NetZone* zone, const std::vector<unsigned long>& /*coord*/, unsigned long id)
141 return zone->create_link("limiter-" + std::to_string(id), 1e9)->seal();
/**
 * @brief Creates a TORUS cluster
 *
 * Creates a TORUS cluster with dimensions 2x2x2
 *
 * The cluster has 8 elements/leaves in total. Each element is a StarZone containing 8 Hosts.
 * Each pair in the torus is connected through 2 links:
 * 1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
 * 2) link: 10Gbs link connecting the components (created automatically)
 *
 * B-----+----C (X-axis=2)
 *
 * For example, a communication from A to C goes through:
 * <tt> A->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C </tt>
 *
 * More precisely, considering that A and C are StarZones, a
 * communication from A-CPU-3 to C-CPU-7 goes through:
 * 1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
 * 2) A-CPU-0->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C-CPU-0
 * 3) StarZone C: C-CPU-0-> link-down-C-CPU-7 -> C-CPU-7
 *
 * Note that we don't have limiter links inside the StarZones(A, B, C),
 * but we have limiters in the Torus that are added to the links in the path (as we can see in "2)")
 *
 * More details in: <a href="https://simgrid.org/doc/latest/Platform_examples.html?highlight=torus#torus-cluster">Torus
 * Cluster</a>
 */
178 static void create_torus_cluster()
180 /* create the torus cluster, 10Gbs link between elements in the cluster */
181 sg4::create_torus_zone("cluster", nullptr, {2, 2, 2}, {create_hostzone, {}, create_limiter}, 10e9, 10e-6,
182 sg4::Link::SharingPolicy::SPLITDUPLEX)
186 /*************************************************************************************************/
/**
 * @brief Creates a Fat-Tree cluster
 *
 * Creates a Fat-Tree cluster with 2 levels and 6 nodes
 * The following parameters are used to create this cluster:
 * - Levels: 2 - two levels of switches in the cluster
 * - Down links: 2, 3 - L2 routers are connected to 2 elements, L1 routers to 3 elements
 * - Up links: 1, 2 - Each node (A-F) is connected to 1 L1 router, L1 routers are connected to 2 L2 routers
 * - Link count: 1, 1 - Use 1 link in each level
 *
 * The first parameter describes how many levels we have.
 * The following ones describe the connection between the elements and must have exactly n_levels components.
 *
 * S3 S4 <-- Level 2 routers
 * link:limiter - / \ / \
 * link: 10GBps --> | / \ |
 * (full-duplex) | / \ |
 * S1 S2 <-- Level 1 routers
 * link:limiter -> | |
 * link:10GBps --> /|\ /|\
 * link:limiter -> / | \ / | \
 * A B C D E F <-- level 0 Nodes
 *
 * Each element (A to F) is a StarZone containing 8 Hosts.
 * The connection uses 2 links:
 * 1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
 * 2) link: 10Gbs link connecting the components (created automatically)
 *
 * For example, a communication from A to C goes through:
 * <tt> A->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C</tt>
 *
 * More precisely, considering that A and C are StarZones, a
 * communication from A-CPU-3 to C-CPU-7 goes through:
 * 1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
 * 2) A-CPU-0->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C-CPU-0
 * 3) StarZone C: C-CPU-0-> link-down-C-CPU-7 -> C-CPU-7
 *
 * More details in: <a href="https://simgrid.org/doc/latest/Platform_examples.html#fat-tree-cluster">Fat-Tree
 * Cluster</a>
 */
234 static void create_fatTree_cluster()
236 /* create the fat tree cluster, 10Gbs link between elements in the cluster */
237 sg4::create_fatTree_zone("cluster", nullptr, {2, {2, 3}, {1, 2}, {1, 1}}, {create_hostzone, {}, create_limiter}, 10e9,
238 10e-6, sg4::Link::SharingPolicy::SPLITDUPLEX)
242 /*************************************************************************************************/
/**
 * @brief Creates a Dragonfly cluster
 *
 * Creates a Dragonfly cluster with 2 groups and 16 nodes
 * The following parameters are used to create this cluster:
 * - Groups: 2 groups, connected with 2 links (blue links)
 * - Chassis: 2 chassis, connected with a single link (black links)
 * - Routers: 2 routers, connected with 2 links (green links)
 * - Nodes: 2 leaves per router, single link
 *
 * The diagram below illustrates a group in the dragonfly cluster
 *
 *   +------------------------------------------------+
 *   |     +------------------------+                 |
 *   | +---|--------------+     +---|--------------+  |
 *   | |   |  green       |     |   |  green       |  |
 *   | |   |  links (2)   |     |   |  links (2)   |  |   blue links(2)
 *   | |   R1 ====== R2   |     |   R3 -----  R4 ======================> "Group 2"
 *   | |  /  \      /  \  |     |  /  \      /  \  |  |
 *   | | A    B    C    D |     | E    F    G    H |  |
 *   | +------------------+     +------------------+  |
 *   |      Chassis 1                Chassis 2        |
 *   +------------------------------------------------+
 *
 * Each element (A, B, C, etc) is a StarZone containing 8 Hosts.
 * The connection between elements (e.g. A->R1) uses 2 links:
 * 1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
 * 2) link: 10Gbs link connecting the components (created automatically)
 *
 * For example, a communication from A to C goes through:
 * <tt> A->limiter(A)->link(A-R1)->limiter(R1)->link(R1-R2)->limiter(R2)->link(R2-C)->limiter(C)->C</tt>
 *
 * More details in: <a href="https://simgrid.org/doc/latest/Platform_examples.html#dragonfly-cluster">Dragonfly
 * Cluster</a>
 */
280 static void create_dragonfly_cluster()
282 /* create the dragonfly cluster, 10Gbs link between elements in the cluster */
283 sg4::create_dragonfly_zone("cluster", nullptr, {{2, 2}, {2, 1}, {2, 2}, 2}, {create_hostzone, {}, create_limiter},
284 10e9, 10e-6, sg4::Link::SharingPolicy::SPLITDUPLEX)
288 /*************************************************************************************************/
290 int main(int argc, char* argv[])
292 sg4::Engine e(&argc, argv);
294 /* create platform */
295 if (std::string platform(argv[1]); platform == "torus")
296 create_torus_cluster();
297 else if (platform == "fatTree")
298 create_fatTree_cluster();
299 else if (platform == "dragonfly")
300 create_dragonfly_cluster();
302 std::vector<sg4::Host*> host_list = e.get_all_hosts();
303 /* create the sender actor running on first host */
304 sg4::Actor::create("sender", host_list[0], Sender(host_list));
305 /* create receiver in every host */
306 for (auto* host : host_list) {
307 sg4::Actor::create("receiver-" + host->get_name(), host, Receiver());
310 /* runs the simulation */