1 # Copyright (c) 2006-2023. The SimGrid Team. All rights reserved.
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the license (GNU LGPL) which comes with this package.
7 This example shows how to build a torus cluster with multi-core hosts.
9 However, each leaf in the torus is a StarZone, composed of several CPUs
11 Each actor runs in a specific CPU. One sender broadcasts a message to all receivers.
21 Send a msg for each host in its host list
24 def __init__(self, hosts, msg_size=int(1e6)):
26 self.msg_size = msg_size
28 # Actors that are created as objects will execute their __call__ method.
29 # So, the following constitutes the main function of the Sender actor.
31 pending_comms = simgrid.ActivitySet()
34 for host in self.hosts:
35 msg = "Hello, I'm alive and running on " + simgrid.this_actor.get_host().name
36 mbox = simgrid.Mailbox.by_name(host.name)
38 pending_comms.push(mbox.put_async(msg, self.msg_size))
40 simgrid.this_actor.info("Done dispatching all messages")
42 # Now that all message exchanges were initiated, wait for their completion in one single call
43 pending_comms.wait_all()
45 simgrid.this_actor.info("Goodbye now!")
50 Receiver actor: wait for 1 message on the mailbox identified by the hostname
54 mbox = simgrid.Mailbox.by_name(simgrid.this_actor.get_host().name)
56 simgrid.this_actor.info("I got a '%s'." % received)
58 #####################################################################################################
61 def create_hostzone(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> typing.Tuple[simgrid.NetPoint,
64 Callback to set a cluster leaf/element
66 In our example, each leaf is a StarZone, composed of 8 CPUs.
67 Each CPU is modeled as a host, connected to the outer world through a high-speed PCI link.
68 Obs.: CPU0 is the gateway for this zone
76 / / \ \<-- 100GBps, 1ns link (1 link UP and 1 link DOWN for full-duplex)
81 :param zone: Cluster netzone being created (useful to create the hosts/links inside it)
82 :param coord: Coordinates in the cluster
83 :param ident: Internal identifier in the torus (for information)
84 :return netpoint, gateway: the netpoint to the StarZone and CPU0 as gateway
86 num_cpus = 8 # Number of CPUs in the zone
87 speed = "1Gf" # Speed of each CPU
88 link_bw = "100GBps" # Link bw connecting the CPU
89 link_lat = "1ns" # Link latency
91 hostname = "host" + str(ident)
93 host_zone = simgrid.NetZone.create_star_zone(hostname)
94 # setting my Torus parent zone
95 host_zone.set_parent(zone)
99 for i in range(num_cpus):
100 cpu_name = hostname + "-cpu" + str(i)
101 host = host_zone.create_host(cpu_name, speed).seal()
102 # the first CPU is the gateway
104 gateway = host.netpoint
105 # create split-duplex link
106 link = host_zone.create_split_duplex_link("link-" + cpu_name, link_bw)
107 link.set_latency(link_lat).seal()
108 # connecting CPU to outer world
109 host_zone.add_route(host, None, [simgrid.LinkInRoute(link, simgrid.LinkInRoute.Direction.UP)], True)
111 # seal newly created netzone
113 return host_zone.netpoint, gateway
115 #####################################################################################################
def create_limiter(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> simgrid.Link:
    """
    Cluster callback that builds the limiter link (1Gbs) attached to a netpoint.

    The meaning of ``coord`` depends on the kind of cluster being created:

    - Torus: direct translation of the torus' dimensions, e.g. (0, 0, 0) for a 3-D torus.
    - Fat-Tree: a pair (level in the tree, ident), e.g. (0, 0) for the first leaf in the tree and (1, 0) for the
      first switch.
    - Dragonfly: a tuple (group, chassis, blades/routers, nodes), e.g. (0, 0, 0, 0) for the first node in the
      cluster. To identify the router inside a (group, chassis, blade), MAX_UINT is used as the last component,
      e.g. (0, 0, 0, MAX_UINT).

    :param zone: Netzone being created (useful to create the hosts/links inside it)
    :param coord: Coordinates in the cluster (not read by this callback)
    :param ident: Internal identifier in the cluster; used here to build the link name
    :return: Limiter link, i.e. the result of ``seal()`` on the link named "limiter-<ident>"
    """
    limiter_name = "limiter-" + str(ident)
    # the bandwidth is handed over as a one-element list holding 1e9
    limiter = zone.create_link(limiter_name, [1e9])
    return limiter.seal()
def create_torus_cluster():
    """
    Creates a TORUS cluster

    The torus has dimensions 2x2x2, i.e. 8 elements/leaves in total. Each element is a StarZone
    containing 8 Hosts (built by the create_hostzone callback).
    Each pair in the torus is connected through 2 links:
    1) limiter: a 1Gbs limiter link (provided by the user through the limiter callback, here create_limiter)
    2) link: 10Gbs link connecting the components (created automatically)

    For example, with A, B and C three elements of the torus such that B is linked to both A and C,
    a communication from A to C goes through:
    <tt> A->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C </tt>

    More precisely, considering that A and C are StarZones, a
    communication from A-CPU-3 to C-CPU-7 goes through:
    1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
    2) A-CPU-0->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C-CPU-0
    3) StarZone C: C-CPU-0-> link-down-C-CPU-7 -> C-CPU-7

    Note that there are no limiter links inside the StarZones (A, B, C); the limiters belong to the
    torus and are added to the links in the path (as step "2)" shows).

    More details in: https://simgrid.org/doc/latest/Platform_examples.html?highlight=torus#torus-cluster
    """
    # callbacks used for every element of the torus: leaf builder, no loopback callback, limiter builder
    leaf_callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)
    # create the torus cluster, 10Gbs link between elements in the cluster
    torus = simgrid.NetZone.create_torus_zone("cluster", None, [2, 2, 2], leaf_callbacks, 10e9, 10e-6,
                                              simgrid.Link.SharingPolicy.SPLITDUPLEX)
    torus.seal()
178 #####################################################################################################
def create_fat_tree_cluster():
    r"""
    Creates a Fat-Tree cluster

    The cluster has 2 levels and 6 nodes.
    The following parameters are used to create this cluster:
    - Levels: 2 - two-level of switches in the cluster
    - Down links: 2, 3 - L2 routers are connected to 2 elements, L1 routers to 3 elements
    - Up links: 1, 2 - Each node (A-F) is connected to 1 L1 router, L1 routers are connected to 2 L2
    - Link count: 1, 1 - Use 1 link in each level

    The first parameter describes how many levels we have.
    The following ones describe the connection between the elements and must have exactly n_levels components.

                        S3      S4      <-- Level 2 routers
    link:limiter ->     | \    / |
    link: 10GBps -->    |  \  /  |
    (full-duplex)       |   \/   |
                        |   /\   |
                        |  /  \  |
    link:limiter ->     | /    \ |
                        S1      S2      <-- Level 1 routers
    link:limiter ->    /|\     /|\
    link: 10GBps -->  / | \   / | \
                     A  B  C D  E  F    <-- level 0 Nodes

    Each element (A to F) is a StarZone containing 8 Hosts.
    The connection uses 2 links:
    1) limiter: a 1Gbs limiter link (provided by the user through the limiter callback, here create_limiter)
    2) link: 10Gbs link connecting the components (created automatically)

    For example, a communication from A to C goes through:
    <tt> A->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C</tt>

    More precisely, considering that A and C are StarZones, a
    communication from A-CPU-3 to C-CPU-7 goes through:
    1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
    2) A-CPU-0->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C-CPU-0
    3) StarZone C: C-CPU-0-> link-down-C-CPU-7 -> C-CPU-7

    More details in: https://simgrid.org/doc/latest/Platform_examples.html#fat-tree-cluster
    """
    # levels, down links, up links and link count, in the order listed in the docstring
    topology = simgrid.FatTreeParams(2, [2, 3], [1, 2], [1, 1])
    # callbacks used for every element: leaf builder, no loopback callback, limiter builder
    leaf_callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)
    # create the fat tree cluster, 10Gbs link between elements in the cluster
    fat_tree = simgrid.NetZone.create_fatTree_zone("cluster", None, topology, leaf_callbacks, 10e9, 10e-6,
                                                   simgrid.Link.SharingPolicy.SPLITDUPLEX)
    fat_tree.seal()
234 #####################################################################################################
def create_dragonfly_cluster():
    r"""
    Creates a Dragonfly cluster

    The cluster has 2 groups and 16 nodes.
    The following parameters are used to create this cluster:
    - Groups: 2 groups, connected with 2 links (blue links)
    - Chassis: 2 chassis, connected with a single link (black links)
    - Routers: 2 routers, connected with 2 links (green links)
    - Nodes: 2 leaves per router, single link

    The diagram below illustrates a group in the dragonfly cluster

    +------------------------------------------------+
    |        black link(1)                           |
    |     +------------------------+                 |
    | +---|--------------+     +---|--------------+  |
    | |   |  green       |     |   |  green       |  |
    | |   |  links (2)   |     |   |  links (2)   |  |   blue links(2)
    | |   R1 ====== R2   |     |   R3 ====== R4 ======================> "Group 2"
    | |  /  \      /  \  |     |  /  \      /  \  |  |
    | | A    B    C    D |     | E    F    G    H |  |
    | +------------------+     +------------------+  |
    |      Chassis 1                Chassis 2        |
    +------------------------------------------------+

    Each element (A, B, C, etc) is a StarZone containing 8 Hosts.
    The connection between elements (e.g. A->R1) uses 2 links:
    1) limiter: a 1Gbs limiter link (provided by the user through the limiter callback, here create_limiter)
    2) link: 10Gbs link connecting the components (created automatically)

    For example, a communication from A to C goes through:
    <tt> A->limiter(A)->link(A-R1)->limiter(R1)->link(R1-R2)->limiter(R2)->link(R2-C)->limiter(C)->C</tt>

    More details in: https://simgrid.org/doc/latest/Platform_examples.html#dragonfly-cluster
    """
    # groups, chassis, routers and nodes, in the order listed in the docstring
    topology = simgrid.DragonflyParams([2, 2], [2, 1], [2, 2], 2)
    # callbacks used for every element: leaf builder, no loopback callback, limiter builder
    leaf_callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)
    # create the dragonfly cluster, 10Gbs link between elements in the cluster
    dragonfly = simgrid.NetZone.create_dragonfly_zone("cluster", None, topology, leaf_callbacks, 10e9, 10e-6,
                                                      simgrid.Link.SharingPolicy.SPLITDUPLEX)
    dragonfly.seal()
280 ###################################################################################################
284 e = simgrid.Engine(sys.argv)
285 platform = sys.argv[1]
288 if platform == "torus":
289 create_torus_cluster()
290 elif platform == "fatTree":
291 create_fat_tree_cluster()
292 elif platform == "dragonfly":
293 create_dragonfly_cluster()
295 sys.exit("invalid param")
297 host_list = e.all_hosts
298 # create the sender actor running on first host
299 simgrid.Actor.create("sender", host_list[0], Sender(host_list))
300 # create receiver in every host
301 for host in host_list:
302 simgrid.Actor.create("receiver-" + host.name, host, Receiver())
304 # runs the simulation
307 if __name__ == '__main__':