1 # Copyright (c) 2006-2023. The SimGrid Team. All rights reserved.
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the license (GNU LGPL) which comes with this package.
7 This example shows how to build a cluster (torus, fat-tree or dragonfly) with multi-core hosts.
9 Each leaf in the cluster is a StarZone, composed of several CPUs.
11 Each actor runs in a specific CPU. One sender broadcasts a message to all receivers.
21 Send a message to each host in its host list
24 def __init__(self, hosts, msg_size=int(1e6)):
26 self.msg_size = msg_size
28 # Actors that are created as objects will execute their __call__ method.
29 # So, the following constitutes the main function of the Sender actor.
31 pending_comms = simgrid.ActivitySet()
34 for host in self.hosts:
35 msg = "Hello, I'm alive and running on " + simgrid.this_actor.get_host().name
36 mbox = simgrid.Mailbox.by_name(host.name)
38 pending_comms.push(mbox.put_async(msg, self.msg_size))
40 simgrid.this_actor.info("Done dispatching all messages")
42 # Now that all message exchanges were initiated, wait for their completion in one single call
43 pending_comms.wait_all()
45 simgrid.this_actor.info("Goodbye now!")
50 Receiver actor: wait for 1 message on the mailbox identified by the hostname
54 mbox = simgrid.Mailbox.by_name(simgrid.this_actor.get_host().name)
56 simgrid.this_actor.info("I got a '%s'." % received)
58 #####################################################################################################
61 def create_hostzone(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> simgrid.NetZone:
63 Callback to set a cluster leaf/element
65 In our example, each leaf is a StarZone, composed of 8 CPUs.
66 Each CPU is modeled as a host, connected to the outer world through a high-speed PCI link.
67 Obs.: CPU0 is the gateway for this zone
75 / / \ \<-- 100GBps, 1ns link (1 link UP and 1 link DOWN for full-duplex)
80 :param zone: Cluster netzone being created (useful to create the hosts/links inside it)
81 :param coord: Coordinates in the cluster
82 :param ident: Internal identifier in the torus (for information)
83 :return: the newly created StarZone netzone, with CPU0 set as its gateway
85 num_cpus = 8 # Number of CPUs in the zone
86 speed = "1Gf" # Speed of each CPU
87 link_bw = "100GBps" # Link bw connecting the CPU
88 link_lat = "1ns" # Link latency
90 hostname = "host" + str(ident)
92 host_zone = simgrid.NetZone.create_star_zone(hostname)
93 # setting my Torus parent zone
94 host_zone.set_parent(zone)
97 for i in range(num_cpus):
98 cpu_name = hostname + "-cpu" + str(i)
99 host = host_zone.create_host(cpu_name, speed).seal()
100 # the first CPU is the gateway
102 host_zone.set_gateway(host.netpoint)
103 # create split-duplex link
104 link = host_zone.create_split_duplex_link("link-" + cpu_name, link_bw)
105 link.set_latency(link_lat).seal()
106 # connecting CPU to outer world
107 host_zone.add_route(host, None, [simgrid.LinkInRoute(link, simgrid.LinkInRoute.Direction.UP)], True)
109 # seal newly created netzone
113 #####################################################################################################
def create_limiter(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> simgrid.Link:
    """
    Callback creating the limiter link (1Gbs) of one netpoint in the cluster.

    The link is named "limiter-<ident>", is created inside ``zone`` and is
    sealed before being handed back to the caller.

    The content of ``coord`` depends on the cluster being created:
    - Torus: direct translation of the torus' dimensions, e.g. (0, 0, 0) for a 3-D torus
    - Fat-Tree: a pair (level in the tree, ident), e.g. (0, 0) for the first leaf in the tree
      and (1, 0) for the first switch
    - Dragonfly: a tuple (group, chassis, blades/routers, nodes), e.g. (0, 0, 0, 0) for the
      first node in the cluster. The router inside a (group, chassis, blade) is identified
      by MAX_UINT in the last parameter.

    :param zone: Cluster netzone being created (useful to create the hosts/links inside it)
    :param coord: Coordinates in the cluster (not used by this callback)
    :param ident: Internal identifier in the cluster, used here to name the link
    :return: Limiter link
    """
    # One limiter per netpoint: the identifier keeps the link names distinct.
    limiter_name = "limiter-" + str(ident)
    limiter = zone.create_link(limiter_name, [1e9])
    # Seal the link and return the result, as the cluster builder expects.
    return limiter.seal()
def create_torus_cluster():
    """
    Creates a TORUS cluster

    Builds and seals a torus zone named "cluster" with dimensions 2x2x2.

    The cluster has 8 elements/leaves in total. Each element is a StarZone containing 8 hosts
    (built by the create_hostzone callback).
    Each pair in the torus is connected through 2 links:
    1) limiter: a 1Gbs limiter link (set by the user through the limiter callback, create_limiter)
    2) link: a 10Gbs link connecting the components (created automatically)

    For example, with A, B and C being consecutive elements of the torus, a communication
    from A to C goes through:
        A->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C

    More precisely, considering that A and C are StarZones, a communication from A-CPU-3 to
    C-CPU-7 goes through:
    1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
    2) A-CPU-0->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C-CPU-0
    3) StarZone C: C-CPU-0 -> link-down-C-CPU-7 -> C-CPU-7

    Note that there are no limiter links inside the StarZones (A, B, C); the limiters belong
    to the torus and are added to the links in the path (as shown in step 2).

    More details in the SimGrid documentation:
    https://simgrid.org/doc/latest/Platform_examples.html?highlight=torus#torus-cluster
    """
    # 3-D torus with 2 elements along each axis
    dimensions = [2, 2, 2]
    # leaf builder in first position, nothing in second position, limiter builder in third
    callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)
    # create the torus cluster, 10Gbs link between elements in the cluster
    torus = simgrid.NetZone.create_torus_zone("cluster", None, dimensions, callbacks, 10e9, 10e-6,
                                              simgrid.Link.SharingPolicy.SPLITDUPLEX)
    torus.seal()
176 #####################################################################################################
def create_fat_tree_cluster():
    """
    Creates a Fat-Tree cluster

    Builds and seals a Fat-Tree zone named "cluster" with 2 levels and 6 nodes.
    The following parameters are used to create this cluster:
    - Levels: 2 - two levels of switches in the cluster
    - Down links: 2, 3 - L2 routers are connected to 2 elements, L1 routers to 3 elements
    - Up links: 1, 2 - each node (A-F) is connected to 1 L1 router, L1 routers are connected to 2 L2
    - Link count: 1, 1 - use 1 link in each level

    The first parameter describes how many levels we have.
    The following ones describe the connection between the elements and must have exactly
    n_levels components.

    Layout: S3 and S4 are the level 2 routers, S1 and S2 are the level 1 routers, and A to F
    are the level 0 nodes. Every hop uses a limiter link plus a 10GBps full-duplex link.

    Each element (A to F) is a StarZone containing 8 hosts (built by the create_hostzone callback).
    The connection uses 2 links:
    1) limiter: a 1Gbs limiter link (set by the user through the limiter callback, create_limiter)
    2) link: a 10Gbs link connecting the components (created automatically)

    For example, a communication from A to C goes through:
        A->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C

    More precisely, considering that A and C are StarZones, a communication from A-CPU-3 to
    C-CPU-7 goes through:
    1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
    2) A-CPU-0->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C-CPU-0
    3) StarZone C: C-CPU-0 -> link-down-C-CPU-7 -> C-CPU-7

    More details in the SimGrid documentation:
    https://simgrid.org/doc/latest/Platform_examples.html#fat-tree-cluster
    """
    # levels, down links per level, up links per level, link count per level
    topology = simgrid.FatTreeParams(2, [2, 3], [1, 2], [1, 1])
    # leaf builder in first position, nothing in second position, limiter builder in third
    callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)
    # create the fat tree cluster, 10Gbs link between elements in the cluster
    fat_tree = simgrid.NetZone.create_fatTree_zone("cluster", None, topology, callbacks, 10e9, 10e-6,
                                                   simgrid.Link.SharingPolicy.SPLITDUPLEX)
    fat_tree.seal()
232 #####################################################################################################
def create_dragonfly_cluster():
    """
    Creates a Dragonfly cluster

    Builds and seals a Dragonfly zone named "cluster" with 2 groups and 16 nodes.
    The following parameters are used to create this cluster:
    - Groups: 2 groups, connected with 2 links (blue links)
    - Chassis: 2 chassis, connected with a single link (black links)
    - Routers: 2 routers, connected with 2 links (green links)
    - Nodes: 2 leaves per router, single link

    Layout of one group: chassis 1 holds routers R1 and R2, chassis 2 holds routers R3 and R4.
    The leaves A and B hang off R1, C and D off R2, E and F off R3, G and H off R4.
    Routers of a chassis are joined by the green links, and the group reaches "Group 2"
    through the blue links.

    Each element (A, B, C, etc) is a StarZone containing 8 hosts (built by the create_hostzone
    callback).
    The connection between elements (e.g. A->R1) uses 2 links:
    1) limiter: a 1Gbs limiter link (set by the user through the limiter callback, create_limiter)
    2) link: a 10Gbs link connecting the components (created automatically)

    For example, a communication from A to C goes through:
        A->limiter(A)->link(A-R1)->limiter(R1)->link(R1-R2)->limiter(R2)->link(R2-C)->limiter(C)->C

    More details in the SimGrid documentation:
    https://simgrid.org/doc/latest/Platform_examples.html#dragonfly-cluster
    """
    # [groups, links], [chassis, links], [routers, links], leaves per router
    topology = simgrid.DragonflyParams([2, 2], [2, 1], [2, 2], 2)
    # leaf builder in first position, nothing in second position, limiter builder in third
    callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)
    # create the dragonfly cluster, 10Gbs link between elements in the cluster
    dragonfly = simgrid.NetZone.create_dragonfly_zone("cluster", None, topology, callbacks, 10e9, 10e-6,
                                                      simgrid.Link.SharingPolicy.SPLITDUPLEX)
    dragonfly.seal()
278 ###################################################################################################
282 e = simgrid.Engine(sys.argv)
283 platform = sys.argv[1]
286 if platform == "torus":
287 create_torus_cluster()
288 elif platform == "fatTree":
289 create_fat_tree_cluster()
290 elif platform == "dragonfly":
291 create_dragonfly_cluster()
293 sys.exit("invalid param")
295 host_list = e.all_hosts
296 # create the sender actor running on first host
297 simgrid.Actor.create("sender", host_list[0], Sender(host_list))
298 # create receiver in every host
299 for host in host_list:
300 simgrid.Actor.create("receiver-" + host.name, host, Receiver())
302 # runs the simulation
305 if __name__ == '__main__':