1 # Copyright (c) 2006-2023. The SimGrid Team. All rights reserved.
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the license (GNU LGPL) which comes with this package.
"""
This example shows how to build a torus cluster with multi-core hosts.

However, each leaf in the torus is a StarZone, composed of several CPUs.

Each actor runs on a specific CPU. One sender broadcasts a message to all receivers.
"""
class Sender:
    """
    Send a msg for each host in its host list

    :param hosts: Hosts to send to; one message is put on the mailbox named after each host
    :param msg_size: Simulated size given to put_async() for every message
    """

    def __init__(self, hosts, msg_size=int(1e6)):
        # __call__ iterates over self.hosts, so the list must be stored here
        self.hosts = hosts
        self.msg_size = msg_size

    # Actors that are created as object will execute their __call__ method.
    # So, the following constitutes the main function of the Sender actor.
    def __call__(self):
        pending_comms = simgrid.ActivitySet()

        # The payload is the same for every receiver: build it once, outside the loop
        msg = "Hello, I'm alive and running on " + simgrid.this_actor.get_host().name

        for host in self.hosts:
            # each receiver listens on the mailbox named after its own host
            mbox = simgrid.Mailbox.by_name(host.name)
            pending_comms.push(mbox.put_async(msg, self.msg_size))

        simgrid.this_actor.info("Done dispatching all messages")

        # Now that all message exchanges were initiated, wait for their completion in one single call
        pending_comms.wait_all()

        simgrid.this_actor.info("Goodbye now!")
class Receiver:
    """
    Receiver actor: wait for 1 message on the mailbox identified by the hostname
    """

    def __call__(self):
        # the mailbox is named after the host this actor runs on
        mbox = simgrid.Mailbox.by_name(simgrid.this_actor.get_host().name)
        # fetch the single expected message before logging it
        received = mbox.get()
        simgrid.this_actor.info("I got a '%s'." % received)
58 #####################################################################################################
def create_hostzone(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> typing.Tuple[simgrid.NetPoint,
                                                                                                simgrid.NetPoint]:
    r"""
    Callback to set a cluster leaf/element

    In our example, each leaf is a StarZone, composed of 8 CPUs.
    Each CPU is modeled as a host, connected to the outer world through a high-speed PCI link.
    Obs.: CPU0 is the gateway for this zone

        /   /   \   \   <-- 100GBps, 1ns link (1 link UP and 1 link DOWN for full-duplex)

    :param zone: Cluster netzone being created (useful to create the hosts/links inside it)
    :param coord: Coordinates in the cluster
    :param ident: Internal identifier in the cluster (for information)
    :return netpoint, gateway: the netpoint to the StarZone and CPU0 as gateway
    """
    num_cpus = 8  # Number of CPUs in the zone
    speed = "1Gf"  # Speed of each CPU
    link_bw = "100GBps"  # Link bw connecting the CPU
    link_lat = "1ns"  # Link latency

    hostname = "host" + str(ident)
    # create the StarZone that models this leaf
    host_zone = simgrid.NetZone.create_star_zone(hostname)
    # setting the cluster being built as my parent zone
    host_zone.set_parent(zone)

    gateway = None
    # create CPUs
    for i in range(num_cpus):
        cpu_name = hostname + "-cpu" + str(i)
        host = host_zone.create_host(cpu_name, speed).seal()
        # the first CPU is the gateway (and only the first one)
        if i == 0:
            gateway = host.netpoint
        # create split-duplex link
        link = host_zone.create_split_duplex_link("link-" + cpu_name, link_bw)
        link.set_latency(link_lat).seal()
        # connecting CPU to outer world
        host_zone.add_route(host.netpoint, None, None, None,
                            [simgrid.LinkInRoute(link, simgrid.LinkInRoute.Direction.UP)], True)

    # seal newly created netzone
    host_zone.seal()
    return host_zone.netpoint, gateway
116 #####################################################################################################
def create_limiter(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> simgrid.Link:
    """
    Callback to create limiter link (1Gbs) for each netpoint

    The coord parameter depends on the cluster being created:
    - Torus: Direct translation of the Torus' dimensions, e.g. (0, 0, 0) for a 3-D Torus
    - Fat-Tree: A pair (level in the tree, ident), e.g. (0, 0) for first leaf in the tree and (1,0) for the first
      switch at level 1
    - Dragonfly: a tuple (group, chassis, blades/routers, nodes), e.g. (0, 0, 0, 0) for first node in the cluster. To
      identify the router inside a (group, chassis, blade), we use MAX_UINT in the last parameter (e.g. 0, 0, 0,
      MAX_UINT)

    :param zone: Cluster netzone being created (useful to create the hosts/links inside it)
    :param coord: Coordinates in the cluster (unused by this callback)
    :param ident: Internal identifier in the cluster, used to name the link
    :return: Limiter link
    """
    limiter_name = "limiter-" + str(ident)
    limiter = zone.create_link(limiter_name, [1e9])
    # the link is sealed before being handed back
    return limiter.seal()
def create_torus_cluster():
    """
    Creates a TORUS cluster

    Creates a TORUS cluster with dimensions 2x2x2

    The cluster has 8 elements/leaves in total. Each element is a StarZone containing 8 Hosts.
    Each pair in the torus is connected through 2 links:
    1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
    2) link: 10Gbs link connecting the components (created automatically)

        B-----+----C (X-axis=2)

    For example, a communication from A to C goes through:
    <tt> A->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C </tt>

    More precisely, considering that A and C are StarZones, a
    communication from A-CPU-3 to C-CPU-7 goes through:
    1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
    2) A-CPU-0->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C-CPU-0
    3) StarZone C: C-CPU-0-> link-down-C-CPU-7 -> C-CPU-7

    Note that we don't have limiter links inside the StarZones(A, B, C),
    but we have limiters in the Torus that are added to the links in the path (as we can see in "2)")

    More details in: <a href="https://simgrid.org/doc/latest/Platform_examples.html?highlight=torus#torus-cluster">Torus
    Cluster</a>
    """
    # leaves are built by create_hostzone, limiter links by create_limiter
    callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)
    dimensions = [2, 2, 2]
    # create the torus cluster, 10Gbs link between elements in the cluster
    # (10e-6 is presumably the latency of those links -- confirm against the SimGrid API)
    torus = simgrid.NetZone.create_torus_zone("cluster", None, dimensions, callbacks, 10e9, 10e-6,
                                              simgrid.Link.SharingPolicy.SPLITDUPLEX)
    torus.seal()
179 #####################################################################################################
def create_fat_tree_cluster():
    r"""
    Creates a Fat-Tree cluster

    Creates a Fat-Tree cluster with 2 levels and 6 nodes
    The following parameters are used to create this cluster:
    - Levels: 2 - two-level of switches in the cluster
    - Down links: 2, 3 - L2 routers are connected to 2 elements, L1 routers to 3 elements
    - Up links: 1, 2 - Each node (A-F) is connected to 1 L1 router, L1 routers are connected to 2 L2
    - Link count: 1, 1 - Use 1 link in each level

    The first parameter describes how many levels we have.
    The following ones describe the connection between the elements and must have exactly n_levels components.

                            S3           S4        <-- Level 2 routers
       link:limiter -      /  \         /  \
       link: 10GBps -->   |    /       \    |
       (full-duplex)      |   /         \   |
                          S1             S2        <-- Level 1 routers
       link:10GBps -->   /|\            /|\
       link:limiter ->  / | \          / | \
                       A  B  C        D  E  F      <-- level 0 Nodes

    Each element (A to F) is a StarZone containing 8 Hosts.
    The connection uses 2 links:
    1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
    2) link: 10Gbs link connecting the components (created automatically)

    For example, a communication from A to C goes through:
    <tt> A->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C</tt>

    More precisely, considering that A and C are StarZones, a
    communication from A-CPU-3 to C-CPU-7 goes through:
    1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
    2) A-CPU-0->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C-CPU-0
    3) StarZone C: C-CPU-0-> link-down-C-CPU-7 -> C-CPU-7

    More details in: <a href="https://simgrid.org/doc/latest/Platform_examples.html#fat-tree-cluster">Fat-Tree
    Cluster</a>
    """
    # levels, down links, up links and link count, as listed in the docstring
    topology = simgrid.FatTreeParams(2, [2, 3], [1, 2], [1, 1])
    # leaves are built by create_hostzone, limiter links by create_limiter
    callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)
    # create the fat tree cluster, 10Gbs link between elements in the cluster
    fat_tree = simgrid.NetZone.create_fatTree_zone("cluster", None, topology, callbacks, 10e9, 10e-6,
                                                   simgrid.Link.SharingPolicy.SPLITDUPLEX)
    fat_tree.seal()
235 #####################################################################################################
def create_dragonfly_cluster():
    r"""
    Creates a Dragonfly cluster

    Creates a Dragonfly cluster with 2 groups and 16 nodes
    The following parameters are used to create this cluster:
    - Groups: 2 groups, connected with 2 links (blue links)
    - Chassis: 2 chassis, connected with a single link (black links)
    - Routers: 2 routers, connected with 2 links (green links)
    - Nodes: 2 leaves per router, single link

    The diagram below illustrates a group in the dragonfly cluster

    +------------------------------------------------+
    |     +------------------------+                 |
    | +---|--------------+     +---|--------------+  |
    | |   |  green       |     |   |  green       |  |
    | |   |  links (2)   |     |   |  links (2)   |  |   blue links(2)
    | |   R1 ====== R2   |     |   R3 ----- R4 ======================> "Group 2"
    | |  /  \      /  \  |     |  /  \     /  \   |  |
    | | A    B    C    D |     | E    F   G    H  |  |
    | +------------------+     +------------------+  |
    |      Chassis 1                Chassis 2        |
    +------------------------------------------------+

    Each element (A, B, C, etc) is a StarZone containing 8 Hosts.
    The connection between elements (e.g. A->R1) uses 2 links:
    1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
    2) link: 10Gbs link connecting the components (created automatically)

    For example, a communication from A to C goes through:
    <tt> A->limiter(A)->link(A-R1)->limiter(R1)->link(R1-R2)->limiter(R2)->link(R2-C)->limiter(C)->C</tt>

    More details in: <a href="https://simgrid.org/doc/latest/Platform_examples.html#dragonfly-cluster">Dragonfly
    Cluster</a>
    """
    # groups, chassis, routers (each as [count, links]) and nodes per router, as listed in the docstring
    topology = simgrid.DragonflyParams([2, 2], [2, 1], [2, 2], 2)
    # leaves are built by create_hostzone, limiter links by create_limiter
    callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)
    # create the dragonfly cluster, 10Gbs link between elements in the cluster
    dragonfly = simgrid.NetZone.create_dragonfly_zone("cluster", None, topology, callbacks, 10e9, 10e-6,
                                                      simgrid.Link.SharingPolicy.SPLITDUPLEX)
    dragonfly.seal()
281 ###################################################################################################
def main():
    """
    Build the requested cluster, deploy the actors and run the simulation.

    The platform kind is read from the first command-line argument and must be one of
    "torus", "fatTree" or "dragonfly"; the program exits with "invalid param" otherwise
    (including when the argument is missing).
    One Sender is started on the first host and one Receiver on every host.
    """
    e = simgrid.Engine(sys.argv)
    # exit cleanly instead of raising IndexError when no platform is given
    if len(sys.argv) < 2:
        sys.exit("invalid param")
    platform = sys.argv[1]

    # create platform
    if platform == "torus":
        create_torus_cluster()
    elif platform == "fatTree":
        create_fat_tree_cluster()
    elif platform == "dragonfly":
        create_dragonfly_cluster()
    else:
        sys.exit("invalid param")

    host_list = e.all_hosts
    # create the sender actor running on first host
    simgrid.Actor.create("sender", host_list[0], Sender(host_list))
    # create receiver in every host
    for host in host_list:
        simgrid.Actor.create("receiver-" + host.name, host, Receiver())

    # runs the simulation
    e.run()


if __name__ == '__main__':
    main()