include examples/cpp/synchro-mutex/s4u-synchro-mutex.tesh
include examples/cpp/synchro-semaphore/s4u-synchro-semaphore.cpp
include examples/cpp/synchro-semaphore/s4u-synchro-semaphore.tesh
+include examples/cpp/torus-multicpu/s4u-torus-multicpu.cpp
+include examples/cpp/torus-multicpu/s4u-torus-multicpu.tesh
include examples/cpp/trace-categories/s4u-trace-categories.cpp
include examples/cpp/trace-categories/s4u-trace-categories.tesh
include examples/cpp/trace-host-user-variables/s4u-trace-host-user-variables.cpp
plugin-host-load plugin-link-load
replay-comm replay-io
routing-get-clusters
- synchro-barrier synchro-condition-variable synchro-condition-variable-waituntil synchro-mutex synchro-semaphore)
+ synchro-barrier synchro-condition-variable synchro-condition-variable-waituntil synchro-mutex synchro-semaphore
+ torus-multicpu)
# Use default source file unless specified otherwise
if(NOT DEFINED _${example}_sources)
--- /dev/null
+/* Copyright (c) 2010-2021. The SimGrid Team. All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* This example shows how to build a torus cluster with multi-core hosts.
+ *
+ * Each leaf in the torus is a StarZone, composed of several CPUs.
+ *
+ * Each actor runs in a specific CPU. One sender broadcasts a message to all receivers.
+ */
+
+#include "simgrid/s4u.hpp"
+namespace sg4 = simgrid::s4u;
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_torus_multicpu, "Messages specific for this s4u example");
+
+/* Sender actor: asynchronously sends 1 message to the mailbox named after each host it was given,
+ * then waits for the completion of all these communications */
+class Sender {
+  long msg_size = 1e6; /* message size in bytes */
+  std::vector<sg4::Host*> hosts_; /* hosts whose mailboxes receive a message */
+
+public:
+  /* The list is taken by const reference so that it is copied only once, into hosts_ */
+  explicit Sender(const std::vector<sg4::Host*>& hosts) : hosts_{hosts} {}
+  void operator()() const
+  {
+    /* Vector in which we store all ongoing communications */
+    std::vector<sg4::CommPtr> pending_comms;
+
+    /* Start dispatching 1 message to all receivers */
+    std::string msg_content =
+        std::string("Hello, I'm alive and running on ") + std::string(sg4::this_actor::get_host()->get_name());
+    for (const auto* host : hosts_) {
+      /* Copy the data we send: the 'msg_content' variable is not a stable storage location.
+       * Each receiver gets its own heap-allocated copy of the message */
+      auto* payload = new std::string(msg_content);
+
+      /* Create a communication representing the ongoing communication, and store it in pending_comms.
+       * The mailbox is the one named after the destination host */
+      auto mbox         = sg4::Mailbox::by_name(host->get_name());
+      sg4::CommPtr comm = mbox->put_async(payload, msg_size);
+      pending_comms.push_back(comm);
+    }
+
+    XBT_INFO("Done dispatching all messages");
+
+    /* Now that all message exchanges were initiated, wait for their completion in one single call */
+    sg4::Comm::wait_all(&pending_comms);
+
+    XBT_INFO("Goodbye now!");
+  }
+};
+
+/* Receiver actor: wait for 1 message on the mailbox identified by the hostname, then log its content */
+class Receiver {
+public:
+  /* const: this functor has no data member to modify */
+  void operator()() const
+  {
+    /* the mailbox is named after the host on which this actor runs */
+    auto mbox     = sg4::Mailbox::by_name(sg4::this_actor::get_host()->get_name());
+    auto received = mbox->get_unique<std::string>();
+    XBT_INFO("I got a '%s'.", received->c_str());
+  }
+};
+
+/**
+ * @brief Callback to set a Torus leaf/element
+ *
+ * In our example, each leaf is a StarZone, composed of 8 CPUs (cpu0 to cpu7).
+ * Each CPU is modeled as a host, connected to the outer world through a high-speed PCI link.
+ * Obs.: CPU0 is the gateway for this zone
+ *
+ *    (outer world)
+ *         CPU0 (gateway)
+ *          |
+ *          |
+ *          +
+ *         /|\
+ *        / | \   <-- 100Gbs, 1ns link (single link for UP/DOWN communications)
+ *       /  |  \
+ *      /   |   \
+ *   CPU1  ...  CPU7
+ *
+ * @param zone Torus netzone being created (useful to create the hosts/links inside it)
+ * @param coord Coordinates in the torus (e.g. "0,0,0", "0,1,0"); not used by this callback
+ * @param id Internal identifier in the torus (used here to name the zone, its hosts and its links)
+ * @return netpoint, gateway: the netpoint to the StarZone and CPU0 as gateway
+ */
+static std::pair<simgrid::kernel::routing::NetPoint*, simgrid::kernel::routing::NetPoint*>
+create_hostzone(sg4::NetZone* zone, const std::vector<unsigned int>& /*coord*/, int id)
+{
+  constexpr int num_cpus    = 8;     //!< Number of CPUs in the zone
+  constexpr double speed    = 1e9;   //!< Speed of each CPU
+  constexpr double link_bw  = 100e9; //!< Link bw connecting the CPU
+  constexpr double link_lat = 1e-9;  //!< Link latency
+
+  std::string hostname = "host" + std::to_string(id);
+  /* create the StarZone */
+  auto* host_zone = sg4::create_star_zone(hostname);
+  /* setting my Torus parent zone */
+  host_zone->set_parent(zone);
+
+  sg4::Host* gateway = nullptr;
+  /* create CPUs */
+  for (int i = 0; i < num_cpus; i++) {
+    std::string cpu_name = hostname + "-cpu" + std::to_string(i);
+    sg4::Host* host      = host_zone->create_host(cpu_name, speed)->seal();
+    /* the first CPU is the gateway */
+    if (i == 0)
+      gateway = host;
+    /* create link and add route to external world */
+    sg4::Link* link = host_zone->create_link("link-" + cpu_name, link_bw)->set_latency(link_lat)->seal();
+    host_zone->add_route(host->get_netpoint(), nullptr, nullptr, nullptr, {link});
+  }
+  /* num_cpus > 0, so gateway was set during the first loop iteration */
+  return std::make_pair(host_zone->get_netpoint(), gateway->get_netpoint());
+}
+
+/**
+ * @brief Creates a TORUS cluster
+ *
+ * Creates a TORUS cluster with dimensions 2x2x2
+ *
+ * The cluster has 8 elements/leaves in total. Each element is a StarZone containing 8 Hosts.
+ * Each pair in the torus is connected through 2 links:
+ * 1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
+ * 2) link: 10Gbs link connecting the components (created automatically)
+ *
+ *          (Y-axis=2)
+ *          A
+ *          |
+ *          |   X (Z-axis=2)
+ *          |  / 10 Gbs
+ *          | +
+ *          |/ limiter=1Gbs
+ *          B----------C (X-axis=2)
+ *
+ * For example, a communication from A to C goes through:
+ * <tt> A->limiter(A)->link(A-B)->limiter(B)->link(B-C)->C </tt>
+ *
+ * More precisely, considering that A and C are StarZones, a
+ * communication from A-CPU-3 to C-CPU-7 goes through:
+ * 1) StarZone A: A-CPU-3 -> link -> A-CPU-0
+ * 2) A-CPU-0->limiter(A)->link(A-B)->limiter(B)->link(B-C)->C-CPU-0
+ * 3) C-CPU-0-> link -> C-CPU-7
+ *
+ * More details in: <a href="https://simgrid.org/doc/latest/Platform_examples.html?highlight=torus#torus-cluster">Torus
+ * Cluster</a>
+ */
+static void create_torus_cluster()
+{
+  // Callback to create limiter link (1Gbs) for each host; the coordinates are not needed to name the link
+  auto create_limiter = [](sg4::NetZone* zone, const std::vector<unsigned int>& /*coord*/, int id) -> sg4::Link* {
+    return zone->create_link("limiter-" + std::to_string(id), 1e9)->seal();
+  };
+
+  /* create the torus cluster, 10Gbs link between elements in the cluster */
+  sg4::create_torus_zone("cluster", nullptr, {2, 2, 2}, 10e9, 10e-6, sg4::Link::SharingPolicy::SPLITDUPLEX,
+                         create_hostzone, {}, create_limiter)
+      ->seal();
+}
+
+/* Entry point: builds the torus platform, deploys one sender and one receiver per host, then runs the simulation */
+int main(int argc, char* argv[])
+{
+  sg4::Engine engine(&argc, argv);
+
+  /* build the platform programmatically */
+  create_torus_cluster();
+
+  /* all the hosts known to the engine once the platform is built */
+  std::vector<sg4::Host*> all_hosts = engine.get_all_hosts();
+
+  /* the sender runs on the first host of the list and is given the whole list */
+  sg4::Actor::create("sender", all_hosts[0], Sender(all_hosts));
+
+  /* one receiver per host, named after the host on which it runs */
+  for (auto* h : all_hosts) {
+    const std::string actor_name = std::string("receiver-") + std::string(h->get_name());
+    sg4::Actor::create(actor_name, h, Receiver());
+  }
+
+  /* run the simulation until completion */
+  engine.run();
+
+  return 0;
+}
--- /dev/null
+#!/usr/bin/env tesh
+
+$ ${bindir:=.}/s4u-torus-multicpu
+> [host0-cpu0:sender:(1) 0.000000] [s4u_torus_multicpu/INFO] Done dispatching all messages
+> [host0-cpu0:receiver-host0-cpu0:(2) 0.000049] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host0-cpu7:receiver-host0-cpu7:(9) 0.000087] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host0-cpu6:receiver-host0-cpu6:(8) 0.000087] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host0-cpu5:receiver-host0-cpu5:(7) 0.000087] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host0-cpu4:receiver-host0-cpu4:(6) 0.000087] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host0-cpu3:receiver-host0-cpu3:(5) 0.000087] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host0-cpu2:receiver-host0-cpu2:(4) 0.000087] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host0-cpu1:receiver-host0-cpu1:(3) 0.000087] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host4-cpu0:receiver-host4-cpu0:(34) 0.039554] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host2-cpu0:receiver-host2-cpu0:(18) 0.039554] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host1-cpu0:receiver-host1-cpu0:(10) 0.039554] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host4-cpu7:receiver-host4-cpu7:(41) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host4-cpu6:receiver-host4-cpu6:(40) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host4-cpu5:receiver-host4-cpu5:(39) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host4-cpu4:receiver-host4-cpu4:(38) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host4-cpu3:receiver-host4-cpu3:(37) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host4-cpu2:receiver-host4-cpu2:(36) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host4-cpu1:receiver-host4-cpu1:(35) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host2-cpu7:receiver-host2-cpu7:(25) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host2-cpu6:receiver-host2-cpu6:(24) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host2-cpu5:receiver-host2-cpu5:(23) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host2-cpu4:receiver-host2-cpu4:(22) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host2-cpu3:receiver-host2-cpu3:(21) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host2-cpu2:receiver-host2-cpu2:(20) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host2-cpu1:receiver-host2-cpu1:(19) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host1-cpu7:receiver-host1-cpu7:(17) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host1-cpu6:receiver-host1-cpu6:(16) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host1-cpu5:receiver-host1-cpu5:(15) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host1-cpu4:receiver-host1-cpu4:(14) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host1-cpu3:receiver-host1-cpu3:(13) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host1-cpu2:receiver-host1-cpu2:(12) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host1-cpu1:receiver-host1-cpu1:(11) 0.040015] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host6-cpu0:receiver-host6-cpu0:(50) 0.054947] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host5-cpu0:receiver-host5-cpu0:(42) 0.054947] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host3-cpu0:receiver-host3-cpu0:(26) 0.054947] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host6-cpu7:receiver-host6-cpu7:(57) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host6-cpu6:receiver-host6-cpu6:(56) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host6-cpu5:receiver-host6-cpu5:(55) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host6-cpu4:receiver-host6-cpu4:(54) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host6-cpu3:receiver-host6-cpu3:(53) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host6-cpu2:receiver-host6-cpu2:(52) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host6-cpu1:receiver-host6-cpu1:(51) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host5-cpu7:receiver-host5-cpu7:(49) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host5-cpu6:receiver-host5-cpu6:(48) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host5-cpu5:receiver-host5-cpu5:(47) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host5-cpu4:receiver-host5-cpu4:(46) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host5-cpu3:receiver-host5-cpu3:(45) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host5-cpu2:receiver-host5-cpu2:(44) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host5-cpu1:receiver-host5-cpu1:(43) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host3-cpu7:receiver-host3-cpu7:(33) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host3-cpu6:receiver-host3-cpu6:(32) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host3-cpu5:receiver-host3-cpu5:(31) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host3-cpu4:receiver-host3-cpu4:(30) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host3-cpu3:receiver-host3-cpu3:(29) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host3-cpu2:receiver-host3-cpu2:(28) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host3-cpu1:receiver-host3-cpu1:(27) 0.055118] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host7-cpu0:receiver-host7-cpu0:(58) 0.057832] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host7-cpu7:receiver-host7-cpu7:(65) 0.057862] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host7-cpu6:receiver-host7-cpu6:(64) 0.057862] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host7-cpu5:receiver-host7-cpu5:(63) 0.057862] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host7-cpu4:receiver-host7-cpu4:(62) 0.057862] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host7-cpu3:receiver-host7-cpu3:(61) 0.057862] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host7-cpu2:receiver-host7-cpu2:(60) 0.057862] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host7-cpu1:receiver-host7-cpu1:(59) 0.057862] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'.
+> [host0-cpu0:sender:(1) 0.057862] [s4u_torus_multicpu/INFO] Goodbye now!
/* We use a map instead of a std::vector here because that's a sparse vector. Some values may not exist */
/* The pair is {link_up, link_down} */
std::unordered_map<unsigned int, std::pair<resource::LinkImpl*, resource::LinkImpl*>> private_links_;
+ std::unordered_map<unsigned int, NetPoint*> gateways_; //!< list of gateways for leafs (if they're netzones)
resource::LinkImpl* backbone_ = nullptr;
NetPoint* router_ = nullptr;
bool has_limiter_ = false;
void set_backbone(resource::LinkImpl* bb) { backbone_ = bb; }
bool has_backbone() const { return backbone_ != nullptr; }
void set_router(NetPoint* router) { router_ = router; }
+ /** @brief Sets gateway for the leaf at @p position (the gateway must not be a netzone; it may be nullptr) */
+ void set_gateway(unsigned int position, NetPoint* gateway);
+ /** @brief Gets gateway for the leaf at @p position, or nullptr if no gateway is stored for it */
+ NetPoint* get_gateway(unsigned int position);
void add_private_link_at(unsigned int position, std::pair<resource::LinkImpl*, resource::LinkImpl*> link);
bool private_link_exists_at(unsigned int position) const
{
}
private_links_.insert({position, {linkUp->get_impl(), linkDown->get_impl()}});
}
+
+/** @brief Stores @p gateway as the gateway of the leaf at @p position; a non-null gateway must not be a netzone */
+void ClusterZone::set_gateway(unsigned int position, NetPoint* gateway)
+{
+  /* a null gateway passes the check: the message argument is only reached with a non-null netzone gateway */
+  xbt_assert(not(gateway && gateway->is_netzone()), "ClusterZone: gateway cannot be another netzone %s",
+             gateway->get_cname());
+  gateways_[position] = gateway;
+}
+
+/** @brief Returns the gateway stored for the leaf at @p position, or nullptr when gateways_ has no entry for it */
+NetPoint* ClusterZone::get_gateway(unsigned int position)
+{
+  const auto found = gateways_.find(position);
+  return found == gateways_.end() ? nullptr : found->second;
+}
+
} // namespace routing
} // namespace kernel
current_node = next_node;
}
+ // set gateways (if any)
+ route->gw_src = get_gateway(src->id());
+ route->gw_dst = get_gateway(dst->id());
}
/** @brief Auxiliary function to create hosts */
} else {
xbt_assert(not gw, "TorusZone: Netpoint (%s) isn't netzone, gateway must be nullptr", netpoint->get_cname());
}
- // FIXME: add gateway if set
+ // setting gateway
+ zone->set_gateway(i, gw);
if (set_loopback) {
Link* loopback = set_loopback(zone->get_iface(), dims, i);