From: Bruno Donassolo Date: Wed, 28 Apr 2021 13:38:28 +0000 (+0200) Subject: New: s4u::create_dragonfly_zone X-Git-Tag: v3.28~397 X-Git-Url: http://bilbo.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/e0e78e5fd1f74efdc62ed85e5b20d4cb70838a8f New: s4u::create_dragonfly_zone Implements the builder method for Dragonfly zones. New example: Dragonfly cluster of multi-cpu hosts(Star Zones) --- diff --git a/MANIFEST.in b/MANIFEST.in index 13ef11d8a0..a0132080f4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2201,6 +2201,7 @@ include src/kernel/resource/profile/StochasticDatedValue.hpp include src/kernel/routing/ClusterZone.cpp include src/kernel/routing/DijkstraZone.cpp include src/kernel/routing/DragonflyZone.cpp +include src/kernel/routing/DragonflyZone_test.cpp include src/kernel/routing/EmptyZone.cpp include src/kernel/routing/FatTreeZone.cpp include src/kernel/routing/FatTreeZone_test.cpp diff --git a/examples/cpp/clusters-multicpu/s4u-clusters-multicpu.cpp b/examples/cpp/clusters-multicpu/s4u-clusters-multicpu.cpp index 27686f8aed..763326250a 100644 --- a/examples/cpp/clusters-multicpu/s4u-clusters-multicpu.cpp +++ b/examples/cpp/clusters-multicpu/s4u-clusters-multicpu.cpp @@ -176,10 +176,11 @@ static void create_torus_cluster() ->seal(); } +/*************************************************************************************************/ /** - * @brief Creates a Fat Tree cluster + * @brief Creates a Fat-Tree cluster * - * Creates a Fat Tree cluster with 2 levels and 6 nodes + * Creates a Fat-Tree cluster with 2 levels and 6 nodes * The following parameters are used to create this cluster: * - Levels: 2 - two-level cluster * - Down links: 2, 3 - L1 routers is connected to 2 elements, L2 routers to 3 elements @@ -217,7 +218,7 @@ static void create_torus_cluster() * * Note that limiters are only valid for leaves, not routers. 
* - * More details in: Fat Tree + * More details in: Fat-Tree * Cluster */ static void create_fatTree_cluster() @@ -228,6 +229,54 @@ static void create_fatTree_cluster() ->seal(); } +/*************************************************************************************************/ +/** + * @brief Creates a Dragonfly cluster + * + * Creates a Dragonfly cluster with 2 groups and 16 nodes + * The following parameters are used to create this cluster: + * - Groups: 2 groups, connected with 2 links (blue links) + * - Chassis: 2 chassis, connected with a single link (black links) + * - Routers: 2 routers, connected with 2 links (green links) + * - Nodes: 2 leaves per router, single link + * + * The diagram below illustrates a group in the dragonfly cluster + * + * +------------------------------------------------+ + * | black link(1) | + * | +------------------------+ | + * | +---|--------------+ +---|--------------+ | + * | | | green | | | green | | + * | | | links (2) | | | links (2) | | blue links(2) + * | | R1 ====== R2 | | R3 ----- R4 ======================> "Group 2" + * | | / \ / \ | | / \ / \ | | + * | | A B C D | | E F G H | | + * | +------------------+ +------------------+ | + * | Chassis 1 Chassis 2 | + * +------------------------------------------------+ + * Group 1 + * + * Each element (A, B, C, etc) is a StarZone containing 8 Hosts. + * The connection between nodes and routers (e.g. A->R1) uses 2 links: + * 1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback) + * 2) link: 10Gbs link connecting the components (created automatically) + * + * For example, a communication from A to C goes through: + * A->limiter(A)->link(A-R1)->link(R1-R2)->limiter(C)->C + * + * Note that limiters are only valid for leaves, not routers. 
+ * + * More details in: Dragonfly + * Cluster + */ +static void create_dragonfly_cluster() +{ + /* create the dragonfly cluster, 10Gbs link between elements in the cluster */ + sg4::create_dragonfly_zone("cluster", nullptr, {{2, 2}, {2, 1}, {2, 2}, 2}, 10e9, 10e-6, + sg4::Link::SharingPolicy::SPLITDUPLEX, create_hostzone, {}, create_limiter) + ->seal(); +} + /*************************************************************************************************/ int main(int argc, char* argv[]) @@ -240,6 +289,8 @@ int main(int argc, char* argv[]) create_torus_cluster(); else if (platform == "fatTree") create_fatTree_cluster(); + else if (platform == "dragonfly") + create_dragonfly_cluster(); std::vector host_list = e.get_all_hosts(); /* create the sender actor running on first host */ diff --git a/examples/cpp/clusters-multicpu/s4u-clusters-multicpu.tesh b/examples/cpp/clusters-multicpu/s4u-clusters-multicpu.tesh index 8f4cf82a5d..405873786f 100644 --- a/examples/cpp/clusters-multicpu/s4u-clusters-multicpu.tesh +++ b/examples/cpp/clusters-multicpu/s4u-clusters-multicpu.tesh @@ -119,3 +119,135 @@ $ ${bindir:=.}/s4u-clusters-multicpu fatTree > [host2-cpu2:receiver-host2-cpu2:(20) 0.043559] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. > [host2-cpu1:receiver-host2-cpu1:(19) 0.043559] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. > [host0-cpu0:sender:(1) 0.043559] [s4u_torus_multicpu/INFO] Goodbye now! + +$ ${bindir:=.}/s4u-clusters-multicpu dragonfly +> [host0-cpu0:sender:(1) 0.000000] [s4u_torus_multicpu/INFO] Done dispatching all messages +> [host0-cpu7:receiver-host0-cpu7:(9) 0.000083] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host0-cpu6:receiver-host0-cpu6:(8) 0.000083] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. 
+> [host0-cpu5:receiver-host0-cpu5:(7) 0.000083] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host0-cpu4:receiver-host0-cpu4:(6) 0.000083] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host0-cpu3:receiver-host0-cpu3:(5) 0.000083] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host0-cpu2:receiver-host0-cpu2:(4) 0.000083] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host0-cpu1:receiver-host0-cpu1:(3) 0.000083] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host0-cpu0:receiver-host0-cpu0:(2) 0.000083] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host1-cpu0:receiver-host1-cpu0:(10) 0.095153] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host1-cpu7:receiver-host1-cpu7:(17) 0.095759] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host1-cpu6:receiver-host1-cpu6:(16) 0.095759] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host1-cpu5:receiver-host1-cpu5:(15) 0.095759] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host1-cpu4:receiver-host1-cpu4:(14) 0.095759] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host1-cpu3:receiver-host1-cpu3:(13) 0.095759] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host1-cpu2:receiver-host1-cpu2:(12) 0.095759] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host1-cpu1:receiver-host1-cpu1:(11) 0.095759] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host3-cpu0:receiver-host3-cpu0:(74) 0.111399] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. 
+> [host2-cpu0:receiver-host2-cpu0:(66) 0.111399] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host3-cpu7:receiver-host3-cpu7:(81) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host3-cpu6:receiver-host3-cpu6:(80) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host3-cpu5:receiver-host3-cpu5:(79) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host3-cpu4:receiver-host3-cpu4:(78) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host3-cpu3:receiver-host3-cpu3:(77) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host3-cpu2:receiver-host3-cpu2:(76) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host3-cpu1:receiver-host3-cpu1:(75) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host2-cpu7:receiver-host2-cpu7:(73) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host2-cpu6:receiver-host2-cpu6:(72) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host2-cpu5:receiver-host2-cpu5:(71) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host2-cpu4:receiver-host2-cpu4:(70) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host2-cpu3:receiver-host2-cpu3:(69) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host2-cpu2:receiver-host2-cpu2:(68) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host2-cpu1:receiver-host2-cpu1:(67) 0.111944] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. 
+> [host5-cpu0:receiver-host5-cpu0:(90) 0.112633] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host4-cpu0:receiver-host4-cpu0:(82) 0.112633] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host5-cpu7:receiver-host5-cpu7:(97) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host5-cpu6:receiver-host5-cpu6:(96) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host5-cpu5:receiver-host5-cpu5:(95) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host5-cpu4:receiver-host5-cpu4:(94) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host5-cpu3:receiver-host5-cpu3:(93) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host5-cpu2:receiver-host5-cpu2:(92) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host5-cpu1:receiver-host5-cpu1:(91) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host4-cpu7:receiver-host4-cpu7:(89) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host4-cpu6:receiver-host4-cpu6:(88) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host4-cpu5:receiver-host4-cpu5:(87) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host4-cpu4:receiver-host4-cpu4:(86) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host4-cpu3:receiver-host4-cpu3:(85) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host4-cpu2:receiver-host4-cpu2:(84) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. 
+> [host4-cpu1:receiver-host4-cpu1:(83) 0.113085] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host9-cpu0:receiver-host9-cpu0:(122) 0.121906] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host8-cpu0:receiver-host8-cpu0:(114) 0.121906] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host9-cpu7:receiver-host9-cpu7:(129) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host9-cpu6:receiver-host9-cpu6:(128) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host9-cpu5:receiver-host9-cpu5:(127) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host9-cpu4:receiver-host9-cpu4:(126) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host9-cpu3:receiver-host9-cpu3:(125) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host9-cpu2:receiver-host9-cpu2:(124) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host9-cpu1:receiver-host9-cpu1:(123) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host8-cpu7:receiver-host8-cpu7:(121) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host8-cpu6:receiver-host8-cpu6:(120) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host8-cpu5:receiver-host8-cpu5:(119) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host8-cpu4:receiver-host8-cpu4:(118) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host8-cpu3:receiver-host8-cpu3:(117) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. 
+> [host8-cpu2:receiver-host8-cpu2:(116) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host8-cpu1:receiver-host8-cpu1:(115) 0.122267] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host7-cpu0:receiver-host7-cpu0:(106) 0.122697] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host6-cpu0:receiver-host6-cpu0:(98) 0.122697] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host7-cpu7:receiver-host7-cpu7:(113) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host7-cpu6:receiver-host7-cpu6:(112) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host7-cpu5:receiver-host7-cpu5:(111) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host7-cpu4:receiver-host7-cpu4:(110) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host7-cpu3:receiver-host7-cpu3:(109) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host7-cpu2:receiver-host7-cpu2:(108) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host7-cpu1:receiver-host7-cpu1:(107) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host6-cpu7:receiver-host6-cpu7:(105) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host6-cpu6:receiver-host6-cpu6:(104) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host6-cpu5:receiver-host6-cpu5:(103) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host6-cpu4:receiver-host6-cpu4:(102) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. 
+> [host6-cpu3:receiver-host6-cpu3:(101) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host6-cpu2:receiver-host6-cpu2:(100) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host6-cpu1:receiver-host6-cpu1:(99) 0.122977] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host11-cpu0:receiver-host11-cpu0:(26) 0.127894] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host10-cpu0:receiver-host10-cpu0:(18) 0.127894] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host11-cpu7:receiver-host11-cpu7:(33) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host11-cpu6:receiver-host11-cpu6:(32) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host11-cpu5:receiver-host11-cpu5:(31) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host11-cpu4:receiver-host11-cpu4:(30) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host11-cpu3:receiver-host11-cpu3:(29) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host11-cpu2:receiver-host11-cpu2:(28) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host11-cpu1:receiver-host11-cpu1:(27) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host10-cpu7:receiver-host10-cpu7:(25) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host10-cpu6:receiver-host10-cpu6:(24) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host10-cpu5:receiver-host10-cpu5:(23) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. 
+> [host10-cpu4:receiver-host10-cpu4:(22) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host10-cpu3:receiver-host10-cpu3:(21) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host10-cpu2:receiver-host10-cpu2:(20) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host10-cpu1:receiver-host10-cpu1:(19) 0.128094] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host13-cpu0:receiver-host13-cpu0:(42) 0.128297] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host12-cpu0:receiver-host12-cpu0:(34) 0.128297] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host13-cpu7:receiver-host13-cpu7:(49) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host13-cpu6:receiver-host13-cpu6:(48) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host13-cpu5:receiver-host13-cpu5:(47) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host13-cpu4:receiver-host13-cpu4:(46) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host13-cpu3:receiver-host13-cpu3:(45) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host13-cpu2:receiver-host13-cpu2:(44) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host13-cpu1:receiver-host13-cpu1:(43) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host12-cpu7:receiver-host12-cpu7:(41) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host12-cpu6:receiver-host12-cpu6:(40) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. 
+> [host12-cpu5:receiver-host12-cpu5:(39) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host12-cpu4:receiver-host12-cpu4:(38) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host12-cpu3:receiver-host12-cpu3:(37) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host12-cpu2:receiver-host12-cpu2:(36) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host12-cpu1:receiver-host12-cpu1:(35) 0.128424] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host15-cpu0:receiver-host15-cpu0:(58) 0.130101] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host14-cpu0:receiver-host14-cpu0:(50) 0.130101] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host15-cpu7:receiver-host15-cpu7:(65) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host15-cpu6:receiver-host15-cpu6:(64) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host15-cpu5:receiver-host15-cpu5:(63) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host15-cpu4:receiver-host15-cpu4:(62) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host15-cpu3:receiver-host15-cpu3:(61) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host15-cpu2:receiver-host15-cpu2:(60) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host15-cpu1:receiver-host15-cpu1:(59) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host14-cpu7:receiver-host14-cpu7:(57) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. 
+> [host14-cpu6:receiver-host14-cpu6:(56) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host14-cpu5:receiver-host14-cpu5:(55) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host14-cpu4:receiver-host14-cpu4:(54) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host14-cpu3:receiver-host14-cpu3:(53) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host14-cpu2:receiver-host14-cpu2:(52) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host14-cpu1:receiver-host14-cpu1:(51) 0.130157] [s4u_torus_multicpu/INFO] I got a 'Hello, I'm alive and running on host0-cpu0'. +> [host0-cpu0:sender:(1) 0.130157] [s4u_torus_multicpu/INFO] Goodbye now! diff --git a/include/simgrid/kernel/routing/DragonflyZone.hpp b/include/simgrid/kernel/routing/DragonflyZone.hpp index dfabbb2edc..d5f77ddf6e 100644 --- a/include/simgrid/kernel/routing/DragonflyZone.hpp +++ b/include/simgrid/kernel/routing/DragonflyZone.hpp @@ -70,7 +70,15 @@ public: explicit DragonflyZone(const std::string& name); void get_local_route(NetPoint* src, NetPoint* dst, RouteCreationArgs* into, double* latency) override; void parse_specific_arguments(ClusterCreationArgs* cluster) override; - + /** @brief Checks topology parameters */ + static void check_topology(unsigned int n_groups, unsigned int groups_links, unsigned int n_chassis, + unsigned int chassis_links, unsigned int n_routers, unsigned int routers_links, + unsigned int nodes); + /** @brief Set Dragonfly topology */ + void set_topology(unsigned int n_groups, unsigned int groups_links, unsigned int n_chassis, + unsigned int chassis_links, unsigned int n_routers, unsigned int routers_links, unsigned int nodes); + /** @brief Set the characteristics of links inside the Dragonfly zone */ + void set_link_characteristics(double bw, double lat, 
s4u::Link::SharingPolicy sharing_policy); Coords rankId_to_coords(int rank_id) const; XBT_ATTRIB_DEPRECATED_v330("Please use rankId_to_coords(int)") void rankId_to_coords(int rank_id, unsigned int coords[4]) const; @@ -82,8 +90,8 @@ private: void generate_link(const std::string& id, int numlinks, resource::LinkImpl** linkup, resource::LinkImpl** linkdown); simgrid::s4u::Link::SharingPolicy sharing_policy_ = simgrid::s4u::Link::SharingPolicy::SHARED; - double bw_ = 0; - double lat_ = 0; + double bw_ = 0; + double lat_ = 0; unsigned int num_nodes_per_blade_ = 0; unsigned int num_blades_per_chassis_ = 0; diff --git a/include/simgrid/s4u/NetZone.hpp b/include/simgrid/s4u/NetZone.hpp index a8c6ad3181..1014aff3c6 100644 --- a/include/simgrid/s4u/NetZone.hpp +++ b/include/simgrid/s4u/NetZone.hpp @@ -256,7 +256,52 @@ XBT_PUBLIC NetZone* create_fatTree_zone(const std::string& name, const NetZone* const std::function& set_loopback = {}, const std::function& set_limiter = {}); -XBT_PUBLIC NetZone* create_dragonfly_zone(const std::string& name); +/** @brief Aggregates the parameters necessary to build a Dragonfly zone */ +struct DragonflyParams { + std::pair groups; + std::pair chassis; + std::pair routers; + unsigned int nodes; + DragonflyParams(const std::pair& groups, + const std::pair& chassis, + const std::pair& routers, unsigned int nodes); +}; +/** + * @brief Create a Dragonfly zone + * + * Dragonfly clusters are characterized by: + * - groups: number of groups and links between each group, e.g. 2,2. + * - chassis: number of chassis in each group and the number of links used to connect the chassis, e.g. 2,3 + * - routers: number of routers in each chassis and their links, e.g. 3,1 + * - nodes: number of nodes connected to each router using a single link, e.g. 2 + * + * In total, the cluster will have groups * chassis * routers * nodes elements/leaves. 
+ * + * The best way to understand it is to look at the documentation available in: Dragonfly Cluster + * + * Moreover, this method accepts 3 callbacks to populate the cluster: set_netpoint, set_loopback and set_limiter. + * + * Note that all elements in a Dragonfly cluster must have (or not) the same elements (loopback and limiter) + * + * @param name NetZone's name + * @param parent Pointer to parent's netzone (nullptr if root netzone). Needed to be able to create the resources inside + * the netzone + * @param parameters Characteristics of this Dragonfly + * @param bandwidth Characteristics of the inter-nodes link + * @param latency Characteristics of the inter-nodes link + * @param sharing_policy Characteristics of the inter-nodes link + * @param set_netpoint Callback to set the netpoint of an element in the Dragonfly + * @param set_loopback Callback to set the loopback + * @param set_limiter Callback to set the limiter + * @return Pointer to new netzone + */ +XBT_PUBLIC NetZone* create_dragonfly_zone(const std::string& name, const NetZone* parent, + const DragonflyParams& parameters, double bandwidth, double latency, + Link::SharingPolicy sharing_policy, + const std::function& set_netpoint, + const std::function& set_loopback = {}, + const std::function& set_limiter = {}); } // namespace s4u } // namespace simgrid diff --git a/src/kernel/routing/DragonflyZone.cpp b/src/kernel/routing/DragonflyZone.cpp index 1c1cb13e19..ebb1a7200e 100644 --- a/src/kernel/routing/DragonflyZone.cpp +++ b/src/kernel/routing/DragonflyZone.cpp @@ -10,6 +10,7 @@ #include #include +#include #include XBT_LOG_NEW_DEFAULT_SUBCATEGORY(surf_route_cluster_dragonfly, surf_route_cluster, "Dragonfly Routing part of surf"); @@ -42,6 +43,31 @@ void DragonflyZone::rankId_to_coords(int rankId, unsigned int coords[4]) const / coords[3] = s_coords.node; } +void DragonflyZone::set_link_characteristics(double bw, double lat, s4u::Link::SharingPolicy sharing_policy) +{ + sharing_policy_ = sharing_policy; + if
(sharing_policy == s4u::Link::SharingPolicy::SPLITDUPLEX) + num_links_per_link_ = 2; + bw_ = bw; + lat_ = lat; +} + +void DragonflyZone::set_topology(unsigned int n_groups, unsigned int groups_links, unsigned int n_chassis, + unsigned int chassis_links, unsigned int n_routers, unsigned int routers_links, + unsigned int nodes) +{ + num_groups_ = n_groups; + num_links_blue_ = groups_links; + + num_chassis_per_group_ = n_chassis; + num_links_black_ = chassis_links; + + num_blades_per_chassis_ = n_routers; + num_links_green_ = routers_links; + + num_nodes_per_blade_ = nodes; +} + void DragonflyZone::parse_specific_arguments(ClusterCreationArgs* cluster) { std::vector parameters; @@ -110,11 +136,7 @@ void DragonflyZone::parse_specific_arguments(ClusterCreationArgs* cluster) throw std::invalid_argument(std::string("Last parameter is not the amount of nodes per blade:") + parameters[3]); } - sharing_policy_ = cluster->sharing_policy; - if (cluster->sharing_policy == s4u::Link::SharingPolicy::SPLITDUPLEX) - num_links_per_link_ = 2; - bw_ = cluster->bw; - lat_ = cluster->lat; + set_link_characteristics(cluster->bw, cluster->lat, cluster->sharing_policy); } /* Generate the cluster once every node is created */ @@ -331,14 +353,70 @@ void DragonflyZone::get_local_route(NetPoint* src, NetPoint* dst, RouteCreationA if (latency) *latency += targetRouter->my_nodes_[targetCoords.node * num_links_per_link_ + num_links_per_link_ - 1]->get_latency(); + + // set gateways (if any) + route->gw_src = get_gateway(src->id()); + route->gw_dst = get_gateway(dst->id()); } } // namespace routing } // namespace kernel namespace s4u { -NetZone* create_dragonfly_zone(const std::string& name) +DragonflyParams::DragonflyParams(const std::pair& groups, + const std::pair& chassis, + const std::pair& routers, unsigned int nodes) + : groups(groups), chassis(chassis), routers(routers), nodes(nodes) +{ + if (groups.first == 0) + throw std::invalid_argument("Dragonfly: Invalid number of groups, must be > 
0"); + if (groups.second == 0) + throw std::invalid_argument("Dragonfly: Invalid number of blue (groups) links, must be > 0"); + if (chassis.first == 0) + throw std::invalid_argument("Dragonfly: Invalid number of chassis, must be > 0"); + if (chassis.second == 0) + throw std::invalid_argument("Dragonfly: Invalid number of black (chassis) links, must be > 0"); + if (routers.first == 0) + throw std::invalid_argument("Dragonfly: Invalid number of routers, must be > 0"); + if (routers.second == 0) + throw std::invalid_argument("Dragonfly: Invalid number of green (routers) links, must be > 0"); + if (nodes == 0) + throw std::invalid_argument("Dragonfly: Invalid number of nodes, must be > 0"); +} + +NetZone* create_dragonfly_zone(const std::string& name, const NetZone* parent, const DragonflyParams& params, + double bandwidth, double latency, Link::SharingPolicy sharing_policy, + const std::function& set_netpoint, + const std::function& set_loopback, + const std::function& set_limiter) { - return (new kernel::routing::DragonflyZone(name))->get_iface(); + /* initial checks */ + if (bandwidth <= 0) + throw std::invalid_argument("DragonflyZone: incorrect bandwidth for internode communication, bw=" + + std::to_string(bandwidth)); + if (latency < 0) + throw std::invalid_argument("DragonflyZone: incorrect latency for internode communication, lat=" + + std::to_string(latency)); + + /* creating zone */ + auto* zone = new kernel::routing::DragonflyZone(name); + zone->set_topology(params.groups.first, params.groups.second, params.chassis.first, params.chassis.second, + params.routers.first, params.routers.second, params.nodes); + if (parent) + zone->set_parent(parent->get_impl()); + zone->set_link_characteristics(bandwidth, latency, sharing_policy); + + /* populating it */ + std::vector dimensions = {params.groups.first, params.chassis.first, params.routers.first, + params.nodes}; + int tot_elements = std::accumulate(dimensions.begin(), dimensions.end(), 1, std::multiplies<>()); + 
for (int i = 0; i < tot_elements; i++) { + kernel::routing::NetPoint* netpoint; + Link* limiter; + Link* loopback; + zone->fill_leaf_from_cb(i, dimensions, set_netpoint, set_loopback, set_limiter, &netpoint, &loopback, &limiter); + } + + return zone->get_iface(); } } // namespace s4u diff --git a/src/kernel/routing/DragonflyZone_test.cpp b/src/kernel/routing/DragonflyZone_test.cpp new file mode 100644 index 0000000000..254680c34b --- /dev/null +++ b/src/kernel/routing/DragonflyZone_test.cpp @@ -0,0 +1,105 @@ +/* Copyright (c) 2017-2021. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include "catch.hpp" + +#include "simgrid/kernel/routing/DragonflyZone.hpp" +#include "simgrid/kernel/routing/NetPoint.hpp" +#include "simgrid/s4u/Engine.hpp" +#include "simgrid/s4u/Host.hpp" +#include "simgrid/s4u/NetZone.hpp" + +namespace { +class EngineWrapper { +public: + explicit EngineWrapper(std::string name) : argv(&name[0]), e(&argc, &argv) {} + int argc = 1; + char* argv; + simgrid::s4u::Engine e; +}; + +std::pair +create_host(simgrid::s4u::NetZone* zone, const std::vector& /*coord*/, int id) +{ + const simgrid::s4u::Host* host = zone->create_host(std::to_string(id), 1e9)->seal(); + return std::make_pair(host->get_netpoint(), nullptr); +} +} // namespace + +TEST_CASE("kernel::routing::DragonflyZone: Creating Zone", "") +{ + using namespace simgrid::s4u; + EngineWrapper e("test"); + REQUIRE(create_dragonfly_zone("test", e.e.get_netzone_root(), {{3, 4}, {4, 3}, {5, 1}, 2}, 1e9, 10, + simgrid::s4u::Link::SharingPolicy::SHARED, create_host)); +} + +TEST_CASE("kernel::routing::DragonflyZone: Invalid params", "") +{ + using namespace simgrid::s4u; + EngineWrapper e("test"); + + SECTION("0 nodes") + { + REQUIRE_THROWS_AS(create_dragonfly_zone("test", e.e.get_netzone_root(), {{3, 4}, {4, 3}, {5, 1}, 0}, 1e9, 10, + 
simgrid::s4u::Link::SharingPolicy::SHARED, create_host), + std::invalid_argument); + } + + SECTION("0 groups") + { + REQUIRE_THROWS_AS(create_dragonfly_zone("test", e.e.get_netzone_root(), {{0, 4}, {4, 3}, {5, 1}, 2}, 1e9, 10, + simgrid::s4u::Link::SharingPolicy::SHARED, create_host), + std::invalid_argument); + } + SECTION("0 groups links") + { + REQUIRE_THROWS_AS(create_dragonfly_zone("test", e.e.get_netzone_root(), {{3, 0}, {4, 3}, {5, 1}, 2}, 1e9, 10, + simgrid::s4u::Link::SharingPolicy::SHARED, create_host), + std::invalid_argument); + } + + SECTION("0 chassis") + { + REQUIRE_THROWS_AS(create_dragonfly_zone("test", e.e.get_netzone_root(), {{3, 4}, {0, 3}, {5, 1}, 2}, 1e9, 10, + simgrid::s4u::Link::SharingPolicy::SHARED, create_host), + std::invalid_argument); + } + + SECTION("0 chassis links") + { + REQUIRE_THROWS_AS(create_dragonfly_zone("test", e.e.get_netzone_root(), {{3, 4}, {4, 0}, {5, 1}, 2}, 1e9, 10, + simgrid::s4u::Link::SharingPolicy::SHARED, create_host), + std::invalid_argument); + } + + SECTION("0 routers") + { + REQUIRE_THROWS_AS(create_dragonfly_zone("test", e.e.get_netzone_root(), {{3, 4}, {4, 3}, {0, 1}, 2}, 1e9, 10, + simgrid::s4u::Link::SharingPolicy::SHARED, create_host), + std::invalid_argument); + } + + SECTION("0 routers links") + { + REQUIRE_THROWS_AS(create_dragonfly_zone("test", e.e.get_netzone_root(), {{3, 4}, {4, 3}, {5, 0}, 2}, 1e9, 10, + simgrid::s4u::Link::SharingPolicy::SHARED, create_host), + std::invalid_argument); + } + + SECTION("0 bandwidth") + { + REQUIRE_THROWS_AS(create_dragonfly_zone("test", e.e.get_netzone_root(), {{3, 4}, {4, 3}, {5, 1}, 2}, 0, 10, + simgrid::s4u::Link::SharingPolicy::SHARED, create_host), + std::invalid_argument); + } + + SECTION("Negative latency") + { + REQUIRE_THROWS_AS(create_dragonfly_zone("test", e.e.get_netzone_root(), {{3, 4}, {4, 3}, {5, 1}, 2}, 1e9, -10, + simgrid::s4u::Link::SharingPolicy::SHARED, create_host), + std::invalid_argument); + } +} \ No newline at end of file diff --git 
a/tools/cmake/Tests.cmake b/tools/cmake/Tests.cmake index fca894cb26..9ee26e52d0 100644 --- a/tools/cmake/Tests.cmake +++ b/tools/cmake/Tests.cmake @@ -124,6 +124,7 @@ ENDIF() # New tests should use the Catch Framework set(UNIT_TESTS src/xbt/unit-tests_main.cpp src/kernel/resource/profile/Profile_test.cpp + src/kernel/routing/DragonflyZone_test.cpp src/kernel/routing/FatTreeZone_test.cpp src/kernel/routing/StarZone_test.cpp src/kernel/routing/TorusZone_test.cpp