-/* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector */
+/* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector
+ * Updated 02/2022 */
-/* Copyright (c) 2009-2019. The SimGrid Team.
+/* Copyright (c) 2009-2023. The SimGrid Team.
* All rights reserved. */
/* This program is free software; you can redistribute it and/or modify it
#include "colls_private.hpp"
-namespace simgrid{
-namespace smpi{
+#include <memory>
-int Coll_allreduce_ompi::allreduce(const void *sbuf, void *rbuf, int count,
- MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
-{
- size_t dsize, block_dsize;
- int comm_size = comm->size();
- const size_t intermediate_message = 10000;
+namespace simgrid::smpi {
- /**
- * Decision function based on MX results from the Grig cluster at UTK.
+int allreduce__ompi(const void *sbuf, void *rbuf, int count,
+ MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
+{
+ size_t total_dsize = dtype->size() * (ptrdiff_t)count;
+ int communicator_size = comm->size();
+ int alg = 1;
+ int(*funcs[]) (const void*, void*, int, MPI_Datatype, MPI_Op, MPI_Comm)={
+ &allreduce__redbcast,
+ &allreduce__redbcast,
+ &allreduce__rdb,
+ &allreduce__lr,
+ &allreduce__ompi_ring_segmented,
+ &allreduce__rab_rdb
+ };
+
+ /** Algorithms:
+ * {1, "basic_linear"},
+ * {2, "nonoverlapping"},
+ * {3, "recursive_doubling"},
+ * {4, "ring"},
+ * {5, "segmented_ring"},
+ * {6, "rabenseifner"},
*
- * Currently, linear, recursive doubling, and nonoverlapping algorithms
- * can handle both commutative and non-commutative operations.
- * Ring algorithm does not support non-commutative operations.
+ * Currently, ring, segmented ring, and rabenseifner do not support
+ * non-commutative operations.
*/
- dsize = dtype->size();
- block_dsize = dsize * count;
-
- if (block_dsize < intermediate_message) {
- return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
- count, dtype,
- op, comm));
- }
-
- if( ((op==MPI_OP_NULL) || op->is_commutative()) && (count > comm_size) ) {
- const size_t segment_size = 1 << 20; /* 1 MB */
- if ((comm_size * segment_size >= block_dsize)) {
- //FIXME: ok, these are not the right algorithms, try to find closer ones
- // lr is a good match for allreduce_ring (difference is mainly the use of sendrecv)
- return Coll_allreduce_lr::allreduce(sbuf, rbuf, count, dtype,
- op, comm);
+ if ((op != MPI_OP_NULL) && not op->is_commutative()) {
+ if (communicator_size < 4) {
+ if (total_dsize < 131072) {
+ alg = 3;
+ } else {
+ alg = 1;
+ }
+ } else if (communicator_size < 8) {
+ alg = 3;
+ } else if (communicator_size < 16) {
+ if (total_dsize < 1048576) {
+ alg = 3;
+ } else {
+ alg = 2;
+ }
+ } else if (communicator_size < 128) {
+ alg = 3;
+ } else if (communicator_size < 256) {
+ if (total_dsize < 131072) {
+ alg = 2;
+ } else if (total_dsize < 524288) {
+ alg = 3;
+ } else {
+ alg = 2;
+ }
+ } else if (communicator_size < 512) {
+ if (total_dsize < 4096) {
+ alg = 2;
+ } else if (total_dsize < 524288) {
+ alg = 3;
+ } else {
+ alg = 2;
+ }
} else {
- return (Coll_allreduce_ompi_ring_segmented::allreduce (sbuf, rbuf,
- count, dtype,
- op, comm
- /*segment_size*/));
+ if (total_dsize < 2048) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ }
+ } else {
+ if (communicator_size < 4) {
+ if (total_dsize < 8) {
+ alg = 4;
+ } else if (total_dsize < 4096) {
+ alg = 3;
+ } else if (total_dsize < 8192) {
+ alg = 4;
+ } else if (total_dsize < 16384) {
+ alg = 3;
+ } else if (total_dsize < 65536) {
+ alg = 4;
+ } else if (total_dsize < 262144) {
+ alg = 5;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 8) {
+ if (total_dsize < 16) {
+ alg = 4;
+ } else if (total_dsize < 8192) {
+ alg = 3;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 16) {
+ if (total_dsize < 8192) {
+ alg = 3;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 32) {
+ if (total_dsize < 64) {
+ alg = 5;
+ } else if (total_dsize < 4096) {
+ alg = 3;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 64) {
+ if (total_dsize < 128) {
+ alg = 5;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 128) {
+ if (total_dsize < 262144) {
+ alg = 3;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 256) {
+ if (total_dsize < 131072) {
+ alg = 2;
+ } else if (total_dsize < 262144) {
+ alg = 3;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 512) {
+ if (total_dsize < 4096) {
+ alg = 2;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 2048) {
+ if (total_dsize < 2048) {
+ alg = 2;
+ } else if (total_dsize < 16384) {
+ alg = 3;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 4096) {
+ if (total_dsize < 2048) {
+ alg = 2;
+ } else if (total_dsize < 4096) {
+ alg = 5;
+ } else if (total_dsize < 16384) {
+ alg = 3;
+ } else {
+ alg = 6;
+ }
+ } else {
+ if (total_dsize < 2048) {
+ alg = 2;
+ } else if (total_dsize < 16384) {
+ alg = 5;
+ } else if (total_dsize < 32768) {
+ alg = 3;
+ } else {
+ alg = 6;
+ }
}
}
-
- return (Coll_allreduce_redbcast::allreduce(sbuf, rbuf, count,
- dtype, op, comm));
+ return funcs[alg-1](sbuf, rbuf, count, dtype, op, comm);
}
-int Coll_alltoall_ompi::alltoall(const void *sbuf, int scount,
- MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
- MPI_Comm comm)
+int alltoall__ompi(const void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
+ MPI_Comm comm)
{
- int communicator_size;
- size_t dsize, block_dsize;
- communicator_size = comm->size();
+ int alg = 1;
+ size_t dsize, total_dsize;
+ int communicator_size = comm->size();
- /* Decision function based on measurement on Grig cluster at
- the University of Tennessee (2GB MX) up to 64 nodes.
- Has better performance for messages of intermediate sizes than the old one */
- /* determine block size */
- dsize = sdtype->size();
- block_dsize = dsize * scount;
-
- if ((block_dsize < 200) && (communicator_size > 12)) {
- return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
-
- } else if (block_dsize < 3000) {
- return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
+ if (MPI_IN_PLACE != sbuf) {
+ dsize = sdtype->size();
+ } else {
+ dsize = rdtype->size();
+ }
+ total_dsize = dsize * (ptrdiff_t)scount;
+ int (*funcs[])(const void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm) = {
+ &alltoall__basic_linear,
+ &alltoall__pair,
+ &alltoall__bruck,
+ &alltoall__basic_linear,
+ &alltoall__basic_linear
+ };
+ /** Algorithms:
+ * {1, "linear"},
+ * {2, "pairwise"},
+ * {3, "modified_bruck"},
+ * {4, "linear_sync"},
+ * {5, "two_proc"},
+ */
+ if (communicator_size == 2) {
+ if (total_dsize < 2) {
+ alg = 2;
+ } else if (total_dsize < 4) {
+ alg = 5;
+ } else if (total_dsize < 16) {
+ alg = 2;
+ } else if (total_dsize < 64) {
+ alg = 5;
+ } else if (total_dsize < 256) {
+ alg = 2;
+ } else if (total_dsize < 4096) {
+ alg = 5;
+ } else if (total_dsize < 32768) {
+ alg = 2;
+ } else if (total_dsize < 262144) {
+ alg = 4;
+ } else if (total_dsize < 1048576) {
+ alg = 5;
+ } else {
+ alg = 2;
+ }
+ } else if (communicator_size < 8) {
+ if (total_dsize < 8192) {
+ alg = 4;
+ } else if (total_dsize < 16384) {
+ alg = 1;
+ } else if (total_dsize < 65536) {
+ alg = 4;
+ } else if (total_dsize < 524288) {
+ alg = 1;
+ } else if (total_dsize < 1048576) {
+ alg = 2;
+ } else {
+ alg = 1;
+ }
+ } else if (communicator_size < 16) {
+ if (total_dsize < 262144) {
+ alg = 4;
+ } else {
+ alg = 1;
+ }
+ } else if (communicator_size < 32) {
+ if (total_dsize < 4) {
+ alg = 4;
+ } else if (total_dsize < 512) {
+ alg = 3;
+ } else if (total_dsize < 8192) {
+ alg = 4;
+ } else if (total_dsize < 32768) {
+ alg = 1;
+ } else if (total_dsize < 262144) {
+ alg = 4;
+ } else if (total_dsize < 524288) {
+ alg = 1;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 64) {
+ if (total_dsize < 512) {
+ alg = 3;
+ } else if (total_dsize < 524288) {
+ alg = 1;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 128) {
+ if (total_dsize < 1024) {
+ alg = 3;
+ } else if (total_dsize < 2048) {
+ alg = 1;
+ } else if (total_dsize < 4096) {
+ alg = 4;
+ } else if (total_dsize < 262144) {
+ alg = 1;
+ } else {
+ alg = 2;
+ }
+ } else if (communicator_size < 256) {
+ if (total_dsize < 1024) {
+ alg = 3;
+ } else if (total_dsize < 2048) {
+ alg = 4;
+ } else if (total_dsize < 262144) {
+ alg = 1;
+ } else {
+ alg = 2;
+ }
+ } else if (communicator_size < 512) {
+ if (total_dsize < 1024) {
+ alg = 3;
+ } else if (total_dsize < 8192) {
+ alg = 4;
+ } else if (total_dsize < 32768) {
+ alg = 1;
+ } else {
+ alg = 2;
+ }
+ } else if (communicator_size < 1024) {
+ if (total_dsize < 512) {
+ alg = 3;
+ } else if (total_dsize < 8192) {
+ alg = 4;
+ } else if (total_dsize < 16384) {
+ alg = 1;
+ } else if (total_dsize < 131072) {
+ alg = 4;
+ } else if (total_dsize < 262144) {
+ alg = 1;
+ } else {
+ alg = 2;
+ }
+ } else if (communicator_size < 2048) {
+ if (total_dsize < 512) {
+ alg = 3;
+ } else if (total_dsize < 1024) {
+ alg = 4;
+ } else if (total_dsize < 2048) {
+ alg = 1;
+ } else if (total_dsize < 16384) {
+ alg = 4;
+ } else if (total_dsize < 262144) {
+ alg = 1;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 4096) {
+ if (total_dsize < 1024) {
+ alg = 3;
+ } else if (total_dsize < 4096) {
+ alg = 4;
+ } else if (total_dsize < 8192) {
+ alg = 1;
+ } else if (total_dsize < 131072) {
+ alg = 4;
+ } else {
+ alg = 1;
+ }
+ } else {
+ if (total_dsize < 2048) {
+ alg = 3;
+ } else if (total_dsize < 8192) {
+ alg = 4;
+ } else if (total_dsize < 16384) {
+ alg = 1;
+ } else if (total_dsize < 32768) {
+ alg = 4;
+ } else if (total_dsize < 65536) {
+ alg = 1;
+ } else {
+ alg = 4;
+ }
}
- return Coll_alltoall_ring::alltoall (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
+ return funcs[alg-1](sbuf, scount, sdtype,
+ rbuf, rcount, rdtype, comm);
}
-int Coll_alltoallv_ompi::alltoallv(const void *sbuf, const int *scounts, const int *sdisps,
- MPI_Datatype sdtype,
- void *rbuf, const int *rcounts, const int *rdisps,
- MPI_Datatype rdtype,
- MPI_Comm comm
- )
+int alltoallv__ompi(const void *sbuf, const int *scounts, const int *sdisps,
+ MPI_Datatype sdtype,
+ void *rbuf, const int *rcounts, const int *rdisps,
+ MPI_Datatype rdtype,
+ MPI_Comm comm
+ )
{
- /* For starters, just keep the original algorithm. */
- return Coll_alltoallv_ring::alltoallv(sbuf, scounts, sdisps, sdtype,
- rbuf, rcounts, rdisps,rdtype,
- comm);
-}
-
-
-int Coll_barrier_ompi::barrier(MPI_Comm comm)
-{ int communicator_size = comm->size();
-
- if( 2 == communicator_size )
- return Coll_barrier_ompi_two_procs::barrier(comm);
-/* * Basic optimisation. If we have a power of 2 number of nodes*/
-/* * the use the recursive doubling algorithm, otherwise*/
-/* * bruck is the one we want.*/
- {
- int has_one = 0;
- for( ; communicator_size > 0; communicator_size >>= 1 ) {
- if( communicator_size & 0x1 ) {
- if( has_one )
- return Coll_barrier_ompi_bruck::barrier(comm);
- has_one = 1;
- }
- }
+ int communicator_size = comm->size();
+ int alg = 1;
+ int (*funcs[])(const void *, const int*, const int*, MPI_Datatype, void*, const int*, const int*, MPI_Datatype, MPI_Comm) = {
+ &alltoallv__ompi_basic_linear,
+ &alltoallv__pair
+ };
+ /** Algorithms:
+ * {1, "basic_linear"},
+ * {2, "pairwise"},
+ *
+ * We can only optimize based on the communicator size
+ */
+ if (communicator_size < 4) {
+ alg = 2;
+ } else if (communicator_size < 64) {
+ alg = 1;
+ } else if (communicator_size < 128) {
+ alg = 2;
+ } else if (communicator_size < 256) {
+ alg = 1;
+ } else if (communicator_size < 1024) {
+ alg = 2;
+ } else {
+ alg = 1;
}
- return Coll_barrier_ompi_recursivedoubling::barrier(comm);
+ return funcs[alg-1](sbuf, scounts, sdisps, sdtype,
+ rbuf, rcounts, rdisps,rdtype,
+ comm);
}
-int Coll_bcast_ompi::bcast(void *buff, int count,
- MPI_Datatype datatype, int root,
- MPI_Comm comm
- )
+int barrier__ompi(MPI_Comm comm)
{
- /* Decision function based on MX results for
- messages up to 36MB and communicator sizes up to 64 nodes */
- const size_t small_message_size = 2048;
- const size_t intermediate_message_size = 370728;
- const double a_p16 = 3.2118e-6; /* [1 / byte] */
- const double b_p16 = 8.7936;
- const double a_p64 = 2.3679e-6; /* [1 / byte] */
- const double b_p64 = 1.1787;
- const double a_p128 = 1.6134e-6; /* [1 / byte] */
- const double b_p128 = 2.1102;
-
- int communicator_size;
- //int segsize = 0;
- size_t message_size, dsize;
-
- communicator_size = comm->size();
-
- /* else we need data size for decision function */
- dsize = datatype->size();
- message_size = dsize * (unsigned long)count; /* needed for decision */
-
- /* Handle messages of small and intermediate size, and
- single-element broadcasts */
- if ((message_size < small_message_size) || (count <= 1)) {
- /* Binomial without segmentation */
- return Coll_bcast_binomial_tree::bcast (buff, count, datatype,
- root, comm);
-
- } else if (message_size < intermediate_message_size) {
- // SplittedBinary with 1KB segments
- return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype,
- root, comm);
-
+ int communicator_size = comm->size();
+ int alg = 1;
+ int (*funcs[])(MPI_Comm) = {
+ &barrier__ompi_basic_linear,
+ &barrier__ompi_basic_linear,
+ &barrier__ompi_recursivedoubling,
+ &barrier__ompi_bruck,
+ &barrier__ompi_two_procs,
+ &barrier__ompi_tree
+ };
+ /** Algorithms:
+ * {1, "linear"},
+ * {2, "double_ring"},
+ * {3, "recursive_doubling"},
+ * {4, "bruck"},
+ * {5, "two_proc"},
+ * {6, "tree"},
+ *
+ * We can only optimize based on the communicator size
+ */
+ if (communicator_size < 4) {
+ alg = 3;
+ } else if (communicator_size < 8) {
+ alg = 1;
+ } else if (communicator_size < 64) {
+ alg = 3;
+ } else if (communicator_size < 256) {
+ alg = 4;
+ } else if (communicator_size < 512) {
+ alg = 6;
+ } else if (communicator_size < 1024) {
+ alg = 4;
+ } else if (communicator_size < 4096) {
+ alg = 6;
+ } else {
+ alg = 4;
}
- //Handle large message sizes
- else if (communicator_size < (a_p128 * message_size + b_p128)) {
- //Pipeline with 128KB segments
- //segsize = 1024 << 7;
- return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
- root, comm);
-
-
- } else if (communicator_size < 13) {
- // Split Binary with 8KB segments
- return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype,
- root, comm);
-
- } else if (communicator_size < (a_p64 * message_size + b_p64)) {
- // Pipeline with 64KB segments
- //segsize = 1024 << 6;
- return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
- root, comm);
+ return funcs[alg-1](comm);
+}
- } else if (communicator_size < (a_p16 * message_size + b_p16)) {
- //Pipeline with 16KB segments
- //segsize = 1024 << 4;
- return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
- root, comm);
+int bcast__ompi(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
+{
+ int alg = 1;
+ size_t total_dsize, dsize;
+ int communicator_size = comm->size();
+ dsize = datatype->size();
+ total_dsize = dsize * (unsigned long)count;
+ int (*funcs[])(void*, int, MPI_Datatype, int, MPI_Comm) = {
+ &bcast__NTSL,
+ &bcast__ompi_pipeline,
+ &bcast__ompi_pipeline,
+ &bcast__ompi_split_bintree,
+ &bcast__NTSB,
+ &bcast__binomial_tree,
+ &bcast__mvapich2_knomial_intra_node,
+ &bcast__scatter_rdb_allgather,
+ &bcast__scatter_LR_allgather,
+ };
+ /** Algorithms:
+ * {1, "basic_linear"},
+ * {2, "chain"},
+ * {3, "pipeline"},
+ * {4, "split_binary_tree"},
+ * {5, "binary_tree"},
+ * {6, "binomial"},
+ * {7, "knomial"},
+ * {8, "scatter_allgather"},
+ * {9, "scatter_allgather_ring"},
+ */
+ if (communicator_size < 4) {
+ if (total_dsize < 32) {
+ alg = 3;
+ } else if (total_dsize < 256) {
+ alg = 5;
+ } else if (total_dsize < 512) {
+ alg = 3;
+ } else if (total_dsize < 1024) {
+ alg = 7;
+ } else if (total_dsize < 32768) {
+ alg = 1;
+ } else if (total_dsize < 131072) {
+ alg = 5;
+ } else if (total_dsize < 262144) {
+ alg = 2;
+ } else if (total_dsize < 524288) {
+ alg = 1;
+ } else if (total_dsize < 1048576) {
+ alg = 6;
+ } else {
+ alg = 5;
+ }
+ } else if (communicator_size < 8) {
+ if (total_dsize < 64) {
+ alg = 5;
+ } else if (total_dsize < 128) {
+ alg = 6;
+ } else if (total_dsize < 2048) {
+ alg = 5;
+ } else if (total_dsize < 8192) {
+ alg = 6;
+ } else if (total_dsize < 1048576) {
+ alg = 1;
+ } else {
+ alg = 2;
+ }
+ } else if (communicator_size < 16) {
+ if (total_dsize < 8) {
+ alg = 7;
+ } else if (total_dsize < 64) {
+ alg = 5;
+ } else if (total_dsize < 4096) {
+ alg = 7;
+ } else if (total_dsize < 16384) {
+ alg = 5;
+ } else if (total_dsize < 32768) {
+ alg = 6;
+ } else {
+ alg = 1;
+ }
+ } else if (communicator_size < 32) {
+ if (total_dsize < 4096) {
+ alg = 7;
+ } else if (total_dsize < 1048576) {
+ alg = 6;
+ } else {
+ alg = 8;
+ }
+ } else if (communicator_size < 64) {
+ if (total_dsize < 2048) {
+ alg = 6;
+ } else {
+ alg = 7;
+ }
+ } else if (communicator_size < 128) {
+ alg = 7;
+ } else if (communicator_size < 256) {
+ if (total_dsize < 2) {
+ alg = 6;
+ } else if (total_dsize < 16384) {
+ alg = 5;
+ } else if (total_dsize < 32768) {
+ alg = 1;
+ } else if (total_dsize < 65536) {
+ alg = 5;
+ } else {
+ alg = 7;
+ }
+ } else if (communicator_size < 1024) {
+ if (total_dsize < 16384) {
+ alg = 7;
+ } else if (total_dsize < 32768) {
+ alg = 4;
+ } else {
+ alg = 7;
+ }
+ } else if (communicator_size < 2048) {
+ if (total_dsize < 524288) {
+ alg = 7;
+ } else {
+ alg = 8;
+ }
+ } else if (communicator_size < 4096) {
+ if (total_dsize < 262144) {
+ alg = 7;
+ } else {
+ alg = 8;
+ }
+ } else {
+ if (total_dsize < 8192) {
+ alg = 7;
+ } else if (total_dsize < 16384) {
+ alg = 5;
+ } else if (total_dsize < 262144) {
+ alg = 7;
+ } else {
+ alg = 8;
+ }
}
- /* Pipeline with 8KB segments */
- //segsize = 1024 << 3;
- return Coll_bcast_flattree_pipeline::bcast (buff, count, datatype,
- root, comm
- /*segsize*/);
-#if 0
- /* this is based on gige measurements */
-
- if (communicator_size < 4) {
- return Coll_bcast_intra_basic_linear::bcast (buff, count, datatype, root, comm, module);
- }
- if (communicator_size == 4) {
- if (message_size < 524288) segsize = 0;
- else segsize = 16384;
- return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize);
- }
- if (communicator_size <= 8 && message_size < 4096) {
- return Coll_bcast_intra_basic_linear::bcast (buff, count, datatype, root, comm, module);
- }
- if (communicator_size > 8 && message_size >= 32768 && message_size < 524288) {
- segsize = 16384;
- return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize);
- }
- if (message_size >= 524288) {
- segsize = 16384;
- return Coll_bcast_intra_pipeline::bcast (buff, count, datatype, root, comm, module, segsize);
- }
- segsize = 0;
- /* once tested can swap this back in */
- /* return Coll_bcast_intra_bmtree::bcast (buff, count, datatype, root, comm, segsize); */
- return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize);
-#endif /* 0 */
+ return funcs[alg-1](buff, count, datatype, root, comm);
}
-int Coll_reduce_ompi::reduce(const void *sendbuf, void *recvbuf,
- int count, MPI_Datatype datatype,
- MPI_Op op, int root,
- MPI_Comm comm
- )
+int reduce__ompi(const void *sendbuf, void *recvbuf,
+ int count, MPI_Datatype datatype,
+ MPI_Op op, int root,
+ MPI_Comm comm)
{
- int communicator_size=0;
- //int segsize = 0;
- size_t message_size, dsize;
- const double a1 = 0.6016 / 1024.0; /* [1/B] */
- const double b1 = 1.3496;
- const double a2 = 0.0410 / 1024.0; /* [1/B] */
- const double b2 = 9.7128;
- const double a3 = 0.0422 / 1024.0; /* [1/B] */
- const double b3 = 1.1614;
- //const double a4 = 0.0033 / 1024.0; [1/B]
- //const double b4 = 1.6761;
-
- /* no limit on # of outstanding requests */
- //const int max_requests = 0;
-
- communicator_size = comm->size();
+ size_t total_dsize, dsize;
+ int alg = 1;
+ int communicator_size = comm->size();
- /* need data size for decision function */
dsize=datatype->size();
- message_size = dsize * count; /* needed for decision */
-
- /**
- * If the operation is non commutative we currently have choice of linear
- * or in-order binary tree algorithm.
+ total_dsize = dsize * count;
+ int (*funcs[])(const void*, void*, int, MPI_Datatype, MPI_Op, int, MPI_Comm) = {
+ &reduce__ompi_basic_linear,
+ &reduce__ompi_chain,
+ &reduce__ompi_pipeline,
+ &reduce__ompi_binary,
+ &reduce__ompi_binomial,
+ &reduce__ompi_in_order_binary,
+ //&reduce__rab: our rab implementation can't be used with all datatypes, so basic linear stands in for algorithm 7
+ &reduce__ompi_basic_linear
+ };
+ /** Algorithms:
+ * {1, "linear"},
+ * {2, "chain"},
+ * {3, "pipeline"},
+ * {4, "binary"},
+ * {5, "binomial"},
+ * {6, "in-order_binary"},
+ * {7, "rabenseifner"},
+ *
+ * Currently, only the linear and in-order binary tree algorithms
+ * support non-commutative ops.
*/
- if ((op != MPI_OP_NULL) && not op->is_commutative()) {
- if ((communicator_size < 12) && (message_size < 2048)) {
- return Coll_reduce_ompi_basic_linear::reduce(sendbuf, recvbuf, count, datatype, op, root, comm /*, module*/);
- }
- return Coll_reduce_ompi_in_order_binary::reduce(sendbuf, recvbuf, count, datatype, op, root, comm /*, module,
- 0, max_requests*/);
+ if ((op != MPI_OP_NULL) && not op->is_commutative()) {
+ if (communicator_size < 4) {
+ if (total_dsize < 8) {
+ alg = 6;
+ } else {
+ alg = 1;
+ }
+ } else if (communicator_size < 8) {
+ alg = 1;
+ } else if (communicator_size < 16) {
+ if (total_dsize < 1024) {
+ alg = 6;
+ } else if (total_dsize < 8192) {
+ alg = 1;
+ } else if (total_dsize < 16384) {
+ alg = 6;
+ } else if (total_dsize < 262144) {
+ alg = 1;
+ } else {
+ alg = 6;
+ }
+ } else if (communicator_size < 128) {
+ alg = 6;
+ } else if (communicator_size < 256) {
+ if (total_dsize < 512) {
+ alg = 6;
+ } else if (total_dsize < 1024) {
+ alg = 1;
+ } else {
+ alg = 6;
+ }
+ } else {
+ alg = 6;
+ }
+ } else {
+ if (communicator_size < 4) {
+ if (total_dsize < 8) {
+ alg = 7;
+ } else if (total_dsize < 16) {
+ alg = 4;
+ } else if (total_dsize < 32) {
+ alg = 3;
+ } else if (total_dsize < 262144) {
+ alg = 1;
+ } else if (total_dsize < 524288) {
+ alg = 3;
+ } else if (total_dsize < 1048576) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 8) {
+ if (total_dsize < 4096) {
+ alg = 4;
+ } else if (total_dsize < 65536) {
+ alg = 2;
+ } else if (total_dsize < 262144) {
+ alg = 5;
+ } else if (total_dsize < 524288) {
+ alg = 1;
+ } else if (total_dsize < 1048576) {
+ alg = 5;
+ } else {
+ alg = 1;
+ }
+ } else if (communicator_size < 16) {
+ if (total_dsize < 8192) {
+ alg = 4;
+ } else {
+ alg = 5;
+ }
+ } else if (communicator_size < 32) {
+ if (total_dsize < 4096) {
+ alg = 4;
+ } else {
+ alg = 5;
+ }
+ } else if (communicator_size < 256) {
+ alg = 5;
+ } else if (communicator_size < 512) {
+ if (total_dsize < 8192) {
+ alg = 5;
+ } else if (total_dsize < 16384) {
+ alg = 6;
+ } else {
+ alg = 5;
+ }
+ } else if (communicator_size < 2048) {
+ alg = 5;
+ } else if (communicator_size < 4096) {
+ if (total_dsize < 512) {
+ alg = 5;
+ } else if (total_dsize < 1024) {
+ alg = 6;
+ } else if (total_dsize < 8192) {
+ alg = 5;
+ } else if (total_dsize < 16384) {
+ alg = 6;
+ } else {
+ alg = 5;
+ }
+ } else {
+ if (total_dsize < 16) {
+ alg = 5;
+ } else if (total_dsize < 32) {
+ alg = 6;
+ } else if (total_dsize < 1024) {
+ alg = 5;
+ } else if (total_dsize < 2048) {
+ alg = 6;
+ } else if (total_dsize < 8192) {
+ alg = 5;
+ } else if (total_dsize < 16384) {
+ alg = 6;
+ } else {
+ alg = 5;
+ }
+ }
}
- if ((communicator_size < 8) && (message_size < 512)){
- /* Linear_0K */
- return Coll_reduce_ompi_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm);
- } else if (((communicator_size < 8) && (message_size < 20480)) ||
- (message_size < 2048) || (count <= 1)) {
- /* Binomial_0K */
- //segsize = 0;
- return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
- segsize, max_requests*/);
- } else if (communicator_size > (a1 * message_size + b1)) {
- // Binomial_1K
- //segsize = 1024;
- return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
- segsize, max_requests*/);
- } else if (communicator_size > (a2 * message_size + b2)) {
- // Pipeline_1K
- //segsize = 1024;
- return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
- segsize, max_requests*/);
- } else if (communicator_size > (a3 * message_size + b3)) {
- // Binary_32K
- //segsize = 32*1024;
- return Coll_reduce_ompi_binary::reduce( sendbuf, recvbuf, count, datatype, op, root,
- comm/*, module, segsize, max_requests*/);
- }
-// if (communicator_size > (a4 * message_size + b4)) {
- // Pipeline_32K
-// segsize = 32*1024;
-// } else {
- // Pipeline_64K
-// segsize = 64*1024;
-// }
- return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
- segsize, max_requests*/);
-
-#if 0
- /* for small messages use linear algorithm */
- if (message_size <= 4096) {
- segsize = 0;
- fanout = communicator_size - 1;
- /* when linear implemented or taken from basic put here, right now using chain as a linear system */
- /* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */
- return Coll_reduce_intra_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module);
- /* return Coll_reduce_intra_chain::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */
- }
- if (message_size < 524288) {
- if (message_size <= 65536 ) {
- segsize = 32768;
- fanout = 8;
- } else {
- segsize = 1024;
- fanout = communicator_size/2;
- }
- /* later swap this for a binary tree */
- /* fanout = 2; */
- return Coll_reduce_intra_chain::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module,
- segsize, fanout, max_requests);
- }
- segsize = 1024;
- return Coll_reduce_intra_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module,
- segsize, max_requests);
-#endif /* 0 */
+ return funcs[alg-1] (sendbuf, recvbuf, count, datatype, op, root, comm);
}
-int Coll_reduce_scatter_ompi::reduce_scatter(const void *sbuf, void *rbuf,
- const int *rcounts,
- MPI_Datatype dtype,
- MPI_Op op,
- MPI_Comm comm
- )
+int reduce_scatter__ompi(const void *sbuf, void *rbuf,
+ const int *rcounts,
+ MPI_Datatype dtype,
+ MPI_Op op,
+ MPI_Comm comm
+ )
{
- int comm_size, i, pow2;
- size_t total_message_size, dsize;
- const double a = 0.0012;
- const double b = 8.0;
- const size_t small_message_size = 12 * 1024;
- const size_t large_message_size = 256 * 1024;
+ size_t total_dsize, dsize;
+ int communicator_size = comm->size();
+ int alg = 1;
int zerocounts = 0;
-
- XBT_DEBUG("Coll_reduce_scatter_ompi::reduce_scatter");
-
- comm_size = comm->size();
- // We need data size for decision function
dsize=dtype->size();
- total_message_size = 0;
- for (i = 0; i < comm_size; i++) {
- total_message_size += rcounts[i];
- if (0 == rcounts[i]) {
- zerocounts = 1;
- }
+ total_dsize = 0;
+ for (int i = 0; i < communicator_size; i++) {
+ total_dsize += rcounts[i];
+ // if (0 == rcounts[i]) {
+ // zerocounts = 1;
+ //}
}
-
+ total_dsize *= dsize;
+ int (*funcs[])(const void*, void*, const int*, MPI_Datatype, MPI_Op, MPI_Comm) = {
+ &reduce_scatter__default,
+ &reduce_scatter__ompi_basic_recursivehalving,
+ &reduce_scatter__ompi_ring,
+ &reduce_scatter__ompi_butterfly,
+ };
+ /** Algorithms:
+ * {1, "non-overlapping"},
+ * {2, "recursive_halving"},
+ * {3, "ring"},
+ * {4, "butterfly"},
+ *
+ * Support for non-commutative ops in these algorithms needs re-investigation.
+ * Defaulting to non-overlapping for non-commutative ops.
+ */
if (((op != MPI_OP_NULL) && not op->is_commutative()) || (zerocounts)) {
- Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm);
- return MPI_SUCCESS;
- }
-
- total_message_size *= dsize;
-
- // compute the nearest power of 2
- for (pow2 = 1; pow2 < comm_size; pow2 <<= 1);
-
- if ((total_message_size <= small_message_size) ||
- ((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
- (comm_size >= a * total_message_size + b)) {
- return
- Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,
- dtype, op,
- comm);
+ alg = 1;
+ } else {
+ if (communicator_size < 4) {
+ if (total_dsize < 65536) {
+ alg = 3;
+ } else if (total_dsize < 131072) {
+ alg = 4;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 8) {
+ if (total_dsize < 8) {
+ alg = 1;
+ } else if (total_dsize < 262144) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 32) {
+ if (total_dsize < 262144) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 64) {
+ if (total_dsize < 64) {
+ alg = 1;
+ } else if (total_dsize < 2048) {
+ alg = 2;
+ } else if (total_dsize < 524288) {
+ alg = 4;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 128) {
+ if (total_dsize < 256) {
+ alg = 1;
+ } else if (total_dsize < 512) {
+ alg = 2;
+ } else if (total_dsize < 2048) {
+ alg = 4;
+ } else if (total_dsize < 4096) {
+ alg = 2;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 256) {
+ if (total_dsize < 256) {
+ alg = 1;
+ } else if (total_dsize < 512) {
+ alg = 2;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 512) {
+ if (total_dsize < 256) {
+ alg = 1;
+ } else if (total_dsize < 1024) {
+ alg = 2;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 1024) {
+ if (total_dsize < 512) {
+ alg = 1;
+ } else if (total_dsize < 2048) {
+ alg = 2;
+ } else if (total_dsize < 8192) {
+ alg = 4;
+ } else if (total_dsize < 16384) {
+ alg = 2;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 2048) {
+ if (total_dsize < 512) {
+ alg = 1;
+ } else if (total_dsize < 4096) {
+ alg = 2;
+ } else if (total_dsize < 16384) {
+ alg = 4;
+ } else if (total_dsize < 32768) {
+ alg = 2;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 4096) {
+ if (total_dsize < 512) {
+ alg = 1;
+ } else if (total_dsize < 4096) {
+ alg = 2;
+ } else {
+ alg = 4;
+ }
+ } else {
+ if (total_dsize < 1024) {
+ alg = 1;
+ } else if (total_dsize < 8192) {
+ alg = 2;
+ } else {
+ alg = 4;
+ }
+ }
}
- return Coll_reduce_scatter_ompi_ring::reduce_scatter(sbuf, rbuf, rcounts,
- dtype, op,
- comm);
-
-
+ return funcs[alg-1] (sbuf, rbuf, rcounts, dtype, op, comm);
}
-int Coll_allgather_ompi::allgather(const void *sbuf, int scount,
- MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
- MPI_Comm comm
- )
+int allgather__ompi(const void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
+ MPI_Comm comm
+ )
{
- int communicator_size, pow2_size;
+ int communicator_size;
size_t dsize, total_dsize;
-
+ int alg = 1;
communicator_size = comm->size();
-
- /* Special case for 2 processes */
- if (communicator_size == 2) {
- return Coll_allgather_pair::allgather (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm/*, module*/);
+ if (MPI_IN_PLACE != sbuf) {
+ dsize = sdtype->size();
+ } else {
+ dsize = rdtype->size();
}
-
- /* Determine complete data size */
- dsize=sdtype->size();
- total_dsize = dsize * scount * communicator_size;
-
- for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
-
- /* Decision based on MX 2Gb results from Grig cluster at
- The University of Tennesse, Knoxville
- - if total message size is less than 50KB use either bruck or
- recursive doubling for non-power of two and power of two nodes,
- respectively.
- - else use ring and neighbor exchange algorithms for odd and even
- number of nodes, respectively.
- */
- if (total_dsize < 50000) {
- if (pow2_size == communicator_size) {
- return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
- } else {
- return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
+ total_dsize = dsize * (ptrdiff_t)scount;
+ int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm) = {
+ &allgather__NTSLR_NB,
+ &allgather__bruck,
+ &allgather__rdb,
+ &allgather__ring,
+ &allgather__ompi_neighborexchange,
+ &allgather__pair
+ };
+ /** Algorithms:
+ * {1, "linear"},
+ * {2, "bruck"},
+ * {3, "recursive_doubling"},
+ * {4, "ring"},
+ * {5, "neighbor"},
+ * {6, "two_proc"}
+ */
+ if (communicator_size == 2) {
+ alg = 6;
+ } else if (communicator_size < 32) {
+ alg = 3;
+ } else if (communicator_size < 64) {
+ if (total_dsize < 1024) {
+ alg = 3;
+ } else if (total_dsize < 65536) {
+ alg = 5;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 128) {
+ if (total_dsize < 512) {
+ alg = 3;
+ } else if (total_dsize < 65536) {
+ alg = 5;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 256) {
+ if (total_dsize < 512) {
+ alg = 3;
+ } else if (total_dsize < 131072) {
+ alg = 5;
+ } else if (total_dsize < 524288) {
+ alg = 4;
+ } else if (total_dsize < 1048576) {
+ alg = 5;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 512) {
+ if (total_dsize < 32) {
+ alg = 3;
+ } else if (total_dsize < 128) {
+ alg = 2;
+ } else if (total_dsize < 1024) {
+ alg = 3;
+ } else if (total_dsize < 131072) {
+ alg = 5;
+ } else if (total_dsize < 524288) {
+ alg = 4;
+ } else if (total_dsize < 1048576) {
+ alg = 5;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 1024) {
+ if (total_dsize < 64) {
+ alg = 3;
+ } else if (total_dsize < 256) {
+ alg = 2;
+ } else if (total_dsize < 2048) {
+ alg = 3;
+ } else {
+ alg = 5;
+ }
+ } else if (communicator_size < 2048) {
+ if (total_dsize < 4) {
+ alg = 3;
+ } else if (total_dsize < 8) {
+ alg = 2;
+ } else if (total_dsize < 16) {
+ alg = 3;
+ } else if (total_dsize < 32) {
+ alg = 2;
+ } else if (total_dsize < 256) {
+ alg = 3;
+ } else if (total_dsize < 512) {
+ alg = 2;
+ } else if (total_dsize < 4096) {
+ alg = 3;
+ } else {
+ alg = 5;
+ }
+ } else if (communicator_size < 4096) {
+ if (total_dsize < 32) {
+ alg = 2;
+ } else if (total_dsize < 128) {
+ alg = 3;
+ } else if (total_dsize < 512) {
+ alg = 2;
+ } else if (total_dsize < 4096) {
+ alg = 3;
+ } else {
+ alg = 5;
}
} else {
- if (communicator_size % 2) {
- return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
+ if (total_dsize < 2) {
+ alg = 3;
+ } else if (total_dsize < 8) {
+ alg = 2;
+ } else if (total_dsize < 16) {
+ alg = 3;
+ } else if (total_dsize < 512) {
+ alg = 2;
+ } else if (total_dsize < 4096) {
+ alg = 3;
} else {
- return Coll_allgather_ompi_neighborexchange::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
+ alg = 5;
}
}
-#if defined(USE_MPICH2_DECISION)
- /* Decision as in MPICH-2
- presented in Thakur et.al. "Optimization of Collective Communication
- Operations in MPICH", International Journal of High Performance Computing
- Applications, Vol. 19, No. 1, 49-66 (2005)
- - for power-of-two processes and small and medium size messages
- (up to 512KB) use recursive doubling
- - for non-power-of-two processes and small messages (80KB) use bruck,
- - for everything else use ring.
- */
- if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
- return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
- } else if (total_dsize <= 81920) {
- return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
- }
- return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- comm);
-#endif /* defined(USE_MPICH2_DECISION) */
+ return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
+
}
-int Coll_allgatherv_ompi::allgatherv(const void *sbuf, int scount,
- MPI_Datatype sdtype,
- void* rbuf, const int *rcounts,
- const int *rdispls,
- MPI_Datatype rdtype,
- MPI_Comm comm
- )
+int allgatherv__ompi(const void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, const int *rcounts,
+ const int *rdispls,
+ MPI_Datatype rdtype,
+ MPI_Comm comm
+ )
{
int i;
int communicator_size;
size_t dsize, total_dsize;
-
+ int alg = 1;
communicator_size = comm->size();
-
- /* Special case for 2 processes */
- if (communicator_size == 2) {
- return Coll_allgatherv_pair::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
- comm);
+ if (MPI_IN_PLACE != sbuf) {
+ dsize = sdtype->size();
+ } else {
+ dsize = rdtype->size();
}
- /* Determine complete data size */
- dsize=sdtype->size();
total_dsize = 0;
for (i = 0; i < communicator_size; i++) {
total_dsize += dsize * rcounts[i];
}
- /* Decision based on allgather decision. */
- if (total_dsize < 50000) {
- return Coll_allgatherv_ompi_bruck::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
- comm);
-
+ /* use the per-rank data size as basis, similar to allgather */
+ size_t per_rank_dsize = total_dsize / communicator_size;
+
+ int (*funcs[])(const void*, int, MPI_Datatype, void*, const int*, const int*, MPI_Datatype, MPI_Comm) = {
+ &allgatherv__GB,
+ &allgatherv__ompi_bruck,
+ &allgatherv__mpich_ring,
+ &allgatherv__ompi_neighborexchange,
+ &allgatherv__pair
+ };
+ /** Algorithms:
+ * {1, "default"},
+ * {2, "bruck"},
+ * {3, "ring"},
+ * {4, "neighbor"},
+ * {5, "two_proc"},
+ *
+ * `alg` is a 1-based index into funcs[] (the final call uses funcs[alg-1]):
+ * 1 -> allgatherv__GB, 2 -> allgatherv__ompi_bruck,
+ * 3 -> allgatherv__mpich_ring, 4 -> allgatherv__ompi_neighborexchange,
+ * 5 -> allgatherv__pair.
+ *
+ * The table below is keyed first on communicator_size, then on
+ * per_rank_dsize, i.e. the sum over all ranks of dsize times rcounts[i],
+ * integer-divided by communicator_size. dsize is taken from rdtype when
+ * sbuf is MPI_IN_PLACE and from sdtype otherwise.
+ * Algorithm 5 ("two_proc") is only ever selected in the
+ * communicator_size == 2 branch.
+ * NOTE(review): thresholds are presumably byte counts (datatype size()
+ * times element count) -- confirm the unit returned by size().
+ */
+ if (communicator_size == 2) {
+ if (per_rank_dsize < 2048) {
+ alg = 3;
+ } else if (per_rank_dsize < 4096) {
+ alg = 5;
+ } else if (per_rank_dsize < 8192) {
+ alg = 3;
+ } else {
+ alg = 5;
+ }
+ } else if (communicator_size < 8) {
+ if (per_rank_dsize < 256) {
+ alg = 1;
+ } else if (per_rank_dsize < 4096) {
+ alg = 4;
+ } else if (per_rank_dsize < 8192) {
+ alg = 3;
+ } else if (per_rank_dsize < 16384) {
+ alg = 4;
+ } else if (per_rank_dsize < 262144) {
+ alg = 2;
+ } else {
+ alg = 4;
+ }
+ } else if (communicator_size < 16) {
+ if (per_rank_dsize < 1024) {
+ alg = 1;
+ } else {
+ alg = 2;
+ }
+ } else if (communicator_size < 32) {
+ if (per_rank_dsize < 128) {
+ alg = 1;
+ } else if (per_rank_dsize < 262144) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 64) {
+ if (per_rank_dsize < 256) {
+ alg = 1;
+ } else if (per_rank_dsize < 8192) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 128) {
+ if (per_rank_dsize < 256) {
+ alg = 1;
+ } else if (per_rank_dsize < 4096) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 256) {
+ if (per_rank_dsize < 1024) {
+ alg = 2;
+ } else if (per_rank_dsize < 65536) {
+ alg = 4;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 512) {
+ if (per_rank_dsize < 1024) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 1024) {
+ if (per_rank_dsize < 512) {
+ alg = 2;
+ } else if (per_rank_dsize < 1024) {
+ alg = 1;
+ } else if (per_rank_dsize < 4096) {
+ alg = 2;
+ } else if (per_rank_dsize < 1048576) {
+ alg = 4;
+ } else {
+ alg = 3;
+ }
} else {
- if (communicator_size % 2) {
- return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
- comm);
+ if (per_rank_dsize < 4096) {
+ alg = 2;
} else {
- return Coll_allgatherv_ompi_neighborexchange::allgatherv(sbuf, scount, sdtype,
- rbuf, rcounts, rdispls, rdtype,
- comm);
+ alg = 4;
}
}
+
+ return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm);
}
-int Coll_gather_ompi::gather(const void *sbuf, int scount,
- MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
- int root,
- MPI_Comm comm
- )
+int gather__ompi(const void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
+ int root,
+ MPI_Comm comm
+ )
{
- //const int large_segment_size = 32768;
- //const int small_segment_size = 1024;
-
- //const size_t large_block_size = 92160;
- const size_t intermediate_block_size = 6000;
- const size_t small_block_size = 1024;
-
- const int large_communicator_size = 60;
- const int small_communicator_size = 10;
-
int communicator_size, rank;
- size_t dsize, block_size;
-
- XBT_DEBUG("smpi_coll_tuned_gather_ompi");
-
+ size_t dsize, total_dsize;
+ int alg = 1;
communicator_size = comm->size();
rank = comm->rank();
- // Determine block size
if (rank == root) {
dsize = rdtype->size();
- block_size = dsize * rcount;
+ total_dsize = dsize * rcount;
} else {
dsize = sdtype->size();
- block_size = dsize * scount;
+ total_dsize = dsize * scount;
}
-
-/* if (block_size > large_block_size) {*/
-/* return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype, */
-/* rbuf, rcount, rdtype, */
-/* root, comm);*/
-
-/* } else*/ if (block_size > intermediate_block_size) {
- return Coll_gather_ompi_linear_sync::gather (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- root, comm);
-
- } else if ((communicator_size > large_communicator_size) ||
- ((communicator_size > small_communicator_size) &&
- (block_size < small_block_size))) {
- return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- root, comm);
-
+ int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
+ &gather__ompi_basic_linear,
+ &gather__ompi_binomial,
+ &gather__ompi_linear_sync
+ };
+ /** Algorithms:
+ * {1, "basic_linear"},
+ * {2, "binomial"},
+ * {3, "linear_sync"},
+ *
+ * We do not make any rank-specific checks since the params
+ * should be uniform across ranks.
+ *
+ * `alg` is a 1-based index into funcs[] (the final call uses funcs[alg-1]):
+ * 1 -> gather__ompi_basic_linear, 2 -> gather__ompi_binomial,
+ * 3 -> gather__ompi_linear_sync.
+ *
+ * The table below is keyed first on communicator_size, then on
+ * total_dsize. total_dsize is the rdtype size times rcount on the root
+ * and the sdtype size times scount on every other rank.
+ * NOTE(review): all ranks therefore pick the same algorithm only when
+ * those two products are equal -- confirm callers guarantee this.
+ * For 8 <= communicator_size < 256 and for communicator_size >= 512 the
+ * binomial algorithm is selected regardless of total_dsize.
+ */
+ if (communicator_size < 4) {
+ if (total_dsize < 2) {
+ alg = 3;
+ } else if (total_dsize < 4) {
+ alg = 1;
+ } else if (total_dsize < 32768) {
+ alg = 2;
+ } else if (total_dsize < 65536) {
+ alg = 1;
+ } else if (total_dsize < 131072) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 8) {
+ if (total_dsize < 1024) {
+ alg = 2;
+ } else if (total_dsize < 8192) {
+ alg = 1;
+ } else if (total_dsize < 32768) {
+ alg = 2;
+ } else if (total_dsize < 262144) {
+ alg = 1;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 256) {
+ alg = 2;
+ } else if (communicator_size < 512) {
+ if (total_dsize < 2048) {
+ alg = 2;
+ } else if (total_dsize < 8192) {
+ alg = 1;
+ } else {
+ alg = 2;
+ }
+ } else {
+ alg = 2;
}
- // Otherwise, use basic linear
- return Coll_gather_ompi_basic_linear::gather (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- root, comm);
+
+ return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
}
-int Coll_scatter_ompi::scatter(const void *sbuf, int scount,
- MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
- int root, MPI_Comm comm
- )
+int scatter__ompi(const void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
+ int root, MPI_Comm comm
+ )
{
- const size_t small_block_size = 300;
- const int small_comm_size = 10;
int communicator_size, rank;
- size_t dsize, block_size;
-
- XBT_DEBUG("Coll_scatter_ompi::scatter");
+ size_t dsize, total_dsize;
+ int alg = 1;
communicator_size = comm->size();
rank = comm->rank();
- // Determine block size
if (root == rank) {
dsize=sdtype->size();
- block_size = dsize * scount;
+ total_dsize = dsize * scount;
} else {
dsize=rdtype->size();
- block_size = dsize * rcount;
+ total_dsize = dsize * rcount;
}
-
- if ((communicator_size > small_comm_size) &&
- (block_size < small_block_size)) {
- std::unique_ptr<unsigned char[]> tmp_buf;
- if (rank != root) {
- tmp_buf.reset(new unsigned char[rcount * rdtype->get_extent()]);
- sbuf = tmp_buf.get();
- scount = rcount;
- sdtype = rdtype;
- }
- return Coll_scatter_ompi_binomial::scatter(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
+ int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
+ &scatter__ompi_basic_linear,
+ &scatter__ompi_binomial,
+ &scatter__ompi_linear_nb
+ };
+ /** Algorithms:
+ * {1, "basic_linear"},
+ * {2, "binomial"},
+ * {3, "linear_nb"},
+ *
+ * We do not make any rank-specific checks since the params
+ * should be uniform across ranks.
+ *
+ * `alg` is a 1-based index into funcs[] (the final call uses funcs[alg-1]):
+ * 1 -> scatter__ompi_basic_linear, 2 -> scatter__ompi_binomial,
+ * 3 -> scatter__ompi_linear_nb.
+ *
+ * The table below is keyed first on communicator_size, then on
+ * total_dsize. total_dsize is the sdtype size times scount on the root
+ * and the rdtype size times rcount on every other rank.
+ * NOTE(review): all ranks therefore pick the same algorithm only when
+ * those two products are equal -- confirm callers guarantee this.
+ * NOTE(review): sbuf, scount and sdtype are forwarded unchanged on every
+ * rank, including non-root ranks dispatching to the binomial variant --
+ * confirm scatter__ompi_binomial needs no send buffer on non-root ranks.
+ */
+ if (communicator_size < 4) {
+ if (total_dsize < 2) {
+ alg = 3;
+ } else if (total_dsize < 131072) {
+ alg = 1;
+ } else if (total_dsize < 262144) {
+ alg = 3;
+ } else {
+ alg = 1;
+ }
+ } else if (communicator_size < 8) {
+ if (total_dsize < 2048) {
+ alg = 2;
+ } else if (total_dsize < 4096) {
+ alg = 1;
+ } else if (total_dsize < 8192) {
+ alg = 2;
+ } else if (total_dsize < 32768) {
+ alg = 1;
+ } else if (total_dsize < 1048576) {
+ alg = 3;
+ } else {
+ alg = 1;
+ }
+ } else if (communicator_size < 16) {
+ if (total_dsize < 16384) {
+ alg = 2;
+ } else if (total_dsize < 1048576) {
+ alg = 3;
+ } else {
+ alg = 1;
+ }
+ } else if (communicator_size < 32) {
+ if (total_dsize < 16384) {
+ alg = 2;
+ } else if (total_dsize < 32768) {
+ alg = 1;
+ } else {
+ alg = 3;
+ }
+ } else if (communicator_size < 64) {
+ if (total_dsize < 512) {
+ alg = 2;
+ } else if (total_dsize < 8192) {
+ alg = 3;
+ } else if (total_dsize < 16384) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
+ } else {
+ if (total_dsize < 512) {
+ alg = 2;
+ } else {
+ alg = 3;
+ }
}
- return Coll_scatter_ompi_basic_linear::scatter (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- root, comm);
-}
+ return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
}
-}
+
+} // namespace simgrid::smpi