From 933849e1079bf5389d6a38447d49cbe5cc65101d Mon Sep 17 00:00:00 2001
From: Augustin Degomme
Date: Wed, 23 Feb 2022 20:44:06 +0100
Subject: [PATCH] add openmpi scatter linear_nb algorithm

---
 docs/source/app_smpi.rst                 |  1 +
 src/smpi/colls/scatter/scatter-ompi.cpp  | 74 ++++++++++++++++++++++++
 src/smpi/colls/smpi_coll.cpp             |  1 +
 src/smpi/colls/smpi_openmpi_selector.cpp |  3 +-
 src/smpi/include/smpi_coll.hpp           |  1 +
 teshsuite/smpi/CMakeLists.txt            |  2 +-
 6 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/docs/source/app_smpi.rst b/docs/source/app_smpi.rst
index 79265c9290..1a3a186f92 100644
--- a/docs/source/app_smpi.rst
+++ b/docs/source/app_smpi.rst
@@ -294,6 +294,7 @@ MPI_Scatter
 ``impi``: use intel mpi selector for the scatter operations. |br|
 ``automatic (experimental)``: use an automatic self-benchmarking algorithm. |br|
 ``ompi_basic_linear``: basic linear scatter. |br|
+``ompi_linear_nb``: linear scatter, non-blocking sends. |br|
 ``ompi_binomial``: binomial tree scatter. |br|
 ``mvapich2_two_level_direct``: SMP aware algorithm, with an intra-node stage (default set to mpich selector), and then a basic linear inter node stage. Use mvapich2 selector to change these to tuned algorithms for Stampede cluster. |br|
 ``mvapich2_two_level_binomial``: SMP aware algorithm, with an intra-node stage (default set to mpich selector), and then a binomial phase. Use mvapich2 selector to change these to tuned algorithms for Stampede cluster. |br|
diff --git a/src/smpi/colls/scatter/scatter-ompi.cpp b/src/smpi/colls/scatter/scatter-ompi.cpp
index 5067eca1c5..6bc77d60a9 100644
--- a/src/smpi/colls/scatter/scatter-ompi.cpp
+++ b/src/smpi/colls/scatter/scatter-ompi.cpp
@@ -248,5 +248,79 @@ int scatter__ompi_basic_linear(const void* sbuf, int scount, MPI_Datatype sdtype
   return MPI_SUCCESS;
 }
 
+/*
+ * Linear scatter with non-blocking sends: the root posts one isend per remote
+ * rank, then waits for all of them. (The original Open MPI version also adds
+ * periodic blocking sends as a local resource flush to ensure progression.)
+ */
+int scatter__ompi_linear_nb(const void *sbuf, int scount,
+                            MPI_Datatype sdtype,
+                            void *rbuf, int rcount,
+                            MPI_Datatype rdtype,
+                            int root,
+                            MPI_Comm comm)
+{
+  int i, rank, size, err = MPI_SUCCESS, line, nreqs;
+  ptrdiff_t incr;
+  char *ptmp;
+  MPI_Request *reqs = nullptr;
+  MPI_Request *preq = nullptr;
+
+  rank = comm->rank();
+  size = comm->size();
+
+  /* If not root, receive data. */
+  if (rank != root) {
+    Request::recv(rbuf, rcount, rdtype, root,
+                  COLL_TAG_SCATTER,
+                  comm, MPI_STATUS_IGNORE);
+    return MPI_SUCCESS;
+  }
+
+  nreqs = size - 1; /* no send for myself */
+
+  reqs = new MPI_Request[nreqs];
+  if (NULL == reqs) {
+    err = MPI_ERR_OTHER;
+    line = __LINE__; goto err_hndl;
+  }
+
+  incr = sdtype->get_extent();
+  incr *= scount;
+
+  /* I am the root, loop sending data. */
+  for (i = 0, ptmp = (char *)sbuf, preq = reqs; i < size; ++i, ptmp += incr) {
+    /* simple optimization */
+    if (i == rank) {
+      if (MPI_IN_PLACE != rbuf) {
+        err = Datatype::copy(ptmp, scount, sdtype, rbuf, rcount,
+                             rdtype);
+      }
+    } else {
+      *preq = Request::isend(ptmp, scount, sdtype, i,
+                             COLL_TAG_SCATTER, comm);
+      preq++;
+    }
+    if (MPI_SUCCESS != err) {
+      line = __LINE__; goto err_hndl;
+    }
+  }
+
+  err = Request::waitall(preq - reqs, reqs, MPI_STATUSES_IGNORE);
+  if (MPI_SUCCESS != err) {
+    line = __LINE__; goto err_hndl;
+  }
+
+  return MPI_SUCCESS;
+
+err_hndl:
+  if (NULL != reqs) {
+    delete[] reqs;
+  }
+  XBT_DEBUG("%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank);
+  (void)line; /* silence compiler warning */
+  return err;
+}
+
 }
 }
diff --git a/src/smpi/colls/smpi_coll.cpp b/src/smpi/colls/smpi_coll.cpp
index ea44ce1a9d..c8c322e5d9 100644
--- a/src/smpi/colls/smpi_coll.cpp
+++ b/src/smpi/colls/smpi_coll.cpp
@@ -116,6 +116,7 @@ std::map, std::less<>> smpi_c
     {{"default", "scatter default collective", (void*)scatter__default},
      {"ompi", "scatter ompi collective", (void*)scatter__ompi},
      {"ompi_basic_linear", "scatter ompi_basic_linear collective", (void*)scatter__ompi_basic_linear},
+     {"ompi_linear_nb", "scatter ompi_linear nonblocking collective", (void*)scatter__ompi_linear_nb},
      {"ompi_binomial", "scatter ompi_binomial collective", (void*)scatter__ompi_binomial},
      {"mpich", "scatter mpich collective", (void*)scatter__mpich},
      {"mvapich2", "scatter mvapich2 collective", (void*)scatter__mvapich2},
diff --git a/src/smpi/colls/smpi_openmpi_selector.cpp b/src/smpi/colls/smpi_openmpi_selector.cpp
index 5a762008ab..6f448ae7d4 100644
--- a/src/smpi/colls/smpi_openmpi_selector.cpp
+++ b/src/smpi/colls/smpi_openmpi_selector.cpp
@@ -16,7 +16,6 @@ add algos:
 allreduce nonoverlapping, basic linear
 alltoall linear_sync
 bcast chain
-scatter linear_nb
 */
 namespace simgrid {
 namespace smpi {
@@ -1273,7 +1272,7 @@ int scatter__ompi(const void *sbuf, int scount,
     int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
         &scatter__ompi_basic_linear,
         &scatter__ompi_binomial,
-        &scatter__ompi_basic_linear
+        &scatter__ompi_linear_nb
     };
     /** Algorithms:
      * {1, "basic_linear"},
diff --git a/src/smpi/include/smpi_coll.hpp b/src/smpi/include/smpi_coll.hpp
index e9dc7e9499..963de8a066 100644
--- a/src/smpi/include/smpi_coll.hpp
+++ b/src/smpi/include/smpi_coll.hpp
@@ -313,6 +313,7 @@ int reduce_scatter__automatic(const void *sbuf, void *rbuf, const int *rcounts,
 int scatter__default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
 int scatter__ompi(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
 int scatter__ompi_basic_linear(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
+int scatter__ompi_linear_nb(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
 int scatter__ompi_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
 int scatter__mpich(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
 int scatter__mvapich2(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
diff --git a/teshsuite/smpi/CMakeLists.txt b/teshsuite/smpi/CMakeLists.txt
index cea94e0dad..0b5bab84da 100644
--- a/teshsuite/smpi/CMakeLists.txt
+++ b/teshsuite/smpi/CMakeLists.txt
@@ -130,7 +130,7 @@ if(enable_smpi)
     ADD_TESH(tesh-smpi-coll-reduce-scatter-${REDUCE_SCATTER} --cfg smpi/reduce_scatter:${REDUCE_SCATTER} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-reduce-scatter --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-reduce-scatter ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-reduce-scatter/coll-reduce-scatter.tesh)
   endforeach()
 
-  foreach (SCATTER ompi mpich ompi_basic_linear ompi_binomial mvapich2 mvapich2_two_level_binomial mvapich2_two_level_direct impi)
+  foreach (SCATTER ompi mpich ompi_basic_linear ompi_binomial ompi_linear_nb mvapich2 mvapich2_two_level_binomial mvapich2_two_level_direct impi)
     ADD_TESH(tesh-smpi-coll-scatter-${SCATTER} --cfg smpi/scatter:${SCATTER} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-scatter --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-scatter ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-scatter/coll-scatter.tesh)
   endforeach()
 
-- 
2.20.1
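
The docs and teshsuite hunks above show where the new value is exposed: the smpi/scatter configuration item now accepts ompi_linear_nb. As a usage illustration, here is a minimal sketch (not part of the patch) of an MPI program that exercises MPI_Scatter under SMPI; the file name scatter_demo.cpp, the chunk size, and the platform/hostfile names in the run command below are placeholders, while the configuration key and value come from the changes above and the smpicxx/smpirun invocation assumes the standard SMPI toolchain.

    // scatter_demo.cpp (hypothetical name): the root scatters `chunk` ints to
    // every rank, which is enough to exercise the selected scatter algorithm.
    #include <mpi.h>
    #include <cstdio>
    #include <vector>

    int main(int argc, char* argv[])
    {
      MPI_Init(&argc, &argv);

      int rank = 0;
      int size = 0;
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
      MPI_Comm_size(MPI_COMM_WORLD, &size);

      const int chunk = 4;               // elements sent to each rank (arbitrary)
      std::vector<int> sendbuf;          // only significant at the root
      if (rank == 0) {
        sendbuf.resize(static_cast<size_t>(chunk) * size);
        for (size_t i = 0; i < sendbuf.size(); ++i)
          sendbuf[i] = static_cast<int>(i);
      }

      std::vector<int> recvbuf(chunk);
      MPI_Scatter(sendbuf.data(), chunk, MPI_INT,
                  recvbuf.data(), chunk, MPI_INT,
                  0 /* root */, MPI_COMM_WORLD);
      std::printf("rank %d received %d..%d\n", rank, recvbuf.front(), recvbuf.back());

      MPI_Finalize();
      return 0;
    }

Build and run it, forcing the new algorithm through the same configuration key the tesh tests use (platform and hostfile are placeholders for an actual SimGrid platform description and host list):

    smpicxx -O2 scatter_demo.cpp -o scatter_demo
    smpirun -np 4 -platform small_platform.xml -hostfile hostfile \
            --cfg=smpi/scatter:ompi_linear_nb ./scatter_demo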