``impi``: use the Intel MPI selector for the scatter operations. |br|
``automatic (experimental)``: use an automatic self-benchmarking algorithm. |br|
``ompi_basic_linear``: basic linear scatter. |br|
+``ompi_linear_nb``: linear scatter with non-blocking sends. |br|
``ompi_binomial``: binomial tree scatter. |br|
``mvapich2_two_level_direct``: SMP-aware algorithm with an intra-node stage (defaulting to the mpich selector) followed by a basic linear inter-node stage. Use the mvapich2 selector to switch these to the algorithms tuned for the Stampede cluster. |br|
``mvapich2_two_level_binomial``: SMP-aware algorithm with an intra-node stage (defaulting to the mpich selector) followed by a binomial inter-node stage. Use the mvapich2 selector to switch these to the algorithms tuned for the Stampede cluster. |br|
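For example, assuming a standard SMPI setup (the platform file, executable name, and process count below are placeholders), the non-blocking variant can be selected at run time with ``smpirun --cfg=smpi/scatter:ompi_linear_nb -np 16 -platform platform.xml ./my_scatter_app``. |br|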
return MPI_SUCCESS;
}
+/*
+ * Linear scatter with non-blocking sends: the root posts one isend per
+ * remote rank and then waits on all of them at once, so the outgoing
+ * messages can progress concurrently instead of serializing as in the
+ * blocking linear variant.
+ */
+int scatter__ompi_linear_nb(const void *sbuf, int scount,
+                            MPI_Datatype sdtype,
+                            void *rbuf, int rcount,
+                            MPI_Datatype rdtype,
+                            int root,
+                            MPI_Comm comm)
+{
+  int i, rank, size, line, nreqs;
+  int err = MPI_SUCCESS;
+  ptrdiff_t incr;
+  char *ptmp;
+  MPI_Request *reqs = nullptr;
+  MPI_Request *preq = nullptr;
+
+  rank = comm->rank();
+  size = comm->size();
+
+  /* If not root, receive data. */
+  if (rank != root) {
+    Request::recv(rbuf, rcount, rdtype, root,
+                  COLL_TAG_SCATTER,
+                  comm, MPI_STATUS_IGNORE);
+    return MPI_SUCCESS;
+  }
+
+  nreqs = size - 1; /* no send for myself */
+
+  reqs = new MPI_Request[nreqs]; /* operator new throws on failure, so no NULL check is needed */
+
+  incr = sdtype->get_extent();
+  incr *= scount;
+
+  /* I am the root, loop sending data. */
+  for (i = 0, ptmp = (char *)sbuf, preq = reqs; i < size; ++i, ptmp += incr) {
+    /* simple optimization: a local copy instead of a send to self */
+    if (i == rank) {
+      if (MPI_IN_PLACE != rbuf) {
+        err = Datatype::copy(ptmp, scount, sdtype, rbuf, rcount, rdtype);
+        if (MPI_SUCCESS != err) {
+          line = __LINE__; goto err_hndl;
+        }
+      }
+    } else {
+      *preq = Request::isend(ptmp, scount, sdtype, i,
+                             COLL_TAG_SCATTER, comm);
+      preq++;
+    }
+  }
+
+  err = Request::waitall(preq - reqs, reqs, MPI_STATUSES_IGNORE);
+  if (MPI_SUCCESS != err) {
+    line = __LINE__; goto err_hndl;
+  }
+
+  delete[] reqs;
+  return MPI_SUCCESS;
+
+err_hndl:
+  delete[] reqs;
+  XBT_DEBUG("%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank);
+  (void)line; /* silence compiler warning if XBT_DEBUG is compiled out */
+  return err;
+}
+
}
}
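For reference, the scatter__ompi_linear_nb pattern above maps directly onto the public MPI API: post one non-blocking send per peer, then wait on the whole batch. A minimal standalone sketch (plain MPI C++; the function name, int payload, and tag 0 are illustrative, not part of SimGrid):

#include <mpi.h>
#include <algorithm>
#include <vector>

/* Root posts one MPI_Isend per remote rank and waits on all of them at once;
 * non-root ranks issue the single matching receive. */
int scatter_linear_nb(const int* sbuf, int* rbuf, int count, int root, MPI_Comm comm)
{
  int rank;
  int size;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);

  if (rank != root) /* non-root: one blocking receive from the root */
    return MPI_Recv(rbuf, count, MPI_INT, root, 0, comm, MPI_STATUS_IGNORE);

  std::vector<MPI_Request> reqs;
  reqs.reserve(size - 1); /* no send for myself */
  for (int i = 0; i < size; ++i) {
    if (i == rank) { /* local copy instead of a send to self */
      std::copy(sbuf + i * count, sbuf + (i + 1) * count, rbuf);
    } else {
      reqs.emplace_back();
      MPI_Isend(sbuf + i * count, count, MPI_INT, i, 0, comm, &reqs.back());
    }
  }
  /* all sends progress concurrently; wait for the whole batch */
  return MPI_Waitall(static_cast<int>(reqs.size()), reqs.data(), MPI_STATUSES_IGNORE);
}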
{{"default", "scatter default collective", (void*)scatter__default},
{"ompi", "scatter ompi collective", (void*)scatter__ompi},
{"ompi_basic_linear", "scatter ompi_basic_linear collective", (void*)scatter__ompi_basic_linear},
+ {"ompi_linear_nb", "scatter ompi_linear nonblocking collective", (void*)scatter__ompi_linear_nb},
{"ompi_binomial", "scatter ompi_binomial collective", (void*)scatter__ompi_binomial},
{"mpich", "scatter mpich collective", (void*)scatter__mpich},
{"mvapich2", "scatter mvapich2 collective", (void*)scatter__mvapich2},
allreduce nonoverlapping, basic linear
alltoall linear_sync
bcast chain
-scatter linear_nb
*/
namespace simgrid {
int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
&scatter__ompi_basic_linear,
&scatter__ompi_binomial,
- &scatter__ompi_basic_linear
+ &scatter__ompi_linear_nb
};
/** Algorithms:
* {1, "basic_linear"},
int scatter__default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
int scatter__ompi(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
int scatter__ompi_basic_linear(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
+int scatter__ompi_linear_nb(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
int scatter__ompi_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
int scatter__mpich(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
int scatter__mvapich2(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
ADD_TESH(tesh-smpi-coll-reduce-scatter-${REDUCE_SCATTER} --cfg smpi/reduce_scatter:${REDUCE_SCATTER} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-reduce-scatter --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-reduce-scatter ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-reduce-scatter/coll-reduce-scatter.tesh)
endforeach()
- foreach (SCATTER ompi mpich ompi_basic_linear ompi_binomial mvapich2 mvapich2_two_level_binomial mvapich2_two_level_direct impi)
+ foreach (SCATTER ompi mpich ompi_basic_linear ompi_binomial ompi_linear_nb mvapich2 mvapich2_two_level_binomial mvapich2_two_level_direct impi)
ADD_TESH(tesh-smpi-coll-scatter-${SCATTER} --cfg smpi/scatter:${SCATTER} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-scatter --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-scatter ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-scatter/coll-scatter.tesh)
endforeach()
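  # Assuming ADD_TESH registers each test with CTest under the name given above
  # (hypothetical invocation), a single variant can then be run on its own:
  #   ctest -R tesh-smpi-coll-scatter-ompi_linear_nb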