src/smpi/smpi_mpi_dt.c
src/smpi/smpi_pmpi.c
src/smpi/smpi_replay.c
+ src/smpi/colls/alltoall-2dmesh.c
+ src/smpi/colls/alltoall-3dmesh.c
+ # src/smpi/colls/alltoall-bruck.c
+ src/smpi/colls/alltoall-pair.c
+ src/smpi/colls/alltoall-pair-light-barrier.c
+ src/smpi/colls/alltoall-pair-mpi-barrier.c
+ src/smpi/colls/alltoall-pair-one-barrier.c
+ src/smpi/colls/alltoall-rdb.c
+ src/smpi/colls/alltoall-ring.c
+ src/smpi/colls/alltoall-ring-light-barrier.c
+ src/smpi/colls/alltoall-ring-mpi-barrier.c
+ src/smpi/colls/alltoall-ring-one-barrier.c
+ src/smpi/colls/alltoall-simple.c
)
if(SMPI_F2C)
${SURF_SRC}
${TRACING_SRC}
${XBT_SRC}
+ ${SMPI_SRC}
)
if(${HAVE_JEDULE})
+#ifndef _SMPI_INTERFACE_H
+#define _SMPI_INTERFACE_H
+#include "smpi/smpi.h"
+
/********** Tracing **********/
/* from smpi_instr.c */
void TRACE_smpi_alloc(void);
void TRACE_smpi_recv(int rank, int src, int dst);
void TRACE_smpi_init(int rank);
void TRACE_smpi_finalize(int rank);
+
+/** \brief MPI collective description
+ */
+
+typedef struct mpi_coll_description {
+ const char *name;
+ const char *description;
+ void* coll;
+} s_mpi_coll_description_t, *mpi_coll_description_t;
+
+/** \ingroup MPI alltoall collectives
+ * \brief The list of all available alltoall collectives
+ */
+XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_alltoall_description[];
+int (*mpi_coll_alltoall_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
+
+/** \ingroup MPI allgather
+ * \brief The list of all available allgather collectives
+ */
+XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allgather_description[];
+int (*mpi_coll_allgather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
+
+XBT_PUBLIC(void) coll_help(const char *category, s_mpi_coll_description_t * table);
+XBT_PUBLIC(int) find_coll_description(s_mpi_coll_description_t * table,
+ const char *name);
+
+#endif /* _SMPI_INTERFACE_H */
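Each table entry just pairs a user-visible name and a short description with a function pointer, so adding a new algorithm amounts to appending one entry before the NULL terminator. A minimal sketch, assuming a hypothetical smpi_coll_tuned_alltoall_mynew() implementation (not part of this patch):

  int smpi_coll_tuned_alltoall_mynew(void *sbuf, int scount, MPI_Datatype stype,
                                     void *rbuf, int rcount, MPI_Datatype rtype,
                                     MPI_Comm comm);

  s_mpi_coll_description_t example_alltoall_description[] = {
    {"mynew", "Hypothetical alltoall variant", smpi_coll_tuned_alltoall_mynew},
    {NULL, NULL, NULL}   /* description tables must stay NULL terminated */
  };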
sg_weight_S_parameter = xbt_cfg_get_double(_sg_cfg_set, name);
}
+/* callback of the mpi collectives */
+static void _sg_cfg_cb__coll(const char *category,
+                             s_mpi_coll_description_t * table,
+                             const char *name, int pos)
+{
+  char *val;
+
+  xbt_assert(_sg_init_status == 1,
+             "Cannot change the collective algorithm after initialization");
+
+ val = xbt_cfg_get_string(_sg_cfg_set, name);
+
+ if (!strcmp(val, "help")) {
+ coll_help(category, table);
+ exit(0);
+ }
+
+  /* Check that the requested collective exists (dies otherwise) */
+ find_coll_description(table, val);
+}
+static void _sg_cfg_cb__coll_alltoall(const char *name, int pos)
+{
+ _sg_cfg_cb__coll("alltoall", mpi_coll_alltoall_description, name, pos);
+}
+static void _sg_cfg_cb__coll_allgather(const char *name, int pos)
+{
+  _sg_cfg_cb__coll("allgather", mpi_coll_allgather_description, name, pos);
+}
+
+
/* callback of the inclusion path */
static void _sg_cfg_cb__surf_path(const char *name, int pos)
{
NULL);
xbt_cfg_setdefault_string(_sg_cfg_set, "smpi/or", "1:0:0:0:0");
-
+ default_value = xbt_strdup("ompi");
+ xbt_cfg_register(&_sg_cfg_set, "smpi/alltoall",
+ "Which collective to use for alltoall",
+ xbt_cfgelm_string, &default_value, 1, 1, &_sg_cfg_cb__coll_alltoall,
+ NULL);
+
+ default_value = xbt_strdup("default");
+ xbt_cfg_register(&_sg_cfg_set, "smpi/allgather",
+ "Which collective to use for allgather",
+ xbt_cfgelm_string, &default_value, 1, 1, &_sg_cfg_cb__coll_allgather,
+ NULL);
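Once registered, the algorithm is selectable at run time through SimGrid's usual --cfg mechanism, without recompiling. For illustration only (application, platform and hostfile names are made up):

  smpirun -np 16 -platform platform.xml -hostfile hostfile ./my_mpi_app \
      --cfg=smpi/alltoall:pair
  smpirun -np 16 -platform platform.xml -hostfile hostfile ./my_mpi_app \
      --cfg=smpi/alltoall:help        # prints the list of algorithms and exits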
//END SMPI
--- /dev/null
+Copyright (c) 2006, Ahmad Faraj & Xin Yuan,
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the Florida State University nor the names of its
+ contributors may be used to endorse or promote products derived from this
+ software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ***************************************************************************
+ * Any results obtained from executing this software require the *
+ * acknowledgment and citation of the software and its owners. *
+ * The full citation is given below: *
+ * *
+ * A. Faraj, X. Yuan, and D. Lowenthal. "STAR-MPI: Self Tuned Adaptive *
+ * Routines for MPI Collective Operations." The 20th ACM International *
+ * Conference on Supercomputing (ICS), Queensland, Australia *
+ * June 28-July 1, 2006. *
+ ***************************************************************************
+
--- /dev/null
+#include "colls.h"
+#include <math.h>
+XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi,
+ "Logging specific to SMPI collectives");
+
+/*****************************************************************************
+
+ * Function: alltoall_2dmesh_shoot
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function realizes the alltoall operation using the 2dmesh
+           algorithm. It actually performs an allgather operation in the
+           x dimension, then in the y dimension. Each node then extracts
+           the needed data. The communication in each dimension follows
+           "simple".
+
+ * Author: Ahmad Faraj
+
+****************************************************************************/
+int alltoall_check_is_2dmesh(int num, int * i, int * j)
+{
+ int x, max = num / 2;
+ x = sqrt(num);
+
+ while (x <= max)
+ {
+ if ((num % x) == 0)
+ {
+ * i = x;
+ * j = num / x;
+
+ if (* i > * j)
+ {
+ x = * i;
+ * i = * j;
+ * j = x;
+ }
+
+ return 1;
+ }
+ x++;
+ }
+ return 0;
+}
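A few values worked out by hand from the factorization loop above, for illustration only:

  alltoall_check_is_2dmesh(12, &i, &j);   /* returns 1 with i = 3, j = 4 */
  alltoall_check_is_2dmesh(16, &i, &j);   /* returns 1 with i = 4, j = 4 */
  alltoall_check_is_2dmesh(7,  &i, &j);   /* returns 0: 7 is prime, no 2-D mesh */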
+
+int
+smpi_coll_tuned_alltoall_2dmesh(void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ MPI_Status * statuses, s;
+  MPI_Request * reqs, * req_ptr;
+ MPI_Aint extent;
+
+ char * tmp_buff1, * tmp_buff2;
+ int i, j, src, dst, rank, num_procs, count, num_reqs;
+ int rows, cols, my_row, my_col, X, Y, send_offset, recv_offset;
+ int two_dsize, my_row_base, my_col_base, src_row_base, block_size;
+ int tag = 1, failure = 0, success = 1;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &extent);
+
+ if (!alltoall_check_is_2dmesh(num_procs, &X, &Y))
+ return failure;
+
+ two_dsize = X * Y;
+
+ my_row_base = (rank / Y) * Y;
+ my_col_base = rank % Y;
+
+ block_size = extent * send_count;
+
+ tmp_buff1 =(char *) malloc(block_size * num_procs * Y);
+ if (!tmp_buff1)
+ {
+      XBT_WARN("alltoall-2dmesh: cannot allocate memory");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ tmp_buff2 =(char *) malloc(block_size * Y);
+ if (!tmp_buff2)
+ {
+      XBT_WARN("alltoall-2dmesh: cannot allocate memory");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+
+
+ num_reqs = X;
+ if (Y > X) num_reqs = Y;
+
+ statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
+ reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
+ if (!reqs)
+ {
+      XBT_WARN("alltoall-2dmesh: cannot allocate memory");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ req_ptr = reqs;
+
+ send_offset = recv_offset = (rank % Y) * block_size * num_procs;
+
+ count = send_count * num_procs;
+
+ for (i = 0; i < Y; i++)
+ {
+ src = i + my_row_base;
+ if (src == rank)
+ continue;
+
+ recv_offset = (src % Y) * block_size * num_procs;
+ MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
+ req_ptr++);
+ }
+
+ for (i = 0; i < Y; i++)
+ {
+ dst = i + my_row_base;
+ if (dst == rank)
+ continue;
+ MPI_Send(send_buff, count, send_type, dst, tag, comm);
+ }
+
+ MPI_Waitall(Y - 1, reqs, statuses);
+ req_ptr = reqs;
+
+ for (i = 0; i < Y; i++)
+ {
+ send_offset = (rank * block_size) + (i * block_size * num_procs);
+ recv_offset = (my_row_base * block_size) + (i * block_size);
+
+ if (i + my_row_base == rank)
+ MPI_Sendrecv (send_buff + recv_offset, send_count, send_type,
+ rank, tag, recv_buff + recv_offset, recv_count,
+ recv_type, rank, tag, comm, &s);
+
+ else
+ MPI_Sendrecv (tmp_buff1 + send_offset, send_count, send_type,
+ rank, tag,
+ recv_buff + recv_offset, recv_count, recv_type,
+ rank, tag, comm, &s);
+ }
+
+
+ for (i = 0; i < X; i++)
+ {
+ src = (i * Y + my_col_base);
+ if (src == rank)
+ continue;
+ src_row_base = (src / Y) * Y;
+
+ MPI_Irecv(recv_buff + src_row_base * block_size, recv_count * Y,
+ recv_type, src, tag, comm, req_ptr++);
+ }
+
+ for (i = 0; i < X; i++)
+ {
+ dst = (i * Y + my_col_base);
+ if (dst == rank)
+ continue;
+
+ recv_offset = 0;
+ for (j = 0; j < Y; j++)
+ {
+ send_offset = (dst + j * num_procs) * block_size;
+
+ if (j + my_row_base == rank)
+ MPI_Sendrecv (send_buff + dst * block_size, send_count, send_type,
+ rank, tag,
+ tmp_buff2 + recv_offset, recv_count, recv_type,
+ rank, tag, comm, &s);
+ else
+ MPI_Sendrecv (tmp_buff1 + send_offset, send_count, send_type,
+ rank, tag,
+ tmp_buff2 + recv_offset, recv_count, recv_type,
+ rank, tag, comm, &s);
+
+ recv_offset += block_size;
+ }
+
+ MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
+ }
+ MPI_Waitall(X - 1, reqs, statuses);
+ free(reqs);
+ free(statuses);
+ free(tmp_buff1);
+ free(tmp_buff2);
+ return success;
+}
--- /dev/null
+#include "colls.h"
+#include <math.h>
+
+/*****************************************************************************
+
+ * Function: alltoall_3dmesh_shoot
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function realizes the alltoall operation using the 3dmesh
+           algorithm. It actually performs an allgather operation in the
+           x dimension, then the y dimension, then the z dimension. Each
+           node then extracts the needed data. The communication in every
+           dimension is simple.
+
+ * Author: Ahmad Faraj
+****************************************************************************/
+
+int alltoall_check_is_3dmesh(int num, int * i, int * j, int * k)
+{
+ int x, max = num / 3;
+ x = cbrt(num);
+ * i = * j = * k = 0;
+ while (x <= max)
+ {
+ if ((num % (x * x)) == 0)
+ {
+ * i = * j = x;
+ * k = num / (x * x);
+ return 1;
+ }
+ x++;
+ }
+ return 0;
+}
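As with the 2-D case, a few hand-worked values for illustration only:

  alltoall_check_is_3dmesh(8,  &i, &j, &k);   /* returns 1 with i = j = 2, k = 2 */
  alltoall_check_is_3dmesh(12, &i, &j, &k);   /* returns 1 with i = j = 2, k = 3 */
  alltoall_check_is_3dmesh(10, &i, &j, &k);   /* returns 0: no x in range with x*x dividing 10 */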
+
+int smpi_coll_tuned_alltoall_3dmesh(void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ MPI_Request * reqs, * req_ptr;
+ MPI_Aint extent;
+ MPI_Status status, * statuses;
+ int i, j, src, dst, rank, num_procs, num_reqs, X, Y, Z, block_size, count;
+ int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base;
+ int src_z_base, send_offset, recv_offset, tag = 1, failure = 0, success = 1;
+
+ char * tmp_buff1, * tmp_buff2;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &extent);
+
+ if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z))
+ return failure;
+
+ num_reqs = X;
+ if (Y > X) num_reqs = Y;
+ if (Z > Y) num_reqs = Z;
+
+ two_dsize = X * Y;
+ my_z = rank / two_dsize;
+
+ my_row_base = (rank / X) * X;
+ my_col_base = (rank % Y) + (my_z * two_dsize);
+ my_z_base = my_z * two_dsize;
+
+ block_size = extent * send_count;
+
+ tmp_buff1 =(char *) malloc(block_size * num_procs * two_dsize);
+ if (!tmp_buff1)
+ {
+ printf("alltoall-3Dmesh:97: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ tmp_buff2 =(char *) malloc(block_size * two_dsize);
+ if (!tmp_buff2)
+ {
+ printf("alltoall-3Dmesh:105: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
+ reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
+ if (!reqs)
+ {
+ printf("alltoall-3Dmesh:113: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ req_ptr = reqs;
+
+ send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs;
+
+ MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag,
+ tmp_buff1 + recv_offset, num_procs * recv_count,
+ recv_type, rank, tag, comm, &status);
+
+ count = send_count * num_procs;
+
+ for (i = 0; i < Y; i++)
+ {
+ src = i + my_row_base;
+ if (src == rank) continue;
+ recv_offset = (src % two_dsize) * block_size * num_procs;
+ MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
+ req_ptr++);
+ }
+
+ for (i = 0; i < Y; i++)
+ {
+ dst = i + my_row_base;
+ if (dst == rank) continue;
+ MPI_Send(send_buff, count, send_type, dst, tag, comm);
+ }
+
+ MPI_Waitall(Y - 1, reqs, statuses);
+ req_ptr = reqs;
+
+
+ for (i = 0; i < X; i++)
+ {
+ src = (i * Y + my_col_base);
+ if (src == rank) continue;
+
+ src_row_base = (src / X) * X;
+
+ recv_offset = (src_row_base % two_dsize) * block_size * num_procs;
+ MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y,
+ recv_type, src, tag, comm, req_ptr++);
+ }
+
+ send_offset = (my_row_base % two_dsize) * block_size * num_procs;
+ for (i = 0; i < X; i++)
+ {
+ dst = (i * Y + my_col_base);
+ if (dst == rank) continue;
+ MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type,
+ dst, tag, comm);
+ }
+
+ MPI_Waitall(X - 1, reqs, statuses);
+ req_ptr = reqs;
+
+ for (i = 0; i < two_dsize; i++)
+ {
+ send_offset = (rank * block_size) + (i * block_size * num_procs);
+ recv_offset = (my_z_base * block_size) + (i * block_size);
+ MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag,
+ recv_buff + recv_offset, recv_count, recv_type, rank, tag,
+ comm, &status);
+ }
+
+ for (i = 1; i < Z; i++)
+ {
+ src = (rank + i * two_dsize) % num_procs;
+ src_z_base = (src / two_dsize) * two_dsize;
+
+ recv_offset = (src_z_base * block_size);
+
+ MPI_Irecv(recv_buff + recv_offset, recv_count * two_dsize, recv_type,
+ src, tag, comm, req_ptr++);
+ }
+
+ for (i = 1; i < Z; i++)
+ {
+ dst = (rank + i * two_dsize) % num_procs;
+
+ recv_offset = 0;
+ for (j = 0; j < two_dsize; j++)
+ {
+ send_offset = (dst + j * num_procs) * block_size;
+ MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
+ rank, tag, tmp_buff2 + recv_offset, recv_count,
+ recv_type, rank, tag, comm, &status);
+
+ recv_offset += block_size;
+ }
+
+ MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm);
+
+ }
+
+ MPI_Waitall(Z - 1, reqs, statuses);
+
+ free(reqs);
+ free(statuses);
+ free(tmp_buff1);
+ free(tmp_buff2);
+ return success;
+}
--- /dev/null
+/*****************************************************************************
+
+ * Function: alltoall_bruck
+
+ * Return: int
+
+ * inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function realizes the alltoall operation using the Bruck algorithm.
+
+ * Author: MPICH / modified by Ahmad Faraj
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_bruck(void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ MPI_Status status;
+ MPI_Aint extent;
+ MPI_Datatype new_type;
+
+ int * blocks_length, * disps;
+ int i, src, dst, rank, num_procs, count, remainder, block, position;
+ int pack_size, tag = 1, pof2 = 1, success = 1, failure = 0;
+
+
+ char * tmp_buff;
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Comm_rank(comm, &rank);
+
+ MPI_Type_extent(recv_type, &extent);
+
+ tmp_buff = (char *) malloc (num_procs * recv_count * extent);
+ if (!tmp_buff)
+ {
+ printf("alltoall-bruck:53: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ disps = (int *) malloc(sizeof(int) * num_procs);
+ if (!disps)
+ {
+ printf("alltoall-bruck:61: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ blocks_length = (int *) malloc(sizeof(int) * num_procs);
+ if (!blocks_length)
+ {
+ printf("alltoall-bruck:69: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+
+ MPI_Sendrecv(send_ptr + rank * send_count * extent,
+ (num_procs - rank) * send_count, send_type, rank, tag,
+ recv_ptr, (num_procs - rank) * recv_count, recv_type, rank,
+ tag, comm, &status);
+
+ MPI_Sendrecv(send_ptr, rank * send_count, send_type, rank, tag,
+ recv_ptr + (num_procs - rank) * recv_count * extent,
+ rank * recv_count, recv_type, rank, tag, comm, &status);
+
+
+
+ MPI_Pack_size(send_count * num_procs, send_type, comm, &pack_size);
+
+ while (pof2 < num_procs)
+ {
+ dst = (rank + pof2) % num_procs;
+ src = (rank - pof2 + num_procs) % num_procs;
+
+
+ count = 0;
+ for (block = 1; block < num_procs; block++)
+ if (block & pof2)
+ {
+ blocks_length[count] = send_count;
+ disps[count] = block * send_count;
+ count++;
+ }
+
+ MPI_Type_indexed(count, blocks_length, disps, recv_type, &new_type);
+ MPI_Type_commit(&new_type);
+
+ position = 0;
+ MPI_Pack(recv_buff, 1, new_type, tmp_buff, pack_size, &position, comm);
+
+ MPI_Sendrecv(tmp_buff, position, MPI_PACKED, dst, tag, recv_buff, 1,
+ new_type, src, tag, comm, &status);
+ MPI_Type_free(&new_type);
+
+ pof2 *= 2;
+ }
+
+ free(disps);
+ free(blocks_length);
+
+ MPI_Sendrecv(recv_ptr + (rank + 1) * recv_count * extent,
+ (num_procs - rank - 1) * recv_count, send_type,
+ rank, tag, tmp_buff, (num_procs - rank - 1) * recv_count,
+ recv_type, rank, tag, comm, &status);
+
+ MPI_Sendrecv(recv_ptr, (rank + 1) * recv_count, send_type, rank, tag,
+ tmp_buff + (num_procs - rank - 1) * recv_count * extent,
+ (rank + 1) * recv_count, recv_type, rank, tag, comm, &status);
+
+
+ for (i = 0; i < num_procs; i++)
+ MPI_Sendrecv(tmp_buff + i * recv_count * extent, recv_count, send_type,
+ rank, tag,
+ recv_ptr + (num_procs - i - 1) * recv_count * extent,
+ recv_count, recv_type, rank, tag, comm, &status);
+
+ free(tmp_buff);
+ return success;
+}
--- /dev/null
+int
+alltoall_native(void * send_buff, int send_count,
+ MPI_Datatype send_type, void * recv_buff,
+ int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ return MPI_Alltoall(send_buff, send_count, send_type, recv_buff, recv_count,
+ recv_type, comm);
+}
--- /dev/null
+#include "colls.h"
+/*****************************************************************************
+
+ * Function: alltoall_pair_light_barrier
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function works in P - 1 steps. In step i, node j exchanges data
+ with node i ^ j. Light barriers are inserted between
+ communications in different phases.
+
+ * Author: Ahmad Faraj
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_pair_light_barrier(void * send_buff, int send_count,
+ MPI_Datatype send_type, void * recv_buff,
+ int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ MPI_Aint send_chunk, recv_chunk;
+ MPI_Status s;
+ int i, src, dst, rank, num_procs, next_partner;
+ int tag = 1, success = 1; /*, failure = 0;*/
+
+ char send_sync = 'a', recv_sync = 'b';
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &send_chunk);
+ MPI_Type_extent(recv_type, &recv_chunk);
+
+ send_chunk *= send_count;
+ recv_chunk *= recv_count;
+
+ MPI_Sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag,
+ recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag,
+ comm, &s);
+
+ for (i = 1; i < num_procs; i++)
+ {
+ src = dst = rank ^ i;
+
+ MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type,
+ dst, tag, recv_ptr + src * recv_chunk, recv_count,
+ recv_type, src, tag, comm, &s);
+
+ if ((i + 1) < num_procs)
+ {
+ next_partner = rank ^ (i + 1);
+ MPI_Sendrecv(&send_sync, 1, MPI_CHAR, next_partner, tag,
+ &recv_sync, 1, MPI_CHAR, next_partner, tag,
+ comm, &s);
+ }
+ }
+ return success;
+}
--- /dev/null
+#include "colls.h"
+/*****************************************************************************
+
+ * Function: alltoall_pair_mpi_barrier
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function works when P is a power of two. In each of the P - 1
+           phases, nodes exchange data with their pair partner. MPI
+           barriers are inserted between consecutive phases.
+
+ * Author: Ahmad Faraj
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_pair_mpi_barrier(void * send_buff, int send_count,
+ MPI_Datatype send_type, void * recv_buff,
+ int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ MPI_Status s;
+ MPI_Aint send_chunk, recv_chunk;
+ int i, src, dst, rank, num_procs;
+ int tag = 101, success = 1; /*, failure = 0, pof2 = 1;*/
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &send_chunk);
+ MPI_Type_extent(recv_type, &recv_chunk);
+
+ send_chunk *= send_count;
+ recv_chunk *= recv_count;
+
+ for (i = 0; i < num_procs; i++)
+ {
+ src = dst = rank ^ i;
+ MPI_Barrier(comm);
+ MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
+ src, tag, comm, &s);
+ }
+ return success;
+}
--- /dev/null
+#include "colls.h"
+/*****************************************************************************
+
+ * Function: alltoall_pair_one_barrier
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function works when P is a power of two. In each of the P - 1
+           phases, nodes exchange data with their pair partner. A single
+           barrier is issued before the exchange phases start.
+
+ * Author: Ahmad Faraj
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_pair_one_barrier(void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+
+ MPI_Aint send_chunk, recv_chunk;
+ MPI_Status s;
+ int i, src, dst, rank, num_procs;
+ int tag = 1, success = 1; /*, failure = 0, pof2 = 1; */
+
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &send_chunk);
+ MPI_Type_extent(recv_type, &recv_chunk);
+
+ send_chunk *= send_count;
+ recv_chunk *= recv_count;
+
+ MPI_Barrier(comm);
+ for (i = 0; i < num_procs; i++)
+ {
+ src = dst = rank ^ i;
+ MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
+ src, tag, comm, &s);
+ }
+
+ return success;
+}
--- /dev/null
+#include "colls.h"
+/*****************************************************************************
+
+ * Function: alltoall_pair
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function works when P is a power of two. In each of the P - 1
+           phases, nodes exchange data with their pair partner.
+
+ * Author: Ahmad Faraj
+
+ ****************************************************************************/
+/*
+int
+alltoall_pair(void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+
+ MPI_Aint send_chunk, recv_chunk;
+ MPI_Status s;
+ MPI_Win win;
+ int assert = 0;
+ int i, src, dst, rank, num_procs;
+ int tag = 1, success = 1, failure = 0, pof2 = 1;
+
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &send_chunk);
+ MPI_Type_extent(recv_type, &recv_chunk);
+
+ MPI_Win_create(recv_buff, num_procs*recv_chunk*send_count,recv_chunk,0,
+ comm, &win);
+ send_chunk *= send_count;
+ recv_chunk *= recv_count;
+
+ MPI_Win_fence(assert, win);
+ for (i = 0; i < num_procs; i++)
+ {
+ src = dst = rank ^ i;
+ MPI_Put(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ rank*send_chunk, send_count, send_type, win);
+ }
+ MPI_Win_fence (assert, win);
+ MPI_Win_free(&win);
+ return 0;
+}
+*/
+
+int
+smpi_coll_tuned_alltoall_pair(void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+
+ MPI_Aint send_chunk, recv_chunk;
+ MPI_Status s;
+ int i, src, dst, rank, num_procs;
+ int tag = 1, success = 1;
+
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &send_chunk);
+ MPI_Type_extent(recv_type, &recv_chunk);
+
+ send_chunk *= send_count;
+ recv_chunk *= recv_count;
+
+ for (i = 0; i < num_procs; i++)
+ {
+ src = dst = rank ^ i;
+ MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
+ src, tag, comm, &s);
+ }
+
+ return success;
+}
+
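The pair variants rely on the exclusive-or schedule rank ^ i visiting every peer exactly once, which only holds when the communicator size is a power of two. An illustrative guard (not part of the patch) that the function or its caller could use to fall back on the ring variant otherwise:

  /* illustrative sketch: bail out to the ring algorithm on non power-of-two sizes */
  if (num_procs <= 0 || (num_procs & (num_procs - 1)) != 0)
    return smpi_coll_tuned_alltoall_ring(send_buff, send_count, send_type,
                                         recv_buff, recv_count, recv_type, comm);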
--- /dev/null
+#include "colls.h"
+/*****************************************************************************
+
+ * Function: alltoall_rdb
+
+ * Return: int
+
+ * inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function realizes the alltoall operation using the recursive
+           doubling algorithm.
+
+ * Author: MPICH / slightly modified by Ahmad Faraj.
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_rdb(void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ /* MPI variables */
+ MPI_Status status;
+ MPI_Aint send_increment, recv_increment, extent;
+
+ int dst_tree_root, rank_tree_root, send_offset, recv_offset;
+ int rank, num_procs, j, k, dst, curr_size, max_size;
+ int last_recv_count, tmp_mask, tree_root, num_procs_completed;
+ int tag = 1, mask = 1, success = 1, failure = 0, c = 0, i = 0;
+
+ char * tmp_buff;
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Comm_rank(comm, &rank);
+ MPI_Type_extent(send_type, &send_increment);
+ MPI_Type_extent(recv_type, &recv_increment);
+ MPI_Type_extent(recv_type, &extent);
+
+ send_increment *= (send_count * num_procs);
+ recv_increment *= (recv_count * num_procs);
+
+ max_size = num_procs * recv_increment;
+
+ tmp_buff = (char * ) malloc(max_size);
+ if (!tmp_buff)
+ {
+ printf("alltoall-rdb:56: cannot allocate memory\n");
+ MPI_Finalize();
+ exit(failure);
+ }
+
+ curr_size = send_count * num_procs;
+
+ MPI_Sendrecv(send_ptr, curr_size, send_type, rank, tag,
+ tmp_buff + (rank * recv_increment),
+ curr_size, recv_type, rank, tag, comm, &status);
+
+ while (mask < num_procs)
+ {
+ dst = rank ^ mask;
+ dst_tree_root = dst >> i;
+ dst_tree_root <<= i;
+ rank_tree_root = rank >> i;
+ rank_tree_root <<= i;
+ send_offset = rank_tree_root * send_increment;
+ recv_offset = dst_tree_root * recv_increment;
+
+ if (dst < num_procs)
+ {
+ MPI_Sendrecv(tmp_buff + send_offset, curr_size, send_type, dst, tag,
+ tmp_buff + recv_offset, mask * recv_count * num_procs,
+ recv_type, dst, tag, comm, &status);
+
+ MPI_Get_count(&status, recv_type, &last_recv_count);
+ curr_size += last_recv_count;
+ }
+
+
+ if (dst_tree_root + mask > num_procs)
+ {
+
+ num_procs_completed = num_procs - rank_tree_root - mask;
+ /* num_procs_completed is the number of processes in this
+ subtree that have all the data. Send data to others
+ in a tree fashion. First find root of current tree
+ that is being divided into two. k is the number of
+ least-significant bits in this process's rank that
+ must be zeroed out to find the rank of the root */
+
+ j = mask;
+ k = 0;
+ while (j)
+ {
+ j >>= 1;
+ k++;
+ }
+ k--;
+
+ tmp_mask = mask >> 1;
+
+ while (tmp_mask)
+ {
+ dst = rank ^ tmp_mask;
+
+ tree_root = rank >> k;
+ tree_root <<= k;
+
+ /* send only if this proc has data and destination
+ doesn't have data. at any step, multiple processes
+ can send if they have the data */
+
+ if ((dst > rank)
+ && (rank < tree_root + num_procs_completed)
+ && (dst >= tree_root + num_procs_completed))
+ {
+ MPI_Send(tmp_buff + dst_tree_root * send_increment,
+ last_recv_count, send_type, dst, tag, comm);
+
+ }
+
+ /* recv only if this proc. doesn't have data and sender
+ has data */
+
+ else if ((dst < rank)
+ && (dst < tree_root + num_procs_completed)
+ && (rank >= tree_root + num_procs_completed))
+ {
+ MPI_Recv(tmp_buff + dst_tree_root * send_increment,
+ mask * num_procs * send_count, send_type, dst,
+ tag, comm, &status);
+
+ MPI_Get_count(&status, send_type, &last_recv_count);
+ curr_size += last_recv_count;
+ }
+
+ tmp_mask >>= 1;
+ k--;
+ }
+ }
+
+ mask <<= 1;
+ i++;
+ }
+
+ for (i = 0; i < num_procs; i++)
+ MPI_Sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent,
+ send_count, send_type, rank, tag,
+ recv_ptr + (i * recv_count * extent),
+ recv_count, recv_type, rank, tag, comm, &status);
+ free(tmp_buff);
+ return success;
+}
--- /dev/null
+#include "colls.h"
+/*****************************************************************************
+
+ * Function: alltoall_ring_light_barrier
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j + i.
+ Light barriers are inserted between communications in different
+ phases.
+
+ * Author: Ahmad Faraj
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_ring_light_barrier(void * send_buff, int send_count,
+ MPI_Datatype send_type, void * recv_buff,
+ int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ MPI_Aint send_chunk, recv_chunk;
+ MPI_Status s;
+ int i, src, dst, rank, num_procs, next_dst, next_src;
+ int tag = 1, success = 1; /*, failure = 0;*/
+
+ char send_sync = 'a', recv_sync = 'b';
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &send_chunk);
+ MPI_Type_extent(recv_type, &recv_chunk);
+
+ send_chunk *= send_count;
+ recv_chunk *= recv_count;
+
+ MPI_Sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag,
+ recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag,
+ comm, &s);
+
+ for (i = 1; i < num_procs; i++)
+ {
+ src = (rank - i + num_procs) % num_procs;
+ dst = (rank + i) % num_procs;
+
+ MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type,
+ dst, tag, recv_ptr + src * recv_chunk, recv_count,
+ recv_type, src, tag, comm, &s);
+
+ if ((i + 1) < num_procs)
+ {
+ next_src = (rank - (i + 1) + num_procs) % num_procs;
+ next_dst = (rank + (i + 1) + num_procs) % num_procs;
+ MPI_Sendrecv(&send_sync, 1, MPI_CHAR, next_src, tag,
+ &recv_sync, 1, MPI_CHAR, next_dst, tag,
+ comm, &s);
+
+ }
+ }
+
+ return success;
+}
--- /dev/null
+#include "colls.h"
+/*****************************************************************************
+
+ * Function: alltoall_ring_mpi_barrier
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j + i.
+ MPI barriers are added between each two phases.
+
+ * Author: Ahmad Faraj
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_ring_mpi_barrier(void * send_buff, int send_count,
+ MPI_Datatype send_type, void * recv_buff,
+ int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ MPI_Status s;
+ MPI_Aint send_chunk, recv_chunk;
+ int i, src, dst, rank, num_procs;
+ int tag = 1, success = 1; /*, failure = 0, pof2 = 1;*/
+
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &send_chunk);
+ MPI_Type_extent(recv_type, &recv_chunk);
+
+ send_chunk *= send_count;
+ recv_chunk *= recv_count;
+
+ for (i = 0; i < num_procs; i++)
+ {
+ src = (rank - i + num_procs) % num_procs;
+ dst = (rank + i) % num_procs;
+
+ MPI_Barrier(comm);
+ MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
+ src, tag, comm, &s);
+ }
+
+ return success;
+}
--- /dev/null
+#include "colls.h"
+/*****************************************************************************
+
+ * Function: alltoall_ring_one_barrier
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j + i.
+
+ * Author: Ahmad Faraj
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_ring_one_barrier(void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ MPI_Status s;
+ MPI_Aint send_chunk, recv_chunk;
+ int i, src, dst, rank, num_procs;
+ int tag = 1, success = 1; /*, failure = 0, pof2 = 1; */
+
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &send_chunk);
+ MPI_Type_extent(recv_type, &recv_chunk);
+
+ send_chunk *= send_count;
+ recv_chunk *= recv_count;
+
+ MPI_Barrier(comm);
+ for (i = 0; i < num_procs; i++)
+ {
+ src = (rank - i + num_procs) % num_procs;
+ dst = (rank + i) % num_procs;
+
+ MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
+ src, tag, comm, &s);
+ }
+ return success;
+}
--- /dev/null
+#include "colls.h"
+/*****************************************************************************
+
+ * Function: alltoall_ring
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j + i.
+
+ * Author: Ahmad Faraj
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_ring(void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ MPI_Status s;
+ MPI_Aint send_chunk, recv_chunk;
+ int i, src, dst, rank, num_procs;
+ int tag = 1, success = 1; /*, failure = 0, pof2 = 1*/;
+
+ char * send_ptr = (char *) send_buff;
+ char * recv_ptr = (char *) recv_buff;
+
+ MPI_Comm_rank(comm, &rank);
+ MPI_Comm_size(comm, &num_procs);
+ MPI_Type_extent(send_type, &send_chunk);
+ MPI_Type_extent(recv_type, &recv_chunk);
+
+ send_chunk *= send_count;
+ recv_chunk *= recv_count;
+
+ for (i = 0; i < num_procs; i++)
+ {
+ src = (rank - i + num_procs) % num_procs;
+ dst = (rank + i) % num_procs;
+
+ MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
+ src, tag, comm, &s);
+ }
+ return success;
+}
--- /dev/null
+#include "colls.h"
+
+/*****************************************************************************
+
+ * Function: alltoall_spreading_simple
+
+ * Return: int
+
+ * Inputs:
+ send_buff: send input buffer
+ send_count: number of elements to send
+ send_type: data type of elements being sent
+ recv_buff: receive output buffer
+           recv_count: number of elements to receive
+ recv_type: data type of elements being received
+ comm: communicator
+
+ * Descrp: Let i -> j denote the communication from node i to node j. The
+ order of communications for node i is i -> i + 1, i -> i + 2, ...,
+           i -> (i + P - 1) % P.
+
+ * Author: Ahmad Faraj
+
+ ****************************************************************************/
+int
+smpi_coll_tuned_alltoall_simple(void * send_buff, int send_count,
+ MPI_Datatype send_type, void * recv_buff,
+ int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm)
+{
+ int i, rank, size, nreqs, err, src, dst, tag = 101;
+ char *psnd;
+ char *prcv;
+ MPI_Aint sndinc;
+ MPI_Aint rcvinc;
+ MPI_Request *req;
+ MPI_Request *preq;
+ MPI_Request *qreq;
+ MPI_Status s, * statuses;
+
+
+ MPI_Comm_size(comm, &size);
+ MPI_Comm_rank(comm, &rank);
+ MPI_Type_extent(send_type, &sndinc);
+ MPI_Type_extent(recv_type, &rcvinc);
+ sndinc *= send_count;
+ rcvinc *= recv_count;
+
+ /* Allocate arrays of requests. */
+
+ nreqs = 2 * (size - 1);
+ if (nreqs > 0)
+ {
+ req = (MPI_Request *) malloc(nreqs * sizeof(MPI_Request));
+ statuses = (MPI_Status *) malloc(nreqs * sizeof(MPI_Status));
+ if (!req || !statuses)
+ {
+ free(req);
+ free(statuses);
+ return 0;
+ }
+ }
+  else
+    {
+      req = NULL;
+      statuses = NULL;
+    }
+
+ /* simple optimization */
+
+ psnd = ((char *) send_buff) + (rank * sndinc);
+ prcv = ((char *) recv_buff) + (rank * rcvinc);
+ MPI_Sendrecv (psnd, send_count, send_type, rank, tag,
+ prcv, recv_count, recv_type,
+ rank, tag, comm, &s);
+
+
+ /* Initiate all send/recv to/from others. */
+
+ preq = req;
+ qreq = req + size - 1;
+ prcv = (char*) recv_buff;
+ psnd = (char*) send_buff;
+ for (i = 0; i < size; i++)
+ {
+ src = dst = (rank + i) % size;
+ if (src == rank) continue;
+ if (dst == rank) continue;
+ MPI_Recv_init(prcv + (src * rcvinc), recv_count, recv_type, src,
+ tag, comm, preq++);
+ MPI_Send_init(psnd + (dst * sndinc), send_count, send_type, dst,
+ tag, comm, qreq++);
+ }
+
+ /* Start all the requests. */
+
+ err = MPI_Startall(nreqs, req);
+
+ /* Wait for them all. */
+
+ err = MPI_Waitall(nreqs, req, statuses);
+
+ if (err != MPI_SUCCESS) {
+ if (req)
+ free((char *) req);
+ return err;
+ }
+
+ for (i = 0, preq = req; i < nreqs; ++i, ++preq) {
+ err = MPI_Request_free(preq);
+ if (err != MPI_SUCCESS) {
+ if (req)
+ free((char *) req);
+ if (statuses)
+ free(statuses);
+ return err;
+ }
+ }
+
+ /* All done */
+
+ if (req)
+ free((char *) req);
+ if (statuses)
+ free(statuses);
+ return (1);
+}
+
+
--- /dev/null
+#ifndef SMPI_COLLS_H
+#define SMPI_COLLS_H
+
+#include "smpi/mpi.h"
+#include "xbt.h"
+
+int smpi_coll_tuned_alltoall_2dmesh(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_3dmesh(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+/*int smpi_coll_tuned_alltoall_bruck(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);*/
+int smpi_coll_tuned_alltoall_pair(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_pair_light_barrier(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_pair_mpi_barrier(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_pair_one_barrier(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_rdb(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_ring(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_ring_light_barrier(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_ring_mpi_barrier(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_ring_one_barrier(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int smpi_coll_tuned_alltoall_simple(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+
+
+int smpi_coll_tuned_allgather_2dmesh(
+ void * send_buff, int send_count, MPI_Datatype send_type,
+ void * recv_buff, int recv_count, MPI_Datatype recv_type,
+ MPI_Comm comm);
+
+#endif
MPI_Comm comm, int arity);
void nary_tree_barrier(MPI_Comm comm, int arity);
+int smpi_coll_tuned_alltoall_ompi(void *sendbuf, int sendcount,
+ MPI_Datatype sendtype, void *recvbuf,
+ int recvcount, MPI_Datatype recvtype,
+ MPI_Comm comm);
int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount,
MPI_Datatype sendtype, void *recvbuf,
int recvcount, MPI_Datatype recvtype,
#include <assert.h>
#include "private.h"
+#include "colls/colls.h"
+
+s_mpi_coll_description_t mpi_coll_alltoall_description[] = {
+ {"ompi",
+ "Ompi alltoall default collective",
+ smpi_coll_tuned_alltoall_ompi},
+
+ {"2dmesh",
+ "Alltoall 2dmesh collective",
+ smpi_coll_tuned_alltoall_2dmesh},
+ {"3dmesh",
+ "Alltoall 3dmesh collective",
+ smpi_coll_tuned_alltoall_3dmesh},
+ /*{"bruck",
+ "Alltoall Bruck collective",
+ smpi_coll_tuned_alltoall_bruck},*/
+ {"pair",
+ "Alltoall pair collective",
+ smpi_coll_tuned_alltoall_pair},
+ {"pair_light_barrier",
+ "Alltoall pair_light_barrier collective",
+ smpi_coll_tuned_alltoall_pair_light_barrier},
+ {"pair_mpi_barrier",
+ "Alltoall pair_mpi_barrier collective",
+ smpi_coll_tuned_alltoall_pair_mpi_barrier},
+ {"rdb",
+ "Alltoall rdb collective",
+ smpi_coll_tuned_alltoall_rdb},
+ {"ring",
+ "Alltoall ring collective",
+ smpi_coll_tuned_alltoall_ring},
+ {"ring_light_barrier",
+ "Alltoall ring_light_barrier collective",
+ smpi_coll_tuned_alltoall_ring_light_barrier},
+ {"ring_mpi_barrier",
+ "Alltoall ring_mpi_barrier collective",
+ smpi_coll_tuned_alltoall_ring_mpi_barrier},
+ {"ring_one_barrier",
+ "Alltoall ring_one_barrier collective",
+ smpi_coll_tuned_alltoall_ring_one_barrier},
+ {"simple",
+ "Alltoall simple collective",
+ smpi_coll_tuned_alltoall_simple},
+
+ {"bruck",
+ "Alltoall Bruck (SG) collective",
+ smpi_coll_tuned_alltoall_bruck},
+ {"basic_linear",
+ "Alltoall basic linear (SG) collective",
+ smpi_coll_tuned_alltoall_basic_linear},
+ {"pairwise",
+ "Alltoall pairwise (SG) collective",
+ smpi_coll_tuned_alltoall_pairwise},
+
+ {NULL, NULL, NULL} /* this array must be NULL terminated */
+};
+
+s_mpi_coll_description_t mpi_coll_allgather_description[] = {
+ {"default",
+ "allgather default collective",
+   smpi_mpi_allgather},
+
+ {NULL, NULL, NULL} /* this array must be NULL terminated */
+};
+
+
+/** Displays the long description of all registered collective algorithms of a category */
+void coll_help(const char *category, s_mpi_coll_description_t * table)
+{
+  int i;
+  printf("Long description of the %s collective algorithms accepted by this simulator:\n",
+         category);
+ for (i = 0; table[i].name; i++)
+ printf(" %s: %s\n", table[i].name, table[i].description);
+}
+
+int find_coll_description(s_mpi_coll_description_t * table,
+ const char *name)
+{
+ int i;
+ char *name_list = NULL;
+
+ for (i = 0; table[i].name; i++)
+ if (!strcmp(name, table[i].name)) {
+ return i;
+ }
+  name_list = xbt_strdup(table[0].name);
+ for (i = 1; table[i].name; i++) {
+ name_list =
+ xbt_realloc(name_list,
+ strlen(name_list) + strlen(table[i].name) + 3);
+ strcat(name_list, ", ");
+ strcat(name_list, table[i].name);
+ }
+  xbt_die("Collective '%s' is invalid! Valid collectives are: %s.", name, name_list);
+ return -1;
+}
+
+
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi,
"Logging specific to SMPI (coll)");
free_tree(tree);
}
+int smpi_coll_tuned_alltoall_ompi(void *sendbuf, int sendcount,
+ MPI_Datatype sendtype, void *recvbuf,
+ int recvcount, MPI_Datatype recvtype,
+ MPI_Comm comm)
+{
+ int size, sendsize;
+ size = smpi_comm_size(comm);
+ sendsize = smpi_datatype_size(sendtype) * sendcount;
+ if (sendsize < 200 && size > 12) {
+ return
+ smpi_coll_tuned_alltoall_bruck(sendbuf, sendcount, sendtype,
+ recvbuf, recvcount, recvtype,
+ comm);
+ } else if (sendsize < 3000) {
+ return
+ smpi_coll_tuned_alltoall_basic_linear(sendbuf, sendcount,
+ sendtype, recvbuf,
+ recvcount, recvtype, comm);
+ } else {
+ return
+ smpi_coll_tuned_alltoall_pairwise(sendbuf, sendcount, sendtype,
+ recvbuf, recvcount, recvtype,
+ comm);
+ }
+}
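To make the thresholds concrete (assuming a 4-byte MPI_INT): on 16 ranks, sending 10 ints per peer gives sendsize = 40, which is below 200 with more than 12 ranks, so the Bruck variant is chosen; 500 ints per peer (2000 bytes) falls below 3000 and selects basic_linear; 1000 ints per peer (4000 bytes) selects pairwise.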
+
/**
* Alltoall Bruck
*
}
/**
- * Alltoall basic_linear
+ * Alltoall basic_linear (STARMPI:alltoall-simple)
**/
int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount,
MPI_Datatype sendtype,
int MPI_Init(int *argc, char ***argv)
{
+ int alltoall_id = find_coll_description(mpi_coll_alltoall_description,
+ sg_cfg_get_string("smpi/alltoall"));
+ mpi_coll_alltoall_fun = (int (*)(void *, int, MPI_Datatype,
+ void*, int, MPI_Datatype, MPI_Comm))
+ mpi_coll_alltoall_description[alltoall_id].coll;
+
+ int allgather_id = find_coll_description(mpi_coll_allgather_description,
+ sg_cfg_get_string("smpi/allgather"));
+ mpi_coll_allgather_fun = (int (*)(void *, int, MPI_Datatype,
+ void*, int, MPI_Datatype, MPI_Comm))
+ mpi_coll_allgather_description[allgather_id].coll;
+
return PMPI_Init(argc, argv);
}
void *recvbuf, int recvcount, MPI_Datatype recvtype,
MPI_Comm comm)
{
- int retval, size, sendsize;
+ int retval;
smpi_bench_end();
#ifdef HAVE_TRACING
|| recvtype == MPI_DATATYPE_NULL) {
retval = MPI_ERR_TYPE;
} else {
- size = smpi_comm_size(comm);
- sendsize = smpi_datatype_size(sendtype) * sendcount;
- if (sendsize < 200 && size > 12) {
- retval =
- smpi_coll_tuned_alltoall_bruck(sendbuf, sendcount, sendtype,
- recvbuf, recvcount, recvtype,
- comm);
- } else if (sendsize < 3000) {
- retval =
- smpi_coll_tuned_alltoall_basic_linear(sendbuf, sendcount,
- sendtype, recvbuf,
- recvcount, recvtype, comm);
- } else {
- retval =
- smpi_coll_tuned_alltoall_pairwise(sendbuf, sendcount, sendtype,
- recvbuf, recvcount, recvtype,
- comm);
- }
+ retval = mpi_coll_alltoall_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
}
#ifdef HAVE_TRACING
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);