/* Assumed includes for this listing: colls_private.h is SimGrid's private
 * SMPI collectives header (MPI types, XBT logging); math.h provides sqrt()
 * for the mesh-factorization helper. */
#include <math.h>
#include "colls_private.h"

XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi,
                                "Logging specific to SMPI collectives");

/*****************************************************************************

 * Function: alltoall_2dmesh_shoot

 * Return: int

 * Inputs:
     send_buff: send input buffer
     send_count: number of elements to send
     send_type: data type of elements being sent
     recv_buff: receive output buffer
     recv_count: number of elements to receive
     recv_type: data type of elements being received
     comm: communicator

 * Descrp: Realizes the alltoall operation using the 2dmesh algorithm. It
           performs an allgather in the x dimension, then in the y dimension;
           each node then extracts the data it needs. The communication in
           each dimension follows the "simple" pattern.

****************************************************************************/

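/* Example (hypothetical sizes): with num_procs = 6, the helper below factors
 * the ranks into an X = 2 by Y = 3 mesh. Rank 4 then has my_row_base = 3 and
 * my_col_base = 1: phase 1 exchanges whole send buffers within its row
 * {3, 4, 5}, and phase 2 exchanges re-packed blocks within its column
 * {1, 4}. */
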
static int alltoall_check_is_2dmesh(int num, int *i, int *j)
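{
  /* The helper's body is elided in this listing; the sketch below is a
   * reconstruction of its evident contract: factor num as X * Y with
   * X <= Y, report the factors through *i and *j, and return 1 on success
   * or 0 when num admits no such mesh. */
  int x, max = num / 2;

  x = (int) sqrt((double) num);
  while (x <= max) {
    if ((num % x) == 0) {
      *i = x;
      *j = num / x;
      if (*i > *j) {            /* keep *i <= *j */
        x = *i;
        *i = *j;
        *j = x;
      }
      return 1;
    }
    x++;
  }
  return 0;
}
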
int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count,
                                    MPI_Datatype send_type,
                                    void *recv_buff, int recv_count,
                                    MPI_Datatype recv_type, MPI_Comm comm)
{
  MPI_Status *statuses, s;
  MPI_Request *reqs, *req_ptr;
  MPI_Aint extent;

  char *tmp_buff1, *tmp_buff2;
  int i, j, src, dst, rank, num_procs, count, num_reqs;
  int X, Y, send_offset, recv_offset;
  int my_row_base, my_col_base, src_row_base, block_size;
  int tag = 1, failure = 0, success = 1;

  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &num_procs);
  MPI_Type_extent(send_type, &extent);

  if (!alltoall_check_is_2dmesh(num_procs, &X, &Y))
    return failure;

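  /* Ranks are laid out row-major on the X x Y mesh: rank r sits in row
   * r / Y, whose first member is (r / Y) * Y, and in column r % Y, whose
   * members are i * Y + r % Y for 0 <= i < X. */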
  my_row_base = (rank / Y) * Y;
  my_col_base = rank % Y;

  block_size = extent * send_count;   /* bytes in one rank-to-rank block */

  /* tmp_buff1 gathers the full send buffer of every row-mate */
  tmp_buff1 = (char *) malloc(block_size * num_procs * Y);
  if (!tmp_buff1) {
    XBT_WARN("cannot allocate memory for tmp_buff1");
    return failure;
  }

  /* tmp_buff2 stages the Y blocks re-packed for one column peer */
  tmp_buff2 = (char *) malloc(block_size * Y);
  if (!tmp_buff2) {
    XBT_WARN("cannot allocate memory for tmp_buff2");
    return failure;
  }

  num_reqs = (X > Y) ? X : Y;   /* enough slots for either dimension */

  statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
  reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
  if (!statuses || !reqs) {
    XBT_WARN("cannot allocate memory for the request and status arrays");
    return failure;
  }

  req_ptr = reqs;

  send_offset = recv_offset = (rank % Y) * block_size * num_procs;

  count = send_count * num_procs;

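  /* Phase 1: row-wise allgather. Post receives for the whole send buffer of
   * each of the Y - 1 row-mates; rank src's buffer lands in tmp_buff1 at
   * offset (src % Y) * num_procs * block_size. */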
  for (i = 0; i < Y; i++) {
    src = i + my_row_base;
    if (src == rank)
      continue;
    recv_offset = (src % Y) * block_size * num_procs;
    MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
              req_ptr++);
  }

  for (i = 0; i < Y; i++) {
    dst = i + my_row_base;
    if (dst == rank)
      continue;
    MPI_Send(send_buff, count, send_type, dst, tag, comm);
  }

  MPI_Waitall(Y - 1, reqs, statuses);
  req_ptr = reqs;               /* reuse the request array for phase 2 */

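  /* From the gathered row data, copy into recv_buff the block that each row
   * member addressed to this rank, taking our own contribution straight
   * from send_buff. */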
  for (i = 0; i < Y; i++) {
    send_offset = (rank * block_size) + (i * block_size * num_procs);
    recv_offset = (my_row_base * block_size) + (i * block_size);

    if (i + my_row_base == rank)
      MPI_Sendrecv((char *) send_buff + recv_offset, send_count, send_type,
                   rank, tag,
                   (char *) recv_buff + recv_offset, recv_count, recv_type,
                   rank, tag, comm, &s);
    else
      MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
                   rank, tag,
                   (char *) recv_buff + recv_offset, recv_count, recv_type,
                   rank, tag, comm, &s);
  }

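  /* Phase 2: column-wise exchange. Each column peer src sends the Y blocks
   * that its whole row addressed to this rank; they are received directly
   * into recv_buff at the positions belonging to src's row. */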
  for (i = 0; i < X; i++) {
    src = (i * Y + my_col_base);
    if (src == rank)
      continue;
    src_row_base = (src / Y) * Y;
    MPI_Irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y,
              recv_type, src, tag, comm, req_ptr++);
  }

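  /* For each column peer dst, pack into tmp_buff2 the block addressed to
   * dst from every member of this row (our own contribution comes from
   * send_buff), then ship the packed strip in a single message. */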
  for (i = 0; i < X; i++) {
    dst = (i * Y + my_col_base);
    if (dst == rank)
      continue;

    recv_offset = 0;
    for (j = 0; j < Y; j++) {
      send_offset = (dst + j * num_procs) * block_size;

      if (j + my_row_base == rank)
        MPI_Sendrecv((char *) send_buff + dst * block_size, send_count,
                     send_type, rank, tag,
                     tmp_buff2 + recv_offset, recv_count, recv_type,
                     rank, tag, comm, &s);
      else
        MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
                     rank, tag,
                     tmp_buff2 + recv_offset, recv_count, recv_type,
                     rank, tag, comm, &s);

      recv_offset += block_size;
    }

    MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
  }

  MPI_Waitall(X - 1, reqs, statuses);

  free(statuses);
  free(reqs);
  free(tmp_buff1);
  free(tmp_buff2);
  return success;
}