1 /* Copyright (c) 2013-2017. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include "../colls_private.h"
10 /*****************************************************************************
12 * Function: alltoall_2dmesh_shoot
17 send_buff: send input buffer
18 send_count: number of elements to send
19 send_type: data type of elements being sent
20 recv_buff: receive output buffer
21 recv_count: number of elements to receive
22 recv_type: data type of elements being received
25 * Description: Function realizes the alltoall operation using the 2dmesh
26 algorithm. It performs an allgather operation in the x dimension,
27 then in the y dimension. Each node then extracts the data it needs.
28 The communication in each dimension follows "simple."
32 ****************************************************************************/
// Helper used by the alltoall implementation in this file, where it is called
// as alltoall_check_is_2dmesh(num_procs, &X, &Y) and a falsy result is treated
// as failure.
// NOTE(review): the body is not visible in this view. Presumably it tries to
// factor `num` into two mesh dimensions returned through `i` and `j` -- confirm
// against the full file.
33 static int alltoall_check_is_2dmesh(int num, int *i, int *j)
// All-to-all exchange over a logical X-by-Y mesh of ranks. Phase 1 exchanges
// whole send buffers among the ranks that share this rank's row; phase 2
// forwards the relevant blocks among the ranks that share this rank's column.
// NOTE(review): several statements of this function are not visible in this
// view (braces, the declaration of `extent`, the assignments of `rank`,
// `num_reqs` and `req_ptr`, and some branch keywords). The comments below
// describe only the statements that are visible.
58 int Coll_alltoall_2dmesh::alltoall(void *send_buff, int send_count,
59 MPI_Datatype send_type,
60 void *recv_buff, int recv_count,
61 MPI_Datatype recv_type, MPI_Comm comm)
// Status/request arrays for the non-blocking receives; `s` is the status of
// the blocking sendrecv calls.
63 MPI_Status *statuses, s;
64 MPI_Request *reqs, *req_ptr;;
67 char *tmp_buff1, *tmp_buff2;
68 int i, j, src, dst, rank, num_procs, count, num_reqs;
69 int X, Y, send_offset, recv_offset;
70 int my_row_base, my_col_base, src_row_base, block_size;
71 int tag = COLL_TAG_ALLTOALL;
// Communicator size and the extent of one send_type element.
74 num_procs = comm->size();
75 extent = send_type->get_extent();
// The helper receives the addresses of X and Y; a falsy result is the failure
// case. NOTE(review): the statement guarded by this `if` is not visible here.
77 if (not alltoall_check_is_2dmesh(num_procs, &X, &Y))
// A row is made of Y consecutive ranks: my_row_base is the first rank of this
// rank's row, my_col_base is this rank's index inside the row.
80 my_row_base = (rank / Y) * Y;
81 my_col_base = rank % Y;
// Size in bytes of one block (send_count elements of send_type).
83 block_size = extent * send_count;
// tmp_buff1: Y slots of num_procs blocks each (one slot per row member).
// tmp_buff2: Y blocks, used as a staging area in phase 2.
85 tmp_buff1 = (char *) smpi_get_tmp_sendbuffer(block_size * num_procs * Y);
86 tmp_buff2 = (char *) smpi_get_tmp_recvbuffer(block_size * Y);
// num_reqs entries for statuses and requests.
// NOTE(review): the computation of num_reqs is not visible in this view.
92 statuses = (MPI_Status *) xbt_malloc(num_reqs * sizeof(MPI_Status));
93 reqs = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request));
// Element count of a full send buffer: send_count elements for each process.
97 count = send_count * num_procs;
// Phase 1, receives: for each rank of this row, post a non-blocking receive of
// its whole send buffer into the slot of tmp_buff1 indexed by its column.
// NOTE(review): the later waitall waits on Y - 1 requests, so presumably the
// iteration where src == rank is skipped by a line not visible here.
99 for (i = 0; i < Y; i++) {
100 src = i + my_row_base;
104 recv_offset = (src % Y) * block_size * num_procs;
105 *(req_ptr++) = Request::irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm);
// Phase 1, sends: send this rank's whole send buffer to the ranks of its row.
108 for (i = 0; i < Y; i++) {
109 dst = i + my_row_base;
112 Request::send(send_buff, count, send_type, dst, tag, comm);
// Wait for the Y - 1 row receives.
115 Request::waitall(Y - 1, reqs, statuses);
// For each row member i, place the block that member holds for this rank into
// recv_buff at the position of rank (my_row_base + i).
118 for (i = 0; i < Y; i++) {
// send_offset: block number `rank` inside slot i of tmp_buff1.
119 send_offset = (rank * block_size) + (i * block_size * num_procs);
120 recv_offset = (my_row_base * block_size) + (i * block_size);
// When row member i is this rank itself, the block is taken from send_buff;
// the second sendrecv reads it from tmp_buff1 (presumably the else branch --
// the keyword is not visible here). Both receive from `rank` itself, so they
// presumably act as local copies.
122 if (i + my_row_base == rank)
123 Request::sendrecv((char *) send_buff + recv_offset, send_count, send_type,
125 (char *) recv_buff + recv_offset, recv_count, recv_type,
126 rank, tag, comm, &s);
129 Request::sendrecv(tmp_buff1 + send_offset, send_count, send_type,
131 (char *) recv_buff + recv_offset, recv_count, recv_type,
132 rank, tag, comm, &s);
// Phase 2, receives: for each rank of this column, post a non-blocking receive
// of Y blocks, stored in recv_buff starting at that sender's row base.
136 for (i = 0; i < X; i++) {
137 src = (i * Y + my_col_base);
140 src_row_base = (src / Y) * Y;
142 *(req_ptr++) = Request::irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y,
143 recv_type, src, tag, comm);
// Phase 2, sends: for each rank dst of this column, stage in tmp_buff2 the Y
// blocks that the members of this row hold for dst, then send them at once.
146 for (i = 0; i < X; i++) {
147 dst = (i * Y + my_col_base);
152 for (j = 0; j < Y; j++) {
// Block number `dst` inside slot j of tmp_buff1.
153 send_offset = (dst + j * num_procs) * block_size;
// Row member j being this rank itself: take the block from send_buff;
// the second sendrecv takes it from tmp_buff1 (presumably the else branch).
155 if (j + my_row_base == rank)
156 Request::sendrecv((char *) send_buff + dst * block_size, send_count,
157 send_type, rank, tag, tmp_buff2 + recv_offset, recv_count,
158 recv_type, rank, tag, comm, &s);
160 Request::sendrecv(tmp_buff1 + send_offset, send_count, send_type,
162 tmp_buff2 + recv_offset, recv_count, recv_type,
163 rank, tag, comm, &s);
// Advance to the next staging block.
// NOTE(review): the per-dst reset of recv_offset is not visible in this view.
165 recv_offset += block_size;
168 Request::send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
// Wait for the X - 1 column receives.
170 Request::waitall(X - 1, reqs, statuses);
// Release both temporary buffers.
173 smpi_free_tmp_buffer(tmp_buff1);
174 smpi_free_tmp_buffer(tmp_buff2);