1 /* Copyright (c) 2013-2017. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
8 * ompi_coll_tuned_allgather_intra_neighborexchange
10 * Function: allgather using N/2 steps (O(N))
11 * Accepts: Same arguments as MPI_Allgather
12 * Returns: MPI_SUCCESS or error code
14 * Description: Neighbor Exchange algorithm for allgather.
15 * Described by Chen et al. in
16 * "Performance Evaluation of Allgather Algorithms on
17 * Terascale Linux Cluster with Fast Ethernet",
18 * Proceedings of the Eighth International Conference on
19 * High-Performance Computing in Asia-Pacific Region
22 * Rank r exchanges message with one of its neighbors and
23 * forwards the data further in the next step.
25 * No additional memory requirements.
27 * Limitations: Algorithm works only on even number of processes.
28 * For odd number of processes we switch to ring algorithm.
33 * [0] [ ] [ ] [ ] [ ] [ ]
34 * [ ] [1] [ ] [ ] [ ] [ ]
35 * [ ] [ ] [2] [ ] [ ] [ ]
36 * [ ] [ ] [ ] [3] [ ] [ ]
37 * [ ] [ ] [ ] [ ] [4] [ ]
38 * [ ] [ ] [ ] [ ] [ ] [5]
41 * [0] [0] [ ] [ ] [ ] [ ]
42 * [1] [1] [ ] [ ] [ ] [ ]
43 * [ ] [ ] [2] [2] [ ] [ ]
44 * [ ] [ ] [3] [3] [ ] [ ]
45 * [ ] [ ] [ ] [ ] [4] [4]
46 * [ ] [ ] [ ] [ ] [5] [5]
49 * [0] [0] [0] [ ] [ ] [0]
50 * [1] [1] [1] [ ] [ ] [1]
51 * [ ] [2] [2] [2] [2] [ ]
52 * [ ] [3] [3] [3] [3] [ ]
53 * [4] [ ] [ ] [4] [4] [4]
54 * [5] [ ] [ ] [5] [5] [5]
57 * [0] [0] [0] [0] [0] [0]
58 * [1] [1] [1] [1] [1] [1]
59 * [2] [2] [2] [2] [2] [2]
60 * [3] [3] [3] [3] [3] [3]
61 * [4] [4] [4] [4] [4] [4]
62 * [5] [5] [5] [5] [5] [5]
65 #include "../colls_private.h"
71 Coll_allgather_ompi_neighborexchange::allgather(void *sbuf, int scount,
73 void* rbuf, int rcount,
80 int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from;
83 ptrdiff_t slb, rlb, sext, rext;
84 char *tmpsend = NULL, *tmprecv = NULL;
91 "coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
93 return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
99 "coll:tuned:allgather_intra_neighborexchange rank %d", rank);
101 err = sdtype->extent(&slb, &sext);
102 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
104 err = rdtype->extent(&rlb, &rext);
105 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
107 /* Initialization step:
108 - if send buffer is not MPI_IN_PLACE, copy send buffer to appropriate block
111 tmprecv = (char*) rbuf + rank * rcount * rext;
112 if (MPI_IN_PLACE != sbuf) {
113 tmpsend = (char*) sbuf;
114 Datatype::copy (tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
117 /* Determine neighbors, order in which blocks will arrive, etc. */
118 even_rank = not(rank % 2);
120 neighbor[0] = (rank + 1) % size;
121 neighbor[1] = (rank - 1 + size) % size;
122 recv_data_from[0] = rank;
123 recv_data_from[1] = rank;
124 offset_at_step[0] = (+2);
125 offset_at_step[1] = (-2);
127 neighbor[0] = (rank - 1 + size) % size;
128 neighbor[1] = (rank + 1) % size;
129 recv_data_from[0] = neighbor[0];
130 recv_data_from[1] = neighbor[0];
131 offset_at_step[0] = (-2);
132 offset_at_step[1] = (+2);
135 /* Communication loop:
136 - First step is special: exchange a single block with neighbor[0].
138 update recv_data_from according to offset, and
139 exchange two blocks with appropriate neighbor.
140 the send location becomes previous receve location.
142 tmprecv = (char*)rbuf + neighbor[0] * rcount * rext;
143 tmpsend = (char*)rbuf + rank * rcount * rext;
145 Request::sendrecv(tmpsend, rcount, rdtype, neighbor[0],
147 tmprecv, rcount, rdtype, neighbor[0],
149 comm, MPI_STATUS_IGNORE);
151 /* Determine initial sending location */
153 send_data_from = rank;
155 send_data_from = recv_data_from[0];
158 for (i = 1; i < (size / 2); i++) {
159 const int i_parity = i % 2;
160 recv_data_from[i_parity] =
161 (recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
163 tmprecv = (char*)rbuf + recv_data_from[i_parity] * rcount * rext;
164 tmpsend = (char*)rbuf + send_data_from * rcount * rext;
167 Request::sendrecv(tmpsend, 2 * rcount, rdtype,
170 tmprecv, 2 * rcount, rdtype,
173 comm, MPI_STATUS_IGNORE);
175 send_data_from = recv_data_from[i_parity];
181 XBT_DEBUG( "%s:%4d\tError occurred %d, rank %2d",
182 __FILE__, line, err, rank);