1 /* Copyright (c) 2013-2023. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
8 * ompi_coll_tuned_allgather_intra_neighborexchange
10 * Function: allgather using N/2 steps (O(N))
11 * Accepts: Same arguments as MPI_Allgather
12 * Returns: MPI_SUCCESS or error code
14 * Description: Neighbor Exchange algorithm for allgather.
15 * Described by Chen et al. in
16 * "Performance Evaluation of Allgather Algorithms on
17 * Terascale Linux Cluster with Fast Ethernet",
18 * Proceedings of the Eighth International Conference on
19 * High-Performance Computing in Asia-Pacific Region
22 * Rank r exchanges message with one of its neighbors and
23 * forwards the data further in the next step.
25 * No additional memory requirements.
27 * Limitations: Algorithm works only on even number of processes.
28 * For odd number of processes we switch to ring algorithm.
33 * [0] [ ] [ ] [ ] [ ] [ ]
34 * [ ] [1] [ ] [ ] [ ] [ ]
35 * [ ] [ ] [2] [ ] [ ] [ ]
36 * [ ] [ ] [ ] [3] [ ] [ ]
37 * [ ] [ ] [ ] [ ] [4] [ ]
38 * [ ] [ ] [ ] [ ] [ ] [5]
41 * [0] [0] [ ] [ ] [ ] [ ]
42 * [1] [1] [ ] [ ] [ ] [ ]
43 * [ ] [ ] [2] [2] [ ] [ ]
44 * [ ] [ ] [3] [3] [ ] [ ]
45 * [ ] [ ] [ ] [ ] [4] [4]
46 * [ ] [ ] [ ] [ ] [5] [5]
49 * [0] [0] [0] [ ] [ ] [0]
50 * [1] [1] [1] [ ] [ ] [1]
51 * [ ] [2] [2] [2] [2] [ ]
52 * [ ] [3] [3] [3] [3] [ ]
53 * [4] [ ] [ ] [4] [4] [4]
54 * [5] [ ] [ ] [5] [5] [5]
57 * [0] [0] [0] [0] [0] [0]
58 * [1] [1] [1] [1] [1] [1]
59 * [2] [2] [2] [2] [2] [2]
60 * [3] [3] [3] [3] [3] [3]
61 * [4] [4] [4] [4] [4] [4]
62 * [5] [5] [5] [5] [5] [5]
65 #include "../colls_private.hpp"
67 namespace simgrid::smpi {
// Neighbor-exchange allgather over the communicator comm.
// NOTE(review): several lines of this function (return type, part of the
// parameter list, some declarations, branch conditions and braces) are not
// visible in this excerpt; the comments below describe only the statements
// that are visible.
// Visible behaviour:
//   - per the debug message, an odd communicator size is delegated to
//     allgather__ring with the same buffers, counts and datatypes;
//   - otherwise one block of rcount elements is first swapped with
//     neighbor[0], then the loop runs for i = 1 .. size / 2 - 1 and each
//     iteration exchanges 2 * rcount elements;
//   - a failing extent() query records __LINE__ and jumps to err_hndl,
//     where (presumably) the XBT_DEBUG at the end of the function reports it.
70 allgather__ompi_neighborexchange(const void *sbuf, int scount,
72 void* rbuf, int rcount,
79 int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from;
82 ptrdiff_t slb, rlb, sext, rext;
83 char *tmpsend = nullptr, *tmprecv = nullptr;
90 "coll:tuned:allgather_intra_neighborexchange: odd size %d, switching to ring algorithm",
92 return allgather__ring(sbuf, scount, sdtype,
98 "coll:tuned:allgather_intra_neighborexchange rank %d", rank);
// Query the extents of the send and receive datatypes. Only rext is used by
// the pointer arithmetic visible below; slb, rlb and sext are not read in the
// visible code.
100 err = sdtype->extent(&slb, &sext);
101 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
103 err = rdtype->extent(&rlb, &rext);
104 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
106 /* Initialization step:
107 - if send buffer is not MPI_IN_PLACE, copy send buffer to appropriate block
110 tmprecv = (char*) rbuf + rank * rcount * rext;
// tmprecv now addresses the slot of this rank inside rbuf (block index rank,
// block size rcount * rext bytes). When sbuf is not MPI_IN_PLACE the local
// contribution is copied from sbuf into that slot.
111 if (MPI_IN_PLACE != sbuf) {
112 tmpsend = (char*) sbuf;
113 Datatype::copy (tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
116 /* Determine neighbors, order in which blocks will arrive, etc. */
// even_rank is non-zero exactly when rank is even.
117 even_rank = not(rank % 2);
// Two alternative parameter sets follow; the lines selecting between them are
// not visible here (presumably if (even_rank) / else -- TODO confirm).
// First set: first partner is rank + 1, received-block bookkeeping starts at
// rank, and the block index moves by +2 / -2 on alternating steps.
119 neighbor[0] = (rank + 1) % size;
120 neighbor[1] = (rank - 1 + size) % size;
121 recv_data_from[0] = rank;
122 recv_data_from[1] = rank;
123 offset_at_step[0] = (+2);
124 offset_at_step[1] = (-2);
// Second set: first partner is rank - 1, bookkeeping starts at the block
// index of that partner, and the step offsets are mirrored (-2 / +2).
126 neighbor[0] = (rank - 1 + size) % size;
127 neighbor[1] = (rank + 1) % size;
128 recv_data_from[0] = neighbor[0];
129 recv_data_from[1] = neighbor[0];
130 offset_at_step[0] = (-2);
131 offset_at_step[1] = (+2);
134 /* Communication loop:
135 - First step is special: exchange a single block with neighbor[0].
137 update recv_data_from according to offset, and
138 exchange two blocks with appropriate neighbor.
139 the send location becomes previous receive location.
141 tmprecv = (char*)rbuf + neighbor[0] * rcount * rext;
142 tmpsend = (char*)rbuf + rank * rcount * rext;
144 Request::sendrecv(tmpsend, rcount, rdtype, neighbor[0],
146 tmprecv, rcount, rdtype, neighbor[0],
148 comm, MPI_STATUS_IGNORE);
150 /* Determine initial sending location */
// Two alternative starting blocks for the two-block sends of the loop: the
// own block of this rank, or recv_data_from[0]. The selecting condition is
// not visible here (presumably even_rank -- TODO confirm).
152 send_data_from = rank;
154 send_data_from = recv_data_from[0];
157 for (i = 1; i < (size / 2); i++) {
// Steps alternate between bookkeeping slot 0 and slot 1 (i % 2). The block
// index received into is advanced by the offset of that slot, wrapped
// modulo size, and 2 * rcount elements are exchanged in place inside rbuf.
// NOTE(review): recv_data_from[i_parity] * rcount and send_data_from * rcount
// are int * int products evaluated before the multiplication by the
// ptrdiff_t rext, so they can overflow int when size * rcount is large;
// consider casting the first operand to ptrdiff_t. The similar offset
// expressions earlier in the function have the same shape.
158 const int i_parity = i % 2;
159 recv_data_from[i_parity] =
160 (recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
162 tmprecv = (char*)rbuf + recv_data_from[i_parity] * rcount * rext;
163 tmpsend = (char*)rbuf + send_data_from * rcount * rext;
166 Request::sendrecv(tmpsend, 2 * rcount, rdtype,
169 tmprecv, 2 * rcount, rdtype,
172 comm, MPI_STATUS_IGNORE);
// The pair of blocks just received is what gets forwarded in the next step.
174 send_data_from = recv_data_from[i_parity];
// Error report: logs the source file, the line recorded at the failing check,
// the error code and the rank.
180 XBT_DEBUG( "%s:%4d\tError occurred %d, rank %2d",
181 __FILE__, line, err, rank);
185 } // namespace simgrid::smpi