2 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3 * University Research and Technology
4 * Corporation. All rights reserved.
5 * Copyright (c) 2004-2006 The University of Tennessee and The University
6 * of Tennessee Research Foundation. All rights
8 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9 * University of Stuttgart. All rights reserved.
10 * Copyright (c) 2004-2005 The Regents of the University of California.
11 * All rights reserved.
14 * Additional copyrights may follow
20 #include "colls_private.h"
21 #include "coll_tuned_topo.h"
/* Value passed alongside the communicator to the smpi_mpi_send()/smpi_mpi_recv()
 * calls of the scatter implementations in this file. */
23 #define MCA_COLL_BASE_TAG_SCATTER 111
/*
 * Binomial-tree scatter.
 *
 * Builds an in-order binomial tree for (comm, root) and renumbers the ranks
 * as vrank = (rank - root + size) % size.  Data then flows down the tree:
 * non-root nodes receive from bmtree->tree_prev, and nodes with an even
 * vrank forward a slice to each entry of bmtree->tree_next[].
 *
 * When the root is not rank 0 the send buffer is first rotated into a
 * temporary buffer so that the blocks are ordered by vrank.
 *
 * Failures of smpi_datatype_copy() and malloc() record __LINE__ in `line`,
 * set `err` and jump to err_hndl; the XBT_DEBUG call at the end of the
 * function prints `line`, `err` and `rank`.
 */
26 smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount,
28 void *rbuf, int rcount,
43 ompi_coll_tree_t* bmtree;
/* Lower bound / extent of the send (s*) and receive (r*) datatypes. */
45 MPI_Aint sextent, slb, strue_lb, strue_extent;
46 MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
48 size = smpi_comm_size(comm);
49 rank = smpi_comm_rank(comm);
52 "smpi_coll_tuned_scatter_ompi_binomial rank %d", rank);
54 /* create the binomial tree (in-order variant, built for this comm/root) */
56 // COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
57 bmtree = ompi_coll_tuned_topo_build_in_order_bmtree( comm, root);//ompi_ data->cached_in_order_bmtree;
/* NOTE(review): the plain and the "true" lb/extent pairs are filled by the
 * very same smpi_datatype_extent() call, so as far as this function shows
 * strue_* equals s* and rtrue_* equals r* -- confirm this is intended. */
59 smpi_datatype_extent(sdtype, &slb, &sextent);
60 smpi_datatype_extent(sdtype, &strue_lb, &strue_extent);
61 smpi_datatype_extent(rdtype, &rlb, &rextent);
62 smpi_datatype_extent(rdtype, &rtrue_lb, &rtrue_extent);
/* rank relative to the root: the root itself gets vrank 0 */
64 vrank = (rank - root + size) % size;
68 /* root on 0, just use the send buffer */
70 if (rbuf != MPI_IN_PLACE) {
71 /* local copy to rbuf */
72 err = smpi_datatype_copy(sbuf, scount, sdtype,
73 rbuf, rcount, rdtype);
74 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
77 /* root is not on 0, allocate temp buffer for send:
 * one true extent plus (scount*size - 1) extents, i.e. room for
 * scount*size elements of sdtype */
78 tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
79 if (NULL == tempbuf) {
80 err = MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
85 /* and rotate the data so that it ends up in the right place:
 * first the blocks of ranks root..size-1 go to the start of ptmp ... */
86 err = smpi_datatype_copy((char *) sbuf + sextent*root*scount, scount*(size-root), sdtype,
87 ptmp, scount*(size-root), sdtype);
88 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* ... then the blocks of ranks 0..root-1 are appended right after them */
91 err = smpi_datatype_copy((char*)sbuf, scount*root, sdtype,
92 ptmp + sextent*scount*(size - root), scount*root, sdtype);
93 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
95 if (rbuf != MPI_IN_PLACE) {
96 /* local copy to rbuf: after the rotation the root's own block is the
 * first one in ptmp */
97 err = smpi_datatype_copy(ptmp, scount, sdtype,
98 rbuf, rcount, rdtype);
99 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
103 } else if (!(vrank % 2)) {
104 /* non-root, non-leaf nodes (even vrank), allocate temp buffer for recv
105 * the most we need is rcount*size/2
 * NOTE(review): the malloc below reserves room for rcount*size elements,
 * i.e. more than the bound stated above -- confirm whether intentional */
106 tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent);
107 if (NULL == tempbuf) {
108 err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
/* shift the working pointer by the receive datatype's lower bound */
111 ptmp = tempbuf - rlb;
118 /* leaf nodes, just use rbuf */
119 ptmp = (char *) rbuf;
124 /* recv from parent on non-root: up to rcount*size elements of rdtype
 * are accepted from bmtree->tree_prev into ptmp */
125 smpi_mpi_recv(ptmp, rcount*size, rdtype, bmtree->tree_prev,
126 MCA_COLL_BASE_TAG_SCATTER, comm, &status);
127 /* local copy to rbuf
 * NOTE(review): this copy sits on the non-root path but describes the
 * source with scount/sdtype; MPI_Scatter defines the send arguments as
 * significant only at root, so rcount/rdtype may be what is meant here
 * -- confirm against callers */
128 err = smpi_datatype_copy(ptmp, scount, sdtype,
129 rbuf, rcount, rdtype);
131 /* send to children on all non-leaf */
132 for (i = 0; i < bmtree->tree_nextsize; i++) {
133 int mycount = 0, vkid;
134 /* figure out how much data I have to send to this child:
 * the distance between the child's vrank and mine, capped so that it
 * does not run past the last vrank (size - vkid) */
135 vkid = (bmtree->tree_next[i] - root + size) % size;
136 mycount = vkid - vrank;
137 if (mycount > (size - vkid))
138 mycount = size - vkid;
/* total_send skips over what was already handed to earlier children */
141 smpi_mpi_send(ptmp + total_send*sextent, mycount, sdtype,
142 bmtree->tree_next[i],
143 MCA_COLL_BASE_TAG_SCATTER,
146 total_send += mycount;
152 /* recv from parent on leaf nodes: exactly rcount elements of rdtype */
153 smpi_mpi_recv(ptmp, rcount, rdtype, bmtree->tree_prev,
154 MCA_COLL_BASE_TAG_SCATTER, comm, &status);
/* error report: source file, line recorded before the goto, error code, rank */
163 XBT_DEBUG( "%s:%4d\tError occurred %d, rank %2d",
164 __FILE__, line, err, rank);
169 * Linear functions are copied from the BASIC coll module
170 * they do not segment the message and are simple implementations
171 * but for some small number of nodes and/or small data sizes they
172 * are just as fast as tuned/tree based segmenting operations
173 * and as such may be selected by the decision functions
174 * These are copied into this module because of the way modules are
175 * selected in V1; V2 will handle this differently, so the code will not
176 * have to be duplicated.
177 * JPG following the examples from other coll_tuned implementations. Dec06.
180 /* copied function (with appropriate renaming) starts here */
184 * Function: - basic scatter operation
185 * Accepts: - same arguments as MPI_Scatter()
186 * Returns: - MPI_SUCCESS or error code
189 smpi_coll_tuned_scatter_ompi_basic_linear(void *sbuf, int scount,
191 void *rbuf, int rcount,
197 int i, rank, size, err;
203 rank = smpi_comm_rank(comm);
204 size = smpi_comm_size(comm);
206 /* If not root, receive data. */
209 smpi_mpi_recv(rbuf, rcount, rdtype, root,
210 MCA_COLL_BASE_TAG_SCATTER,
211 comm, MPI_STATUS_IGNORE);
215 /* I am the root, loop sending data. */
217 err = smpi_datatype_extent(sdtype, &lb, &incr);
218 if (MPI_SUCCESS != err) {
219 return MPI_ERR_OTHER;
223 for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
225 /* simple optimization */
228 if (MPI_IN_PLACE != rbuf) {
230 smpi_datatype_copy(ptmp, scount, sdtype, rbuf, rcount,
234 smpi_mpi_send(ptmp, scount, sdtype, i,
235 MCA_COLL_BASE_TAG_SCATTER,
238 if (MPI_SUCCESS != err) {