2 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3 * University Research and Technology
4 * Corporation. All rights reserved.
5 * Copyright (c) 2004-2006 The University of Tennessee and The University
6 * of Tennessee Research Foundation. All rights
8 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9 * University of Stuttgart. All rights reserved.
10 * Copyright (c) 2004-2005 The Regents of the University of California.
11 * All rights reserved.
14 * Additional copyrights may follow
20 #include "colls_private.h"
21 #include "coll_tuned_topo.h"
/*
 * Scatter implemented over the in-order binomial tree built below for
 * (comm, root).
 *
 * NOTE(review): only part of this function is visible in this excerpt. The
 * rest of the parameter list, several local declarations, some branch
 * headers, the err_hndl label and the return statement are on lines not
 * shown, so the comments below describe the visible statements only.
 *
 * sbuf, scount : send buffer and element count; used together with sdtype
 *                as the source description of the local copies below.
 * rbuf, rcount : receive buffer and element count; used together with rdtype.
 *                rbuf is compared against MPI_IN_PLACE before local copies
 *                on the root.
 * sdtype, rdtype, root and comm are used below; their declarations are not
 * visible here.
 */
25 smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount,
27 void *rbuf, int rcount,
/* tree descriptor; its tree_prev, tree_next[] and tree_nextsize fields are read below */
42 ompi_coll_tree_t* bmtree;
/* lower bounds and extents of the send (s*) and receive (r*) datatypes, filled in below */
44 MPI_Aint sextent, slb, strue_lb, strue_extent;
45 MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
47 size = smpi_comm_size(comm);
48 rank = smpi_comm_rank(comm);
51 "smpi_coll_tuned_scatter_ompi_binomial rank %d", rank);
53 /* create the binomial tree */
55 // COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
/* the tree is rebuilt on every call here (see the FIXME near the end of the function) */
56 bmtree = ompi_coll_tuned_topo_build_in_order_bmtree( comm, root);//ompi_ data->cached_in_order_bmtree;
/* NOTE(review): the "true" lb/extent pairs are obtained with the same
 * smpi_datatype_extent() call as the regular pairs, so strue_* equals s* and
 * rtrue_* equals r* at this point. */
58 smpi_datatype_extent(sdtype, &slb, &sextent);
59 smpi_datatype_extent(sdtype, &strue_lb, &strue_extent);
60 smpi_datatype_extent(rdtype, &rlb, &rextent);
61 smpi_datatype_extent(rdtype, &rtrue_lb, &rtrue_extent);
/* virtual rank: ranks renumbered so that root maps to 0 */
63 vrank = (rank - root + size) % size;
67 /* root on 0, just use the send buffer */
69 if (rbuf != MPI_IN_PLACE) {
70 /* local copy to rbuf */
71 err = smpi_datatype_copy(sbuf, scount, sdtype,
72 rbuf, rcount, rdtype);
73 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
76 /* root is not on 0, allocate temp buffer for send */
/* size: one strue_extent plus (scount*size - 1) strides of sextent */
77 tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
78 if (NULL == tempbuf) {
79 err = MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
/* NOTE(review): ptmp is assigned for this branch on a line not shown here */
84 /* and rotate data so they will eventually be in the right place */
/* first copy: the blocks for ranks root..size-1 go to the start of ptmp */
85 err = smpi_datatype_copy((char *) sbuf + sextent*root*scount, scount*(size-root), sdtype,
86 ptmp, scount*(size-root), sdtype);
87 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* second copy: the blocks for ranks 0..root-1 are placed right after them */
90 err = smpi_datatype_copy((char*)sbuf, scount*root, sdtype,
91 ptmp + sextent*scount*(size - root), scount*root, sdtype);
92 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
94 if (rbuf != MPI_IN_PLACE) {
95 /* local copy to rbuf */
/* after the rotation the first block of ptmp is the root's own block */
96 err = smpi_datatype_copy(ptmp, scount, sdtype,
97 rbuf, rcount, rdtype);
98 if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
102 } else if (!(vrank % 2)) {
103 /* non-root, non-leaf nodes, allocate temp buffer for recv
104 * the most we need is rcount*size/2 */
/* NOTE(review): the allocation below is sized from rcount*size, i.e. more
 * than the rcount*size/2 the comment above says is needed. */
105 tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent);
106 if (NULL == tempbuf) {
107 err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
/* working pointer is the allocation shifted back by the receive datatype's lower bound */
110 ptmp = tempbuf - rlb;
117 /* leaf nodes, just use rbuf */
118 ptmp = (char *) rbuf;
123 /* recv from parent on non-root */
/* posts a receive for up to rcount*size elements from the tree parent; the
 * call's result is not stored in err */
124 smpi_mpi_recv(ptmp, rcount*size, rdtype, bmtree->tree_prev,
125 COLL_TAG_SCATTER, comm, &status);
126 /* local copy to rbuf */
/* NOTE(review): the source side of this copy is described with
 * scount/sdtype on a non-root rank; whether these were re-pointed to
 * rcount/rdtype on a line not shown here cannot be told from this excerpt
 * -- confirm. */
127 err = smpi_datatype_copy(ptmp, scount, sdtype,
128 rbuf, rcount, rdtype);
130 /* send to children on all non-leaf */
131 for (i = 0; i < bmtree->tree_nextsize; i++) {
132 int mycount = 0, vkid;
133 /* figure out how much data I have to send to this child */
/* vkid is the child's virtual rank, computed the same way as vrank */
134 vkid = (bmtree->tree_next[i] - root + size) % size;
/* distance to the child in virtual ranks, capped at size - vkid */
135 mycount = vkid - vrank;
136 if (mycount > (size - vkid))
137 mycount = size - vkid;
/* send starts total_send*sextent bytes into ptmp, i.e. past what earlier
 * iterations (and any initial total_send value set on a line not shown)
 * already account for */
140 smpi_mpi_send(ptmp + total_send*sextent, mycount, sdtype,
141 bmtree->tree_next[i],
145 total_send += mycount;
151 /* recv from parent on leaf nodes */
/* for leaves ptmp was set to rbuf above, so this receives rcount elements directly into rbuf */
152 smpi_mpi_recv(ptmp, rcount, rdtype, bmtree->tree_prev,
153 COLL_TAG_SCATTER, comm, &status);
155 //!FIXME : store the tree, as done in ompi, instead of calculating it each time ?
/* debug log for the error path: source file, the line saved in `line` before
 * each `goto err_hndl`, the error code and this process's rank */
164 XBT_DEBUG( "%s:%4d\tError occurred %d, rank %2d",
165 __FILE__, line, err, rank);
170 * Linear functions are copied from the BASIC coll module
171 * they do not segment the message and are simple implementations
172 * but for some small number of nodes and/or small data sizes they
173 * are just as fast as tuned/tree based segmenting operations
174 * and as such may be selected by the decision functions
175 * These are copied into this module because of the way modules are selected
176 * in V1; in V2 this will be handled differently, so the code will not
177 * have to be duplicated.
178 * JPG following the examples from other coll_tuned implementations. Dec06.
181 /* copied function (with appropriate renaming) starts here */
185 * Function: - basic scatter operation
186 * Accepts: - same arguments as MPI_Scatter()
187 * Returns: - MPI_SUCCESS or error code
190 smpi_coll_tuned_scatter_ompi_basic_linear(void *sbuf, int scount,
192 void *rbuf, int rcount,
198 int i, rank, size, err;
204 rank = smpi_comm_rank(comm);
205 size = smpi_comm_size(comm);
207 /* If not root, receive data. */
210 smpi_mpi_recv(rbuf, rcount, rdtype, root,
212 comm, MPI_STATUS_IGNORE);
216 /* I am the root, loop sending data. */
218 err = smpi_datatype_extent(sdtype, &lb, &incr);
219 if (MPI_SUCCESS != err) {
220 return MPI_ERR_OTHER;
224 for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
226 /* simple optimization */
229 if (MPI_IN_PLACE != rbuf) {
231 smpi_datatype_copy(ptmp, scount, sdtype, rbuf, rcount,
235 smpi_mpi_send(ptmp, scount, sdtype, i,
239 if (MPI_SUCCESS != err) {