1 #include "colls_private.h"
3 //#include <star-reduction.c>
5 int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count,
6 MPI_Datatype datatype, MPI_Op op, int root,
11 int mask, relrank, source;
13 int tag = COLL_TAG_REDUCE;
16 MPI_Aint true_lb, true_extent;
19 rank = smpi_comm_rank(comm);
20 comm_size = smpi_comm_size(comm);
22 extent = smpi_datatype_get_extent(datatype);
24 tmp_buf = (void *) xbt_malloc(count * extent);
25 int is_commutative = smpi_op_is_commute(op);
26 smpi_mpi_sendrecv(sendbuf, count, datatype, rank, tag,
27 recvbuf, count, datatype, rank, tag, comm, &status);
35 relrank = (rank - lroot + comm_size) % comm_size;
37 smpi_datatype_extent(datatype, &true_lb, &true_extent);
39 /* adjust for potential negative lower bound in datatype */
40 tmp_buf = (void *)((char*)tmp_buf - true_lb);
42 /* If I'm not the root, then my recvbuf may not be valid, therefore
43 I have to allocate a temporary one */
45 recvbuf = (void *) malloc(count*(max(extent,true_extent)));
46 recvbuf = (void *)((char*)recvbuf - true_lb);
48 if ((rank != root) || (sendbuf != MPI_IN_PLACE)) {
49 smpi_datatype_copy(sendbuf, count, datatype, recvbuf,count, datatype);
52 while (mask < comm_size) {
54 if ((mask & relrank) == 0) {
55 source = (relrank | mask);
56 if (source < comm_size) {
57 source = (source + lroot) % comm_size;
58 smpi_mpi_recv(tmp_buf, count, datatype, source, tag, comm, &status);
61 smpi_op_apply(op, tmp_buf, recvbuf, &count, &datatype);
63 smpi_op_apply(op, recvbuf, tmp_buf, &count, &datatype);
64 smpi_datatype_copy(tmp_buf, count, datatype,recvbuf, count, datatype);
68 dst = ((relrank & (~mask)) + lroot) % comm_size;
69 smpi_mpi_send(recvbuf, count, datatype, dst, tag, comm);
75 if (!is_commutative && (root != 0)){
77 smpi_mpi_send(recvbuf, count, datatype, root,tag, comm);
78 }else if (rank == root){
79 smpi_mpi_recv(recvbuf, count, datatype, 0, tag, comm, &status);