1 #include "colls_private.h"
6 int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount,
7 MPI_Datatype stype, void *rbuf,
8 int rcount, MPI_Datatype rtype,
11 int src, dst, comm_size, rank;
12 comm_size = smpi_comm_size(comm);
13 rank = smpi_comm_rank(comm);
14 MPI_Aint rextent, sextent;
15 rextent = smpi_datatype_get_extent(rtype);
16 sextent = smpi_datatype_get_extent(stype);
19 MPI_Request rrequest_array[128];
22 int i, send_offset, recv_offset;
23 int intra_rank, inter_rank;
24 intra_rank = rank % NUM_CORE;
25 inter_rank = rank / NUM_CORE;
26 int inter_comm_size = (comm_size + NUM_CORE - 1) / NUM_CORE;
27 int num_core_in_current_smp = NUM_CORE;
29 /* for too small number of processes, use default implementation */
30 if (comm_size <= NUM_CORE) {
31 XBT_WARN("MPI_allgather_SMP_NTS use default MPI_allgather.");
32 smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
36 // the last SMP node may have fewer number of running processes than all others
37 if (inter_rank == (inter_comm_size - 1)) {
38 num_core_in_current_smp = comm_size - (inter_rank * NUM_CORE);
40 //copy corresponding message from sbuf to rbuf
41 recv_offset = rank * rextent * rcount;
42 smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag,
43 ((char *) rbuf + recv_offset), rcount, rtype, rank, tag, comm,
46 //gather to root of each SMP
48 for (i = 1; i < num_core_in_current_smp; i++) {
51 (inter_rank * NUM_CORE) + (intra_rank + i) % (num_core_in_current_smp);
53 (inter_rank * NUM_CORE) + (intra_rank - i +
54 num_core_in_current_smp) %
55 (num_core_in_current_smp);
56 recv_offset = src * rextent * rcount;
58 smpi_mpi_sendrecv(sbuf, scount, stype, dst, tag,
59 ((char *) rbuf + recv_offset), rcount, rtype, src, tag, comm,
64 // INTER-SMP-ALLGATHER
65 // Every root of each SMP node post INTER-Sendrecv, then do INTRA-Bcast for each receiving message
66 // Use logical ring algorithm
69 if (intra_rank == 0) {
70 src = ((inter_rank - 1 + inter_comm_size) % inter_comm_size) * NUM_CORE;
71 dst = ((inter_rank + 1) % inter_comm_size) * NUM_CORE;
73 // post all inter Irecv
74 for (i = 0; i < inter_comm_size - 1; i++) {
76 ((inter_rank - i - 1 +
77 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
78 rrequest_array[i] = smpi_mpi_irecv((char *)rbuf+recv_offset, rcount * NUM_CORE, rtype, src, tag+i, comm);
84 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
85 smpi_mpi_isend((char *) rbuf + send_offset, scount * NUM_CORE, stype,
88 // loop : recv-inter , send-inter, send-intra (linear-bcast)
89 for (i = 0; i < inter_comm_size - 2; i++) {
91 ((inter_rank - i - 1 +
92 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
93 smpi_mpi_wait(&rrequest_array[i], &status);
94 smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype,
95 dst, tag + i + 1, comm);
96 if (num_core_in_current_smp > 1) {
97 request = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype,
98 (rank + 1), tag + i + 1, comm);
102 // recv last message and send_intra
104 ((inter_rank - i - 1 +
105 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
106 //recv_offset = ((inter_rank + 1) % inter_comm_size) * NUM_CORE * sextent * scount;
107 //i=inter_comm_size-2;
108 smpi_mpi_wait(&rrequest_array[i], &status);
109 if (num_core_in_current_smp > 1) {
110 request = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype,
111 (rank + 1), tag + i + 1, comm);
114 // last rank of each SMP
115 else if (intra_rank == (num_core_in_current_smp - 1)) {
116 for (i = 0; i < inter_comm_size - 1; i++) {
118 ((inter_rank - i - 1 +
119 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
120 request = smpi_mpi_irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype,
121 rank - 1, tag + i + 1, comm);
122 smpi_mpi_wait(&request, &status);
125 // intermediate rank of each SMP
127 for (i = 0; i < inter_comm_size - 1; i++) {
129 ((inter_rank - i - 1 +
130 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
131 request = smpi_mpi_irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype,
132 rank - 1, tag + i + 1, comm);
133 smpi_mpi_wait(&request, &status);
134 request = smpi_mpi_isend((char *) rbuf + recv_offset, (scount * NUM_CORE), stype,
135 (rank + 1), tag + i + 1, comm);