1 #include "colls_private.h"
6 int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount,
7 MPI_Datatype stype, void *rbuf,
8 int rcount, MPI_Datatype rtype,
11 int src, dst, comm_size, rank;
12 comm_size = smpi_comm_size(comm);
13 rank = smpi_comm_rank(comm);
14 MPI_Aint rextent, sextent;
15 rextent = smpi_datatype_get_extent(rtype);
16 sextent = smpi_datatype_get_extent(stype);
19 MPI_Request rrequest_array[128];
20 MPI_Request srequest_array[128];
23 int i, send_offset, recv_offset;
24 int intra_rank, inter_rank;
25 intra_rank = rank % NUM_CORE;
26 inter_rank = rank / NUM_CORE;
27 int inter_comm_size = (comm_size + NUM_CORE - 1) / NUM_CORE;
28 int num_core_in_current_smp = NUM_CORE;
30 /* with too few processes to fill more than one SMP node, fall back to the default implementation */
31 if (comm_size <= NUM_CORE) {
32 XBT_WARN("MPI_allgather_SMP_NTS use default MPI_allgather.");
33 smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
37 // the last SMP node may have fewer running processes than the others
38 if (inter_rank == (inter_comm_size - 1)) {
39 num_core_in_current_smp = comm_size - (inter_rank * NUM_CORE);
41 //copy corresponding message from sbuf to rbuf
42 recv_offset = rank * rextent * rcount;
43 smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag,
44 ((char *) rbuf + recv_offset), rcount, rtype, rank, tag, comm,
47 //gather to root of each SMP
49 for (i = 1; i < num_core_in_current_smp; i++) {
52 (inter_rank * NUM_CORE) + (intra_rank + i) % (num_core_in_current_smp);
54 (inter_rank * NUM_CORE) + (intra_rank - i +
55 num_core_in_current_smp) %
56 (num_core_in_current_smp);
57 recv_offset = src * rextent * rcount;
59 smpi_mpi_sendrecv(sbuf, scount, stype, dst, tag,
60 ((char *) rbuf + recv_offset), rcount, rtype, src, tag, comm,
65 // INTER-SMP-ALLGATHER
66 // The root of each SMP node posts the INTER-Sendrecv, then does an INTRA-Bcast for each received message
67 // Use logical ring algorithm
70 if (intra_rank == 0) {
71 src = ((inter_rank - 1 + inter_comm_size) % inter_comm_size) * NUM_CORE;
72 dst = ((inter_rank + 1) % inter_comm_size) * NUM_CORE;
74 // post all inter Irecv
75 for (i = 0; i < inter_comm_size - 1; i++) {
77 ((inter_rank - i - 1 +
78 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
79 rrequest_array[i] = smpi_mpi_irecv((char *)rbuf+recv_offset, rcount * NUM_CORE, rtype, src, tag+i, comm);
85 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
86 srequest_array[0] = smpi_mpi_isend((char *) rbuf + send_offset, scount * NUM_CORE, stype, dst, tag,
89 // loop : recv-inter , send-inter, send-intra (linear-bcast)
90 for (i = 0; i < inter_comm_size - 2; i++) {
92 ((inter_rank - i - 1 +
93 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
94 smpi_mpi_wait(&rrequest_array[i], &status);
95 srequest_array[i + 1] = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype, dst,
97 if (num_core_in_current_smp > 1) {
98 request = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype,
99 (rank + 1), tag + i + 1, comm);
103 // recv last message and send_intra
105 ((inter_rank - i - 1 +
106 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
107 //recv_offset = ((inter_rank + 1) % inter_comm_size) * NUM_CORE * sextent * scount;
108 //i=inter_comm_size-2;
109 smpi_mpi_wait(&rrequest_array[i], &status);
110 if (num_core_in_current_smp > 1) {
111 request = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype,
112 (rank + 1), tag + i + 1, comm);
115 // last rank of each SMP
116 else if (intra_rank == (num_core_in_current_smp - 1)) {
117 for (i = 0; i < inter_comm_size - 1; i++) {
119 ((inter_rank - i - 1 +
120 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
121 request = smpi_mpi_irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype,
122 rank - 1, tag + i + 1, comm);
123 smpi_mpi_wait(&request, &status);
126 // intermediate rank of each SMP
128 for (i = 0; i < inter_comm_size - 1; i++) {
130 ((inter_rank - i - 1 +
131 inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
132 request = smpi_mpi_irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype,
133 rank - 1, tag + i + 1, comm);
134 smpi_mpi_wait(&request, &status);
135 request = smpi_mpi_isend((char *) rbuf + recv_offset, (scount * NUM_CORE), stype,
136 (rank + 1), tag + i + 1, comm);