1 /* Copyright (c) 2013-2022. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
8 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
9 * University Research and Technology
10 * Corporation. All rights reserved.
11 * Copyright (c) 2004-2009 The University of Tennessee and The University
12 * of Tennessee Research Foundation. All rights
14 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
15 * University of Stuttgart. All rights reserved.
16 * Copyright (c) 2004-2005 The Regents of the University of California.
17 * All rights reserved.
19 * Additional copyrights may follow
21 /* -*- Mode: C; c-basic-offset:4 ; -*- */
22 /* Copyright (c) 2001-2014, The Ohio State University. All rights
25 * This file is part of the MVAPICH2 software package developed by the
26 * team members of The Ohio State University's Network-Based Computing
27 * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
29 * For detailed copyright and licensing information, please refer to the
30 * copyright file COPYRIGHT in the top level MVAPICH2 directory.
34 * (C) 2001 by Argonne National Laboratory.
35 * See COPYRIGHT in top-level directory.
37 #include "../colls_private.hpp"
38 namespace simgrid::smpi {
// SMP-aware allgather (MVAPICH2-derived). The visible code proceeds in three
// stages:
//   1. a gather rooted at local rank 0 of the intra-node communicator,
//   2. an exchange of the per-node chunks between the leaders (only when more
//      than one leader exists),
//   3. a broadcast of the whole receive buffer from local rank 0 over the
//      intra-node communicator.
//
// sendbuf / sendcnt / sendtype: the caller's contribution, forwarded to the
//   intra-node gather. sendbuf is also compared against MPI_IN_PLACE.
// recvbuf: target of the gather, send and receive area of the leader exchange,
//   and the buffer that is finally broadcast.
// recvcnt / recvtype: per-process element count and datatype; all byte offsets
//   into recvbuf are derived from recvtype->get_extent().
// comm: used throughout, but its declaration is not on any line visible here.
//   NOTE(review): presumably the trailing MPI_Comm parameter - confirm.
//
// Throws std::invalid_argument when comm is not uniform or not blocked.
// NOTE(review): the return statement is not visible here; the function
//   presumably returns mpi_errno - confirm.
40 int allgather__mvapich2_smp(const void *sendbuf,int sendcnt, MPI_Datatype sendtype,
41 void *recvbuf, int recvcnt,MPI_Datatype recvtype,
// NOTE(review): `rank`, `size` and `i` are used further down but are not
// declared on any line visible here - confirm their declarations and that
// rank/size are taken from comm.
45 int local_rank, local_size;
46 int leader_comm_size = 0;
47 int mpi_errno = MPI_SUCCESS;
48 MPI_Aint recvtype_extent = 0; /* Datatype extent */
49 MPI_Comm shmem_comm, leader_comm;
// Special handling when comm has no leaders communicator yet.
// NOTE(review): the body of this branch is not visible here; presumably it
// sets up the SMP sub-communicators - confirm.
51 if(comm->get_leaders_comm()==MPI_COMM_NULL){
// The algorithm refuses irregular deployments: both is_uniform() and
// is_blocked() must hold, otherwise std::invalid_argument is thrown.
55 if (not comm->is_uniform() || not comm->is_blocked())
56 throw std::invalid_argument("allgather MVAPICH2 smp algorithm can't be used with irregular deployment. Please "
57 "insure that processes deployed on the same node are contiguous and that each node has "
58 "the same number of processes");
67 /* extract the rank,size information for the intra-node communicator */
68 recvtype_extent=recvtype->get_extent();
70 shmem_comm = comm->get_intra_comm();
71 local_rank = shmem_comm->rank();
72 local_size = shmem_comm->size();
// Only local rank 0 looks up the leaders communicator; on every other rank
// leader_comm_size keeps its initial value 0.
74 if (local_rank == 0) {
75 /* Node leader. Extract the rank, size information for the leader communicator */
76 leader_comm = comm->get_leaders_comm();
// Fall back to MPI_COMM_WORLD when no leaders communicator is available.
77 if(leader_comm==MPI_COMM_NULL){
78 leader_comm = MPI_COMM_WORLD;
80 leader_comm_size = leader_comm->size();
83 /*If there is just one node, after gather itself,
84 * root has all the data and it can do bcast*/
// Gather rooted at 0 on shmem_comm whose receive pointer is advanced by
// rank * recvcnt * recvtype_extent bytes into recvbuf.
// NOTE(review): the enclosing condition and the assignment target of this
// call are not visible here; presumably this is the local-rank-0 path and
// the result goes to mpi_errno - confirm.
87 colls::gather(sendbuf, sendcnt, sendtype, (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
88 recvcnt, recvtype, 0, shmem_comm);
90 /*Since in allgather all the processes could have
91 * its own data in place*/
// With MPI_IN_PLACE the contribution is read from this rank's own slot of
// recvbuf (offset rank * recvcnt * recvtype_extent) and sent with
// recvcnt/recvtype; otherwise sendbuf/sendcnt/sendtype are sent as given.
// NOTE(review): the else lines separating these two calls are not visible.
92 if(sendbuf == MPI_IN_PLACE) {
93 mpi_errno = colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), recvcnt, recvtype,
94 recvbuf, recvcnt, recvtype, 0, shmem_comm);
96 mpi_errno = colls::gather(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, 0, shmem_comm);
99 /* Exchange the data between the node leaders*/
// Skipped on non-leaders and when there is a single leader.
100 if (local_rank == 0 && (leader_comm_size > 1)) {
101 /*When data in each socket is different*/
// NOTE(review): the guard near the top of the function already throws when
// comm->is_uniform() is false, so this branch looks unreachable unless that
// state can change in between - confirm.
102 if (not comm->is_uniform()) {
104 int* node_sizes = nullptr;
107 node_sizes = comm->get_non_uniform_map();
// Per-leader displacement and count arrays for the allgatherv below, sized by
// the number of leaders and allocated with new[].
// NOTE(review): no matching delete[] is visible here - verify both arrays are
// released after the allgatherv.
109 int* displs = new int[leader_comm_size];
110 int* recvcnts = new int[leader_comm_size];
111 recvcnts[0] = node_sizes[0] * recvcnt;
// displs is a running sum of the preceding node sizes scaled by recvcnt.
// NOTE(review): the loop reads displs[i - 1] starting at i == 1, but no
// initialization of displs[0] is visible here - confirm it is set to 0.
114 for (i = 1; i < leader_comm_size; i++) {
115 displs[i] = displs[i - 1] + node_sizes[i - 1] * recvcnt;
116 recvcnts[i] = node_sizes[i] * recvcnt;
// This leader sends from its own displacement inside recvbuf, i.e. the region
// addressed by displs[leader_comm->rank()] elements of recvtype.
119 void* sendtmpbuf = ((char*)recvbuf) + recvtype->get_extent() * displs[leader_comm->rank()];
121 mpi_errno = colls::allgatherv(sendtmpbuf, (recvcnt * local_size), recvtype, recvbuf, recvcnts, displs,
122 recvtype, leader_comm);
// Equal-sized case: each leader's chunk is recvcnt * local_size elements, so
// the send pointer is that chunk size times the leader's rank in leader_comm.
// NOTE(review): the else line introducing this path is not visible here.
126 void* sendtmpbuf=((char*)recvbuf)+recvtype->get_extent()*(recvcnt*local_size)*leader_comm->rank();
// NOTE(review): some argument lines of this call are not visible here;
// presumably the send datatype and the communicator (leader_comm) - confirm.
130 mpi_errno = allgather__mpich(sendtmpbuf,
131 (recvcnt*local_size),
133 recvbuf, (recvcnt*local_size), recvtype,
139 /*Bcast the entire data from node leaders to all other cores*/
// Broadcasts recvcnt * size elements from local rank 0. This overwrites
// mpi_errno, so only the status of the broadcast survives to this point.
140 mpi_errno = colls::bcast(recvbuf, recvcnt * size, recvtype, 0, shmem_comm);
144 } // namespace simgrid::smpi