1 /* Copyright (c) 2013-2022. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
8 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
9 * University Research and Technology
10 * Corporation. All rights reserved.
11 * Copyright (c) 2004-2009 The University of Tennessee and The University
12 * of Tennessee Research Foundation. All rights reserved.
14 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
15 * University of Stuttgart. All rights reserved.
16 * Copyright (c) 2004-2005 The Regents of the University of California.
17 * All rights reserved.
19 * Additional copyrights may follow
21 /* -*- Mode: C; c-basic-offset:4 ; -*- */
22 /* Copyright (c) 2001-2014, The Ohio State University. All rights reserved.
25 * This file is part of the MVAPICH2 software package developed by the
26 * team members of The Ohio State University's Network-Based Computing
27 * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
29 * For detailed copyright and licensing information, please refer to the
30 * copyright file COPYRIGHT in the top level MVAPICH2 directory.
34 * (C) 2001 by Argonne National Laboratory.
35 * See COPYRIGHT in top-level directory.
37 #include "../colls_private.hpp"
41 int allgather__mvapich2_smp(const void *sendbuf,int sendcnt, MPI_Datatype sendtype,
42 void *recvbuf, int recvcnt,MPI_Datatype recvtype,
46 int local_rank, local_size;
47 int leader_comm_size = 0;
48 int mpi_errno = MPI_SUCCESS;
49 MPI_Aint recvtype_extent = 0; /* Datatype extent */
50 MPI_Comm shmem_comm, leader_comm;
52 if(comm->get_leaders_comm()==MPI_COMM_NULL){
56 if (not comm->is_uniform() || not comm->is_blocked())
57 throw std::invalid_argument("allgather MVAPICH2 smp algorithm can't be used with irregular deployment. Please "
58 "insure that processes deployed on the same node are contiguous and that each node has "
59 "the same number of processes");
68 /* extract the rank,size information for the intra-node communicator */
69 recvtype_extent=recvtype->get_extent();
71 shmem_comm = comm->get_intra_comm();
72 local_rank = shmem_comm->rank();
73 local_size = shmem_comm->size();
75 if (local_rank == 0) {
76 /* Node leader. Extract the rank, size information for the leader communicator */
77 leader_comm = comm->get_leaders_comm();
78 if(leader_comm==MPI_COMM_NULL){
79 leader_comm = MPI_COMM_WORLD;
81 leader_comm_size = leader_comm->size();
84 /* If there is just one node, then once the gather itself is done
85 * the root has all the data and can broadcast it. */
88 colls::gather(sendbuf, sendcnt, sendtype, (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
89 recvcnt, recvtype, 0, shmem_comm);
91 /* In an allgather, every process may already have
92 * its own data in place. */
93 if(sendbuf == MPI_IN_PLACE) {
94 mpi_errno = colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), recvcnt, recvtype,
95 recvbuf, recvcnt, recvtype, 0, shmem_comm);
97 mpi_errno = colls::gather(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, 0, shmem_comm);
100 /* Exchange the data between the node leaders*/
101 if (local_rank == 0 && (leader_comm_size > 1)) {
102 /* When the amount of data differs between nodes (non-uniform deployment) */
103 if (not comm->is_uniform()) {
105 int* node_sizes = nullptr;
108 node_sizes = comm->get_non_uniform_map();
110 int* displs = new int[leader_comm_size];
111 int* recvcnts = new int[leader_comm_size];
112 recvcnts[0] = node_sizes[0] * recvcnt;
115 for (i = 1; i < leader_comm_size; i++) {
116 displs[i] = displs[i - 1] + node_sizes[i - 1] * recvcnt;
117 recvcnts[i] = node_sizes[i] * recvcnt;
120 void* sendtmpbuf = ((char*)recvbuf) + recvtype->get_extent() * displs[leader_comm->rank()];
122 mpi_errno = colls::allgatherv(sendtmpbuf, (recvcnt * local_size), recvtype, recvbuf, recvcnts, displs,
123 recvtype, leader_comm);
127 void* sendtmpbuf=((char*)recvbuf)+recvtype->get_extent()*(recvcnt*local_size)*leader_comm->rank();
131 mpi_errno = allgather__mpich(sendtmpbuf,
132 (recvcnt*local_size),
134 recvbuf, (recvcnt*local_size), recvtype,
140 /*Bcast the entire data from node leaders to all other cores*/
141 mpi_errno = colls::bcast(recvbuf, recvcnt * size, recvtype, 0, shmem_comm);