1 /* Copyright (c) 2014. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include "xbt/sysdep.h"
13 typedef struct s_smpi_mpi_cart_topology {
19 } s_smpi_mpi_cart_topology_t;
21 typedef struct s_smpi_mpi_graph_topology {
26 } s_smpi_mpi_graph_topology_t;
28 typedef struct s_smpi_dist_graph_topology {
36 } s_smpi_mpi_dist_graph_topology_t;
38 typedef struct s_smpi_mpi_topology {
41 MPIR_Graph_Topology graph;
42 MPIR_Cart_Topology cart;
43 MPIR_Dist_Graph_Topology dist_graph;
45 } s_smpi_mpi_topology_t;
47 void smpi_topo_destroy(MPI_Topology topo) {
54 // smpi_graph_topo_destroy(topo->topo.graph);
57 smpi_cart_topo_destroy(topo->topo.cart);
61 // smpi_dist_graph_topo_destroy(topo->topo.dist_graph);
69 MPI_Topology smpi_topo_create(MPIR_Topo_type kind) {
70 MPI_Topology newTopo = xbt_malloc(sizeof(*newTopo));
72 // Allocate and initialize the right topo should be done by the caller
76 /*******************************************************************************
77 * Cartesian topologies
78 ******************************************************************************/
79 void smpi_cart_topo_destroy(MPIR_Cart_Topology cart) {
94 MPI_Topology smpi_cart_topo_create(int ndims) {
95 MPI_Topology newTopo = smpi_topo_create(MPI_CART);
96 MPIR_Cart_Topology newCart = xbt_malloc(sizeof(*newCart));
98 newCart->ndims = ndims;
99 newCart->dims = xbt_malloc(ndims * sizeof(*newCart->dims));
100 newCart->periodic = xbt_malloc(ndims * sizeof(*newCart->periodic));
101 newCart->position = xbt_malloc(ndims * sizeof(*newCart->position));
102 newTopo->topo.cart = newCart;
106 /* reorder is ignored, don't know what would be the consequences of a dumb
107 * reordering but neither do I see the point of reordering*/
108 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[],
109 int periods[], int reorder, MPI_Comm *comm_cart) {
110 int retval = MPI_SUCCESS;
112 MPI_Topology newCart;
113 MPI_Group newGroup, oldGroup;
114 int rank, nranks, newSize;
116 rank = smpi_comm_rank(comm_old);
120 for (i = 0 ; i < ndims ; i++) {
123 if(rank >= newSize) {
124 *comm_cart = MPI_COMM_NULL;
127 newCart = smpi_cart_topo_create(ndims);
128 oldGroup = smpi_comm_group(comm_old);
129 newGroup = smpi_group_new(newSize);
130 for (i = 0 ; i < newSize ; i++) {
131 smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
134 newCart->topo.cart->nnodes = newSize;
136 /* memcpy(newCart->topo.cart->dims, dims, */
137 /* ndims * sizeof(*newCart->topo.cart->dims)); */
138 /* memcpy(newCart->topo.cart->periodic, periods, */
139 /* ndims * sizeof(*newCart->topo.cart->periodic)); */
141 // FIXME : code duplication... See smpi_mpi_cart_coords
143 for (i=0; i<ndims; i++)
145 newCart->topo.cart->dims[i] = dims[i];
146 newCart->topo.cart->periodic[i] = periods[i];
147 nranks = nranks / dims[i];
148 /* FIXME: nranks could be zero (?) */
149 newCart->topo.cart->position[i] = rank / nranks;
150 rank = rank % nranks;
153 *comm_cart = smpi_comm_new(newGroup, newCart);
157 newCart = smpi_cart_topo_create(ndims);
158 *comm_cart = smpi_comm_new(smpi_comm_group(MPI_COMM_SELF), newCart);
161 *comm_cart = MPI_COMM_NULL;
167 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
168 MPI_Topology oldTopo = smpi_comm_topo(comm);
169 int oldNDims = oldTopo->topo.cart->ndims;
170 int i, j = 0, newNDims, *newDims = NULL, *newPeriodic = NULL;
172 if (remain_dims == NULL && oldNDims != 0) {
176 for (i = 0 ; i < oldNDims ; i++) {
177 if (remain_dims[i]) newNDims++;
181 newDims = xbt_malloc(newNDims * sizeof(*newDims));
182 newPeriodic = xbt_malloc(newNDims * sizeof(*newPeriodic));
184 // that should not segfault
185 for (i = 0 ; j < newNDims ; i++) {
187 newDims[j] = oldTopo->topo.cart->dims[i];
188 newPeriodic[j] = oldTopo->topo.cart->periodic[i];
193 return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
199 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims,
203 MPI_Topology topo = smpi_comm_topo(comm);
205 nnodes = topo->topo.cart->nnodes;
206 for ( i=0; i < topo->topo.cart->ndims; i++ ) {
207 nnodes = nnodes / topo->topo.cart->dims[i];
208 coords[i] = rank / nnodes;
209 rank = rank % nnodes;
214 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
215 MPI_Topology topo = smpi_comm_topo(comm);
217 for(i = 0 ; i < maxdims ; i++) {
218 dims[i] = topo->topo.cart->dims[i];
219 periods[i] = topo->topo.cart->periodic[i];
220 coords[i] = topo->topo.cart->position[i];
225 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
226 MPI_Topology topo = smpi_comm_topo(comm);
227 int ndims = topo->topo.cart->ndims;
228 int multiplier, coord,i;
232 for ( i=ndims-1; i >=0; i-- ) {
235 /* The user can give us whatever coordinates he wants. If one of them is
236 * out of range, either this dimension is periodic, and then we
237 * consider the equivalent coordinate inside the bounds, or it is not
238 * and then it is an error
240 if (coord >= topo->topo.cart->dims[i]) {
241 if ( topo->topo.cart->periodic[i] ) {
242 coord = coord % topo->topo.cart->dims[i];
245 // Should I do that ?
250 else if (coord < 0) {
251 if(topo->topo.cart->periodic[i]) {
252 coord = coord % topo->topo.cart->dims[i];
253 if (coord) coord = topo->topo.cart->dims[i] + coord;
261 *rank += multiplier * coord;
262 multiplier *= topo->topo.cart->dims[i];
267 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp,
268 int *rank_source, int *rank_dest) {
269 MPI_Topology topo = smpi_comm_topo(comm);
270 int position[topo->topo.cart->ndims];
273 if(topo->topo.cart->ndims == 0) {
276 if (topo->topo.cart->ndims < direction) {
280 smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->topo.cart->ndims,
282 position[direction] += disp;
284 if(position[direction] < 0 ||
285 position[direction] >= topo->topo.cart->dims[direction]) {
286 if(topo->topo.cart->periodic[direction]) {
287 position[direction] %= topo->topo.cart->dims[direction];
288 smpi_mpi_cart_rank(comm, position, rank_dest);
291 *rank_dest = MPI_PROC_NULL;
295 smpi_mpi_cart_rank(comm, position, rank_dest);
298 position[direction] = topo->topo.cart->position[direction] - disp;
299 if(position[direction] < 0 ||
300 position[direction] >= topo->topo.cart->dims[direction]) {
301 if(topo->topo.cart->periodic[direction]) {
302 position[direction] %= topo->topo.cart->dims[direction];
303 smpi_mpi_cart_rank(comm, position, rank_source);
306 *rank_source = MPI_PROC_NULL;
310 smpi_mpi_cart_rank(comm, position, rank_source);
316 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
317 MPI_Topology topo = smpi_comm_topo(comm);
319 *ndims = topo->topo.cart->ndims;
325 // Everything below has been taken from ompi, but could be easily rewritten.
328 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
329 * University Research and Technology
330 * Corporation. All rights reserved.
331 * Copyright (c) 2004-2005 The University of Tennessee and The University
332 * of Tennessee Research Foundation. All rights
334 * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
335 * University of Stuttgart. All rights reserved.
336 * Copyright (c) 2004-2005 The Regents of the University of California.
337 * All rights reserved.
338 * Copyright (c) 2012 Los Alamos National Security, LLC. All rights
340 * Copyright (c) 2014 Intel, Inc. All rights reserved
343 * Additional copyrights may follow
349 /* static functions */
350 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
351 static int getfactors(int num, int *nfators, int **factors);
354 * This is a utility function, no need to have anything in the lower
355 * layer for this at all
357 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
368 /* Get # of free-to-be-assigned processes and # of free dimensions */
371 for (i = 0, p = dims; i < ndims; ++i,++p) {
374 } else if ((*p < 0) || ((nnodes % *p) != 0)) {
383 if (freeprocs == 1) {
389 if (freeprocs == 1) {
390 for (i = 0; i < ndims; ++i, ++dims) {
398 /* Factor the number of free processes */
399 if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
403 /* Assign free processes to free dimensions */
404 if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
408 /* Return assignment results */
410 for (i = 0; i < ndims; ++i, ++dims) {
416 free((char *) factors);
417 free((char *) procs);
426 * Function: - assign processes to dimensions
427 * - get "best-balanced" grid
428 * - greedy bin-packing algorithm used
429 * - sort dimensions in decreasing order
430 * - dimensions array dynamically allocated
431 * Accepts: - # of dimensions
432 * - # of prime factors
433 * - array of prime factors
434 * - ptr to array of dimensions (returned value)
435 * Returns: - 0 or ERROR
438 assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
451 /* Allocate and initialize the bins */
452 bins = (int *) malloc((unsigned) ndim * sizeof(int));
454 return MPI_ERR_NO_MEM;
458 for (i = 0, p = bins; i < ndim; ++i, ++p) {
462 /* Loop assigning factors from the highest to the lowest */
463 for (j = nfactor - 1; j >= 0; --j) {
465 /* Assign a factor to the smallest bin */
467 for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
475 /* Sort dimensions in decreasing order (O(n^2) for now) */
476 for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
477 for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
492 * Function: - factorize a number
495 * - array of prime factors
496 * Returns: - MPI_SUCCESS or ERROR
499 getfactors(int num, int *nfactors, int **factors) {
510 /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
511 sqrtnum = ceil(sqrt(num));
512 size = ceil(log(num) / log(2));
513 *factors = (int *) malloc((unsigned) size * sizeof(int));
516 /* determine all occurences of factor 2 */
517 while((num % 2) == 0) {
521 /* determine all occurences of uneven prime numbers up to sqrt(num) */
522 for(d = 3; (num > 1) && (d < sqrtnum); d += 2) {
523 while((num % d) == 0) {
528 /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
530 (*factors)[i++] = num;