1 /* Copyright (c) 2014. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
8 #include "xbt/sysdep.h"
14 typedef struct s_smpi_mpi_cart_topology {
20 } s_smpi_mpi_cart_topology_t;
22 typedef struct s_smpi_mpi_graph_topology {
27 } s_smpi_mpi_graph_topology_t;
29 typedef struct s_smpi_dist_graph_topology {
37 } s_smpi_mpi_dist_graph_topology_t;
39 typedef struct s_smpi_mpi_topology {
42 MPIR_Graph_Topology graph;
43 MPIR_Cart_Topology cart;
44 MPIR_Dist_Graph_Topology dist_graph;
46 } s_smpi_mpi_topology_t;
48 void smpi_topo_destroy(MPI_Topology topo) {
55 // smpi_graph_topo_destroy(topo->topo.graph);
58 smpi_cart_topo_destroy(topo->topo.cart);
62 // smpi_dist_graph_topo_destroy(topo->topo.dist_graph);
70 MPI_Topology smpi_topo_create(MPIR_Topo_type kind) {
71 MPI_Topology newTopo = xbt_malloc(sizeof(*newTopo));
73 // Allocate and initialize the right topo should be done by the caller
77 /*******************************************************************************
78 * Cartesian topologies
79 ******************************************************************************/
80 void smpi_cart_topo_destroy(MPIR_Cart_Topology cart) {
95 MPI_Topology smpi_cart_topo_create(int ndims) {
96 MPI_Topology newTopo = smpi_topo_create(MPI_CART);
97 MPIR_Cart_Topology newCart = xbt_malloc(sizeof(*newCart));
99 newCart->ndims = ndims;
100 newCart->dims = xbt_malloc(ndims * sizeof(*newCart->dims));
101 newCart->periodic = xbt_malloc(ndims * sizeof(*newCart->periodic));
102 newCart->position = xbt_malloc(ndims * sizeof(*newCart->position));
103 newTopo->topo.cart = newCart;
107 /* reorder is ignored, don't know what would be the consequences of a dumb
108 * reordering but neither do I see the point of reordering*/
109 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[],
110 int periods[], int reorder, MPI_Comm *comm_cart) {
111 int retval = MPI_SUCCESS;
113 MPI_Topology newCart;
114 MPI_Group newGroup, oldGroup;
115 int rank, nranks, newSize;
117 rank = smpi_comm_rank(comm_old);
121 for (i = 0 ; i < ndims ; i++) {
124 if(rank >= newSize) {
125 *comm_cart = MPI_COMM_NULL;
128 newCart = smpi_cart_topo_create(ndims);
129 oldGroup = smpi_comm_group(comm_old);
130 newGroup = smpi_group_new(newSize);
131 for (i = 0 ; i < newSize ; i++) {
132 smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
135 newCart->topo.cart->nnodes = newSize;
137 /* memcpy(newCart->topo.cart->dims, dims, */
138 /* ndims * sizeof(*newCart->topo.cart->dims)); */
139 /* memcpy(newCart->topo.cart->periodic, periods, */
140 /* ndims * sizeof(*newCart->topo.cart->periodic)); */
142 // FIXME : code duplication... See smpi_mpi_cart_coords
144 for (i=0; i<ndims; i++)
146 newCart->topo.cart->dims[i] = dims[i];
147 newCart->topo.cart->periodic[i] = periods[i];
148 nranks = nranks / dims[i];
149 /* FIXME: nranks could be zero (?) */
150 newCart->topo.cart->position[i] = rank / nranks;
151 rank = rank % nranks;
154 *comm_cart = smpi_comm_new(newGroup, newCart);
158 newCart = smpi_cart_topo_create(ndims);
159 *comm_cart = smpi_comm_new(smpi_comm_group(MPI_COMM_SELF), newCart);
162 *comm_cart = MPI_COMM_NULL;
168 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
169 MPI_Topology oldTopo = smpi_comm_topo(comm);
170 int oldNDims = oldTopo->topo.cart->ndims;
171 int i, j = 0, newNDims, *newDims = NULL, *newPeriodic = NULL;
173 if (remain_dims == NULL && oldNDims != 0) {
177 for (i = 0 ; i < oldNDims ; i++) {
178 if (remain_dims[i]) newNDims++;
182 newDims = xbt_malloc(newNDims * sizeof(*newDims));
183 newPeriodic = xbt_malloc(newNDims * sizeof(*newPeriodic));
185 // that should not segfault
186 for (i = 0 ; j < newNDims ; i++) {
188 newDims[j] = oldTopo->topo.cart->dims[i];
189 newPeriodic[j] = oldTopo->topo.cart->periodic[i];
194 return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
200 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims,
204 MPI_Topology topo = smpi_comm_topo(comm);
206 nnodes = topo->topo.cart->nnodes;
207 for ( i=0; i < topo->topo.cart->ndims; i++ ) {
208 nnodes = nnodes / topo->topo.cart->dims[i];
209 coords[i] = rank / nnodes;
210 rank = rank % nnodes;
215 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
216 MPI_Topology topo = smpi_comm_topo(comm);
218 for(i = 0 ; i < maxdims ; i++) {
219 dims[i] = topo->topo.cart->dims[i];
220 periods[i] = topo->topo.cart->periodic[i];
221 coords[i] = topo->topo.cart->position[i];
226 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
227 MPI_Topology topo = smpi_comm_topo(comm);
228 int ndims = topo->topo.cart->ndims;
229 int multiplier, coord,i;
233 for ( i=ndims-1; i >=0; i-- ) {
236 /* The user can give us whatever coordinates he wants. If one of them is
237 * out of range, either this dimension is periodic, and then we
238 * consider the equivalent coordinate inside the bounds, or it is not
239 * and then it is an error
241 if (coord >= topo->topo.cart->dims[i]) {
242 if ( topo->topo.cart->periodic[i] ) {
243 coord = coord % topo->topo.cart->dims[i];
246 // Should I do that ?
251 else if (coord < 0) {
252 if(topo->topo.cart->periodic[i]) {
253 coord = coord % topo->topo.cart->dims[i];
254 if (coord) coord = topo->topo.cart->dims[i] + coord;
262 *rank += multiplier * coord;
263 multiplier *= topo->topo.cart->dims[i];
268 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp,
269 int *rank_source, int *rank_dest) {
270 MPI_Topology topo = smpi_comm_topo(comm);
271 int position[topo->topo.cart->ndims];
274 if(topo->topo.cart->ndims == 0) {
277 if (topo->topo.cart->ndims < direction) {
281 smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->topo.cart->ndims,
283 position[direction] += disp;
285 if(position[direction] < 0 ||
286 position[direction] >= topo->topo.cart->dims[direction]) {
287 if(topo->topo.cart->periodic[direction]) {
288 position[direction] %= topo->topo.cart->dims[direction];
289 smpi_mpi_cart_rank(comm, position, rank_dest);
292 *rank_dest = MPI_PROC_NULL;
296 smpi_mpi_cart_rank(comm, position, rank_dest);
299 position[direction] = topo->topo.cart->position[direction] - disp;
300 if(position[direction] < 0 ||
301 position[direction] >= topo->topo.cart->dims[direction]) {
302 if(topo->topo.cart->periodic[direction]) {
303 position[direction] %= topo->topo.cart->dims[direction];
304 smpi_mpi_cart_rank(comm, position, rank_source);
307 *rank_source = MPI_PROC_NULL;
311 smpi_mpi_cart_rank(comm, position, rank_source);
317 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
318 MPI_Topology topo = smpi_comm_topo(comm);
320 *ndims = topo->topo.cart->ndims;
326 // Everything below has been taken from ompi, but could be easily rewritten.
329 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
330 * University Research and Technology
331 * Corporation. All rights reserved.
332 * Copyright (c) 2004-2005 The University of Tennessee and The University
333 * of Tennessee Research Foundation. All rights
335 * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
336 * University of Stuttgart. All rights reserved.
337 * Copyright (c) 2004-2005 The Regents of the University of California.
338 * All rights reserved.
339 * Copyright (c) 2012 Los Alamos National Security, LLC. All rights
341 * Copyright (c) 2014 Intel, Inc. All rights reserved
344 * Additional copyrights may follow
350 /* static functions */
351 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
352 static int getfactors(int num, int *nfators, int **factors);
355 * This is a utility function, no need to have anything in the lower
356 * layer for this at all
358 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
369 /* Get # of free-to-be-assigned processes and # of free dimensions */
372 for (i = 0, p = dims; i < ndims; ++i,++p) {
375 } else if ((*p < 0) || ((nnodes % *p) != 0)) {
384 if (freeprocs == 1) {
390 if (freeprocs == 1) {
391 for (i = 0; i < ndims; ++i, ++dims) {
399 /* Factor the number of free processes */
400 if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
404 /* Assign free processes to free dimensions */
405 if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
409 /* Return assignment results */
411 for (i = 0; i < ndims; ++i, ++dims) {
417 free((char *) factors);
418 free((char *) procs);
427 * Function: - assign processes to dimensions
428 * - get "best-balanced" grid
429 * - greedy bin-packing algorithm used
430 * - sort dimensions in decreasing order
431 * - dimensions array dynamically allocated
432 * Accepts: - # of dimensions
433 * - # of prime factors
434 * - array of prime factors
435 * - ptr to array of dimensions (returned value)
436 * Returns: - 0 or ERROR
439 assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
452 /* Allocate and initialize the bins */
453 bins = (int *) malloc((unsigned) ndim * sizeof(int));
455 return MPI_ERR_NO_MEM;
459 for (i = 0, p = bins; i < ndim; ++i, ++p) {
463 /* Loop assigning factors from the highest to the lowest */
464 for (j = nfactor - 1; j >= 0; --j) {
466 /* Assign a factor to the smallest bin */
468 for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
476 /* Sort dimensions in decreasing order (O(n^2) for now) */
477 for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
478 for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
493 * Function: - factorize a number
496 * - array of prime factors
497 * Returns: - MPI_SUCCESS or ERROR
500 getfactors(int num, int *nfactors, int **factors) {
511 /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
512 sqrtnum = ceil(sqrt(num));
513 size = ceil(log(num) / log(2));
514 *factors = (int *) malloc((unsigned) size * sizeof(int));
517 /* determine all occurences of factor 2 */
518 while((num % 2) == 0) {
522 /* determine all occurences of uneven prime numbers up to sqrt(num) */
523 for(d = 3; (num > 1) && (d < sqrtnum); d += 2) {
524 while((num % d) == 0) {
529 /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
531 (*factors)[i++] = num;