/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 * (C) 2010 by Argonne National Laboratory.
 * See COPYRIGHT in top-level directory.
 */
11 #include <math.h> /* for fabs(3) */
13 /* Measure and compare the relative performance of MPI_Group_translate_ranks
14 * with small and large group2 sizes but a constant number of ranks. This
15 * serves as a performance sanity check for the Scalasca use case where we
16 * translate to MPI_COMM_WORLD ranks. The performance should only depend on the
17 * number of ranks passed, not the size of either group (especially group2).
19 * This test is probably only meaningful for large-ish process counts, so we may
20 * not be able to run this test by default in the nightlies. */
22 /* number of iterations used for timing */
23 #define NUM_LOOPS (1000000)
25 int main(int argc, char *argv[])
30 MPI_Group gworld, grev, gself;
34 double start, end, time1, time2;
36 MTest_Init(&argc, &argv);
38 comm = MPI_COMM_WORLD;
40 MPI_Comm_size(comm, &size);
41 MPI_Comm_rank(comm, &rank);
43 ranks = malloc(size * sizeof(int));
44 ranksout = malloc(size * sizeof(int));
45 if (!ranks || !ranksout) {
46 fprintf(stderr, "out of memory\n");
47 MPI_Abort(MPI_COMM_WORLD, 1);
50 /* generate a comm with the rank order reversed */
51 MPI_Comm_split(comm, 0, (size - rank - 1), &commrev);
52 MPI_Comm_group(commrev, &grev);
53 MPI_Comm_group(MPI_COMM_SELF, &gself);
54 MPI_Comm_group(comm, &gworld);
56 /* sanity check correctness first */
57 for (i = 0; i < size; i++) {
61 MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
62 for (i = 0; i < size; i++) {
63 if (ranksout[i] != (size - i - 1)) {
65 printf("%d: (gworld) expected ranksout[%d]=%d, got %d\n", rank, i,
66 (size - rank - 1), ranksout[i]);
70 MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
71 for (i = 0; i < size; i++) {
72 int expected = (i == (size - rank - 1) ? 0 : MPI_UNDEFINED);
73 if (ranksout[i] != expected) {
75 printf("%d: (gself) expected ranksout[%d]=%d, got %d\n", rank, i, expected,
81 /* now compare relative performance */
83 /* we needs lots of procs to get a group large enough to have meaningful
84 * numbers. On most testing machines this means that we're oversubscribing
85 * cores in a big way, which might perturb the timing results. So we make
86 * sure everyone started up and then everyone but rank 0 goes to sleep to
87 * let rank 0 do all the timings. */
95 MTestSleep(1); /* try to avoid timing while everyone else is making syscalls */
97 MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout); /*throwaway iter */
99 for (i = 0; i < NUM_LOOPS; ++i) {
100 MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
105 MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout); /*throwaway iter */
107 for (i = 0; i < NUM_LOOPS; ++i) {
108 MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
113 /* complain if the "gworld" time exceeds 2x the "gself" time */
114 if (fabs(time1 - time2) > (2.00 * time2)) {
115 printf("too much difference in MPI_Group_translate_ranks performance:\n");
116 printf("time1=%f time2=%f\n", time1, time2);
117 printf("(fabs(time1-time2)/time2)=%f\n", (fabs(time1 - time2) / time2));
119 printf("also, (time1<time2) is surprising...\n");
128 MPI_Group_free(&grev);
129 MPI_Group_free(&gself);
130 MPI_Group_free(&gworld);
132 MPI_Comm_free(&commrev);
134 MTest_Finalize(errs);