/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 * (C) 2010 by Argonne National Laboratory.
 * See COPYRIGHT in top-level directory.
 */
11 #include <math.h> /* for fabs(3) */
13 /* Measure and compare the relative performance of MPI_Group_translate_ranks
14 * with small and large group2 sizes but a constant number of ranks. This
15 * serves as a performance sanity check for the Scalasca use case where we
16 * translate to MPI_COMM_WORLD ranks. The performance should only depend on the
17 * number of ranks passed, not the size of either group (especially group2).
19 * This test is probably only meaningful for large-ish process counts, so we may
20 * not be able to run this test by default in the nightlies. */
22 /* number of iterations used for timing */
23 #define NUM_LOOPS (1000000)
25 int main(int argc, char *argv[])
30 MPI_Group gworld, grev, gself;
34 double start, end, time1, time2;
36 MTest_Init(&argc, &argv);
38 comm = MPI_COMM_WORLD;
40 MPI_Comm_size(comm, &size);
41 MPI_Comm_rank(comm, &rank);
43 ranks = malloc(size * sizeof(int));
44 ranksout = malloc(size * sizeof(int));
45 if (!ranks || !ranksout) {
46 fprintf(stderr, "out of memory\n");
47 MPI_Abort(MPI_COMM_WORLD, 1);
50 /* generate a comm with the rank order reversed */
51 MPI_Comm_split(comm, 0, (size - rank - 1), &commrev);
52 MPI_Comm_group(commrev, &grev);
53 MPI_Comm_group(MPI_COMM_SELF, &gself);
54 MPI_Comm_group(comm, &gworld);
56 /* sanity check correctness first */
57 for (i = 0; i < size; i++) {
61 MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
62 for (i = 0; i < size; i++) {
63 if (ranksout[i] != (size - i - 1)) {
65 printf("%d: (gworld) expected ranksout[%d]=%d, got %d\n", rank, i,
66 (size - rank - 1), ranksout[i]);
70 MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
71 for (i = 0; i < size; i++) {
72 int expected = (i == (size - rank - 1) ? 0 : MPI_UNDEFINED);
73 if (ranksout[i] != expected) {
75 printf("%d: (gself) expected ranksout[%d]=%d, got %d\n", rank, i, expected,
81 /* now compare relative performance */
83 /* we needs lots of procs to get a group large enough to have meaningful
84 * numbers. On most testing machines this means that we're oversubscribing
85 * cores in a big way, which might perturb the timing results. So we make
86 * sure everyone started up and then everyone but rank 0 goes to sleep to
87 * let rank 0 do all the timings. */
95 MTestSleep(1); /* try to avoid timing while everyone else is making syscalls */
97 MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout); /*throwaway iter */
99 for (i = 0; i < NUM_LOOPS; ++i) {
100 MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
105 MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout); /*throwaway iter */
107 for (i = 0; i < NUM_LOOPS; ++i) {
108 MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
113 /* complain if the "gworld" time exceeds 2x the "gself" time */
114 if (fabs(time1 - time2) > (2.00 * time2)) {
115 printf("too much difference in MPI_Group_translate_ranks performance:\n");
116 printf("time1=%f time2=%f\n", time1, time2);
117 printf("(fabs(time1-time2)/time2)=%f\n", (fabs(time1 - time2) / time2));
119 printf("also, (time1<time2) is surprising...\n");
128 MPI_Group_free(&grev);
129 MPI_Group_free(&gself);
130 MPI_Group_free(&gworld);
132 MPI_Comm_free(&commrev);
134 MTest_Finalize(errs);