1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2006 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
7 /* This program provides a simple test of send-receive performance between
8 two (or more) processes. This is sometimes called a head-to-head or
9 ping-ping test, as both processes send at the same time.
17 #define ERROR_MARGIN 1.0 /* Slack added to the factor of 2 when the timing at step k is compared against the timing at step k-1. FIXME: This number is pretty much randomly chosen */
19 static int verbose = 0; /* Nonzero lets rank 0 print the per-test table headers. NOTE(review): presumably set when MPITEST_VERBOSE is present in the environment -- confirm */
21 int main(int argc, char *argv[])
23 int wsize, wrank, partner, len, maxlen, k, reps, repsleft;
27 double times[3][MAXTESTS];
29 MPI_Init(&argc, &argv);
30 if (getenv("MPITEST_VERBOSE"))
33 MPI_Comm_size(MPI_COMM_WORLD, &wsize);
34 MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
37 fprintf(stderr, "This program requires at least 2 processes\n");
38 MPI_Abort(MPI_COMM_WORLD, 1);
40 /* Set partner based on whether rank is odd or even */
44 else if (wrank < wsize - 1) {
48 /* Handle wsize odd */
49 partner = MPI_PROC_NULL;
51 /* Allocate and initialize buffers */
53 rbuf = (char *) malloc(maxlen);
54 sbuf = (char *) malloc(maxlen);
56 fprintf(stderr, "Could not allocate %d byte buffers\n", maxlen);
57 MPI_Abort(MPI_COMM_WORLD, 2);
59 for (k = 0; k < maxlen; k++) {
64 MPI_Barrier(MPI_COMM_WORLD);
66 /* Test Irecv and send, head to head */
67 if (wrank == 0 && verbose) {
68 printf("Irecv-send\n");
69 printf("len\ttime \trate\n");
72 /* Send powers of 2 bytes */
74 for (k = 0; k < 20; k++) {
75 /* We use a simple linear form for the number of tests to
76 * reduce the impact of the granularity of the timer */
79 /* Make sure that both processes are ready to start */
80 MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
81 MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
84 MPI_Irecv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq);
85 MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
86 MPI_Wait(&rreq, MPI_STATUS_IGNORE);
88 t1 = MPI_Wtime() - t1;
89 times[0][k] = t1 / reps;
95 printf("%d\t%g\t%g\n", len, t1, len / t1);
100 printf("%d\t%g\tINF\n", len, t1);
109 MPI_Barrier(MPI_COMM_WORLD);
111 /* Test Sendrecv, head to head */
112 if (wrank == 0 && verbose) {
113 printf("Sendrecv\n");
114 printf("len\ttime (usec)\trate (MB/s)\n");
117 /* Send powers of 2 bytes */
119 for (k = 0; k < 20; k++) {
120 /* We use a simple linear form for the number of tests to
121 * reduce the impact of the granularity of the timer */
124 /* Make sure that both processes are ready to start */
125 MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
126 MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
129 MPI_Sendrecv(sbuf, len, MPI_BYTE, partner, k,
130 rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
132 t1 = MPI_Wtime() - t1;
133 times[1][k] = t1 / reps;
139 printf("%d\t%g\t%g\n", len, t1, len / t1);
144 printf("%d\t%g\tINF\n", len, t1);
153 MPI_Barrier(MPI_COMM_WORLD);
155 /* Test Send/recv, ping-pong */
156 if (wrank == 0 && verbose) {
157 printf("Pingpong\n");
158 printf("len\ttime (usec)\trate (MB/s)\n");
161 /* Send powers of 2 bytes */
163 for (k = 0; k < 20; k++) {
164 /* We use a simple linear form for the number of tests to
165 * reduce the impact of the granularity of the timer */
168 /* Make sure that both processes are ready to start */
169 MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
170 MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
174 MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
175 MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
178 MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
179 MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
182 t1 = MPI_Wtime() - t1;
183 times[2][k] = t1 / reps;
189 printf("%d\t%g\t%g\n", len, t1, len / t1);
194 printf("%d\t%g\tINF\n", len, t1);
204 /* At this point, we could optionally analyze the results and report
205 * success or failure based on some criteria, such as near monotone
206 * increases in bandwidth. This test was created because of a
207 * fall-off in performance noted in the ch3:sock device:channel */
212 for (k = 0; k < 20; k++) {
214 T0 = times[0][k] * 1.e6;
215 T1 = times[1][k] * 1.e6;
216 T2 = times[2][k] * 1.e6;
218 printf("%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2);
219 /* Lets look at long messages only */
221 double T0Old, T1Old, T2Old;
222 T0Old = times[0][k - 1] * 1.0e6;
223 T1Old = times[1][k - 1] * 1.0e6;
224 T2Old = times[2][k - 1] * 1.0e6;
225 if (T0 > (2 + ERROR_MARGIN) * T0Old) {
228 printf("Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0);
230 if (T1 > (2 + ERROR_MARGIN) * T1Old) {
233 printf("Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1);
235 if (T2 > (2 + ERROR_MARGIN) * T2Old) {
238 printf("Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2);
243 if (nPerfErrors > 8) {
244 /* Allow for 1-2 errors for eager-rendezvous shifting
245 * point and cache effects. There should be a better way
247 printf(" Found %d performance errors\n", nPerfErrors);
250 printf(" No Errors\n");