1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2006 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
7 /* This program provides a simple test of send-receive performance between
8 two (or more) processes. This is sometimes called a head-to-head or
9 ping-ping test, as both processes send at the same time.
17 #define ERROR_MARGIN 1.0 /* Slack added to the factor of 2 in the (2+ERROR_MARGIN) comparisons of each time against the time for the previous length. FIXME: This number is pretty much randomly chosen */
19 static int verbose = 0; /* Set to 1 in main when MPITEST_VERBOSE is present in the environment; gates the per-test header output on rank 0 */
21 int main( int argc, char *argv[] )
23 int wsize, wrank, partner, len, maxlen, k, reps, repsleft;
27 double times[3][MAXTESTS];
29 MPI_Init( &argc, &argv );
30 if (getenv("MPITEST_VERBOSE")) verbose = 1;
32 MPI_Comm_size( MPI_COMM_WORLD, &wsize );
33 MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
36 fprintf( stderr, "This program requires at least 2 processes\n" );
37 MPI_Abort( MPI_COMM_WORLD, 1 );
39 /* Set partner based on whether rank is odd or even */
43 else if (wrank < wsize - 1) {
47 /* Handle wsize odd */
48 partner = MPI_PROC_NULL;
50 /* Allocate and initialize buffers */
52 rbuf = (char *)malloc( maxlen );
53 sbuf = (char *)malloc( maxlen );
55 fprintf( stderr, "Could not allocate %d byte buffers\n", maxlen );
56 MPI_Abort( MPI_COMM_WORLD, 2 );
58 for (k=0; k<maxlen; k++) {
63 MPI_Barrier( MPI_COMM_WORLD );
65 /* Test Irecv and send, head to head */
66 if (wrank == 0 && verbose) {
67 printf( "Irecv-send\n" );
68 printf( "len\ttime \trate\n" );
71 /* Send powers of 2 bytes */
73 for (k=0; k<20; k++) {
74 /* We use a simple linear form for the number of tests to
75 reduce the impact of the granularity of the timer */
78 /* Make sure that both processes are ready to start */
79 MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
80 MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
84 MPI_Irecv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq );
85 MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
86 MPI_Wait( &rreq, MPI_STATUS_IGNORE );
88 t1 = MPI_Wtime() - t1;
89 times[0][k] = t1 / reps;
95 printf( "%d\t%g\t%g\n", len, t1, len/t1 );
100 printf( "%d\t%g\tINF\n", len, t1 );
109 MPI_Barrier( MPI_COMM_WORLD );
111 /* Test Sendrecv, head to head */
112 if (wrank == 0 && verbose) {
113 printf( "Sendrecv\n" );
114 printf( "len\ttime (usec)\trate (MB/s)\n" );
117 /* Send powers of 2 bytes */
119 for (k=0; k<20; k++) {
120 /* We use a simple linear form for the number of tests to
121 reduce the impact of the granularity of the timer */
124 /* Make sure that both processes are ready to start */
125 MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
126 MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
130 MPI_Sendrecv( sbuf, len, MPI_BYTE, partner, k,
131 rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
134 t1 = MPI_Wtime() - t1;
135 times[1][k] = t1 / reps;
141 printf( "%d\t%g\t%g\n", len, t1, len/t1 );
146 printf( "%d\t%g\tINF\n", len, t1 );
155 MPI_Barrier( MPI_COMM_WORLD );
157 /* Test Send/recv, ping-pong */
158 if (wrank == 0 && verbose) {
159 printf( "Pingpong\n" );
160 printf( "len\ttime (usec)\trate (MB/s)\n" );
163 /* Send powers of 2 bytes */
165 for (k=0; k<20; k++) {
166 /* We use a simple linear form for the number of tests to
167 reduce the impact of the granularity of the timer */
170 /* Make sure that both processes are ready to start */
171 MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
172 MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
177 MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
178 MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
182 MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
184 MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
187 t1 = MPI_Wtime() - t1;
188 times[2][k] = t1 / reps;
194 printf( "%d\t%g\t%g\n", len, t1, len/t1 );
199 printf( "%d\t%g\tINF\n", len, t1 );
209 /* At this point, we could optionally analyze the results and report
210 success or failure based on some criteria, such as near monotone
211 increases in bandwidth. This test was created because of a
212 fall-off in performance noted in the ch3:sock device:channel */
217 for (k=0; k<20; k++) {
219 T0 = times[0][k] * 1.e6;
220 T1 = times[1][k] * 1.e6;
221 T2 = times[2][k] * 1.e6;
223 printf( "%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2 );
224 /* Let's look at long messages only */
226 double T0Old, T1Old, T2Old;
227 T0Old = times[0][k-1] * 1.0e6;
228 T1Old = times[1][k-1] * 1.0e6;
229 T2Old = times[2][k-1] * 1.0e6;
230 if (T0 > (2+ERROR_MARGIN) * T0Old) {
233 printf( "Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0 );
235 if (T1 > (2+ERROR_MARGIN) * T1Old) {
238 printf( "Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1 );
240 if (T2 > (2+ERROR_MARGIN) * T2Old) {
243 printf( "Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2 );
248 if (nPerfErrors > 8) {
249 /* Allow for 1-2 errors for eager-rendezvous shifting
250 * point and cache effects. There should be a better way
252 printf( " Found %d performance errors\n", nPerfErrors );
255 printf( " No Errors\n" );