1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2008 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
7 * This code may be used to test the performance of some of the
8 * noncontiguous datatype operations, including vector and indexed
9 * pack and unpack operations. To simplify the use of this code for
10 * tuning an MPI implementation, it uses no communication, just the
11 * MPI_Pack and MPI_Unpack routines. In addition, the individual tests are
12 * in separate routines, making it easier to compare the compiler-generated
13 * code for the user (manual) pack/unpack with the code used by
14 * the MPI implementation. Further, to be fair to the MPI implementation,
15 * the routines are passed the source and destination buffers; this ensures
16 * that the compiler can't optimize for statically allocated buffers.
23 /* Needed for restrict and const definitions */
24 #include "mpitestconf.h"
26 static int verbose = 0;
29 #define THRESHOLD 0.10
30 #define VARIANCE_THRESHOLD ((THRESHOLD * THRESHOLD) / 2)
/* mean: takes an array of doubles (list) and its element count and returns
 * a double.  The visible loop visits indices 0 .. count-1 of list.
 * NOTE(review): presumably this yields the arithmetic mean of the entries;
 * the accumulation and the return statement are not shown here -- confirm. */
33 double mean(double *list, int count);
34 double mean(double *list, int count)
/* One pass over every entry of list. */
40 for (i = 0; i < count; i++)
/* noise: computes a spread figure for the count samples in list.
 * Each sample is divided by mean(list, count), and the squared distance of
 * that ratio from 1 is summed into retval.
 *   list  - samples to analyse
 *   count - number of samples in list
 * NOTE(review): presumably retval is what gets returned; any final scaling
 * of retval and the release of margin are not shown here -- confirm. */
47 double noise(double *list, int count);
48 double noise(double *list, int count)
50 double *margin, retval;
/* Scratch array with one slot per sample; a failed allocation is reported
 * on stdout. */
53 if (!(margin = malloc(count * sizeof(double)))) {
54 printf("Unable to allocate memory\n");
/* Normalize every sample by the mean.  mean() is called again on each
 * iteration even though the loop only writes to margin, so list and count
 * are unchanged between calls. */
58 for (i = 0; i < count; i++)
59 margin[i] = list[i] / mean(list, count);
/* Accumulate the squared deviation of each normalized sample from 1. */
62 for (i = 0; i < count; i++) {
63 retval += ((margin[i] - 1) * (margin[i] - 1));
72 /* Here are the tests */
74 /* Test packing a vector of individual doubles */
75 /* We don't use restrict in the function args because assignments between
76 restrict pointers are not valid in C, and some compilers, such as the
77 IBM xlc compilers, flag that use as an error.*/
/* TestVecPackDouble: compares a user (manual) pack against MPI_Pack for a
 * strided vector of single doubles.  Each phase runs NTRIALS trials of
 * N_REPS repetitions and prints the per-trial times and their noise figure.
 *   n, stride   - count and stride handed to MPI_Type_vector (block length 1)
 *   avgTimeUser - out: mean(t, NTRIALS) / N_REPS from the USER phase
 *   avgTimeMPI  - out: mean(t, NTRIALS) / N_REPS from the MPI phase
 *   dest        - buffer that receives the packed data
 *   src         - strided input buffer
 * NOTE(review): the return value and the control flow taken after the
 * noise message are not shown here -- confirm. */
78 int TestVecPackDouble(int n, int stride,
79 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
80 int TestVecPackDouble(int n, int stride,
81 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
/* Local restrict-qualified cursors; the arguments themselves are not
 * declared restrict. */
83 double *restrict d_dest;
84 const double *restrict d_src;
/* t holds one time value per trial. */
87 double t1, t2, t[NTRIALS];
/* USER phase. */
92 printf("TestVecPackDouble (USER): ");
93 for (j = 0; j < NTRIALS; j++) {
95 for (rep = 0; rep < N_REPS; rep++) {
/* Elapsed wall-clock time since t1, taken from MPI_Wtime. */
104 t2 = MPI_Wtime() - t1;
107 printf("%.3f ", t[j]);
/* noise(t, NTRIALS) is evaluated here for printing and once more in the
 * threshold test that follows. */
110 printf("[%.3f]\n", noise(t, NTRIALS));
111 /* If there is too much noise, discard the test */
112 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
116 printf("Too much noise; discarding measurement\n");
/* Average time of a single repetition. */
119 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
121 /* MPI Vector code */
/* n blocks of one MPI_DOUBLE each, spaced stride elements apart. */
122 MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vectype);
123 MPI_Type_commit(&vectype);
/* MPI phase, same trial/repetition structure as the USER phase. */
126 printf("TestVecPackDouble (MPI): ");
127 for (j = 0; j < NTRIALS; j++) {
129 for (rep = 0; rep < N_REPS; rep++) {
/* Pack one vectype item from src into dest; the output buffer size is
 * given as n * sizeof(double) bytes. */
131 MPI_Pack((void *) src, 1, vectype, dest, n * sizeof(double), &position, MPI_COMM_SELF);
133 t2 = MPI_Wtime() - t1;
136 printf("%.3f ", t[j]);
139 printf("[%.3f]\n", noise(t, NTRIALS));
140 /* If there is too much noise, discard the test */
141 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
145 printf("Too much noise; discarding measurement\n");
148 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the committed vector datatype. */
151 MPI_Type_free(&vectype);
156 /* Test unpacking a vector of individual doubles */
157 /* See above for why restrict is not used in the function args */
/* TestVecUnPackDouble: compares a user (manual) unpack against MPI_Unpack
 * for a strided vector of single doubles.  Each phase runs NTRIALS trials
 * of N_REPS repetitions and prints the per-trial times and their noise
 * figure.
 *   n, stride   - count and stride handed to MPI_Type_vector (block length 1)
 *   avgTimeUser - out: mean(t, NTRIALS) / N_REPS from the USER phase
 *   avgTimeMPI  - out: mean(t, NTRIALS) / N_REPS from the MPI phase
 *   dest        - buffer that receives the unpacked (strided) data
 *   src         - packed input, passed to MPI_Unpack with a size of
 *                 n * sizeof(double) bytes
 * NOTE(review): the return value and the control flow taken after the
 * noise message are not shown here -- confirm. */
158 int TestVecUnPackDouble(int n, int stride,
159 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
160 int TestVecUnPackDouble(int n, int stride,
161 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
/* Local restrict-qualified cursors; the arguments themselves are not
 * declared restrict. */
163 double *restrict d_dest;
164 const double *restrict d_src;
/* t holds one time value per trial. */
167 double t1, t2, t[NTRIALS];
168 MPI_Datatype vectype;
/* USER phase. */
172 printf("TestVecUnPackDouble (USER): ");
173 for (j = 0; j < NTRIALS; j++) {
175 for (rep = 0; rep < N_REPS; rep++) {
/* Elapsed wall-clock time since t1, taken from MPI_Wtime. */
184 t2 = MPI_Wtime() - t1;
187 printf("%.3f ", t[j]);
/* noise(t, NTRIALS) is evaluated here for printing and once more in the
 * threshold test that follows. */
190 printf("[%.3f]\n", noise(t, NTRIALS));
191 /* If there is too much noise, discard the test */
192 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
196 printf("Too much noise; discarding measurement\n");
/* Average time of a single repetition. */
199 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
201 /* MPI Vector code */
/* n blocks of one MPI_DOUBLE each, spaced stride elements apart. */
202 MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vectype);
203 MPI_Type_commit(&vectype);
/* MPI phase, same trial/repetition structure as the USER phase. */
206 printf("TestVecUnPackDouble (MPI): ");
207 for (j = 0; j < NTRIALS; j++) {
209 for (rep = 0; rep < N_REPS; rep++) {
/* Unpack n * sizeof(double) bytes of packed input from src into one
 * vectype item at dest. */
211 MPI_Unpack((void *) src, n * sizeof(double),
212 &position, dest, 1, vectype, MPI_COMM_SELF);
214 t2 = MPI_Wtime() - t1;
217 printf("%.3f ", t[j]);
220 printf("[%.3f]\n", noise(t, NTRIALS));
221 /* If there is too much noise, discard the test */
222 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
226 printf("Too much noise; discarding measurement\n");
229 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the committed vector datatype. */
232 MPI_Type_free(&vectype);
237 /* Test packing a vector whose blocks each hold 2 doubles */
238 /* See above for why restrict is not used in the function args */
/* TestVecPack2Double: compares a user (manual) pack against MPI_Pack for a
 * strided vector whose blocks each hold two doubles.  Each phase runs
 * NTRIALS trials of N_REPS repetitions and prints the per-trial times and
 * their noise figure.
 *   n, stride   - count and stride handed to MPI_Type_vector (block length 2)
 *   avgTimeUser - out: mean(t, NTRIALS) / N_REPS from the USER phase
 *   avgTimeMPI  - out: mean(t, NTRIALS) / N_REPS from the MPI phase
 *   dest        - buffer that receives the packed data; MPI_Pack is told it
 *                 holds 2 * n * sizeof(double) bytes
 *   src         - strided input buffer
 * NOTE(review): the return value and the control flow taken after the
 * noise message are not shown here -- confirm. */
239 int TestVecPack2Double(int n, int stride,
240 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
241 int TestVecPack2Double(int n, int stride,
242 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
/* Local restrict-qualified cursors; the arguments themselves are not
 * declared restrict. */
244 double *restrict d_dest;
245 const double *restrict d_src;
/* t holds one time value per trial. */
248 double t1, t2, t[NTRIALS];
249 MPI_Datatype vectype;
/* USER phase. */
253 printf("TestVecPack2Double (USER): ");
254 for (j = 0; j < NTRIALS; j++) {
256 for (rep = 0; rep < N_REPS; rep++) {
/* Copy the two adjacent doubles at d_src to consecutive slots of the
 * packed output. */
261 *d_dest++ = d_src[0];
262 *d_dest++ = d_src[1];
/* Elapsed wall-clock time since t1, taken from MPI_Wtime. */
266 t2 = MPI_Wtime() - t1;
269 printf("%.3f ", t[j]);
/* noise(t, NTRIALS) is evaluated here for printing and once more in the
 * threshold test that follows. */
272 printf("[%.3f]\n", noise(t, NTRIALS));
273 /* If there is too much noise, discard the test */
274 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
278 printf("Too much noise; discarding measurement\n");
/* Average time of a single repetition. */
281 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
283 /* MPI Vector code */
/* n blocks of two MPI_DOUBLEs each, spaced stride elements apart. */
284 MPI_Type_vector(n, 2, stride, MPI_DOUBLE, &vectype);
285 MPI_Type_commit(&vectype);
/* MPI phase, same trial/repetition structure as the USER phase. */
288 printf("TestVecPack2Double (MPI): ");
289 for (j = 0; j < NTRIALS; j++) {
291 for (rep = 0; rep < N_REPS; rep++) {
/* Pack one vectype item from src into dest. */
293 MPI_Pack((void *) src, 1, vectype, dest, 2 * n * sizeof(double),
294 &position, MPI_COMM_SELF);
296 t2 = MPI_Wtime() - t1;
299 printf("%.3f ", t[j]);
302 printf("[%.3f]\n", noise(t, NTRIALS));
303 /* If there is too much noise, discard the test */
304 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
308 printf("Too much noise; discarding measurement\n");
311 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the committed vector datatype. */
313 MPI_Type_free(&vectype);
318 /* This creates an indexed type that is like a vector (for simplicity
319 of construction). There is a possibility that the MPI implementation
320 will recognize and simplify this (e.g., in MPI_Type_commit); if so,
321 let us know and we'll add a version that is not as regular
323 /* See above for why restrict is not used in the function args */
/* TestIndexPackDouble: compares a user (manual) indexed gather against
 * MPI_Pack with an indexed-block datatype whose displacements are
 * i * stride for i = 0 .. n-1.  Each phase runs NTRIALS trials of N_REPS
 * repetitions and prints the per-trial times and their noise figure.
 *   n, stride   - number of entries and spacing used to build displs
 *   avgTimeUser - out: mean(t, NTRIALS) / N_REPS from the USER phase
 *   avgTimeMPI  - out: mean(t, NTRIALS) / N_REPS from the MPI phase
 *   dest        - buffer that receives the packed data
 *   src         - input buffer indexed through displs
 * NOTE(review): the return value, the control flow taken after the noise
 * message, and whether displs is freed are not shown here -- confirm. */
324 int TestIndexPackDouble(int n, int stride,
325 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
326 int TestIndexPackDouble(int n, int stride,
327 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
/* Local restrict-qualified cursors; the arguments themselves are not
 * declared restrict. */
329 double *restrict d_dest;
330 const double *restrict d_src;
333 int *restrict displs = 0;
/* t holds one time value per trial. */
334 double t1, t2, t[NTRIALS];
335 MPI_Datatype indextype;
/* Build the displacement table.
 * NOTE(review): the malloc result is written through immediately, with no
 * NULL check between the allocation and the fill loop. */
337 displs = (int *) malloc(n * sizeof(int));
338 for (i = 0; i < n; i++)
339 displs[i] = i * stride;
/* USER phase. */
343 printf("TestIndexPackDouble (USER): ");
344 for (j = 0; j < NTRIALS; j++) {
346 for (rep = 0; rep < N_REPS; rep++) {
/* Gather n doubles from src through the displacement table into
 * consecutive slots of the packed output. */
350 for (i = 0; i < n; i++) {
351 *d_dest++ = d_src[displs[i]];
/* Elapsed wall-clock time since t1, taken from MPI_Wtime. */
354 t2 = MPI_Wtime() - t1;
357 printf("%.3f ", t[j]);
/* noise(t, NTRIALS) is evaluated here for printing and once more in the
 * threshold test that follows. */
360 printf("[%.3f]\n", noise(t, NTRIALS));
361 /* If there is too much noise, discard the test */
362 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
366 printf("Too much noise; discarding measurement\n");
/* Average time of a single repetition. */
369 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
/* n blocks of one MPI_DOUBLE each, placed at the displacements in displs. */
372 MPI_Type_create_indexed_block(n, 1, displs, MPI_DOUBLE, &indextype);
373 MPI_Type_commit(&indextype);
/* MPI phase, same trial/repetition structure as the USER phase. */
378 printf("TestIndexPackDouble (MPI): ");
379 for (j = 0; j < NTRIALS; j++) {
381 for (rep = 0; rep < N_REPS; rep++) {
/* Pack one indextype item from src into dest; the output buffer size is
 * given as n * sizeof(double) bytes. */
383 MPI_Pack((void *) src, 1, indextype, dest, n * sizeof(double),
384 &position, MPI_COMM_SELF);
386 t2 = MPI_Wtime() - t1;
389 printf("%.3f ", t[j]);
392 printf("[%.3f]\n", noise(t, NTRIALS));
393 /* If there is too much noise, discard the test */
394 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
398 printf("Too much noise; discarding measurement\n");
401 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the committed indexed-block datatype. */
403 MPI_Type_free(&indextype);
/* Report: prints the MPI and user average times for one test together with
 * their difference as a percentage of the larger time, and prints a
 * "too slow" line when the MPI time exceeds the user time by more than
 * THRESHOLD * maxTime.
 *   name        - test label printed in both output lines
 *   packname    - operation name inserted into the "too slow" message
 *   avgTimeMPI  - average time measured for the MPI implementation
 *   avgTimeUser - average time measured for the user code
 * The caller adds the return value to its error count.
 * NOTE(review): the return statements are not shown here; presumably a
 * non-zero value marks the "too slow" case -- confirm. */
408 int Report(const char *name, const char *packname, double avgTimeMPI, double avgTimeUser);
409 int Report(const char *name, const char *packname, double avgTimeMPI, double avgTimeUser)
411 double diffTime, maxTime;
414 /* Move this into a common routine */
415 diffTime = avgTimeMPI - avgTimeUser;
/* NOTE(review): presumably guarded by a sign test so that diffTime ends up
 * as the absolute difference -- confirm. */
417 diffTime = -diffTime;
/* maxTime is the reference for the relative difference; it takes
 * avgTimeMPI when that is the larger value. */
418 if (avgTimeMPI > avgTimeUser)
419 maxTime = avgTimeMPI;
421 maxTime = avgTimeUser;
/* Columns: name, MPI time, user time, difference in percent of maxTime. */
424 printf("%-30s:\t%g\t%g\t(%g%%)\n", name,
425 avgTimeMPI, avgTimeUser, 100 * (diffTime / maxTime));
/* Only a slower MPI path counts as a problem, and only beyond the
 * THRESHOLD fraction of maxTime. */
428 if (avgTimeMPI > avgTimeUser && (diffTime > THRESHOLD * maxTime)) {
430 printf("%s:\tMPI %s code is too slow: MPI %g\t User %g\n",
431 name, packname, avgTimeMPI, avgTimeUser);
437 /* Finally, here's the main program */
/* main: initializes MPI, allocates and zero-fills the source and
 * destination buffers, runs each pack/unpack test, adds every Report result
 * to errs, and prints either the no-error line or the number of
 * performance problems.
 * NOTE(review): the values given to n and stride, the effect of the
 * MPITEST_VERBOSE check, and the shutdown code are not shown here --
 * confirm. */
438 int main(int argc, char *argv[])
440 int n, stride, errs = 0;
442 double avgTimeUser, avgTimeMPI;
444 MPI_Init(&argc, &argv);
445 if (getenv("MPITEST_VERBOSE"))
/* dest holds n packed doubles; src holds n * (1 + stride) doubles.
 * NOTE(review): both malloc results go straight to memset with no NULL
 * check in between. */
450 dest = (void *) malloc(n * sizeof(double));
451 src = (void *) malloc(n * ((1 + stride) * sizeof(double)));
452 /* Touch the source and destination arrays */
453 memset(src, 0, n * (1 + stride) * sizeof(double));
454 memset(dest, 0, n * sizeof(double));
456 TestVecPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, dest, src);
457 errs += Report("VecPackDouble", "Pack", avgTimeMPI, avgTimeUser);
/* The unpack test gets the buffers in swapped roles: src receives the
 * unpacked data and dest supplies the packed input. */
459 TestVecUnPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, src, dest);
460 errs += Report("VecUnPackDouble", "Unpack", avgTimeMPI, avgTimeUser);
462 TestIndexPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, dest, src);
463 errs += Report("VecIndexDouble", "Pack", avgTimeMPI, avgTimeUser);
/* Larger buffers for the two-doubles-per-block test: 2 * n packed doubles
 * and (1 + n) * (1 + stride) source doubles.
 * NOTE(review): no release of the earlier buffers is shown before dest and
 * src are reassigned, and these malloc results are also unchecked --
 * confirm. */
468 dest = (void *) malloc(2 * n * sizeof(double));
469 src = (void *) malloc((1 + n) * ((1 + stride) * sizeof(double)));
470 memset(dest, 0, 2 * n * sizeof(double));
471 memset(src, 0, (1 + n) * (1 + stride) * sizeof(double));
472 TestVecPack2Double(n, stride, &avgTimeUser, &avgTimeMPI, dest, src);
473 errs += Report("VecPack2Double", "Pack", avgTimeMPI, avgTimeUser);
/* Summary output. */
481 printf(" No Errors\n");
484 printf(" Found %d performance problems\n", errs);