1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2012 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
8 * Tests that packing a struct that contains a vector type performs well,
9 * i.e., that the pack implementation exploits the vector type correctly
11 * If PACK_IS_NATIVE is defined, MPI_Pack stores exactly the same bytes as the
12 * user would pack manually; in that case, there is a consistency check.
20 #include "mpitestconf.h"
26 /* MPICH (as of 6/2012) packs the native bytes */
27 #define PACK_IS_NATIVE
31 static int verbose = 0;      /* File-private flag, off (0) by default. NOTE(review): its uses are not visible in this excerpt -- presumably it gates the diagnostic printf output; confirm. */
33 int main(int argc, char **argv)
37 int packsize, i, position, errs = 0;
38 double *outbuf, *outbuf2;
40 MPI_Datatype vtype, stype;
42 double tspack, tvpack, tmanual;
45 MPI_Aint displacements[2];
46 MPI_Datatype typesArray[2];
48 MPI_Init(&argc, &argv);
50 /* Create a struct consisting of two 32-bit ints, followed by a
51 * vector of stride 3 but count 128k (less than a few MB of data area) */
54 MPI_Type_vector(vcount, 1, vstride, MPI_DOUBLE, &vtype);
56 vsource = (double *) malloc((vcount + 1) * (vstride + 1) * sizeof(double));
58 fprintf(stderr, "Unable to allocate vsource\n");
59 MPI_Abort(MPI_COMM_WORLD, 1);
61 for (i = 0; i < vcount * vstride; i++) {
65 MPI_Get_address(&counts[0], &displacements[0]);
67 MPI_Get_address(vsource, &displacements[1]);
69 printf("%p = %p?\n", vsource, (void *) displacements[1]);
71 typesArray[0] = MPI_INT32_T;
72 typesArray[1] = vtype;
73 MPI_Type_create_struct(2, blocklengths, displacements, typesArray, &stype);
74 MPI_Type_commit(&stype);
75 MPI_Type_commit(&vtype);
77 #if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
78 /* To use MPIDU_Datatype_debug to print the datatype internals,
79 * you must configure MPICH with --enable-g=log */
81 printf("Original struct datatype:\n");
82 MPIDU_Datatype_debug(stype, 10);
86 MPI_Pack_size(1, stype, MPI_COMM_WORLD, &packsize);
87 outbuf = (double *) malloc(packsize);
88 outbuf2 = (double *) malloc(packsize);
90 fprintf(stderr, "Unable to allocate %ld for outbuf\n", (long) packsize);
91 MPI_Abort(MPI_COMM_WORLD, 1);
94 fprintf(stderr, "Unable to allocate %ld for outbuf2\n", (long) packsize);
95 MPI_Abort(MPI_COMM_WORLD, 1);
98 /* Warm up the code and data */
99 MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);
102 for (ntry = 0; ntry < 5; ntry++) {
105 MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);
106 t1 = MPI_Wtime() - t0;
110 MPI_Type_free(&stype);
112 /* An equivalent packing, using the 2 ints and the vector separately */
114 for (ntry = 0; ntry < 5; ntry++) {
117 MPI_Pack(counts, 2, MPI_INT32_T, outbuf, packsize, &position, MPI_COMM_WORLD);
118 MPI_Pack(vsource, 1, vtype, outbuf, packsize, &position, MPI_COMM_WORLD);
119 t1 = MPI_Wtime() - t0;
123 MPI_Type_free(&vtype);
125 /* Note that we exploit the fact that the vector type contains vblock
126 * instances of a contiguous type of size 24, or a single block of 24*vblock
130 for (ntry = 0; ntry < 5; ntry++) {
131 const double *restrict ppe = (const double *) vsource;
132 double *restrict ppo = outbuf2;
136 *(int32_t *) ppo = counts[0];
137 *(((int32_t *) ppo) + 1) = counts[1];
139 /* Some hand optimization because this file is not normally
140 * compiled with optimization by the test suite */
153 position += (1 + vcount);
154 position *= sizeof(double);
155 t1 = MPI_Wtime() - t0;
159 /* Check on correctness */
160 #ifdef PACK_IS_NATIVE
161 if (memcmp(outbuf, outbuf2, position) != 0) {
162 printf("Panic(manual) - pack buffers differ\n");
163 for (j = 0; j < 8; j++) {
164 printf("%d: %llx\t%llx\n", j, (long long unsigned) outbuf[j],
165 (long long unsigned) outbuf2[j]);
172 printf("Bytes packed = %d\n", position);
173 printf("MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n",
174 tspack, tvpack, tmanual);
177 if (4 * tmanual < tspack) {
179 printf("MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack,
181 printf("MPI_Pack time should be less than 4 times the manual time\n");
182 printf("For most informative results, be sure to compile this test with optimization\n");
184 if (4 * tmanual < tvpack) {
186 printf("MPI_Pack using vector = %e, manual pack time = %e\n", tvpack, tmanual);
187 printf("MPI_Pack time should be less than 4 times the manual time\n");
188 printf("For most informative results, be sure to compile this test with optimization\n");
190 if (4 * tvpack < tspack) {
192 printf("MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack);
194 ("MPI_Pack time using vector should be about the same as the struct containing the vector\n");
195 printf("For most informative results, be sure to compile this test with optimization\n");
199 printf(" Found %d errors\n", errs);
202 printf(" No Errors\n");