1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2012 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
8 * Tests that packing a struct datatype that contains a vector type
9 * performs well, i.e., that MPI_Pack exploits the vector type correctly
11 * If PACK_IS_NATIVE is defined, MPI_Pack stores exactly the same bytes as the
12 * user would pack manually; in that case, there is a consistency check.
20 #include "mpitestconf.h"
26 /* MPICH (as of 6/2012) packs the native bytes */
27 #define PACK_IS_NATIVE
/* File-scope verbosity flag, off (0) by default.
   NOTE(review): presumably gates the extra diagnostic printf output in
   main; confirm against its uses. */
31 static int verbose = 0;
33 int main( int argc, char **argv )
37 int v2stride, typesize, packsize, i, position, errs = 0;
38 double *outbuf, *outbuf2;
40 MPI_Datatype vtype, stype;
43 double tspack, tvpack, tmanual;
46 MPI_Aint displacements[2];
47 MPI_Datatype typesArray[2];
49 MPI_Init( &argc, &argv );
51 /* Create a struct consisting of two 32-bit ints, followed by a
52 vector of stride 3 but count 128k (less than a few MB of data area) */
55 MPI_Type_vector( vcount, 1, vstride, MPI_DOUBLE, &vtype );
57 vsource = (double *)malloc( (vcount + 1) * (vstride + 1) * sizeof(double) );
59 fprintf( stderr, "Unable to allocate vsource\n" );
60 MPI_Abort( MPI_COMM_WORLD, 1 );
62 for (i=0; i<vcount*vstride; i++) {
65 blocklengths[0] = 2; MPI_Get_address( &counts[0], &displacements[0] );
66 blocklengths[1] = 1; MPI_Get_address( vsource, &displacements[1] );
68 printf( "%p = %p?\n", vsource, (void *)displacements[1] );
70 typesArray[0] = MPI_INT32_T;
71 typesArray[1] = vtype;
72 MPI_Type_create_struct( 2, blocklengths, displacements, typesArray,
74 MPI_Type_commit( &stype );
75 MPI_Type_commit( &vtype );
77 #if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
78 /* To use MPIDU_Datatype_debug to print the datatype internals,
79 you must configure MPICH with --enable-g=log */
81 printf( "Original struct datatype:\n" );
82 MPIDU_Datatype_debug( stype, 10 );
86 MPI_Pack_size( 1, stype, MPI_COMM_WORLD, &packsize );
87 outbuf = (double *)malloc( packsize );
88 outbuf2 = (double *)malloc( packsize );
90 fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize );
91 MPI_Abort( MPI_COMM_WORLD, 1 );
94 fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize );
95 MPI_Abort( MPI_COMM_WORLD, 1 );
98 /* Warm up the code and data */
99 MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position,
103 for (ntry = 0; ntry < 5; ntry++) {
106 MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position,
108 t1 = MPI_Wtime() - t0;
109 if (t1 < tspack) tspack = t1;
111 MPI_Type_free( &stype );
113 /* An equivalent packing, using the 2 ints and the vector separately */
115 for (ntry = 0; ntry < 5; ntry++) {
118 MPI_Pack( counts, 2, MPI_INT32_T, outbuf, packsize, &position,
120 MPI_Pack( vsource, 1, vtype, outbuf, packsize, &position,
122 t1 = MPI_Wtime() - t0;
123 if (t1 < tvpack) tvpack = t1;
125 MPI_Type_free( &vtype );
127 /* Note that we exploit the fact that the vector type contains vblock
128 instances of a contiguous type of size 24, or a single block of 24*vblock
132 for (ntry = 0; ntry < 5; ntry++) {
133 const double * restrict ppe = (const double *)vsource;
134 double * restrict ppo = outbuf2;
138 *(int32_t *)ppo = counts[0];
139 *( ((int32_t *)ppo) + 1) = counts[1];
141 /* Some hand optimization because this file is not normally
142 compiled with optimization by the test suite */
155 position += (1 + vcount);
156 position *= sizeof(double);
157 t1 = MPI_Wtime() - t0;
158 if (t1 < tmanual) tmanual = t1;
160 /* Check on correctness */
161 #ifdef PACK_IS_NATIVE
162 if (memcmp( outbuf, outbuf2, position ) != 0) {
163 printf( "Panic(manual) - pack buffers differ\n" );
164 for (j=0; j<8; j++) {
165 printf( "%d: %llx\t%llx\n", j, (long long unsigned)outbuf[j],
166 (long long unsigned)outbuf2[j] );
173 printf( "Bytes packed = %d\n", position );
174 printf( "MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n",
175 tspack, tvpack, tmanual );
178 if (4 * tmanual < tspack) {
180 printf( "MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack, tmanual )
182 printf( "MPI_Pack time should be less than 4 times the manual time\n" );
183 printf( "For most informative results, be sure to compile this test with optimization\n" );
185 if (4 * tmanual < tvpack) {
187 printf( "MPI_Pack using vector = %e, manual pack time = %e\n", tvpack,
189 printf( "MPI_Pack time should be less than 4 times the manual time\n" );
190 printf( "For most informative results, be sure to compile this test with optimization\n" );
192 if (4 * tvpack < tspack) {
194 printf( "MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack );
195 printf( "MPI_Pack time using vector should be about the same as the struct containing the vector\n" );
196 printf( "For most informative results, be sure to compile this test with optimization\n" );
200 printf( " Found %d errors\n", errs );
203 printf( " No Errors\n" );