1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * Changes to this example
4 * (C) 2001 by Argonne National Laboratory.
5 * See COPYRIGHT in top-level directory.
9 * This example is taken from MPI-The complete reference, Vol 1,
12 * Lines after the "--CUT HERE--" were added to make this into a complete
16 /* Specify the maximum number of errors to report. */
26 MPI_Datatype transpose_type(int M, int m, int n, MPI_Datatype type);
27 MPI_Datatype submatrix_type(int N, int m, int n, MPI_Datatype type);
28 void Transpose(float *localA, float *localB, int M, int N, MPI_Comm comm);
29 void Transpose(float *localA, float *localB, int M, int N, MPI_Comm comm)
30 /* transpose MxN matrix A that is block distributed (1-D) on
31 processes of comm onto block distributed matrix B */
33 int i, j, extent, myrank, p, n[2], m[2];
35 int *sendcounts, *recvcounts;
36 int *sdispls, *rdispls;
37 MPI_Datatype xtype[2][2], stype[2][2], *sendtypes, *recvtypes;
39 MTestPrintfMsg( 2, "M = %d, N = %d\n", M, N );
41 /* compute parameters */
42 MPI_Comm_size(comm, &p);
43 MPI_Comm_rank(comm, &myrank);
44 extent = sizeof(float);
47 sendcounts = (int *)malloc(p*sizeof(int));
48 recvcounts = (int *)malloc(p*sizeof(int));
49 sdispls = (int *)malloc(p*sizeof(int));
50 rdispls = (int *)malloc(p*sizeof(int));
51 sendtypes = (MPI_Datatype *)malloc(p*sizeof(MPI_Datatype));
52 recvtypes = (MPI_Datatype *)malloc(p*sizeof(MPI_Datatype));
54 /* compute block sizes */
56 m[1] = M - (p-1)*(M/p);
58 n[1] = N - (p-1)*(N/p);
61 for (i=0; i <= 1; i++)
62 for (j=0; j <= 1; j++) {
63 xtype[i][j] = transpose_type(N, m[i], n[j], MPI_FLOAT);
64 stype[i][j] = submatrix_type(M, m[i], n[j], MPI_FLOAT);
67 /* prepare collective operation arguments */
68 lasti = myrank == p-1;
69 for (j=0; j < p; j++) {
72 sdispls[j] = j*n[0]*extent;
73 sendtypes[j] = xtype[lasti][lastj];
75 rdispls[j] = j*m[0]*extent;
76 recvtypes[j] = stype[lastj][lasti];
80 MTestPrintfMsg( 2, "Begin Alltoallw...\n" );
81 /* -- Note that the book incorrectly uses &localA and &localB
82 as arguments to MPI_Alltoallw */
83 MPI_Alltoallw(localA, sendcounts, sdispls, sendtypes,
84 localB, recvcounts, rdispls, recvtypes, comm);
85 MTestPrintfMsg( 2, "Done with Alltoallw\n" );
96 for (i=0; i <= 1; i++)
97 for (j=0; j <= 1; j++) {
98 MPI_Type_free( &xtype[i][j] );
99 MPI_Type_free( &stype[i][j] );
104 /* Define an n x m submatrix in a n x M local matrix (this is the
105 destination in the transpose matrix */
106 MPI_Datatype submatrix_type(int M, int m, int n, MPI_Datatype type)
107 /* computes a datatype for an mxn submatrix within an MxN matrix
108 with entries of type type */
110 /* MPI_Datatype subrow; */
111 MPI_Datatype submatrix;
113 /* The book, MPI: The Complete Reference, has the wrong type constructor
114 here. Since the stride in the vector type is relative to the input
115 type, the stride in the book's code is n times as long as is intended.
116 Since n may not exactly divide N, it is better to simply use the
117 blocklength argument in Type_vector */
119 MPI_Type_contiguous(n, type, &subrow);
120 MPI_Type_vector(m, 1, N, subrow, &submatrix);
122 MPI_Type_vector(n, m, M, type, &submatrix );
123 MPI_Type_commit(&submatrix);
125 /* Add a consistency test: the size of submatrix should be
126 n * m * sizeof(type) and the extent should be ((n-1)*M+m) * sizeof(type) */
129 MPI_Aint textent, lb;
130 MPI_Type_size( type, &tsize );
131 MPI_Type_get_extent( submatrix, &lb, &textent );
133 if (textent != tsize * (M * (n-1)+m)) {
134 fprintf( stderr, "Submatrix extent is %ld, expected %ld (%d,%d,%d)\n",
135 (long)textent, (long)(tsize * (M * (n-1)+m)), M, n, m );
141 /* Extract an m x n submatrix within an m x N matrix and transpose it.
142 Assume storage by rows; the defined datatype accesses by columns */
143 MPI_Datatype transpose_type(int N, int m, int n, MPI_Datatype type)
144 /* computes a datatype for the transpose of an mxn matrix
145 with entries of type type */
147 MPI_Datatype subrow, subrow1, submatrix;
150 MPI_Type_vector(m, 1, N, type, &subrow);
151 MPI_Type_get_extent(type, &lb, &extent);
152 MPI_Type_create_resized(subrow, 0, extent, &subrow1);
153 MPI_Type_contiguous(n, subrow1, &submatrix);
154 MPI_Type_commit(&submatrix);
155 MPI_Type_free( &subrow );
156 MPI_Type_free( &subrow1 );
158 /* Add a consistency test: the size of submatrix should be
159 n * m * sizeof(type) and the extent should be ((m-1)*N+n) * sizeof(type) */
162 MPI_Aint textent, llb;
163 MPI_Type_size( type, &tsize );
164 MPI_Type_get_true_extent( submatrix, &llb, &textent );
166 if (textent != tsize * (N * (m-1)+n)) {
167 fprintf( stderr, "Transpose Submatrix extent is %ld, expected %ld (%d,%d,%d)\n",
168 (long)textent, (long)(tsize * (N * (m-1)+n)), N, n, m );
177 int main( int argc, char *argv[] )
179 int gM, gN, lm, lmlast, ln, lnlast, i, j, errs = 0;
181 float *localA, *localB;
184 MTest_Init( &argc, &argv );
185 comm = MPI_COMM_WORLD;
187 MPI_Comm_size( comm, &size );
188 MPI_Comm_rank( comm, &rank );
193 /* Each block is lm x ln in size, except for the last process,
194 which has lmlast x lnlast */
196 lmlast = gM - (size - 1)*lm;
198 lnlast = gN - (size - 1)*ln;
200 /* Create the local matrices.
201 Initialize the input matrix so that the entries are
202 consequtive integers, by row, starting at 0.
204 if (rank == size - 1) {
205 localA = (float *)malloc( gN * lmlast * sizeof(float) );
206 localB = (float *)malloc( gM * lnlast * sizeof(float) );
207 for (i=0; i<lmlast; i++) {
208 for (j=0; j<gN; j++) {
209 localA[i*gN+j] = (float)(i*gN+j + rank * gN * lm);
215 localA = (float *)malloc( gN * lm * sizeof(float) );
216 localB = (float *)malloc( gM * ln * sizeof(float) );
217 for (i=0; i<lm; i++) {
218 for (j=0; j<gN; j++) {
219 localA[i*gN+j] = (float)(i*gN+j + rank * gN * lm);
224 MTestPrintfMsg( 2, "Allocated local arrays\n" );
226 Transpose( localA, localB, gM, gN, comm );
228 /* check the transposed matrix
229 In the global matrix, the transpose has consequtive integers,
230 organized by columns.
232 if (rank == size - 1) {
233 for (i=0; i<lnlast; i++) {
234 for (j=0; j<gM; j++) {
235 int expected = i+gN*j + rank * ln;
236 if ((int)localB[i*gM+j] != expected) {
237 if (errs < MAX_ERRORS)
238 printf( "Found %d but expected %d\n",
239 (int)localB[i*gM+j], expected );
247 for (i=0; i<ln; i++) {
248 for (j=0; j<gM; j++) {
249 int expected = i+gN*j + rank * ln;
250 if ((int)localB[i*gM+j] != expected) {
251 if (errs < MAX_ERRORS)
252 printf( "Found %d but expected %d\n",
253 (int)localB[i*gM+j], expected );
264 MTest_Finalize( errs );