1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * Changes to this example
4 * (C) 2001 by Argonne National Laboratory.
5 * See COPYRIGHT in top-level directory.
9 * This example is taken from MPI-The complete reference, Vol 1,
12 * Lines after the "--CUT HERE--" were added to make this into a complete
16 /* Specify the maximum number of errors to report. */
26 MPI_Datatype transpose_type(int M, int m, int n, MPI_Datatype type);
27 MPI_Datatype submatrix_type(int N, int m, int n, MPI_Datatype type);
28 void Transpose(float *localA, float *localB, int M, int N, MPI_Comm comm);
29 void Transpose(float *localA, float *localB, int M, int N, MPI_Comm comm)
30 /* transpose MxN matrix A that is block distributed (1-D) on
31 processes of comm onto block distributed matrix B */
33 int i, j, extent, myrank, p, n[2], m[2];
35 int *sendcounts, *recvcounts;
36 int *sdispls, *rdispls;
37 MPI_Datatype xtype[2][2], stype[2][2], *sendtypes, *recvtypes;
39 MTestPrintfMsg(2, "M = %d, N = %d\n", M, N);
41 /* compute parameters */
42 MPI_Comm_size(comm, &p);
43 MPI_Comm_rank(comm, &myrank);
44 extent = sizeof(float);
47 sendcounts = (int *) malloc(p * sizeof(int));
48 recvcounts = (int *) malloc(p * sizeof(int));
49 sdispls = (int *) malloc(p * sizeof(int));
50 rdispls = (int *) malloc(p * sizeof(int));
51 sendtypes = (MPI_Datatype *) malloc(p * sizeof(MPI_Datatype));
52 recvtypes = (MPI_Datatype *) malloc(p * sizeof(MPI_Datatype));
54 /* compute block sizes */
56 m[1] = M - (p - 1) * (M / p);
58 n[1] = N - (p - 1) * (N / p);
61 for (i = 0; i <= 1; i++)
62 for (j = 0; j <= 1; j++) {
63 xtype[i][j] = transpose_type(N, m[i], n[j], MPI_FLOAT);
64 stype[i][j] = submatrix_type(M, m[i], n[j], MPI_FLOAT);
67 /* prepare collective operation arguments */
68 lasti = myrank == p - 1;
69 for (j = 0; j < p; j++) {
72 sdispls[j] = j * n[0] * extent;
73 sendtypes[j] = xtype[lasti][lastj];
75 rdispls[j] = j * m[0] * extent;
76 recvtypes[j] = stype[lastj][lasti];
80 MTestPrintfMsg(2, "Begin Alltoallw...\n");
81 /* -- Note that the book incorrectly uses &localA and &localB
82 * as arguments to MPI_Alltoallw */
83 MPI_Alltoallw(localA, sendcounts, sdispls, sendtypes,
84 localB, recvcounts, rdispls, recvtypes, comm);
85 MTestPrintfMsg(2, "Done with Alltoallw\n");
96 for (i = 0; i <= 1; i++)
97 for (j = 0; j <= 1; j++) {
98 MPI_Type_free(&xtype[i][j]);
99 MPI_Type_free(&stype[i][j]);
104 /* Define an n x m submatrix in a n x M local matrix (this is the
105 destination in the transpose matrix */
106 MPI_Datatype submatrix_type(int M, int m, int n, MPI_Datatype type)
107 /* computes a datatype for an mxn submatrix within an MxN matrix
108 with entries of type type */
110 /* MPI_Datatype subrow; */
111 MPI_Datatype submatrix;
113 /* The book, MPI: The Complete Reference, has the wrong type constructor
114 * here. Since the stride in the vector type is relative to the input
115 * type, the stride in the book's code is n times as long as is intended.
116 * Since n may not exactly divide N, it is better to simply use the
117 * blocklength argument in Type_vector */
119 * MPI_Type_contiguous(n, type, &subrow);
120 * MPI_Type_vector(m, 1, N, subrow, &submatrix);
122 MPI_Type_vector(n, m, M, type, &submatrix);
123 MPI_Type_commit(&submatrix);
125 /* Add a consistency test: the size of submatrix should be
126 * n * m * sizeof(type) and the extent should be ((n-1)*M+m) * sizeof(type) */
129 MPI_Aint textent, lb;
130 MPI_Type_size(type, &tsize);
131 MPI_Type_get_extent(submatrix, &lb, &textent);
133 if (textent != tsize * (M * (n - 1) + m)) {
134 fprintf(stderr, "Submatrix extent is %ld, expected %ld (%d,%d,%d)\n",
135 (long) textent, (long) (tsize * (M * (n - 1) + m)), M, n, m);
141 /* Extract an m x n submatrix within an m x N matrix and transpose it.
142 Assume storage by rows; the defined datatype accesses by columns */
143 MPI_Datatype transpose_type(int N, int m, int n, MPI_Datatype type)
144 /* computes a datatype for the transpose of an mxn matrix
145 with entries of type type */
147 MPI_Datatype subrow, subrow1, submatrix;
150 MPI_Type_vector(m, 1, N, type, &subrow);
151 MPI_Type_get_extent(type, &lb, &extent);
152 MPI_Type_create_resized(subrow, 0, extent, &subrow1);
153 MPI_Type_contiguous(n, subrow1, &submatrix);
154 MPI_Type_commit(&submatrix);
155 MPI_Type_free(&subrow);
156 MPI_Type_free(&subrow1);
158 /* Add a consistency test: the size of submatrix should be
159 * n * m * sizeof(type) and the extent should be ((m-1)*N+n) * sizeof(type) */
162 MPI_Aint textent, llb;
163 MPI_Type_size(type, &tsize);
164 MPI_Type_get_true_extent(submatrix, &llb, &textent);
166 /* if (textent != tsize * (N * (m - 1) + n)) {*/
167 /* fprintf(stderr, "Transpose Submatrix extent is %ld, expected %ld (%d,%d,%d)\n",*/
168 /* (long) textent, (long) (tsize * (N * (m - 1) + n)), N, n, m);*/
177 int main(int argc, char *argv[])
179 int gM, gN, lm, lmlast, ln, lnlast, i, j, errs = 0;
181 float *localA, *localB;
184 MTest_Init(&argc, &argv);
185 comm = MPI_COMM_WORLD;
187 MPI_Comm_size(comm, &size);
188 MPI_Comm_rank(comm, &rank);
193 /* Each block is lm x ln in size, except for the last process,
194 * which has lmlast x lnlast */
196 lmlast = gM - (size - 1) * lm;
198 lnlast = gN - (size - 1) * ln;
200 /* Create the local matrices.
201 * Initialize the input matrix so that the entries are
202 * consecutive integers, by row, starting at 0.
204 if (rank == size - 1) {
205 localA = (float *) malloc(gN * lmlast * sizeof(float));
206 localB = (float *) malloc(gM * lnlast * sizeof(float));
207 for (i = 0; i < lmlast; i++) {
208 for (j = 0; j < gN; j++) {
209 localA[i * gN + j] = (float) (i * gN + j + rank * gN * lm);
215 localA = (float *) malloc(gN * lm * sizeof(float));
216 localB = (float *) malloc(gM * ln * sizeof(float));
217 for (i = 0; i < lm; i++) {
218 for (j = 0; j < gN; j++) {
219 localA[i * gN + j] = (float) (i * gN + j + rank * gN * lm);
224 MTestPrintfMsg(2, "Allocated local arrays\n");
226 Transpose(localA, localB, gM, gN, comm);
228 /* check the transposed matrix
229 * In the global matrix, the transpose has consecutive integers,
230 * organized by columns.
232 if (rank == size - 1) {
233 for (i = 0; i < lnlast; i++) {
234 for (j = 0; j < gM; j++) {
235 int expected = i + gN * j + rank * ln;
236 if ((int) localB[i * gM + j] != expected) {
237 if (errs < MAX_ERRORS)
238 printf("Found %d but expected %d\n", (int) localB[i * gM + j], expected);
246 for (i = 0; i < ln; i++) {
247 for (j = 0; j < gM; j++) {
248 int expected = i + gN * j + rank * ln;
249 if ((int) localB[i * gM + j] != expected) {
250 if (errs < MAX_ERRORS)
251 printf("Found %d but expected %d\n", (int) localB[i * gM + j], expected);
262 MTest_Finalize(errs);