include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include/")
foreach(test allgather2 allgather3 allgather_struct allgatherv2 allgatherv3
- allred2 allred3 allred4 allred5 allred6 allredmany alltoall1
+ allred2 allred3 allred4 allred5 allred6 allredmany allred_derived allred_float alltoall1
alltoallv0 alltoallv alltoallw1 alltoallw2 alltoallw_zeros
bcasttest bcastzerotype coll2 coll3 coll4 coll5 coll6 coll7 coll8
coll9 coll10 coll11 coll12 coll13 exscan exscan2 gather gather2 gatherv
${CMAKE_CURRENT_SOURCE_DIR}/allred6.c
${CMAKE_CURRENT_SOURCE_DIR}/allred.c
${CMAKE_CURRENT_SOURCE_DIR}/allredmany.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/allred_derived.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/allred_float.c
${CMAKE_CURRENT_SOURCE_DIR}/alltoall1.c
${CMAKE_CURRENT_SOURCE_DIR}/alltoallv0.c
${CMAKE_CURRENT_SOURCE_DIR}/alltoallv.c
--- /dev/null
+/*
+ * Copyright (C) by Argonne National Laboratory
+ * See COPYRIGHT in top-level directory
+ */
+
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpitest.h"
+#include <assert.h>
+
+/*
+static char MTEST_Descrip[] = "Test MPI_Allreduce with commutative user-defined operations";
+*/
+
+/* largest vector block length tested; the loop doubles up to this bound */
+#define MAX_BLOCKLEN 10000
+#define IS_COMMUTATIVE 1
+
+/* We make the error count global so that we can easily control the output
+   of error information (in particular, limiting it after the first 10
+   errors). */
+int errs = 0;
+
+/* parameters for a vector type of MPI_INT; uop relies on these matching
+   the datatype passed to MPI_Allreduce */
+int g_blocklen = 1;
+int g_stride = 1;
+
+/* User-defined reduction: element-wise integer sum over the strided
+ * vector layout described by g_blocklen and g_stride.  Each of *count
+ * elements occupies g_blocklen ints spaced g_stride apart. */
+void uop(void *, void *, int *, MPI_Datatype *);
+void uop(void *cinPtr, void *coutPtr, int *count, MPI_Datatype * dtype)
+{
+    const int *src = (const int *) cinPtr;
+    int *dst = (int *) coutPtr;
+
+    for (int i = 0; i < *count; i++) {
+        for (int j = 0; j < g_blocklen; j++) {
+            int idx = (i * g_blocklen + j) * g_stride;
+            dst[idx] += src[idx];
+        }
+    }
+}
+
+/* Fill buf with rank-dependent values (rank + i + j) laid out in the
+ * same strided pattern that the vector datatype and uop use. */
+static void init_buf(void *buf, int count, int rank)
+{
+    int *data = buf;
+
+    for (int i = 0; i < count; i++) {
+        for (int j = 0; j < g_blocklen; j++) {
+            data[(i * g_blocklen + j) * g_stride] = rank + i + j;
+        }
+    }
+}
+
+/* Verify the allreduce result.  Every rank r contributed r + i + j, so
+ * the sum over all ranks is size*(size-1)/2 + (i + j) * size.  Returns
+ * the number of mismatches; printing stops after 10 total errors. */
+static int check_result(void *buf, int count, int size)
+{
+    int lerrs = 0;
+    int *data = buf;
+
+    for (int i = 0; i < count; i++) {
+        for (int j = 0; j < g_blocklen; j++) {
+            int expected = size * (size - 1) / 2 + (i + j) * size;
+            int got = data[(i * g_blocklen + j) * g_stride];
+            if (got != expected) {
+                lerrs++;
+                if (errs + lerrs < 10) {
+                    printf("[%d - %d] expected %d, got %d, %s\n",
+                           i, j, expected, got, MTestGetIntracommName());
+                }
+            }
+        }
+    }
+    return lerrs;
+}
+
+/* Run MPI_Allreduce with a commutative user-defined op over a strided
+ * vector of MPI_INT, for doubling block lengths up to MAX_BLOCKLEN, on
+ * every test communicator.  Exercises both the separate-buffer and the
+ * MPI_IN_PLACE code paths. */
+int main(int argc, char *argv[])
+{
+    MPI_Comm comm;
+    void *buf, *bufout;
+    MPI_Op op;
+    MPI_Datatype datatype;
+
+    MTest_Init(&argc, &argv);
+
+    /* commutativity permits the implementation to reorder operands */
+    MPI_Op_create(uop, IS_COMMUTATIVE, &op);
+
+    while (MTestGetIntracommGeneral(&comm, 2, 1)) {
+        if (comm == MPI_COMM_NULL) {
+            continue;
+        }
+
+        int rank, size;
+        MPI_Comm_rank(comm, &rank);
+        MPI_Comm_size(comm, &size);
+
+        int count = 10;
+        for (int n = 1; n < MAX_BLOCKLEN; n *= 2) {
+            g_blocklen = n;
+            /* per-element allocation slot; at least as large as the
+             * vector type's true extent of ((n-1)*stride + 1) ints */
+            int extent = g_blocklen * g_stride * sizeof(int);
+            MPI_Type_vector(g_blocklen, 1, g_stride, MPI_INT, &datatype);
+            MPI_Type_commit(&datatype);
+
+            buf = (int *) malloc(extent * count);
+            if (!buf) {
+                MPI_Abort(MPI_COMM_WORLD, 1);
+            }
+            bufout = (int *) malloc(extent * count);
+            if (!bufout) {
+                MPI_Abort(MPI_COMM_WORLD, 1);
+            }
+
+            init_buf(buf, count, rank);
+            MPI_Allreduce(buf, bufout, count, datatype, op, comm);
+            errs += check_result(bufout, count, size);
+
+            /* do it again using MPI_IN_PLACE */
+            init_buf(bufout, count, rank);
+            MPI_Allreduce(MPI_IN_PLACE, bufout, count, datatype, op, comm);
+            errs += check_result(bufout, count, size);
+
+            free(buf);
+            free(bufout);
+            MPI_Type_free(&datatype);
+        }
+
+        MTestFreeComm(&comm);
+    }
+
+    MPI_Op_free(&op);
+
+    MTest_Finalize(errs);
+    return MTestReturnValue(errs);
+}
--- /dev/null
+/*
+ * Copyright (C) by Argonne National Laboratory
+ * See COPYRIGHT in top-level directory
+ */
+#include "mpi.h"
+#include "mpitest.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* MPI_Allreduce must produce identical results on all ranks. This is
+ * particularly challenging for floating point datatypes, since computer
+ * floating point arithmetic does not follow the associative law. This
+ * means certain algorithms that work for integers must be excluded for
+ * floating point.
+ *
+ * This test detects when an inappropriate algorithm is used for floating
+ * point reduction.
+ */
+
+/* single-precision float has roughly a precision of 7 decimal digits,
+ * so TINY vanishes completely when added to BIG */
+#define BIG 1e6
+#define TINY 1e-2
+
+/* number of floats reduced per iteration */
+#define N 8
+
+/* per-rank contribution; holds the local result after the allreduce */
+float buf[N];
+
+static void init_buf(int rank, int pos1, int pos2)
+{
+    /* Mix a pair of (BIG, -BIG) with TINYs: the array sums to the sum of
+     * all TINYs only if BIG and -BIG cancel first; other association
+     * orders yield different values. A valid algorithm must therefore
+     * produce consistent results on every rank.
+     */
+    float val = TINY;
+    if (rank == pos1) {
+        val = BIG;
+    } else if (rank == pos2) {
+        val = -BIG;
+    }
+    for (int i = 0; i < N; i++) {
+        buf[i] = val;
+    }
+}
+
+/* For every placement of the (BIG, -BIG) pair among the ranks, run an
+ * in-place float MPI_SUM allreduce, gather every rank's result to rank 0,
+ * and verify (bit-exactly) that all ranks agree. */
+int main(int argc, char **argv)
+{
+    int errs = 0;
+
+    MTest_Init(&argc, &argv);
+
+    int rank, size;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+    if (size < 3) {
+        printf("At least 3 processes required. More (e.g. 10) is recommended.\n");
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    for (int pos1 = 0; pos1 < size; pos1++) {
+        for (int pos2 = pos1 + 1; pos2 < size; pos2++) {
+            init_buf(rank, pos1, pos2);
+
+            MPI_Allreduce(MPI_IN_PLACE, buf, N, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
+
+            /* NULL on non-root ranks: MPI_Gather ignores recvbuf there,
+             * but passing an indeterminate pointer value is undefined */
+            float *check_buf = NULL;
+            if (rank == 0) {
+                check_buf = malloc(N * size * sizeof(float));
+                if (!check_buf) {
+                    MPI_Abort(MPI_COMM_WORLD, 1);
+                }
+            }
+            MPI_Gather(buf, N, MPI_FLOAT, check_buf, N, MPI_FLOAT, 0, MPI_COMM_WORLD);
+
+            if (rank == 0) {
+                MTestPrintfMsg(1, "BIG positions = (%d, %d), result = [", pos1, pos2);
+                for (int j = 0; j < N; j++) {
+                    MTestPrintfMsg(1, "%f ", buf[j]);
+                }
+                MTestPrintfMsg(1, "]\n");
+
+                /* bit-exact comparison (memcmp, not ==): any byte-level
+                 * divergence between ranks is an error */
+                for (int i = 0; i < size; i++) {
+                    for (int j = 0; j < N; j++) {
+                        if (memcmp(&check_buf[i * N + j], &buf[j], sizeof(float)) != 0) {
+                            if (errs < 10) {
+                                printf("(%d - %d) Result [%d] from rank %d mismatch: %f != %f\n",
+                                       pos1, pos2, j, i, check_buf[i * N + j], buf[j]);
+                            }
+                            errs++;
+                        }
+                    }
+                }
+                free(check_buf);
+            }
+        }
+    }
+
+    MTest_Finalize(errs);
+    return MTestReturnValue(errs);
+}