1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 * (C) 2003 by Argonne National Laboratory.
 * See COPYRIGHT in top-level directory.
 */
/* MPI-3 distributed linked list construction example
 * --------------------------------------------------
 *
 * Construct a distributed shared linked list using MPI-3 dynamic
 * windows. Initially process 0 creates the head of the list, attaches it to
 * the window, and broadcasts the pointer to all processes. Each process p then
 * appends N new elements to the list when the tail reaches process p-1.
 */
/* Upper bound on the MPI_Iprobe polling interval; deliberately expands to the
 * local variable 'nproc' so the bound scales with the job size.
 * NOTE(review): this only works inside a scope where 'nproc' is declared. */
#define MAX_NPROBE nproc

/* Number of list-element values printed per output row during verification. */
#define ELEM_PER_ROW 16

/* Min/max helper macros.
 * Fix: the original compared the raw arguments ("X < Y"), so an argument
 * containing a lower-precedence operator mis-parsed — e.g. MYMIN(a & 3, b)
 * became a & (3 < b). Every argument use is now fully parenthesized.
 * (Arguments are still evaluated twice; avoid side-effecting arguments.) */
#define MYMIN(X,Y) (((X) < (Y)) ? (X) : (Y))
#define MYMAX(X,Y) (((X) > (Y)) ? (X) : (Y))
34 /* Linked list pointer */
/* NOTE(review): the typedef bodies for llist_ptr_t and llist_elem_t are elided
 * from this chunk. From their uses below, llist_ptr_t carries a {rank, disp}
 * pair and llist_elem_t carries a {value, next} pair — confirm against the
 * full file. */
40 /* Linked list element */
/* Sentinel "null" remote pointer: rank -1 with an MPI_BOTTOM displacement.
 * List traversal below stops when a next-pointer compares equal to nil. */
46 static const llist_ptr_t nil = { -1, (MPI_Aint) MPI_BOTTOM };
/* Debug/reporting toggles: verbose enables per-append/chase printf tracing,
 * print_perf enables the timing summary at the end of main. */
48 static const int verbose = 0;
49 static const int print_perf = 0;
51 /* Allocate a new shared linked list element */
/*
 * Allocate one llist_elem_t with MPI_Alloc_mem, attach it to the dynamic
 * window 'win', record it in the caller's local bookkeeping array (so it can
 * be detached/freed later), and yield its address as an MPI_Aint displacement.
 *
 * Parameters:
 *   value          - payload stored in the new element (the caller's rank here)
 *   win            - dynamic window the element is attached to
 *   my_elems       - in/out: growable array of locally-owned element pointers
 *   my_elems_size  - in/out: current capacity of *my_elems
 *   my_elems_count - in/out: number of entries used in *my_elems
 *
 * NOTE(review): this chunk elides several original lines — the opening brace,
 * the declaration of 'disp', the closing braces, and the 'return disp;'.
 * Only the visible lines are annotated below.
 */
52 static MPI_Aint alloc_elem(int value, MPI_Win win, llist_elem_t ***my_elems, int* my_elems_size, int* my_elems_count)
55 llist_elem_t *elem_ptr;
57 /* Allocate the new element and register it with the window */
58 MPI_Alloc_mem(sizeof(llist_elem_t), MPI_INFO_NULL, &elem_ptr);
59 elem_ptr->value = value;
/* Attach makes this memory remotely accessible through the dynamic window. */
61 MPI_Win_attach(win, elem_ptr, sizeof(llist_elem_t));
63 /* Add the element to the list of local elements so we can free it later. */
/* Grow the bookkeeping array in chunks of 100 when full.
 * NOTE(review): realloc failure is not checked; on OOM the old pointer would
 * be overwritten with NULL (acceptable for test code, not for production). */
64 if (*my_elems_size == *my_elems_count) {
65 *my_elems_size += 100;
66 *my_elems = realloc(*my_elems, *my_elems_size * sizeof(void *));
68 (*my_elems)[*my_elems_count] = elem_ptr;
/* Convert the local pointer to the MPI_Aint displacement remote peers will
 * use when targeting this element through the dynamic window. */
71 MPI_Get_address(elem_ptr, &disp);
/*
 * Driver: build a distributed linked list across all ranks, then have every
 * rank independently traverse it and verify the per-rank element counts.
 *
 * NOTE(review): many original lines are elided from this chunk (the loop
 * initialization of 'i', and the declarations of llist_win, pollint, flag,
 * time, errors, elem, expected, among others). Comments below describe only
 * the code that is visible.
 */
75 int main(int argc, char **argv)
77 int procid, nproc, i, j, my_nelem;
81 llist_ptr_t head_ptr, tail_ptr;
82 /* List of locally allocated list elements. */
83 llist_elem_t **my_elems = NULL;
84 int my_elems_size = 0;
85 int my_elems_count = 0;
87 MPI_Init(&argc, &argv);
89 MPI_Comm_rank(MPI_COMM_WORLD, &procid);
90 MPI_Comm_size(MPI_COMM_WORLD, &nproc);
/* Dynamic window: memory is attached per-element by alloc_elem rather than
 * supplied at window creation. */
92 MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);
94 /* Process 0 creates the head node */
96 head_ptr.disp = alloc_elem(procid, llist_win, &my_elems, &my_elems_size, &my_elems_count);
98 /* Broadcast the head pointer to everyone */
/* Only the displacement is broadcast; head_ptr.rank is presumably set to 0
 * on an elided line — confirm against the full file. */
100 MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
103 /* All processes append NUM_ELEMS elements to the list; rank 0 has already
104 * appended an element. */
/* Split NUM_ELEMS as evenly as possible: the first NUM_ELEMS % nproc ranks
 * get one extra element (increment is on an elided line). */
109 my_nelem = NUM_ELEMS / nproc;
110 if (procid < NUM_ELEMS % nproc)
113 MPI_Barrier(MPI_COMM_WORLD);
/* Append loop. NOTE(review): the initialization of 'i' is elided — rank 0
 * presumably starts at 1 since it already created the head. */
116 for (; i < my_nelem; i++) {
117 llist_ptr_t new_elem_ptr;
/* Valgrind aid: mark the struct defined so padding bytes sent as MPI_BYTE
 * below do not trigger uninitialized-read reports. */
120 MTEST_VG_MEM_INIT(&new_elem_ptr, sizeof(llist_ptr_t));
122 /* Create a new list element and register it with the window */
123 new_elem_ptr.rank = procid;
124 new_elem_ptr.disp = alloc_elem(procid, llist_win, &my_elems, &my_elems_size, &my_elems_count);
126 /* Append the new node to the list. This might take multiple attempts if
127 * others have already appended and our tail pointer is stale. */
131 /* The tail is at my left neighbor, append my element. */
132 if (tail_ptr.rank == (procid + nproc - 1) % nproc) {
134 printf("%d: Appending to <%d, %p>\n", procid, tail_ptr.rank,
135 (void *) tail_ptr.disp);
/* Exclusive lock on the tail's owner while we overwrite its next pointer.
 * The target displacement is the address of the tail element's 'next'
 * field, computed by treating tail_ptr.disp as a local pointer — valid
 * only for computing the field offset within the remote element. */
137 MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tail_ptr.rank, 0, llist_win);
/* Two alternative writes appear here (Accumulate with MPI_REPLACE and a
 * plain Put); the preprocessor conditional selecting between them is on
 * elided lines — confirm against the full file. */
139 MPI_Accumulate(&new_elem_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
140 (MPI_Aint) & (((llist_elem_t *) tail_ptr.disp)->next),
141 sizeof(llist_ptr_t), MPI_BYTE, MPI_REPLACE, llist_win);
143 MPI_Put(&new_elem_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
144 (MPI_Aint) & (((llist_elem_t *) tail_ptr.disp)->next), sizeof(llist_ptr_t),
145 MPI_BYTE, llist_win);
147 MPI_Win_unlock(tail_ptr.rank, llist_win);
/* Our new element is now the tail. */
150 tail_ptr = new_elem_ptr;
153 /* Otherwise, chase the tail. */
155 llist_ptr_t next_tail_ptr;
157 MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tail_ptr.rank, 0, llist_win);
/* Atomically read the current tail's 'next' pointer (Get_accumulate with
 * MPI_NO_OP is an atomic read); a plain Get alternative also appears —
 * the selecting conditional is on elided lines. */
159 MPI_Get_accumulate(NULL, 0, MPI_DATATYPE_NULL, &next_tail_ptr,
160 sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
161 (MPI_Aint) & (((llist_elem_t *) tail_ptr.disp)->next),
162 sizeof(llist_ptr_t), MPI_BYTE, MPI_NO_OP, llist_win);
164 MPI_Get(&next_tail_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
165 (MPI_Aint) & (((llist_elem_t *) tail_ptr.disp)->next),
166 sizeof(llist_ptr_t), MPI_BYTE, llist_win);
168 MPI_Win_unlock(tail_ptr.rank, llist_win);
/* A non-nil next pointer means another rank appended: advance our stale
 * tail and poll again sooner (halve the backoff, floored at MIN_NPROBE). */
170 if (next_tail_ptr.rank != nil.rank) {
172 printf("%d: Chasing to <%d, %p>\n", procid, next_tail_ptr.rank,
173 (void *) next_tail_ptr.disp);
174 tail_ptr = next_tail_ptr;
175 pollint = MYMAX(MIN_NPROBE, pollint / 2);
/* Still waiting for our turn: spin on MPI_Iprobe 'pollint' times to keep
 * the MPI progress engine moving, then back off (double, capped at
 * MAX_NPROBE == nproc). */
178 for (j = 0; j < pollint; j++)
179 MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag,
182 pollint = MYMIN(MAX_NPROBE, pollint * 2);
188 MPI_Barrier(MPI_COMM_WORLD);
/* 'time' was presumably set from MPI_Wtime() before the append phase on an
 * elided line; this converts it to the elapsed append time. */
189 time = MPI_Wtime() - time;
191 /* Traverse the list and verify that all processes inserted exactly the correct
192 * number of elements. */
195 int *counts, count = 0;
197 counts = (int *) malloc(sizeof(int) * nproc);
198 assert(counts != NULL);
/* Zero the tally array (the assignment body is on an elided line). */
200 for (i = 0; i < nproc; i++)
/* Shared lock-all epoch: every rank reads the whole list concurrently. */
205 MPI_Win_lock_all(0, llist_win);
207 /* Walk the list and tally up the number of elements inserted by each rank */
/* NOTE(review): tail_ptr is presumably reset to head_ptr before this loop on
 * an elided line — confirm against the full file. */
208 while (tail_ptr.disp != nil.disp) {
211 MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE,
212 tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win);
/* Flush completes the Get locally so 'elem' is safe to read below. */
214 MPI_Win_flush(tail_ptr.rank, llist_win);
216 tail_ptr = elem.next;
/* Each element's value is the inserting rank; tally per rank. */
218 assert(elem.value >= 0 && elem.value < nproc);
219 counts[elem.value]++;
/* Verbose pretty-printing: " -> " separators, ELEM_PER_ROW values per row. */
223 int last_elem = tail_ptr.disp == nil.disp;
224 printf("%2d%s", elem.value, last_elem ? "" : " -> ");
225 if (count % ELEM_PER_ROW == 0 && !last_elem)
230 MPI_Win_unlock_all(llist_win);
235 /* Verify the counts we collected */
/* Expected per-rank count mirrors the split used when appending. */
236 for (i = 0; i < nproc; i++) {
239 expected = NUM_ELEMS / nproc;
240 if (i < NUM_ELEMS % nproc)
243 if (counts[i] != expected) {
244 printf("Error: Rank %d inserted %d elements, expected %d\n", i, counts[i],
250 printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
/* Optional performance summary over the slowest rank's append time. */
257 MPI_Reduce(&time, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
260 printf("Total time = %0.2f sec, elem/sec = %0.2f, sec/elem = %0.2f usec\n", max_time,
261 NUM_ELEMS / max_time, max_time / NUM_ELEMS * 1.0e6);
/* Free the window before freeing the attached element memory. */
265 MPI_Win_free(&llist_win);
267 /* Free all the elements in the list */
268 for (; my_elems_count > 0; my_elems_count--)
269 MPI_Free_mem(my_elems[my_elems_count - 1]);