1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2013 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
13 #include "mcs-mutex.h"
15 /* TODO: Make these mutex operations no-ops for sequential runs */
17 /** Create an MCS mutex. Collective on comm.
19 * @param[in] comm communicator containing all processes that will use the
21 * @param[in] tail_rank rank of the process in comm that holds the tail
23 * @param[out] hdl_out handle to the mutex
26 int MCS_Mutex_create(int tail_rank, MPI_Comm comm, MCS_Mutex * hdl_out)
31 hdl = malloc(sizeof(struct mcs_mutex_s));
34 MPI_Comm_dup(comm, &hdl->comm);
36 MPI_Comm_rank(hdl->comm, &rank);
37 MPI_Comm_size(hdl->comm, &nproc);
39 hdl->tail_rank = tail_rank;
42 MPI_Win_allocate_shared(2*sizeof(int), sizeof(int), MPI_INFO_NULL,
43 hdl->comm, &hdl->base, &hdl->window);
45 MPI_Win_allocate(2*sizeof(int), sizeof(int), MPI_INFO_NULL, hdl->comm,
46 &hdl->base, &hdl->window);
49 MPI_Win_lock_all(0, hdl->window);
51 hdl->base[0] = MPI_PROC_NULL;
52 hdl->base[1] = MPI_PROC_NULL;
54 MPI_Win_sync(hdl->window);
55 MPI_Barrier(hdl->comm);
62 /** Free an MCS mutex. Collective on ranks in the communicator used at the
65 * @param[in] hdl_ptr pointer to the handle of the mutex that will be freed
68 int MCS_Mutex_free(MCS_Mutex * hdl_ptr)
70 MCS_Mutex hdl = *hdl_ptr;
72 MPI_Win_unlock_all(hdl->window);
74 MPI_Win_free(&hdl->window);
75 MPI_Comm_free(&hdl->comm);
86 * @param[in] hdl Handle to the mutex
89 int MCS_Mutex_lock(MCS_Mutex hdl)
94 MPI_Comm_rank(hdl->comm, &rank);
95 MPI_Comm_size(hdl->comm, &nproc);
97 /* This store is safe, since it cannot happen concurrently with a remote
99 hdl->base[MCS_MTX_ELEM_DISP] = MPI_PROC_NULL;
100 MPI_Win_sync(hdl->window);
102 MPI_Fetch_and_op(&rank, &prev, MPI_INT, hdl->tail_rank, MCS_MTX_TAIL_DISP,
103 MPI_REPLACE, hdl->window);
104 MPI_Win_flush(hdl->tail_rank, hdl->window);
106 /* If there was a previous tail, update their next pointer and wait for
107 * notification. Otherwise, the mutex was successfully acquired. */
108 if (prev != MPI_PROC_NULL) {
109 /* Wait for notification */
112 MPI_Accumulate(&rank, 1, MPI_INT, prev, MCS_MTX_ELEM_DISP, 1, MPI_INT, MPI_REPLACE, hdl->window);
113 MPI_Win_flush(prev, hdl->window);
115 debug_print("%2d: LOCK - waiting for notification from %d\n", rank, prev);
116 MPI_Recv(NULL, 0, MPI_BYTE, prev, MCS_MUTEX_TAG, hdl->comm, &status);
119 debug_print("%2d: LOCK - lock acquired\n", rank);
125 /** Attempt to acquire a mutex.
127 * @param[in] hdl Handle to the mutex
128 * @param[out] success Indicates whether the mutex was acquired
131 int MCS_Mutex_trylock(MCS_Mutex hdl, int *success)
134 int tail, nil = MPI_PROC_NULL;
136 MPI_Comm_rank(hdl->comm, &rank);
137 MPI_Comm_size(hdl->comm, &nproc);
139 /* This store is safe, since it cannot happen concurrently with a remote
141 hdl->base[MCS_MTX_ELEM_DISP] = MPI_PROC_NULL;
142 MPI_Win_sync(hdl->window);
144 /* Check if the lock is available and claim it if it is. */
145 MPI_Compare_and_swap(&rank, &nil, &tail, MPI_INT, hdl->tail_rank,
146 MCS_MTX_TAIL_DISP, hdl->window);
147 MPI_Win_flush(hdl->tail_rank, hdl->window);
149 /* If the old tail was MPI_PROC_NULL, we have claimed the mutex */
150 *success = (tail == nil);
152 debug_print("%2d: TRYLOCK - %s\n", rank, (*success) ? "Success" : "Non-success");
160 * @param[in] hdl Handle to the mutex
163 int MCS_Mutex_unlock(MCS_Mutex hdl)
165 int rank, nproc, next;
167 MPI_Comm_rank(hdl->comm, &rank);
168 MPI_Comm_size(hdl->comm, &nproc);
170 MPI_Win_sync(hdl->window);
172 /* Read my next pointer. FOP is used since another process may write to
173 * this location concurrent with this read. */
174 MPI_Fetch_and_op(NULL, &next, MPI_INT, rank, MCS_MTX_ELEM_DISP, MPI_NO_OP,
176 MPI_Win_flush(rank, hdl->window);
178 if ( next == MPI_PROC_NULL) {
180 int nil = MPI_PROC_NULL;
182 /* Check if we are at the tail of the lock queue. If so, we're
183 * done. If not, we need to send notification. */
184 MPI_Compare_and_swap(&nil, &rank, &tail, MPI_INT, hdl->tail_rank,
185 MCS_MTX_TAIL_DISP, hdl->window);
186 MPI_Win_flush(hdl->tail_rank, hdl->window);
189 debug_print("%2d: UNLOCK - waiting for next pointer (tail = %d)\n", rank, tail);
190 assert(tail >= 0 && tail < nproc);
195 MPI_Fetch_and_op(NULL, &next, MPI_INT, rank, MCS_MTX_ELEM_DISP,
196 MPI_NO_OP, hdl->window);
198 MPI_Win_flush(rank, hdl->window);
199 if (next != MPI_PROC_NULL) break;
201 MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag,
207 /* Notify the next waiting process */
208 if (next != MPI_PROC_NULL) {
209 debug_print("%2d: UNLOCK - notifying %d\n", rank, next);
210 MPI_Send(NULL, 0, MPI_BYTE, next, MCS_MUTEX_TAG, hdl->comm);
213 debug_print("%2d: UNLOCK - lock released\n", rank);