1 /* A thread pool (C++ version). */
3 /* Copyright (c) 2004-2019 The SimGrid Team. All rights reserved. */
5 /* This program is free software; you can redistribute it and/or modify it
6 * under the terms of the license (GNU LGPL) which comes with this package. */
11 #include "src/internal_config.h" // HAVE_FUTEX_H
12 #include "src/kernel/context/Context.hpp"
13 #include "src/simix/smx_private.hpp" /* simix_global */
15 #include <boost/optional.hpp>
16 #include <condition_variable>
22 #include <linux/futex.h>
23 #include <sys/syscall.h>
27 #include <pthread_np.h>
30 XBT_LOG_EXTERNAL_CATEGORY(xbt_parmap);
35 /** @addtogroup XBT_parmap
37 * @brief Parallel map class
// Parmap<T>: a pool of threads that applies one function to every element of a vector in parallel.
// The thread that calls apply() takes part in the work alongside the worker threads.
40 template <typename T> class Parmap {
// Construction takes the number of workers and the synchronization mode; copying is disabled.
42 Parmap(unsigned num_workers, e_xbt_parmap_mode_t mode);
43 Parmap(const Parmap&) = delete;
44 Parmap& operator=(const Parmap&) = delete;
// apply() runs fun over data; next() hands out one element of data wrapped in a boost::optional.
46 void apply(std::function<void(T)>&& fun, const std::vector<T>& data);
47 boost::optional<T> next();
// Life-cycle flag: the constructor sets PARMAP_WORK, the destructor sets PARMAP_DESTROY.
50 enum Flag { PARMAP_WORK, PARMAP_DESTROY };
// ThreadData pairs the owning parmap with a worker id; one instance is passed to each worker_main().
53 * @brief Thread data transmission structure
57 ThreadData(Parmap<T>& parmap, int id) : parmap(parmap), worker_id(id) {}
// Synchro is the abstract interface of the controller/worker hand-shake; it is implemented by
// PosixSynchro, FutexSynchro and BusyWaitSynchro.
63 * @brief Synchronization object (different specializations).
67 explicit Synchro(Parmap<T>& parmap) : parmap(parmap) {}
68 virtual ~Synchro() = default;
// NOTE(review): in the three implementations of this file, master_signal() sets thread_counter to 1
// and increments work_round (plus a wake-up where the mechanism needs one); it does not wait.
// Waiting for the workers to finish is done by master_wait().
70 * @brief Wakes all workers and waits for them to finish the tasks.
72 * This function is called by the controller thread.
74 virtual void master_signal() = 0;
// The implementations of master_wait() return once thread_counter has reached num_workers.
76 * @brief Starts the parmap: waits for all workers to be ready and returns.
78 * This function is called by the controller thread.
80 virtual void master_wait() = 0;
// The implementations of worker_signal() increment thread_counter (and wake the controller where needed).
82 * @brief Ends the parmap: wakes the controller thread when all workers terminate.
84 * This function is called by all worker threads when they end (not including the controller).
86 virtual void worker_signal() = 0;
// The implementations of worker_wait() return once work_round equals the round number passed in.
88 * @brief Waits for some work to process.
90 * This function is called by each worker thread (not including the controller) when it has no more work to do.
92 * @param round the expected round number
94 virtual void worker_wait(unsigned) = 0;
// Implementation based on std::mutex / std::condition_variable pairs.
99 class PosixSynchro : public Synchro {
101 explicit PosixSynchro(Parmap<T>& parmap);
103 void master_signal() override;
104 void master_wait() override;
105 void worker_signal() override;
106 void worker_wait(unsigned round) override;
// ready_* is used by master_signal()/worker_wait(), done_* by worker_signal()/master_wait().
109 std::condition_variable ready_cond;
110 std::mutex ready_mutex;
111 std::condition_variable done_cond;
112 std::mutex done_mutex;
// Implementation that sleeps and wakes through the futex system call, directly on the atomic counters.
116 class FutexSynchro : public Synchro {
118 explicit FutexSynchro(Parmap<T>& parmap) : Synchro(parmap) {}
119 void master_signal() override;
120 void master_wait() override;
121 void worker_signal() override;
122 void worker_wait(unsigned) override;
// Thin wrappers around syscall(SYS_futex, ...) with FUTEX_WAIT_PRIVATE / FUTEX_WAKE_PRIVATE.
125 static void futex_wait(std::atomic_uint* uaddr, unsigned val);
126 static void futex_wake(std::atomic_uint* uaddr, unsigned val);
// Implementation that polls the atomic counters, calling std::this_thread::yield() between polls.
130 class BusyWaitSynchro : public Synchro {
132 explicit BusyWaitSynchro(Parmap<T>& parmap) : Synchro(parmap) {}
133 void master_signal() override;
134 void master_wait() override;
135 void worker_signal() override;
136 void worker_wait(unsigned) override;
// worker_main() is the entry point of every worker thread; new_synchro() creates the Synchro for a mode.
139 static void worker_main(ThreadData* data);
140 Synchro* new_synchro(e_xbt_parmap_mode_t mode);
143 Flag status; /**< is the parmap active or being destroyed? */
144 std::atomic_uint work_round; /**< index of the current round */
145 std::vector<std::thread*> workers; /**< worker thread handlers */
146 unsigned num_workers; /**< total number of worker threads including the controller */
147 Synchro* synchro; /**< synchronization object */
// The fields below describe the job of the current round; fun and data are read by work().
149 std::atomic_uint thread_counter{0}; /**< number of workers that have done the work */
150 std::function<void(T)> fun; /**< function to run in parallel on each element of data */
151 const std::vector<T>* data = nullptr; /**< parameters to pass to fun in parallel */
152 std::atomic_uint index; /**< index of the next element of data to pick */
156 * @brief Creates a parallel map object
157 * @param num_workers number of worker threads to create
158 * @param mode how to synchronize the worker threads
// Builds the pool: records the configuration, creates the synchronization object for `mode`,
// then starts one std::thread per worker slot 1..num_workers-1, each running worker_main().
// NOTE(review): workers[0] is written unconditionally below, so num_workers is assumed to be >= 1.
160 template <typename T> Parmap<T>::Parmap(unsigned num_workers, e_xbt_parmap_mode_t mode)
162 XBT_CDEBUG(xbt_parmap, "Create new parmap (%u workers)", num_workers);
164 /* Initialize the thread pool data structure */
165 this->status = PARMAP_WORK;
166 this->work_round = 0;
167 this->workers.resize(num_workers);
168 this->num_workers = num_workers;
169 this->synchro = new_synchro(mode);
171 /* Create the pool of worker threads (the caller of apply() will be worker[0]) */
172 this->workers[0] = nullptr;
// Index of the core the next worker gets bound to; it is only read inside the affinity section below.
173 XBT_ATTRIB_UNUSED unsigned int core_bind = 0;
175 for (unsigned i = 1; i < num_workers; i++) {
// The ThreadData is heap-allocated here and handed to the new thread as worker_main()'s argument.
176 this->workers[i] = new std::thread(worker_main, new ThreadData(*this, i));
178 /* Bind the worker to a core if possible */
179 #if HAVE_PTHREAD_SETAFFINITY
180 #if HAVE_PTHREAD_NP_H /* FreeBSD ? */
182 size_t size = sizeof(cpuset_t);
185 size_t size = sizeof(cpu_set_t);
187 pthread_t pthread = this->workers[i]->native_handle();
189 CPU_SET(core_bind, &cpuset);
// NOTE(review): the result of pthread_setaffinity_np() is ignored, so a failed binding goes unnoticed.
190 pthread_setaffinity_np(pthread, size, &cpuset);
// NOTE(review): std::thread::hardware_concurrency() may return 0; `- 1` then wraps to the largest unsigned value.
// Presumably core_bind advances to the next core here and wraps to 0 after the last one - confirm.
191 if (core_bind != std::thread::hardware_concurrency() - 1)
200 * @brief Destroys a parmap
// Shuts the pool down: the flag is set to PARMAP_DESTROY before master_signal() starts a new round,
// so every worker that wakes up in worker_main() sees the flag and takes its PARMAP_DESTROY branch.
202 template <typename T> Parmap<T>::~Parmap()
204 status = PARMAP_DESTROY;
205 synchro->master_signal();
// Visits worker slots 1..num_workers-1 (slot 0 is the null entry standing for the calling thread).
// Presumably each worker thread is joined and released here - confirm.
207 for (unsigned i = 1; i < num_workers; i++) {
215 * @brief Applies a list of tasks in parallel.
216 * @param fun the function to call in parallel
217 * @param data each element of this vector will be passed as an argument to fun
// Runs one parallel round: stores the job, wakes the workers, takes part in the work, then waits for
// the workers to report completion.
// work() dereferences this->data and claims positions from this->index, so both have to describe the new
// job before master_signal() wakes the workers.
219 template <typename T> void Parmap<T>::apply(std::function<void(T)>&& fun, const std::vector<T>& data)
221 /* Assign resources to worker threads (we are maestro here)*/
// fun is moved from: the caller's std::function is left in a moved-from state.
222 this->fun = std::move(fun);
225 this->synchro->master_signal(); // maestro runs futex_wake to wake all the minions (the working threads)
226 this->work(); // maestro works with its minions
227 this->synchro->master_wait(); // When there is no more work to do, then maestro waits for the last minion to stop
228 XBT_CDEBUG(xbt_parmap, "Job done"); // ... and proceeds
232 * @brief Returns a next task to process.
234 * Worker threads call this function to get more work.
236 * @return the next task to process, or an empty boost::optional if there is no more work
// Claims one position of data with an atomic fetch_add on the shared index, so each position is
// handed to a single caller, and returns a copy of the element when the position is in range.
238 template <typename T> boost::optional<T> Parmap<T>::next()
// The local `index` shadows the member of the same name; it holds the position claimed by this call.
240 unsigned index = this->index.fetch_add(1, std::memory_order_relaxed);
241 if (index < this->data->size())
242 return (*this->data)[index];
248 * @brief Main work loop: applies fun to elements in turn.
250 template <typename T> void Parmap<T>::work()
252 unsigned length = this->data->size();
253 unsigned index = this->index.fetch_add(1, std::memory_order_relaxed);
254 while (index < length) {
255 this->fun((*this->data)[index]);
256 index = this->index.fetch_add(1, std::memory_order_relaxed);
261 * Get a synchronization object for given mode.
262 * @param mode the synchronization mode
// Creates the Synchro implementation matching `mode`. The object is allocated with `new` and returned
// as a raw pointer; the constructor stores it in this->synchro.
264 template <typename T> typename Parmap<T>::Synchro* Parmap<T>::new_synchro(e_xbt_parmap_mode_t mode)
// XBT_PARMAP_DEFAULT is replaced by a concrete mode first.
// Presumably FUTEX is chosen when HAVE_FUTEX_H is set and POSIX otherwise - confirm.
266 if (mode == XBT_PARMAP_DEFAULT) {
268 mode = XBT_PARMAP_FUTEX;
270 mode = XBT_PARMAP_POSIX;
275 case XBT_PARMAP_POSIX:
276 res = new PosixSynchro(*this);
278 case XBT_PARMAP_FUTEX:
280 res = new FutexSynchro(*this);
// Requesting the futex mode where futexes are unavailable aborts through xbt_die().
282 xbt_die("Futex is not available on this OS.");
285 case XBT_PARMAP_BUSY_WAIT:
286 res = new BusyWaitSynchro(*this);
294 /** @brief Main function of a worker thread */
// Entry point of a worker thread; `data` is the ThreadData allocated by the Parmap constructor.
295 template <typename T> void Parmap<T>::worker_main(ThreadData* data)
297 Parmap<T>& parmap = data->parmap;
// Creates a context from an empty function and installs it as the current context of this thread.
299 smx_context_t context = simix_global->context_factory->create_context(std::function<void()>(), nullptr);
300 kernel::context::Context::set_current(context);
302 XBT_CDEBUG(xbt_parmap, "New worker thread created");
304 /* Worker's main loop */
// `round` is the round number this worker expects next; worker_wait() returns once parmap.work_round matches it.
306 round++; // New scheduling round
307 parmap.synchro->worker_wait(round);
// The destructor sets PARMAP_DESTROY before signalling, so the flag is checked right after waking up.
308 if (parmap.status == PARMAP_DESTROY)
311 XBT_CDEBUG(xbt_parmap, "Worker %d got a job", data->worker_id);
// Reports to the controller that this worker is done with the current round.
313 parmap.synchro->worker_signal();
314 XBT_CDEBUG(xbt_parmap, "Worker %d has finished", data->worker_id);
316 /* We are destroying the parmap */
// Forwards the owning parmap to the Synchro base class; no other member appears in the initializer list.
321 template <typename T> Parmap<T>::PosixSynchro::PosixSynchro(Parmap<T>& parmap) : Synchro(parmap)
// Destructor of the mutex/condition-variable based synchronization object.
325 template <typename T> Parmap<T>::PosixSynchro::~PosixSynchro()
329 template <typename T> void Parmap<T>::PosixSynchro::master_signal()
331 std::unique_lock<std::mutex> lk(ready_mutex);
332 this->parmap.thread_counter = 1;
333 this->parmap.work_round++;
334 /* wake all workers */
335 ready_cond.notify_all();
// Called by the controller thread: holds done_mutex and loops until thread_counter has reached
// num_workers. worker_signal() increments the counter and notifies done_cond under the same mutex.
338 template <typename T> void Parmap<T>::PosixSynchro::master_wait()
340 std::unique_lock<std::mutex> lk(done_mutex);
// Re-testing the counter in a loop also copes with wake-ups that happen before all workers are done.
// Presumably the loop body waits on done_cond with lk - confirm.
341 while (this->parmap.thread_counter < this->parmap.num_workers) {
342 /* wait for all workers to be ready */
347 template <typename T> void Parmap<T>::PosixSynchro::worker_signal()
349 std::unique_lock<std::mutex> lk(done_mutex);
350 this->parmap.thread_counter++;
351 if (this->parmap.thread_counter == this->parmap.num_workers) {
352 /* all workers have finished, wake the controller */
353 done_cond.notify_one();
// Called by a worker thread: holds ready_mutex and loops until work_round equals the expected `round`.
// master_signal() increments work_round and notifies ready_cond under the same mutex.
357 template <typename T> void Parmap<T>::PosixSynchro::worker_wait(unsigned round)
359 std::unique_lock<std::mutex> lk(ready_mutex);
360 /* wait for more work */
// Presumably the loop body waits on ready_cond with lk - confirm.
361 while (this->parmap.work_round != round) {
367 template <typename T> inline void Parmap<T>::FutexSynchro::futex_wait(std::atomic_uint* uaddr, unsigned val)
369 XBT_CVERB(xbt_parmap, "Waiting on futex %p", uaddr);
370 syscall(SYS_futex, uaddr, FUTEX_WAIT_PRIVATE, val, nullptr, nullptr, 0);
373 template <typename T> inline void Parmap<T>::FutexSynchro::futex_wake(std::atomic_uint* uaddr, unsigned val)
375 XBT_CVERB(xbt_parmap, "Waking futex %p", uaddr);
376 syscall(SYS_futex, uaddr, FUTEX_WAKE_PRIVATE, val, nullptr, nullptr, 0);
379 template <typename T> void Parmap<T>::FutexSynchro::master_signal()
381 this->parmap.thread_counter.store(1);
382 this->parmap.work_round.fetch_add(1);
383 /* wake all workers */
384 futex_wake(&this->parmap.work_round, std::numeric_limits<int>::max());
387 template <typename T> void Parmap<T>::FutexSynchro::master_wait()
389 unsigned count = this->parmap.thread_counter.load();
390 while (count < this->parmap.num_workers) {
391 /* wait for all workers to be ready */
392 futex_wait(&this->parmap.thread_counter, count);
393 count = this->parmap.thread_counter.load();
397 template <typename T> void Parmap<T>::FutexSynchro::worker_signal()
399 unsigned count = this->parmap.thread_counter.fetch_add(1) + 1;
400 if (count == this->parmap.num_workers) {
401 /* all workers have finished, wake the controller */
402 futex_wake(&this->parmap.thread_counter, std::numeric_limits<int>::max());
406 template <typename T> void Parmap<T>::FutexSynchro::worker_wait(unsigned round)
408 unsigned work_round = this->parmap.work_round.load();
409 /* wait for more work */
410 while (work_round != round) {
411 futex_wait(&this->parmap.work_round, work_round);
412 work_round = this->parmap.work_round.load();
417 template <typename T> void Parmap<T>::BusyWaitSynchro::master_signal()
419 this->parmap.thread_counter.store(1);
420 this->parmap.work_round.fetch_add(1);
423 template <typename T> void Parmap<T>::BusyWaitSynchro::master_wait()
425 while (this->parmap.thread_counter.load() < this->parmap.num_workers) {
426 std::this_thread::yield();
430 template <typename T> void Parmap<T>::BusyWaitSynchro::worker_signal()
432 this->parmap.thread_counter.fetch_add(1);
// Called by a worker thread: polls work_round until it equals the expected `round`, yielding the
// processor between polls. master_signal() is what increments work_round.
435 template <typename T> void Parmap<T>::BusyWaitSynchro::worker_wait(unsigned round)
437 /* wait for more work */
438 while (this->parmap.work_round.load() != round) {
439 std::this_thread::yield();