1 /* A thread pool (C++ version). */
3 /* Copyright (c) 2004-2023 The SimGrid Team. All rights reserved. */
5 /* This program is free software; you can redistribute it and/or modify it
6 * under the terms of the license (GNU LGPL) which comes with this package. */
11 #include "src/internal_config.h" // HAVE_FUTEX_H
12 #include "src/kernel/EngineImpl.hpp"
13 #include "src/kernel/context/Context.hpp"
15 #include <boost/optional.hpp>
16 #include <condition_variable>
22 #include <linux/futex.h>
23 #include <sys/syscall.h>
27 #include <pthread_np.h>
30 XBT_LOG_EXTERNAL_CATEGORY(xbt_parmap);
32 namespace simgrid::xbt {
34 /** @addtogroup XBT_parmap
36 * @brief Parallel map class
39 template <typename T> class Parmap {
41 Parmap(unsigned num_workers, e_xbt_parmap_mode_t mode);
42 Parmap(const Parmap&) = delete;
43 Parmap& operator=(const Parmap&) = delete;
45 void apply(std::function<void(T)>&& fun, const std::vector<T>& data);
46 boost::optional<T> next();
50 * @brief Thread data transmission structure
54 ThreadData(Parmap<T>& parmap, int id) : parmap(parmap), worker_id(id) {}
60 * @brief Synchronization object (different specializations).
64 explicit Synchro(Parmap<T>& parmap) : parmap(parmap) {}
65 virtual ~Synchro() = default;
67 * @brief Wakes all workers and waits for them to finish the tasks.
69 * This function is called by the controller thread.
71 virtual void master_signal() = 0;
73 * @brief Starts the parmap: waits for all workers to be ready and returns.
75 * This function is called by the controller thread.
77 virtual void master_wait() = 0;
79 * @brief Ends the parmap: wakes the controller thread when all workers terminate.
81 * This function is called by all worker threads when they end (not including the controller).
83 virtual void worker_signal() = 0;
85 * @brief Waits for some work to process.
87 * This function is called by each worker thread (not including the controller) when it has no more work to do.
89 * @param expected_round the expected round number
91 virtual void worker_wait(unsigned) = 0;
96 class PosixSynchro : public Synchro {
98 explicit PosixSynchro(Parmap<T>& parmap) : Synchro(parmap) {}
99 void master_signal() override;
100 void master_wait() override;
101 void worker_signal() override;
102 void worker_wait(unsigned expected_round) override;
105 std::condition_variable ready_cond;
106 std::mutex ready_mutex;
107 std::condition_variable done_cond;
108 std::mutex done_mutex;
112 class FutexSynchro : public Synchro {
114 explicit FutexSynchro(Parmap<T>& parmap) : Synchro(parmap) {}
115 void master_signal() override;
116 void master_wait() override;
117 void worker_signal() override;
118 void worker_wait(unsigned) override;
121 static void futex_wait(std::atomic_uint* uaddr, unsigned val);
122 static void futex_wake(std::atomic_uint* uaddr, unsigned val);
126 class BusyWaitSynchro : public Synchro {
128 explicit BusyWaitSynchro(Parmap<T>& parmap) : Synchro(parmap) {}
129 void master_signal() override;
130 void master_wait() override;
131 void worker_signal() override;
132 void worker_wait(unsigned) override;
135 static void worker_main(ThreadData* data);
136 Synchro* new_synchro(e_xbt_parmap_mode_t mode);
139 bool destroying = false; /**< is the parmap being destroyed? */
140 std::atomic_uint work_round{0}; /**< index of the current round */
141 std::vector<std::thread*> workers; /**< worker thread handlers */
142 unsigned num_workers; /**< total number of worker threads including the controller */
143 Synchro* synchro; /**< synchronization object */
145 std::atomic_uint thread_counter{0}; /**< number of workers that have done the work */
146 std::function<void(T)> worker_fun; /**< function to run in parallel on each element of data */
147 const std::vector<T>* common_data = nullptr; /**< parameters to pass to fun in parallel */
148 std::atomic_uint common_index{0}; /**< index of the next element of data to pick */
152 * @brief Creates a parallel map object
153 * @param num_workers number of worker threads to create
154 * @param mode how to synchronize the worker threads
156 template <typename T>
157 Parmap<T>::Parmap(unsigned num_workers, e_xbt_parmap_mode_t mode)
158 : workers(num_workers), num_workers(num_workers), synchro(new_synchro(mode))
160 XBT_CDEBUG(xbt_parmap, "Create new parmap (%u workers)", num_workers);
162 /* Create the pool of worker threads (the caller of apply() will be worker[0]) */
163 workers[0] = nullptr;
165 for (unsigned i = 1; i < num_workers; i++) {
166 auto* data = new ThreadData(*this, i);
167 workers[i] = new std::thread(worker_main, data);
169 /* Bind the worker to a core if possible */
170 #if HAVE_PTHREAD_SETAFFINITY
171 #if HAVE_PTHREAD_NP_H /* FreeBSD ? */
173 size_t size = sizeof(cpuset_t);
176 size_t size = sizeof(cpu_set_t);
178 pthread_t pthread = workers[i]->native_handle();
179 int core_bind = (i - 1) % std::thread::hardware_concurrency();
181 CPU_SET(core_bind, &cpuset);
182 pthread_setaffinity_np(pthread, size, &cpuset);
188 * @brief Destroys a parmap
190 template <typename T> Parmap<T>::~Parmap()
193 synchro->master_signal();
195 for (unsigned i = 1; i < num_workers; i++) {
203 * @brief Applies a list of tasks in parallel.
204 * @param fun the function to call in parallel
205 * @param data each element of this vector will be passed as an argument to fun
207 template <typename T> void Parmap<T>::apply(std::function<void(T)>&& fun, const std::vector<T>& data)
209 /* Assign resources to worker threads (we are maestro here)*/
210 worker_fun = std::move(fun);
213 synchro->master_signal(); // maestro runs futex_wake to wake all the minions (the working threads)
214 work(); // maestro works with its minions
215 synchro->master_wait(); // When there is no more work to do, then maestro waits for the last minion to stop
216 XBT_CDEBUG(xbt_parmap, "Job done"); // ... and proceeds
220 * @brief Returns a next task to process.
222 * Worker threads call this function to get more work.
224 * @return the next task to process, or throws a std::out_of_range exception if there is no more work
226 template <typename T> boost::optional<T> Parmap<T>::next()
228 unsigned index = common_index.fetch_add(1, std::memory_order_relaxed);
229 if (index < common_data->size())
230 return (*common_data)[index];
236 * @brief Main work loop: applies fun to elements in turn.
238 template <typename T> void Parmap<T>::work()
240 unsigned length = static_cast<unsigned>(common_data->size());
241 unsigned index = common_index.fetch_add(1, std::memory_order_relaxed);
242 while (index < length) {
243 worker_fun((*common_data)[index]);
244 index = common_index.fetch_add(1, std::memory_order_relaxed);
249 * Get a synchronization object for given mode.
250 * @param mode the synchronization mode
252 template <typename T> typename Parmap<T>::Synchro* Parmap<T>::new_synchro(e_xbt_parmap_mode_t mode)
254 if (mode == XBT_PARMAP_DEFAULT) {
256 mode = XBT_PARMAP_FUTEX;
258 mode = XBT_PARMAP_POSIX;
263 case XBT_PARMAP_POSIX:
264 res = new PosixSynchro(*this);
266 case XBT_PARMAP_FUTEX:
268 res = new FutexSynchro(*this);
270 xbt_die("Futex is not available on this OS.");
273 case XBT_PARMAP_BUSY_WAIT:
274 res = new BusyWaitSynchro(*this);
282 /** @brief Main function of a worker thread */
283 template <typename T> void Parmap<T>::worker_main(ThreadData* data)
285 const auto* engine = simgrid::kernel::EngineImpl::get_instance();
286 Parmap<T>& parmap = data->parmap;
288 kernel::context::Context* context = engine->get_context_factory()->create_context(std::function<void()>(), nullptr);
289 kernel::context::Context::set_current(context);
291 XBT_CDEBUG(xbt_parmap, "New worker thread created");
293 /* Worker's main loop */
295 round++; // New scheduling round
296 parmap.synchro->worker_wait(round);
297 if (parmap.destroying)
300 XBT_CDEBUG(xbt_parmap, "Worker %d got a job", data->worker_id);
302 parmap.synchro->worker_signal();
303 XBT_CDEBUG(xbt_parmap, "Worker %d has finished", data->worker_id);
305 /* We are destroying the parmap */
310 template <typename T> void Parmap<T>::PosixSynchro::master_signal()
312 const std::scoped_lock lock(ready_mutex);
313 this->parmap.thread_counter = 1;
314 this->parmap.work_round++;
315 /* wake all workers */
316 ready_cond.notify_all();
319 template <typename T> void Parmap<T>::PosixSynchro::master_wait()
321 std::unique_lock lock(done_mutex);
322 /* wait for all workers to be ready */
323 done_cond.wait(lock, [this]() { return this->parmap.thread_counter >= this->parmap.num_workers; });
326 template <typename T> void Parmap<T>::PosixSynchro::worker_signal()
328 const std::scoped_lock lock(done_mutex);
329 this->parmap.thread_counter++;
330 if (this->parmap.thread_counter == this->parmap.num_workers) {
331 /* all workers have finished, wake the controller */
332 done_cond.notify_one();
336 template <typename T> void Parmap<T>::PosixSynchro::worker_wait(unsigned expected_round)
338 std::unique_lock lock(ready_mutex);
339 /* wait for more work */
340 ready_cond.wait(lock, [this, expected_round]() { return this->parmap.work_round == expected_round; });
344 template <typename T> inline void Parmap<T>::FutexSynchro::futex_wait(std::atomic_uint* uaddr, unsigned val)
346 XBT_CVERB(xbt_parmap, "Waiting on futex %p", uaddr);
347 syscall(SYS_futex, uaddr, FUTEX_WAIT_PRIVATE, val, nullptr, nullptr, 0);
350 template <typename T> inline void Parmap<T>::FutexSynchro::futex_wake(std::atomic_uint* uaddr, unsigned val)
352 XBT_CVERB(xbt_parmap, "Waking futex %p", uaddr);
353 syscall(SYS_futex, uaddr, FUTEX_WAKE_PRIVATE, val, nullptr, nullptr, 0);
356 template <typename T> void Parmap<T>::FutexSynchro::master_signal()
358 this->parmap.thread_counter.store(1);
359 this->parmap.work_round.fetch_add(1);
360 /* wake all workers */
361 futex_wake(&this->parmap.work_round, std::numeric_limits<int>::max());
364 template <typename T> void Parmap<T>::FutexSynchro::master_wait()
366 unsigned count = this->parmap.thread_counter.load();
367 while (count < this->parmap.num_workers) {
368 /* wait for all workers to be ready */
369 futex_wait(&this->parmap.thread_counter, count);
370 count = this->parmap.thread_counter.load();
374 template <typename T> void Parmap<T>::FutexSynchro::worker_signal()
376 unsigned count = this->parmap.thread_counter.fetch_add(1) + 1;
377 if (count == this->parmap.num_workers) {
378 /* all workers have finished, wake the controller */
379 futex_wake(&this->parmap.thread_counter, std::numeric_limits<int>::max());
383 template <typename T> void Parmap<T>::FutexSynchro::worker_wait(unsigned expected_round)
385 unsigned round = this->parmap.work_round.load();
386 /* wait for more work */
387 while (round != expected_round) {
388 futex_wait(&this->parmap.work_round, round);
389 round = this->parmap.work_round.load();
394 template <typename T> void Parmap<T>::BusyWaitSynchro::master_signal()
396 this->parmap.thread_counter.store(1);
397 this->parmap.work_round.fetch_add(1);
400 template <typename T> void Parmap<T>::BusyWaitSynchro::master_wait()
402 while (this->parmap.thread_counter.load() < this->parmap.num_workers) {
403 std::this_thread::yield();
407 template <typename T> void Parmap<T>::BusyWaitSynchro::worker_signal()
409 this->parmap.thread_counter.fetch_add(1);
412 template <typename T> void Parmap<T>::BusyWaitSynchro::worker_wait(unsigned round)
414 /* wait for more work */
415 while (this->parmap.work_round.load() != round) {
416 std::this_thread::yield();
421 } // namespace simgrid::xbt