1 /* Copyright (c) 2009-2015. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 /** \file RawContext.cpp
8 * Fast context switching inspired by System V ucontexts.
10 * In contrast to System V context, it does not touch the signal mask
11 * which avoids making a system call (at least on Linux).
19 #include "src/internal_config.h"
22 #include "xbt/parmap.h"
23 #include "xbt/dynar.h"
25 #include "smx_private.h"
26 #include "smx_private.hpp"
29 XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_context);
31 // ***** Class definitions
37 class RawContextFactory;
// Execution context that is switched with raw_makecontext()/raw_swapcontext()
// (declared further down in this file).
// NOTE(review): several member declarations, the access specifiers and the
// closing brace of this class are not visible in this excerpt.
39 class RawContext : public Context {
// Stack memory of this context; assigned from SIMIX_context_stack_new() in the
// constructor and released with SIMIX_context_stack_delete() in the destructor.
41 void* stack_ = nullptr;
42 /** Pointer to the top of the stack: the value returned by raw_makecontext() and exchanged by raw_swapcontext() */
43 void* stack_top_ = nullptr;
45 friend class RawContextFactory;
// Forwards its three arguments to the Context base class constructor.
46 RawContext(std::function<void()> code,
47 void_pfn_smxprocess_t cleanup_func,
48 smx_process_t process);
// Entry point of the context; `arg` is cast back to a RawContext* in the definition.
51 static void wrapper(void* arg);
// Dispatches to suspend_parallel() or suspend_serial() depending on raw_context_parallel.
53 void suspend() override;
// Switch to the next runnable process, or back to maestro when none is left.
56 void suspend_serial();
// Switch to the next work item of the parmap, or back to the worker's saved context.
57 void suspend_parallel();
// Record the calling worker's context, then switch to this context.
59 void resume_parallel();
// Factory creating RawContext objects and running the scheduled processes
// either serially or through a parmap.
// NOTE(review): constructor/destructor declarations, access specifiers and the
// closing brace are not visible in this excerpt.
62 class RawContextFactory : public ContextFactory {
// Creates a RawContext for `process` running `code`.
66 RawContext* create_context(std::function<void()> code,
67 void_pfn_smxprocess_t, smx_process_t process) override;
// Runs every process of simix_global->process_to_run.
68 void run_all() override;
// Chooses between the serial and parallel runners for the current round.
70 void run_all_adaptative();
// Resumes the first runnable process; the others are chained by suspend_serial().
71 void run_all_serial();
// Applies resume_parallel() to every runnable process through the parmap.
72 void run_all_parallel();
// Logs (verbose level) that raw contexts are in use and returns a newly
// allocated RawContextFactory.
// NOTE(review): who deletes the returned factory is not visible here — confirm at the call site.
75 ContextFactory* raw_factory()
77 XBT_VERB("Using raw contexts. Because the glibc is just not good enough for us.");
78 return new RawContextFactory();
84 // ***** Loads of static stuff
// File-level state shared by RawContext and RawContextFactory.
// NOTE(review): the #endif lines closing the conditional sections below are
// not visible in this excerpt.
86 #if HAVE_THREAD_CONTEXTS
// Parmap used for parallel rounds; created lazily in run_all_parallel() and
// destroyed in the factory destructor.
87 static xbt_parmap_t raw_parmap;
88 static simgrid::simix::RawContext** raw_workers_context; /* space to save the worker context in each thread */
89 static uintptr_t raw_threads_working; /* number of threads that have started their work */
90 static xbt_os_thread_key_t raw_worker_id_key; /* thread-specific storage for the thread id */
92 #ifdef ADAPTIVE_THRESHOLD
93 #define SCHED_ROUND_LIMIT 5
// Timer started/stopped around each measured scheduling round.
94 static xbt_os_timer_t round_time;
// Accumulated elapsed time of the measured parallel / serial rounds.
95 static double par_time,seq_time;
// Elapsed time per process in parallel / serial mode (time divided by processes that ran).
96 static double par_ratio,seq_ratio;
// Non-zero when the corresponding round counter is a multiple of SCHED_ROUND_LIMIT.
97 static int reached_seq_limit, reached_par_limit;
98 static unsigned int par_proc_that_ran = 0,seq_proc_that_ran = 0; /* Counters of processes that have run in SCHED_ROUND_LIMIT scheduling rounds */
99 static unsigned int seq_sched_round=0, par_sched_round=0; /* Number of scheduling rounds (SR) that ran serially / in parallel */
100 /* Variables used to calculate the running variance and mean */
101 static double prev_avg_par_proc=0,prev_avg_seq_proc=0;
102 static double delta=0;
103 static double s_par_proc=0,s_seq_proc=0; /* Running sums of squared deviations of the number of processes run in par/seq; sd_*_proc is derived from them */
// Running mean and standard deviation of the number of processes per round.
104 static double avg_par_proc=0,sd_par_proc=0;
105 static double avg_seq_proc=0,sd_seq_proc=0;
// Bounds on the batch size for which rounds are still timed (see run_all_adaptative()).
// NOTE(review): converting HUGE_VAL to long long is not a representable conversion — confirm the intended initial value.
106 static long long par_window=(long long)HUGE_VAL,seq_window=0;
108 static unsigned long raw_process_index = 0; /* index of the next process to run in the
109 * list of runnable processes */
// Context whose stack pointer is saved when a serial round starts and restored
// when no runnable process is left.
110 static simgrid::simix::RawContext* raw_maestro_context;
// Selects the parallel or serial variant in RawContext::suspend()/resume().
112 static bool raw_context_parallel = false;
113 #ifdef ADAPTIVE_THRESHOLD
// Set in the factory constructor when the parallel threshold is above 1.
114 static bool raw_context_adaptative = false;
117 // ***** Raw context routines
// Signature of the function a raw context starts executing.
119 typedef void (*rawctx_entry_point_t)(void *);
// Opaque handle of a raw context; the assembly below stores a stack pointer in it.
121 typedef void* raw_stack_t;
// Prepares `malloced_stack` (of `stack_size` bytes) so that switching to the
// returned handle starts `entry_point(arg)`.
122 extern "C" raw_stack_t raw_makecontext(void* malloced_stack, int stack_size,
123 rawctx_entry_point_t entry_point, void* arg);
// Saves the current context into *old and switches to new_context.
124 extern "C" void raw_swapcontext(raw_stack_t* old, raw_stack_t new_context);
126 // TODO, we should handle FP, MMX and the x87 control-word (for x86 and x86_64)
// x86_64 implementation of raw_makecontext()/raw_swapcontext(), given as
// assembly string fragments.
// NOTE(review): the statements opening/closing the assembly blocks, the
// #else/#endif lines and part of the instruction sequences are not visible in
// this excerpt; only the visible instructions are described.
128 #if SIMGRID_PROCESSOR_x86_64
// raw_makecontext: the symbol gets a leading underscore on Apple targets.
130 #if defined(__APPLE__)
132 ".globl _raw_makecontext\n"
133 "_raw_makecontext:\n"
134 #elif defined(_WIN32)
136 ".globl raw_makecontext\n"
140 ".globl raw_makecontext\n"
141 ".type raw_makecontext,@function\n"
142 "raw_makecontext:\n"/* Calling convention sets the arguments in rdi, rsi, rdx and rcx, respectively */
// Compute the end of the stack area (stack + size) and align it down to 16 bytes.
144 " mov %rdi,%rax\n" /* stack */
145 " add %rsi,%rax\n" /* size */
146 " andq $-16, %rax\n" /* align stack */
// Fill the initial frame below the aligned top: a null return address, the
// entry point, its argument, then zeros for the remaining register slots.
147 " movq $0, -8(%rax)\n" /* @return for func */
148 " mov %rdx,-16(%rax)\n" /* func */
149 " mov %rcx,-24(%rax)\n" /* arg/rdi */
150 " movq $0, -32(%rax)\n" /* rsi */
151 " movq $0, -40(%rax)\n" /* rdx */
152 " movq $0, -48(%rax)\n" /* rcx */
153 " movq $0, -56(%rax)\n" /* r8 */
154 " movq $0, -64(%rax)\n" /* r9 */
155 " movq $0, -72(%rax)\n" /* rbp */
156 " movq $0, -80(%rax)\n" /* rbx */
157 " movq $0, -88(%rax)\n" /* r12 */
158 " movq $0, -96(%rax)\n" /* r13 */
159 " movq $0, -104(%rax)\n" /* r14 */
160 " movq $0, -112(%rax)\n" /* r15 */
// raw_swapcontext: same symbol-naming variants as above.
166 #if defined(__APPLE__)
168 ".globl _raw_swapcontext\n"
169 "_raw_swapcontext:\n"
170 #elif defined(_WIN32)
172 ".globl raw_swapcontext\n"
176 ".globl raw_swapcontext\n"
177 ".type raw_swapcontext,@function\n"
178 "raw_swapcontext:\n" /* Calling convention sets the arguments in rdi and rsi, respectively */
// Store the current stack pointer in *old, then load the new context's stack pointer.
192 " mov %rsp,(%rdi)\n" /* old */
193 " mov %rsi,%rsp\n" /* new */
// i686 implementation of raw_makecontext()/raw_swapcontext(), given as
// assembly string fragments; arguments are read from the stack.
// NOTE(review): the statements opening/closing the assembly blocks, the
// #else/#endif lines, the plain labels and several instructions are not
// visible in this excerpt; only the visible instructions are described.
208 #elif SIMGRID_PROCESSOR_i686
// raw_makecontext: the symbol gets a leading underscore on Apple and Windows targets.
210 #if defined(__APPLE__) || defined(_WIN32)
212 ".globl _raw_makecontext\n"
213 "_raw_makecontext:\n"
216 ".globl raw_makecontext\n"
217 ".type raw_makecontext,@function\n"
// Compute the end of the stack area (stack + size) and align it down to 16 bytes.
220 " movl 4(%esp),%eax\n" /* stack */
221 " addl 8(%esp),%eax\n" /* size */
222 " andl $-16, %eax\n" /* align stack */
223 " movl 12(%esp),%ecx\n" /* func */
224 " movl 16(%esp),%edx\n" /* arg */
// Fill the initial frame below the aligned top: the argument, a null return
// address, the entry point, then zeros for the saved-register slots.
225 " movl %edx, -4(%eax)\n"
226 " movl $0, -8(%eax)\n" /* @return for func */
227 " movl %ecx,-12(%eax)\n"
228 " movl $0, -16(%eax)\n" /* ebp */
229 " movl $0, -20(%eax)\n" /* ebx */
230 " movl $0, -24(%eax)\n" /* esi */
231 " movl $0, -28(%eax)\n" /* edi */
// raw_swapcontext: same symbol-naming variants as above.
237 #if defined(__APPLE__) || defined(_WIN32)
239 ".globl _raw_swapcontext\n"
240 "_raw_swapcontext:\n"
243 ".globl raw_swapcontext\n"
244 ".type raw_swapcontext,@function\n"
247 // Fetch the parameters:
248 " movl 4(%esp),%eax\n" /* old (raw_stack_t*) */
249 " movl 8(%esp),%edx\n" /* new (raw_stack_t) */
250 // Save registers of the current context on the stack:
255 // Save the current context (stack pointer) in *old:
256 " movl %esp,(%eax)\n"
257 // Switch to the stack of the new context:
259 // Pop the values of the new context:
264 // Return using the return address of the new context:
270 /* If you implement raw contexts for other processors, don't forget to
271 update the definition of HAVE_RAW_CONTEXTS in tools/cmake/CompleteInFiles.cmake */
// C++ definitions of raw_makecontext()/raw_swapcontext() for processors that
// have no assembly implementation above.
// NOTE(review): both bodies are outside this excerpt, so their behaviour is not
// documented here.
273 raw_stack_t raw_makecontext(void* malloced_stack, int stack_size,
274 rawctx_entry_point_t entry_point, void* arg) {
278 void raw_swapcontext(raw_stack_t* old, raw_stack_t new_context) {
284 // ***** Method definitions
// Builds the factory: reads the parallel-execution settings and, in parallel
// mode, prepares the per-worker bookkeeping.
// NOTE(review): braces and #else/#endif lines are not visible in this excerpt.
289 RawContextFactory::RawContextFactory()
290 : ContextFactory("RawContextFactory")
292 #ifdef ADAPTIVE_THRESHOLD
// Adaptive scheduling is used when the configured parallel threshold is above 1.
293 raw_context_adaptative = (SIMIX_context_get_parallel_threshold() > 1);
295 raw_context_parallel = SIMIX_context_is_parallel();
296 if (raw_context_parallel) {
297 #if HAVE_THREAD_CONTEXTS
298 int nthreads = SIMIX_context_get_nthreads();
// Key under which each worker thread stores its worker id (see resume_parallel()).
299 xbt_os_thread_key_create(&raw_worker_id_key);
// The parmap itself is created lazily by run_all_parallel().
301 raw_parmap = nullptr;
// One saved-context slot per worker thread; freed in the destructor.
302 raw_workers_context = xbt_new(RawContext*, nthreads);
303 raw_maestro_context = nullptr;
305 // TODO, if(SIMIX_context_get_parallel_threshold() > 1) => choose dynamically
307 #ifdef ADAPTIVE_THRESHOLD
// State used by the adaptive run_all_adaptative().
308 round_time = xbt_os_timer_new();
309 reached_seq_limit = 0;
310 reached_par_limit = 0;
// Releases the parmap and the per-worker context array when thread contexts
// are available.
// NOTE(review): a guarding condition (original line 317) is not visible here —
// confirm raw_parmap may be null when this runs.
314 RawContextFactory::~RawContextFactory()
316 #if HAVE_THREAD_CONTEXTS
318 xbt_parmap_destroy(raw_parmap);
319 xbt_free(raw_workers_context);
// Creates a RawContext through new_context<RawContext>(), moving `code` into it.
// NOTE(review): the remaining arguments of the call are not visible in this
// excerpt; presumably `cleanup` and `process` are forwarded — confirm.
323 RawContext* RawContextFactory::create_context(std::function<void()> code,
324 void_pfn_smxprocess_t cleanup, smx_process_t process)
326 return this->new_context<RawContext>(std::move(code),
// Static entry point with the rawctx_entry_point_t signature: `arg` is the
// RawContext being started.
// NOTE(review): what is done with `context` afterwards is not visible in this excerpt.
330 void RawContext::wrapper(void* arg)
332 RawContext* context = (RawContext*) arg;
// Constructs a raw context: forwards the arguments to Context, allocates a
// stack and prepares it with raw_makecontext().
// NOTE(review): the conditions guarding these statements, the last arguments of
// raw_makecontext() and the call receiving the arguments on original lines
// 352-353 are not visible in this excerpt.
337 RawContext::RawContext(std::function<void()> code,
338 void_pfn_smxprocess_t cleanup, smx_process_t process)
339 : Context(std::move(code), cleanup, process)
342 this->stack_ = SIMIX_context_stack_new();
343 this->stack_top_ = raw_makecontext(this->stack_,
344 smx_context_usable_stack_size,
// The first context created with a non-null process while no maestro context
// is recorded becomes raw_maestro_context.
348 if(process != nullptr && raw_maestro_context == nullptr)
349 raw_maestro_context = this;
// Address and size of the maestro's saved stack pointer, passed to a call whose
// name is not visible here.
352 &raw_maestro_context->stack_top_,
353 sizeof(raw_maestro_context->stack_top_));
// Releases the stack obtained from SIMIX_context_stack_new().
357 RawContext::~RawContext()
359 SIMIX_context_stack_delete(this->stack_);
// Definition of RawContext::stop().
// NOTE(review): the body is not visible in this excerpt, so its behaviour is
// not documented here.
362 void RawContext::stop()
// Runs all runnable processes. With ADAPTIVE_THRESHOLD and adaptive mode
// enabled, delegates to run_all_adaptative(); otherwise the choice depends on
// raw_context_parallel.
// NOTE(review): the branches taken after the raw_context_parallel test are not
// visible here; presumably run_all_parallel() / run_all_serial() — confirm.
368 void RawContextFactory::run_all()
370 #ifdef ADAPTIVE_THRESHOLD
371 if (raw_context_adaptative)
372 run_all_adaptative();
375 if (raw_context_parallel)
// Serial round: resumes the first process of simix_global->process_to_run.
// The following processes are reached from RawContext::suspend_serial(), which
// consumes raw_process_index.
// NOTE(review): no emptiness check on process_to_run is visible in this excerpt
// — confirm callers never pass an empty list.
381 void RawContextFactory::run_all_serial()
383 smx_process_t first_process =
384 xbt_dynar_get_as(simix_global->process_to_run, 0, smx_process_t);
// Index 0 is resumed right below, so the next one to run is index 1.
385 raw_process_index = 1;
386 static_cast<RawContext*>(first_process->context)->resume_serial();
// Parallel round: applies resume_parallel() to the context of every process in
// simix_global->process_to_run through the parmap. Dies with an error message
// when thread contexts are not available.
// NOTE(review): the line opening the callable passed to xbt_parmap_apply() and
// the #else/#endif lines are not visible in this excerpt.
389 void RawContextFactory::run_all_parallel()
391 #if HAVE_THREAD_CONTEXTS
// Worker ids are handed out again from 0 on each round (see resume_parallel()).
392 raw_threads_working = 0;
// The parmap is created on first use.
393 if (raw_parmap == nullptr)
394 raw_parmap = xbt_parmap_new(
395 SIMIX_context_get_nthreads(), SIMIX_context_get_parallel_mode());
396 xbt_parmap_apply(raw_parmap,
// Each work item is a smx_process_t passed as an untyped argument.
398 smx_process_t process = static_cast<smx_process_t>(arg);
399 RawContext* context = static_cast<RawContext*>(process->context);
400 context->resume_parallel();
402 simix_global->process_to_run);
404 xbt_die("You asked for a parallel execution, but you don't have any threads.");
// Suspends this context using the parallel or the serial variant, depending on
// raw_context_parallel.
408 void RawContext::suspend()
410 if (raw_context_parallel)
411 RawContext::suspend_parallel();
413 RawContext::suspend_serial();
// Serial suspend: hands control to the next process of
// simix_global->process_to_run, or back to the maestro context once
// raw_process_index has reached the end of the list.
// NOTE(review): the declaration of `i` and the else line are not visible in this excerpt.
416 void RawContext::suspend_serial()
418 /* determine the next context */
419 RawContext* next_context = nullptr;
// Take the current index and advance the shared counter for the next suspend.
421 i = raw_process_index++;
422 if (i < xbt_dynar_length(simix_global->process_to_run)) {
423 /* execute the next process */
424 XBT_DEBUG("Run next process");
425 next_context = (RawContext*) xbt_dynar_get_as(
426 simix_global->process_to_run, i, smx_process_t)->context;
429 /* all processes were run, return to maestro */
430 XBT_DEBUG("No more process to run");
431 next_context = (RawContext*) raw_maestro_context;
// Publish the new current context, then save our stack pointer and switch.
433 SIMIX_context_set_current(next_context);
434 raw_swapcontext(&this->stack_top_, next_context->stack_top_);
// Parallel suspend: hands control to the next work item returned by the parmap,
// or back to the context saved for the calling worker thread when no work is left.
// NOTE(review): the else line and the closing #endif are not visible in this excerpt.
437 void RawContext::suspend_parallel()
439 #if HAVE_THREAD_CONTEXTS
440 /* determine the next context */
441 smx_process_t next_work = (smx_process_t) xbt_parmap_next(raw_parmap);
442 RawContext* next_context = nullptr;
444 if (next_work != NULL) {
445 /* there is a next process to resume */
446 XBT_DEBUG("Run next process");
447 next_context = (RawContext*) next_work->context;
450 /* all processes were run, go to the barrier */
451 XBT_DEBUG("No more processes to run");
// The worker id was stored in thread-specific storage by resume_parallel().
452 uintptr_t worker_id = (uintptr_t)
453 xbt_os_thread_get_specific(raw_worker_id_key);
454 next_context = (RawContext*) raw_workers_context[worker_id];
// NOTE(review): %zu is used with uintptr_t arguments — confirm the types match on all targets.
455 XBT_DEBUG("Restoring worker stack %zu (working threads = %zu)",
456 worker_id, raw_threads_working);
// Publish the new current context, then save our stack pointer and switch.
459 SIMIX_context_set_current(next_context);
460 raw_swapcontext(&this->stack_top_, next_context->stack_top_);
// Resumes this context; the variant depends on raw_context_parallel.
// NOTE(review): both branches are outside this excerpt; presumably
// resume_parallel() / resume_serial(), mirroring suspend() — confirm.
464 void RawContext::resume()
466 if (raw_context_parallel)
// Serial resume: marks this context as current, saves the caller's stack
// pointer into the maestro context and switches to this context.
472 void RawContext::resume_serial()
474 SIMIX_context_set_current(this);
475 raw_swapcontext(&raw_maestro_context->stack_top_, this->stack_top_);
// Parallel resume, called from a parmap worker: registers the calling worker,
// remembers its current context and switches to this context. Dies with an
// error message when thread contexts are not available.
// NOTE(review): the #else/#endif lines are not visible in this excerpt.
478 void RawContext::resume_parallel()
480 #if HAVE_THREAD_CONTEXTS
// Atomically take the next worker id (the counter's value before the increment).
481 uintptr_t worker_id = __sync_fetch_and_add(&raw_threads_working, 1);
// Remember the id in thread-specific storage so suspend_parallel() can find it.
482 xbt_os_thread_set_specific(raw_worker_id_key, (void*) worker_id);
// Save the worker's own context so control can return to it once work runs out.
483 RawContext* worker_context = (RawContext*) SIMIX_context_self();
484 raw_workers_context[worker_id] = worker_context;
485 XBT_DEBUG("Saving worker stack %zu", worker_id);
486 SIMIX_context_set_current(this);
487 raw_swapcontext(&worker_context->stack_top_, this->stack_top_);
489 xbt_die("Parallel execution disabled");
494 * \brief Resumes all processes ready to run.
496 #ifdef ADAPTIVE_THRESHOLD
// Adaptive scheduling round (only built when ADAPTIVE_THRESHOLD is defined).
//
// 1. Whenever both round counters are multiples of SCHED_ROUND_LIMIT, compare
//    the measured time per process of parallel and serial rounds and move the
//    parallel threshold by one accordingly (never decreasing it when it is
//    already 2 or less).
// 2. Run the current batch in parallel when it has at least `threshold`
//    processes, serially otherwise. Rounds whose size lies inside the current
//    window are timed and feed the running mean / standard deviation of the
//    batch size, from which the window is recomputed.
//
// Fix: the definition was qualified with the misspelled class name
// `RawContectFactory`, which does not match the declared RawContextFactory.
//
// NOTE(review): braces, else lines and the round-counter increments are not
// visible in this excerpt. The smx_ctx_raw_* names and the
// context_factory->suspend assignment are not declared in the visible part of
// the file; the non-adaptive variant uses run_all_parallel()/run_all_serial()
// and raw_context_parallel instead — confirm and port before enabling
// ADAPTIVE_THRESHOLD.
497 void RawContextFactory::run_all_adaptative()
499 unsigned long nb_processes = xbt_dynar_length(simix_global->process_to_run);
500 unsigned long threshold = SIMIX_context_get_parallel_threshold();
501 reached_seq_limit = (seq_sched_round % SCHED_ROUND_LIMIT == 0);
502 reached_par_limit = (par_sched_round % SCHED_ROUND_LIMIT == 0);
// Threshold adjustment, only when both counters sit on a SCHED_ROUND_LIMIT boundary.
504 if(reached_seq_limit && reached_par_limit){
// Time per process in each mode; 0 when that mode has not run any process yet.
505 par_ratio = (par_proc_that_ran != 0) ? (par_time / (double)par_proc_that_ran) : 0;
506 seq_ratio = (seq_proc_that_ran != 0) ? (seq_time / (double)seq_proc_that_ran) : 0;
// Serial is costlier per process: lower the threshold for batches smaller than
// the average parallel batch.
507 if(seq_ratio > par_ratio){
508 if(nb_processes < avg_par_proc) {
509 threshold = (threshold>2) ? threshold - 1 : threshold ;
510 SIMIX_context_set_parallel_threshold(threshold);
// Other case: raise the threshold for batches larger than the average serial batch.
513 if(nb_processes > avg_seq_proc){
514 SIMIX_context_set_parallel_threshold(threshold+1);
// Parallel round.
519 if (nb_processes >= SIMIX_context_get_parallel_threshold()) {
520 simix_global->context_factory->suspend = smx_ctx_raw_suspend_parallel;
// Only batches below par_window are timed and used for the statistics.
521 if (nb_processes < par_window){
523 xbt_os_walltimer_start(round_time);
524 smx_ctx_raw_runall_parallel();
525 xbt_os_walltimer_stop(round_time);
526 par_time += xbt_os_timer_elapsed(round_time);
// Running mean of the parallel batch size.
528 prev_avg_par_proc = avg_par_proc;
529 delta = nb_processes - avg_par_proc;
530 avg_par_proc = (par_sched_round==1) ? nb_processes : avg_par_proc + delta / (double) par_sched_round;
// From the second measured round on: running standard deviation, and a window
// of mean + one standard deviation.
532 if(par_sched_round>=2){
533 s_par_proc = s_par_proc + (nb_processes - prev_avg_par_proc) * delta;
534 sd_par_proc = sqrt(s_par_proc / (par_sched_round-1));
535 par_window = (int) (avg_par_proc + sd_par_proc);
540 par_proc_that_ran += nb_processes;
// Batch outside the window: run it without measuring.
542 smx_ctx_raw_runall_parallel();
// Serial round.
545 simix_global->context_factory->suspend = smx_ctx_raw_suspend_serial;
// Only batches above seq_window are timed and used for the statistics.
546 if(nb_processes > seq_window){
548 xbt_os_walltimer_start(round_time);
549 smx_ctx_raw_runall_serial();
550 xbt_os_walltimer_stop(round_time);
551 seq_time += xbt_os_timer_elapsed(round_time);
// Running mean of the serial batch size.
553 prev_avg_seq_proc = avg_seq_proc;
554 delta = (nb_processes-avg_seq_proc);
555 avg_seq_proc = (seq_sched_round==1) ? nb_processes : avg_seq_proc + delta / (double) seq_sched_round;
// From the second measured round on: running standard deviation, and a window
// of mean - one standard deviation.
557 if(seq_sched_round>=2){
558 s_seq_proc = s_seq_proc + (nb_processes - prev_avg_seq_proc)*delta;
559 sd_seq_proc = sqrt(s_seq_proc / (seq_sched_round-1));
560 seq_window = (int) (avg_seq_proc - sd_seq_proc);
565 seq_proc_that_ran += nb_processes;
// Batch outside the window: run it without measuring.
567 smx_ctx_raw_runall_serial();
// Threshold-based variant: runs the batch in parallel when parallel execution
// is enabled and the batch has more processes than the parallel threshold,
// serially otherwise. raw_context_parallel is updated before running so that
// RawContext::suspend() uses the matching variant.
// NOTE(review): braces and the else line are not visible in this excerpt.
575 void RawContextFactory::run_all_adaptative()
577 unsigned long nb_processes = xbt_dynar_length(simix_global->process_to_run);
578 if (SIMIX_context_is_parallel()
579 && (unsigned long) SIMIX_context_get_parallel_threshold() < nb_processes) {
580 raw_context_parallel = true;
581 XBT_DEBUG("Runall // %lu", nb_processes);
582 this->run_all_parallel();
584 XBT_DEBUG("Runall serial %lu", nb_processes);
585 raw_context_parallel = false;
586 this->run_all_serial();