1 /* Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include "surf_private.h"
8 #include "xbt/module.h"
10 #include "simix/smx_host_private.h"
11 #include "surf/surf_resource.h"
12 #include "xbt/xbt_os_thread.h"
13 #include "simgrid/sg_config.h"
/* Declares the root "surf" log category and its "surf_kernel" subcategory,
 * which the macro name marks as the default one for this file. */
17 XBT_LOG_NEW_CATEGORY(surf, "All SURF categories");
18 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(surf_kernel, surf,
19 "Logging specific to SURF (kernel)");
21 /* Additional declarations for Windows portability. */
/* Table of MAX_DRIVE drive strings. __surf_get_initial_path() returns the
 * entry whose first character matches the upper-cased drive letter of the
 * current directory. (The initializer entries are not visible in this excerpt.) */
29 static const char *disk_drives_letter_table[MAX_DRIVE] = {
57 #endif /* #ifdef _XBT_WIN32 */
60 * Returns the initial path. On Windows, the initial path is
61 * the current directory of the current process; otherwise
62 * the function returns "./", which represents the current
63 * directory on Unix/Linux platforms.
/* Windows code path visible below: reads the current directory of the process,
 * copies its first three characters into root and returns the entry of
 * disk_drives_letter_table whose first character equals toupper(root[0]).
 * The non-Windows path is not visible in this excerpt. */
66 const char *__surf_get_initial_path(void)
71 char current_directory[MAX_PATH + 1] = { 0 };
72 unsigned int len = GetCurrentDirectory(MAX_PATH + 1, current_directory);
/* NOTE(review): strncpy() leaves root unterminated when all 3 characters are
 * copied -- confirm root is zero-initialized and holds at least 4 bytes. */
78 strncpy(root, current_directory, 3);
/* Linear scan of the drive table for the matching drive letter.
 * NOTE(review): root[0] is passed to toupper() as a plain char; a cast to
 * unsigned char would be the portable form -- confirm the declared type. */
80 for (i = 0; i < MAX_DRIVE; i++) {
81 if (toupper(root[0]) == disk_drives_letter_table[i][0])
82 return disk_drives_letter_table[i];
91 /* The __surf_is_absolute_file_path() function returns 1 if
92 * file_path is an absolute file path; otherwise
93 * the function returns 0.
/* Two variants are visible: one probes file_path through the Win32
 * FindFirstFile() API, the other simply tests whether the first character is
 * '/'. Presumably a platform #ifdef (not visible here) selects between them. */
95 int __surf_is_absolute_file_path(const char *file_path)
98 WIN32_FIND_DATA wfd = { 0 };
99 HANDLE hFile = FindFirstFile(file_path, &wfd);
/* The lookup failed: the result returned for this case is not visible here. */
101 if (INVALID_HANDLE_VALUE == hFile)
/* Reads file_path[0] unconditionally, so file_path must be non-NULL. */
107 return (file_path[0] == '/');
/* Global SURF state, all NULL until initialized.
 *  - model_list:    dynar of models, created in surf_init() and iterated by
 *                   surf_presolve() and surf_solve()
 *  - history:       trace event history, created in surf_init()
 *  - maxmin_system: global lmm system, released during teardown
 *  - surf_path:     directories searched by surf_fopen() for relative names */
113 xbt_dynar_t model_list = NULL;
114 tmgr_history_t history = NULL;
115 lmm_system_t maxmin_system = NULL;
116 xbt_dynar_t surf_path = NULL;
118 /* Don't forget to update the option description in smx_config when you change this */
/* Catalogue of the selectable network models. Each entry has three fields (see
 * the terminator); the last two visible here are the long description and the
 * model's init function. The leading name field of each entry is not visible
 * in this excerpt. Lookups stop at the first entry whose name is NULL. */
119 s_surf_model_description_t surf_network_model_description[] = {
121 "Realistic network analytic model (slow-start modeled by multiplying latency by 10.4, bandwidth by .92; bottleneck sharing uses a payload of S=8775 for evaluating RTT). ",
122 surf_network_model_init_LegrandVelho},
124 "Simplistic network model where all communication take a constant time (one second). This model provides the lowest realism, but is (marginally) faster.",
125 surf_network_model_init_Constant},
127 "Realistic network model specifically tailored for HPC settings (accurate modeling of slow start with correction factors on three intervals: < 1KiB, < 64 KiB, >= 64 KiB)",
128 surf_network_model_init_SMPI},
130 "Legacy network analytic model (Very similar to LV08, but without corrective factors. The timings of small messages are thus poorly modeled).",
131 surf_network_model_init_CM02},
134 "Network pseudo-model using the GTNets simulator instead of an analytic model",
135 surf_network_model_init_GTNETS},
139 "Network pseudo-model using the NS3 tcp model instead of an analytic model",
140 surf_network_model_init_NS3},
/* The three entries below share the same description text and differ only by
 * their init function. */
143 "Model from Steven H. Low using lagrange_solve instead of lmm_solve (experts only; check the code for more info).",
144 surf_network_model_init_Reno},
146 "Model from Steven H. Low using lagrange_solve instead of lmm_solve (experts only; check the code for more info).",
147 surf_network_model_init_Reno2},
149 "Model from Steven H. Low using lagrange_solve instead of lmm_solve (experts only; check the code for more info).",
150 surf_network_model_init_Vegas},
151 {NULL, NULL, NULL} /* this array must be NULL terminated */
/* Catalogue of the selectable CPU models; the only entry visible here is the
 * one initialized by surf_cpu_model_init_Cas01. NULL-terminated. */
154 s_surf_model_description_t surf_cpu_model_description[] = {
156 "Simplistic CPU model (time=size/power).",
157 surf_cpu_model_init_Cas01},
158 {NULL, NULL, NULL} /* this array must be NULL terminated */
/* Catalogue of the selectable workstation models: the current default, the
 * compound model and ptask_L07. Entries are {name, description, init function};
 * the array is NULL-terminated. */
161 s_surf_model_description_t surf_workstation_model_description[] = {
163 "Default workstation model. Currently, CPU:Cas01 and network:LV08 (with cross traffic enabled)",
164 surf_workstation_model_init_current_default},
166 "Workstation model that is automatically chosen if you change the network and CPU models",
167 surf_workstation_model_init_compound},
168 {"ptask_L07", "Workstation model somehow similar to Cas01+CM02 but allowing parallel tasks",
169 surf_workstation_model_init_ptask_L07},
170 {NULL, NULL, NULL} /* this array must be NULL terminated */
/* Descriptions of the optimization modes (lazy, trace integration, full
 * update, judging from the texts). Only the description field of each entry is
 * visible in this excerpt; the name and third fields are not. NULL-terminated. */
173 s_surf_model_description_t surf_optimization_mode_description[] = {
175 "Lazy action management (partial invalidation in lmm + heap in action remaining).",
178 "Trace integration. Highly optimized mode when using availability traces (only available for the Cas01 CPU model for now).",
181 "Full update of remaining and variables. Slow but may be useful when debugging.",
183 {NULL, NULL, NULL} /* this array must be NULL terminated */
/* Catalogue of the selectable storage models; the entry visible here is
 * initialized by surf_storage_model_init_default. NULL-terminated. */
186 s_surf_model_description_t surf_storage_model_description[] = {
188 "Simplistic storage model.",
189 surf_storage_model_init_default},
190 {NULL, NULL, NULL} /* this array must be NULL terminated */
193 /* ********************************************************************* */
194 /* TUTORIAL: New model */
/* Model catalogue for the tutorial "new model"; the entry visible here is
 * initialized by surf_new_model_init_default (its name and description are not
 * visible in this excerpt). NULL-terminated. */
195 s_surf_model_description_t surf_new_model_description[] = {
198 surf_new_model_init_default},
199 {NULL, NULL, NULL} /* this array must be NULL terminated */
201 /* ********************************************************************* */
/* File-private state shared between surf_solve() and the per-model workers
 * surf_share_resources() / surf_update_actions_state(). */
203 #ifdef CONTEXT_THREADS
204 static xbt_parmap_t surf_parmap = NULL; /* parallel map on models */
207 static int surf_nthreads = 1; /* number of threads of the parmap (1 means no parallelism) */
208 static double *surf_mins = NULL; /* return value of share_resources for each model */
/* Incremented atomically by surf_share_resources() so that each call gets its
 * own slot in surf_mins. */
209 static int surf_min_index; /* current index in surf_mins */
/* Written by surf_solve() and read by surf_update_actions_state(). */
210 static double min; /* duration determined by surf_solve */
/* Forward declarations of the per-model workers defined further down. */
212 static void surf_share_resources(surf_model_t model);
213 static void surf_update_actions_state(surf_model_t model);
215 /** Displays the long description of all registered models, and quit */
/* category is presumably what fills the %s of the header line (that argument
 * line is not visible); table is walked up to its first NULL-name entry and one
 * "name: description" line is printed per model. */
216 void model_help(const char *category, s_surf_model_description_t * table)
219 printf("Long description of the %s models accepted by this simulator:\n",
221 for (i = 0; table[i].name; i++)
222 printf(" %s: %s\n", table[i].name, table[i].description);
/* Looks for an entry of table (NULL-name terminated) whose name equals the
 * requested name exactly (strcmp). What is returned on a match is not visible
 * in this excerpt -- presumably the matching index. When nothing matches, a
 * comma-separated list of every known model name is built and the program is
 * stopped through xbt_die() with that list in the message. */
225 int find_model_description(s_surf_model_description_t * table,
229 char *name_list = NULL;
231 for (i = 0; table[i].name; i++)
232 if (!strcmp(name, table[i].name)) {
/* No match: assemble "name0, name1, ..." for the error message.
 * NOTE(review): table[0].name is handed to strdup() unconditionally, so the
 * table is assumed to hold at least one real entry -- confirm. */
235 name_list = strdup(table[0].name);
236 for (i = 1; table[i].name; i++) {
/* +3 covers the ", " separator and the terminating NUL. */
238 xbt_realloc(name_list,
239 strlen(name_list) + strlen(table[i].name) + 3);
240 strcat(name_list, ", ");
241 strcat(name_list, table[i].name);
/* name_list is not released in the visible code before xbt_die() is called. */
243 xbt_die("Model '%s' is invalid! Valid models are: %s.", name, name_list);
/* Derives a duration "min" from the actions of running_actions. Each action
 * contributes remains / value, where value is read from the action's lmm
 * variable, and also its max_duration when that is non-negative. The solve
 * callback takes an lmm_system_t; its call site and the remaining parameters
 * are not visible in this excerpt. */
247 double generic_maxmin_share_resources(xbt_swag_t running_actions,
250 void (*solve) (lmm_system_t))
252 surf_action_t action = NULL;
/* Reads the lmm_variable_t stored "offset" bytes into the action structure. */
255 #define VARIABLE(action) (*((lmm_variable_t*)(((char *) (action)) + (offset))))
/* First pass: look for an action with a positive value or a max_duration set
 * (presumably the loop stops there; the exit statement is not visible). */
259 xbt_swag_foreach(action, running_actions) {
260 value = lmm_variable_getvalue(VARIABLE(action));
261 if ((value > 0) || (action->max_duration >= 0))
/* Seed min from that action: time to completion, capped by max_duration. */
269 if (action->remains > 0)
270 min = action->remains / value;
273 if ((action->max_duration >= 0) && (action->max_duration < min))
274 min = action->max_duration;
276 min = action->max_duration;
/* Second pass: resume from the action following the seed one. */
279 for (action = xbt_swag_getNext(action, running_actions->offset);
281 action = xbt_swag_getNext(action, running_actions->offset)) {
282 value = lmm_variable_getvalue(VARIABLE(action));
/* value is reused to hold this action's time to completion. */
284 if (action->remains > 0)
285 value = action->remains / value;
290 XBT_DEBUG("Updating min (value) with %p: %f", action, min);
293 if ((action->max_duration >= 0) && (action->max_duration < min)) {
294 min = action->max_duration;
295 XBT_DEBUG("Updating min (duration) with %p: %f", action, min);
298 XBT_DEBUG("min value : %f", min);
/* Lazy variant of resource sharing. After lmm_solve() has run on the model's
 * maxmin system, only the actions extracted from the model's modified_set are
 * re-evaluated; each one is removed from and re-inserted into the model's
 * action heap. The final min is the heap's top key minus now when the heap is
 * not empty (the value used for an empty heap is not visible in this excerpt). */
304 double generic_share_resources_lazy(double now, surf_model_t model)
306 surf_action_lmm_t action = NULL;
311 ("Before share resources, the size of modified actions set is %d",
312 xbt_swag_size(model->model_private->modified_set));
314 lmm_solve(model->model_private->maxmin_system);
317 ("After share resources, The size of modified actions set is %d",
318 xbt_swag_size(model->model_private->modified_set));
/* Drain modified_set: xbt_swag_extract() hands out one action per iteration
 * until it returns NULL. */
320 while((action = xbt_swag_extract(model->model_private->modified_set))) {
321 int max_dur_flag = 0;
/* Actions that are not in the running set are filtered out here (presumably
 * skipped; the statement executed for them is not visible). */
323 if (action->generic_action.state_set !=
324 model->states.running_action_set)
327 /* bogus priority, skip it */
328 if (action->generic_action.priority <= 0)
331 generic_update_action_remaining_lazy(action,now);
334 value = lmm_variable_getvalue(action->variable);
/* value becomes the time still needed: remaining amount / current value. */
336 if (action->generic_action.remains > 0) {
337 value = action->generic_action.remains / value;
/* An action with a max_duration may end at start + max_duration instead. */
345 if ((action->generic_action.max_duration != NO_MAX_DURATION)
347 || action->generic_action.start +
348 action->generic_action.max_duration < min)) {
349 min = action->generic_action.start +
350 action->generic_action.max_duration;
354 XBT_DEBUG("Action(%p) Start %lf Finish %lf Max_duration %lf", action,
355 action->generic_action.start, now + value,
356 action->generic_action.max_duration);
/* Remove then insert to refresh the action's heap position, keyed by min and
 * tagged MAX_DURATION or NORMAL depending on max_dur_flag. */
359 surf_action_lmm_heap_remove(model->model_private->action_heap,action);
360 surf_action_lmm_heap_insert(model->model_private->action_heap,action, min, max_dur_flag ? MAX_DURATION : NORMAL);
361 XBT_DEBUG("Insert at heap action(%p) min %lf now %lf", action, min,
363 } else DIE_IMPOSSIBLE;
366 //hereafter must have already the min value for this resource model
/* Heap keys are compared against now, so they are presumably absolute dates;
 * subtracting now turns the top key into a duration. */
367 if (xbt_heap_size(model->model_private->action_heap) > 0)
368 min = xbt_heap_maxkey(model->model_private->action_heap) - now;
372 XBT_DEBUG("The minimum with the HEAP %lf", min);
/* Free callback that surf_init() registers for ROUTING_HOST_LEVEL and
 * ROUTING_ASR_LEVEL. p is handled as an sg_routing_edge_t; what is released
 * from it is not visible in this excerpt. */
376 static XBT_INLINE void routing_asr_host_free(void *p)
378 sg_routing_edge_t elm = p;
/* Free callback that surf_init() registers for ROUTING_PROP_ASR_LEVEL (the
 * body is not visible in this excerpt). */
383 static XBT_INLINE void routing_asr_prop_free(void *p)
/* Stores the compile-time SimGrid version (major, minor, patch) through the
 * three output pointers. Each pointer is dereferenced unconditionally, so all
 * three must be non-NULL. */
389 void sg_version(int *ver_major,int *ver_minor,int *ver_patch) {
390 *ver_major = SIMGRID_VERSION_MAJOR;
391 *ver_minor = SIMGRID_VERSION_MINOR;
392 *ver_patch = SIMGRID_VERSION_PATCH;
/* Sets up the SURF kernel: creates the resource libraries and their levels,
 * initializes xbt, creates the model list and the trace history, registers the
 * tracing start/end hooks and finally initializes the configuration from the
 * command line (argc/argv are forwarded to xbt_init() and sg_config_init()). */
395 void surf_init(int *argc, char **argv)
397 XBT_DEBUG("Create all Libs");
398 host_lib = xbt_lib_new();
399 link_lib = xbt_lib_new();
400 as_router_lib = xbt_lib_new();
401 storage_lib = xbt_lib_new();
402 storage_type_lib = xbt_lib_new();
/* Unlike the others, watched_hosts_lib is a dict, not an xbt_lib. */
403 watched_hosts_lib = xbt_dict_new();
/* Each level is registered together with the callback used to free its data. */
405 XBT_DEBUG("Add routing levels");
406 ROUTING_HOST_LEVEL = xbt_lib_add_level(host_lib,routing_asr_host_free);
407 ROUTING_ASR_LEVEL = xbt_lib_add_level(as_router_lib,routing_asr_host_free);
408 ROUTING_PROP_ASR_LEVEL = xbt_lib_add_level(as_router_lib,routing_asr_prop_free);
410 XBT_DEBUG("Add SURF levels");
411 SURF_CPU_LEVEL = xbt_lib_add_level(host_lib,surf_resource_free);
412 SURF_WKS_LEVEL = xbt_lib_add_level(host_lib,surf_resource_free);
413 SURF_LINK_LEVEL = xbt_lib_add_level(link_lib,surf_resource_free);
415 xbt_init(argc, argv);
/* NOTE(review): the element size is taken from surf_model_private_t, while the
 * list is iterated with surf_model_t elements elsewhere in this file --
 * confirm both typedefs have the same size (presumably both are pointers). */
417 model_list = xbt_dynar_new(sizeof(surf_model_private_t), NULL);
419 history = tmgr_history_new();
422 TRACE_add_start_function(TRACE_surf_alloc);
423 TRACE_add_end_function(TRACE_surf_release);
426 sg_config_init(argc, argv);
/* Path separator that surf_fopen() places between a search directory and a
 * relative file name: backslash or forward slash. The preprocessor condition
 * choosing between the two definitions is not visible in this excerpt. */
434 # define FILE_DELIM "\\"
436 # define FILE_DELIM "/" /* FIXME: move to better location */
/* fopen() wrapper that honours the SURF search path. An absolute name is opened
 * as is; a relative one is tried under each directory of surf_path in turn, as
 * "<dir>" FILE_DELIM "<name>". mode is passed unchanged to fopen(). What happens
 * after each attempt (result check, release of buff) is not visible in this
 * excerpt. */
439 FILE *surf_fopen(const char *name, const char *mode)
442 char *path_elm = NULL;
448 if (__surf_is_absolute_file_path(name)) /* don't mess with absolute file names */
449 return fopen(name, mode);
451 /* search relative files in the path */
452 xbt_dynar_foreach(surf_path, cpt, path_elm) {
/* buff is presumably heap-allocated by bprintf() -- verify it is freed. */
453 buff = bprintf("%s" FILE_DELIM "%s", path_elm, name);
454 file = fopen(buff, mode);
/* Body of the SURF teardown routine. Its header is not visible in this excerpt
 * (presumably surf_exit()). It releases, in this order: the configuration,
 * every registered model and the model list, the global maxmin system, the
 * trace history, the parmap, the search path, the resource libraries, the
 * watched hosts dict and the parser state; the clock is then reset. */
466 surf_model_t model = NULL;
468 sg_config_finalize();
/* Let every model run its own finalizer before the list itself is freed. */
470 xbt_dynar_foreach(model_list, iter, model)
471 model->model_private->finalize();
472 xbt_dynar_free(&model_list);
476 lmm_system_free(maxmin_system);
477 maxmin_system = NULL;
480 tmgr_history_free(history);
485 #ifdef CONTEXT_THREADS
486 xbt_parmap_destroy(surf_parmap);
491 xbt_dynar_free(&surf_path);
493 xbt_lib_free(&host_lib);
494 xbt_lib_free(&link_lib);
495 xbt_lib_free(&as_router_lib);
496 xbt_lib_free(&storage_lib);
497 xbt_lib_free(&storage_type_lib);
499 xbt_dict_free(&watched_hosts_lib);
502 surf_parse_lex_destroy();
503 surf_parse_free_callbacks();
505 NOW = 0; /* Just in case the user plans to restart the simulation afterward */
/* Run once before the first solve, as the log message states. Walks the trace
 * history while it still has a next date, hands each retrieved event to the
 * update_resource_state() hook of its resource's model, then asks every model
 * to update its action states at NOW with a zero delta. */
508 void surf_presolve(void)
510 double next_event_date = -1.0;
511 tmgr_trace_event_t event = NULL;
513 surf_resource_t resource = NULL;
514 surf_model_t model = NULL;
518 ("First Run! Let's \"purge\" events and put models in the right state");
/* tmgr_history_next_date() yields -1.0 when no date is left. */
519 while ((next_event_date = tmgr_history_next_date(history)) != -1.0) {
/* Events located in the future are detected here (presumably the loop stops;
 * the statement itself is not visible). */
520 if (next_event_date > NOW)
523 tmgr_history_get_next_event_leq(history, next_event_date,
525 (void **) &resource))) {
527 resource->model->model_private->update_resource_state(resource,
533 xbt_dynar_foreach(model_list, iter, model)
534 model->model_private->update_actions_state(NOW, 0.0);
/* Computes the duration of the next simulation step and applies it to the
 * models. The file-scope variable min holds that duration: it starts from
 * max_date - NOW when max_date is set, is refined with the values collected
 * from each model's share_resources() and with the dates of upcoming trace
 * events, and is finally consumed by surf_update_actions_state() for every
 * model. -1.0 is used throughout as "no value". The return statement is not
 * visible in this excerpt. */
537 double surf_solve(double max_date)
539 min = -1.0; /* duration */
540 double next_event_date = -1.0;
541 double model_next_action_end = -1.0;
543 surf_resource_t resource = NULL;
544 surf_model_t model = NULL;
545 tmgr_trace_event_t event = NULL;
/* A max_date of -1.0, or one equal to NOW, does not bound the step. */
548 if (max_date != -1.0 && max_date != NOW) {
549 min = max_date - NOW;
552 XBT_DEBUG("Looking for next action end for all models except NS3");
/* Allocated on the first call only, with one slot per model registered then.
 * NOTE(review): confirm model_list cannot grow after that first call,
 * otherwise surf_share_resources() would write past the end. */
554 if (surf_mins == NULL) {
555 surf_mins = xbt_new(double, xbt_dynar_length(model_list));
/* Collect every model's next action end into surf_mins, through the parmap
 * when more than one thread is configured, in a plain loop otherwise. */
560 if (surf_get_nthreads() > 1) {
561 /* parallel version */
562 #ifdef CONTEXT_THREADS
563 xbt_parmap_apply(surf_parmap, (void_f_pvoid_t) surf_share_resources, model_list);
565 xbt_die("Asked to run in parallel, but no thread at hand...");
569 /* sequential version */
570 xbt_dynar_foreach(model_list, iter, model) {
571 surf_share_resources(model);
/* Select the non-negative collected values that are below the current min, or
 * any such value while min is still unset (presumably assigned to min; the
 * assignment is not visible). */
576 for (i = 0; i < xbt_dynar_length(model_list); i++) {
577 if ((min < 0.0 || surf_mins[i] < min)
578 && surf_mins[i] >= 0.0) {
583 XBT_DEBUG("Min for resources (remember that NS3 don't update that value) : %f", min);
585 XBT_DEBUG("Looking for next trace event");
588 XBT_DEBUG("Next TRACE event : %f", next_event_date);
590 next_event_date = tmgr_history_next_date(history);
/* The model named "network NS3" is driven here rather than in
 * surf_share_resources(): its share_resources() receives the current bound and
 * a non-negative result replaces min. */
592 if(surf_network_model->name && !strcmp(surf_network_model->name,"network NS3")){
593 if(next_event_date!=-1.0 && min!=-1.0) {
594 min = MIN(next_event_date - NOW, min);
596 min = MAX(next_event_date - NOW, min);
599 XBT_DEBUG("Run for network at most %f", min);
600 // run until min or next flow
601 model_next_action_end = surf_network_model->model_private->share_resources(min);
603 XBT_DEBUG("Min for network : %f", model_next_action_end);
604 if(model_next_action_end>=0.0)
605 min = model_next_action_end;
608 if (next_event_date < 0.0) {
609 XBT_DEBUG("no next TRACE event. Stop searching for it");
/* Leave the event loop when no duration is known or when the next event lies
 * beyond the end of the current step. */
613 if ((min == -1.0) || (next_event_date > NOW + min)) break;
615 XBT_DEBUG("Updating models");
617 tmgr_history_get_next_event_leq(history, next_event_date,
619 (void **) &resource))) {
/* An event hitting a resource in use shortens the step to the event date. */
620 if (resource->model->model_private->resource_used(resource)) {
621 min = next_event_date - NOW;
623 ("This event will modify model state. Next event set to %f",
626 /* update state of model_obj according to new value. Does not touch lmm.
627 It will be modified if needed when updating actions */
628 XBT_DEBUG("Calling update_resource_state for resource %s with min %lf",
629 resource->model->name, min);
630 resource->model->model_private->update_resource_state(resource,
636 /* FIXME: Moved this test to here to avoid stopping simulation if there are actions running on cpus and all cpus are with availability = 0.
637 * This may cause an infinite loop if one cpu has a trace with periodicity = 0 and the other a trace with periodicity > 0.
638 * The options are: all traces with same periodicity(0 or >0) or we need to change the way how the events are managed */
640 XBT_DEBUG("No next event at all. Bail out now.");
644 XBT_DEBUG("Duration set to %f", min);
/* Apply the step to every model, in parallel or sequentially as above. */
648 if (surf_get_nthreads() > 1) {
649 /* parallel version */
650 #ifdef CONTEXT_THREADS
651 xbt_parmap_apply(surf_parmap, (void_f_pvoid_t) surf_update_actions_state, model_list);
655 /* sequential version */
656 xbt_dynar_foreach(model_list, iter, model) {
657 surf_update_actions_state(model);
662 TRACE_paje_dump_buffer (0);
/* Accessor for the simulation clock. The body is not visible in this excerpt;
 * presumably it returns NOW. */
668 XBT_INLINE double surf_get_clock(void)
/* Per-model worker of surf_solve(), called directly or through
 * xbt_parmap_apply(). Stores the model's next action end, as returned by its
 * share_resources(NOW) hook, in a free slot of surf_mins. The model named
 * "network NS3" is not queried and records -1.0. */
673 static void surf_share_resources(surf_model_t model)
675 double next_action_end = -1.0;
/* Atomic fetch-and-add: concurrent workers each obtain a distinct slot, so a
 * slot index need not match the model's position in model_list. */
676 int i = __sync_fetch_and_add(&surf_min_index, 1);
677 if (strcmp(model->name,"network NS3")) {
678 XBT_DEBUG("Running for Resource [%s]", model->name);
679 next_action_end = model->model_private->share_resources(NOW);
680 XBT_DEBUG("Resource [%s] : next action end = %f",
681 model->name, next_action_end);
683 surf_mins[i] = next_action_end;
/* Per-model worker of surf_solve(): forwards the current date and the step
 * duration (file-scope min) to the model's update_actions_state() hook. */
686 static void surf_update_actions_state(surf_model_t model)
688 model->model_private->update_actions_state(NOW, min);
692 * \brief Returns the number of parallel threads used to update the models.
693 * \return the number of threads (1 means no parallelism)
/* Plain accessor for the file-scope surf_nthreads, which starts at 1 and is
 * changed by surf_set_nthreads(). */
695 int surf_get_nthreads(void) {
696 return surf_nthreads;
700 * \brief Sets the number of parallel threads used to update the models.
702 * A value of 1 means no parallelism.
704 * \param nthreads the number of threads to use
/* Records the requested thread count in surf_nthreads and manages the parmap
 * accordingly. Visible steps: the count may be replaced by the number of cores
 * ("auto-setting"), the existing parmap is destroyed, and a new one is created
 * when threads are available; otherwise an arg_error is thrown. The conditions
 * guarding these steps are not visible in this excerpt. */
706 void surf_set_nthreads(int nthreads) {
/* Automatic mode: use the number of cores reported by xbt_os_get_numcores(). */
709 nthreads = xbt_os_get_numcores();
710 XBT_INFO("Auto-setting surf/nthreads to %d",nthreads);
/* Drop the parmap built for the previous setting. */
713 #ifdef CONTEXT_THREADS
714 xbt_parmap_destroy(surf_parmap);
719 #ifdef CONTEXT_THREADS
720 surf_parmap = xbt_parmap_new(nthreads, XBT_PARMAP_DEFAULT);
/* Presumably the branch taken when CONTEXT_THREADS is not defined. */
722 THROWF(arg_error, 0, "Cannot activate parallel threads in Surf: your architecture does not support threads");
726 surf_nthreads = nthreads;
729 /* This function is a pimple that we ought to fix. But it won't be easy.
731 * The surf_solve() function does not properly return the set of actions that changed.
732 * Instead, each model changes some global data, and then the caller of surf_solve must
733 * pick into these sets of action_failed and action_done.
735 * This was not clean but ok as long as we didn't have to restart the processes when the resource comes back up.
736 * We worked by putting sentinel actions on every resource we are interested in,
737 * so that surf informs us if/when the corresponding resource fails.
739 * But this does not work to get Simix informed of when a resource comes back up, and this is where this pimple comes.
740 * We have a set of resources that are currently down and for which simix needs to know when it comes back up.
741 * And the current function is called *at every simulation step* to sweep over that set, searching for a resource
742 * that was turned back up in the meanwhile. This is UGLY and slow.
744 * The proper solution would be to not rely on globals for the action_failed and action_done swags.
745 * They must be passed as parameter by the caller (the handling of these actions in simix may let you
746 * think that these two sets can be merged, but their handling in SimDag induces the contrary unless this
747 * simdag code can check by itself whether the action is done or failed -- seems very doable, but yet more
750 * Once surf_solve() is passed the set of actions that changed, you want to add a new set of resources back up
751 * as parameter to this function. You also want to add a boolean field "restart_watched" to each resource, and
752 * make sure that whenever a resource with this field enabled comes back up, it's added to that set so that Simix
753 * sees it and reacts accordingly. This would kill the need for surf to call simix.
/* Sweeps watched_hosts_lib. For every watched host whose state is
 * SURF_RESOURCE_ON, the host's processes are restarted through
 * SIMIX_host_autorestart() and the host is removed from the dict; the other
 * hosts only produce a debug message. */
756 void surf_watched_hosts(void)
760 xbt_dict_cursor_t cursor;
762 XBT_DEBUG("Check for host SURF_RESOURCE_ON on watched_hosts_lib");
763 xbt_dict_foreach(watched_hosts_lib,cursor,key,host)
765 if(SIMIX_host_get_state(host) == SURF_RESOURCE_ON){
766 XBT_INFO("Restart processes on host: %s",SIMIX_host_get_name(host));
767 SIMIX_host_autorestart(host);
/* NOTE(review): the entry is removed while the dict is being iterated --
 * confirm that xbt_dict_foreach tolerates removal of the current element. */
768 xbt_dict_remove(watched_hosts_lib,key);
771 XBT_DEBUG("See SURF_RESOURCE_OFF on host: %s",key);