From cab169aceb66e0f811bab83b83743bc21795d524 Mon Sep 17 00:00:00 2001 From: Takahiro Hirofuchi Date: Tue, 22 Oct 2013 11:14:18 +0200 Subject: [PATCH] Fix miscalculation of CPU shares on a multicore PM. The recent commit adding the load capping feature broke CPU share calculation on a multicore PM. This patch fixes the problem. The bound value "0" means that there is no CPU bound for that task or VM. On a single-core PM, this is true. The task can use all CPU resource on that PM. On a multicore PM, the task can use the resource of only a CPU core. Even MSG_{task/vm}_set_bound() is not used in a simulation program, simulation results will be wrong for multicore PMs. This is because the bound value "0" is used internally. --- src/msg/msg_vm.c | 4 +++- src/simix/smx_host.c | 9 ++++++++- src/surf/cpu_cas01.c | 6 ++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/msg/msg_vm.c b/src/msg/msg_vm.c index 93afe69571..bae0815ba9 100644 --- a/src/msg/msg_vm.c +++ b/src/msg/msg_vm.c @@ -1250,7 +1250,9 @@ msg_host_t MSG_vm_get_pm(msg_vm_t vm) * * * 2. - * Note that bound == 0 means no bound (i.e., unlimited). + * Note that bound == 0 means no bound (i.e., unlimited). But, if a host has + * multiple CPU cores, the CPU share of a computation task (or a VM) never + * exceeds the capacity of a CPU core. */ void MSG_vm_set_bound(msg_vm_t vm, double bound) { diff --git a/src/simix/smx_host.c b/src/simix/smx_host.c index cf11967d1a..177c4cdb35 100644 --- a/src/simix/smx_host.c +++ b/src/simix/smx_host.c @@ -423,7 +423,14 @@ smx_action_t SIMIX_host_execute(const char *name, action->execution.surf_exec = ws_model->extension.workstation.execute(host, computation_amount); ws_model->action_data_set(action->execution.surf_exec, action); ws_model->set_priority(action->execution.surf_exec, priority); - ws_model->set_bound(action->execution.surf_exec, bound); + + /* Note (hypervisor): for multicore, the bound value being passed to the + * surf layer should not be zero (i.e., unlimited). It should be the + * capacity of a CPU core. */ + if (bound == 0) + ws_model->set_bound(action->execution.surf_exec, SIMIX_host_get_speed(host)); + else + ws_model->set_bound(action->execution.surf_exec, bound); } XBT_DEBUG("Create execute action %p", action); diff --git a/src/surf/cpu_cas01.c b/src/surf/cpu_cas01.c index cae096c6b3..79fe3d6ec0 100644 --- a/src/surf/cpu_cas01.c +++ b/src/surf/cpu_cas01.c @@ -237,6 +237,12 @@ static surf_action_t cpu_execute(void *cpu, double size) GENERIC_LMM_ACTION(action).suspended = 0; /* Should be useless because of the calloc but it seems to help valgrind... */ + /* Note (hypervisor): here, the bound value of the variable is set to the + * capacity of a CPU core. But, after MSG_{task/vm}_set_bound() were added to + * the hypervisor branch, this bound value is overwritten in + * SIMIX_host_execute(). + * TODO: cleanup this. + */ GENERIC_LMM_ACTION(action).variable = lmm_variable_new(cpu_model->model_private->maxmin_system, action, GENERIC_ACTION(action).priority, -- 2.30.2