{
XBT_DEBUG("mig: rx_start");
+ // The structure has been created in the do_migration function and should only be freed in the same place ;)
struct migration_session *ms = MSG_process_get_data(MSG_process_self());
-
s_ws_params_t params;
simcall_host_get_params(ms->vm, &params);
const double xfer_cpu_overhead = params.xfer_cpu_overhead;
msg_task_t task = NULL;
MSG_task_recv(&task, ms->mbox);
{
- double received ;
- // TODO Adrien Clean the code (destroy task, free memory etc..
+ double received ;
if (task)
received = MSG_task_get_data_size(task);
- else
- return 0;
+ else{
+ // An error occurred, clean up and return
+ // The owner did not change, hence the task should be only destroyed on the other side
+ xbt_free(finalize_task_name);
+ return 0;
+ }
/* TODO: clean up */
// const double alpha = 0.22L * 1.0E8 / (80L * 1024 * 1024);
launch_deferred_exec_process(ms->vm, received * xfer_cpu_overhead, 1);
break;
}
+ // Here Stage 1, 2 and 3 have been performed.
+ // Hence complete the migration
+// TODO: we have an issue, if the DST node is turning off during the three next calls, then the VM is in an inconsistent state
+// I should check with Takahiro in order to make this portion of code atomic
/* deinstall the current affinity setting for the CPU */
simcall_vm_set_affinity(ms->vm, ms->src_pm, 0);
msg_task_t task = MSG_task_create(task_name, 0, 0, NULL);
msg_error_t ret = MSG_task_send(task, ms->mbox_ctl);
- xbt_assert(ret == MSG_OK);
-
+ // xbt_assert(ret == MSG_OK);
+ if(ret == MSG_HOST_FAILURE){
+ // The SRC has crashed; this is not a problem as the VM has been correctly migrated to the DST node
+ MSG_task_destroy(task);
+ }
xbt_free(task_name);
}
// xbt_assert(ret == MSG_OK);
xbt_free(task_name);
if(ret == MSG_HOST_FAILURE){
- THROWF(host_error, 0, "host failed during migration of %s", sg_host_name(vm));
- }
+ THROWF(host_error, 0, "host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
+ //XBT_INFO("host failed during migration of %s (stage %d)", sg_host_name(vm), stage);
+ MSG_task_destroy(task);
+ return;
+
+ }
#endif
double clock_end = MSG_get_clock();
double cpu_utilization = 0;
#endif
-// TODO - adsein, WTF with the following code ?
if (stage == 2){
XBT_DEBUG("mig-stage%d.%d: sent %llu duration %f actual_speed %f (target %f) cpu %f", stage, stage2_round, size, duration, actual_speed, mig_speed, cpu_utilization);}
else{
}
-
-
#ifdef USE_MICRO_TASK
/* The name of a micro task starts with __micro, which does not match the
* special name that finalizes the receiver loop. Thus, we send the special task.
// xbt_assert(ret == MSG_OK);
xbt_free(task_name);
if(ret == MSG_HOST_FAILURE){
- //THROWF(host_error, 0, "host failed", sg_host_name(vm));
- XBT_INFO("host failed during migration of %s (stage 3)", sg_host_name(vm));
- //MSG_task_destroy(task);
+ THROWF(host_error, 0, "host failed during migration of VM %s (stage 3)", sg_host_name(vm));
+ //XBT_INFO("host failed during migration of %s (stage 3)", sg_host_name(vm));
+ // The owner of the task did not change so destroy the task
+ MSG_task_destroy(task);
return;
}
}
{
XBT_DEBUG("mig: tx_start");
-
+ // Note that the ms structure has been allocated in do_migration and hence should be freed in the same function ;)
struct migration_session *ms = MSG_process_get_data(MSG_process_self());
s_ws_params_t params;
computed_during_stage1 = send_stage1(ms, ramsize, mig_speed, xfer_cpu_overhead, dp_rate, dp_cap, dpt_cpu_overhead);
} CATCH_ANONYMOUS{
//hostfailure
- // TODO adsein, we should probably clean a bit the memory ?
+ // Stop the dirty page tracking and return (there is no memory space to release)
+ stop_dirty_page_tracking(ms->vm);
return 0;
}
remaining_size -= ramsize;
}
double clock_prev_send = MSG_get_clock();
-
- send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, updated_size, ms->mbox, 2, stage2_round, mig_speed, xfer_cpu_overhead);
-
+ TRY{
+ send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, updated_size, ms->mbox, 2, stage2_round, mig_speed, xfer_cpu_overhead);
+ }CATCH_ANONYMOUS{
+ //hostfailure
+ // Stop the dirty page tracking and return (there is no memory space to release)
+ stop_dirty_page_tracking(ms->vm);
+ return 0;
+ }
double clock_post_send = MSG_get_clock();
double bandwidth = updated_size / (clock_post_send - clock_prev_send);
XBT_INFO("mig-stage3: remaining_size %f", remaining_size);
simcall_vm_suspend(ms->vm);
stop_dirty_page_tracking(ms->vm);
-
- send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, remaining_size, ms->mbox, 3, 0, mig_speed, xfer_cpu_overhead);
-
+
+ TRY{
+ send_migration_data(ms->vm, ms->src_pm, ms->dst_pm, remaining_size, ms->mbox, 3, 0, mig_speed, xfer_cpu_overhead);
+ }CATCH_ANONYMOUS{
+ //hostfailure
+ // Dirty page tracking is already stopped and the VM is suspended: resume the VM and return (there is no memory space to release)
+ simcall_vm_resume(ms->vm);
+ return 0;
+ }
+
+ // At this point the migration is considered valid for the SRC node, but remember that the DST side still has to actually relocate the VM onto the DST node.
XBT_DEBUG("mig: tx_done");
}
#endif
-
-
-
- /* wait until the migration have finished */
+ /* wait until the migration has finished or an error has occurred */
{
msg_task_t task = NULL;
msg_error_t ret = MSG_task_recv(&task, ms->mbox_ctl);
+ xbt_free(ms->mbox_ctl);
+ xbt_free(ms->mbox);
+ xbt_free(ms);
+
//xbt_assert(ret == MSG_OK);
if(ret == MSG_HOST_FAILURE){
- //MSG_task_destroy(task);
- THROWF(host_error, 0, "host failed during migration of %s", sg_host_name(vm));
- }
- // TODO clean the code
+ // Note that since the communication failed, the owner did not change and the task should be destroyed on the other side.
+ // Hence, just throw the exception
+ THROWF(host_error, 0, "DST host failed during the migration of %s", sg_host_name(vm));
+ }
char *expected_task_name = get_mig_task_name(vm, src_pm, dst_pm, 4);
xbt_assert(strcmp(task->name, expected_task_name) == 0);
xbt_free(expected_task_name);
MSG_task_destroy(task);
}
-
- xbt_free(ms->mbox_ctl);
- xbt_free(ms->mbox);
- xbt_free(ms);
}
+
+
/** @brief Migrate the VM to the given host.
* @ingroup msg_VMs
*