#include <simgrid/s4u/Host.hpp>
#include <simgrid/sg_config.hpp>
-#define SIMIX_H_NO_DEPRECATED_WARNING // avoid deprecation warning on include (remove with XBT_ATTRIB_DEPRECATED_v332)
-#include <simgrid/simix.h>
-
#include "mc/mc.h"
#include "src/kernel/EngineImpl.hpp"
#include "src/kernel/resource/StandardLinkImpl.hpp"
#include "src/mc/mc_replay.hpp"
#include "src/smpi/include/smpi_actor.hpp"
#include "src/surf/xml/platf.hpp"
+#include "xbt/module.h"
#include "xbt/xbt_modinter.h" /* whether initialization was already done */
#include <boost/algorithm/string/predicate.hpp>
#include "src/mc/remote/AppSide.hpp"
#endif
-double NOW = 0;
-
XBT_LOG_NEW_DEFAULT_CATEGORY(ker_engine, "Logging specific to Engine (kernel)");
namespace simgrid {
namespace kernel {
+double EngineImpl::now_ = 0.0;
EngineImpl* EngineImpl::instance_ = nullptr; /* That singleton is awful too. */
config::Flag<double> cfg_breakpoint{"debug/breakpoint",
for (auto const& kv : mailboxes_)
delete kv.second;
- /* Free the remaining data structures */
+ /* Kill all actors (but maestro) */
+ maestro_->kill_all();
+ run_all_actors();
+ empty_trash();
+
+ delete maestro_;
+ delete context_factory_;
+
+ /* Free the remaining data structures */
#if SIMGRID_HAVE_MC
xbt_dynar_free(&actors_vector_);
#endif
xbt_die("Bailing out to avoid that stop-before-start madness. Please fix your code.");
}
- /* Kill all actors (but maestro) */
- instance_->maestro_->kill_all();
- instance_->run_all_actors();
- instance_->empty_trash();
-
- /* Let's free maestro now */
- delete instance_->maestro_;
- instance_->maestro_ = nullptr;
-
- /* Finish context module and SURF */
- instance_->destroy_context_factory();
-
while (not timer::kernel_timers().empty()) {
delete timer::kernel_timers().top().second;
timer::kernel_timers().pop();
void EngineImpl::load_platform(const std::string& platf)
{
double start = xbt_os_time();
- if (boost::algorithm::ends_with(platf, ".so") or boost::algorithm::ends_with(platf, ".dylib")) {
+ if (boost::algorithm::ends_with(platf, ".so") || boost::algorithm::ends_with(platf, ".dylib")) {
#ifdef _WIN32
xbt_die("loading platform through shared library isn't supported on windows");
#else
}
/** Wake up all actors waiting for a Surf action to finish */
-void EngineImpl::wake_all_waiting_actors() const
+void EngineImpl::handle_ended_actions() const
{
for (auto const& model : models_) {
XBT_DEBUG("Handling the failed actions (if any)");
while (auto* action = model->extract_failed_action()) {
XBT_DEBUG(" Handling Action %p", action);
- if (action->get_activity() != nullptr) {
- // If nobody told the interface that the activity has failed, that's because no actor waits on it (maestro
- // started it). SimDAG I see you!
+ if (action->get_activity() != nullptr) { // Skip vcpu actions
+ // Action failures are not automatically reported when the action is started by maestro (as in SimDAG)
if (action->get_activity()->get_actor() == maestro_)
action->get_activity()->get_iface()->complete(s4u::Activity::State::FAILED);
XBT_DEBUG("Handling the terminated actions (if any)");
while (auto* action = model->extract_done_action()) {
XBT_DEBUG(" Handling Action %p", action);
- if (action->get_activity() == nullptr)
- XBT_DEBUG("probably vcpu's action %p, skip", action);
- else {
- // If nobody told the interface that the activity is finished, that's because no actor waits on it (maestro
- // started it). SimDAG I see you!
+ if (action->get_activity() != nullptr) {
+ // Action terminations are not automatically reported when the action is started by maestro (as in SimDAG)
action->get_activity()->set_finish_time(action->get_finish_time());
if (action->get_activity()->get_actor() == maestro_)
*/
void EngineImpl::run_all_actors()
{
- instance_->get_context_factory()->run_all();
+ instance_->get_context_factory()->run_all(actors_to_run_);
for (auto const& actor : actors_to_run_)
- if (actor->context_->to_be_freed())
+ if (actor->to_be_freed())
actor->cleanup_from_kernel();
actors_to_run_.swap(actors_that_ran_);
if (item != actor_list_.end())
return item->second;
- // Search the trash
- for (auto& a : actors_to_destroy_)
- if (a.get_pid() == pid)
- return &a;
- return nullptr; // Not found, even in the trash
+ return nullptr; // Not found
}
void EngineImpl::remove_daemon(actor::ActorImpl* actor)
XBT_DEBUG("Consume all trace events occurring before the starting time.");
double next_event_date;
while ((next_event_date = profile::future_evt_set.next_date()) != -1.0) {
- if (next_event_date > NOW)
+ if (next_event_date > now_)
break;
double value = -1.0;
XBT_DEBUG("Set every models in the right state by updating them to 0.");
for (auto const& model : models_)
- model->update_actions_state(NOW, 0.0);
+ model->update_actions_state(now_, 0.0);
}
double EngineImpl::solve(double max_date) const
resource::Resource* resource = nullptr;
if (max_date != -1.0) {
- xbt_assert(max_date >= NOW, "You asked to simulate up to %f, but that's in the past already", max_date);
+ xbt_assert(max_date >= now_, "You asked to simulate up to %f, but that's in the past already", max_date);
- time_delta = max_date - NOW;
+ time_delta = max_date - now_;
}
XBT_DEBUG("Looking for next event in all models");
if (not model->next_occurring_event_is_idempotent()) {
continue;
}
- double next_event = model->next_occurring_event(NOW);
+ double next_event = model->next_occurring_event(now_);
if ((time_delta < 0.0 || next_event < time_delta) && next_event >= 0.0) {
time_delta = next_event;
}
continue;
if (next_event_date != -1.0) {
- time_delta = std::min(next_event_date - NOW, time_delta);
+ time_delta = std::min(next_event_date - now_, time_delta);
} else {
- time_delta = std::max(next_event_date - NOW, time_delta); // Get the positive component
+ time_delta = std::max(next_event_date - now_, time_delta); // Get the positive component
}
XBT_DEBUG("Run the NS3 network at most %fs", time_delta);
time_delta = model_next_action_end;
}
- if (next_event_date < 0.0 || (next_event_date > NOW + time_delta)) {
+ if (next_event_date < 0.0 || (next_event_date > now_ + time_delta)) {
// next event may have already occurred or will after the next resource change, then bail out
XBT_DEBUG("no next usable TRACE event. Stop searching for it");
break;
}
- XBT_DEBUG("Updating models (min = %g, NOW = %g, next_event_date = %g)", time_delta, NOW, next_event_date);
+ XBT_DEBUG("Updating models (min = %g, NOW = %g, next_event_date = %g)", time_delta, now_, next_event_date);
while (auto* event = profile::future_evt_set.pop_leq(next_event_date, &value, &resource)) {
if (resource->is_used() || (watched_hosts().find(resource->get_cname()) != watched_hosts().end())) {
- time_delta = next_event_date - NOW;
+ time_delta = next_event_date - now_;
XBT_DEBUG("This event invalidates the next_occurring_event() computation of models. Next event set to %f",
time_delta);
}
- // FIXME: I'm too lame to update NOW live, so I change it and restore it so that the real update with surf_min
+ // FIXME: I'm too lame to update now_ live, so I change it and restore it so that the real update with surf_min
// will work
- double round_start = NOW;
- NOW = next_event_date;
+ double round_start = now_;
+ now_ = next_event_date;
/* update state of the corresponding resource to the new value. Does not touch lmm.
It will be modified if needed when updating actions */
XBT_DEBUG("Calling update_resource_state for resource %s", resource->get_cname());
resource->apply_event(event, value);
- NOW = round_start;
+ now_ = round_start;
}
}
XBT_DEBUG("Duration set to %f", time_delta);
// Bump the time: jump into the future
- NOW = NOW + time_delta;
+ now_ += time_delta;
// Inform the models of the date change
for (auto const& model : models_)
- model->update_actions_state(NOW, time_delta);
+ model->update_actions_state(now_, time_delta);
s4u::Engine::on_time_advance(time_delta);
if (cfg_breakpoint >= 0.0 && simgrid_get_clock() >= cfg_breakpoint) {
XBT_DEBUG("Breakpoint reached (%g)", cfg_breakpoint.get());
- cfg_breakpoint = -1.0;
+ cfg_breakpoint = -1.0; // Let the simulation continue without hitting the breakpoint again and again
#ifdef SIGTRAP
std::raise(SIGTRAP);
#else
/* Run all actors that are ready to run, possibly in parallel */
run_all_actors();
- /* answer sequentially and in a fixed arbitrary order all the simcalls that were issued during that sub-round */
-
- /* WARNING, the order *must* be fixed or you'll jeopardize the simulation reproducibility (see RR-7653) */
-
- /* Here, the order is ok because:
- *
- * Only maestro adds stuff to the actors_to_run array, so the execution order of user contexts do not impact its order.
- *
- * In addition, actors remain sorted through an arbitrary but fixed order in all cases:
- *
- * - If there is no killing during the simulation, actors remain sorted according by their PID.
- * - Killer actors are moved to the end of the scheduling round (to let victims finish their simcall before dying), but
- * (1) this decision of killing is reproducible because the simulation was reproducible until then
- * (2) this reordering introduces no reproducibility hazard in the subsequent simulation.
- * Even the order change induced by the actor killing is perfectly reproducible.
- *
- * So the array order is implicit and somewhat complex, but fixed and reproducible (science works, http://xkcd.com/54/).
- *
- * We could manually sort the actors_that_ran array so that simcalls are handled in an easy to predict order
- * (e.g. "according to the PID of issuer"), but it's not mandatory for the simulation soundness and reproducibility,
- * and would thus be a pure waste of time.
+ /* answer sequentially and in a fixed arbitrary order all the simcalls that were issued during that sub-round.
+ * The order must be fixed for the simulation to be reproducible (see RR-7653). It's OK here because only maestro
+ * changes the list. Killer actors are moved to the end to let victims finish their simcall before dying, but
+ * the order remains reproducible (even if arbitrary). No need to sort the vector for the sake of reproducibility.
*/
-
for (auto const& actor : actors_that_ran_)
if (actor->simcall_.call_ != actor::Simcall::Type::NONE)
actor->simcall_handle(0);
- wake_all_waiting_actors();
+ handle_ended_actions();
/* If only daemon actors remain, cancel their actions, mark them to die and reschedule them */
if (actor_list_.size() == daemons_.size())
next_time = std::min(next_time, max_date);
}
- XBT_DEBUG("Calling solve(%g) %g", next_time, NOW);
+ XBT_DEBUG("Calling solve(%g) %g", next_time, now_);
elapsed_time = solve(next_time);
- XBT_DEBUG("Moving time ahead. NOW=%g; elapsed: %g", NOW, elapsed_time);
-
- /* Notify all the hosts that have failed */
- /* FIXME: iterate through the list of failed host and mark each of them */
- /* as failed. On each host, signal all the running actors with host_fail */
+ XBT_DEBUG("Moving time ahead. NOW=%g; elapsed: %g", now_, elapsed_time);
// Execute timers until there isn't anything to be done:
bool again = false;
do {
again = timer::Timer::execute_all();
- wake_all_waiting_actors();
+ handle_ended_actions();
} while (again);
/* Clean actors to destroy */
XBT_CRITICAL("Oops! Daemon actors cannot do any blocking activity (communications, synchronization, etc) "
"once the simulation is over. Please fix your on_exit() functions.");
} else {
- XBT_CRITICAL("Oops! Deadlock or code not perfectly clean.");
+ XBT_CRITICAL("Oops! Deadlock detected, some activities are still around but will never complete. "
+ "This usually happens when the user code is not perfectly clean.");
}
display_all_actor_status();
simgrid::s4u::Engine::on_deadlock();
}
}
} while ((vetoed_activities == nullptr || vetoed_activities->empty()) &&
- ((elapsed_time > -1.0 && not double_equals(max_date, NOW, 0.00001)) || has_actors_to_run()));
+ ((elapsed_time > -1.0 && not double_equals(max_date, now_, 0.00001)) || has_actors_to_run()));
if (not actor_list_.empty() && max_date < 0 && not(vetoed_activities == nullptr || vetoed_activities->empty()))
THROW_IMPOSSIBLE;
double EngineImpl::get_clock()
{
- return NOW;
+ return now_;
}
} // namespace kernel
} // namespace simgrid
-
-void SIMIX_run() // XBT_ATTRIB_DEPRECATED_v332
-{
- simgrid::kernel::EngineImpl::get_instance()->run(-1);
-}