-/* Copyright (c) 2016-2021. The SimGrid Team. All rights reserved. */
+/* Copyright (c) 2016-2022. The SimGrid Team. All rights reserved. */
/* This program is free software; you can redistribute it and/or modify it
* under the terms of the license (GNU LGPL) which comes with this package. */
#include <simgrid/s4u/Host.hpp>
#include <simgrid/sg_config.hpp>
+#define SIMIX_H_NO_DEPRECATED_WARNING // avoid deprecation warning on include (remove with XBT_ATTRIB_DEPRECATED_v332)
+#include <simgrid/simix.h>
+
#include "mc/mc.h"
#include "src/kernel/EngineImpl.hpp"
+#include "src/kernel/resource/StandardLinkImpl.hpp"
#include "src/kernel/resource/profile/Profile.hpp"
#include "src/mc/mc_record.hpp"
#include "src/mc/mc_replay.hpp"
#include "src/smpi/include/smpi_actor.hpp"
-#include "src/surf/network_interface.hpp"
#include "src/surf/xml/platf.hpp"
-#include "xbt/xbt_modinter.h" /* whether initialization was already done */
+#include "xbt/xbt_modinter.h" /* whether initialization was already done */
#include <boost/algorithm/string/predicate.hpp>
#ifndef _WIN32
"When non-negative, raise a SIGTRAP after given (simulated) time", -1.0};
config::Flag<bool> cfg_verbose_exit{"debug/verbose-exit", "Display the actor status at exit", true};
-xbt_dynar_t get_actors_addr()
-{
-#if SIMGRID_HAVE_MC
- return EngineImpl::get_instance()->get_actors_vector();
-#else
- xbt_die("This function is intended to be used when compiling with MC");
-#endif
-}
-
-xbt_dynar_t get_dead_actors_addr()
-{
-#if SIMGRID_HAVE_MC
- return EngineImpl::get_instance()->get_dead_actors_vector();
-#else
- xbt_die("This function is intended to be used when compiling with MC");
-#endif
-}
-
constexpr std::initializer_list<std::pair<const char*, context::ContextFactoryInitializer>> context_factories = {
#if HAVE_RAW_CONTEXTS
{"raw", &context::raw_factory},
/* Free the remaining data structures */
#if SIMGRID_HAVE_MC
xbt_dynar_free(&actors_vector_);
- xbt_dynar_free(&dead_actors_vector_);
#endif
/* clear models before freeing handle, network models can use external callback defined in the handle */
models_prio_.clear();
EngineImpl::instance_ = this;
#if SIMGRID_HAVE_MC
// The communication initialization is done ASAP, as we need to get some init parameters from the MC for different
- // layers. But simix_global needs to be created, as we send the address of some of its fields to the MC that wants to
+ // layers. But instance_ needs to be created, as we send the address of some of its fields to the MC that wants to
// read them directly.
- simgrid::mc::AppSide::initialize();
+ simgrid::mc::AppSide::initialize(actors_vector_);
#endif
if (xbt_initialized == 0) {
/* register a function to be called by SURF after the environment creation */
sg_platf_init();
- s4u::Engine::on_platform_created.connect([this]() { this->presolve(); });
+ s4u::Engine::on_platform_created_cb([this]() { this->presolve(); });
if (config::get_value<bool>("debug/clean-atexit"))
atexit(shutdown);
{
xbt_assert(not instance_->has_context_factory());
-#if HAVE_SMPI && (defined(__APPLE__) || defined(__NetBSD__))
+#if HAVE_SMPI && defined(__NetBSD__)
smpi_init_options_internal(false);
std::string priv = config::get_value<std::string>("smpi/privatization");
if (context_factory_name == "thread" && (priv == "dlopen" || priv == "yes" || priv == "default" || priv == "1")) {
instance_->empty_trash();
/* Let's free maestro now */
- instance_->destroy_maestro();
+ delete instance_->maestro_;
+ instance_->maestro_ = nullptr;
/* Finish context module and SURF */
instance_->destroy_context_factory();
instance_ = nullptr;
}
+void EngineImpl::seal_platform() const
+{
+ /* Seal only once: the guard below is a function-local static, so it is shared by every call to this method for
+  * the whole lifetime of the process. Once one call went through, all later calls return immediately.
+  * NOTE(review): this also holds if the engine is destroyed and another one is created afterwards -- confirm that
+  * no use case needs a second platform to be sealed. */
+ static bool sealed = false;
+ if (sealed)
+ return;
+ sealed = true;
+
+ /* sealing resources before run: links (every entry of links_ is sealed through the object given by get_iface()) */
+ for (auto const& kv : links_)
+ kv.second->get_iface()->seal();
+ /* seal netzone root, recursively seal children netzones, hosts and disks.
+  * NOTE(review): netzone_root_ is dereferenced without any check; presumably a platform is always loaded before
+  * this is called -- confirm. */
+ netzone_root_->seal();
+}
+
void EngineImpl::load_platform(const std::string& platf)
{
double start = xbt_os_time();
XBT_DEBUG("Handling the failed actions (if any)");
while (auto* action = model->extract_failed_action()) {
XBT_DEBUG(" Handling Action %p", action);
- if (action->get_activity() != nullptr)
+ if (action->get_activity() != nullptr) {
+ // If nobody told the interface that the activity has failed, that's because no actor waits on it (maestro
+ // started it). SimDAG I see you!
+ if (action->get_activity()->get_actor() == maestro_)
+ action->get_activity()->get_iface()->complete(s4u::Activity::State::FAILED);
+
activity::ActivityImplPtr(action->get_activity())->post();
+ }
}
XBT_DEBUG("Handling the terminated actions (if any)");
while (auto* action = model->extract_done_action()) {
XBT_DEBUG(" Handling Action %p", action);
if (action->get_activity() == nullptr)
XBT_DEBUG("probably vcpu's action %p, skip", action);
- else
+ else {
+ // If nobody told the interface that the activity is finished, that's because no actor waits on it (maestro
+ // started it). SimDAG I see you!
+ action->get_activity()->set_finish_time(action->get_finish_time());
+
+ if (action->get_activity()->get_actor() == maestro_)
+ action->get_activity()->get_iface()->complete(s4u::Activity::State::FINISHED);
+
activity::ActivityImplPtr(action->get_activity())->post();
+ }
}
}
}
{
instance_->get_context_factory()->run_all();
+ for (auto const& actor : actors_to_run_)
+ if (actor->context_->to_be_freed())
+ actor->cleanup_from_kernel();
+
actors_to_run_.swap(actors_that_ran_);
actors_to_run_.clear();
}
XBT_DEBUG("Getting rid of %s (refcount: %d)", actor->get_cname(), actor->get_refcount());
intrusive_ptr_release(actor);
}
-#if SIMGRID_HAVE_MC
- xbt_dynar_reset(dead_actors_vector_);
-#endif
}
void EngineImpl::display_all_actor_status() const
/* List the actors and their state */
XBT_INFO("Legend of the following listing: \"Actor <pid> (<name>@<host>): <status>\"");
for (auto const& kv : actor_list_) {
- actor::ActorImpl* actor = kv.second;
+ const actor::ActorImpl* actor = kv.second;
if (actor->waiting_synchro_) {
const char* synchro_description = "unknown";
if (boost::dynamic_pointer_cast<kernel::activity::SleepImpl>(actor->waiting_synchro_) != nullptr)
synchro_description = "sleeping";
- if (boost::dynamic_pointer_cast<kernel::activity::RawImpl>(actor->waiting_synchro_) != nullptr)
+ if (boost::dynamic_pointer_cast<kernel::activity::SynchroImpl>(actor->waiting_synchro_) != nullptr)
synchro_description = "synchronization";
if (boost::dynamic_pointer_cast<kernel::activity::IoImpl>(actor->waiting_synchro_) != nullptr)
synchro_description = "I/O";
- XBT_INFO("Actor %ld (%s@%s): waiting for %s activity %#zx (%s) in state %d to finish", actor->get_pid(),
+ XBT_INFO("Actor %ld (%s@%s): waiting for %s activity %#zx (%s) in state %s to finish", actor->get_pid(),
actor->get_cname(), actor->get_host()->get_cname(), synchro_description,
(xbt_log_no_loc ? (size_t)0xDEADBEEF : (size_t)actor->waiting_synchro_.get()),
- actor->waiting_synchro_->get_cname(), (int)actor->waiting_synchro_->state_);
+ actor->waiting_synchro_->get_cname(), actor->waiting_synchro_->get_state_str());
} else {
XBT_INFO("Actor %ld (%s@%s) simcall %s", actor->get_pid(), actor->get_cname(), actor->get_host()->get_cname(),
- SIMIX_simcall_name(actor->simcall_));
+ actor->simcall_.get_cname());
}
}
}
return time_delta;
}
-void EngineImpl::run()
+void EngineImpl::run(double max_date)
{
+ seal_platform();
+
+ if (MC_is_active()) {
+#if SIMGRID_HAVE_MC
+ mc::AppSide::get()->main_loop();
+#else
+ xbt_die("MC_is_active() is not supposed to return true in non-MC settings");
+#endif
+ THROW_IMPOSSIBLE; // main_loop never returns
+ }
+
if (MC_record_replay_is_active()) {
- mc::replay(MC_record_path());
+ mc::RecordTrace::replay(MC_record_path());
empty_trash();
return;
}
- double time = 0;
+ double elapsed_time = -1;
+ const std::set<s4u::Activity*>* vetoed_activities = s4u::Activity::get_vetoed_activities();
do {
XBT_DEBUG("New Schedule Round; size(queue)=%zu", actors_to_run_.size());
/* Here, the order is ok because:
*
- * Short proof: only maestro adds stuff to the actors_to_run array, so the execution order of user contexts do
- * not impact its order.
- *
- * Long proof: actors remain sorted through an arbitrary (implicit, complex but fixed) order in all cases.
+ * Only maestro adds stuff to the actors_to_run array, so the execution order of user contexts does not impact its order.
+ *
+ * In addition, actors remain sorted through an arbitrary but fixed order in all cases:
*
- * - if there is no kill during the simulation, actors remain sorted according by their PID.
- * Rationale: This can be proved inductively.
- * Assume that actors_to_run is sorted at a beginning of one round (it is at round 0: the deployment file
- * is parsed linearly).
- * Let's show that it is still so at the end of this round.
- * - if an actor is added when being created, that's from maestro. It can be either at startup
- * time (and then in PID order), or in response to a process_create simcall. Since simcalls are handled
- * in arbitrary order (inductive hypothesis), we are fine.
- * - If an actor is added because it's getting killed, its subsequent actions shouldn't matter
- * - If an actor gets added to actors_to_run because one of their blocking action constituting the meat
- * of a simcall terminates, we're still good. Proof:
- * - You are added from ActorImpl::simcall_answer() only. When this function is called depends on the
- * resource kind (network, cpu, disk, whatever), but the same arguments hold. Let's take communications
- * as an example.
- * - For communications, this function is called from CommImpl::finish().
- * This function itself don't mess with the order since simcalls are handled in FIFO order.
- * The function is called:
- * - before the comm starts (invalid parameters, or resource already dead or whatever).
- * The order then trivial holds since maestro didn't interrupt its handling of the simcall yet
- * - because the communication failed or were canceled after startup. In this case, it's called from
- * the function we are in, by the chunk:
- * set = model->states.failed_action_set;
- * while ((synchro = extract(set)))
- * SIMIX_simcall_post((smx_synchro_t) synchro->data);
- * This order is also fixed because it depends of the order in which the surf actions were
- * added to the system, and only maestro can add stuff this way, through simcalls.
- * We thus use the inductive hypothesis once again to conclude that the order in which synchros are
- * popped out of the set does not depend on the user code's execution order.
- * - because the communication terminated. In this case, synchros are served in the order given by
- * set = model->states.done_action_set;
- * while ((synchro = extract(set)))
- * SIMIX_simcall_post((smx_synchro_t) synchro->data);
- * and the argument is very similar to the previous one.
- * So, in any case, the orders of calls to CommImpl::finish() do not depend on the order in which user
- * actors are executed.
- * So, in any cases, the orders of actors within actors_to_run do not depend on the order in which
- * user actors were executed previously.
- * So, if there is no killing in the simulation, the simulation reproducibility is not jeopardized.
- * - If there is some actor killings, the order is changed by this decision that comes from user-land
- * But this decision may not have been motivated by a situation that were different because the simulation is
- * not reproducible.
- * So, even the order change induced by the actor killing is perfectly reproducible.
+ * - If there is no killing during the simulation, actors remain sorted by their PID.
+ * - Killer actors are moved to the end of the scheduling round (to let victims finish their simcall before dying), but
+ * (1) this decision of killing is reproducible because the simulation was reproducible until then
+ * (2) this reordering introduces no reproducibility hazard in the subsequent simulation.
+ * Even the order change induced by the actor killing is perfectly reproducible.
+ *
+ * So the array order is implicit and somewhat complex, but fixed and reproducible (science works, http://xkcd.com/54/).
*
- * So science works, bitches [http://xkcd.com/54/].
- *
- * We could sort the actors_that_ran array completely so that we can describe the order in which simcalls are
- * handled (like "according to the PID of issuer"), but it's not mandatory (order is fixed already even if
- * unfriendly).
- * That would thus be a pure waste of time.
+ * We could manually sort the actors_that_ran array so that simcalls are handled in an easy to predict order
+ * (e.g. "according to the PID of issuer"), but it's not mandatory for the simulation soundness and reproducibility,
+ * and would thus be a pure waste of time.
*/
- for (auto const& actor : actors_that_ran_) {
- if (actor->simcall_.call_ != simix::Simcall::NONE) {
+ for (auto const& actor : actors_that_ran_)
+ if (actor->simcall_.call_ != actor::Simcall::Type::NONE)
actor->simcall_handle(0);
- }
- }
execute_tasks();
do {
}
}
- time = timer::Timer::next();
- if (time > -1.0 || not actor_list_.empty()) {
- XBT_DEBUG("Calling solve");
- time = solve(time);
- XBT_DEBUG("Moving time ahead : %g", time);
+ // Compute the max_date of the next solve.
+ // It's either when a timer occurs, or when user-specified deadline is reached, or -1 if none is given
+ double next_time = timer::Timer::next();
+ if (next_time < 0 && max_date > -1) {
+ next_time = max_date;
+ } else if (next_time > -1 && max_date > -1) { // either both <0, or both >0
+ next_time = std::min(next_time, max_date);
}
+ XBT_DEBUG("Calling solve(%g) %g", next_time, NOW);
+ elapsed_time = solve(next_time);
+ XBT_DEBUG("Moving time ahead. NOW=%g; elapsed: %g", NOW, elapsed_time);
+
/* Notify all the hosts that have failed */
/* FIXME: iterate through the list of failed host and mark each of them */
/* as failed. On each host, signal all the running actors with host_fail */
/* Clean actors to destroy */
empty_trash();
- XBT_DEBUG("### time %f, #actors %zu, #to_run %zu", time, actor_list_.size(), actors_to_run_.size());
+ XBT_DEBUG("### elapsed time %f, #actors %zu, #to_run %zu, #vetoed %d", elapsed_time, actor_list_.size(),
+ actors_to_run_.size(), (vetoed_activities == nullptr ? -1 : static_cast<int>(vetoed_activities->size())));
- if (time < 0. && actors_to_run_.empty() && not actor_list_.empty()) {
+ if (elapsed_time < 0. && actors_to_run_.empty() && not actor_list_.empty()) {
if (actor_list_.size() <= daemons_.size()) {
XBT_CRITICAL("Oops! Daemon actors cannot do any blocking activity (communications, synchronization, etc) "
"once the simulation is over. Please fix your on_exit() functions.");
maestro_->kill(kv.second);
}
}
- } while (time > -1.0 || has_actors_to_run());
+ } while ((vetoed_activities == nullptr || vetoed_activities->empty()) &&
+ ((elapsed_time > -1.0 && not double_equals(max_date, NOW, 0.00001)) || has_actors_to_run()));
- if (not actor_list_.empty())
+ if (not actor_list_.empty() && max_date < 0 && not(vetoed_activities == nullptr || vetoed_activities->empty()))
THROW_IMPOSSIBLE;
simgrid::s4u::Engine::on_simulation_end();
void SIMIX_run() // XBT_ATTRIB_DEPRECATED_v332
{
- simgrid::kernel::EngineImpl::get_instance()->run();
+ simgrid::kernel::EngineImpl::get_instance()->run(-1); // -1: no user-specified deadline (run() only uses max_date when it is > -1)
}