- Reimplementation of barriers natively.
Previously, they were implemented on top of s4u::Mutex and s4u::ConditionVariable.
The new version should be faster (and can be used in the model-checker).
+ - Actor::get_restart_count(): Returns the number of reboots that this actor did.
MSG:
- MSG_barrier_destroy now expects a non-const msg_barrier parameter.
.. doxygenfunction:: simgrid::s4u::Actor::join() const
.. doxygenfunction:: simgrid::s4u::Actor::join(double timeout) const
.. doxygenfunction:: simgrid::s4u::Actor::set_auto_restart(bool autorestart)
+ .. doxygenfunction:: simgrid::s4u::Actor::get_restart_count()
.. group-tab:: Python
! output sort 19
$ ${bindir:=.}/c-platform-failures --log=xbt_cfg.thres:critical --log=no_loc ${platfdir}/small_platform_failures.xml ${srcdir:=.}/../../cpp/platform-failures/s4u-platform-failures_d.xml --cfg=path:${srcdir} --cfg=network/crosstraffic:0 "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n" --log=res_cpu.t:verbose
> [ 0.000000] (0:maestro@) Cannot launch actor 'worker' on failed host 'Fafard'
-> [ 0.000000] (0:maestro@) Deployment includes some initially turned off Hosts ... nevermind.
+> [ 0.000000] (0:maestro@) Starting actor worker(Fafard) failed because its host is turned off.
> [ 0.000000] (1:master@Tremblay) Got 5 workers and 20 tasks to process
> [ 0.000000] (1:master@Tremblay) Send a message to worker-0
> [ 0.010309] (1:master@Tremblay) Send to worker-0 completed
! output sort 19
$ ${bindir:=.}/c-platform-failures --log=xbt_cfg.thres:critical --log=no_loc ${platfdir}/small_platform_failures.xml ${srcdir:=.}/../../cpp/platform-failures/s4u-platform-failures_d.xml --cfg=path:${srcdir} "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n" --log=res_cpu.t:verbose
> [ 0.000000] (0:maestro@) Cannot launch actor 'worker' on failed host 'Fafard'
-> [ 0.000000] (0:maestro@) Deployment includes some initially turned off Hosts ... nevermind.
+> [ 0.000000] (0:maestro@) Starting actor worker(Fafard) failed because its host is turned off.
> [ 0.000000] (1:master@Tremblay) Got 5 workers and 20 tasks to process
> [ 0.000000] (1:master@Tremblay) Send a message to worker-0
> [ 0.000000] (2:worker@Tremblay) Waiting a message on worker-0
! output sort 19
$ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc ${platfdir}/small_platform_failures.xml ${srcdir:=.}/s4u-platform-failures_d.xml --cfg=path:${srcdir} --cfg=network/crosstraffic:0 "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n" --log=res_cpu.t:verbose
> [ 0.000000] (0:maestro@) Cannot launch actor 'worker' on failed host 'Fafard'
-> [ 0.000000] (0:maestro@) Deployment includes some initially turned off Hosts ... nevermind.
+> [ 0.000000] (0:maestro@) Starting actor worker(Fafard) failed because its host is turned off.
> [ 0.000000] (1:master@Tremblay) Got 5 workers and 20 tasks to process
> [ 0.000000] (1:master@Tremblay) Send a message to worker-0
> [ 0.000000] (7:sleeper@Lilibeth) Start sleeping...
! output sort 19
$ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc ${platfdir}/small_platform_failures.xml ${srcdir:=.}/s4u-platform-failures_d.xml --cfg=path:${srcdir} "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n" --log=res_cpu.t:verbose
> [ 0.000000] (0:maestro@) Cannot launch actor 'worker' on failed host 'Fafard'
-> [ 0.000000] (0:maestro@) Deployment includes some initially turned off Hosts ... nevermind.
+> [ 0.000000] (0:maestro@) Starting actor worker(Fafard) failed because its host is turned off.
> [ 0.000000] (1:master@Tremblay) Got 5 workers and 20 tasks to process
> [ 0.000000] (1:master@Tremblay) Send a message to worker-0
> [ 0.000000] (2:worker@Tremblay) Waiting a message on worker-0
! output sort 19
$ ${pythoncmd:=python3} ${PYTHON_TOOL_OPTIONS:=} ${bindir:=.}/platform-failures.py ${platfdir}/small_platform_failures.xml ${srcdir:=.}/platform-failures_d.xml --log=xbt_cfg.thres:critical --log=no_loc --cfg=path:${srcdir} --cfg=network/crosstraffic:0 "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n" --log=res_cpu.t:verbose
> [ 0.000000] (0:maestro@) Cannot launch actor 'worker' on failed host 'Fafard'
-> [ 0.000000] (0:maestro@) Deployment includes some initially turned off Hosts ... nevermind.
+> [ 0.000000] (0:maestro@) Starting actor worker(Fafard) failed because its host is turned off.
> [ 0.000000] (1:master@Tremblay) Got 5 workers and 20 tasks to process
> [ 0.000000] (1:master@Tremblay) Send a message to worker-0
> [ 0.000000] (7:sleeper@Lilibeth) Start sleeping...
/** If set to true, the actor will automatically restart when its host reboots */
Actor* set_auto_restart(bool autorestart = true);
+ /** Returns the number of reboots that this actor did. Before the first reboot, this function returns 0. */
+ int get_restart_count();
/** Add a function to the list of "on_exit" functions for the current actor. The on_exit functions are the functions
* executed when your actor is killed. You should use them to free the data used by your actor.
XBT_DEBUG("Restarting actor %s on %s", get_cname(), host_->get_cname());
// retrieve the arguments of the old actor
- ProcessArg arg(host_, this);
+ ProcessArg args(host_, this);
// kill the old actor
context::Context::self()->get_actor()->kill(this);
// start the new actor
- ActorImplPtr actor = ActorImpl::create(arg.name, arg.code, arg.data, arg.host, nullptr);
- actor->set_properties(arg.properties);
- *actor->on_exit = std::move(*arg.on_exit);
- actor->set_kill_time(arg.kill_time);
- actor->set_auto_restart(arg.auto_restart);
-
- return actor->get_ciface();
+ return create(&args)->get_ciface();
}
void ActorImpl::suspend()
return actor;
}
+ActorImplPtr ActorImpl::create(ProcessArg* args)
+{
+ actor::ActorImplPtr actor = actor::ActorImpl::create(args->name, args->code, nullptr, args->host, nullptr);
+ auto* naked_actor = actor.get();
+ naked_actor->restart_count_ = args->restart_count_;
+ actor->set_properties(args->properties);
+ if (args->on_exit)
+ *actor->on_exit = *args->on_exit;
+ if (args->kill_time >= 0)
+ actor->set_kill_time(args->kill_time);
+ if (args->auto_restart)
+ actor->set_auto_restart(args->auto_restart);
+ if (args->daemon_)
+ actor->daemonize();
+ return actor;
+}
void create_maestro(const std::function<void()>& code)
{
namespace simgrid {
namespace kernel {
namespace actor {
+class ProcessArg;
class XBT_PUBLIC ActorImpl : public xbt::PropertyHolder {
s4u::Host* host_ = nullptr; /* the host on which the actor is running */
aid_t ppid_ = -1;
bool daemon_ = false; /* Daemon actors are automatically killed when the last non-daemon leaves */
bool auto_restart_ = false;
+ int restart_count_ = 0;
unsigned stacksize_; // set to default value in constructor
std::vector<activity::MailboxImpl*> mailboxes;
/ after terminaison) */
bool has_to_auto_restart() const { return auto_restart_; }
void set_auto_restart(bool autorestart) { auto_restart_ = autorestart; }
+ int get_restart_count() { return restart_count_; }
void set_stacksize(unsigned stacksize) { stacksize_ = stacksize; }
unsigned get_stacksize() const { return stacksize_; }
static ActorImplPtr create(const std::string& name, const ActorCode& code, void* data, s4u::Host* host,
const ActorImpl* parent_actor);
+ static ActorImplPtr create(ProcessArg* args);
static ActorImplPtr attach(const std::string& name, void* data, s4u::Host* host);
static void detach();
void cleanup();
bool daemon_ = false;
/* list of functions executed when the actor dies */
const std::shared_ptr<std::vector<std::function<void(bool)>>> on_exit;
+ int restart_count_ = 0;
ProcessArg() = delete;
ProcessArg(const ProcessArg&) = delete;
explicit ProcessArg(const std::string& name, const std::function<void()>& code, void* data, s4u::Host* host,
double kill_time, const std::unordered_map<std::string, std::string>& properties,
- bool auto_restart)
+ bool auto_restart, int restart_count)
: name(name)
, code(code)
, data(data)
, kill_time(kill_time)
, properties(properties)
, auto_restart(auto_restart)
+ , restart_count_(restart_count)
{
}
, auto_restart(actor->has_to_auto_restart())
, daemon_(actor->is_daemon())
, on_exit(actor->on_exit)
+ , restart_count_(actor->get_restart_count() + 1)
{
}
};
Actor* Actor::set_auto_restart(bool autorestart)
{
+ if (autorestart == pimpl_->has_to_auto_restart()) // not changed
+ return this;
+
kernel::actor::simcall_answered([this, autorestart]() {
- xbt_assert(autorestart && not pimpl_->has_to_auto_restart()); // FIXME: handle all cases
+ xbt_assert(autorestart, "Asking an actor to stop being autorestart is not implemented yet. Ask us if you need it.");
pimpl_->set_auto_restart(autorestart);
auto* arg = new kernel::actor::ProcessArg(pimpl_->get_host(), pimpl_);
});
return this;
}
+int Actor::get_restart_count()
+{
+ return pimpl_->get_restart_count();
+}
void Actor::on_exit(const std::function<void(bool /*failed*/)>& fun) const
{
{
for (auto const& arg : actors_at_boot_) {
XBT_DEBUG("Booting Actor %s(%s) right now", arg->name.c_str(), arg->host->get_cname());
- actor::ActorImplPtr actor = actor::ActorImpl::create(arg->name, arg->code, nullptr, arg->host, nullptr);
- actor->set_properties(arg->properties);
- if (arg->on_exit)
- *actor->on_exit = *arg->on_exit;
- if (arg->kill_time >= 0)
- actor->set_kill_time(arg->kill_time);
- if (arg->auto_restart)
- actor->set_auto_restart(arg->auto_restart);
- if (arg->daemon_)
- actor->daemonize();
+ actor::ActorImplPtr actor = actor::ActorImpl::create(arg);
}
}
simgrid::kernel::actor::ActorCode code = factory(std::move(actor->args));
auto* arg = new simgrid::kernel::actor::ProcessArg(actor_name, code, nullptr, host, kill_time, actor->properties,
- auto_restart);
+ auto_restart, /*restart_count=*/0);
host->get_impl()->add_actor_at_boot(arg);
if (start_time > simgrid::s4u::Engine::get_clock()) {
arg = new simgrid::kernel::actor::ProcessArg(actor_name, code, nullptr, host, kill_time, actor->properties,
- auto_restart);
+ auto_restart, /*restart_count=*/0);
- XBT_DEBUG("Process %s@%s will be started at time %f", arg->name.c_str(), arg->host->get_cname(), start_time);
+ XBT_DEBUG("Actor %s@%s will be started at time %f", arg->name.c_str(), arg->host->get_cname(), start_time);
simgrid::kernel::timer::Timer::set(start_time, [arg, auto_restart]() {
- simgrid::kernel::actor::ActorImplPtr new_actor =
- simgrid::kernel::actor::ActorImpl::create(arg->name.c_str(), arg->code, arg->data, arg->host, nullptr);
- new_actor->set_properties(arg->properties);
- if (arg->kill_time >= 0)
- new_actor->set_kill_time(arg->kill_time);
- if (auto_restart)
- new_actor->set_auto_restart(auto_restart);
+ simgrid::kernel::actor::ActorImplPtr new_actor = simgrid::kernel::actor::ActorImpl::create(arg);
delete arg;
});
} else { // start_time <= simgrid::s4u::Engine::get_clock()
- XBT_DEBUG("Starting Process %s(%s) right now", arg->name.c_str(), host->get_cname());
+ XBT_DEBUG("Starting actor %s(%s) right now", arg->name.c_str(), host->get_cname());
try {
- simgrid::kernel::actor::ActorImplPtr new_actor = nullptr;
- new_actor = simgrid::kernel::actor::ActorImpl::create(arg->name.c_str(), code, nullptr, host, nullptr);
- new_actor->set_properties(arg->properties);
- /* The actor creation will fail if the host is currently dead, but that's fine */
- if (arg->kill_time >= 0)
- new_actor->set_kill_time(arg->kill_time);
- if (auto_restart)
- new_actor->set_auto_restart(auto_restart);
+ simgrid::kernel::actor::ActorImplPtr new_actor = simgrid::kernel::actor::ActorImpl::create(arg);
} catch (simgrid::HostFailureException const&) {
- XBT_WARN("Deployment includes some initially turned off Hosts ... nevermind.");
+ XBT_WARN("Starting actor %s(%s) failed because its host is turned off.", arg->name.c_str(), host->get_cname());
}
}
}
static void producer(SharedBuffer& buf)
{
- static bool inited = false;
static int todo = cfg_item_count; // remaining amount of items to exchange
SemStack to_release;
- XBT_INFO("Producer %s", inited ? "rebooting" : "booting");
+ bool rebooting = sg4::Actor::self()->get_restart_count() > 0;
- if (not inited) {
+ XBT_INFO("Producer %s", rebooting ? "rebooting" : "booting");
+ if (not rebooting) // Starting for the first time
sg4::this_actor::on_exit(
[](bool forcefully) { XBT_INFO("Producer dying %s.", forcefully ? "forcefully" : "peacefully"); });
- inited = true;
- }
while (todo > 0) {
xbt_assert(sg4::Engine::get_clock() < cfg_deadline,
static void consumer(const SharedBuffer& buf)
{
SemStack to_release;
+ bool rebooting = sg4::Actor::self()->get_restart_count() > 0;
- static bool inited = false;
- XBT_INFO("Consumer %s", inited ? "rebooting" : "booting");
- if (not inited) {
+ XBT_INFO("Consumer %s", rebooting ? "rebooting" : "booting");
+ if (not rebooting) // Starting for the first time
sg4::this_actor::on_exit(
[](bool forcefully) { XBT_INFO("Consumer dying %s.", forcefully ? "forcefully" : "peacefully"); });
- inited = true;
- }
int item;
do {