From 809ceedc3c7d57329e88aed7275af1eb205e1d52 Mon Sep 17 00:00:00 2001 From: Martin Quinson Date: Sat, 18 Mar 2023 11:58:55 +0100 Subject: [PATCH] Move the checker_side_ from the ModelChecker to the RemoteApp with an axe. --- src/mc/ModelChecker.cpp | 62 +-------------------- src/mc/ModelChecker.hpp | 12 +--- src/mc/api/RemoteApp.cpp | 101 ++++++++++++++++++++++++++++------ src/mc/api/RemoteApp.hpp | 2 + src/mc/remote/CheckerSide.cpp | 7 ++- src/mc/remote/CheckerSide.hpp | 2 +- src/mc/sosp/Snapshot_test.cpp | 2 +- 7 files changed, 96 insertions(+), 92 deletions(-) diff --git a/src/mc/ModelChecker.cpp b/src/mc/ModelChecker.cpp index 8de5122af5..75eb5a6ca7 100644 --- a/src/mc/ModelChecker.cpp +++ b/src/mc/ModelChecker.cpp @@ -23,71 +23,13 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_ModelChecker, mc, "ModelChecker"); ::simgrid::mc::ModelChecker* mc_model_checker = nullptr; -#ifdef __linux__ -# define WAITPID_CHECKED_FLAGS __WALL -#else -# define WAITPID_CHECKED_FLAGS 0 -#endif - namespace simgrid::mc { -ModelChecker::ModelChecker(std::unique_ptr remote_memory, int sockfd) - : checker_side_(sockfd), remote_process_memory_(std::move(remote_memory)) +ModelChecker::ModelChecker(std::unique_ptr remote_memory) + : remote_process_memory_(std::move(remote_memory)) { } -void ModelChecker::start() -{ - checker_side_.start( - [](evutil_socket_t sig, short events, void* arg) { - auto mc = static_cast(arg); - if (events == EV_READ) { - std::array buffer; - ssize_t size = recv(mc->checker_side_.get_channel().get_socket(), buffer.data(), buffer.size(), MSG_DONTWAIT); - if (size == -1) { - XBT_ERROR("Channel::receive failure: %s", strerror(errno)); - if (errno != EAGAIN) - throw simgrid::xbt::errno_error(); - } - - if (not mc->handle_message(buffer.data(), size)) - mc->checker_side_.break_loop(); - } else if (events == EV_SIGNAL) { - if (sig == SIGCHLD) - mc->handle_waitpid(); - else - xbt_die("Unexpected signal: %d", sig); - } else { - xbt_die("Unexpected event"); - } - }, - this); - - XBT_DEBUG("Waiting for the model-checked process"); - int status; - - // The model-checked process SIGSTOP itself to signal it's ready: - const pid_t pid = remote_process_memory_->pid(); - - xbt_assert(waitpid(pid, &status, WAITPID_CHECKED_FLAGS) == pid && WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP, - "Could not wait model-checked process"); - - errno = 0; -#ifdef __linux__ - ptrace(PTRACE_SETOPTIONS, pid, nullptr, PTRACE_O_TRACEEXIT); - ptrace(PTRACE_CONT, pid, 0, 0); -#elif defined BSD - ptrace(PT_CONTINUE, pid, (caddr_t)1, 0); -#else -# error "no ptrace equivalent coded for this platform" -#endif - xbt_assert(errno == 0, - "Ptrace does not seem to be usable in your setup (errno: %d). " - "If you run from within a docker, adding `--cap-add SYS_PTRACE` to the docker line may help. " - "If it does not help, please report this bug.", - errno); -} - bool ModelChecker::handle_message(const char* buffer, ssize_t size) { s_mc_message_t base_message; diff --git a/src/mc/ModelChecker.hpp b/src/mc/ModelChecker.hpp index 50f37791e2..551d0f0307 100644 --- a/src/mc/ModelChecker.hpp +++ b/src/mc/ModelChecker.hpp @@ -18,27 +18,21 @@ namespace simgrid::mc { /** State of the model-checker (global variables for the model checker) */ class ModelChecker { - CheckerSide checker_side_; std::unique_ptr remote_process_memory_; Exploration* exploration_ = nullptr; public: ModelChecker(ModelChecker const&) = delete; ModelChecker& operator=(ModelChecker const&) = delete; - explicit ModelChecker(std::unique_ptr remote_simulation, int sockfd); + explicit ModelChecker(std::unique_ptr remote_simulation); RemoteProcessMemory& get_remote_process_memory() { return *remote_process_memory_; } - Channel& get_channel() { return checker_side_.get_channel(); } - void channel_handle_events() { checker_side_.dispatch(); } - - void start(); Exploration* get_exploration() const { return exploration_; } void set_exploration(Exploration* exploration) { exploration_ = exploration; } -private: - bool handle_message(const char* buffer, ssize_t size); - void handle_waitpid(); + void handle_waitpid(); // FIXME move to RemoteApp + bool handle_message(const char* buffer, ssize_t size); // FIXME move to RemoteApp }; } // namespace simgrid::mc diff --git a/src/mc/api/RemoteApp.cpp b/src/mc/api/RemoteApp.cpp index d7fa9773a8..6d250bad40 100644 --- a/src/mc/api/RemoteApp.cpp +++ b/src/mc/api/RemoteApp.cpp @@ -26,6 +26,14 @@ #ifdef __linux__ #include #endif +#include +#include + +#ifdef __linux__ +#define WAITPID_CHECKED_FLAGS __WALL +#else +#define WAITPID_CHECKED_FLAGS 0 +#endif XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_Session, mc, "Model-checker session"); XBT_LOG_EXTERNAL_CATEGORY(mc_global); @@ -117,11 +125,12 @@ RemoteApp::RemoteApp(const std::vector& args) xbt_assert(mc_model_checker == nullptr, "Did you manage to start the MC twice in this process?"); + checker_side_ = std::make_unique(sockets[1]); auto process = std::make_unique(pid); - model_checker_ = std::make_unique(std::move(process), sockets[1]); + model_checker_ = std::make_unique(std::move(process)); mc_model_checker = model_checker_.get(); - model_checker_->start(); + start(); /* Take the initial snapshot */ wait_for_requests(); @@ -138,7 +147,63 @@ RemoteApp::~RemoteApp() mc_model_checker = nullptr; } } - +void RemoteApp::start() +{ + checker_side_->start( + [](evutil_socket_t sig, short events, void* arg) { + auto checker = static_cast(arg); + if (events == EV_READ) { + std::array buffer; + ssize_t size = recv(checker->get_channel().get_socket(), buffer.data(), buffer.size(), MSG_DONTWAIT); + if (size == -1) { + XBT_ERROR("Channel::receive failure: %s", strerror(errno)); + if (errno != EAGAIN) + throw simgrid::xbt::errno_error(); + } + + if (not mc_model_checker->handle_message(buffer.data(), size)) + checker->break_loop(); + } else { + xbt_die("Unexpected event"); + } + }, + [](evutil_socket_t sig, short events, void* arg) { + auto mc = static_cast(arg); + if (events == EV_SIGNAL) { + if (sig == SIGCHLD) + mc->handle_waitpid(); + else + xbt_die("Unexpected signal: %d", sig); + } else { + xbt_die("Unexpected event"); + } + }, + model_checker_.get()); + + XBT_DEBUG("Waiting for the model-checked process"); + int status; + + // The model-checked process SIGSTOP itself to signal it's ready: + const pid_t pid = get_remote_process_memory().pid(); + + xbt_assert(waitpid(pid, &status, WAITPID_CHECKED_FLAGS) == pid && WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP, + "Could not wait model-checked process"); + + errno = 0; +#ifdef __linux__ + ptrace(PTRACE_SETOPTIONS, pid, nullptr, PTRACE_O_TRACEEXIT); + ptrace(PTRACE_CONT, pid, 0, 0); +#elif defined BSD + ptrace(PT_CONTINUE, pid, (caddr_t)1, 0); +#else +#error "no ptrace equivalent coded for this platform" +#endif + xbt_assert(errno == 0, + "Ptrace does not seem to be usable in your setup (errno: %d). " + "If you run from within a docker, adding `--cap-add SYS_PTRACE` to the docker line may help. " + "If it does not help, please report this bug.", + errno); +} void RemoteApp::restore_initial_state() const { this->initial_snapshot_->restore(model_checker_->get_remote_process_memory()); @@ -149,9 +214,9 @@ unsigned long RemoteApp::get_maxpid() const // note: we could maybe cache it and count the actor creation on checker side too. // But counting correctly accross state checkpoint/restore would be annoying. - model_checker_->get_channel().send(MessageType::ACTORS_MAXPID); + checker_side_->get_channel().send(MessageType::ACTORS_MAXPID); s_mc_message_int_t answer; - ssize_t answer_size = model_checker_->get_channel().receive(answer); + ssize_t answer_size = checker_side_->get_channel().receive(answer); xbt_assert(answer_size != -1, "Could not receive message"); xbt_assert(answer_size == sizeof answer, "Broken message (size=%zd; expected %zu)", answer_size, sizeof answer); xbt_assert(answer.type == MessageType::ACTORS_MAXPID_REPLY, @@ -170,10 +235,10 @@ void RemoteApp::get_actors_status(std::map& whereto) const // <----- send ACTORS_STATUS_REPLY // <----- send `N` `s_mc_message_actors_status_one_t` structs // <----- send `M` `s_mc_message_simcall_probe_one_t` structs - model_checker_->get_channel().send(MessageType::ACTORS_STATUS); + checker_side_->get_channel().send(MessageType::ACTORS_STATUS); s_mc_message_actors_status_answer_t answer; - ssize_t answer_size = model_checker_->get_channel().receive(answer); + ssize_t answer_size = checker_side_->get_channel().receive(answer); xbt_assert(answer_size != -1, "Could not receive message"); xbt_assert(answer_size == sizeof answer, "Broken message (size=%zd; expected %zu)", answer_size, sizeof answer); xbt_assert(answer.type == MessageType::ACTORS_STATUS_REPLY, @@ -191,7 +256,7 @@ void RemoteApp::get_actors_status(std::map& whereto) const std::vector status(answer.count); if (answer.count > 0) { size_t size = status.size() * sizeof(s_mc_message_actors_status_one_t); - ssize_t received = model_checker_->get_channel().receive(status.data(), size); + ssize_t received = checker_side_->get_channel().receive(status.data(), size); xbt_assert(static_cast(received) == size); } @@ -207,7 +272,7 @@ void RemoteApp::get_actors_status(std::map& whereto) const std::vector probes(answer.transition_count); if (answer.transition_count > 0) { for (auto& probe : probes) { - ssize_t received = model_checker_->get_channel().receive(probe); + ssize_t received = checker_side_->get_channel().receive(probe); xbt_assert(received >= 0, "Could not receive response to ACTORS_PROBE message (%s)", strerror(errno)); xbt_assert(static_cast(received) == sizeof probe, "Could not receive response to ACTORS_PROBE message (%zd bytes received != %zu bytes expected", @@ -238,9 +303,9 @@ void RemoteApp::get_actors_status(std::map& whereto) const void RemoteApp::check_deadlock() const { - xbt_assert(model_checker_->get_channel().send(MessageType::DEADLOCK_CHECK) == 0, "Could not check deadlock state"); + xbt_assert(checker_side_->get_channel().send(MessageType::DEADLOCK_CHECK) == 0, "Could not check deadlock state"); s_mc_message_int_t message; - ssize_t received = model_checker_->get_channel().receive(message); + ssize_t received = checker_side_->get_channel().receive(message); xbt_assert(received != -1, "Could not receive message"); xbt_assert(received == sizeof message, "Broken message (size=%zd; expected %zu)", received, sizeof message); xbt_assert(message.type == MessageType::DEADLOCK_CHECK_REPLY, @@ -262,12 +327,12 @@ void RemoteApp::check_deadlock() const void RemoteApp::wait_for_requests() { /* Resume the application */ - if (model_checker_->get_channel().send(MessageType::CONTINUE) != 0) + if (checker_side_->get_channel().send(MessageType::CONTINUE) != 0) throw xbt::errno_error(); get_remote_process_memory().clear_cache(); if (this->get_remote_process_memory().running()) - model_checker_->channel_handle_events(); + checker_side_->dispatch(); } void RemoteApp::shutdown() @@ -289,14 +354,14 @@ Transition* RemoteApp::handle_simcall(aid_t aid, int times_considered, bool new_ m.type = MessageType::SIMCALL_EXECUTE; m.aid_ = aid; m.times_considered_ = times_considered; - model_checker_->get_channel().send(m); + checker_side_->get_channel().send(m); get_remote_process_memory().clear_cache(); if (this->get_remote_process_memory().running()) - model_checker_->channel_handle_events(); // The app may send messages while processing the transition + checker_side_->dispatch(); // The app may send messages while processing the transition s_mc_message_simcall_execute_answer_t answer; - ssize_t s = model_checker_->get_channel().receive(answer); + ssize_t s = checker_side_->get_channel().receive(answer); xbt_assert(s != -1, "Could not receive message"); xbt_assert(s == sizeof answer, "Broken message (size=%zd; expected %zu)", s, sizeof answer); xbt_assert(answer.type == MessageType::SIMCALL_EXECUTE_ANSWER, @@ -315,10 +380,10 @@ void RemoteApp::finalize_app(bool terminate_asap) s_mc_message_int_t m = {}; m.type = MessageType::FINALIZE; m.value = terminate_asap; - xbt_assert(model_checker_->get_channel().send(m) == 0, "Could not ask the app to finalize on need"); + xbt_assert(checker_side_->get_channel().send(m) == 0, "Could not ask the app to finalize on need"); s_mc_message_t answer; - ssize_t s = model_checker_->get_channel().receive(answer); + ssize_t s = checker_side_->get_channel().receive(answer); xbt_assert(s != -1, "Could not receive answer to FINALIZE"); xbt_assert(s == sizeof answer, "Broken message (size=%zd; expected %zu)", s, sizeof answer); xbt_assert(answer.type == MessageType::FINALIZE_REPLY, diff --git a/src/mc/api/RemoteApp.hpp b/src/mc/api/RemoteApp.hpp index 58531fab20..77e07f693e 100644 --- a/src/mc/api/RemoteApp.hpp +++ b/src/mc/api/RemoteApp.hpp @@ -25,6 +25,7 @@ namespace simgrid::mc { */ class XBT_PUBLIC RemoteApp { private: + std::unique_ptr checker_side_; std::unique_ptr model_checker_; PageStore page_store_{500}; std::shared_ptr initial_snapshot_; @@ -45,6 +46,7 @@ public: ~RemoteApp(); + void start(); void restore_initial_state() const; void wait_for_requests(); diff --git a/src/mc/remote/CheckerSide.cpp b/src/mc/remote/CheckerSide.cpp index 7a6202b833..c797daadfd 100644 --- a/src/mc/remote/CheckerSide.cpp +++ b/src/mc/remote/CheckerSide.cpp @@ -9,16 +9,17 @@ namespace simgrid::mc { -void CheckerSide::start(void (*handler)(int, short, void*), ModelChecker* mc) +void CheckerSide::start(void (*handler_sock)(int, short, void*), void (*handler_sig)(int, short, void*), + ModelChecker* mc) { auto* base = event_base_new(); base_.reset(base); - auto* socket_event = event_new(base, get_channel().get_socket(), EV_READ | EV_PERSIST, handler, mc); + auto* socket_event = event_new(base, get_channel().get_socket(), EV_READ | EV_PERSIST, handler_sock, this); event_add(socket_event, nullptr); socket_event_.reset(socket_event); - auto* signal_event = event_new(base, SIGCHLD, EV_SIGNAL | EV_PERSIST, handler, mc); + auto* signal_event = event_new(base, SIGCHLD, EV_SIGNAL | EV_PERSIST, handler_sig, mc); event_add(signal_event, nullptr); signal_event_.reset(signal_event); } diff --git a/src/mc/remote/CheckerSide.hpp b/src/mc/remote/CheckerSide.hpp index 287bf896df..725ab093e7 100644 --- a/src/mc/remote/CheckerSide.hpp +++ b/src/mc/remote/CheckerSide.hpp @@ -33,7 +33,7 @@ public: Channel const& get_channel() const { return channel_; } Channel& get_channel() { return channel_; } - void start(void (*handler)(int, short, void*), ModelChecker* mc); + void start(void (*handler_sock)(int, short, void*), void (*handler_sig)(int, short, void*), ModelChecker* mc); void dispatch() const; void break_loop() const; }; diff --git a/src/mc/sosp/Snapshot_test.cpp b/src/mc/sosp/Snapshot_test.cpp index 95c10a06ab..662c7a1cee 100644 --- a/src/mc/sosp/Snapshot_test.cpp +++ b/src/mc/sosp/Snapshot_test.cpp @@ -62,7 +62,7 @@ void snap_test_helper::Init() process = std::make_unique(getpid()); process->init(nullptr); - mc_model_checker = new ::simgrid::mc::ModelChecker(std::move(process), -1); + mc_model_checker = new ::simgrid::mc::ModelChecker(std::move(process)); } snap_test_helper::prologue_return snap_test_helper::prologue(int n) -- 2.20.1