From 9e06b08542a41f34598b10da650f04ad35d79bd2 Mon Sep 17 00:00:00 2001
From: Martin Quinson <martin.quinson@ens-rennes.fr>
Date: Wed, 10 May 2023 23:21:10 +0200
Subject: [PATCH] DFS MC: Restore from system states even if we have to run
 some transitions out of it

This makes the model-check/checkpoint a bit more efficient by
increasing the amount of cases where we can reuse taken checkpoints.

It is still awfully slow because checkpoints are. The next step is to
implement model-check/checkpoint with forks.
---
 examples/cpp/CMakeLists.txt                   |  8 ++++
 .../s4u-mc-synchro-mutex-stateful.tesh        | 13 ++++++
 src/mc/explo/DFSExplorer.cpp                  | 40 ++++++++++++++++---
 3 files changed, 55 insertions(+), 6 deletions(-)
 create mode 100644 examples/cpp/synchro-mutex/s4u-mc-synchro-mutex-stateful.tesh

diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index c0eca33faf..9c06ea0cc1 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -47,6 +47,14 @@ if(SIMGRID_HAVE_STATEFUL_MC)
     add_dependencies(tests-mc s4u-mc-bugged1-liveness-cleaner-off)
   endif()
 
+  ADD_TESH(s4u-mc-synchro-mutex-stateful
+     --setenv bindir=${CMAKE_CURRENT_BINARY_DIR}/synchro-mutex
+     --setenv libdir=${CMAKE_BINARY_DIR}/lib
+     --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms
+     --setenv srcdir=${CMAKE_CURRENT_SOURCE_DIR}/synchro-mutex
+     --cd ${CMAKE_CURRENT_SOURCE_DIR}/synchro-mutex
+      ${CMAKE_HOME_DIRECTORY}/examples/cpp/synchro-mutex/s4u-mc-synchro-mutex-stateful.tesh)
+
   # Model-checking liveness
   if(HAVE_UCONTEXT_CONTEXTS AND SIMGRID_PROCESSOR_x86_64)
     # liveness model-checking works only on 64bits (for now ...)
diff --git a/examples/cpp/synchro-mutex/s4u-mc-synchro-mutex-stateful.tesh b/examples/cpp/synchro-mutex/s4u-mc-synchro-mutex-stateful.tesh
new file mode 100644
index 0000000000..3d1e75d99b
--- /dev/null
+++ b/examples/cpp/synchro-mutex/s4u-mc-synchro-mutex-stateful.tesh
@@ -0,0 +1,13 @@
+#!/usr/bin/env tesh
+
+p This file tests the cfg=model-check/checkpoint option for DFS explorer
+
+$ $VALGRIND_NO_TRACE_CHILDREN ${bindir:=.}/../../../bin/simgrid-mc --cfg=model-check/checkpoint:5 --cfg=model-check/sleep-set:true -- ${bindir:=.}/s4u-synchro-mutex --cfg=actors:2 --log=s4u_test.thres:critical
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'model-check/checkpoint' to '5'
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'model-check/sleep-set' to 'true'
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'actors' to '2'
+> [0.000000] [mc_dfs/INFO] Start a DFS exploration. Reduction is: dpor.
+> [0.000000] [mc_dfs/INFO] DFS exploration ended. 130 unique states visited; 27 backtracks (209 transition replays, 52 states visited overall)
+
+p The stats without checkpoints is:               130 unique states visited; 27 backtracks (308 transition replays, 151 states visited overall)
+p But it runs much faster (0.6 sec vs. 1.6 sec), damn slow checkpointing code.
diff --git a/src/mc/explo/DFSExplorer.cpp b/src/mc/explo/DFSExplorer.cpp
index 4742ab474a..a8faec6fd7 100644
--- a/src/mc/explo/DFSExplorer.cpp
+++ b/src/mc/explo/DFSExplorer.cpp
@@ -336,20 +336,48 @@ void DFSExplorer::backtrack()
   }
 #endif
 
+  // Search how to restore the backtracking point
+  State* init_state = nullptr;
+  std::deque<Transition*> replay_recipe;
+  for (auto* s = backtracking_point.get(); s != nullptr; s = s->get_parent_state().get()) {
+#if SIMGRID_HAVE_STATEFUL_MC
+    if (s->get_system_state() != nullptr) { // Found a state that I can restore
+      init_state = s;
+      break;
+    }
+#endif
+    if (s->get_transition_in() != nullptr) // The root has no transition_in
+      replay_recipe.push_front(s->get_transition_in().get());
+  }
+
+  // Restore the init_state, if any
+  if (init_state != nullptr) {
+#if SIMGRID_HAVE_STATEFUL_MC
+    const auto* system_state = init_state->get_system_state();
+    system_state->restore(*get_remote_app().get_remote_process_memory());
+    on_restore_system_state_signal(init_state, get_remote_app());
+#endif
+  } else { // Restore the initial state if no intermediate state was found
+    get_remote_app().restore_initial_state();
+    on_restore_initial_state_signal(get_remote_app());
+  }
+
   /* if no snapshot, we need to restore the initial state and replay the transitions */
-  get_remote_app().restore_initial_state();
-  on_restore_initial_state_signal(get_remote_app());
   /* Traverse the stack from the state at position start and re-execute the transitions */
-  for (auto& state : backtracking_point->get_recipe()) {
-    state->replay(get_remote_app());
-    on_transition_replay_signal(state, get_remote_app());
+  for (auto& transition : replay_recipe) {
+    transition->replay(get_remote_app());
+    on_transition_replay_signal(transition, get_remote_app());
     visited_states_count_++;
   }
   this->restore_stack(backtracking_point);
 }
 
 DFSExplorer::DFSExplorer(const std::vector<char*>& args, bool with_dpor, bool need_memory_info)
-    : Exploration(args, need_memory_info || _sg_mc_termination)
+    : Exploration(args, need_memory_info || _sg_mc_termination
+#if SIMGRID_HAVE_STATEFUL_MC
+                            || _sg_mc_checkpoint > 0
+#endif
+      )
 {
   if (with_dpor)
     reduction_mode_ = ReductionMode::dpor;
-- 
2.20.1