AND (Algorithmique Numérique Distribuée)

Public GIT Repository
Only add a barrier to 8 collectives. The other ones don't seem to need it
author     Martin Quinson <martin.quinson@ens-rennes.fr>
           Mon, 10 Oct 2022 20:49:19 +0000 (22:49 +0200)
committer  Martin Quinson <martin.quinson@ens-rennes.fr>
           Mon, 10 Oct 2022 20:49:19 +0000 (22:49 +0200)
ChangeLog
docs/source/Configuring_SimGrid.rst
src/smpi/bindings/smpi_pmpi_coll.cpp

index 7863f28..e782fb0 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -5,8 +5,8 @@ S4U:
    Comm::set_payload_size() to change the size of the simulated data.
 
 MPI:
- - New option smpi/barrier-collectives to add a barrier in all
-   collectives to detect dangerous code that /may/ work on some implems.
+ - New option smpi/barrier-collectives to add a barrier to some collectives
+   to detect dangerous code that /may/ work on some MPI implems.
 
 Models:
  - WiFi: the total capacity of a link depends on the amout of flows on that link.
index f5af4a0..6ed80fd 100644 (file)
--- a/docs/source/Configuring_SimGrid.rst
+++ b/docs/source/Configuring_SimGrid.rst
@@ -1376,13 +1376,13 @@ Add a barrier in all collectives
 
 **Option** ``smpi/barrier-collectives`` **default:** off
 
-This option adds a simple barrier in all collectives operation to catch dangerous
-code that may or may not work depending on the MPI implementation. It is disabled
-by default, and activated by the `-analyze` flag of smpirun.
+This option adds a simple barrier in some collective operations to catch dangerous
+code that may or may not work depending on the MPI implementation: Bcast, Exscan,
+Gather, Gatherv, Scan, Scatter, Scatterv and Reduce.
 
 For example, the following code works with OpenMPI while it deadlocks in MPICH and
-Intel MPI. It seems to mean that OpenMPI has a "fire and forget" implementation for
-Broadcast.
+Intel MPI. Broadcast seems to be "fire and forget" in OpenMPI while other
+implementations expect to receive a message.
 
 .. code-block:: C
 
@@ -1394,6 +1394,9 @@ Broadcast.
     MPI_Bcast(buf1, buff_size, MPI_CHAR, 0, newcom);
   }
 
+The barrier is only simulated and does not involve any additional message (it is an S4U barrier).
+This option is disabled by default, and activated by the `-analyze` flag of smpirun.
+
 .. _cfg=smpi/barrier-finalization:
 
 Add a barrier in MPI_Finalize
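
For reference, here is a minimal, self-contained sketch of the kind of hazard the
smpi/barrier-collectives documentation above describes (hypothetical; it is not the
exact snippet from Configuring_SimGrid.rst, which uses a communicator named newcom).
Rank 1 posts a blocking receive before entering the broadcast, while the root
broadcasts first and only then sends. If the root's MPI_Bcast returns without waiting
for the other ranks ("fire and forget", as OpenMPI appears to do), the program
completes; if the broadcast synchronizes, both ranks deadlock.

.. code-block:: C

   #include <mpi.h>
   #include <stdio.h>

   int main(int argc, char* argv[])
   {
     char buf1[64] = "payload"; /* broadcast payload, meaningful on the root only */
     char buf2[64];
     int rank;

     MPI_Init(&argc, &argv);
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);

     if (rank == 0) {
       MPI_Bcast(buf1, sizeof(buf1), MPI_CHAR, 0, MPI_COMM_WORLD); /* may or may not block here */
       MPI_Send(buf1, sizeof(buf1), MPI_CHAR, 1, 42, MPI_COMM_WORLD);
     } else if (rank == 1) {
       /* The receive is posted BEFORE this rank enters the collective. */
       MPI_Recv(buf2, sizeof(buf2), MPI_CHAR, 0, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
       MPI_Bcast(buf1, sizeof(buf1), MPI_CHAR, 0, MPI_COMM_WORLD);
     } else {
       MPI_Bcast(buf1, sizeof(buf1), MPI_CHAR, 0, MPI_COMM_WORLD);
     }

     printf("rank %d is done\n", rank);
     MPI_Finalize();
     return 0;
   }

With smpi/barrier-collectives enabled (for instance through smpirun's -analyze flag, as
documented above), the barrier injected before the blocking MPI_Bcast makes this deadlock
deterministic instead of implementation-dependent, which is the point of the option.
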
index c626857..1804c73 100644 (file)
--- a/src/smpi/bindings/smpi_pmpi_coll.cpp
+++ b/src/smpi/bindings/smpi_pmpi_coll.cpp
@@ -76,7 +76,8 @@ int PMPI_Ibcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm
                      new simgrid::instr::CollTIData(request == MPI_REQUEST_IGNORED ? "bcast" : "ibcast", root, -1.0,
                                                     count, 0,
                                                     simgrid::smpi::Datatype::encode(datatype), ""));
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives") &&
+      request == MPI_REQUEST_IGNORED) // No barrier in Ibcast
     smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   if (comm->size() > 1) {
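
The request == MPI_REQUEST_IGNORED guard matters because the same PMPI_I* entry point
implements both the blocking and the nonblocking flavor of each collective (see the
"bcast" / "ibcast" discrimination in the tracing call above): the barrier is only
injected into the blocking flavor. A minimal sketch (hypothetical, not taken from the
SimGrid test suite) of a perfectly legal program that a barrier inside MPI_Ibcast would
deadlock: the non-root rank starts the broadcast, completes an unrelated point-to-point
exchange with the root, and only then waits on the collective.

.. code-block:: C

   #include <mpi.h>

   int main(int argc, char* argv[])
   {
     char buf[64] = "payload";
     int token = 1;
     int rank;
     MPI_Request req;

     MPI_Init(&argc, &argv);
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);

     if (rank == 0) {
       /* The root receives from rank 1 before joining the broadcast at all. */
       MPI_Recv(&token, 1, MPI_INT, 1, 7, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
       MPI_Ibcast(buf, sizeof(buf), MPI_CHAR, 0, MPI_COMM_WORLD, &req);
     } else {
       /* Legal: MPI_Ibcast only starts the collective and does not block. */
       MPI_Ibcast(buf, sizeof(buf), MPI_CHAR, 0, MPI_COMM_WORLD, &req);
       if (rank == 1)
         MPI_Send(&token, 1, MPI_INT, 0, 7, MPI_COMM_WORLD);
     }
     MPI_Wait(&req, MPI_STATUS_IGNORE);

     MPI_Finalize();
     return 0;
   }

A barrier placed at the start of MPI_Ibcast would block rank 1 before its MPI_Send,
while the root sits in MPI_Recv waiting for that very send; hence the barrier is
restricted to the blocking collectives in this commit.
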
@@ -138,7 +139,8 @@ int PMPI_Igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives") &&
+      request == MPI_REQUEST_IGNORED) // no barrier in Igather
     smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -197,7 +199,8 @@ int PMPI_Igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives") &&
+      request == MPI_REQUEST_IGNORED) // no barrier in Igatherv
     smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   const void* real_sendbuf   = sendbuf;
@@ -270,9 +273,6 @@ int PMPI_Iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, v
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
-    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
-
   aid_t pid = simgrid::s4u::this_actor::get_pid();
 
   TRACE_smpi_comm_in(pid, request == MPI_REQUEST_IGNORED ? "PMPI_Allgather" : "PMPI_Iallggather",
@@ -320,9 +320,6 @@ int PMPI_Iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype,
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
-    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
-
   if (sendbuf == MPI_IN_PLACE) {
     sendbuf   = static_cast<char*>(recvbuf) + recvtype->get_extent() * displs[comm->rank()];
     sendcount = recvcounts[comm->rank()];
@@ -391,7 +388,8 @@ int PMPI_Iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives") &&
+      request == MPI_REQUEST_IGNORED) // no barrier in Iscatter
     smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -450,7 +448,8 @@ int PMPI_Iscatterv(const void* sendbuf, const int* sendcounts, const int* displs
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives") &&
+      request == MPI_REQUEST_IGNORED) // no barrier in Iscatterv
     smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid        = simgrid::s4u::this_actor::get_pid();
@@ -504,7 +503,8 @@ int PMPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives") &&
+      request == MPI_REQUEST_IGNORED) // no barrier in Ireduce
     smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -560,9 +560,6 @@ int PMPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
-    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
-
   std::vector<unsigned char> tmp_sendbuf;
   const void* real_sendbuf = smpi_get_in_place_buf(sendbuf, recvbuf, tmp_sendbuf, count, datatype);
 
@@ -603,7 +600,8 @@ int PMPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives") &&
+      request == MPI_REQUEST_IGNORED) // no barrier in Iscan
     smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -644,7 +642,8 @@ int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives") &&
+      request == MPI_REQUEST_IGNORED) // no barrier in Iexscan
     smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -691,9 +690,6 @@ int PMPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcoun
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
-    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
-
   aid_t pid                          = simgrid::s4u::this_actor::get_pid();
   auto trace_recvcounts              = std::make_shared<std::vector<int>>();
   trace_recvcounts->insert(trace_recvcounts->end(), &recvcounts[0], &recvcounts[comm->size()]);
@@ -744,9 +740,6 @@ int PMPI_Ireduce_scatter_block(const void* sendbuf, void* recvbuf, int recvcount
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
-    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
-
   int count = comm->size();
 
   aid_t pid                          = simgrid::s4u::this_actor::get_pid();
@@ -814,9 +807,6 @@ int PMPI_Ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, vo
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
-    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
-
   TRACE_smpi_comm_in(pid, request == MPI_REQUEST_IGNORED ? "PMPI_Alltoall" : "PMPI_Ialltoall",
                      new simgrid::instr::CollTIData(
                          request == MPI_REQUEST_IGNORED ? "alltoall" : "ialltoall", -1, -1.0,
@@ -870,9 +860,6 @@ int PMPI_Ialltoallv(const void* sendbuf, const int* sendcounts, const int* sendd
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
-    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
-
   int send_size                      = 0;
   int recv_size                      = 0;
   auto trace_sendcounts              = std::make_shared<std::vector<int>>();
@@ -968,9 +955,6 @@ int PMPI_Ialltoallw(const void* sendbuf, const int* sendcounts, const int* sendd
 
   const SmpiBenchGuard suspend_bench;
 
-  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
-    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
-
   int send_size                      = 0;
   int recv_size                      = 0;
   auto trace_sendcounts              = std::make_shared<std::vector<int>>();