From f1778fef9c17fd450a8c8bee3b893c3042c37adf Mon Sep 17 00:00:00 2001 From: Martin Quinson Date: Mon, 10 Oct 2022 22:49:19 +0200 Subject: [PATCH] Only add a barrier to 8 collectives. The other ones don't seem to need it --- ChangeLog | 4 +-- docs/source/Configuring_SimGrid.rst | 13 +++++--- src/smpi/bindings/smpi_pmpi_coll.cpp | 48 ++++++++++------------------ 3 files changed, 26 insertions(+), 39 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7863f285c8..e782fb08f3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,8 +5,8 @@ S4U: Comm::set_payload_size() to change the size of the simulated data. MPI: - - New option smpi/barrier-collectives to add a barrier in all - collectives to detect dangerous code that /may/ work on some implems. + - New option smpi/barrier-collectives to add a barrier to some collectives + to detect dangerous code that /may/ work on some MPI implems. Models: - WiFi: the total capacity of a link depends on the amout of flows on that link. diff --git a/docs/source/Configuring_SimGrid.rst b/docs/source/Configuring_SimGrid.rst index f5af4a0b80..6ed80fdcc3 100644 --- a/docs/source/Configuring_SimGrid.rst +++ b/docs/source/Configuring_SimGrid.rst @@ -1376,13 +1376,13 @@ Add a barrier in all collectives **Option** ``smpi/barrier-collectives`` **default:** off -This option adds a simple barrier in all collectives operation to catch dangerous -code that may or may not work depending on the MPI implementation. It is disabled -by default, and activated by the `-analyze` flag of smpirun. +This option adds a simple barrier in some collective operations to catch dangerous +code that may or may not work depending on the MPI implementation: Bcast, Exscan, +Gather, Gatherv, Scan, Scatter, Scatterv and Reduce. For example, the following code works with OpenMPI while it deadlocks in MPICH and -Intel MPI. It seems to mean that OpenMPI has a "fire and forget" implementation for -Broadcast. +Intel MPI. 
Broadcast seems to be "fire and forget" in OpenMPI while other +implementations expect to receive a message. .. code-block:: C @@ -1394,6 +1394,9 @@ Broadcast. MPI_Bcast(buf1, buff_size, MPI_CHAR, 0, newcom); } +The barrier is only simulated and does not involve any additional message (it is an S4U barrier). +This option is disabled by default, and activated by the `-analyze` flag of smpirun. + .. _cfg=smpi/barrier-finalization: Add a barrier in MPI_Finalize diff --git a/src/smpi/bindings/smpi_pmpi_coll.cpp b/src/smpi/bindings/smpi_pmpi_coll.cpp index c62685706d..1804c7319b 100644 --- a/src/smpi/bindings/smpi_pmpi_coll.cpp +++ b/src/smpi/bindings/smpi_pmpi_coll.cpp @@ -76,7 +76,8 @@ int PMPI_Ibcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm new simgrid::instr::CollTIData(request == MPI_REQUEST_IGNORED ? "bcast" : "ibcast", root, -1.0, count, 0, simgrid::smpi::Datatype::encode(datatype), "")); - if (simgrid::config::get_value("smpi/barrier-collectives")) + if (simgrid::config::get_value("smpi/barrier-collectives") && + request == MPI_REQUEST_IGNORED) // No barrier in Ibcast smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); if (comm->size() > 1) { @@ -138,7 +139,8 @@ int PMPI_Igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) + if (simgrid::config::get_value("smpi/barrier-collectives") && + request == MPI_REQUEST_IGNORED) // no barrier in Igather smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -197,7 +199,8 @@ int PMPI_Igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) + if (simgrid::config::get_value("smpi/barrier-collectives") && + request == MPI_REQUEST_IGNORED) // no barrier in Igatherv 
smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); const void* real_sendbuf = sendbuf; @@ -270,9 +273,6 @@ int PMPI_Iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, v const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) - smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); - aid_t pid = simgrid::s4u::this_actor::get_pid(); TRACE_smpi_comm_in(pid, request == MPI_REQUEST_IGNORED ? "PMPI_Allgather" : "PMPI_Iallggather", @@ -320,9 +320,6 @@ int PMPI_Iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) - smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); - if (sendbuf == MPI_IN_PLACE) { sendbuf = static_cast(recvbuf) + recvtype->get_extent() * displs[comm->rank()]; sendcount = recvcounts[comm->rank()]; @@ -391,7 +388,8 @@ int PMPI_Iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) + if (simgrid::config::get_value("smpi/barrier-collectives") && + request == MPI_REQUEST_IGNORED) // no barrier in Iscatter smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -450,7 +448,8 @@ int PMPI_Iscatterv(const void* sendbuf, const int* sendcounts, const int* displs const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) + if (simgrid::config::get_value("smpi/barrier-collectives") && + request == MPI_REQUEST_IGNORED) // no barrier in Iscatterv smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -504,7 +503,8 @@ int PMPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat const SmpiBenchGuard suspend_bench; - if 
(simgrid::config::get_value("smpi/barrier-collectives")) + if (simgrid::config::get_value("smpi/barrier-collectives") && + request == MPI_REQUEST_IGNORED) // no barrier in Ireduce smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -560,9 +560,6 @@ int PMPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) - smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); - std::vector tmp_sendbuf; const void* real_sendbuf = smpi_get_in_place_buf(sendbuf, recvbuf, tmp_sendbuf, count, datatype); @@ -603,7 +600,8 @@ int PMPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) + if (simgrid::config::get_value("smpi/barrier-collectives") && + request == MPI_REQUEST_IGNORED) // no barrier in Iscan smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -644,7 +642,8 @@ int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) + if (simgrid::config::get_value("smpi/barrier-collectives") && + request == MPI_REQUEST_IGNORED) // no barrier in Iexscan smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -691,9 +690,6 @@ int PMPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcoun const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) - smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); - aid_t pid = simgrid::s4u::this_actor::get_pid(); auto trace_recvcounts = std::make_shared>(); trace_recvcounts->insert(trace_recvcounts->end(), &recvcounts[0], 
&recvcounts[comm->size()]); @@ -744,9 +740,6 @@ int PMPI_Ireduce_scatter_block(const void* sendbuf, void* recvbuf, int recvcount const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) - smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); - int count = comm->size(); aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -814,9 +807,6 @@ int PMPI_Ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, vo const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) - smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); - TRACE_smpi_comm_in(pid, request == MPI_REQUEST_IGNORED ? "PMPI_Alltoall" : "PMPI_Ialltoall", new simgrid::instr::CollTIData( request == MPI_REQUEST_IGNORED ? "alltoall" : "ialltoall", -1, -1.0, @@ -870,9 +860,6 @@ int PMPI_Ialltoallv(const void* sendbuf, const int* sendcounts, const int* sendd const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) - smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); - int send_size = 0; int recv_size = 0; auto trace_sendcounts = std::make_shared>(); @@ -968,9 +955,6 @@ int PMPI_Ialltoallw(const void* sendbuf, const int* sendcounts, const int* sendd const SmpiBenchGuard suspend_bench; - if (simgrid::config::get_value("smpi/barrier-collectives")) - smpi_deployment_startup_barrier(smpi_process()->get_instance_id()); - int send_size = 0; int recv_size = 0; auto trace_sendcounts = std::make_shared>(); -- 2.20.1