From: Augustin Degomme
Date: Sun, 20 Mar 2022 23:15:01 +0000 (+0100)
Subject: Have MPI collectives display their root and MPI_Op to handle mismatches lazily
X-Git-Tag: v3.31~17
X-Git-Url: http://bilbo.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/cdbe41fd5a9d7501206af7e3fe0080b661e62e76

Have MPI collectives display their root and MPI_Op to handle mismatches lazily
---

diff --git a/src/smpi/bindings/smpi_pmpi_coll.cpp b/src/smpi/bindings/smpi_pmpi_coll.cpp
index 4549c06970..7df4731f6d 100644
--- a/src/smpi/bindings/smpi_pmpi_coll.cpp
+++ b/src/smpi/bindings/smpi_pmpi_coll.cpp
@@ -67,7 +67,9 @@ int PMPI_Ibcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm
   CHECK_BUFFER(1, buf, count, datatype)
   CHECK_ROOT(4)
   CHECK_REQUEST(6)
-  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Bcast" : "PMPI_Ibcast")
+  std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Bcast" : "PMPI_Ibcast");
+  name += " with root " + std::to_string(root);
+  CHECK_COLLECTIVE(comm, name.c_str())

   const SmpiBenchGuard suspend_bench;
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -116,7 +118,9 @@ int PMPI_Igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi
   }
   CHECK_ROOT(7)
   CHECK_REQUEST(9)
-  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Gather" : "PMPI_Igather")
+  std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Gather" : "PMPI_Igather");
+  name += " with root " + std::to_string(root);
+  CHECK_COLLECTIVE(comm, name.c_str())

   const void* real_sendbuf = sendbuf;
   int real_sendcount = sendcount;
@@ -177,7 +181,9 @@ int PMPI_Igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi
   }
   CHECK_ROOT(8)
   CHECK_REQUEST(10)
-  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Gatherv" : "PMPI_Igatherv")
+  std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Gatherv" : "PMPI_Igatherv");
+  name += " with root " + std::to_string(root);
+  CHECK_COLLECTIVE(comm, name.c_str())

   if (rank == root){
     for (int i = 0; i < comm->size(); i++) {
@@ -356,7 +362,9 @@ int PMPI_Iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi
   }
   CHECK_ROOT(8)
   CHECK_REQUEST(9)
-  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Scatter" : "PMPI_Iscatter")
+  std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Scatter" : "PMPI_Iscatter");
+  name += " with root " + std::to_string(root);
+  CHECK_COLLECTIVE(comm, name.c_str())

   if (recvbuf == MPI_IN_PLACE) {
     recvtype = sendtype;
@@ -421,7 +429,9 @@ int PMPI_Iscatterv(const void* sendbuf, const int* sendcounts, const int* displs
   } else {
     CHECK_NOT_IN_PLACE_ROOT(4, recvbuf)
   }
-  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Scatterv" : "PMPI_Iscatterv")
+  std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Scatterv" : "PMPI_Iscatterv");
+  name += " with root " + std::to_string(root);
+  CHECK_COLLECTIVE(comm, name.c_str())

   const SmpiBenchGuard suspend_bench;

@@ -471,7 +481,10 @@ int PMPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
   CHECK_OP(5, op, datatype)
   CHECK_ROOT(7)
   CHECK_REQUEST(8)
-  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Reduce" : "PMPI_Ireduce")
+  std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Reduce" : "PMPI_Ireduce");
"PMPI_Reduce" : "PMPI_Ireduce"); + name += " with op " + op->name(); + name += " and root " + std::to_string(root); + CHECK_COLLECTIVE(comm, name.c_str()) const SmpiBenchGuard suspend_bench; aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -522,7 +535,9 @@ int PMPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype CHECK_BUFFER(1, sendbuf, count, datatype) CHECK_BUFFER(2, recvbuf, count, datatype) CHECK_REQUEST(7) - CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Allreduce" : "PMPI_Iallreduce") + std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Alleduce" : "PMPI_Iallreduce"); + name += " with op " + op->name(); + CHECK_COLLECTIVE(comm, name.c_str()) const SmpiBenchGuard suspend_bench; std::vector tmp_sendbuf; @@ -560,7 +575,9 @@ int PMPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat CHECK_BUFFER(2,recvbuf,count, datatype) CHECK_REQUEST(7) CHECK_OP(5, op, datatype) - CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Scan" : "PMPI_Iscan") + std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Scan" : "PMPI_Iscan"); + name += " with op " + op->name(); + CHECK_COLLECTIVE(comm, name.c_str()) const SmpiBenchGuard suspend_bench; aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -596,7 +613,9 @@ int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat CHECK_BUFFER(2, recvbuf, count, datatype) CHECK_REQUEST(7) CHECK_OP(5, op, datatype) - CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Exscan" : "PMPI_Iexscan") + std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Exscan" : "PMPI_Iexscan"); + name += " with op " + op->name(); + CHECK_COLLECTIVE(comm, name.c_str()) const SmpiBenchGuard suspend_bench; aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -638,7 +657,9 @@ int PMPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcoun CHECK_BUFFER(1, sendbuf, recvcounts[i], datatype) CHECK_BUFFER(2, recvbuf, recvcounts[i], datatype) } - CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Reduce_scatter" : "PMPI_Ireduce_scatter") + std::string name = (request == MPI_REQUEST_IGNORED ? "PMPI_Reduce_scatter" : "PMPI_Ireduce_scatter"); + name += " with op " + op->name(); + CHECK_COLLECTIVE(comm, name.c_str()) const SmpiBenchGuard suspend_bench; aid_t pid = simgrid::s4u::this_actor::get_pid(); @@ -685,7 +706,9 @@ int PMPI_Ireduce_scatter_block(const void* sendbuf, void* recvbuf, int recvcount CHECK_BUFFER(2, recvbuf, recvcount, datatype) CHECK_REQUEST(7) CHECK_OP(5, op, datatype) - CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Reduce_scatter_block" : "PMPI_Ireduce_scatter_block") + std::string name = (request == MPI_REQUEST_IGNORED ? 
"PMPI_Reduce_scatter_block" : "PMPI_Ireduce_scatter_block"); + name += " with op " + op->name(); + CHECK_COLLECTIVE(comm, name.c_str()) const SmpiBenchGuard suspend_bench; int count = comm->size(); diff --git a/src/smpi/include/smpi_op.hpp b/src/smpi/include/smpi_op.hpp index ff1c1283f7..1409e8a302 100644 --- a/src/smpi/include/smpi_op.hpp +++ b/src/smpi/include/smpi_op.hpp @@ -19,14 +19,15 @@ class Op : public F2C{ int refcount_ = 1; bool is_predefined_; int types_; //bitmask of the allowed datatypes flags + std::string name_; public: - Op(MPI_User_function* function, bool commutative, bool predefined=false, int types=0) : func_(function), is_commutative_(commutative), is_predefined_(predefined), types_(types) {if(not predefined) this->add_f();} + Op(MPI_User_function* function, bool commutative, bool predefined=false, int types=0, std::string name="MPI_Op") : func_(function), is_commutative_(commutative), is_predefined_(predefined), types_(types), name_(name) {if(not predefined) this->add_f();} bool is_commutative() const { return is_commutative_; } bool is_predefined() const { return is_predefined_; } bool is_fortran_op() const { return is_fortran_op_; } int allowed_types() const { return types_; } - std::string name() const override {return std::string("MPI_Op");} + std::string name() const override {return name_;} // tell that we were created from fortran, so we need to translate the type to fortran when called void set_fortran_op() { is_fortran_op_ = true; } void apply(const void* invec, void* inoutvec, const int* len, MPI_Datatype datatype) const; diff --git a/src/smpi/internals/smpi_utils.cpp b/src/smpi/internals/smpi_utils.cpp index 686bc14605..8d1fec4204 100644 --- a/src/smpi/internals/smpi_utils.cpp +++ b/src/smpi/internals/smpi_utils.cpp @@ -361,7 +361,7 @@ int check_collectives_ordering(MPI_Comm comm, std::string call){ vec->second.push_back(call); } else if (vec->second.size() > count){ if (vec->second[count] != call){ - XBT_WARN("Collective communication mismatch. For process %ld, expected %s, got %s", simgrid::s4u::this_actor::get_pid(), vec->second[count].c_str(), call.c_str()); + XBT_WARN("Collective operation mismatch. For process %ld, expected %s, got %s", simgrid::s4u::this_actor::get_pid(), vec->second[count].c_str(), call.c_str()); return MPI_ERR_OTHER; } } else { diff --git a/src/smpi/mpi/smpi_op.cpp b/src/smpi/mpi/smpi_op.cpp index 3e4e34822f..6679911632 100644 --- a/src/smpi/mpi/smpi_op.cpp +++ b/src/smpi/mpi/smpi_op.cpp @@ -240,7 +240,7 @@ static void no_func(void*, void*, int*, MPI_Datatype*) #define CREATE_MPI_OP(name, func, types) \ - SMPI_Op _XBT_CONCAT(smpi_MPI_, name)(&(func) /* func */, true, true, types); + SMPI_Op _XBT_CONCAT(smpi_MPI_, name)(&(func) /* func */, true, true, types, std::string(_XBT_STRINGIFY(MPI_##name))); #define MAX_TYPES DT_FLAG_C_INTEGER|DT_FLAG_F_INTEGER|DT_FLAG_FP|DT_FLAG_MULTILANG #define LAND_TYPES DT_FLAG_C_INTEGER|DT_FLAG_FP|DT_FLAG_LOGICAL|DT_FLAG_MULTILANG