From dbb0c80acf5f0b020ed570a418601afc09e693e3 Mon Sep 17 00:00:00 2001
From: Augustin Degomme
Date: Sun, 20 Mar 2022 21:41:55 +0100
Subject: [PATCH] Add checks for collectives, using the
 check_collectives_ordering utility. Maybe I missed some.

---
 src/smpi/bindings/smpi_pmpi.cpp      |  2 ++
 src/smpi/bindings/smpi_pmpi_coll.cpp | 18 +++++++++++++++++-
 src/smpi/bindings/smpi_pmpi_comm.cpp |  2 ++
 src/smpi/bindings/smpi_pmpi_file.cpp | 11 +++++++++++
 src/smpi/bindings/smpi_pmpi_topo.cpp |  2 ++
 src/smpi/bindings/smpi_pmpi_win.cpp  |  2 ++
 src/smpi/include/private.hpp         |  4 ++++
 7 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/src/smpi/bindings/smpi_pmpi.cpp b/src/smpi/bindings/smpi_pmpi.cpp
index 7de62bd039..6f205a2c90 100644
--- a/src/smpi/bindings/smpi_pmpi.cpp
+++ b/src/smpi/bindings/smpi_pmpi.cpp
@@ -57,6 +57,7 @@ int PMPI_Init(int*, char***)
   smpi_process()->mark_as_initialized();
   smpi_mpi_init();
 
+  CHECK_COLLECTIVE(smpi_process()->comm_world(), "MPI_Init")
   return MPI_SUCCESS;
 }
 
@@ -64,6 +65,7 @@ int PMPI_Init(int*, char***)
 int PMPI_Finalize()
 {
   smpi_bench_end();
+  CHECK_COLLECTIVE(smpi_process()->comm_world(), "MPI_Finalize")
   aid_t rank_traced = simgrid::s4u::this_actor::get_pid();
   smpi_process()->mark_as_finalizing();
   TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::NoOpTIData("finalize"));
diff --git a/src/smpi/bindings/smpi_pmpi_coll.cpp b/src/smpi/bindings/smpi_pmpi_coll.cpp
index 3fd9db7e0f..4549c06970 100644
--- a/src/smpi/bindings/smpi_pmpi_coll.cpp
+++ b/src/smpi/bindings/smpi_pmpi_coll.cpp
@@ -37,7 +37,7 @@ int PMPI_Ibarrier(MPI_Comm comm, MPI_Request *request)
 {
   CHECK_COMM(1)
   CHECK_REQUEST(2)
-
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Barrier" : "PMPI_Ibarrier")
   const SmpiBenchGuard suspend_bench;
   aid_t pid = simgrid::s4u::this_actor::get_pid();
   TRACE_smpi_comm_in(pid, request == MPI_REQUEST_IGNORED ? "PMPI_Barrier" : "PMPI_Ibarrier",
@@ -67,6 +67,7 @@ int PMPI_Ibcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm
   CHECK_BUFFER(1, buf, count, datatype)
   CHECK_ROOT(4)
   CHECK_REQUEST(6)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Bcast" : "PMPI_Ibcast")
 
   const SmpiBenchGuard suspend_bench;
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -115,6 +116,7 @@ int PMPI_Igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void
   }
   CHECK_ROOT(7)
   CHECK_REQUEST(9)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Gather" : "PMPI_Igather")
 
   const void* real_sendbuf = sendbuf;
   int real_sendcount = sendcount;
@@ -175,6 +177,7 @@ int PMPI_Igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi
   }
   CHECK_ROOT(8)
   CHECK_REQUEST(10)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Gatherv" : "PMPI_Igatherv")
 
   if (rank == root){
     for (int i = 0; i < comm->size(); i++) {
@@ -239,6 +242,7 @@ int PMPI_Iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, v
   CHECK_BUFFER(1, sendbuf, sendcount, sendtype)
   CHECK_BUFFER(4, recvbuf, recvcount, recvtype)
   CHECK_REQUEST(8)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Allgather" : "PMPI_Iallgather")
 
   if (sendbuf == MPI_IN_PLACE) {
     sendbuf = static_cast<char*>(recvbuf) + recvtype->get_extent() * recvcount * comm->rank();
@@ -296,6 +300,7 @@ int PMPI_Iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype,
     CHECK_COUNT(5, recvcounts[i])
     CHECK_BUFFER(4, recvbuf, recvcounts[i], recvtype)
   }
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Allgatherv" : "PMPI_Iallgatherv")
   const SmpiBenchGuard suspend_bench;
 
   if (sendbuf == MPI_IN_PLACE) {
@@ -351,6 +356,7 @@ int PMPI_Iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi
   }
   CHECK_ROOT(8)
   CHECK_REQUEST(9)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Scatter" : "PMPI_Iscatter")
 
   if (recvbuf == MPI_IN_PLACE) {
     recvtype = sendtype;
@@ -415,6 +421,7 @@ int PMPI_Iscatterv(const void* sendbuf, const int* sendcounts, const int* displs
   } else {
     CHECK_NOT_IN_PLACE_ROOT(4, recvbuf)
   }
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Scatterv" : "PMPI_Iscatterv")
 
   const SmpiBenchGuard suspend_bench;
 
@@ -464,6 +471,7 @@ int PMPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
   CHECK_OP(5, op, datatype)
   CHECK_ROOT(7)
   CHECK_REQUEST(8)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Reduce" : "PMPI_Ireduce")
 
   const SmpiBenchGuard suspend_bench;
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -514,6 +522,7 @@ int PMPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
   CHECK_BUFFER(1, sendbuf, count, datatype)
   CHECK_BUFFER(2, recvbuf, count, datatype)
   CHECK_REQUEST(7)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Allreduce" : "PMPI_Iallreduce")
 
   const SmpiBenchGuard suspend_bench;
   std::vector<unsigned char> tmp_sendbuf;
@@ -551,6 +560,7 @@ int PMPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat
   CHECK_BUFFER(2,recvbuf,count, datatype)
   CHECK_REQUEST(7)
   CHECK_OP(5, op, datatype)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Scan" : "PMPI_Iscan")
 
   const SmpiBenchGuard suspend_bench;
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -586,6 +596,7 @@ int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
   CHECK_BUFFER(2, recvbuf, count, datatype)
   CHECK_REQUEST(7)
   CHECK_OP(5, op, datatype)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Exscan" : "PMPI_Iexscan")
 
   const SmpiBenchGuard suspend_bench;
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -627,6 +638,7 @@ int PMPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcoun
     CHECK_BUFFER(1, sendbuf, recvcounts[i], datatype)
     CHECK_BUFFER(2, recvbuf, recvcounts[i], datatype)
   }
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Reduce_scatter" : "PMPI_Ireduce_scatter")
 
   const SmpiBenchGuard suspend_bench;
   aid_t pid = simgrid::s4u::this_actor::get_pid();
@@ -673,6 +685,7 @@ int PMPI_Ireduce_scatter_block(const void* sendbuf, void* recvbuf, int recvcount
   CHECK_BUFFER(2, recvbuf, recvcount, datatype)
   CHECK_REQUEST(7)
   CHECK_OP(5, op, datatype)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Reduce_scatter_block" : "PMPI_Ireduce_scatter_block")
 
   const SmpiBenchGuard suspend_bench;
   int count = comm->size();
@@ -721,6 +734,7 @@ int PMPI_Ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, vo
   CHECK_COUNT(5, recvcount)
   CHECK_BUFFER(4, recvbuf, recvcount, recvtype)
   CHECK_REQUEST(8)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Alltoall" : "PMPI_Ialltoall")
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
   int real_sendcount = sendcount;
@@ -779,6 +793,7 @@ int PMPI_Ialltoallv(const void* sendbuf, const int* sendcounts, const int* sendd
   CHECK_NULL(6, MPI_ERR_COUNT, recvcounts)
   CHECK_NULL(7, MPI_ERR_ARG, recvdispls)
   CHECK_REQUEST(10)
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Alltoallv" : "PMPI_Ialltoallv")
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
   int size = comm->size();
@@ -883,6 +898,7 @@ int PMPI_Ialltoallw(const void* sendbuf, const int* sendcounts, const int* sendd
     CHECK_TYPE(8, recvtypes[i])
     CHECK_BUFFER(5, recvbuf, recvcounts[i], recvtypes[i])
   }
+  CHECK_COLLECTIVE(comm, request == MPI_REQUEST_IGNORED ? "PMPI_Alltoallw" : "PMPI_Ialltoallw")
 
   const SmpiBenchGuard suspend_bench;
 
diff --git a/src/smpi/bindings/smpi_pmpi_comm.cpp b/src/smpi/bindings/smpi_pmpi_comm.cpp
index ac85cf374f..4416bfe208 100644
--- a/src/smpi/bindings/smpi_pmpi_comm.cpp
+++ b/src/smpi/bindings/smpi_pmpi_comm.cpp
@@ -128,6 +128,7 @@ int PMPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm* comm_out)
 {
   CHECK_NULL(4, MPI_ERR_ARG, comm_out)
   CHECK_COMM2(1, comm)
+  CHECK_COLLECTIVE(comm, __func__)
   if( color != MPI_UNDEFINED)//we use a negative value for MPI_UNDEFINED
     CHECK_NEGATIVE(3, MPI_ERR_ARG, color)
   const SmpiBenchGuard suspend_bench;
@@ -139,6 +140,7 @@ int PMPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info,
 {
   CHECK_COMM(1)
   CHECK_NULL(5, MPI_ERR_ARG, newcomm)
+  CHECK_COLLECTIVE(comm, __func__)
   const SmpiBenchGuard suspend_bench;
   *newcomm = comm->split_type(split_type, key, info);
   return MPI_SUCCESS;
diff --git a/src/smpi/bindings/smpi_pmpi_file.cpp b/src/smpi/bindings/smpi_pmpi_file.cpp
index 70df57f25d..e87680f94b 100644
--- a/src/smpi/bindings/smpi_pmpi_file.cpp
+++ b/src/smpi/bindings/smpi_pmpi_file.cpp
@@ -38,6 +38,7 @@ extern MPI_Errhandler SMPI_default_File_Errhandler;
 
 int PMPI_File_open(MPI_Comm comm, const char *filename, int amode, MPI_Info info, MPI_File *fh){
   CHECK_COMM(1)
+  CHECK_COLLECTIVE(comm, "MPI_File_open")
   CHECK_NULL(2, MPI_ERR_FILE, filename)
   if (amode < 0)
     return MPI_ERR_AMODE;
@@ -54,6 +55,7 @@ int PMPI_File_open(MPI_Comm comm, const char *filename, int amode, MPI_Info info
 
 int PMPI_File_close(MPI_File *fh){
   CHECK_NULL(2, MPI_ERR_ARG, fh)
+  CHECK_COLLECTIVE((*fh)->comm(), __func__)
   const SmpiBenchGuard suspend_bench;
   int ret = simgrid::smpi::File::close(fh);
   *fh = MPI_FILE_NULL;
@@ -69,6 +71,7 @@ int PMPI_File_seek(MPI_File fh, MPI_Offset offset, int whence){
 
 int PMPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence){
   CHECK_FILE(1, fh)
+  CHECK_COLLECTIVE(fh->comm(), __func__)
   const SmpiBenchGuard suspend_bench;
   int ret = fh->seek_shared(offset,whence);
   return ret;
@@ -143,6 +146,7 @@ int PMPI_File_write_shared(MPI_File fh, const void *buf, int count,MPI_Datatype
 int PMPI_File_read_all(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){
   CHECK_FILE_INPUTS
   CHECK_WRONLY(fh)
+  CHECK_COLLECTIVE(fh->comm(), __func__)
   const SmpiBenchGuard suspend_bench;
   aid_t rank_traced = simgrid::s4u::this_actor::get_pid();
   TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::CpuTIData("IO - read_all", count * datatype->size()));
@@ -154,6 +158,7 @@ int PMPI_File_read_all(MPI_File fh, void *buf, int count,MPI_Datatype datatype,
 int PMPI_File_read_ordered(MPI_File fh, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){
   CHECK_FILE_INPUTS
   CHECK_WRONLY(fh)
+  CHECK_COLLECTIVE(fh->comm(), __func__)
   const SmpiBenchGuard suspend_bench;
   aid_t rank_traced = simgrid::s4u::this_actor::get_pid();
   TRACE_smpi_comm_in(rank_traced, __func__,
@@ -166,6 +171,7 @@ int PMPI_File_read_ordered(MPI_File fh, void *buf, int count,MPI_Datatype dataty
 int PMPI_File_write_all(MPI_File fh, const void *buf, int count,MPI_Datatype datatype, MPI_Status *status){
   CHECK_FILE_INPUTS
   CHECK_RDONLY(fh)
+  CHECK_COLLECTIVE(fh->comm(), __func__)
   const SmpiBenchGuard suspend_bench;
   aid_t rank_traced = simgrid::s4u::this_actor::get_pid();
   TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::CpuTIData("IO - write_all", count * datatype->size()));
@@ -177,6 +183,7 @@ int PMPI_File_write_all(MPI_File fh, const void *buf, int count,MPI_Datatype dat
 int PMPI_File_write_ordered(MPI_File fh, const void *buf, int count,MPI_Datatype datatype, MPI_Status *status){
   CHECK_FILE_INPUTS
   CHECK_RDONLY(fh)
+  CHECK_COLLECTIVE(fh->comm(), __func__)
   const SmpiBenchGuard suspend_bench;
   aid_t rank_traced = simgrid::s4u::this_actor::get_pid();
   TRACE_smpi_comm_in(rank_traced, __func__,
@@ -203,6 +210,7 @@ int PMPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count,MPI_D
 int PMPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, int count,MPI_Datatype datatype, MPI_Status *status){
   CHECK_FILE_INPUT_OFFSET
   CHECK_WRONLY(fh)
+  CHECK_COLLECTIVE(fh->comm(), __func__)
   const SmpiBenchGuard suspend_bench;
   aid_t rank_traced = simgrid::s4u::this_actor::get_pid();
   TRACE_smpi_comm_in(rank_traced, __func__,
@@ -231,6 +239,7 @@ int PMPI_File_write_at(MPI_File fh, MPI_Offset offset, const void *buf, int coun
 int PMPI_File_write_at_all(MPI_File fh, MPI_Offset offset, const void *buf, int count,MPI_Datatype datatype, MPI_Status *status){
   CHECK_FILE_INPUT_OFFSET
   CHECK_RDONLY(fh)
+  CHECK_COLLECTIVE(fh->comm(), __func__)
   const SmpiBenchGuard suspend_bench;
   aid_t rank_traced = simgrid::s4u::this_actor::get_pid();
   TRACE_smpi_comm_in(rank_traced, __func__,
@@ -252,6 +261,7 @@ int PMPI_File_delete(const char *filename, MPI_Info info){
 
 int PMPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, const char *datarep, MPI_Info info){
   CHECK_FILE(1, fh)
+  CHECK_COLLECTIVE(fh->comm(), __func__)
   if(not ((fh->flags() & MPI_MODE_SEQUENTIAL) && (disp == MPI_DISPLACEMENT_CURRENT)))
     CHECK_OFFSET(2, disp)
   CHECK_TYPE(3, etype)
@@ -295,6 +305,7 @@ int PMPI_File_get_size(MPI_File fh, MPI_Offset* size)
 int PMPI_File_set_size(MPI_File fh, MPI_Offset size)
 {
   CHECK_FILE(1, fh)
+  CHECK_COLLECTIVE(fh->comm(), __func__)
   fh->set_size(size);
   return MPI_SUCCESS;
 }
diff --git a/src/smpi/bindings/smpi_pmpi_topo.cpp b/src/smpi/bindings/smpi_pmpi_topo.cpp
index 36035770d6..9198240cdc 100644
--- a/src/smpi/bindings/smpi_pmpi_topo.cpp
+++ b/src/smpi/bindings/smpi_pmpi_topo.cpp
@@ -24,6 +24,7 @@ int PMPI_Cart_create(MPI_Comm comm, int ndims, const int* dims, const int* perio
   CHECK_NEGATIVE(2, MPI_ERR_ARG, ndims)
   for (int i = 0; i < ndims; i++)
     CHECK_NEGATIVE(2, MPI_ERR_ARG, dims[i])
+  CHECK_COLLECTIVE(comm, __func__)
   const auto* topo = new simgrid::smpi::Topo_Cart(comm, ndims, dims, periodic, reorder, comm_cart);
   if (*comm_cart == MPI_COMM_NULL) {
     delete topo;
@@ -109,6 +110,7 @@ int PMPI_Cart_sub(MPI_Comm comm, const int* remain_dims, MPI_Comm* comm_new) {
   CHECK_COMM(1)
   CHECK_NULL(1, MPI_ERR_TOPOLOGY, comm->topo())
   CHECK_NULL(3, MPI_ERR_ARG, comm_new)
+  CHECK_COLLECTIVE(comm, __func__)
   auto* topo = static_cast<simgrid::smpi::Topo_Cart*>(comm->topo().get());
   if (topo==nullptr) {
     return MPI_ERR_ARG;
diff --git a/src/smpi/bindings/smpi_pmpi_win.cpp b/src/smpi/bindings/smpi_pmpi_win.cpp
index 9c7e74d403..34349d6a5f 100644
--- a/src/smpi/bindings/smpi_pmpi_win.cpp
+++ b/src/smpi/bindings/smpi_pmpi_win.cpp
@@ -33,6 +33,7 @@ int PMPI_Win_create( void *base, MPI_Aint size, int disp_unit, MPI_Info info, MP
   CHECK_BUFFER(1, base, size, MPI_BYTE)
   CHECK_NEGATIVE(2, MPI_ERR_OTHER, size)
   CHECK_NEGATIVE(3, MPI_ERR_OTHER, disp_unit)
+  CHECK_COLLECTIVE(comm, __func__)
   const SmpiBenchGuard suspend_bench;
   if (base == nullptr && size != 0){
     retval= MPI_ERR_OTHER;
@@ -100,6 +101,7 @@ int PMPI_Win_detach(MPI_Win win, const void* base)
 int PMPI_Win_free( MPI_Win* win){
   CHECK_NULL(1, MPI_ERR_WIN, win)
   CHECK_WIN(1, (*win))
+  CHECK_COLLECTIVE((*win)->comm(), __func__)
   if (_smpi_cfg_pedantic && (*win)->opened() == 1){//only check in pedantic mode, as it's not clear this is illegal
     XBT_WARN("Attempt to destroy a MPI_Win too early -missing MPI_Win_fence ?");
     return MPI_ERR_WIN;
diff --git a/src/smpi/include/private.hpp b/src/smpi/include/private.hpp
index 7d1fa6c62d..9b2a5164f6 100644
--- a/src/smpi/include/private.hpp
+++ b/src/smpi/include/private.hpp
@@ -582,6 +582,10 @@ XBT_PRIVATE void private_execute_flops(double flops);
 #define CHECK_COMM2(num, comm)\
   CHECK_MPI_NULL((num), MPI_COMM_NULL, MPI_ERR_COMM, (comm))
 
+#define CHECK_COLLECTIVE(comm, call)\
+  CHECK_ARGS((simgrid::smpi::utils::check_collectives_ordering((comm), std::string(call)) != MPI_SUCCESS), MPI_ERR_OTHER,\
+             "%s: collective mismatch", call)
+
 #define CHECK_DELETED(num, err, obj)\
   CHECK_ARGS((obj)->deleted(), (err), "%s: param %d %s has already been freed", __func__, (num),\
              _XBT_STRINGIFY(obj))
-- 
2.20.1
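
Reviewer note (not part of the patch): below is a minimal, deliberately erroneous MPI program of the kind these new guards target; the ranks disagree on which collective they enter next on MPI_COMM_WORLD. This is an illustration only, written under the assumption that check_collectives_ordering() compares the call name passed to CHECK_COLLECTIVE across the ranks of the communicator; that utility is not part of this diff, so the exact diagnostic may differ. Per the CHECK_COLLECTIVE macro above, such a mismatch should surface as an MPI_ERR_OTHER "collective mismatch" error when the program is run under SMPI with at least two ranks, instead of a silent hang.

  /* mismatch.cpp: illustrative only, intentionally wrong. Ranks call different collectives. */
  #include <mpi.h>
  #include <cstdio>

  int main(int argc, char* argv[])
  {
    MPI_Init(&argc, &argv);

    int rank  = 0;
    int value = 42;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0)
      MPI_Bcast(&value, 1, MPI_INT, 0, MPI_COMM_WORLD); /* rank 0 enters a broadcast... */
    else
      MPI_Barrier(MPI_COMM_WORLD);                      /* ...while the other ranks enter a barrier */

    std::printf("rank %d done\n", rank);
    MPI_Finalize();
    return 0;
  }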