From cb4ef7fe43e87109c20adad0826a0db500c45ec1 Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Thu, 13 Feb 2020 18:37:46 +0100 Subject: [PATCH] #139 : - have group::rank check for parent process in case it is called by a subprocess. - add SMPI_thread_create, which is rather empty, as there is apparently no need for user_data anymore (?). --- include/smpi/smpi.h | 1 + src/smpi/internals/smpi_global.cpp | 4 ++++ src/smpi/mpi/smpi_group.cpp | 3 +++ teshsuite/smpi/gh-139/gh-139.c | 12 ++++++------ teshsuite/smpi/gh-139/gh-139.tesh | 16 ++++++++-------- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/include/smpi/smpi.h b/include/smpi/smpi.h index aa6d101507..32b20bfc93 100644 --- a/include/smpi/smpi.h +++ b/include/smpi/smpi.h @@ -1058,6 +1058,7 @@ XBT_PUBLIC void smpi_replay_run(const char* instance_id, int rank, double start_ XBT_PUBLIC void SMPI_app_instance_register(const char* name, xbt_main_func_t code, int num_processes); XBT_PUBLIC void SMPI_init(); XBT_PUBLIC void SMPI_finalize(); +XBT_PUBLIC void SMPI_thread_create(); SG_END_DECL diff --git a/src/smpi/internals/smpi_global.cpp b/src/smpi/internals/smpi_global.cpp index 337403827f..7cd5672808 100644 --- a/src/smpi/internals/smpi_global.cpp +++ b/src/smpi/internals/smpi_global.cpp @@ -640,3 +640,7 @@ void smpi_mpi_init() { if(smpi_init_sleep > 0) simgrid::s4u::this_actor::sleep_for(smpi_init_sleep); } + +void SMPI_thread_create() { + TRACE_smpi_init(simgrid::s4u::this_actor::get_pid()); +} diff --git a/src/smpi/mpi/smpi_group.cpp b/src/smpi/mpi/smpi_group.cpp index 67655d4938..162b8deb08 100644 --- a/src/smpi/mpi/smpi_group.cpp +++ b/src/smpi/mpi/smpi_group.cpp @@ -62,6 +62,9 @@ s4u::Actor* Group::actor(int rank) int Group::rank(s4u::Actor* actor) { auto iterator = actor_to_rank_map_.find(actor); + //I'm not in the communicator ... but maybe my parent is ? 
+ if (iterator == actor_to_rank_map_.end()) + iterator = actor_to_rank_map_.find(s4u::Actor::by_pid(actor->get_ppid()).get()); return (iterator == actor_to_rank_map_.end()) ? MPI_UNDEFINED : (*iterator).second; } diff --git a/teshsuite/smpi/gh-139/gh-139.c b/teshsuite/smpi/gh-139/gh-139.c index 6a9d436aa8..9379b2c0df 100644 --- a/teshsuite/smpi/gh-139/gh-139.c +++ b/teshsuite/smpi/gh-139/gh-139.c @@ -33,16 +33,14 @@ static void thread_create_wrapper(XBT_ATTRIB_UNUSED int argc, XBT_ATTRIB_UNUSED struct threadwrap* t = (struct threadwrap*)sg_actor_self_data(); XBT_INFO("new thread has parameter rank %d and global variable rank %d", ((struct param*)(t->param))->rank, the_global_rank); - sg_actor_self_data_set(t->father_data); + SMPI_thread_create(); t->f(t->param); - sg_actor_self_data_set(NULL); free(t); } static void mpi_thread_create(const char* name, void* (*f)(void*), void* param) { struct threadwrap* threadwrap = (struct threadwrap*)malloc(sizeof(*threadwrap)); - threadwrap->father_data = sg_actor_self_data(); threadwrap->f = f; threadwrap->param = param; sg_actor_t actor = sg_actor_init(name, sg_host_self()); @@ -65,13 +63,15 @@ void* req_wait(void* bar) struct param* param = (struct param*)bar; int rank; MPI_Status status; - + char err_string[1024]; + int length = 1024; MPI_Comm_rank(MPI_COMM_WORLD, &rank); XBT_INFO("%d has MPI rank %d and global variable rank %d", param->rank, rank, global_rank); XBT_INFO("%d waiting request", rank); - MPI_Wait(param->req, &status); - XBT_INFO("%d request done", rank); + int ret = MPI_Wait(param->req, &status); + MPI_Error_string(ret, err_string, &length); + XBT_INFO("%d request done, return %s", rank, err_string); XBT_INFO("%d still has MPI rank %d and global variable %d", param->rank, rank, global_rank); free(param); return NULL; diff --git a/teshsuite/smpi/gh-139/gh-139.tesh b/teshsuite/smpi/gh-139/gh-139.tesh index acb01f0eb5..e854a11b6d 100644 --- a/teshsuite/smpi/gh-139/gh-139.tesh +++ 
b/teshsuite/smpi/gh-139/gh-139.tesh @@ -2,14 +2,14 @@ $ ${bindir:=.}/../../../smpi_script/bin/smpirun -np 2 -platform ../../../example > [Tremblay:0:(1) 0.000000] [smpi_test/INFO] I'm 0/2 > [Jupiter:1:(2) 0.000000] [smpi_test/INFO] I'm 1/2 > [Tremblay:wait send:(3) 0.000000] [smpi_test/INFO] new thread has parameter rank 0 and global variable rank 0 -> [Tremblay:wait send:(3) 0.000000] [smpi_test/INFO] 0 has MPI rank -333 and global variable rank 0 -> [Tremblay:wait send:(3) 0.000000] [smpi_test/INFO] -333 waiting request -> [Tremblay:wait send:(3) 0.000000] [smpi_test/INFO] -333 request done -> [Tremblay:wait send:(3) 0.000000] [smpi_test/INFO] 0 still has MPI rank -333 and global variable 0 +> [Tremblay:wait send:(3) 0.000000] [smpi_test/INFO] 0 has MPI rank 0 and global variable rank 0 +> [Tremblay:wait send:(3) 0.000000] [smpi_test/INFO] 0 waiting request +> [Tremblay:wait send:(3) 0.000000] [smpi_test/INFO] 0 request done, return MPI_SUCCESS +> [Tremblay:wait send:(3) 0.000000] [smpi_test/INFO] 0 still has MPI rank 0 and global variable 0 > [Jupiter:wait recv:(4) 0.000000] [smpi_test/INFO] new thread has parameter rank 1 and global variable rank 1 -> [Jupiter:wait recv:(4) 0.000000] [smpi_test/INFO] 1 has MPI rank -333 and global variable rank 1 -> [Jupiter:wait recv:(4) 0.000000] [smpi_test/INFO] -333 waiting request -> [Jupiter:wait recv:(4) 0.002945] [smpi_test/INFO] -333 request done -> [Jupiter:wait recv:(4) 0.002945] [smpi_test/INFO] 1 still has MPI rank -333 and global variable 1 +> [Jupiter:wait recv:(4) 0.000000] [smpi_test/INFO] 1 has MPI rank 1 and global variable rank 1 +> [Jupiter:wait recv:(4) 0.000000] [smpi_test/INFO] 1 waiting request +> [Jupiter:wait recv:(4) 0.002945] [smpi_test/INFO] 1 request done, return MPI_SUCCESS +> [Jupiter:wait recv:(4) 0.002945] [smpi_test/INFO] 1 still has MPI rank 1 and global variable 1 > [Tremblay:0:(1) 1.000000] [smpi_test/INFO] finally 42 > [Jupiter:1:(2) 2.000000] [smpi_test/INFO] finally 42 -- 2.20.1