From ebf52365999da9055076015c4d0296effd224795 Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Tue, 19 Nov 2019 13:56:26 +0100 Subject: [PATCH] add option "smpi/auto-shared-malloc-thresh" This applies smpi_shared_malloc automatically to all allocations above a certain size. Test based on the sample-shared one: malloc >= 8bytes is indeed shared, without using macros. todo: calloc and co. --- include/smpi/sampi.h | 3 + include/smpi/smpi.h | 3 +- include/smpi/smpi_helpers.h | 5 +- include/smpi/smpi_helpers_internal.h | 2 + src/simgrid/sg_config.cpp | 4 ++ src/smpi/internals/smpi_shared.cpp | 13 ++++- teshsuite/smpi/CMakeLists.txt | 5 +- teshsuite/smpi/auto-shared/auto-shared.c | 61 +++++++++++++++++++++ teshsuite/smpi/auto-shared/auto-shared.tesh | 28 ++++++++++ 9 files changed, 118 insertions(+), 6 deletions(-) create mode 100644 teshsuite/smpi/auto-shared/auto-shared.c create mode 100644 teshsuite/smpi/auto-shared/auto-shared.tesh diff --git a/include/smpi/sampi.h b/include/smpi/sampi.h index b51f8ee5d0..5e011d9d81 100644 --- a/include/smpi/sampi.h +++ b/include/smpi/sampi.h @@ -6,6 +6,7 @@ #ifndef SAMPI_H_ #define SAMPI_H_ +#define SAMPI_OVERRIDEN_MALLOC #include #include @@ -14,6 +15,8 @@ type _XBT_CONCAT(AP, name) args; #ifndef HAVE_SMPI +#undef malloc +#undef free // Internally disable these overrides (HAVE_SMPI is only defined when building the library) #define malloc(nbytes) _sampi_malloc(nbytes) #define calloc(n_elm, elm_size) _sampi_calloc((n_elm), (elm_size)) diff --git a/include/smpi/smpi.h b/include/smpi/smpi.h index cae3e1f240..3407cc742c 100644 --- a/include/smpi/smpi.h +++ b/include/smpi/smpi.h @@ -1025,14 +1025,13 @@ XBT_PUBLIC void smpi_trace_set_call_location__(const char* file, int* line); SMPI_SAMPLE_LOOP(loop_init, (loop_end), (loop_iter), 1, (iters), (thres)) #define SMPI_SAMPLE_DELAY(duration) for(smpi_execute(duration); 0; ) #define SMPI_SAMPLE_FLOPS(flops) for(smpi_execute_flops(flops); 0; ) - XBT_PUBLIC void* smpi_shared_malloc(size_t 
size, const char* file, int line); #define SMPI_SHARED_MALLOC(size) smpi_shared_malloc((size), __FILE__, __LINE__) XBT_PUBLIC void* smpi_shared_malloc_partial(size_t size, size_t* shared_block_offsets, int nb_shared_blocks); #define SMPI_PARTIAL_SHARED_MALLOC(size, shared_block_offsets, nb_shared_blocks) \ smpi_shared_malloc_partial((size), (shared_block_offsets), (nb_shared_blocks)) -XBT_PUBLIC void smpi_shared_free(void* data); + #define SMPI_SHARED_FREE(data) smpi_shared_free(data) XBT_PUBLIC int smpi_shared_known_call(const char* func, const char* input); diff --git a/include/smpi/smpi_helpers.h b/include/smpi/smpi_helpers.h index b1a12da8e9..55efcf8090 100644 --- a/include/smpi/smpi_helpers.h +++ b/include/smpi/smpi_helpers.h @@ -32,5 +32,8 @@ #define getopt(x, y, z) smpi_getopt((x), (y), (z)) #define getopt_long(x, y, z, a, b) smpi_getopt_long((x), (y), (z), (a), (b)) #define getopt_long_only(x, y, z, a, b) smpi_getopt_long_only((x), (y), (z), (a), (b)) - +#ifndef SAMPI_OVERRIDEN_MALLOC +#define malloc(x) smpi_shared_malloc_intercept(x, __FILE__, __LINE__) +#define free(x) smpi_shared_free(x) +#endif #endif diff --git a/include/smpi/smpi_helpers_internal.h b/include/smpi/smpi_helpers_internal.h index 57490ce54b..59aa0df407 100644 --- a/include/smpi/smpi_helpers_internal.h +++ b/include/smpi/smpi_helpers_internal.h @@ -34,6 +34,8 @@ int smpi_getopt_long(int argc, char* const* argv, const char* options, const str int* opt_index); int smpi_getopt(int argc, char* const* argv, const char* options); +void* smpi_shared_malloc_intercept(size_t size, const char* file, int line); +void smpi_shared_free(void* data); #ifdef __cplusplus } // extern "C" #endif diff --git a/src/simgrid/sg_config.cpp b/src/simgrid/sg_config.cpp index e736195faf..2b8cdc2829 100644 --- a/src/simgrid/sg_config.cpp +++ b/src/simgrid/sg_config.cpp @@ -392,6 +392,10 @@ void sg_config_init(int *argc, char **argv) simgrid::config::declare_flag("smpi/shared-malloc-blocksize", "Size of the bogus file 
which will be created for global shared allocations", 1UL << 20); + simgrid::config::declare_flag("smpi/auto-shared-malloc-thresh", + "Threshold size for the automatic sharing of memory", + 0); + simgrid::config::declare_flag("smpi/shared-malloc-hugepage", "Path to a mounted hugetlbfs, to use huge pages with shared malloc.", ""); diff --git a/src/smpi/internals/smpi_shared.cpp b/src/smpi/internals/smpi_shared.cpp index defbbba636..2fb5a572cf 100644 --- a/src/smpi/internals/smpi_shared.cpp +++ b/src/smpi/internals/smpi_shared.cpp @@ -342,6 +342,14 @@ void* smpi_shared_malloc_partial(size_t size, size_t* shared_block_offsets, int return mem; } +/* malloc() interceptor: when smpi/auto-shared-malloc-thresh is 0, or the request is smaller than it, return a private ::operator new buffer; any other request goes through smpi_shared_malloc(). */ +void *smpi_shared_malloc_intercept(size_t size, const char *file, int line) { + if( simgrid::config::get_value("smpi/auto-shared-malloc-thresh") == 0 || size < simgrid::config::get_value("smpi/auto-shared-malloc-thresh")) + return ::operator new(size); + else + return smpi_shared_malloc(size, file, line); +} + void *smpi_shared_malloc(size_t size, const char *file, int line) { if (size > 0 && smpi_cfg_shared_malloc == SharedMallocType::LOCAL) { return smpi_shared_malloc_local(size, file, line); @@ -427,7 +435,10 @@ void smpi_shared_free(void *ptr) snprintf(loc, PTR_STRLEN, "%p", ptr); auto meta = allocs_metadata.find(ptr); if (meta == allocs_metadata.end()) { - XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr); + if (simgrid::config::get_value("smpi/auto-shared-malloc-thresh") > 0)//this free belongs to a malloc under the threshold.
+ ::operator delete(ptr); + else + XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr); return; } shared_data_t* data = &meta->second.data->second; diff --git a/teshsuite/smpi/CMakeLists.txt b/teshsuite/smpi/CMakeLists.txt index 178c155e02..d71b18f192 100644 --- a/teshsuite/smpi/CMakeLists.txt +++ b/teshsuite/smpi/CMakeLists.txt @@ -17,7 +17,7 @@ if(enable_smpi) endforeach() if(NOT WIN32) - foreach(x macro-shared macro-partial-shared macro-partial-shared-communication ) + foreach(x macro-shared auto-shared macro-partial-shared macro-partial-shared-communication ) add_executable (${x} EXCLUDE_FROM_ALL ${x}/${x}.c) target_link_libraries(${x} simgrid) set_target_properties(${x} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x}) @@ -38,7 +38,7 @@ endif() foreach(x coll-allgather coll-allgatherv coll-allreduce coll-alltoall coll-alltoallv coll-barrier coll-bcast coll-gather coll-reduce coll-reduce-scatter coll-scatter macro-sample pt2pt-dsend pt2pt-pingpong type-hvector type-indexed type-struct type-vector bug-17132 gh-139 timers privatization - macro-shared macro-partial-shared macro-partial-shared-communication + macro-shared auto-shared macro-partial-shared macro-partial-shared-communication io-simple io-simple-at io-all io-all-at io-shared io-ordered) set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.tesh) set(teshsuite_src ${teshsuite_src} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.c) @@ -62,6 +62,7 @@ set(bin_files ${bin_files} ${CMAKE_CURRENT_SOURCE_DIR}/hostfile if(enable_smpi) if(NOT WIN32) ADD_TESH_FACTORIES(tesh-smpi-macro-shared "thread;ucontext;raw;boost" --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/macro-shared --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/macro-shared macro-shared.tesh) + ADD_TESH_FACTORIES(tesh-smpi-auto-shared "thread;ucontext;raw;boost" --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms 
--setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/auto-shared --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/auto-shared auto-shared.tesh) ADD_TESH_FACTORIES(tesh-smpi-macro-partial-shared "thread;ucontext;raw;boost" --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/macro-partial-shared --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/macro-partial-shared macro-partial-shared.tesh) ADD_TESH_FACTORIES(tesh-smpi-macro-partial-shared-communication "thread;ucontext;raw;boost" --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/macro-partial-shared-communication --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/macro-partial-shared-communication macro-partial-shared-communication.tesh) endif() diff --git a/teshsuite/smpi/auto-shared/auto-shared.c b/teshsuite/smpi/auto-shared/auto-shared.c new file mode 100644 index 0000000000..8966cc8f1a --- /dev/null +++ b/teshsuite/smpi/auto-shared/auto-shared.c @@ -0,0 +1,61 @@ +/* Copyright (c) 2009-2019. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. 
*/ + +/* This test exercises the smpi/auto-shared-malloc-thresh option: a plain malloc() of at least the threshold size is shared between ranks, without SMPI_SHARED_MALLOC. It also uses SMPI_SHARED_CALL. */ + +#include <stdio.h> +#include <mpi.h> +#include <stdint.h> +#include <inttypes.h> + +static void* hash(const char *str, uint64_t* ans) +{ + const char *tohash = str; + *ans=5381; + printf("hashing !\n"); + int c = *tohash; + while (c != 0) { + *ans = ((*ans << 5) + *ans) + c; /* hash * 33 + c */ + tohash++; + c = *tohash; + } + return NULL; /* the hash itself is written through ans */ +} + +int main(int argc, char *argv[]) +{ + MPI_Init(&argc, &argv); + int rank; + int size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + // Plain malloc() of 8 bytes: the tesh file runs this with smpi/auto-shared-malloc-thresh:8 + uint64_t* buf = malloc(sizeof(uint64_t)); + // only rank 0 writes the communicator size into it + if(rank==0){ + *buf=size; + } + + MPI_Barrier(MPI_COMM_WORLD); + // every rank reads it; the tesh file expects all of them to print the value written by rank 0 + printf("[%d] The value in the shared buffer is: %" PRIu64"\n", rank, *buf); + + MPI_Barrier(MPI_COMM_WORLD); + //Try SMPI_SHARED_CALL function, which should call hash only once and for all. + static const char str[] = "onceandforall"; + if(rank==size-1){ + SMPI_SHARED_CALL(hash,str,str,buf); + } + + MPI_Barrier(MPI_COMM_WORLD); + + printf("[%d] After change, the value in the shared buffer is: %" PRIu64"\n", rank, *buf); + + free(buf); + + MPI_Finalize(); + return 0; +} diff --git a/teshsuite/smpi/auto-shared/auto-shared.tesh b/teshsuite/smpi/auto-shared/auto-shared.tesh new file mode 100644 index 0000000000..cfadf9f161 --- /dev/null +++ b/teshsuite/smpi/auto-shared/auto-shared.tesh @@ -0,0 +1,28 @@ +p Test compute +! output sort +!
timeout 5 +$ ${bindir:=.}/../../../smpi_script/bin/smpirun -hostfile ../hostfile -platform ../../../examples/platforms/small_platform.xml -np 4 ${bindir:=.}/auto-shared --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning --cfg=smpi/auto-shared-malloc-thresh:8 +> [0] After change, the value in the shared buffer is: 16053117601147974045 +> [0] The value in the shared buffer is: 4 +> [1] After change, the value in the shared buffer is: 16053117601147974045 +> [1] The value in the shared buffer is: 4 +> [2] After change, the value in the shared buffer is: 16053117601147974045 +> [2] The value in the shared buffer is: 4 +> [3] After change, the value in the shared buffer is: 16053117601147974045 +> [3] The value in the shared buffer is: 4 +> hashing ! + +! output sort +! timeout 5 +$ ${bindir:=.}/../../../smpi_script/bin/smpirun -hostfile ../hostfile -platform ../../../examples/platforms/small_platform.xml -np 4 ${bindir:=.}/auto-shared --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning --cfg=smpi/shared-malloc:local --cfg=smpi/auto-shared-malloc-thresh:8 +> [0] After change, the value in the shared buffer is: 16053117601147974045 +> [0] The value in the shared buffer is: 4 +> [1] After change, the value in the shared buffer is: 16053117601147974045 +> [1] The value in the shared buffer is: 4 +> [2] After change, the value in the shared buffer is: 16053117601147974045 +> [2] The value in the shared buffer is: 4 +> [3] After change, the value in the shared buffer is: 16053117601147974045 +> [3] The value in the shared buffer is: 4 +> hashing ! + + -- 2.20.1