From: Augustin Degomme Date: Wed, 16 Jun 2021 22:24:24 +0000 (+0200) Subject: Add a SMPI_SAMPLE_LOCAL_TAG and SMPI_SAMPLE_GLOBAL_TAG macro for sampling, to provide... X-Git-Tag: v3.28~95 X-Git-Url: http://bilbo.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/022fcd4dab24301e12c127f24f04c05d90d2d39a Add SMPI_SAMPLE_LOCAL_TAG and SMPI_SAMPLE_GLOBAL_TAG macros for sampling, which take a tag parameter to tell apart separate calls to the same sampling site. This can be used when a kernel is called with various distinct sets of parameters. The tag is a string; it is concatenated with the line number into a 128-byte buffer, so the tag plus the line-number digits and the terminating NUL must fit in 128 bytes. --- diff --git a/include/smpi/smpi.h b/include/smpi/smpi.h index f9cba1c87b..e839a43b2b 100644 --- a/include/smpi/smpi.h +++ b/include/smpi/smpi.h @@ -1150,10 +1150,10 @@ XBT_PUBLIC void smpi_execute_benched(double duration); XBT_PUBLIC unsigned long long smpi_rastro_resolution(); XBT_PUBLIC unsigned long long smpi_rastro_timestamp(); -XBT_PUBLIC void smpi_sample_1(int global, const char* file, int line, int iters, double threshold); -XBT_PUBLIC int smpi_sample_2(int global, const char* file, int line, int iter_count); -XBT_PUBLIC void smpi_sample_3(int global, const char* file, int line); -XBT_PUBLIC int smpi_sample_exit(int global, const char* file, int line, int iter_count); +XBT_PUBLIC void smpi_sample_1(int global, const char* file, const char* tag, int iters, double threshold); +XBT_PUBLIC int smpi_sample_2(int global, const char* file, const char* tag, int iter_count); +XBT_PUBLIC void smpi_sample_3(int global, const char* file, const char* tag); +XBT_PUBLIC int smpi_sample_exit(int global, const char* file, const char* tag, int iter_count); /** * Need a public setter for SMPI copy_callback function, so users can define * their own while still using default SIMIX_copy_callback for S4U copies. 
@@ -1173,7 +1173,12 @@ XBT_PUBLIC void smpi_trace_set_call_location__(const char* file, const int* line #define SMPI_ITER_NAME1(line) _XBT_CONCAT(iter_count, line) #define SMPI_ITER_NAME(line) SMPI_ITER_NAME1(line) -#define SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, global, iters, thres) \ +#define SMPI_CTAG_NAME1(line) _XBT_CONCAT(ctag, line) +#define SMPI_CTAG_NAME(line) SMPI_CTAG_NAME1(line) + +#define SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, global, iters, thres, tag) \ + char SMPI_CTAG_NAME(__LINE__) [128]; \ + sprintf( SMPI_CTAG_NAME(__LINE__), "%s%d", tag, __LINE__); \ int SMPI_ITER_NAME(__LINE__) = 0; \ { \ loop_init; \ @@ -1182,14 +1187,20 @@ XBT_PUBLIC void smpi_trace_set_call_location__(const char* file, const int* line (loop_iter); \ } \ } \ - for (loop_init; (loop_end) ? (smpi_sample_1((global), __FILE__, __LINE__, (iters), (thres)), \ - (smpi_sample_2((global), __FILE__, __LINE__, SMPI_ITER_NAME(__LINE__)))) \ - : smpi_sample_exit((global), __FILE__, __LINE__, SMPI_ITER_NAME(__LINE__)); \ - smpi_sample_3((global), __FILE__, __LINE__), (loop_iter)) + for ( loop_init; \ + (loop_end) ? 
(smpi_sample_1((global), __FILE__, SMPI_CTAG_NAME(__LINE__), (iters), (thres)) \ + , (smpi_sample_2((global), __FILE__, SMPI_CTAG_NAME(__LINE__), SMPI_ITER_NAME(__LINE__)))) \ + : smpi_sample_exit((global), __FILE__, SMPI_CTAG_NAME(__LINE__), SMPI_ITER_NAME(__LINE__)); \ + smpi_sample_3((global), __FILE__, SMPI_CTAG_NAME(__LINE__)), (loop_iter) ) + #define SMPI_SAMPLE_LOCAL(loop_init, loop_end, loop_iter, iters, thres) \ - SMPI_SAMPLE_LOOP(loop_init, (loop_end), (loop_iter), 0, (iters), (thres)) + SMPI_SAMPLE_LOOP(loop_init, (loop_end), (loop_iter), 0, (iters), (thres), "") +#define SMPI_SAMPLE_LOCAL_TAG(loop_init, loop_end, loop_iter, iters, thres, tag) \ + SMPI_SAMPLE_LOOP(loop_init, (loop_end), (loop_iter), 0, (iters), (thres), tag) #define SMPI_SAMPLE_GLOBAL(loop_init, loop_end, loop_iter, iters, thres) \ - SMPI_SAMPLE_LOOP(loop_init, (loop_end), (loop_iter), 1, (iters), (thres)) + SMPI_SAMPLE_LOOP(loop_init, (loop_end), (loop_iter), 1, (iters), (thres), "") +#define SMPI_SAMPLE_GLOBAL_TAG(loop_init, loop_end, loop_iter, iters, thres, tag) \ + SMPI_SAMPLE_LOOP(loop_init, (loop_end), (loop_iter), 1, (iters), (thres), tag) #define SMPI_SAMPLE_DELAY(duration) for(smpi_execute(duration); 0; ) #define SMPI_SAMPLE_FLOPS(flops) for(smpi_execute_flops(flops); 0; ) XBT_PUBLIC void* smpi_shared_malloc(size_t size, const char* file, int line); diff --git a/src/smpi/internals/smpi_bench.cpp b/src/smpi/internals/smpi_bench.cpp index 1e77dc6b14..71af3a8428 100644 --- a/src/smpi/internals/smpi_bench.cpp +++ b/src/smpi/internals/smpi_bench.cpp @@ -56,7 +56,7 @@ void smpi_execute_flops(double flops) void smpi_execute(double duration) { if (duration >= smpi_cfg_cpu_thresh()) { - XBT_DEBUG("Sleep for %g to handle real computation time", duration); + XBT_DEBUG("Sleep for %gs (host time) to handle real computation time", duration); private_execute_flops(duration * smpi_cfg_host_speed()); } else { XBT_DEBUG("Real computation took %g while option smpi/cpu-threshold is set to %g => 
ignore it", duration, @@ -276,7 +276,7 @@ unsigned long long smpi_rastro_timestamp () namespace { class SampleLocation : public std::string { public: - SampleLocation(bool global, const char* file, int line) : std::string(std::string(file) + ":" + std::to_string(line)) + SampleLocation(bool global, const char* file, const char* tag) : std::string(std::string(file) + ":" + std::string(tag)) { if (not global) this->append(":" + std::to_string(simgrid::s4u::this_actor::get_pid())); @@ -309,9 +309,9 @@ bool LocalData::need_more_benchs() const std::unordered_map> samples; } -void smpi_sample_1(int global, const char *file, int line, int iters, double threshold) +void smpi_sample_1(int global, const char *file, const char *tag, int iters, double threshold) { - SampleLocation loc(global, file, line); + SampleLocation loc(global, file, tag); if (not smpi_process()->sampling()) { /* Only at first call when benchmarking, skip for next ones */ smpi_bench_end(); /* Take time from previous, unrelated computation into account */ smpi_process()->set_sampling(1); @@ -348,9 +348,9 @@ void smpi_sample_1(int global, const char *file, int line, int iters, double thr } } -int smpi_sample_2(int global, const char *file, int line, int iter_count) +int smpi_sample_2(int global, const char *file,const char *tag, int iter_count) { - SampleLocation loc(global, file, line); + SampleLocation loc(global, file, tag); XBT_DEBUG("sample2 %s %d", loc.c_str(), iter_count); auto sample = samples.find(loc); @@ -384,9 +384,9 @@ int smpi_sample_2(int global, const char *file, int line, int iter_count) return 1; } -void smpi_sample_3(int global, const char *file, int line) +void smpi_sample_3(int global, const char *file, const char* tag) { - SampleLocation loc(global, file, line); + SampleLocation loc(global, file, tag); XBT_DEBUG("sample3 %s", loc.c_str()); auto sample = samples.find(loc); @@ -416,9 +416,9 @@ void smpi_sample_3(int global, const char *file, int line) data.benching = false; } -int 
smpi_sample_exit(int global, const char *file, int line, int iter_count){ +int smpi_sample_exit(int global, const char *file, const char* tag, int iter_count){ if (smpi_process()->sampling()){ - SampleLocation loc(global, file, line); + SampleLocation loc(global, file, tag); XBT_DEBUG("sample exit %s", loc.c_str()); auto sample = samples.find(loc); diff --git a/teshsuite/smpi/macro-sample/macro-sample.c b/teshsuite/smpi/macro-sample/macro-sample.c index 56c8ed186c..429d87ad1b 100644 --- a/teshsuite/smpi/macro-sample/macro-sample.c +++ b/teshsuite/smpi/macro-sample/macro-sample.c @@ -43,6 +43,21 @@ int main(int argc, char *argv[]) d = compute(2.0); } + //tagged version, should differentiate between two different calls to the same kernel and run calibration even on the second one + for (int tag=0; tag < 4; tag++){ + char ctag [8]; + //run twice with the same tag, test should skip 1 and 3, as they were already benched. + sprintf(ctag, "%d", tag - tag%2); + SMPI_SAMPLE_GLOBAL_TAG(int i = 0, i < 500, i++, 2, 0.1, ctag){ + if (verbose) + fprintf(stderr, "(%12.6f) [rank:%d]", MPI_Wtime(), rank); + else + fprintf(stderr, "(0)"); + fprintf(stderr, " Run the computation %d with tag %d\n", tag, tag- tag%2); + d = compute(2.0); + } + } + n = 0; //Use 0 as max iter, but one will always be performed by design. SMPI_SAMPLE_LOCAL (int i = 0, i < 5, i++,0, 0.1){ diff --git a/teshsuite/smpi/macro-sample/macro-sample.tesh b/teshsuite/smpi/macro-sample/macro-sample.tesh index d0d8006be6..a795f927a5 100644 --- a/teshsuite/smpi/macro-sample/macro-sample.tesh +++ b/teshsuite/smpi/macro-sample/macro-sample.tesh @@ -9,6 +9,10 @@ $ ${bindir:=.}/../../../smpi_script/bin/smpirun -hostfile ../hostfile -platform > (1) [rank:0] Run the second (locally benched) computation. It's locally benched, and I want the standard error to go below 0.1 second (count is not >0) > (1) [rank:1] Run the second (locally benched) computation. 
It's locally benched, and I want the standard error to go below 0.1 second (count is not >0) > (1) [rank:2] Run the second (locally benched) computation. It's locally benched, and I want the standard error to go below 0.1 second (count is not >0) +> (0) Run the computation 0 with tag 0 +> (0) Run the computation 0 with tag 0 +> (0) Run the computation 2 with tag 2 +> (0) Run the computation 2 with tag 2 > (2) [rank:0] Done. > (2) [rank:1] Done. > (2) [rank:2] Done.