1 /* Copyright (c) 2007, 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
11 #include "surf/surf.h"
17 #include <sys/types.h>
20 #include <math.h> // sqrt
/* Declares the log subcategory `smpi_bench` as a child of `smpi`.
 * NOTE(review): judging by the macro name it also becomes the default
 * category for the XBT_DEBUG/XBT_WARN/XBT_ERROR calls of this file --
 * confirm against the xbt logging documentation. */
25 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
26 "Logging specific to SMPI (benchmarking)");
/* Shared allocations are handled through shared memory segments.
 * Associated data and metadata are used as follows:
 *
 *                                                                    mmap #1
 *    `allocs' dict                                                     ---- -.
 *    ----------      shared_data_t               shared_metadata_t   / |  |  |
 * .->| <name> | ---> -------------------- <--.   -----------------   | |  |  |
 * |  ----------      | fd of <name>     |    |   | size of mmap  | --| |  |  |
 * |                  | count (2)        |    |-- | data          |   \ |  |  |
 * `----------------- | <name>           |    |   -----------------     ----  |
 *                    --------------------    |   ^                           |
 *                                            |   |                           |
 *                                            |   |   `allocs_metadata' dict  |
 *                                            |   |   ----------------------  |
 *                                            |   `-- | <addr of mmap #1>  |<-'
 *                                            |   .-- | <addr of mmap #2>  |<-.
 *                                            |   |   ----------------------  |
 *                                            |   |                           |
 *                                            |   |                           |
 *                                            |   |                           |
 *                                            |   |                   mmap #2 |
 *                                            |   v                     ---- -'
 *                                            |   shared_metadata_t   / |  |
 *                                            |   -----------------   | |  |
 *                                            |   | size of mmap  | --| |  |
 *                                            `-- | data          |   | |  |
 *                                                -----------------   | |  |
 *                                                                    \ |  |
 *                                                                      ----
 */
/* Size of the buffers that receive a pointer formatted with "%p" (see the
 * snprintf() calls in shm_map() and smpi_shared_free()): presumably two
 * characters for a "0x" prefix, two hex digits per pointer byte and the
 * terminating NUL. */
59 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
61 xbt_dict_t allocs = NULL; /* Allocated on first use; location string -> shared_data_t */
62 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use; "%p" of a mapped address -> shared_metadata_t */
63 xbt_dict_t samples = NULL; /* Allocated on first use; sample location -> local_data_t */
64 xbt_dict_t calls = NULL; /* Allocated on first use; "func:input" -> caller-supplied data pointer */
65 __thread int smpi_current_rank = 0; /* Updated after each MPI call (set from smpi_process_index() in smpi_bench_begin()) */
/* Report how many bytes the file behind descriptor `fd` currently holds.
 * A failing fstat() is fatal: the process dies through xbt_die() with the
 * errno message. */
static size_t shm_size(int fd) {
  struct stat info;
  int rc = fstat(fd, &info);

  if(rc < 0) {
    xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
  }
  return (size_t)info.st_size;
}
/* Map `size` bytes of the file behind `fd` as shared, readable and writable
 * memory, and register the mapping in `allocs_metadata` under the "%p" text
 * of its address.
 * NOTE(review): this listing omits several lines of the function (local
 * declarations, the statements between xbt_new() and xbt_dict_set(), the
 * return statement and the closing braces); only the visible steps are
 * documented here. */
88 static void* shm_map(int fd, size_t size, shared_data_t* data) {
91 shared_metadata_t* meta;
/* Grow the backing file first when it is smaller than the requested size. */
93 if(size > shm_size(fd)) {
94 if(ftruncate(fd, (off_t)size) < 0) {
95 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
/* Map from offset 0; a failed mapping is fatal. */
99 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
100 if(mem == MAP_FAILED) {
101 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
/* The metadata dictionary is created lazily, on the first mapping. */
103 if(!allocs_metadata) {
104 allocs_metadata = xbt_dict_new();
/* The dictionary key is the mapped address rendered as text; the entry is
 * registered with free() as its release function. */
106 snprintf(loc, PTR_STRLEN, "%p", mem);
107 meta = xbt_new(shared_metadata_t, 1);
110 xbt_dict_set(allocs_metadata, loc, meta, &free);
111 XBT_DEBUG("MMAP %zu to %p", size, mem);
116 void smpi_bench_destroy(void)
118 xbt_dict_free(&allocs);
119 xbt_dict_free(&samples);
120 xbt_dict_free(&calls);
/* Account for `flops` floating-point operations in the simulation: start an
 * execution named "computation" on the calling process's host, tag it with
 * the SMPI tracing category and wait on it.
 * NOTE(review): the declarations of `host` and `action`, and whatever
 * surrounds the simcall_set_category() call, are not visible in this listing;
 * the meaning of the last argument (1) of simcall_host_execute() should be
 * confirmed against the simcall API. */
123 void smpi_execute_flops(double flops) {
126 host = SIMIX_host_self();
128 XBT_DEBUG("Handle real computation time: %f flops", flops);
129 action = simcall_host_execute("computation", host, flops, 1);
131 simcall_set_category (action, TRACE_internal_smpi_get_category());
133 simcall_host_execution_wait(action);
/* Inject a measured wall-clock `duration` (seconds) into the simulation.
 *
 * Durations that do not reach the "smpi/cpu_threshold" option are only logged
 * and otherwise ignored.  Anything else is converted to an amount of flops
 * with the "smpi/running_power" option and handed to smpi_execute_flops(). */
static void smpi_execute(double duration)
{
  /* FIXME: a global variable would be less expensive to consult than a call to xbt_cfg_get_double() right on the critical path */
  /* Written as a negated ">=" so that a NaN duration still takes the
   * "ignore" path. */
  if (!(duration >= surf_cfg_get_double("smpi/cpu_threshold"))) {
    XBT_DEBUG("Real computation took %f while option smpi/cpu_threshold is set to %f => ignore it",
              duration, surf_cfg_get_double("smpi/cpu_threshold"));
    return;
  }

  XBT_DEBUG("Sleep for %f to handle real computation time", duration);
  smpi_execute_flops(duration * surf_cfg_get_double("smpi/running_power"));
}
149 void smpi_bench_begin(void)
151 xbt_os_timer_start(smpi_process_timer());
152 smpi_current_rank = smpi_process_index();
/* Close a benchmarked section: stop the calling process's timer and pass the
 * elapsed time to smpi_execute(), which decides whether to inject it into
 * the simulation. */
155 void smpi_bench_end(void)
157 xbt_os_timer_t timer = smpi_process_timer();
159 xbt_os_timer_stop(timer);
160 smpi_execute(xbt_os_timer_elapsed(timer));
/* Simulated counterpart of sleep(): the visible code passes `secs`, converted
 * to double, to smpi_execute() as a duration.
 * NOTE(review): the lines around that call, including the return statement,
 * are not visible in this listing, so the returned value is not documented
 * here. */
163 unsigned int smpi_sleep(unsigned int secs)
166 smpi_execute((double) secs);
/* Simulated counterpart of gettimeofday(): fills `tv` from the simulated
 * clock returned by SIMIX_get_clock().
 * tv_sec receives the whole seconds and tv_usec the remaining fraction
 * converted to microseconds.
 * NOTE(review): the two tv_usec assignments differ only in the cast type
 * (useconds_t vs suseconds_t); presumably they are alternatives selected by
 * preprocessor conditionals that this listing does not show.  The declaration
 * of `now`, any guard on `tv` and the return statement are not visible
 * either. */
171 int smpi_gettimeofday(struct timeval *tv)
175 now = SIMIX_get_clock();
177 tv->tv_sec = (time_t)now;
179 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
181 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Precision value defined elsewhere; its reciprocal gives the timestamp
 * resolution below. */
188 extern double sg_maxmin_precision;
/* Return 1/sg_maxmin_precision truncated to an unsigned integer.
 * smpi_rastro_timestamp() uses this value as the number of ticks per
 * simulated second.
 * NOTE(review): lines surrounding the computation are not visible in this
 * listing. */
189 unsigned long long smpi_rastro_resolution (void)
192 double resolution = (1/sg_maxmin_precision);
194 return (unsigned long long)resolution;
/* Convert the simulated clock into an integer timestamp expressed in ticks of
 * smpi_rastro_resolution(): whole seconds times the resolution, plus the
 * fractional part of the clock scaled by the resolution (truncated).
 * NOTE(review): lines surrounding the computation are not visible in this
 * listing. */
197 unsigned long long smpi_rastro_timestamp (void)
200 double now = SIMIX_get_clock();
202 unsigned long long sec = (unsigned long long)now;
203 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
205 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
208 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
/* Fields of the per-location benchmarking record (used as local_data_t by the
 * smpi_sample_* functions and stored in the `samples` dictionary).
 * NOTE(review): the opening and closing lines of the struct definition are
 * not visible in this listing. */
210 int iters; /* amount of requested iterations */
211 int count; /* amount of iterations done so far */
212 double threshold; /* maximal relative stderr requested (only enforced if positive) */
213 double relstderr; /* observed relative stderr so far (standard error divided by the mean) */
214 double mean; /* mean of benched times, to be used if the block is disabled */
215 double sum; /* sum of benched times (to compute the mean and stderr) */
216 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
217 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
/* Build the key under which a benchmarked block is stored in `samples`:
 * either "file:line", or "file:line:index" where index is
 * smpi_process_index().
 * NOTE(review): the condition choosing between the two forms is not visible
 * in this listing; presumably `global` selects the form without the process
 * index.  The string comes from bprintf(); every caller needs to release it
 * if bprintf() allocates -- confirm. */
220 static char *sample_location(int global, const char *file, int line) {
222 return bprintf("%s:%d", file, line);
224 return bprintf("%s:%d:%d", file, line, smpi_process_index());
/* Tell whether the record `data` holds enough measurements to stop
 * benchmarking.
 * The starting answer is "the requested number of iterations was reached".
 * When a positive threshold was requested, the answer is additionally forced
 * to 0 while the observed relative stderr exceeds that threshold.
 * NOTE(review): the condition guarding the first `res = 0` ("not enough
 * data"), and the return statement, are not visible in this listing. */
227 static int sample_enough_benchs(local_data_t *data) {
228 int res = data->count >= data->iters;
229 if (data->threshold>0.0) {
231 res = 0; // not enough data
232 if (data->relstderr > data->threshold)
233 res = 0; // stderr too high yet
235 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
236 (res?"enough benchs":"need more data"),
237 data->count, data->iters, data->relstderr, data->threshold, data->mean);
/* First step of the SMPI_SAMPLE_ machinery: called when execution reaches a
 * benchmarked block located at file:line.
 * It closes the running benchmark section, then looks the block up in
 * `samples`.  A block seen for the first time gets a fresh record with
 * benching enabled; a block seen before has its settings checked against the
 * stored ones and its `benching` flag recomputed from the data collected so
 * far.
 * NOTE(review): several lines (declarations, the conditions that separate the
 * "first time" and "re-entering" paths, some field initialisations, cleanup
 * of `loc`) are not visible in this listing. */
241 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
243 char *loc = sample_location(global, file, line);
246 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
/* `samples` is created on first use, with free() as the release function of
 * its values. */
248 samples = xbt_dict_new_homogeneous(free);
250 data = xbt_dict_get_or_null(samples, loc);
/* Creation of a new record: at least one stopping criterion is required. */
252 xbt_assert(threshold>0 || iters>0,
253 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
254 data = (local_data_t *) xbt_new(local_data_t, 1);
257 data->sum_pow2 = 0.0;
259 data->threshold = threshold;
260 data->benching = 1; // If we have no data, we need at least one
262 xbt_dict_set(samples, loc, data, NULL);
263 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
/* Existing record: the same source location must always be sampled with the
 * same settings. */
265 if (data->iters != iters || data->threshold != threshold) {
266 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
267 loc, data->iters, data->threshold, iters,threshold);
271 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
272 data->benching = !sample_enough_benchs(data);
273 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
/* Second step of the SMPI_SAMPLE_ machinery: decides, for the block at
 * file:line, whether its body must be run (and timed) once more.
 * When the record's `benching` flag is 1 another measurement is needed.
 * Otherwise the stored `mean` is injected through smpi_execute() in place of
 * the computation and a new benchmark section is opened for the code that
 * follows.
 * NOTE(review): the declarations, the `else`, the return statements and what
 * happens to `loc` are not visible in this listing, so the returned values
 * are not documented here. */
278 int smpi_sample_2(int global, const char *file, int line)
280 char *loc = sample_location(global, file, line);
/* smpi_sample_1() must have run first, since it is what creates `samples`. */
283 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
284 data = xbt_dict_get(samples, loc);
285 XBT_DEBUG("sample2 %s",loc);
288 if (data->benching==1) {
289 // we need to run a new bench
290 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
291 data->count, data->iters, data->relstderr, data->threshold, data->mean);
295 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
296 // Just sleep instead
297 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
298 data->count, data->iters, data->relstderr, data->threshold, data->mean);
299 smpi_execute(data->mean);
301 smpi_bench_begin(); // prepare to capture future, unrelated computations
/* Third step of the SMPI_SAMPLE_ machinery: called after the body of the
 * block at file:line has been run, to record the measured time.
 * The elapsed time of the process timer is folded into the record's running
 * statistics.  With n samples, mean = sum / n and
 * relstderr = sqrt((sum_pow2 / n - mean^2) / n) / mean.
 * While more measurements are still required, `mean` is overwritten with the
 * latest sample so that smpi_sample_2() replays this exact duration.
 * NOTE(review): the declarations, the body of the `benching==0` branch, the
 * updates of `sum` and `count`, and the statement following the final comment
 * are not visible in this listing. */
307 void smpi_sample_3(int global, const char *file, int line)
309 char *loc = sample_location(global, file, line);
312 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
313 data = xbt_dict_get(samples, loc);
314 XBT_DEBUG("sample3 %s",loc);
316 if (data->benching==0) {
320 // ok, benchmarking this loop is over
321 xbt_os_timer_stop(smpi_process_timer());
326 sample = xbt_os_timer_elapsed(smpi_process_timer());
328 data->sum_pow2 += sample * sample;
329 n = (double)data->count;
330 data->mean = data->sum / n;
331 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
332 if (!sample_enough_benchs(data)) {
333 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
335 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
336 data->mean, data->relstderr, sample);
338 // That's enough for now, prevent sample_2 to run the same code over and over
/* Allocate `size` bytes that are shared between all callers reaching the same
 * file:line in this OS process.
 * The allocation site is identified by the string "<pid>_<file>_<line>".  A
 * site seen for the first time gets a POSIX shared-memory object created with
 * shm_open(), mapped through shm_map(), unlinked right away and recorded in
 * `allocs`.  A known site is mapped again from the descriptor stored in its
 * shared_data_t.
 * NOTE(review): many lines are not visible in this listing (declarations, the
 * body of the loop flattening `loc`, the conditions around shm_open() errors,
 * the initialisation and reference counting of `data`, the return
 * statement). */
343 void *smpi_shared_malloc(size_t size, const char *file, int line)
345 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
346 size_t len = strlen(loc);
352 for(i = 0; i < len; i++) {
353 /* Make the 'loc' ID be a flat filename */
/* `allocs` is created on first use, with free() as the release function of
 * its values. */
359 allocs = xbt_dict_new_homogeneous(free);
361 data = xbt_dict_get_or_null(allocs, loc);
/* O_CREAT | O_EXCL: the call fails if an object of that name already
 * exists. */
363 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
367 xbt_die("Please cleanup /dev/shm/%s", loc);
369 xbt_die("An unhandled error occured while opening %s: %s", loc, strerror(errno));
372 data = xbt_new(shared_data_t, 1);
376 mem = shm_map(fd, size, data);
/* The name is removed as soon as the segment is mapped; a failure here only
 * produces a warning. */
377 if(shm_unlink(loc) < 0) {
378 XBT_WARN("Could not early unlink %s: %s", loc, strerror(errno));
380 xbt_dict_set(allocs, loc, data, NULL);
381 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
/* Known allocation site: map the already-open descriptor once more. */
383 mem = shm_map(data->fd, size, data);
386 XBT_DEBUG("Malloc %zu in %p (metadata at %p)", size, mem, data);
/* Release a block obtained from smpi_shared_malloc().
 * `ptr` is looked up in `allocs_metadata` through its "%p" text form, the
 * region is unmapped using the size stored in the metadata, and once the
 * owning shared_data_t has a count of zero or less its entry is removed from
 * `allocs`.
 * Every failure visible here is reported with XBT_WARN; none of the visible
 * code aborts.
 * NOTE(review): the conditions guarding most warnings, the assignment of
 * `data`, the handling of its counter and of its file descriptor are not
 * visible in this listing. */
389 void smpi_shared_free(void *ptr)
391 char loc[PTR_STRLEN];
392 shared_metadata_t* meta;
396 XBT_WARN("Cannot free: nothing was allocated");
399 if(!allocs_metadata) {
400 XBT_WARN("Cannot free: no metadata was allocated");
/* Same key format as the one used by shm_map() when registering the
 * mapping. */
402 snprintf(loc, PTR_STRLEN, "%p", ptr);
403 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
405 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
410 XBT_WARN("Cannot free: something is broken in the metadata link");
413 if(munmap(ptr, meta->size) < 0) {
414 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
417 if (data->count <= 0) {
419 xbt_dict_remove(allocs, data->loc);
/* Look up the pair (`func`, `input`) in `calls`, using the key "func:input".
 * The lookup goes through xbt_dict_get(), which per the inline comment either
 * succeeds or throws; the visible code tests the caught exception against
 * not_found_error.
 * NOTE(review): the exception-handling scaffolding, the values returned in
 * each case and the release of `loc` are not visible in this listing. */
425 int smpi_shared_known_call(const char* func, const char* input) {
426 char* loc = bprintf("%s:%s", func, input);
/* `calls` is created on first use; NULL is passed as the release function
 * for its values. */
431 calls = xbt_dict_new_homogeneous(NULL);
434 xbt_dict_get(calls, loc); /* Succeed or throw */
438 if(ex.category == not_found_error) {
/* Fetch the data stored in `calls` under the key "func:input".
 * The lookup uses xbt_dict_get(), the same call that
 * smpi_shared_known_call() annotates as "Succeed or throw".
 * NOTE(review): the declaration of `data`, the return statement and the
 * release of `loc` are not visible in this listing. */
449 void* smpi_shared_get_call(const char* func, const char* input) {
450 char* loc = bprintf("%s:%s", func, input);
/* `calls` is created on first use; NULL is passed as the release function
 * for its values. */
454 calls = xbt_dict_new_homogeneous(NULL);
456 data = xbt_dict_get(calls, loc);
/* Store `data` in `calls` under the key "func:input".
 * NOTE(review): the remainder of the function (return statement, release of
 * `loc`) is not visible in this listing. */
461 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
462 char* loc = bprintf("%s:%s", func, input);
/* `calls` is created on first use; NULL is passed as the release function,
 * and the visible code gives no sign that the dictionary ever frees the
 * stored pointers. */
465 calls = xbt_dict_new_homogeneous(NULL);
467 xbt_dict_set(calls, loc, data, NULL);