1 /* Copyright (c) 2013-2017. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
11 #include "colls_private.hpp"
12 #include "smpi_process.hpp"
15 // Attempt at a quick autotuning version of the collectives.
/* TRACE_AUTO_COLL(cat): when tracing is enabled, record which algorithm of
 * collective `cat` is about to be benchmarked.
 *
 * - The event type is looked up by the stringified `cat` under the root type
 *   and created with Type::eventNew().
 * - The container is looked up by the name "rank-<index>", built from
 *   smpi_process()->index().
 * - The value is named after Colls::mpi_coll_<cat>_description[i].name.
 *
 * The expansion reads a variable `i` that must already exist at the point of
 * use (the index of the algorithm in the description table). */
17 #define TRACE_AUTO_COLL(cat) \
18 if (TRACE_is_enabled()) { \
19 simgrid::instr::Type* type = PJ_type_get_root()->getChildOrNull(#cat); \
21 type = simgrid::instr::Type::eventNew(#cat, PJ_type_get_root()); \
24 snprintf(cont_name, 25, "rank-%d", smpi_process()->index()); \
25 simgrid::instr::Value* val = \
26 simgrid::instr::Value::byNameOrCreate(Colls::mpi_coll_##cat##_description[i].name, "1.0 1.0 1.0", type); \
27 new simgrid::instr::NewEvent(SIMIX_get_clock(), simgrid::instr::Container::byName(cont_name), type, val); \
/* AUTOMATIC_COLL_BENCH(cat, ret, args, args2): generate the definition of
 * Coll_<cat>_automatic::<cat>, which benchmarks the algorithms listed in
 * Colls::mpi_coll_<cat>_description and reports the quickest one.
 *
 *   cat   - collective name; pasted into identifiers and stringified in logs
 *   ret   - return type of the generated function
 *   args  - parenthesized parameter list (unwrapped with COLL_UNPAREN)
 *   args2 - parenthesized argument list forwarded to each algorithm
 *
 * For each table entry (the table ends at the first null name):
 *   1. entries named "automatic" and "default" are tested for by name
 *      (presumably to skip them - confirm);
 *   2. Coll_barrier_default::barrier(comm) is called before the timed call;
 *   3. the algorithm is called between two SIMIX_get_clock() readings;
 *   4. the elapsed time is reduced with MPI_MAX onto rank 0;
 *   5. the elapsed time is compared with the local minimum, and on rank 0
 *      the reduced maximum is compared with the smallest maximum so far.
 *
 * The generated function logs the results with XBT_WARN and returns
 * MPI_SUCCESS if min_coll is not -1, MPI_ERR_INTERN otherwise. */
30 #define AUTOMATIC_COLL_BENCH(cat, ret, args, args2) \
31 ret Coll_##cat##_automatic::cat(COLL_UNPAREN args) \
33 double time1, time2, time_min = DBL_MAX; \
34 int min_coll = -1, global_coll = -1; \
36 double buf_in, buf_out, max_min = DBL_MAX; \
37 for (i = 0; Colls::mpi_coll_##cat##_description[i].name; i++) { \
38 if (not strcmp(Colls::mpi_coll_##cat##_description[i].name, "automatic")) \
40 if (not strcmp(Colls::mpi_coll_##cat##_description[i].name, "default")) \
42 Coll_barrier_default::barrier(comm); \
43 TRACE_AUTO_COLL(cat) \
44 time1 = SIMIX_get_clock(); /* clock reading before the candidate call */ \
46 ((int(*) args)Colls::mpi_coll_##cat##_description[i].coll) args2; /* call the table entry through a typed function pointer */ \
47 } catch (std::exception & ex) { \
50 time2 = SIMIX_get_clock(); /* clock reading after the candidate call */ \
51 buf_out = time2 - time1; \
52 Coll_reduce_default::reduce((void*)&buf_out, (void*)&buf_in, 1, MPI_DOUBLE, MPI_MAX, 0, comm); /* MPI_MAX of elapsed times, result on rank 0 */ \
53 if (time2 - time1 < time_min) { \
55 time_min = time2 - time1; \
57 if (comm->rank() == 0) { \
58 if (buf_in < max_min) { \
64 if (comm->rank() == 0) { \
65 XBT_WARN("For rank 0, the quickest was %s : %f , but global was %s : %f at max", \
66 Colls::mpi_coll_##cat##_description[min_coll].name, time_min, \
67 Colls::mpi_coll_##cat##_description[global_coll].name, max_min); \
69 XBT_WARN("The quickest %s was %s on rank %d and took %f", #cat, \
70 Colls::mpi_coll_##cat##_description[min_coll].name, comm->rank(), time_min); \
71 return (min_coll != -1) ? MPI_SUCCESS : MPI_ERR_INTERN; \
/* Instantiate AUTOMATIC_COLL_BENCH once per collective operation.
 * The parenthesized list is the set of argument names that the generated
 * function forwards to each benchmarked algorithm (the `args2` parameter of
 * AUTOMATIC_COLL_BENCH).
 * COLL_APPLY and the COLL_*_SIG macros are defined elsewhere; presumably each
 * *_SIG expands to the (cat, ret, args) part of the argument list - confirm. */
77 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
78 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
79 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm));
80 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLGATHER_SIG, (send_buff,send_count,send_type,recv_buff,recv_count,recv_type,comm));
81 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLTOALL_SIG,(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,comm));
82 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLTOALLV_SIG, (send_buff, send_counts, send_disps, send_type, recv_buff, recv_counts, recv_disps, recv_type, comm));
83 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_BCAST_SIG , (buf, count, datatype, root, comm));
84 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_REDUCE_SIG,(buf,rbuf, count, datatype, op, root, comm));
85 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,op,comm));
86 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm));
87 COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_BARRIER_SIG,(comm));