/* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector */
-/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
+/* Copyright (c) 2009-2010, 2013-2017. The SimGrid Team.
* All rights reserved. */
/* This program is free software; you can redistribute it and/or modify it
4 - Topology aware Reduce + Bcast algorithm
5 - Binomial gather + scatter algorithm
6 - Topology aware binominal gather + scatter algorithm
-7 - Shumilin's ring algorithm
+7 - Shumilin's ring algorithm
8 - Ring algorithm
as Shumilin's ring algorithm is unknown, default to ring'
*/
+namespace simgrid{
+namespace smpi{
int (*intel_allreduce_functions_table[])(void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm) ={
- smpi_coll_tuned_allreduce_rdb,
- smpi_coll_tuned_allreduce_rab1,
- smpi_coll_tuned_allreduce_redbcast,
- smpi_coll_tuned_allreduce_mvapich2_two_level,
- smpi_coll_tuned_allreduce_smp_binomial,
- smpi_coll_tuned_allreduce_mvapich2_two_level,
- smpi_coll_tuned_allreduce_ompi_ring_segmented,
- smpi_coll_tuned_allreduce_ompi_ring_segmented
+ Coll_allreduce_rdb::allreduce,
+ Coll_allreduce_rab1::allreduce,
+ Coll_allreduce_redbcast::allreduce,
+ Coll_allreduce_mvapich2_two_level::allreduce,
+ Coll_allreduce_smp_binomial::allreduce,
+ Coll_allreduce_mvapich2_two_level::allreduce,
+ Coll_allreduce_ompi_ring_segmented::allreduce,
+ Coll_allreduce_ompi_ring_segmented::allreduce
};
intel_tuning_table_element intel_allreduce_table[] =
{
- {1,{
+ {1,{
{ 2,9,{
{6,7},
{85,1},
-/*I_MPI_ADJUST_ALLTOALL
+/*I_MPI_ADJUST_ALLTOALL
-MPI_Alltoall
+MPI_Alltoall
-1. Bruck's algorithm
-2. Isend/Irecv + waitall algorithm
-3. Pair wise exchange algorithm
+1. Bruck's algorithm
+2. Isend/Irecv + waitall algorithm
+3. Pair wise exchange algorithm
4. Plum's algorithm
*/
}
}
};
-int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
+int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
MPI_Comm comm) ={
- smpi_coll_tuned_alltoall_bruck,
- smpi_coll_tuned_alltoall_mvapich2_scatter_dest,
- smpi_coll_tuned_alltoall_pair,
- smpi_coll_tuned_alltoall_mvapich2//Plum is proprietary ? (and super efficient)
+ Coll_alltoall_bruck::alltoall,
+ Coll_alltoall_mvapich2_scatter_dest::alltoall,
+ Coll_alltoall_pair::alltoall,
+ Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? (and super efficient)
};
-/*I_MPI_ADJUST_BARRIER
+/*I_MPI_ADJUST_BARRIER
-MPI_Barrier
+MPI_Barrier
-1. Dissemination algorithm
-2. Recursive doubling algorithm
-3. Topology aware dissemination algorithm
-4. Topology aware recursive doubling algorithm
-5. Binominal gather + scatter algorithm
-6. Topology aware binominal gather + scatter algorithm
+1. Dissemination algorithm
+2. Recursive doubling algorithm
+3. Topology aware dissemination algorithm
+4. Topology aware recursive doubling algorithm
+5. Binomial gather + scatter algorithm
+6. Topology aware binomial gather + scatter algorithm
*/
static int intel_barrier_gather_scatter(MPI_Comm comm){
//our default barrier performs a antibcast/bcast
- smpi_mpi_barrier(comm);
+ Coll_barrier_default::barrier(comm);
return MPI_SUCCESS;
}
int (*intel_barrier_functions_table[])(MPI_Comm comm) ={
- smpi_coll_tuned_barrier_ompi_basic_linear,
- smpi_coll_tuned_barrier_ompi_recursivedoubling,
- smpi_coll_tuned_barrier_ompi_basic_linear,
- smpi_coll_tuned_barrier_ompi_recursivedoubling,
+ Coll_barrier_ompi_basic_linear::barrier,
+ Coll_barrier_ompi_recursivedoubling::barrier,
+ Coll_barrier_ompi_basic_linear::barrier,
+ Coll_barrier_ompi_recursivedoubling::barrier,
intel_barrier_gather_scatter,
intel_barrier_gather_scatter
};
};
-/*I_MPI_ADJUST_BCAST
+/*I_MPI_ADJUST_BCAST
-MPI_Bcast
+MPI_Bcast
-1. Binomial algorithm
-2. Recursive doubling algorithm
-3. Ring algorithm
-4. Topology aware binomial algorithm
-5. Topology aware recursive doubling algorithm
-6. Topology aware ring algorithm
-7. Shumilin's bcast algorithm
+1. Binomial algorithm
+2. Recursive doubling algorithm
+3. Ring algorithm
+4. Topology aware binomial algorithm
+5. Topology aware recursive doubling algorithm
+6. Topology aware ring algorithm
+7. Shumilin's bcast algorithm
*/
int (*intel_bcast_functions_table[])(void *buff, int count,
MPI_Datatype datatype, int root,
MPI_Comm comm) ={
- smpi_coll_tuned_bcast_binomial_tree,
- //smpi_coll_tuned_bcast_scatter_rdb_allgather,
- smpi_coll_tuned_bcast_NTSL,
- smpi_coll_tuned_bcast_NTSL,
- smpi_coll_tuned_bcast_SMP_binomial,
- //smpi_coll_tuned_bcast_scatter_rdb_allgather,
- smpi_coll_tuned_bcast_NTSL,
- smpi_coll_tuned_bcast_SMP_linear,
- smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo'
+ Coll_bcast_binomial_tree::bcast,
+ //Coll_bcast_scatter_rdb_allgather::bcast,
+ Coll_bcast_NTSL::bcast,
+ Coll_bcast_NTSL::bcast,
+ Coll_bcast_SMP_binomial::bcast,
+ //Coll_bcast_scatter_rdb_allgather::bcast,
+ Coll_bcast_NTSL::bcast,
+ Coll_bcast_SMP_linear::bcast,
+ Coll_bcast_mvapich2::bcast,//we don't know Shumilin's algorithm
};
intel_tuning_table_element intel_bcast_table[] =
};
-/*I_MPI_ADJUST_REDUCE
+/*I_MPI_ADJUST_REDUCE
-MPI_Reduce
+MPI_Reduce
-1. Shumilin's algorithm
-2. Binomial algorithm
-3. Topology aware Shumilin's algorithm
-4. Topology aware binomial algorithm
-5. Rabenseifner's algorithm
+1. Shumilin's algorithm
+2. Binomial algorithm
+3. Topology aware Shumilin's algorithm
+4. Topology aware binomial algorithm
+5. Rabenseifner's algorithm
6. Topology aware Rabenseifner's algorithm
*/
int count, MPI_Datatype datatype,
MPI_Op op, int root,
MPI_Comm comm) ={
- smpi_coll_tuned_reduce_mvapich2,
- smpi_coll_tuned_reduce_binomial,
- smpi_coll_tuned_reduce_mvapich2,
- smpi_coll_tuned_reduce_mvapich2_two_level,
- smpi_coll_tuned_reduce_rab,
- smpi_coll_tuned_reduce_rab
+ Coll_reduce_mvapich2::reduce,
+ Coll_reduce_binomial::reduce,
+ Coll_reduce_mvapich2::reduce,
+ Coll_reduce_mvapich2_two_level::reduce,
+ Coll_reduce_rab::reduce,
+ Coll_reduce_rab::reduce
};
intel_tuning_table_element intel_reduce_table[] =
}
};
-/* I_MPI_ADJUST_REDUCE_SCATTER
+/* I_MPI_ADJUST_REDUCE_SCATTER
-MPI_Reduce_scatter
+MPI_Reduce_scatter
-1. Recursive having algorithm
-2. Pair wise exchange algorithm
-3. Recursive doubling algorithm
-4. Reduce + Scatterv algorithm
-5. Topology aware Reduce + Scatterv algorithm
+1. Recursive halving algorithm
+2. Pair wise exchange algorithm
+3. Recursive doubling algorithm
+4. Reduce + Scatterv algorithm
+5. Topology aware Reduce + Scatterv algorithm
*/
static int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
MPI_Op op,
MPI_Comm comm)
{
- smpi_mpi_reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+ Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
return MPI_SUCCESS;
}
MPI_Op op,
MPI_Comm comm)
{
- if(smpi_op_is_commute(op))
- return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm);
+ if(op==MPI_OP_NULL || op->is_commutative())
+ return Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
else
- return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm);
+ return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
}
int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
MPI_Comm comm
) ={
intel_reduce_scatter_recursivehalving,
- smpi_coll_tuned_reduce_scatter_mpich_pair,
- smpi_coll_tuned_reduce_scatter_mpich_rdb,
+ Coll_reduce_scatter_mpich_pair::reduce_scatter,
+ Coll_reduce_scatter_mpich_rdb::reduce_scatter,
intel_reduce_scatter_reduce_scatterv,
intel_reduce_scatter_reduce_scatterv
};
}
};
-/* I_MPI_ADJUST_ALLGATHER
+/* I_MPI_ADJUST_ALLGATHER
-MPI_Allgather
+MPI_Allgather
-1. Recursive doubling algorithm
-2. Bruck's algorithm
-3. Ring algorithm
-4. Topology aware Gatherv + Bcast algorithm
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
*/
-int (*intel_allgather_functions_table[])(void *sbuf, int scount,
+int (*intel_allgather_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
MPI_Comm comm
) ={
- smpi_coll_tuned_allgather_rdb,
- smpi_coll_tuned_allgather_bruck,
- smpi_coll_tuned_allgather_ring,
- smpi_coll_tuned_allgather_GB
+ Coll_allgather_rdb::allgather,
+ Coll_allgather_bruck::allgather,
+ Coll_allgather_ring::allgather,
+ Coll_allgather_GB::allgather
};
intel_tuning_table_element intel_allgather_table[] =
}
};
-/* I_MPI_ADJUST_ALLGATHERV
+/* I_MPI_ADJUST_ALLGATHERV
-MPI_Allgatherv
+MPI_Allgatherv
-1. Recursive doubling algorithm
-2. Bruck's algorithm
-3. Ring algorithm
-4. Topology aware Gatherv + Bcast algorithm
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
*/
-int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
+int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int *rcounts,
+ void* rbuf, int *rcounts,
int *rdispls,
- MPI_Datatype rdtype,
+ MPI_Datatype rdtype,
MPI_Comm comm
) ={
- smpi_coll_tuned_allgatherv_mpich_rdb,
- smpi_coll_tuned_allgatherv_ompi_bruck,
- smpi_coll_tuned_allgatherv_ring,
- smpi_coll_tuned_allgatherv_GB
+ Coll_allgatherv_mpich_rdb::allgatherv,
+ Coll_allgatherv_ompi_bruck::allgatherv,
+ Coll_allgatherv_ring::allgatherv,
+ Coll_allgatherv_GB::allgatherv
};
intel_tuning_table_element intel_allgatherv_table[] =
MPI_Gather
-1. Binomial algorithm
-2. Topology aware binomial algorithm
+1. Binomial algorithm
+2. Topology aware binomial algorithm
3. Shumilin's algorithm
*/
-int (*intel_gather_functions_table[])(void *sbuf, int scount,
+int (*intel_gather_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
int root,
MPI_Comm comm
) ={
- smpi_coll_tuned_gather_ompi_binomial,
- smpi_coll_tuned_gather_ompi_binomial,
- smpi_coll_tuned_gather_mvapich2
+ Coll_gather_ompi_binomial::gather,
+ Coll_gather_ompi_binomial::gather,
+ Coll_gather_mvapich2::gather
};
intel_tuning_table_element intel_gather_table[] =
};
-/* I_MPI_ADJUST_SCATTER
+/* I_MPI_ADJUST_SCATTER
-MPI_Scatter
+MPI_Scatter
-1. Binomial algorithm
-2. Topology aware binomial algorithm
-3. Shumilin's algorithm
+1. Binomial algorithm
+2. Topology aware binomial algorithm
+3. Shumilin's algorithm
*/
-int (*intel_scatter_functions_table[])(void *sbuf, int scount,
+int (*intel_scatter_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
int root, MPI_Comm comm
) ={
- smpi_coll_tuned_scatter_ompi_binomial,
- smpi_coll_tuned_scatter_ompi_binomial,
- smpi_coll_tuned_scatter_mvapich2
+ Coll_scatter_ompi_binomial::scatter,
+ Coll_scatter_ompi_binomial::scatter,
+ Coll_scatter_mvapich2::scatter
};
intel_tuning_table_element intel_scatter_table[] =
-/* I_MPI_ADJUST_ALLTOALLV
+/* I_MPI_ADJUST_ALLTOALLV
-MPI_Alltoallv
+MPI_Alltoallv
-1. Isend/Irecv + waitall algorithm
-2. Plum's algorithm
+1. Isend/Irecv + waitall algorithm
+2. Plum's algorithm
*/
MPI_Datatype rdtype,
MPI_Comm comm
) ={
- smpi_coll_tuned_alltoallv_ompi_basic_linear,
- smpi_coll_tuned_alltoallv_bruck
+ Coll_alltoallv_ompi_basic_linear::alltoallv,
+ Coll_alltoallv_bruck::alltoallv
};
intel_tuning_table_element intel_alltoallv_table[] =
}
},
{ 2147483647,2,{
- {4,1},//0 again
+ {4,1},//0 again
{2147483647,2}
}
}
};
-//These are collected from table 3.5-2 of the Intel MPI Reference Manual
+//These are collected from table 3.5-2 of the Intel MPI Reference Manual
+
-
#define SIZECOMP_reduce_scatter\
int total_message_size = 0;\
for (i = 0; i < comm_size; i++) { \
total_message_size += rcounts[i];\
}\
- size_t block_dsize = total_message_size*smpi_datatype_size(dtype);\
-
+ size_t block_dsize = total_message_size*dtype->size();\
+
#define SIZECOMP_allreduce\
- size_t block_dsize =rcount * smpi_datatype_size(dtype);
-
+ size_t block_dsize =rcount * dtype->size();
+
#define SIZECOMP_alltoall\
- size_t block_dsize =send_count * smpi_datatype_size(send_type);
+ size_t block_dsize =send_count * send_type->size();
#define SIZECOMP_bcast\
- size_t block_dsize =count * smpi_datatype_size(datatype);
+ size_t block_dsize =count * datatype->size();
#define SIZECOMP_reduce\
- size_t block_dsize =count * smpi_datatype_size(datatype);
+ size_t block_dsize =count * datatype->size();
#define SIZECOMP_barrier\
size_t block_dsize = 1;
#define SIZECOMP_allgather\
- size_t block_dsize =recv_count * smpi_datatype_size(recv_type);
+ size_t block_dsize =recv_count * recv_type->size();
#define SIZECOMP_allgatherv\
int total_message_size = 0;\
for (i = 0; i < comm_size; i++) { \
total_message_size += recv_count[i];\
}\
- size_t block_dsize = total_message_size*smpi_datatype_size(recv_type);
-
+ size_t block_dsize = total_message_size*recv_type->size();
+
#define SIZECOMP_gather\
int rank = comm->rank();\
size_t block_dsize = (send_buff == MPI_IN_PLACE || rank ==root) ?\
- recv_count * smpi_datatype_size(recv_type) :\
- send_count * smpi_datatype_size(send_type);
+ recv_count * recv_type->size() :\
+ send_count * send_type->size();
#define SIZECOMP_scatter\
int rank = comm->rank();\
size_t block_dsize = (sendbuf == MPI_IN_PLACE || rank !=root ) ?\
- recvcount * smpi_datatype_size(recvtype) :\
- sendcount * smpi_datatype_size(sendtype);
+ recvcount * recvtype->size() :\
+ sendcount * sendtype->size();
#define SIZECOMP_alltoallv\
size_t block_dsize = 1;
-
+
#define IMPI_COLL_SELECT(cat, ret, args, args2)\
-ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\
+ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\
{\
int comm_size = comm->size();\
int i =0;\
args2);\
}
+
COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm));
COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm));
COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm));
+}
+}