1 /* selector for collective algorithms based on mvapich decision logic, with calibration from Stampede cluster at TACC*/
2 /* This is the tuning used by MVAPICH for Stampede platform based on (MV2_ARCH_INTEL_XEON_E5_2680_16,
3 * MV2_HCA_MLX_CX_FDR) */
5 /* Copyright (c) 2009-2019. The SimGrid Team. All rights reserved. */
7 /* This program is free software; you can redistribute it and/or modify it
8 * under the terms of the license (GNU LGPL) which comes with this package. */
10 /************ Alltoall variables and initializers */
12 #ifndef SMPI_MVAPICH2_SELECTOR_STAMPEDE_HPP
13 #define SMPI_MVAPICH2_SELECTOR_STAMPEDE_HPP
/* Capacity of the fixed-size entry arrays embedded in the tuning-table structs of this file. */
17 #define MV2_MAX_NB_THRESHOLDS 32
/* Cleanup routine; every table initializer in this file installs it as
 * simgrid::smpi::Colls::smpi_coll_cleanup_callback when that callback is still NULL. */
19 XBT_PUBLIC void smpi_coll_cleanup_mvapich2(void);
/* One entry of the alltoall tuning data. Only the function-pointer member is visible in this excerpt.
 * NOTE(review): the table initializers ({0, 262144, &fn}) suggest two integer bounds precede it - confirm
 * against the complete struct definition. */
21 struct mv2_alltoall_tuning_element {
/* Alltoall implementation associated with this entry. */
24 int (*MV2_pt_Alltoall_function)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
25 MPI_Datatype recvtype, MPI_Comm comm_ptr);
/* One row of the alltoall tuning table. It holds two arrays of entries, each with room for
 * MV2_MAX_NB_THRESHOLDS elements: algo_table and in_place_algo_table.
 * NOTE(review): other members of this struct are not visible in this excerpt. */
28 struct mv2_alltoall_tuning_table {
31 mv2_alltoall_tuning_element algo_table[MV2_MAX_NB_THRESHOLDS];
32 mv2_alltoall_tuning_element in_place_algo_table[MV2_MAX_NB_THRESHOLDS];
/* Pointer to an alltoall implementation; initially NULL. */
35 int (*MV2_Alltoall_function)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
36 MPI_Datatype recvtype, MPI_Comm comm_ptr) = NULL;
38 /* Indicates number of processes per node */
39 int* mv2_alltoall_table_ppn_conf = NULL;
40 /* Indicates total number of configurations */
41 int mv2_alltoall_num_ppn_conf = 1;
/* Per-configuration number of rows; allocated and filled by init_mv2_alltoall_tables_stampede(). */
42 int* mv2_size_alltoall_tuning_table = NULL;
/* Per-configuration pointer to the first row of that configuration; allocated by
 * init_mv2_alltoall_tables_stampede(). */
43 mv2_alltoall_tuning_table** mv2_alltoall_thresholds_table = NULL;
/* Aliases mapping the MVAPICH2 alltoall algorithm names used in the tables onto SimGrid implementations.
 * Note that the "inplace" name is mapped onto the ring alltoall. */
45 #define MPIR_Alltoall_bruck_MV2 simgrid::smpi::Coll_alltoall_bruck::alltoall
46 #define MPIR_Alltoall_RD_MV2 simgrid::smpi::Coll_alltoall_rdb::alltoall
47 #define MPIR_Alltoall_Scatter_dest_MV2 simgrid::smpi::Coll_alltoall_mvapich2_scatter_dest::alltoall
48 #define MPIR_Alltoall_pairwise_MV2 simgrid::smpi::Coll_alltoall_pair::alltoall
49 #define MPIR_Alltoall_inplace_MV2 simgrid::smpi::Coll_alltoall_ring::alltoall
/* Builds the alltoall tuning tables for the Stampede calibration.
 *
 * Steps visible in this excerpt:
 *  - sets the number of processes-per-node configurations to 3 (1, 2 and 16 ppn);
 *  - installs smpi_coll_cleanup_mvapich2 as cleanup callback when none is registered yet;
 *  - allocates the per-configuration arrays (ppn value, row count, row pointer);
 *  - describes each configuration in a local table (6, 6 and 7 rows respectively);
 *  - copies the three local tables into ONE contiguous allocation, with
 *    mv2_alltoall_thresholds_table[i] pointing at the first row of configuration i.
 *
 * NOTE(review): several original lines of this function are absent from this excerpt (braces, parts of each
 * table row, and whatever follows the last copy loop), so the release of table_ptrs cannot be checked here. */
51 static void init_mv2_alltoall_tables_stampede()
53 int agg_table_sum = 0;
54 mv2_alltoall_tuning_table** table_ptrs = NULL;
55 mv2_alltoall_num_ppn_conf = 3;
/* Register the shared cleanup routine only if no other selector did it before. */
56 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
57 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
58 mv2_alltoall_thresholds_table = new mv2_alltoall_tuning_table*[mv2_alltoall_num_ppn_conf];
/* table_ptrs temporarily records the address of each local (stack) table until it is copied below. */
59 table_ptrs = new mv2_alltoall_tuning_table*[mv2_alltoall_num_ppn_conf];
60 mv2_size_alltoall_tuning_table = new int[mv2_alltoall_num_ppn_conf];
61 mv2_alltoall_table_ppn_conf = new int[mv2_alltoall_num_ppn_conf];
/* ---- Configuration 0: 1 process per node, 6 rows ---- */
62 mv2_alltoall_table_ppn_conf[0] = 1;
63 mv2_size_alltoall_tuning_table[0] = 6;
64 mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = {
69 {0, -1, &MPIR_Alltoall_pairwise_MV2},
73 {0, -1, &MPIR_Alltoall_inplace_MV2},
81 {0, 262144, &MPIR_Alltoall_Scatter_dest_MV2}, {262144, -1, &MPIR_Alltoall_pairwise_MV2},
85 {0, -1, &MPIR_Alltoall_inplace_MV2},
93 {0, 8, &MPIR_Alltoall_RD_MV2}, {8, -1, &MPIR_Alltoall_Scatter_dest_MV2},
97 {0, -1, &MPIR_Alltoall_inplace_MV2},
105 {0, 64, &MPIR_Alltoall_RD_MV2},
106 {64, 512, &MPIR_Alltoall_bruck_MV2},
107 {512, -1, &MPIR_Alltoall_Scatter_dest_MV2},
111 {0, -1, &MPIR_Alltoall_inplace_MV2},
119 {0, 32, &MPIR_Alltoall_RD_MV2},
120 {32, 2048, &MPIR_Alltoall_bruck_MV2},
121 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
125 {0, -1, &MPIR_Alltoall_inplace_MV2},
133 {0, 8, &MPIR_Alltoall_RD_MV2},
134 {8, 1024, &MPIR_Alltoall_bruck_MV2},
135 {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
139 {0, -1, &MPIR_Alltoall_inplace_MV2},
143 table_ptrs[0] = mv2_tmp_alltoall_thresholds_table_1ppn;
/* ---- Configuration 1: 2 processes per node, 6 rows ---- */
144 mv2_alltoall_table_ppn_conf[1] = 2;
145 mv2_size_alltoall_tuning_table[1] = 6;
146 mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_2ppn[] = {
151 {0, 32, &MPIR_Alltoall_RD_MV2}, {32, -1, &MPIR_Alltoall_Scatter_dest_MV2},
155 {0, -1, &MPIR_Alltoall_inplace_MV2},
163 {0, 64, &MPIR_Alltoall_RD_MV2}, {64, -1, &MPIR_Alltoall_Scatter_dest_MV2},
167 {0, -1, &MPIR_Alltoall_inplace_MV2},
175 {0, 64, &MPIR_Alltoall_RD_MV2},
176 {64, 2048, &MPIR_Alltoall_bruck_MV2},
177 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
181 {0, -1, &MPIR_Alltoall_inplace_MV2},
189 {0, 16, &MPIR_Alltoall_RD_MV2},
190 {16, 2048, &MPIR_Alltoall_bruck_MV2},
191 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
195 {0, -1, &MPIR_Alltoall_inplace_MV2},
203 {0, 8, &MPIR_Alltoall_RD_MV2},
204 {8, 1024, &MPIR_Alltoall_bruck_MV2},
205 {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
209 {0, -1, &MPIR_Alltoall_inplace_MV2},
217 {0, 4, &MPIR_Alltoall_RD_MV2},
218 {4, 2048, &MPIR_Alltoall_bruck_MV2},
219 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
223 {0, -1, &MPIR_Alltoall_inplace_MV2},
227 table_ptrs[1] = mv2_tmp_alltoall_thresholds_table_2ppn;
/* ---- Configuration 2: 16 processes per node, 7 rows ---- */
228 mv2_alltoall_table_ppn_conf[2] = 16;
229 mv2_size_alltoall_tuning_table[2] = 7;
230 mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_16ppn[] = {
235 {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
/* Unlike the 1- and 2-ppn tables, the in-place entries of this table do not start at 0. */
239 {32768, -1, &MPIR_Alltoall_inplace_MV2},
247 {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
251 {16384, -1, &MPIR_Alltoall_inplace_MV2},
259 {0, 2048, &MPIR_Alltoall_bruck_MV2},
260 {2048, 16384, &MPIR_Alltoall_Scatter_dest_MV2},
261 {16384, -1, &MPIR_Alltoall_pairwise_MV2},
265 {32768, 131072, &MPIR_Alltoall_inplace_MV2},
273 {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_pairwise_MV2},
277 {16384, 65536, &MPIR_Alltoall_inplace_MV2},
285 {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
289 {16384, 65536, &MPIR_Alltoall_inplace_MV2},
297 {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
301 {16384, 65536, &MPIR_Alltoall_inplace_MV2},
308 {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
312 {16384, 65536, &MPIR_Alltoall_inplace_MV2},
317 table_ptrs[2] = mv2_tmp_alltoall_thresholds_table_16ppn;
/* Total number of rows over all configurations: size of the single backing allocation. */
319 for (int i = 0; i < mv2_alltoall_num_ppn_conf; i++) {
320 agg_table_sum += mv2_size_alltoall_tuning_table[i];
/* Slot 0 owns the whole allocation; the other slots point inside it, right after the previous configuration. */
322 mv2_alltoall_thresholds_table[0] = new mv2_alltoall_tuning_table[agg_table_sum];
323 std::copy_n(table_ptrs[0], mv2_size_alltoall_tuning_table[0], mv2_alltoall_thresholds_table[0]);
324 for (int i = 1; i < mv2_alltoall_num_ppn_conf; i++) {
325 mv2_alltoall_thresholds_table[i] = mv2_alltoall_thresholds_table[i - 1] + mv2_size_alltoall_tuning_table[i - 1];
326 std::copy_n(table_ptrs[i], mv2_size_alltoall_tuning_table[i], mv2_alltoall_thresholds_table[i]);
331 /************ Allgather variables and initializers */
/* One entry of the allgather tuning data; only the function-pointer member is visible in this excerpt.
 * NOTE(review): the member name "MV2_pt_Allgatherction" looks like a damaged "..._Allgather_function";
 * it is kept as is because code outside this excerpt may reference it. */
333 struct mv2_allgather_tuning_element {
336 int (*MV2_pt_Allgatherction)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
337 MPI_Datatype recvtype, MPI_Comm comm_ptr);
/* One row of the allgather tuning table: a per-entry two_level flag array, the number of inter-leader
 * entries, and the inter-leader entries themselves (capacity MV2_MAX_NB_THRESHOLDS).
 * NOTE(review): other members of this struct are not visible in this excerpt. */
340 struct mv2_allgather_tuning_table {
342 int two_level[MV2_MAX_NB_THRESHOLDS];
343 int size_inter_table;
344 mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
/* Pointer to an allgather implementation (no explicit initializer, unlike its alltoall counterpart). */
347 int (*MV2_Allgatherction)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
348 MPI_Datatype recvtype, MPI_Comm comm);
/* Processes-per-node value of each configuration; allocated by init_mv2_allgather_tables_stampede(). */
350 int* mv2_allgather_table_ppn_conf = NULL;
/* Number of configurations; set to 3 by init_mv2_allgather_tables_stampede(). */
351 int mv2_allgather_num_ppn_conf = 1;
/* Number of rows of each configuration. */
352 int* mv2_size_allgather_tuning_table = NULL;
/* Per-configuration pointer to the first row of that configuration. */
353 mv2_allgather_tuning_table** mv2_allgather_thresholds_table = NULL;
/* Allgather variant referenced by the 16-ppn allgather table for the [0, 1024) entries.
 * NOTE(review): only the signature is visible in this excerpt; the body is not shown. */
355 static int MPIR_Allgather_RD_Allgather_Comm_MV2(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf,
356 int recvcount, MPI_Datatype recvtype, MPI_Comm comm_ptr)
/* Aliases mapping the MVAPICH2 allgather algorithm names onto SimGrid implementations. */
361 #define MPIR_Allgather_Bruck_MV2 simgrid::smpi::Coll_allgather_bruck::allgather
362 #define MPIR_Allgather_RD_MV2 simgrid::smpi::Coll_allgather_rdb::allgather
363 #define MPIR_Allgather_Ring_MV2 simgrid::smpi::Coll_allgather_ring::allgather
364 #define MPIR_2lvl_Allgather_MV2 simgrid::smpi::Coll_allgather_mvapich2_smp::allgather
/* Builds the allgather tuning tables for the Stampede calibration.
 *
 * Same layout as the alltoall initializer: three processes-per-node configurations (1, 2 and 16 ppn, 6 rows
 * each) are described in local tables, then copied into one contiguous allocation;
 * mv2_allgather_thresholds_table[i] points at the first row of configuration i.
 *
 * NOTE(review): several original lines of this function are absent from this excerpt (braces, parts of each
 * row, and whatever follows the last copy loop), so the release of table_ptrs cannot be checked here. */
366 static void init_mv2_allgather_tables_stampede()
368 int agg_table_sum = 0;
/* Register the shared cleanup routine only if no callback is installed yet. */
370 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
371 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
372 mv2_allgather_num_ppn_conf = 3;
373 mv2_allgather_thresholds_table = new mv2_allgather_tuning_table*[mv2_allgather_num_ppn_conf];
/* Temporary array recording the address of each local table until it is copied below. */
374 mv2_allgather_tuning_table** table_ptrs = new mv2_allgather_tuning_table*[mv2_allgather_num_ppn_conf];
375 mv2_size_allgather_tuning_table = new int[mv2_allgather_num_ppn_conf];
376 mv2_allgather_table_ppn_conf = new int[mv2_allgather_num_ppn_conf];
/* ---- Configuration 0: 1 process per node, 6 rows ---- */
377 mv2_allgather_table_ppn_conf[0] = 1;
378 mv2_size_allgather_tuning_table[0] = 6;
379 mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = {
385 {0, -1, &MPIR_Allgather_Ring_MV2},
393 {0, 262144, &MPIR_Allgather_RD_MV2}, {262144, -1, &MPIR_Allgather_Ring_MV2},
401 {0, 131072, &MPIR_Allgather_RD_MV2}, {131072, -1, &MPIR_Allgather_Ring_MV2},
409 {0, 131072, &MPIR_Allgather_RD_MV2}, {131072, -1, &MPIR_Allgather_Ring_MV2},
417 {0, 65536, &MPIR_Allgather_RD_MV2}, {65536, -1, &MPIR_Allgather_Ring_MV2},
425 {0, 32768, &MPIR_Allgather_RD_MV2}, {32768, -1, &MPIR_Allgather_Ring_MV2},
429 table_ptrs[0] = mv2_tmp_allgather_thresholds_table_1ppn;
/* ---- Configuration 1: 2 processes per node, 6 rows ---- */
430 mv2_allgather_table_ppn_conf[1] = 2;
431 mv2_size_allgather_tuning_table[1] = 6;
432 mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_2ppn[] = {
438 {0, 524288, &MPIR_Allgather_RD_MV2}, {524288, -1, &MPIR_Allgather_Ring_MV2},
/* In the following rows the last two entries both select the ring algorithm. */
446 {0, 32768, &MPIR_Allgather_RD_MV2},
447 {32768, 524288, &MPIR_Allgather_Ring_MV2},
448 {524288, -1, &MPIR_Allgather_Ring_MV2},
456 {0, 16384, &MPIR_Allgather_RD_MV2},
457 {16384, 524288, &MPIR_Allgather_Ring_MV2},
458 {524288, -1, &MPIR_Allgather_Ring_MV2},
466 {0, 65536, &MPIR_Allgather_RD_MV2},
467 {65536, 524288, &MPIR_Allgather_Ring_MV2},
468 {524288, -1, &MPIR_Allgather_Ring_MV2},
476 {0, 32768, &MPIR_Allgather_RD_MV2},
477 {32768, 524288, &MPIR_Allgather_Ring_MV2},
478 {524288, -1, &MPIR_Allgather_Ring_MV2},
486 {0, 65536, &MPIR_Allgather_RD_MV2},
487 {65536, 524288, &MPIR_Allgather_Ring_MV2},
488 {524288, -1, &MPIR_Allgather_Ring_MV2},
492 table_ptrs[1] = mv2_tmp_allgather_thresholds_table_2ppn;
/* ---- Configuration 2: 16 processes per node, 6 rows ---- */
493 mv2_allgather_table_ppn_conf[2] = 16;
494 mv2_size_allgather_tuning_table[2] = 6;
495 mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_16ppn[] = {
501 {0, 1024, &MPIR_Allgather_RD_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
509 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
517 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
525 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
533 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
541 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
546 table_ptrs[2] = mv2_tmp_allgather_thresholds_table_16ppn;
/* Total number of rows over all configurations: size of the single backing allocation. */
548 for (int i = 0; i < mv2_allgather_num_ppn_conf; i++) {
549 agg_table_sum += mv2_size_allgather_tuning_table[i];
/* Slot 0 owns the allocation; each later slot starts right after the rows of the previous configuration. */
551 mv2_allgather_thresholds_table[0] = new mv2_allgather_tuning_table[agg_table_sum];
552 std::copy_n(table_ptrs[0], mv2_size_allgather_tuning_table[0], mv2_allgather_thresholds_table[0]);
553 for (int i = 1; i < mv2_allgather_num_ppn_conf; i++) {
554 mv2_allgather_thresholds_table[i] = mv2_allgather_thresholds_table[i - 1] + mv2_size_allgather_tuning_table[i - 1];
555 std::copy_n(table_ptrs[i], mv2_size_allgather_tuning_table[i], mv2_allgather_thresholds_table[i]);
560 /************ Gather variables and initializers */
/* One entry of the gather tuning data; only the function-pointer member is visible in this excerpt. */
562 struct mv2_gather_tuning_element {
/* Gather implementation associated with this entry (takes the root rank, unlike the all-to-all style calls). */
565 int (*MV2_pt_Gather_function)(const void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
566 MPI_Datatype recvtype, int root, MPI_Comm comm_ptr);
/* One row of the gather tuning table: a counted list of inter-leader entries followed by a counted list of
 * intra-node entries, each with capacity MV2_MAX_NB_THRESHOLDS.
 * NOTE(review): other members of this struct are not visible in this excerpt. */
569 struct mv2_gather_tuning_table {
571 int size_inter_table;
572 mv2_gather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
573 int size_intra_table;
574 mv2_gather_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
/* Number of rows in the gather table (also assigned 7 by init_mv2_gather_tables_stampede()). */
577 int mv2_size_gather_tuning_table = 7;
/* Gather table storage; allocated by init_mv2_gather_tables_stampede(). */
578 mv2_gather_tuning_table* mv2_gather_thresholds_table = NULL;
/* Signature shared by the two gather function pointers below. */
580 typedef int (*MV2_Gather_function_ptr)(const void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
581 MPI_Datatype recvtype, int root, MPI_Comm comm);
/* Inter-leader and intra-node gather function pointers; both initially NULL. */
583 MV2_Gather_function_ptr MV2_Gather_inter_leader_function = NULL;
584 MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL;
/* Aliases mapping the MVAPICH2 gather algorithm names onto SimGrid implementations. */
586 #define MPIR_Gather_MV2_Direct simgrid::smpi::Coll_gather_ompi_basic_linear::gather
587 #define MPIR_Gather_MV2_two_level_Direct simgrid::smpi::Coll_gather_mvapich2_two_level::gather
588 #define MPIR_Gather_intra simgrid::smpi::Coll_gather_mpich::gather
/* Builds the gather tuning table for the Stampede calibration: installs the cleanup callback when none is
 * set, allocates 7 rows, describes them in a local table and copies that table into the allocation.
 * Each row lists inter-leader entries first, then intra-node entries.
 * NOTE(review): parts of each row (and the braces) are absent from this excerpt. */
590 static void init_mv2_gather_tables_stampede()
593 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
594 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
595 mv2_size_gather_tuning_table = 7;
596 mv2_gather_thresholds_table = new mv2_gather_tuning_table[mv2_size_gather_tuning_table];
597 mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[] = {
600 {{0, 524288, &MPIR_Gather_MV2_Direct}, {524288, -1, &MPIR_Gather_intra}},
602 {{0, -1, &MPIR_Gather_MV2_Direct}}},
605 {{0, 16384, &MPIR_Gather_MV2_Direct},
606 {16384, 131072, &MPIR_Gather_intra},
607 {131072, -1, &MPIR_Gather_MV2_two_level_Direct}},
609 {{0, -1, &MPIR_Gather_intra}}},
612 {{0, 256, &MPIR_Gather_MV2_two_level_Direct},
613 {256, 16384, &MPIR_Gather_MV2_Direct},
/* NOTE(review): this entry starts at 256 although the previous one already covers [256, 16384); the
 * other rows use 16384 here. Possibly a typo in the calibration data - confirm before changing. */
614 {256, -1, &MPIR_Gather_MV2_two_level_Direct}},
616 {{0, -1, &MPIR_Gather_intra}}},
619 {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
620 {512, 16384, &MPIR_Gather_MV2_Direct},
621 {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
623 {{0, -1, &MPIR_Gather_intra}}},
626 {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
627 {512, 16384, &MPIR_Gather_MV2_Direct},
628 {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
630 {{0, -1, &MPIR_Gather_intra}}},
633 {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
634 {512, 16384, &MPIR_Gather_MV2_Direct},
/* NOTE(review): 8196 overlaps the previous [512, 16384) entry and is not a power of two (8192 is);
 * possibly a typo in the calibration data - confirm before changing. */
635 {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
637 {{0, -1, &MPIR_Gather_intra}}},
640 {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
641 {512, 16384, &MPIR_Gather_MV2_Direct},
/* NOTE(review): same 8196 lower bound as in the previous row - confirm. */
642 {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
644 {{0, -1, &MPIR_Gather_intra}}},
/* Publish the local table into the heap-allocated global one. */
647 std::copy_n(mv2_tmp_gather_thresholds_table, mv2_size_gather_tuning_table, mv2_gather_thresholds_table);
650 /************ Allgatherv variables and initializers */
/* One entry of the allgatherv tuning data; only the function-pointer member is visible in this excerpt. */
652 struct mv2_allgatherv_tuning_element {
/* Allgatherv implementation associated with this entry. */
655 int (*MV2_pt_Allgatherv_function)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts,
656 const int* displs, MPI_Datatype recvtype, MPI_Comm commg);
/* One row of the allgatherv tuning table: a counted list of inter-leader entries (capacity
 * MV2_MAX_NB_THRESHOLDS). NOTE(review): other members are not visible in this excerpt. */
659 struct mv2_allgatherv_tuning_table {
661 int size_inter_table;
662 mv2_allgatherv_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
/* Pointer to an allgatherv implementation (declared without an explicit initializer). */
665 int (*MV2_Allgatherv_function)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts,
666 const int* displs, MPI_Datatype recvtype, MPI_Comm comm);
/* Row count and storage of the allgatherv table; both are set by init_mv2_allgatherv_tables_stampede(). */
668 int mv2_size_allgatherv_tuning_table = 0;
669 mv2_allgatherv_tuning_table* mv2_allgatherv_thresholds_table = NULL;
/* Aliases mapping the MVAPICH2 allgatherv algorithm names onto SimGrid implementations. */
671 #define MPIR_Allgatherv_Rec_Doubling_MV2 simgrid::smpi::Coll_allgatherv_mpich_rdb::allgatherv
672 #define MPIR_Allgatherv_Bruck_MV2 simgrid::smpi::Coll_allgatherv_ompi_bruck::allgatherv
673 #define MPIR_Allgatherv_Ring_MV2 simgrid::smpi::Coll_allgatherv_mpich_ring::allgatherv
/* Builds the allgatherv tuning table for the Stampede calibration: installs the cleanup callback when none
 * is set, allocates 6 rows, describes them in a local table and copies that table into the allocation.
 * Every visible row pairs the recursive-doubling alias for the lower range with the ring alias above it.
 * NOTE(review): parts of each row (and the braces) are absent from this excerpt. */
675 static void init_mv2_allgatherv_tables_stampede()
677 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
678 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
679 mv2_size_allgatherv_tuning_table = 6;
680 mv2_allgatherv_thresholds_table = new mv2_allgatherv_tuning_table[mv2_size_allgatherv_tuning_table];
681 mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
686 {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, {512, -1, &MPIR_Allgatherv_Ring_MV2},
693 {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, {512, -1, &MPIR_Allgatherv_Ring_MV2},
700 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
707 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
714 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
721 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
/* Publish the local table into the heap-allocated global one. */
726 std::copy_n(mv2_tmp_allgatherv_thresholds_table, mv2_size_allgatherv_tuning_table, mv2_allgatherv_thresholds_table);
729 /************ Allreduce variables and initializers */
/* One entry of the allreduce tuning data; only the start of the function-pointer member is visible here.
 * NOTE(review): the end of the declaration is absent from this excerpt. */
731 struct mv2_allreduce_tuning_element {
734 int (*MV2_pt_Allreducection)(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
/* One row of the allreduce tuning table: a per-entry two-level flag array, then counted lists of
 * inter-leader and intra-node entries (capacity MV2_MAX_NB_THRESHOLDS each).
 * NOTE(review): other members of this struct are not visible in this excerpt. */
738 struct mv2_allreduce_tuning_table {
741 int is_two_level_allreduce[MV2_MAX_NB_THRESHOLDS];
742 int size_inter_table;
743 mv2_allreduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
744 int size_intra_table;
745 mv2_allreduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
/* Pointers to allreduce implementations (general and intra-node); both initially NULL. */
748 int (*MV2_Allreducection)(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
749 MPI_Comm comm) = NULL;
751 int (*MV2_Allreduce_intra_function)(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
752 MPI_Comm comm) = NULL;
/* Row count and storage of the allreduce table; both are set by init_mv2_allreduce_tables_stampede(). */
754 int mv2_size_allreduce_tuning_table = 0;
755 mv2_allreduce_tuning_table* mv2_allreduce_thresholds_table = NULL;
/* Local allreduce helper functions used as table entries.
 * NOTE(review): the bodies of the first two helpers, and the return statements of all four, are absent
 * from this excerpt. */
757 static int MPIR_Allreduce_mcst_reduce_two_level_helper_MV2(const void* sendbuf, void* recvbuf, int count,
758 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
763 static int MPIR_Allreduce_mcst_reduce_redscat_gather_MV2(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
764 MPI_Op op, MPI_Comm comm)
/* Performs a reduce towards rank 0 of comm through Colls::reduce. */
769 static int MPIR_Allreduce_reduce_p2p_MV2(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
772 simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
/* Same visible behavior as the p2p helper: a reduce towards rank 0 of comm through Colls::reduce. */
776 static int MPIR_Allreduce_reduce_shmem_MV2(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
779 simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
/* Aliases mapping the MVAPICH2 allreduce algorithm names onto SimGrid implementations. */
783 #define MPIR_Allreduce_pt2pt_rd_MV2 simgrid::smpi::Coll_allreduce_rdb::allreduce
784 #define MPIR_Allreduce_pt2pt_rs_MV2 simgrid::smpi::Coll_allreduce_mvapich2_rs::allreduce
785 #define MPIR_Allreduce_two_level_MV2 simgrid::smpi::Coll_allreduce_mvapich2_two_level::allreduce
/* Builds the allreduce tuning table for the Stampede calibration: installs the cleanup callback when none
 * is set, allocates 8 rows, describes them in a local table and copies that table into the allocation.
 * In each visible row the first group of entries uses the pt2pt aliases and the second group uses the
 * local reduce_shmem / reduce_p2p helpers.
 * NOTE(review): parts of each row (and the braces) are absent from this excerpt. */
787 static void init_mv2_allreduce_tables_stampede()
789 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
790 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
791 mv2_size_allreduce_tuning_table = 8;
792 mv2_allreduce_thresholds_table = new mv2_allreduce_tuning_table[mv2_size_allreduce_tuning_table];
793 mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
800 {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2}, {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
804 {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
813 {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
814 {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
815 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
/* NOTE(review): in several rows the second group ends at 16384 without an open-ended (-1) entry - confirm
 * that the lookup code handles larger values. */
819 {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
828 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
829 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
830 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
834 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
843 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
844 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
845 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
849 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
858 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
859 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
860 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
864 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
873 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
874 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
875 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
879 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
888 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
889 {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
890 {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
891 {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
895 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
/* Last row: the only one whose first group references the reduce_p2p and mcst two-level helpers. */
904 {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
905 {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
906 {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
907 {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
908 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
912 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
/* Publish the local table into the heap-allocated global one. */
917 std::copy_n(mv2_tmp_allreduce_thresholds_table, mv2_size_allreduce_tuning_table, mv2_allreduce_thresholds_table);
/* One entry of the bcast tuning data: a bcast function pointer followed by a zcpy pipelined k-nomial
 * factor (the table initializers store -1, 2, 4 or 8 there).
 * NOTE(review): members preceding the function pointer are not visible in this excerpt. */
920 struct mv2_bcast_tuning_element {
923 int (*MV2_pt_Bcast_function)(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm_ptr);
924 int zcpy_pipelined_knomial_factor;
/* One row of the bcast tuning table: segment size, intra- and inter-node k-nomial factors, a per-entry
 * two-level flag array, then counted lists of inter-leader and intra-node entries (capacity
 * MV2_MAX_NB_THRESHOLDS each).
 * NOTE(review): members before bcast_segment_size, if any, are not visible in this excerpt. */
927 struct mv2_bcast_tuning_table {
929 int bcast_segment_size;
930 int intra_node_knomial_factor;
931 int inter_node_knomial_factor;
932 int is_two_level_bcast[MV2_MAX_NB_THRESHOLDS];
933 int size_inter_table;
934 mv2_bcast_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
935 int size_intra_table;
936 mv2_bcast_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
/* Row count and storage of the bcast table; both are set by init_mv2_bcast_tables_stampede(). */
939 int mv2_size_bcast_tuning_table = 0;
940 mv2_bcast_tuning_table* mv2_bcast_thresholds_table = NULL;
/* Pointers to bcast implementations (general and intra-node); both initially NULL. */
942 int (*MV2_Bcast_function)(void* buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm_ptr) = NULL;
944 int (*MV2_Bcast_intra_node_function)(void* buffer, int count, MPI_Datatype datatype, int root,
945 MPI_Comm comm_ptr) = NULL;
/* Bcast parameters with their initial values.
 * NOTE(review): the code consuming them is not visible in this excerpt. */
947 int zcpy_knomial_factor = 2;
948 int mv2_pipelined_zcpy_knomial_factor = -1;
949 int bcast_segment_size = 8192;
950 int mv2_inter_node_knomial_factor = 4;
951 int mv2_intra_node_knomial_factor = 4;
/* Bcast selection constants.
 * mv2_bcast_large_msg is parenthesized so that it keeps its value (524288) when it expands inside a larger
 * expression, e.g. as a divisor or next to another multiplicative operator. */
#define mv2_bcast_two_level_system_size 64
#define mv2_bcast_short_msg 16384
#define mv2_bcast_large_msg (512 * 1024)
/* NOTE(review): presumably the rank used as root inside a node - its uses are not visible in this excerpt. */
956 #define INTRA_NODE_ROOT 0
/* Aliases mapping the MVAPICH2 bcast algorithm names onto SimGrid implementations. Several MVAPICH2 names
 * share one SimGrid implementation: the two pipelined variants and the shmem bcast all map to
 * Coll_bcast_mpich, both scatter_ring_allgather variants map to Coll_bcast_scatter_LR_allgather, and both
 * inter-node helpers map to Coll_bcast_mvapich2_inter_node. */
958 #define MPIR_Pipelined_Bcast_Zcpy_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
959 #define MPIR_Pipelined_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
960 #define MPIR_Bcast_binomial_MV2 simgrid::smpi::Coll_bcast_binomial_tree::bcast
961 #define MPIR_Bcast_scatter_ring_allgather_shm_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
962 #define MPIR_Bcast_scatter_doubling_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_rdb_allgather::bcast
963 #define MPIR_Bcast_scatter_ring_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
964 #define MPIR_Shmem_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
965 #define MPIR_Bcast_tune_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
966 #define MPIR_Bcast_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
967 #define MPIR_Knomial_Bcast_intra_node_MV2 simgrid::smpi::Coll_bcast_mvapich2_knomial_intra_node::bcast
968 #define MPIR_Bcast_intra_MV2 simgrid::smpi::Coll_bcast_mvapich2_intra_node::bcast
/* Builds the bcast tuning table for the Stampede calibration: installs the cleanup callback when none is
 * set, allocates 8 rows, describes them in a local table and copies that table into the allocation.
 * Each row lists its inter-leader entries first and a parallel list of intra-node entries second; the
 * fourth value of every entry is the zcpy pipelined k-nomial factor.
 * NOTE(review): parts of each row (leading scalars, some two-level flag lists, braces) are absent from
 * this excerpt. */
970 static void init_mv2_bcast_tables_stampede()
973 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
974 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
975 mv2_size_bcast_tuning_table = 8;
976 mv2_bcast_thresholds_table = new mv2_bcast_tuning_table[mv2_size_bcast_tuning_table];
978 mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[] = {
/* Row with 11 entries per list. */
983 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
985 {{0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
986 {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
987 {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
988 {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
989 {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
990 {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
991 {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
992 {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
993 {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
994 {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
995 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
997 {{0, 8, &MPIR_Shmem_Bcast_MV2, 2},
998 {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
999 {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
1000 {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
1001 {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
1002 {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
1003 {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1004 {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
1005 {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1006 {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1007 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
/* Row with 8 entries per list. */
1012 {1, 1, 1, 1, 1, 1, 1, 1},
1014 {{0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1015 {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1016 {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1017 {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1018 {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1019 {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1020 {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1021 {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}},
1023 {{0, 128, &MPIR_Shmem_Bcast_MV2, 2},
1024 {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
1025 {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1026 {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
1027 {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1028 {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
1029 {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1030 {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}}},
/* Row with 9 entries per list. */
1035 {1, 1, 1, 1, 1, 1, 1, 1, 1},
1037 {{0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1038 {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1039 {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1040 {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1041 {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1042 {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1043 {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1044 {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1045 {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}},
1047 {{0, 2, &MPIR_Shmem_Bcast_MV2, 4},
1048 {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
1049 {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
1050 {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
1051 {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
1052 {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
1053 {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
1054 {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
1055 {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}}},
/* Row with 4 entries per list. */
1062 {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1063 {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1064 {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1065 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
1067 {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1068 {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1069 {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
/* NOTE(review): this is the only entry with a NULL function pointer; whoever reads the intra-node list
 * must cope with it - confirm against the selection code. */
1070 {524288, -1, NULL, -1}}},
/* Rows with 5 entries per list. */
1077 {{0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1078 {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1079 {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1080 {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1081 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1083 {{0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1084 {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1085 {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1086 {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1087 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1094 {{0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1095 {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1096 {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1097 {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
1098 {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1100 {{0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1101 {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1102 {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1103 {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1104 {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1111 {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1112 {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1113 {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1114 {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1115 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1117 {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1118 {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1119 {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1120 {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1121 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
/* Last row, with 7 entries per list. */
1126 {1, 1, 1, 1, 1, 1, 1},
1128 {{0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1129 {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1130 {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1131 {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1132 {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1133 {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1134 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1136 {{0, 16, &MPIR_Shmem_Bcast_MV2, 8},
1137 {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
1138 {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1139 {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1140 {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1141 {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1142 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}}};
/* Publish the local table into the heap-allocated global one. */
1144 std::copy_n(mv2_tmp_bcast_thresholds_table, mv2_size_bcast_tuning_table, mv2_bcast_thresholds_table);
1147 /************ Reduce variables and initializers */
/* One entry of the reduce tuning data; only the start of the function-pointer member is visible here.
 * NOTE(review): the end of the declaration is absent from this excerpt. */
1149 struct mv2_reduce_tuning_element {
1152 int (*MV2_pt_Reduce_function)(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
/* One row of the reduce tuning table: a per-entry two-level flag array, then counted lists of
 * inter-leader and intra-node entries (capacity MV2_MAX_NB_THRESHOLDS each).
 * NOTE(review): members before is_two_level_reduce are not visible in this excerpt. */
1156 struct mv2_reduce_tuning_table {
1160 int is_two_level_reduce[MV2_MAX_NB_THRESHOLDS];
1161 int size_inter_table;
1162 mv2_reduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1163 int size_intra_table;
1164 mv2_reduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
/* Row count and storage of the reduce table; both are set by init_mv2_reduce_tables_stampede(). */
1167 int mv2_size_reduce_tuning_table = 0;
1168 mv2_reduce_tuning_table* mv2_reduce_thresholds_table = NULL;
/* Intra- and inter-node k-nomial factors for reduce, initialized to -1.
 * NOTE(review): the code consuming them is not visible in this excerpt. */
1170 int mv2_reduce_intra_knomial_factor = -1;
1171 int mv2_reduce_inter_knomial_factor = -1;
/* Pointers to reduce implementations (general and intra-node); both initially NULL. */
1173 int (*MV2_Reduce_function)(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1174 MPI_Comm comm_ptr) = NULL;
1176 int (*MV2_Reduce_intra_function)(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1177 MPI_Comm comm_ptr) = NULL;
/* Aliases mapping the MVAPICH2 reduce algorithm names onto SimGrid implementations. The inter- and
 * intra-node k-nomial wrappers both map to Coll_reduce_mvapich2_knomial. */
1179 #define MPIR_Reduce_inter_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
1180 #define MPIR_Reduce_intra_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
1181 #define MPIR_Reduce_binomial_MV2 simgrid::smpi::Coll_reduce_binomial::reduce
1182 #define MPIR_Reduce_redscat_gather_MV2 simgrid::smpi::Coll_reduce_scatter_gather::reduce
1183 #define MPIR_Reduce_shmem_MV2 simgrid::smpi::Coll_reduce_ompi_basic_linear::reduce
1184 #define MPIR_Reduce_two_level_helper_MV2 simgrid::smpi::Coll_reduce_mvapich2_two_level::reduce
/* Builds the reduce tuning table (Stampede calibration).
 * Registers smpi_coll_cleanup_mvapich2 as the cleanup callback when none is set, allocates
 * mv2_reduce_thresholds_table with mv2_size_reduce_tuning_table entries and fills it by copying a local
 * initializer array.
 * NOTE(review): the three-value rows look like {lower bound, upper bound, algorithm}, with -1 standing for
 * "no upper bound" — confirm against the tuning element struct and the selector. */
1186 static void init_mv2_reduce_tables_stampede()
/* Install the cleanup hook only if nobody registered one yet. */
1188 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1189 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
/* Entry count: std::copy_n below reads exactly this many entries from the local initializer array, so the
 * two must stay in sync. */
1191 mv2_size_reduce_tuning_table = 8;
1192 mv2_reduce_thresholds_table = new mv2_reduce_tuning_table[mv2_size_reduce_tuning_table];
1193 mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = {
1201 {0, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1202 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1203 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1207 {0, 65536, &MPIR_Reduce_shmem_MV2}, {65536, -1, &MPIR_Reduce_binomial_MV2},
/* NOTE(review): seven 0/1 values, followed by seven rows — looks like one flag per row; meaning to be
 * confirmed against the struct definition. */
1214 {1, 1, 1, 1, 0, 0, 0},
1217 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1218 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1219 {16384, 32768, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1220 {32768, 65536, &MPIR_Reduce_binomial_MV2},
1221 {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1222 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1223 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1227 {0, 8192, &MPIR_Reduce_shmem_MV2},
1228 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1229 {16384, 32768, &MPIR_Reduce_shmem_MV2},
1230 {32768, 65536, &MPIR_Reduce_shmem_MV2},
1231 {65536, 262144, &MPIR_Reduce_shmem_MV2},
1232 {262144, -1, &MPIR_Reduce_binomial_MV2},
1242 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1243 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1244 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1245 {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1246 {262144, -1, &MPIR_Reduce_redscat_gather_MV2},
1250 {0, 8192, &MPIR_Reduce_shmem_MV2},
1251 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1252 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1253 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1254 {262144, -1, &MPIR_Reduce_binomial_MV2},
1264 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1265 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1266 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1267 {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1268 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1269 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1273 {0, 8192, &MPIR_Reduce_shmem_MV2},
1274 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1275 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1276 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1277 {262144, -1, &MPIR_Reduce_binomial_MV2},
/* NOTE(review): again seven 0/1 values ahead of seven rows — see the note on the earlier flag list. */
1284 {1, 1, 1, 0, 1, 1, 0},
1287 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1288 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1289 {16384, 32768, &MPIR_Reduce_binomial_MV2},
1290 {32768, 65536, &MPIR_Reduce_binomial_MV2},
1291 {65536, 262144, &MPIR_Reduce_binomial_MV2},
1292 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1293 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1297 {0, 8192, &MPIR_Reduce_shmem_MV2},
1298 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1299 {16384, 32768, &MPIR_Reduce_shmem_MV2},
1300 {32768, 65536, &MPIR_Reduce_shmem_MV2},
1301 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1302 {262144, -1, &MPIR_Reduce_binomial_MV2},
1312 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1313 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1314 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1315 {65536, 262144, &MPIR_Reduce_binomial_MV2},
1316 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1317 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1321 {0, 8192, &MPIR_Reduce_shmem_MV2},
1322 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1323 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1324 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1325 {262144, -1, &MPIR_Reduce_binomial_MV2},
1335 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1336 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1337 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1338 {65536, 262144, &MPIR_Reduce_binomial_MV2},
1339 {262144, -1, &MPIR_Reduce_binomial_MV2},
1343 {0, 8192, &MPIR_Reduce_shmem_MV2},
1344 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1345 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1346 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1347 {262144, -1, &MPIR_Reduce_binomial_MV2},
1357 {0, 2048, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1358 {2048, 4096, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1359 {4096, 16384, &MPIR_Reduce_binomial_MV2},
1360 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1361 {65536, 131072, &MPIR_Reduce_binomial_MV2},
1362 {131072, -1, &MPIR_Reduce_binomial_MV2},
1366 {0, 2048, &MPIR_Reduce_shmem_MV2},
1367 {2048, 4096, &MPIR_Reduce_shmem_MV2},
1368 {4096, 16384, &MPIR_Reduce_shmem_MV2},
1369 {16384, 65536, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1370 {65536, 131072, &MPIR_Reduce_binomial_MV2},
1371 {131072, -1, &MPIR_Reduce_shmem_MV2},
/* Publish the local initializer into the heap-allocated global table. */
1376 std::copy_n(mv2_tmp_reduce_thresholds_table, mv2_size_reduce_tuning_table, mv2_reduce_thresholds_table);
1379 /************ Reduce scatter variables and initializers */
/* One row of a reduce-scatter tuning list; carries the algorithm to run as a function pointer. */
1381 struct mv2_red_scat_tuning_element {
/* Reduce-scatter implementation selected by this row. */
1384 int (*MV2_pt_Red_scat_function)(const void* sendbuf, void* recvbuf, const int* recvcnts, MPI_Datatype datatype, MPI_Op op,
/* One entry of the reduce-scatter tuning table: a fixed-capacity list of tuning rows. */
1388 struct mv2_red_scat_tuning_table {
/* NOTE(review): presumably the number of rows of inter_leader that are actually filled in — confirm. */
1390 int size_inter_table;
/* Row storage, capacity MV2_MAX_NB_THRESHOLDS. */
1391 mv2_red_scat_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
/* Number of entries in mv2_red_scat_thresholds_table; 0 until the table is built. */
1394 int mv2_size_red_scat_tuning_table = 0;
/* Heap-allocated reduce-scatter tuning table; NULL until the table is built. */
1395 mv2_red_scat_tuning_table* mv2_red_scat_thresholds_table = NULL;
/* Pointer to a reduce-scatter routine.
 * NOTE(review): presumably set by the selector from the tuning table — confirm at the call site. */
1397 int (*MV2_Red_scat_function)(const void* sendbuf, void* recvbuf, const int* recvcnts, MPI_Datatype datatype, MPI_Op op,
/* File-local wrapper that forwards its arguments to Coll_reduce_scatter_default::reduce_scatter, so the default
 * implementation can be referenced from the tuning table under an MVAPICH2-style name. */
1400 static int MPIR_Reduce_Scatter_Basic_MV2(const void* sendbuf, void* recvbuf, const int* recvcnts, MPI_Datatype datatype, MPI_Op op,
/* NOTE(review): the value produced by reduce_scatter is not used by this statement — confirm that dropping
 * it is intended rather than returning it to the caller. */
1403 simgrid::smpi::Coll_reduce_scatter_default::reduce_scatter(sendbuf, recvbuf, recvcnts, datatype, op, comm);
/* Aliases mapping the MVAPICH2 reduce-scatter algorithm names onto SimGrid collective implementations
 * (MPICH non-commutative, Open MPI basic recursive halving, MPICH pairwise). */
1406 #define MPIR_Reduce_scatter_non_comm_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_noncomm::reduce_scatter
1407 #define MPIR_Reduce_scatter_Rec_Halving_MV2 \
1408 simgrid::smpi::Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter
1409 #define MPIR_Reduce_scatter_Pair_Wise_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_pair::reduce_scatter
/* Builds the reduce-scatter tuning table (Stampede calibration).
 * Registers smpi_coll_cleanup_mvapich2 as the cleanup callback when none is set, allocates
 * mv2_red_scat_thresholds_table with mv2_size_red_scat_tuning_table entries and fills it by copying a local
 * initializer array.
 * NOTE(review): the three-value rows look like {lower bound, upper bound, algorithm}, with -1 standing for
 * "no upper bound" — confirm against the tuning element struct and the selector. */
1411 static void init_mv2_reduce_scatter_tables_stampede()
/* Install the cleanup hook only if nobody registered one yet. */
1413 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1414 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
/* Entry count: std::copy_n below reads exactly this many entries from the local initializer array. */
1415 mv2_size_red_scat_tuning_table = 6;
1416 mv2_red_scat_thresholds_table = new mv2_red_scat_tuning_table[mv2_size_red_scat_tuning_table];
1417 mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = {
/* The first three entries use basic -> recursive halving -> pairwise as the bounds grow. */
1422 {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1423 {64, 65536, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1424 {65536, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1431 {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1432 {64, 131072, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1433 {131072, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1440 {0, 1024, &MPIR_Reduce_Scatter_Basic_MV2},
1441 {1024, 262144, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1442 {262144, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
/* The last three entries only switch between basic and recursive halving. */
1449 {0, 128, &MPIR_Reduce_Scatter_Basic_MV2}, {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1456 {0, 128, &MPIR_Reduce_Scatter_Basic_MV2}, {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1463 {0, 256, &MPIR_Reduce_Scatter_Basic_MV2}, {256, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
/* Publish the local initializer into the heap-allocated global table. */
1468 std::copy_n(mv2_tmp_red_scat_thresholds_table, mv2_size_red_scat_tuning_table, mv2_red_scat_thresholds_table);
1471 /************ Scatter variables and initializers */
/* One row of a scatter tuning list; carries the algorithm to run as a function pointer. */
1473 struct mv2_scatter_tuning_element {
/* Scatter implementation selected by this row. */
1476 int (*MV2_pt_Scatter_function)(const void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1477 MPI_Datatype recvtype, int root, MPI_Comm comm);
/* One entry of the scatter tuning table: two fixed-capacity lists of tuning rows, one named for inter-leader
 * and one for intra-node use. */
1480 struct mv2_scatter_tuning_table {
/* NOTE(review): presumably the number of rows of inter_leader that are actually filled in — confirm. */
1482 int size_inter_table;
/* Row storage, capacity MV2_MAX_NB_THRESHOLDS. */
1483 mv2_scatter_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
/* NOTE(review): presumably the number of rows of intra_node that are actually filled in — confirm. */
1484 int size_intra_table;
/* Row storage, capacity MV2_MAX_NB_THRESHOLDS. */
1485 mv2_scatter_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
/* Array with one value per configuration; NULL until the tables are built.
 * NOTE(review): by analogy with the alltoall variables this is the processes-per-node value of each
 * configuration — confirm. */
1488 int* mv2_scatter_table_ppn_conf = NULL;
/* Number of configurations (length of the per-configuration arrays). */
1489 int mv2_scatter_num_ppn_conf = 1;
/* Array with the number of tuning entries of each configuration; NULL until the tables are built. */
1490 int* mv2_size_scatter_tuning_table = NULL;
/* Array of per-configuration tuning tables; NULL until the tables are built. */
1491 mv2_scatter_tuning_table** mv2_scatter_thresholds_table = NULL;
/* Pointer to a scatter routine, NULL initially.
 * NOTE(review): presumably set by the selector from the tuning tables — confirm at the call site. */
1493 int (*MV2_Scatter_function)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
1494 MPI_Datatype recvtype, int root, MPI_Comm comm) = NULL;
/* Second scatter routine pointer with the same signature, also NULL initially.
 * NOTE(review): by its name this holds the intra-node variant — confirm. */
1496 int (*MV2_Scatter_intra_function)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
1497 MPI_Datatype recvtype, int root, MPI_Comm comm) = NULL;
/* Prototype of the "mcst" scatter wrapper; its parameter list matches the scatter function pointers. */
1498 int MPIR_Scatter_mcst_wrap_MV2(const void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1499 MPI_Datatype recvtype, int root, MPI_Comm comm_ptr);
/* Definition of the wrapper declared just above.
 * NOTE(review): this is a non-static, non-inline function defined in a header — confirm the header is only
 * included from one translation unit. */
1501 int MPIR_Scatter_mcst_wrap_MV2(const void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1502 MPI_Datatype recvtype, int root, MPI_Comm comm_ptr)
/* Aliases mapping the MVAPICH2 scatter algorithm names onto SimGrid collective implementations
 * (Open MPI binomial and basic linear, plus the two MVAPICH2 two-level variants). */
1507 #define MPIR_Scatter_MV2_Binomial simgrid::smpi::Coll_scatter_ompi_binomial::scatter
1508 #define MPIR_Scatter_MV2_Direct simgrid::smpi::Coll_scatter_ompi_basic_linear::scatter
1509 #define MPIR_Scatter_MV2_two_level_Binomial simgrid::smpi::Coll_scatter_mvapich2_two_level_binomial::scatter
1510 #define MPIR_Scatter_MV2_two_level_Direct simgrid::smpi::Coll_scatter_mvapich2_two_level_direct::scatter
/* Builds the scatter tuning tables (Stampede calibration) for three configurations, whose
 * mv2_scatter_table_ppn_conf values are 1, 2 and 16.
 * Registers smpi_coll_cleanup_mvapich2 as the cleanup callback when none is set. Each configuration is written
 * as a local initializer array; all of them are then copied into a single heap block whose start is stored in
 * mv2_scatter_thresholds_table[0], with the later slots pointing further into that same block.
 * NOTE(review): the three-value rows look like {lower bound, upper bound, algorithm}, with -1 standing for
 * "no upper bound" — confirm against the tuning element struct and the selector. */
1512 static void init_mv2_scatter_tables_stampede()
/* Install the cleanup hook only if nobody registered one yet. */
1514 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1515 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
1517 int agg_table_sum = 0;
1518 mv2_scatter_num_ppn_conf = 3;
1519 mv2_scatter_thresholds_table = new mv2_scatter_tuning_table*[mv2_scatter_num_ppn_conf];
/* Scratch array remembering where each local initializer lives until it has been copied; freed at the end. */
1520 mv2_scatter_tuning_table** table_ptrs = new mv2_scatter_tuning_table*[mv2_scatter_num_ppn_conf];
1521 mv2_size_scatter_tuning_table = new int[mv2_scatter_num_ppn_conf];
1522 mv2_scatter_table_ppn_conf = new int[mv2_scatter_num_ppn_conf];
/* Configuration 0: value 1, six entries. */
1523 mv2_scatter_table_ppn_conf[0] = 1;
1524 mv2_size_scatter_tuning_table[0] = 6;
1525 mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = {
1530 {0, -1, &MPIR_Scatter_MV2_Binomial},
1534 {0, -1, &MPIR_Scatter_MV2_Binomial},
1542 {0, -1, &MPIR_Scatter_MV2_Direct},
1546 {0, -1, &MPIR_Scatter_MV2_Direct},
1554 {0, -1, &MPIR_Scatter_MV2_Direct},
1558 {0, -1, &MPIR_Scatter_MV2_Direct},
1566 {0, -1, &MPIR_Scatter_MV2_Direct},
1570 {0, -1, &MPIR_Scatter_MV2_Direct},
1578 {0, -1, &MPIR_Scatter_MV2_Direct},
1582 {0, -1, &MPIR_Scatter_MV2_Direct},
1590 {0, 32, &MPIR_Scatter_MV2_Binomial}, {32, -1, &MPIR_Scatter_MV2_Direct},
1594 {0, -1, &MPIR_Scatter_MV2_Binomial},
1598 table_ptrs[0] = mv2_tmp_scatter_thresholds_table_1ppn;
/* Configuration 1: value 2, six entries. */
1599 mv2_scatter_table_ppn_conf[1] = 2;
1600 mv2_size_scatter_tuning_table[1] = 6;
1601 mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_2ppn[] = {
1606 {0, 4096, &MPIR_Scatter_MV2_Binomial}, {4096, -1, &MPIR_Scatter_MV2_Direct},
1610 {0, -1, &MPIR_Scatter_MV2_Direct},
1618 {0, 512, &MPIR_Scatter_MV2_two_level_Direct}, {512, -1, &MPIR_Scatter_MV2_Direct},
1622 {0, -1, &MPIR_Scatter_MV2_Binomial},
1630 {0, 2048, &MPIR_Scatter_MV2_two_level_Direct}, {2048, -1, &MPIR_Scatter_MV2_Direct},
1634 {0, -1, &MPIR_Scatter_MV2_Binomial},
1642 {0, 2048, &MPIR_Scatter_MV2_two_level_Direct}, {2048, -1, &MPIR_Scatter_MV2_Direct},
1646 {0, -1, &MPIR_Scatter_MV2_Binomial},
1654 {0, 8192, &MPIR_Scatter_MV2_two_level_Direct}, {8192, -1, &MPIR_Scatter_MV2_Direct},
1658 {0, -1, &MPIR_Scatter_MV2_Binomial},
1666 {0, 16, &MPIR_Scatter_MV2_Binomial},
1667 {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1668 {128, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1669 {16384, -1, &MPIR_Scatter_MV2_Direct},
1673 {0, 128, &MPIR_Scatter_MV2_Direct}, {128, -1, &MPIR_Scatter_MV2_Binomial},
1677 table_ptrs[1] = mv2_tmp_scatter_thresholds_table_2ppn;
/* Configuration 2: value 16, eight entries. */
1678 mv2_scatter_table_ppn_conf[2] = 16;
1679 mv2_size_scatter_tuning_table[2] = 8;
1680 mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_16ppn[] = {
1685 {0, 256, &MPIR_Scatter_MV2_Binomial}, {256, -1, &MPIR_Scatter_MV2_Direct},
1689 {0, -1, &MPIR_Scatter_MV2_Direct},
1697 {0, 512, &MPIR_Scatter_MV2_Binomial}, {512, -1, &MPIR_Scatter_MV2_Direct},
1701 {0, -1, &MPIR_Scatter_MV2_Direct},
1709 {0, 1024, &MPIR_Scatter_MV2_two_level_Direct}, {1024, -1, &MPIR_Scatter_MV2_Direct},
1713 {0, -1, &MPIR_Scatter_MV2_Direct},
/* From here on each list opens with the mcst wrapper for 0..16, immediately followed by a second row for
 * the same 0..16 range.
 * NOTE(review): the second row looks like the alternative used when the mcst wrapper is not applicable —
 * confirm in the selector. */
1721 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1722 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1723 {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1724 {2048, -1, &MPIR_Scatter_MV2_Direct},
1728 {0, -1, &MPIR_Scatter_MV2_Direct},
1736 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1737 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1738 {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1739 {2048, -1, &MPIR_Scatter_MV2_Direct},
1743 {0, -1, &MPIR_Scatter_MV2_Direct},
1751 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
/* The row after the mcst wrapper spans the same 0..16 range, as in the neighbouring entries. */
1752 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1753 {16, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1754 {4096, -1, &MPIR_Scatter_MV2_Direct},
1758 {0, -1, &MPIR_Scatter_MV2_Binomial},
1765 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1766 {0, 16, &MPIR_Scatter_MV2_Binomial},
1767 {16, 32, &MPIR_Scatter_MV2_Binomial},
1768 {32, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1769 {4096, -1, &MPIR_Scatter_MV2_Direct},
1773 {0, -1, &MPIR_Scatter_MV2_Binomial},
1780 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1781 {0, 16, &MPIR_Scatter_MV2_two_level_Binomial},
1782 {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1783 {128, 1024, &MPIR_Scatter_MV2_two_level_Direct},
1784 {1024, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1785 {16384, 65536, &MPIR_Scatter_MV2_Direct},
1786 {65536, -1, &MPIR_Scatter_MV2_two_level_Direct},
1790 {0, 16, &MPIR_Scatter_MV2_Binomial},
1791 {16, 128, &MPIR_Scatter_MV2_Binomial},
1792 {128, 1024, &MPIR_Scatter_MV2_Binomial},
1793 {1024, 16384, &MPIR_Scatter_MV2_Direct},
1794 {16384, 65536, &MPIR_Scatter_MV2_Direct},
1795 {65536, -1, &MPIR_Scatter_MV2_Direct},
1799 table_ptrs[2] = mv2_tmp_scatter_thresholds_table_16ppn;
/* Total number of entries over all configurations: size of the single heap block. */
1801 for (int i = 0; i < mv2_scatter_num_ppn_conf; i++) {
1802 agg_table_sum += mv2_size_scatter_tuning_table[i];
/* Slot 0 owns the block; copy configuration 0 to its start. */
1804 mv2_scatter_thresholds_table[0] = new mv2_scatter_tuning_table[agg_table_sum];
1805 std::copy_n(table_ptrs[0], mv2_size_scatter_tuning_table[0], mv2_scatter_thresholds_table[0]);
/* Each later slot starts right after the previous configuration's entries inside the same block. */
1806 for (int i = 1; i < mv2_scatter_num_ppn_conf; i++) {
1807 mv2_scatter_thresholds_table[i] = mv2_scatter_thresholds_table[i - 1] + mv2_size_scatter_tuning_table[i - 1];
1808 std::copy_n(table_ptrs[i], mv2_size_scatter_tuning_table[i], mv2_scatter_thresholds_table[i]);
/* Only the scratch pointer array is released; the local initializers it pointed to have been copied. */
1810 delete[] table_ptrs;