1 /* Selector for collective algorithms based on the MVAPICH decision logic, calibrated on the Stampede cluster at TACC. */
2 /* This is the tuning used by MVAPICH for the Stampede platform (MV2_ARCH_INTEL_XEON_E5_2680_16, MV2_HCA_MLX_CX_FDR). */
4 /* Copyright (c) 2009-2017. The SimGrid Team. All rights reserved. */
6 /* This program is free software; you can redistribute it and/or modify it
7 * under the terms of the license (GNU LGPL) which comes with this package. */
9 /************ Alltoall variables and initializers */
11 #define MV2_MAX_NB_THRESHOLDS 32
14 XBT_PUBLIC(void) smpi_coll_cleanup_mvapich2(void);
19 int (*MV2_pt_Alltoall_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
20 void *recvbuf, int recvcount, MPI_Datatype recvtype,
22 } mv2_alltoall_tuning_element;
27 mv2_alltoall_tuning_element algo_table[MV2_MAX_NB_THRESHOLDS];
28 mv2_alltoall_tuning_element in_place_algo_table[MV2_MAX_NB_THRESHOLDS];
29 } mv2_alltoall_tuning_table;
31 int (*MV2_Alltoall_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm_ptr)=NULL;
33 /* Indicates number of processes per node */
34 int *mv2_alltoall_table_ppn_conf = NULL;
35 /* Indicates total number of configurations */
36 int mv2_alltoall_num_ppn_conf = 1;
37 int *mv2_size_alltoall_tuning_table = NULL;
38 mv2_alltoall_tuning_table **mv2_alltoall_thresholds_table = NULL;
41 #define MPIR_Alltoall_bruck_MV2 simgrid::smpi::Coll_alltoall_bruck::alltoall
42 #define MPIR_Alltoall_RD_MV2 simgrid::smpi::Coll_alltoall_rdb::alltoall
43 #define MPIR_Alltoall_Scatter_dest_MV2 simgrid::smpi::Coll_alltoall_mvapich2_scatter_dest::alltoall
44 #define MPIR_Alltoall_pairwise_MV2 simgrid::smpi::Coll_alltoall_pair::alltoall
45 #define MPIR_Alltoall_inplace_MV2 simgrid::smpi::Coll_alltoall_ring::alltoall
48 static void init_mv2_alltoall_tables_stampede(){
50 int agg_table_sum = 0;
51 mv2_alltoall_tuning_table **table_ptrs = NULL;
52 mv2_alltoall_num_ppn_conf = 3;
53 if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
54 simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
55 mv2_alltoall_thresholds_table = static_cast<mv2_alltoall_tuning_table**>(xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
56 * mv2_alltoall_num_ppn_conf));
57 table_ptrs = static_cast<mv2_alltoall_tuning_table**>(xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
58 * mv2_alltoall_num_ppn_conf));
59 mv2_size_alltoall_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) *
60 mv2_alltoall_num_ppn_conf));
61 mv2_alltoall_table_ppn_conf = static_cast<int*>(xbt_malloc(mv2_alltoall_num_ppn_conf * sizeof(int)));
62 mv2_alltoall_table_ppn_conf[0] = 1;
63 mv2_size_alltoall_tuning_table[0] = 6;
64 mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = {
67 {{0, -1, &MPIR_Alltoall_pairwise_MV2},
70 {{0, -1, &MPIR_Alltoall_inplace_MV2},
76 {{0, 262144, &MPIR_Alltoall_Scatter_dest_MV2},
77 {262144, -1, &MPIR_Alltoall_pairwise_MV2},
80 {{0, -1, &MPIR_Alltoall_inplace_MV2},
86 {{0, 8, &MPIR_Alltoall_RD_MV2},
87 {8, -1, &MPIR_Alltoall_Scatter_dest_MV2},
90 {{0, -1, &MPIR_Alltoall_inplace_MV2},
96 {{0, 64, &MPIR_Alltoall_RD_MV2},
97 {64, 512, &MPIR_Alltoall_bruck_MV2},
98 {512, -1, &MPIR_Alltoall_Scatter_dest_MV2},
101 {{0,-1, &MPIR_Alltoall_inplace_MV2},
107 {{0, 32, &MPIR_Alltoall_RD_MV2},
108 {32, 2048, &MPIR_Alltoall_bruck_MV2},
109 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
112 {{0, -1, &MPIR_Alltoall_inplace_MV2},
118 {{0, 8, &MPIR_Alltoall_RD_MV2},
119 {8, 1024, &MPIR_Alltoall_bruck_MV2},
120 {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
123 {{0, -1, &MPIR_Alltoall_inplace_MV2},
127 table_ptrs[0] = mv2_tmp_alltoall_thresholds_table_1ppn;
128 mv2_alltoall_table_ppn_conf[1] = 2;
129 mv2_size_alltoall_tuning_table[1] = 6;
130 mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_2ppn[] = {
133 {{0, 32, &MPIR_Alltoall_RD_MV2},
134 {32, -1, &MPIR_Alltoall_Scatter_dest_MV2},
137 {{0, -1, &MPIR_Alltoall_inplace_MV2},
143 {{0, 64, &MPIR_Alltoall_RD_MV2},
144 {64, -1, &MPIR_Alltoall_Scatter_dest_MV2},
147 {{0, -1, &MPIR_Alltoall_inplace_MV2},
153 {{0, 64, &MPIR_Alltoall_RD_MV2},
154 {64, 2048, &MPIR_Alltoall_bruck_MV2},
155 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
158 {{0,-1, &MPIR_Alltoall_inplace_MV2},
164 {{0, 16, &MPIR_Alltoall_RD_MV2},
165 {16, 2048, &MPIR_Alltoall_bruck_MV2},
166 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
169 {{0, -1, &MPIR_Alltoall_inplace_MV2},
175 {{0, 8, &MPIR_Alltoall_RD_MV2},
176 {8, 1024, &MPIR_Alltoall_bruck_MV2},
177 {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
180 {{0, -1, &MPIR_Alltoall_inplace_MV2},
186 {{0, 4, &MPIR_Alltoall_RD_MV2},
187 {4, 2048, &MPIR_Alltoall_bruck_MV2},
188 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
191 {{0, -1, &MPIR_Alltoall_inplace_MV2},
195 table_ptrs[1] = mv2_tmp_alltoall_thresholds_table_2ppn;
196 mv2_alltoall_table_ppn_conf[2] = 16;
197 mv2_size_alltoall_tuning_table[2] = 7;
198 mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_16ppn[] = {
201 {{0, 2048, &MPIR_Alltoall_bruck_MV2},
202 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
205 {{32768, -1, &MPIR_Alltoall_inplace_MV2},
211 {{0, 2048, &MPIR_Alltoall_bruck_MV2},
212 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
215 {{16384, -1, &MPIR_Alltoall_inplace_MV2},
221 {{0, 2048, &MPIR_Alltoall_bruck_MV2},
222 {2048, 16384, &MPIR_Alltoall_Scatter_dest_MV2},
223 {16384, -1, &MPIR_Alltoall_pairwise_MV2},
226 {{32768, 131072, &MPIR_Alltoall_inplace_MV2},
232 {{0, 2048, &MPIR_Alltoall_bruck_MV2},
233 {2048, -1, &MPIR_Alltoall_pairwise_MV2},
236 {{16384,65536, &MPIR_Alltoall_inplace_MV2},
242 {{0, 1024, &MPIR_Alltoall_bruck_MV2},
243 {1024, -1, &MPIR_Alltoall_pairwise_MV2},
246 {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
252 {{0, 1024, &MPIR_Alltoall_bruck_MV2},
253 {1024, -1, &MPIR_Alltoall_pairwise_MV2},
256 {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
261 {{0, 1024, &MPIR_Alltoall_bruck_MV2},
262 {1024, -1, &MPIR_Alltoall_pairwise_MV2},
265 {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
270 table_ptrs[2] = mv2_tmp_alltoall_thresholds_table_16ppn;
272 for (i = 0; i < mv2_alltoall_num_ppn_conf; i++) {
273 agg_table_sum += mv2_size_alltoall_tuning_table[i];
275 mv2_alltoall_thresholds_table[0] =
276 static_cast<mv2_alltoall_tuning_table*>(xbt_malloc(agg_table_sum * sizeof (mv2_alltoall_tuning_table)));
277 memcpy(mv2_alltoall_thresholds_table[0], table_ptrs[0],
278 (sizeof(mv2_alltoall_tuning_table)
279 * mv2_size_alltoall_tuning_table[0]));
280 for (i = 1; i < mv2_alltoall_num_ppn_conf; i++) {
281 mv2_alltoall_thresholds_table[i] =
282 mv2_alltoall_thresholds_table[i - 1]
283 + mv2_size_alltoall_tuning_table[i - 1];
284 memcpy(mv2_alltoall_thresholds_table[i], table_ptrs[i],
285 (sizeof(mv2_alltoall_tuning_table)
286 * mv2_size_alltoall_tuning_table[i]));
288 xbt_free(table_ptrs);
294 /************ Allgather variables and initializers */
299 int (*MV2_pt_Allgatherction)(void *sendbuf,
301 MPI_Datatype sendtype,
304 MPI_Datatype recvtype, MPI_Comm comm_ptr);
305 } mv2_allgather_tuning_element;
309 int two_level[MV2_MAX_NB_THRESHOLDS];
310 int size_inter_table;
311 mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
312 } mv2_allgather_tuning_table;
314 int (*MV2_Allgatherction)(void *sendbuf,
316 MPI_Datatype sendtype,
319 MPI_Datatype recvtype, MPI_Comm comm);
321 int *mv2_allgather_table_ppn_conf = NULL;
322 int mv2_allgather_num_ppn_conf = 1;
323 int *mv2_size_allgather_tuning_table = NULL;
324 mv2_allgather_tuning_table **mv2_allgather_thresholds_table = NULL;
326 static int MPIR_Allgather_RD_Allgather_Comm_MV2( void *sendbuf,
328 MPI_Datatype sendtype,
331 MPI_Datatype recvtype, MPI_Comm comm_ptr)
336 #define MPIR_Allgather_Bruck_MV2 simgrid::smpi::Coll_allgather_bruck::allgather
337 #define MPIR_Allgather_RD_MV2 simgrid::smpi::Coll_allgather_rdb::allgather
338 #define MPIR_Allgather_Ring_MV2 simgrid::smpi::Coll_allgather_ring::allgather
339 #define MPIR_2lvl_Allgather_MV2 simgrid::smpi::Coll_allgather_mvapich2_smp::allgather
341 static void init_mv2_allgather_tables_stampede(){
343 int agg_table_sum = 0;
345 if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
346 simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
347 mv2_allgather_tuning_table **table_ptrs = NULL;
348 mv2_allgather_num_ppn_conf = 3;
349 mv2_allgather_thresholds_table
350 = static_cast<mv2_allgather_tuning_table**>(xbt_malloc(sizeof(mv2_allgather_tuning_table *)
351 * mv2_allgather_num_ppn_conf));
352 table_ptrs = static_cast<mv2_allgather_tuning_table**>(xbt_malloc(sizeof(mv2_allgather_tuning_table *)
353 * mv2_allgather_num_ppn_conf));
354 mv2_size_allgather_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) *
355 mv2_allgather_num_ppn_conf));
356 mv2_allgather_table_ppn_conf
357 = static_cast<int*>(xbt_malloc(mv2_allgather_num_ppn_conf * sizeof(int)));
358 mv2_allgather_table_ppn_conf[0] = 1;
359 mv2_size_allgather_tuning_table[0] = 6;
360 mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = {
366 {0, -1, &MPIR_Allgather_Ring_MV2},
374 {0, 262144, &MPIR_Allgather_RD_MV2},
375 {262144, -1, &MPIR_Allgather_Ring_MV2},
383 {0, 131072, &MPIR_Allgather_RD_MV2},
384 {131072, -1, &MPIR_Allgather_Ring_MV2},
392 {0, 131072, &MPIR_Allgather_RD_MV2},
393 {131072, -1, &MPIR_Allgather_Ring_MV2},
401 {0, 65536, &MPIR_Allgather_RD_MV2},
402 {65536, -1, &MPIR_Allgather_Ring_MV2},
410 {0, 32768, &MPIR_Allgather_RD_MV2},
411 {32768, -1, &MPIR_Allgather_Ring_MV2},
415 table_ptrs[0] = mv2_tmp_allgather_thresholds_table_1ppn;
416 mv2_allgather_table_ppn_conf[1] = 2;
417 mv2_size_allgather_tuning_table[1] = 6;
418 mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_2ppn[] = {
424 {0, 524288, &MPIR_Allgather_RD_MV2},
425 {524288, -1, &MPIR_Allgather_Ring_MV2},
433 {0, 32768, &MPIR_Allgather_RD_MV2},
434 {32768, 524288, &MPIR_Allgather_Ring_MV2},
435 {524288, -1, &MPIR_Allgather_Ring_MV2},
443 {0, 16384, &MPIR_Allgather_RD_MV2},
444 {16384, 524288, &MPIR_Allgather_Ring_MV2},
445 {524288, -1, &MPIR_Allgather_Ring_MV2},
453 {0, 65536, &MPIR_Allgather_RD_MV2},
454 {65536, 524288, &MPIR_Allgather_Ring_MV2},
455 {524288, -1, &MPIR_Allgather_Ring_MV2},
463 {0, 32768, &MPIR_Allgather_RD_MV2},
464 {32768, 524288, &MPIR_Allgather_Ring_MV2},
465 {524288, -1, &MPIR_Allgather_Ring_MV2},
473 {0, 65536, &MPIR_Allgather_RD_MV2},
474 {65536, 524288, &MPIR_Allgather_Ring_MV2},
475 {524288, -1, &MPIR_Allgather_Ring_MV2},
479 table_ptrs[1] = mv2_tmp_allgather_thresholds_table_2ppn;
480 mv2_allgather_table_ppn_conf[2] = 16;
481 mv2_size_allgather_tuning_table[2] = 6;
482 mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_16ppn[] = {
488 {0, 1024, &MPIR_Allgather_RD_MV2},
489 {1024, -1, &MPIR_Allgather_Ring_MV2},
497 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
498 {1024, -1, &MPIR_Allgather_Ring_MV2},
506 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
507 {1024, -1, &MPIR_Allgather_Ring_MV2},
515 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
516 {1024, -1, &MPIR_Allgather_Ring_MV2},
524 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
525 {1024, -1, &MPIR_Allgather_Ring_MV2},
533 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
534 {1024, -1, &MPIR_Allgather_Ring_MV2},
539 table_ptrs[2] = mv2_tmp_allgather_thresholds_table_16ppn;
541 for (i = 0; i < mv2_allgather_num_ppn_conf; i++) {
542 agg_table_sum += mv2_size_allgather_tuning_table[i];
544 mv2_allgather_thresholds_table[0] =
545 static_cast<mv2_allgather_tuning_table*>(xbt_malloc(agg_table_sum * sizeof (mv2_allgather_tuning_table)));
546 memcpy(mv2_allgather_thresholds_table[0], table_ptrs[0],
547 (sizeof(mv2_allgather_tuning_table)
548 * mv2_size_allgather_tuning_table[0]));
549 for (i = 1; i < mv2_allgather_num_ppn_conf; i++) {
550 mv2_allgather_thresholds_table[i] =
551 mv2_allgather_thresholds_table[i - 1]
552 + mv2_size_allgather_tuning_table[i - 1];
553 memcpy(mv2_allgather_thresholds_table[i], table_ptrs[i],
554 (sizeof(mv2_allgather_tuning_table)
555 * mv2_size_allgather_tuning_table[i]));
557 xbt_free(table_ptrs);
561 /************ Gather variables and initializers */
566 int (*MV2_pt_Gather_function)(void *sendbuf, int sendcnt,
567 MPI_Datatype sendtype, void *recvbuf, int recvcnt,
568 MPI_Datatype recvtype, int root, MPI_Comm comm_ptr);
569 } mv2_gather_tuning_element;
574 int size_inter_table;
575 mv2_gather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
576 int size_intra_table;
577 mv2_gather_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
578 } mv2_gather_tuning_table;
580 int mv2_size_gather_tuning_table=7;
581 mv2_gather_tuning_table * mv2_gather_thresholds_table=NULL;
583 typedef int (*MV2_Gather_function_ptr) (void *sendbuf,
585 MPI_Datatype sendtype,
588 MPI_Datatype recvtype,
589 int root, MPI_Comm comm);
591 MV2_Gather_function_ptr MV2_Gather_inter_leader_function = NULL;
592 MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL;
596 #define MPIR_Gather_MV2_Direct simgrid::smpi::Coll_gather_ompi_basic_linear::gather
597 #define MPIR_Gather_MV2_two_level_Direct simgrid::smpi::Coll_gather_mvapich2_two_level::gather
598 #define MPIR_Gather_intra simgrid::smpi::Coll_gather_mpich::gather
601 static void init_mv2_gather_tables_stampede(){
603 if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
604 simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
605 mv2_size_gather_tuning_table=7;
606 mv2_gather_thresholds_table = static_cast<mv2_gather_tuning_table*>(xbt_malloc(mv2_size_gather_tuning_table*
607 sizeof (mv2_gather_tuning_table)));
608 mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[]={
610 2,{{0, 524288, &MPIR_Gather_MV2_Direct},
611 {524288, -1, &MPIR_Gather_intra}},
612 1,{{0, -1, &MPIR_Gather_MV2_Direct}}},
614 3,{{0, 16384, &MPIR_Gather_MV2_Direct},
615 {16384, 131072, &MPIR_Gather_intra},
616 {131072, -1, &MPIR_Gather_MV2_two_level_Direct}},
617 1,{{0, -1, &MPIR_Gather_intra}}},
619 3,{{0, 256, &MPIR_Gather_MV2_two_level_Direct},
620 {256, 16384, &MPIR_Gather_MV2_Direct},
621 {256, -1, &MPIR_Gather_MV2_two_level_Direct}},
622 1,{{0, -1, &MPIR_Gather_intra}}},
624 3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct},
625 {512, 16384, &MPIR_Gather_MV2_Direct},
626 {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
627 1,{{0, -1, &MPIR_Gather_intra}}},
629 3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct},
630 {512, 16384, &MPIR_Gather_MV2_Direct},
631 {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
632 1,{{0, -1, &MPIR_Gather_intra}}},
634 3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct},
635 {512, 16384, &MPIR_Gather_MV2_Direct},
636 {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
637 1,{{0, -1, &MPIR_Gather_intra}}},
639 3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct},
640 {512, 16384, &MPIR_Gather_MV2_Direct},
641 {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
642 1,{{0, -1, &MPIR_Gather_intra}}},
645 memcpy(mv2_gather_thresholds_table, mv2_tmp_gather_thresholds_table,
646 mv2_size_gather_tuning_table * sizeof (mv2_gather_tuning_table));
651 /************ Allgatherv variables and initializers */
656 int (*MV2_pt_Allgatherv_function)(void *sendbuf,
658 MPI_Datatype sendtype,
662 MPI_Datatype recvtype,
664 } mv2_allgatherv_tuning_element;
668 int size_inter_table;
669 mv2_allgatherv_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
670 } mv2_allgatherv_tuning_table;
672 int (*MV2_Allgatherv_function)(void *sendbuf,
674 MPI_Datatype sendtype,
678 MPI_Datatype recvtype,
681 int mv2_size_allgatherv_tuning_table = 0;
682 mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table = NULL;
684 #define MPIR_Allgatherv_Rec_Doubling_MV2 simgrid::smpi::Coll_allgatherv_mpich_rdb::allgatherv
685 #define MPIR_Allgatherv_Bruck_MV2 simgrid::smpi::Coll_allgatherv_ompi_bruck::allgatherv
686 #define MPIR_Allgatherv_Ring_MV2 simgrid::smpi::Coll_allgatherv_mpich_ring::allgatherv
689 static void init_mv2_allgatherv_tables_stampede(){
690 if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
691 simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
692 mv2_size_allgatherv_tuning_table = 6;
693 mv2_allgatherv_thresholds_table = static_cast<mv2_allgatherv_tuning_table*>(xbt_malloc(mv2_size_allgatherv_tuning_table *
694 sizeof (mv2_allgatherv_tuning_table)));
695 mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
700 {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
701 {512, -1, &MPIR_Allgatherv_Ring_MV2},
708 {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
709 {512, -1, &MPIR_Allgatherv_Ring_MV2},
716 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
717 {256, -1, &MPIR_Allgatherv_Ring_MV2},
724 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
725 {256, -1, &MPIR_Allgatherv_Ring_MV2},
732 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
733 {256, -1, &MPIR_Allgatherv_Ring_MV2},
740 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
741 {256, -1, &MPIR_Allgatherv_Ring_MV2},
746 memcpy(mv2_allgatherv_thresholds_table, mv2_tmp_allgatherv_thresholds_table,
747 mv2_size_allgatherv_tuning_table * sizeof (mv2_allgatherv_tuning_table));
751 /************ Allreduce variables and initializers */
756 int (*MV2_pt_Allreducection)(void *sendbuf,
759 MPI_Datatype datatype,
760 MPI_Op op, MPI_Comm comm);
761 } mv2_allreduce_tuning_element;
766 int is_two_level_allreduce[MV2_MAX_NB_THRESHOLDS];
767 int size_inter_table;
768 mv2_allreduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
769 int size_intra_table;
770 mv2_allreduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
771 } mv2_allreduce_tuning_table;
774 int (*MV2_Allreducection)(void *sendbuf,
777 MPI_Datatype datatype,
778 MPI_Op op, MPI_Comm comm)=NULL;
781 int (*MV2_Allreduce_intra_function)( void *sendbuf,
784 MPI_Datatype datatype,
785 MPI_Op op, MPI_Comm comm)=NULL;
787 int mv2_size_allreduce_tuning_table = 0;
788 mv2_allreduce_tuning_table *mv2_allreduce_thresholds_table = NULL;
794 static int MPIR_Allreduce_mcst_reduce_two_level_helper_MV2( void *sendbuf,
797 MPI_Datatype datatype,
798 MPI_Op op, MPI_Comm comm)
803 static int MPIR_Allreduce_mcst_reduce_redscat_gather_MV2( void *sendbuf,
806 MPI_Datatype datatype,
807 MPI_Op op, MPI_Comm comm)
812 static int MPIR_Allreduce_reduce_p2p_MV2( void *sendbuf,
815 MPI_Datatype datatype,
816 MPI_Op op, MPI_Comm comm)
818 simgrid::smpi::Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
822 static int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf,
825 MPI_Datatype datatype,
826 MPI_Op op, MPI_Comm comm)
828 simgrid::smpi::Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
832 #define MPIR_Allreduce_pt2pt_rd_MV2 simgrid::smpi::Coll_allreduce_rdb::allreduce
833 #define MPIR_Allreduce_pt2pt_rs_MV2 simgrid::smpi::Coll_allreduce_mvapich2_rs::allreduce
834 #define MPIR_Allreduce_two_level_MV2 simgrid::smpi::Coll_allreduce_mvapich2_two_level::allreduce
837 static void init_mv2_allreduce_tables_stampede(){
838 if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
839 simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
840 mv2_size_allreduce_tuning_table = 8;
841 mv2_allreduce_thresholds_table = static_cast<mv2_allreduce_tuning_table*>(xbt_malloc(mv2_size_allreduce_tuning_table *
842 sizeof (mv2_allreduce_tuning_table)));
843 mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
850 {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
851 {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
855 {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
856 {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
865 {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
866 {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
867 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
871 {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
872 {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
881 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
882 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
883 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
887 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
888 {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
897 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
898 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
899 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
903 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
904 {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
913 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
914 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
915 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
919 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
920 {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
929 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
930 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
931 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
935 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
936 {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
945 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
946 {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
947 {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
948 {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
952 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
953 {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
962 {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
963 {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
964 {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
965 {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
966 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
970 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
971 {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
976 memcpy(mv2_allreduce_thresholds_table, mv2_tmp_allreduce_thresholds_table,
977 mv2_size_allreduce_tuning_table * sizeof (mv2_allreduce_tuning_table));
986 int (*MV2_pt_Bcast_function) (void *buf, int count, MPI_Datatype datatype,
987 int root, MPI_Comm comm_ptr);
988 int zcpy_pipelined_knomial_factor;
989 } mv2_bcast_tuning_element;
993 int bcast_segment_size;
994 int intra_node_knomial_factor;
995 int inter_node_knomial_factor;
996 int is_two_level_bcast[MV2_MAX_NB_THRESHOLDS];
997 int size_inter_table;
998 mv2_bcast_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
999 int size_intra_table;
1000 mv2_bcast_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1001 } mv2_bcast_tuning_table;
1003 int mv2_size_bcast_tuning_table = 0;
1004 mv2_bcast_tuning_table *mv2_bcast_thresholds_table = NULL;
1007 int (*MV2_Bcast_function) (void *buffer, int count, MPI_Datatype datatype,
1008 int root, MPI_Comm comm_ptr) = NULL;
1010 int (*MV2_Bcast_intra_node_function) (void *buffer, int count, MPI_Datatype datatype,
1011 int root, MPI_Comm comm_ptr) = NULL;
1013 int zcpy_knomial_factor = 2;
1014 int mv2_pipelined_zcpy_knomial_factor = -1;
1015 int bcast_segment_size = 8192;
1016 int mv2_inter_node_knomial_factor = 4;
1017 int mv2_intra_node_knomial_factor = 4;
1018 #define mv2_bcast_two_level_system_size 64
1019 #define mv2_bcast_short_msg 16384
1020 #define mv2_bcast_large_msg 512*1024
1022 #define INTRA_NODE_ROOT 0
1024 #define MPIR_Pipelined_Bcast_Zcpy_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
1025 #define MPIR_Pipelined_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
1026 #define MPIR_Bcast_binomial_MV2 simgrid::smpi::Coll_bcast_binomial_tree::bcast
1027 #define MPIR_Bcast_scatter_ring_allgather_shm_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
1028 #define MPIR_Bcast_scatter_doubling_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_rdb_allgather::bcast
1029 #define MPIR_Bcast_scatter_ring_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
1030 #define MPIR_Shmem_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
1031 #define MPIR_Bcast_tune_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
1032 #define MPIR_Bcast_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
1033 #define MPIR_Knomial_Bcast_intra_node_MV2 simgrid::smpi::Coll_bcast_mvapich2_knomial_intra_node::bcast
1034 #define MPIR_Bcast_intra_MV2 simgrid::smpi::Coll_bcast_mvapich2_intra_node::bcast
1036 static void init_mv2_bcast_tables_stampede(){
1038 if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
1039 simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
1040 mv2_size_bcast_tuning_table=8;
1041 mv2_bcast_thresholds_table = static_cast<mv2_bcast_tuning_table*>(xbt_malloc(mv2_size_bcast_tuning_table *
1042 sizeof (mv2_bcast_tuning_table)));
1044 mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[]={
1048 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
1051 {0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1052 {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1053 {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1054 {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1055 {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
1056 {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1057 {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1058 {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1059 {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
1060 {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
1061 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
1065 {0, 8, &MPIR_Shmem_Bcast_MV2, 2},
1066 {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
1067 {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
1068 {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
1069 {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
1070 {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
1071 {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1072 {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
1073 {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1074 {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1075 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
1081 {1, 1, 1, 1, 1, 1, 1, 1},
1084 {0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1085 {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1086 {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1087 {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1088 {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1089 {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1090 {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1091 {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}
1095 {0, 128, &MPIR_Shmem_Bcast_MV2, 2},
1096 {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
1097 {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1098 {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
1099 {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1100 {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
1101 {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1102 {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}
1108 {1, 1, 1, 1, 1, 1, 1, 1, 1},
1111 {0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1112 {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1113 {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1114 {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1115 {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1116 {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1117 {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1118 {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1119 {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}
1123 {0, 2, &MPIR_Shmem_Bcast_MV2, 4},
1124 {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
1125 {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
1126 {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
1127 {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
1128 {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
1129 {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
1130 {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
1131 {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}
1140 {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1141 {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1142 {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1143 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
1147 {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1148 {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1149 {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1150 {524288, -1, NULL, -1}
1159 {0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1160 {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1161 {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1162 {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1163 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
1167 {0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1168 {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1169 {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1170 {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1171 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
1180 {0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1181 {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1182 {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1183 {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
1184 {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
1188 {0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1189 {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1190 {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1191 {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1192 {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}
1201 {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1202 {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1203 {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1204 {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1205 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
1209 {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1210 {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1211 {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1212 {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1213 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
1219 {1, 1, 1, 1, 1, 1, 1},
1222 {0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1223 {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1224 {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1225 {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1226 {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1227 {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1228 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
1232 {0, 16, &MPIR_Shmem_Bcast_MV2, 8},
1233 {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
1234 {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1235 {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1236 {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1237 {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1238 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
1243 memcpy(mv2_bcast_thresholds_table, mv2_tmp_bcast_thresholds_table,
1244 mv2_size_bcast_tuning_table * sizeof (mv2_bcast_tuning_table));
1248 /************ Reduce variables and initializers */
1253 int (*MV2_pt_Reduce_function)(void *sendbuf,
1256 MPI_Datatype datatype,
1260 } mv2_reduce_tuning_element;
1266 int is_two_level_reduce[MV2_MAX_NB_THRESHOLDS];
1267 int size_inter_table;
1268 mv2_reduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1269 int size_intra_table;
1270 mv2_reduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1271 } mv2_reduce_tuning_table;
1273 int mv2_size_reduce_tuning_table = 0;
1274 mv2_reduce_tuning_table *mv2_reduce_thresholds_table = NULL;
1277 int mv2_reduce_intra_knomial_factor = -1;
1278 int mv2_reduce_inter_knomial_factor = -1;
1280 int (*MV2_Reduce_function)( void *sendbuf,
1283 MPI_Datatype datatype,
1286 MPI_Comm comm_ptr)=NULL;
1288 int (*MV2_Reduce_intra_function)( void *sendbuf,
1291 MPI_Datatype datatype,
1294 MPI_Comm comm_ptr)=NULL;
1297 #define MPIR_Reduce_inter_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
1298 #define MPIR_Reduce_intra_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
1299 #define MPIR_Reduce_binomial_MV2 simgrid::smpi::Coll_reduce_binomial::reduce
1300 #define MPIR_Reduce_redscat_gather_MV2 simgrid::smpi::Coll_reduce_scatter_gather::reduce
1301 #define MPIR_Reduce_shmem_MV2 simgrid::smpi::Coll_reduce_ompi_basic_linear::reduce
1302 #define MPIR_Reduce_two_level_helper_MV2 simgrid::smpi::Coll_reduce_mvapich2_two_level::reduce
/* Builds the Stampede tuning table for reduce.
 *
 * Installs smpi_coll_cleanup_mvapich2 as the collective cleanup callback when none is
 * registered yet, allocates mv2_reduce_thresholds_table with 8 entries, and fills it by
 * copying the function-local table below. */
1305 static void init_mv2_reduce_tables_stampede(){
/* Only register the cleanup hook if nobody did it before us. */
1306 if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
1307 simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
1309 mv2_size_reduce_tuning_table = 8;
1310 mv2_reduce_thresholds_table = static_cast<mv2_reduce_tuning_table*>(xbt_malloc(mv2_size_reduce_tuning_table *
1311 sizeof (mv2_reduce_tuning_table)));
/* Function-local table; its contents are copied to the heap at the end of this function.
 * NOTE(review): each three-field row appears to be {lower message-size bound, upper bound,
 * algorithm}, with -1 as upper bound presumably meaning "no upper limit" -- confirm
 * against the tuning element definition and the selector. */
1312 mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = {
1320 {0, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1321 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1322 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1326 {0, 65536, &MPIR_Reduce_shmem_MV2},
1327 {65536,-1, &MPIR_Reduce_binomial_MV2},
1334 {1, 1, 1, 1, 0, 0, 0},
1337 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1338 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1339 {16384, 32768, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1340 {32768, 65536, &MPIR_Reduce_binomial_MV2},
1341 {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1342 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1343 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1347 {0, 8192, &MPIR_Reduce_shmem_MV2},
1348 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1349 {16384, 32768, &MPIR_Reduce_shmem_MV2},
1350 {32768, 65536, &MPIR_Reduce_shmem_MV2},
1351 {65536, 262144, &MPIR_Reduce_shmem_MV2},
1352 {262144,-1, &MPIR_Reduce_binomial_MV2},
1362 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1363 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1364 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1365 {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1366 {262144, -1, &MPIR_Reduce_redscat_gather_MV2},
1370 {0, 8192, &MPIR_Reduce_shmem_MV2},
1371 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1372 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1373 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1374 {262144, -1, &MPIR_Reduce_binomial_MV2},
1384 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1385 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1386 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1387 {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1388 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1389 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1393 {0, 8192, &MPIR_Reduce_shmem_MV2},
1394 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1395 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1396 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1397 {262144, -1, &MPIR_Reduce_binomial_MV2},
1404 {1, 1, 1, 0, 1, 1, 0},
1407 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1408 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1409 {16384, 32768, &MPIR_Reduce_binomial_MV2},
1410 {32768, 65536, &MPIR_Reduce_binomial_MV2},
1411 {65536, 262144, &MPIR_Reduce_binomial_MV2},
1412 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1413 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1417 {0, 8192, &MPIR_Reduce_shmem_MV2},
1418 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1419 {16384, 32768, &MPIR_Reduce_shmem_MV2},
1420 {32768, 65536, &MPIR_Reduce_shmem_MV2},
1421 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1422 {262144, -1, &MPIR_Reduce_binomial_MV2},
1432 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1433 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1434 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1435 {65536, 262144, &MPIR_Reduce_binomial_MV2},
1436 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1437 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1441 {0, 8192, &MPIR_Reduce_shmem_MV2},
1442 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1443 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1444 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1445 {262144, -1, &MPIR_Reduce_binomial_MV2},
1455 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1456 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1457 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1458 {65536, 262144, &MPIR_Reduce_binomial_MV2},
1459 {262144, -1, &MPIR_Reduce_binomial_MV2},
1463 {0, 8192, &MPIR_Reduce_shmem_MV2},
1464 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1465 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1466 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1467 {262144, -1, &MPIR_Reduce_binomial_MV2},
1477 {0, 2048, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1478 {2048, 4096, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1479 {4096, 16384, &MPIR_Reduce_binomial_MV2},
1480 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1481 {65536, 131072, &MPIR_Reduce_binomial_MV2},
1482 {131072, -1, &MPIR_Reduce_binomial_MV2},
1486 {0, 2048, &MPIR_Reduce_shmem_MV2},
1487 {2048, 4096, &MPIR_Reduce_shmem_MV2},
1488 {4096, 16384, &MPIR_Reduce_shmem_MV2},
1489 {16384, 65536, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1490 {65536, 131072, &MPIR_Reduce_binomial_MV2},
1491 {131072, -1, &MPIR_Reduce_shmem_MV2},
/* Copy mv2_size_reduce_tuning_table entries from the local table into the heap
 * allocation; the local table must therefore hold at least that many entries. */
1496 memcpy(mv2_reduce_thresholds_table, mv2_tmp_reduce_thresholds_table,
1497 mv2_size_reduce_tuning_table * sizeof (mv2_reduce_tuning_table));
1500 /************ Reduce scatter variables and initializers */
/* Pointer to the reduce-scatter implementation attached to a tuning element. */
1505 int (*MV2_pt_Red_scat_function)(void *sendbuf,
1508 MPI_Datatype datatype,
1511 } mv2_red_scat_tuning_element;
/* NOTE(review): presumably the number of inter_leader slots actually filled -- confirm. */
1515 int size_inter_table;
/* Candidate reduce-scatter algorithms, at most MV2_MAX_NB_THRESHOLDS of them. */
1516 mv2_red_scat_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1517 } mv2_red_scat_tuning_table;
/* Number of entries in mv2_red_scat_thresholds_table; stays 0 until
 * init_mv2_reduce_scatter_tables_stampede() sets it. */
1519 int mv2_size_red_scat_tuning_table = 0;
/* Heap-allocated array of reduce-scatter tuning entries; NULL until
 * init_mv2_reduce_scatter_tables_stampede() allocates and fills it. */
1520 mv2_red_scat_tuning_table *mv2_red_scat_thresholds_table = NULL;
/* Pointer to a reduce-scatter implementation.
 * NOTE(review): presumably assigned by the selector from the tuning table -- confirm. */
1523 int (*MV2_Red_scat_function)(void *sendbuf,
1526 MPI_Datatype datatype,
/* Adapter that forwards its arguments to SimGrid's default reduce_scatter so that it
 * can be referenced from the tuning table under the MVAPICH2 "Basic" name.
 * NOTE(review): the forwarded call's result is not used on the call line itself;
 * check what this wrapper returns to its caller. */
1532 static int MPIR_Reduce_Scatter_Basic_MV2(void *sendbuf,
1535 MPI_Datatype datatype,
1539 simgrid::smpi::Coll_reduce_scatter_default::reduce_scatter(sendbuf,recvbuf,recvcnts,datatype,op,comm);
/* Aliases from the MVAPICH2 reduce-scatter algorithm names to the SimGrid SMPI
 * implementations (MPICH and Open MPI flavours) that stand in for them. */
1542 #define MPIR_Reduce_scatter_non_comm_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_noncomm::reduce_scatter
1543 #define MPIR_Reduce_scatter_Rec_Halving_MV2 simgrid::smpi::Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter
1544 #define MPIR_Reduce_scatter_Pair_Wise_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_pair::reduce_scatter
/* Builds the Stampede tuning table for reduce-scatter.
 *
 * Installs smpi_coll_cleanup_mvapich2 as the collective cleanup callback when none is
 * registered yet, allocates mv2_red_scat_thresholds_table with 6 entries, and fills it
 * by copying the function-local table below. */
1549 static void init_mv2_reduce_scatter_tables_stampede(){
/* Only register the cleanup hook if nobody did it before us. */
1550 if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
1551 simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
1552 mv2_size_red_scat_tuning_table = 6;
1553 mv2_red_scat_thresholds_table = static_cast<mv2_red_scat_tuning_table*>(xbt_malloc(mv2_size_red_scat_tuning_table *
1554 sizeof (mv2_red_scat_tuning_table)));
/* Function-local table; its contents are copied to the heap at the end of this function.
 * NOTE(review): each row appears to be {lower message-size bound, upper bound, algorithm},
 * with -1 as upper bound presumably meaning "no upper limit" -- confirm against the
 * tuning element definition and the selector. */
1555 mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = {
1560 {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1561 {64, 65536, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1562 {65536, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1569 {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1570 {64, 131072, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1571 {131072, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1578 {0, 1024, &MPIR_Reduce_Scatter_Basic_MV2},
1579 {1024, 262144, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1580 {262144, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1587 {0, 128, &MPIR_Reduce_Scatter_Basic_MV2},
1588 {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1595 {0, 128, &MPIR_Reduce_Scatter_Basic_MV2},
1596 {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1603 {0, 256, &MPIR_Reduce_Scatter_Basic_MV2},
1604 {256, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
/* Copy mv2_size_red_scat_tuning_table entries from the local table into the heap
 * allocation; the local table must therefore hold at least that many entries. */
1609 memcpy(mv2_red_scat_thresholds_table, mv2_tmp_red_scat_thresholds_table,
1610 mv2_size_red_scat_tuning_table * sizeof (mv2_red_scat_tuning_table));
1613 /************ Scatter variables and initializers */
/* Pointer to the scatter implementation attached to a tuning element. */
1618 int (*MV2_pt_Scatter_function)(void *sendbuf,
1620 MPI_Datatype sendtype,
1623 MPI_Datatype recvtype,
1624 int root, MPI_Comm comm);
1625 } mv2_scatter_tuning_element;
/* NOTE(review): presumably the number of inter_leader slots actually filled -- confirm. */
1629 int size_inter_table;
/* Candidate algorithms for the inter-leader phase, at most MV2_MAX_NB_THRESHOLDS of them. */
1630 mv2_scatter_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
/* NOTE(review): presumably the number of intra_node slots actually filled -- confirm. */
1631 int size_intra_table;
/* Candidate algorithms for the intra-node phase, at most MV2_MAX_NB_THRESHOLDS of them. */
1632 mv2_scatter_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1633 } mv2_scatter_tuning_table;
/* Processes-per-node value of each configuration (1, 2 and 16 once
 * init_mv2_scatter_tables_stampede() has run); NULL before that. */
1636 int *mv2_scatter_table_ppn_conf = NULL;
/* Number of processes-per-node configurations; the Stampede initializer sets it to 3. */
1637 int mv2_scatter_num_ppn_conf = 1;
/* Number of tuning entries of each configuration, indexed like mv2_scatter_table_ppn_conf. */
1638 int *mv2_size_scatter_tuning_table = NULL;
/* One pointer per configuration to that configuration's first tuning entry. The
 * Stampede initializer makes all of them point into a single allocation owned by
 * element 0. */
1639 mv2_scatter_tuning_table **mv2_scatter_thresholds_table = NULL;
/* Pointer to a scatter implementation, NULL at start-up.
 * NOTE(review): presumably set by the selector to the inter-leader algorithm picked
 * from the tuning table -- confirm against the code that assigns it. */
1641 int (*MV2_Scatter_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
1642 void *recvbuf, int recvcount, MPI_Datatype recvtype,
1643 int root, MPI_Comm comm)=NULL;
/* Same signature as MV2_Scatter_function, NULL at start-up.
 * NOTE(review): presumably holds the intra-node algorithm -- confirm against the selector. */
1645 int (*MV2_Scatter_intra_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
1646 void *recvbuf, int recvcount, MPI_Datatype recvtype,
1647 int root, MPI_Comm comm)=NULL;
/* Prototype of the stand-in for MVAPICH2's multicast scatter wrapper; it is referenced
 * by the 16-processes-per-node scatter tuning table. */
1648 int MPIR_Scatter_mcst_wrap_MV2(void *sendbuf,
1650 MPI_Datatype sendtype,
1653 MPI_Datatype recvtype,
1654 int root, MPI_Comm comm_ptr);
/* Definition of the wrapper declared just above; it has the same parameter list as the
 * other scatter algorithms so it fits in a tuning element. */
1656 int MPIR_Scatter_mcst_wrap_MV2(void *sendbuf,
1658 MPI_Datatype sendtype,
1661 MPI_Datatype recvtype,
1662 int root, MPI_Comm comm_ptr)
/* Aliases from the MVAPICH2 scatter algorithm names used in the tuning tables to the
 * SimGrid SMPI implementations that stand in for them: the flat variants use the
 * Open MPI binomial and basic linear scatters, the two-level variants use the
 * MVAPICH2-specific SimGrid routines. */
1667 #define MPIR_Scatter_MV2_Binomial simgrid::smpi::Coll_scatter_ompi_binomial::scatter
1668 #define MPIR_Scatter_MV2_Direct simgrid::smpi::Coll_scatter_ompi_basic_linear::scatter
1669 #define MPIR_Scatter_MV2_two_level_Binomial simgrid::smpi::Coll_scatter_mvapich2_two_level_binomial::scatter
1670 #define MPIR_Scatter_MV2_two_level_Direct simgrid::smpi::Coll_scatter_mvapich2_two_level_direct::scatter
/* Builds the Stampede tuning tables for scatter, one per processes-per-node (ppn)
 * configuration (1, 2 and 16 ppn).
 *
 * Installs smpi_coll_cleanup_mvapich2 as the collective cleanup callback when none is
 * registered yet, allocates the per-configuration bookkeeping arrays, then copies the
 * three function-local tables back to back into one heap allocation and makes
 * mv2_scatter_thresholds_table[i] point at the start of configuration i inside it. */
1675 static void init_mv2_scatter_tables_stampede(){
/* Only register the cleanup hook if nobody did it before us. */
1676 if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
1677 simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
/* Total number of tuning entries over all ppn configurations. */
1679 int agg_table_sum = 0;
/* Scratch array holding the address of each function-local table; freed before returning. */
1681 mv2_scatter_tuning_table **table_ptrs = NULL;
1682 mv2_scatter_num_ppn_conf = 3;
1683 mv2_scatter_thresholds_table
1684 = static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table *)
1685 * mv2_scatter_num_ppn_conf));
1686 table_ptrs = static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table *)
1687 * mv2_scatter_num_ppn_conf));
1688 mv2_size_scatter_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) *
1689 mv2_scatter_num_ppn_conf));
1690 mv2_scatter_table_ppn_conf
1691 = static_cast<int*>(xbt_malloc(mv2_scatter_num_ppn_conf * sizeof(int)));
/* Configuration 0: 1 process per node, 6 tuning entries. */
1692 mv2_scatter_table_ppn_conf[0] = 1;
1693 mv2_size_scatter_tuning_table[0] = 6;
/* NOTE(review): each three-field row appears to be {lower message-size bound, upper
 * bound, algorithm}, with -1 as upper bound presumably meaning "no upper limit" --
 * confirm against the tuning element definition and the selector. */
1694 mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = {
1698 {0, -1, &MPIR_Scatter_MV2_Binomial},
1702 {0, -1, &MPIR_Scatter_MV2_Binomial},
1709 {0, -1, &MPIR_Scatter_MV2_Direct},
1713 {0, -1, &MPIR_Scatter_MV2_Direct},
1720 {0, -1, &MPIR_Scatter_MV2_Direct},
1724 {0, -1, &MPIR_Scatter_MV2_Direct},
1731 {0, -1, &MPIR_Scatter_MV2_Direct},
1735 {0, -1, &MPIR_Scatter_MV2_Direct},
1742 {0, -1, &MPIR_Scatter_MV2_Direct},
1746 {0, -1, &MPIR_Scatter_MV2_Direct},
1753 {0, 32, &MPIR_Scatter_MV2_Binomial},
1754 {32, -1, &MPIR_Scatter_MV2_Direct},
1758 {0, -1, &MPIR_Scatter_MV2_Binomial},
1762 table_ptrs[0] = mv2_tmp_scatter_thresholds_table_1ppn;
/* Configuration 1: 2 processes per node, 6 tuning entries. */
1763 mv2_scatter_table_ppn_conf[1] = 2;
1764 mv2_size_scatter_tuning_table[1] = 6;
1765 mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_2ppn[] = {
1769 {0, 4096, &MPIR_Scatter_MV2_Binomial},
1770 {4096, -1, &MPIR_Scatter_MV2_Direct},
1774 {0, -1, &MPIR_Scatter_MV2_Direct},
1781 {0, 512, &MPIR_Scatter_MV2_two_level_Direct},
1782 {512, -1, &MPIR_Scatter_MV2_Direct},
1786 {0, -1, &MPIR_Scatter_MV2_Binomial},
1793 {0, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1794 {2048, -1, &MPIR_Scatter_MV2_Direct},
1798 {0, -1, &MPIR_Scatter_MV2_Binomial},
1805 {0, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1806 {2048, -1, &MPIR_Scatter_MV2_Direct},
1810 {0, -1, &MPIR_Scatter_MV2_Binomial},
1817 {0, 8192, &MPIR_Scatter_MV2_two_level_Direct},
1818 {8192, -1, &MPIR_Scatter_MV2_Direct},
1822 {0, -1, &MPIR_Scatter_MV2_Binomial},
1829 {0, 16, &MPIR_Scatter_MV2_Binomial},
1830 {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1831 {128, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1832 {16384, -1, &MPIR_Scatter_MV2_Direct},
1836 {0, 128, &MPIR_Scatter_MV2_Direct},
1837 {128, -1, &MPIR_Scatter_MV2_Binomial},
1841 table_ptrs[1] = mv2_tmp_scatter_thresholds_table_2ppn;
/* Configuration 2: 16 processes per node, 8 tuning entries. */
1842 mv2_scatter_table_ppn_conf[2] = 16;
1843 mv2_size_scatter_tuning_table[2] = 8;
1844 mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_16ppn[] = {
1849 {0, 256, &MPIR_Scatter_MV2_Binomial},
1850 {256, -1, &MPIR_Scatter_MV2_Direct},
1854 { 0, -1, &MPIR_Scatter_MV2_Direct},
1862 {0, 512, &MPIR_Scatter_MV2_Binomial},
1863 {512, -1, &MPIR_Scatter_MV2_Direct},
1867 { 0, -1, &MPIR_Scatter_MV2_Direct},
1875 {0, 1024, &MPIR_Scatter_MV2_two_level_Direct},
1876 {1024, -1, &MPIR_Scatter_MV2_Direct},
1880 { 0, -1, &MPIR_Scatter_MV2_Direct},
1888 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1889 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1890 {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1891 {2048, -1, &MPIR_Scatter_MV2_Direct},
1895 { 0, -1, &MPIR_Scatter_MV2_Direct},
1903 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1904 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1905 {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1906 {2048, -1, &MPIR_Scatter_MV2_Direct},
1910 { 0, -1, &MPIR_Scatter_MV2_Direct},
1918 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
/* NOTE(review): the matching row in the sibling entries starts at 0 ({0, 16, ...});
 * here it is {16, 16, ...} -- check whether this is intended or a typo. */
1919 {16, 16, &MPIR_Scatter_MV2_two_level_Direct},
1920 {16, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1921 {4096, -1, &MPIR_Scatter_MV2_Direct},
1925 { 0, -1, &MPIR_Scatter_MV2_Binomial},
1932 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1933 {0, 16, &MPIR_Scatter_MV2_Binomial},
1934 {16, 32, &MPIR_Scatter_MV2_Binomial},
1935 {32, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1936 {4096, -1, &MPIR_Scatter_MV2_Direct},
1940 { 0, -1, &MPIR_Scatter_MV2_Binomial},
1947 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1948 {0, 16, &MPIR_Scatter_MV2_two_level_Binomial},
1949 {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1950 {128, 1024, &MPIR_Scatter_MV2_two_level_Direct},
1951 {1024, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1952 {16384, 65536, &MPIR_Scatter_MV2_Direct},
1953 {65536, -1, &MPIR_Scatter_MV2_two_level_Direct},
1957 {0, 16, &MPIR_Scatter_MV2_Binomial},
1958 {16, 128, &MPIR_Scatter_MV2_Binomial},
1959 {128, 1024, &MPIR_Scatter_MV2_Binomial},
1960 {1024, 16384, &MPIR_Scatter_MV2_Direct},
1961 {16384, 65536, &MPIR_Scatter_MV2_Direct},
1962 {65536, -1, &MPIR_Scatter_MV2_Direct},
1966 table_ptrs[2] = mv2_tmp_scatter_thresholds_table_16ppn;
/* Sum the per-configuration entry counts to size the single backing allocation. */
1968 for (i = 0; i < mv2_scatter_num_ppn_conf; i++) {
1969 agg_table_sum += mv2_size_scatter_tuning_table[i];
/* Element 0 owns the whole allocation; it is the only pointer in
 * mv2_scatter_thresholds_table that comes straight from xbt_malloc. */
1971 mv2_scatter_thresholds_table[0] =
1972 static_cast<mv2_scatter_tuning_table*>(xbt_malloc(agg_table_sum * sizeof (mv2_scatter_tuning_table)));
1973 memcpy(mv2_scatter_thresholds_table[0], table_ptrs[0],
1974 (sizeof(mv2_scatter_tuning_table)
1975 * mv2_size_scatter_tuning_table[0]));
/* Each further configuration starts right after the previous one inside the same
 * allocation, and receives a copy of its function-local table. */
1976 for (i = 1; i < mv2_scatter_num_ppn_conf; i++) {
1977 mv2_scatter_thresholds_table[i] =
1978 mv2_scatter_thresholds_table[i - 1]
1979 + mv2_size_scatter_tuning_table[i - 1];
1980 memcpy(mv2_scatter_thresholds_table[i], table_ptrs[i],
1981 (sizeof(mv2_scatter_tuning_table)
1982 * mv2_size_scatter_tuning_table[i]));
/* Release only the scratch pointer array; the tables it pointed to are function-local. */
1984 xbt_free(table_ptrs);