1 /* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector
4 /* Copyright (c) 2009-2023. The SimGrid Team.
5 * All rights reserved. */
7 /* This program is free software; you can redistribute it and/or modify it
8 * under the terms of the license (GNU LGPL) which comes with this package. */
10 #include "colls_private.hpp"
14 namespace simgrid::smpi {
// Picks an allreduce implementation from a local table of function pointers
// and forwards the call to it.
//
// The choice depends on three inputs visible below: whether `op` is
// non-commutative, the communicator size, and the message size
// (datatype size times `count`).
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): the statements that assign `alg` inside each branch are not
// visible in this excerpt, so the mapping from thresholds to algorithms cannot
// be documented from here.
16 int allreduce__ompi(const void *sbuf, void *rbuf, int count,
17 MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
// Message size used by the decision tree: datatype size times element count.
19 size_t total_dsize = dtype->size() * (ptrdiff_t)count;
20 int communicator_size = comm->size();
// Dispatch table; it is indexed with `alg-1` at the end of the function, so
// algorithm ids are 1-based. Only some entries are visible in this excerpt.
22 int(*funcs[]) (const void*, void*, int, MPI_Datatype, MPI_Op, MPI_Comm)={
27 &allreduce__ompi_ring_segmented,
32 * {1, "basic_linear"},
33 * {2, "nonoverlapping"},
34 * {3, "recursive_doubling"},
36 * {5, "segmented_ring"},
39 * Currently, ring, segmented ring, and rabenseifner do not support
40 * non-commutative operations.
// First decision tree: taken when an operation is given and it is not
// commutative.
42 if ((op != MPI_OP_NULL) && not op->is_commutative()) {
43 if (communicator_size < 4) {
44 if (total_dsize < 131072) {
49 } else if (communicator_size < 8) {
51 } else if (communicator_size < 16) {
52 if (total_dsize < 1048576) {
57 } else if (communicator_size < 128) {
59 } else if (communicator_size < 256) {
60 if (total_dsize < 131072) {
62 } else if (total_dsize < 524288) {
67 } else if (communicator_size < 512) {
68 if (total_dsize < 4096) {
70 } else if (total_dsize < 524288) {
76 if (total_dsize < 2048) {
// Second decision tree (presumably the `else` branch for commutative
// operations; the `else` line itself is not visible here — TODO confirm).
83 if (communicator_size < 4) {
84 if (total_dsize < 8) {
86 } else if (total_dsize < 4096) {
88 } else if (total_dsize < 8192) {
90 } else if (total_dsize < 16384) {
92 } else if (total_dsize < 65536) {
94 } else if (total_dsize < 262144) {
99 } else if (communicator_size < 8) {
100 if (total_dsize < 16) {
102 } else if (total_dsize < 8192) {
107 } else if (communicator_size < 16) {
108 if (total_dsize < 8192) {
113 } else if (communicator_size < 32) {
114 if (total_dsize < 64) {
116 } else if (total_dsize < 4096) {
121 } else if (communicator_size < 64) {
122 if (total_dsize < 128) {
127 } else if (communicator_size < 128) {
128 if (total_dsize < 262144) {
133 } else if (communicator_size < 256) {
134 if (total_dsize < 131072) {
136 } else if (total_dsize < 262144) {
141 } else if (communicator_size < 512) {
142 if (total_dsize < 4096) {
147 } else if (communicator_size < 2048) {
148 if (total_dsize < 2048) {
150 } else if (total_dsize < 16384) {
155 } else if (communicator_size < 4096) {
156 if (total_dsize < 2048) {
158 } else if (total_dsize < 4096) {
160 } else if (total_dsize < 16384) {
166 if (total_dsize < 2048) {
168 } else if (total_dsize < 16384) {
170 } else if (total_dsize < 32768) {
// Convert the 1-based algorithm id into a table index and delegate.
177 return funcs[alg-1](sbuf, rbuf, count, dtype, op, comm);
// Picks an alltoall implementation from a local table of function pointers
// and forwards the call to it.
//
// The choice depends on the communicator size and on the message size
// (`dsize * scount`).
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): parts of the signature, some table entries and every
// `alg = ...` assignment are not visible in this excerpt.
182 int alltoall__ompi(const void *sbuf, int scount,
184 void* rbuf, int rcount,
189 size_t dsize, total_dsize;
190 int communicator_size = comm->size();
// The element size comes from the send datatype when a real send buffer is
// given; the receive datatype is the alternative (presumably in the elided
// `else` branch, i.e. for MPI_IN_PLACE — TODO confirm).
192 if (MPI_IN_PLACE != sbuf) {
193 dsize = sdtype->size();
195 dsize = rdtype->size();
// Note that `scount` is used in both cases.
197 total_dsize = dsize * (ptrdiff_t)scount;
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
// Every entry visible in this excerpt is the basic linear implementation.
198 int (*funcs[])(const void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm) = {
199 &alltoall__basic_linear,
202 &alltoall__basic_linear,
203 &alltoall__basic_linear
208 * {3, "modified_bruck"},
209 * {4, "linear_sync"},
// Decision tree: outer level on communicator size, inner level on message
// size. Two-process communicators have their own branch.
212 if (communicator_size == 2) {
213 if (total_dsize < 2) {
215 } else if (total_dsize < 4) {
217 } else if (total_dsize < 16) {
219 } else if (total_dsize < 64) {
221 } else if (total_dsize < 256) {
223 } else if (total_dsize < 4096) {
225 } else if (total_dsize < 32768) {
227 } else if (total_dsize < 262144) {
229 } else if (total_dsize < 1048576) {
234 } else if (communicator_size < 8) {
235 if (total_dsize < 8192) {
237 } else if (total_dsize < 16384) {
239 } else if (total_dsize < 65536) {
241 } else if (total_dsize < 524288) {
243 } else if (total_dsize < 1048576) {
248 } else if (communicator_size < 16) {
249 if (total_dsize < 262144) {
254 } else if (communicator_size < 32) {
255 if (total_dsize < 4) {
257 } else if (total_dsize < 512) {
259 } else if (total_dsize < 8192) {
261 } else if (total_dsize < 32768) {
263 } else if (total_dsize < 262144) {
265 } else if (total_dsize < 524288) {
270 } else if (communicator_size < 64) {
271 if (total_dsize < 512) {
273 } else if (total_dsize < 524288) {
278 } else if (communicator_size < 128) {
279 if (total_dsize < 1024) {
281 } else if (total_dsize < 2048) {
283 } else if (total_dsize < 4096) {
285 } else if (total_dsize < 262144) {
290 } else if (communicator_size < 256) {
291 if (total_dsize < 1024) {
293 } else if (total_dsize < 2048) {
295 } else if (total_dsize < 262144) {
300 } else if (communicator_size < 512) {
301 if (total_dsize < 1024) {
303 } else if (total_dsize < 8192) {
305 } else if (total_dsize < 32768) {
310 } else if (communicator_size < 1024) {
311 if (total_dsize < 512) {
313 } else if (total_dsize < 8192) {
315 } else if (total_dsize < 16384) {
317 } else if (total_dsize < 131072) {
319 } else if (total_dsize < 262144) {
324 } else if (communicator_size < 2048) {
325 if (total_dsize < 512) {
327 } else if (total_dsize < 1024) {
329 } else if (total_dsize < 2048) {
331 } else if (total_dsize < 16384) {
333 } else if (total_dsize < 262144) {
338 } else if (communicator_size < 4096) {
339 if (total_dsize < 1024) {
341 } else if (total_dsize < 4096) {
343 } else if (total_dsize < 8192) {
345 } else if (total_dsize < 131072) {
351 if (total_dsize < 2048) {
353 } else if (total_dsize < 8192) {
355 } else if (total_dsize < 16384) {
357 } else if (total_dsize < 32768) {
359 } else if (total_dsize < 65536) {
// Convert the 1-based algorithm id into a table index and delegate.
366 return funcs[alg-1](sbuf, scount, sdtype,
367 rbuf, rcount, rdtype, comm);
// Picks an alltoallv implementation from a local table of function pointers
// and forwards the call to it.
//
// As the in-body note says, only the communicator size drives the choice;
// no message size is computed in the visible code.
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): parts of the signature, the remaining table entries, the
// `alg = ...` assignments and the end of the final call are not visible in
// this excerpt.
370 int alltoallv__ompi(const void *sbuf, const int *scounts, const int *sdisps,
372 void *rbuf, const int *rcounts, const int *rdisps,
377 int communicator_size = comm->size();
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
379 int (*funcs[])(const void *, const int*, const int*, MPI_Datatype, void*, const int*, const int*, MPI_Datatype, MPI_Comm) = {
380 &alltoallv__ompi_basic_linear,
384 * {1, "basic_linear"},
387 * We can only optimize based on com size
389 if (communicator_size < 4) {
391 } else if (communicator_size < 64) {
393 } else if (communicator_size < 128) {
395 } else if (communicator_size < 256) {
397 } else if (communicator_size < 1024) {
// Convert the 1-based algorithm id into a table index and delegate.
402 return funcs[alg-1](sbuf, scounts, sdisps, sdtype,
403 rbuf, rcounts, rdisps,rdtype,
// Picks a barrier implementation from a local table of function pointers and
// forwards the call to it. Only the communicator size drives the choice.
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): the `alg = ...` assignments are not visible in this excerpt.
407 int barrier__ompi(MPI_Comm comm)
409 int communicator_size = comm->size();
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
// The first two visible slots both point to the basic linear barrier, although
// the id list below names slot 2 "double_ring".
411 int (*funcs[])(MPI_Comm) = {
412 &barrier__ompi_basic_linear,
413 &barrier__ompi_basic_linear,
414 &barrier__ompi_recursivedoubling,
415 &barrier__ompi_bruck,
416 &barrier__ompi_two_procs,
421 * {2, "double_ring"},
422 * {3, "recursive_doubling"},
427 * We can only optimize based on com size
429 if (communicator_size < 4) {
431 } else if (communicator_size < 8) {
433 } else if (communicator_size < 64) {
435 } else if (communicator_size < 256) {
437 } else if (communicator_size < 512) {
439 } else if (communicator_size < 1024) {
441 } else if (communicator_size < 4096) {
// Convert the 1-based algorithm id into a table index and delegate.
447 return funcs[alg-1](comm);
// Picks a broadcast implementation from a local table of function pointers
// and forwards the call to it.
//
// The choice depends on the communicator size and on the message size
// (datatype size times `count`).
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): some table entries and every `alg = ...` assignment are not
// visible in this excerpt.
450 int bcast__ompi(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
453 size_t total_dsize, dsize;
455 int communicator_size = comm->size();
457 dsize = datatype->size();
// Message size used by the decision tree.
458 total_dsize = dsize * (unsigned long)count;
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
// The pipeline implementation appears in two consecutive visible slots.
459 int (*funcs[])(void*, int, MPI_Datatype, int, MPI_Comm) = {
461 &bcast__ompi_pipeline,
462 &bcast__ompi_pipeline,
463 &bcast__ompi_split_bintree,
465 &bcast__binomial_tree,
466 &bcast__mvapich2_knomial_intra_node,
467 &bcast__scatter_rdb_allgather,
468 &bcast__scatter_LR_allgather,
471 * {1, "basic_linear"},
474 * {4, "split_binary_tree"},
475 * {5, "binary_tree"},
478 * {8, "scatter_allgather"},
479 * {9, "scatter_allgather_ring"},
// Decision tree: outer level on communicator size, inner level on message
// size.
481 if (communicator_size < 4) {
482 if (total_dsize < 32) {
484 } else if (total_dsize < 256) {
486 } else if (total_dsize < 512) {
488 } else if (total_dsize < 1024) {
490 } else if (total_dsize < 32768) {
492 } else if (total_dsize < 131072) {
494 } else if (total_dsize < 262144) {
496 } else if (total_dsize < 524288) {
498 } else if (total_dsize < 1048576) {
503 } else if (communicator_size < 8) {
504 if (total_dsize < 64) {
506 } else if (total_dsize < 128) {
508 } else if (total_dsize < 2048) {
510 } else if (total_dsize < 8192) {
512 } else if (total_dsize < 1048576) {
517 } else if (communicator_size < 16) {
518 if (total_dsize < 8) {
520 } else if (total_dsize < 64) {
522 } else if (total_dsize < 4096) {
524 } else if (total_dsize < 16384) {
526 } else if (total_dsize < 32768) {
531 } else if (communicator_size < 32) {
532 if (total_dsize < 4096) {
534 } else if (total_dsize < 1048576) {
539 } else if (communicator_size < 64) {
540 if (total_dsize < 2048) {
545 } else if (communicator_size < 128) {
547 } else if (communicator_size < 256) {
548 if (total_dsize < 2) {
550 } else if (total_dsize < 16384) {
552 } else if (total_dsize < 32768) {
554 } else if (total_dsize < 65536) {
559 } else if (communicator_size < 1024) {
560 if (total_dsize < 16384) {
562 } else if (total_dsize < 32768) {
567 } else if (communicator_size < 2048) {
568 if (total_dsize < 524288) {
573 } else if (communicator_size < 4096) {
574 if (total_dsize < 262144) {
580 if (total_dsize < 8192) {
582 } else if (total_dsize < 16384) {
584 } else if (total_dsize < 262144) {
// Convert the 1-based algorithm id into a table index and delegate.
590 return funcs[alg-1](buff, count, datatype, root, comm);
// Picks a reduce implementation from a local table of function pointers and
// forwards the call to it.
//
// The choice depends on whether `op` is non-commutative, on the communicator
// size and on the message size (datatype size times `count`).
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): parts of the signature, some table entries and every
// `alg = ...` assignment are not visible in this excerpt.
593 int reduce__ompi(const void *sendbuf, void *recvbuf,
594 int count, MPI_Datatype datatype,
598 size_t total_dsize, dsize;
600 int communicator_size = comm->size();
602 dsize=datatype->size();
// Message size used by the decision tree.
603 total_dsize = dsize * count;
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
// The last slot holds the basic linear implementation: per the commented-out
// entry, the rabenseifner variant cannot be used with all datatypes.
604 int (*funcs[])(const void*, void*, int, MPI_Datatype, MPI_Op, int, MPI_Comm) = {
605 &reduce__ompi_basic_linear,
607 &reduce__ompi_pipeline,
608 &reduce__ompi_binary,
609 &reduce__ompi_binomial,
610 &reduce__ompi_in_order_binary,
611 //&reduce__rab our rab can't be used with all datatypes
612 &reduce__ompi_basic_linear
620 * {6, "in-order_binary"},
621 * {7, "rabenseifner"},
623 * Currently, only linear and in-order binary tree algorithms are
624 * capable of non commutative ops.
// First decision tree: taken when an operation is given and it is not
// commutative.
626 if ((op != MPI_OP_NULL) && not op->is_commutative()) {
627 if (communicator_size < 4) {
628 if (total_dsize < 8) {
633 } else if (communicator_size < 8) {
635 } else if (communicator_size < 16) {
636 if (total_dsize < 1024) {
638 } else if (total_dsize < 8192) {
640 } else if (total_dsize < 16384) {
642 } else if (total_dsize < 262144) {
647 } else if (communicator_size < 128) {
649 } else if (communicator_size < 256) {
650 if (total_dsize < 512) {
652 } else if (total_dsize < 1024) {
// Second decision tree (presumably the `else` branch for commutative
// operations; the `else` line itself is not visible here — TODO confirm).
661 if (communicator_size < 4) {
662 if (total_dsize < 8) {
664 } else if (total_dsize < 16) {
666 } else if (total_dsize < 32) {
668 } else if (total_dsize < 262144) {
670 } else if (total_dsize < 524288) {
672 } else if (total_dsize < 1048576) {
677 } else if (communicator_size < 8) {
678 if (total_dsize < 4096) {
680 } else if (total_dsize < 65536) {
682 } else if (total_dsize < 262144) {
684 } else if (total_dsize < 524288) {
686 } else if (total_dsize < 1048576) {
691 } else if (communicator_size < 16) {
692 if (total_dsize < 8192) {
697 } else if (communicator_size < 32) {
698 if (total_dsize < 4096) {
703 } else if (communicator_size < 256) {
705 } else if (communicator_size < 512) {
706 if (total_dsize < 8192) {
708 } else if (total_dsize < 16384) {
713 } else if (communicator_size < 2048) {
715 } else if (communicator_size < 4096) {
716 if (total_dsize < 512) {
718 } else if (total_dsize < 1024) {
720 } else if (total_dsize < 8192) {
722 } else if (total_dsize < 16384) {
728 if (total_dsize < 16) {
730 } else if (total_dsize < 32) {
732 } else if (total_dsize < 1024) {
734 } else if (total_dsize < 2048) {
736 } else if (total_dsize < 8192) {
738 } else if (total_dsize < 16384) {
// Convert the 1-based algorithm id into a table index and delegate.
746 return funcs[alg-1] (sendbuf, recvbuf, count, datatype, op, root, comm);
// Picks a reduce_scatter implementation from a local table of function
// pointers and forwards the call to it.
//
// The message size is the sum of all `rcounts[i]` multiplied by `dsize`.
// The choice depends on that size, on the communicator size, and on a
// special case for non-commutative operations or a set `zerocounts` flag.
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): parts of the signature, the declaration of `zerocounts`, the
// initial values of `total_dsize` and `dsize`, and every `alg = ...`
// assignment are not visible in this excerpt.
749 int reduce_scatter__ompi(const void *sbuf, void *rbuf,
756 size_t total_dsize, dsize;
757 int communicator_size = comm->size();
// Accumulate the element counts of all ranks.
// NOTE(review): `total_dsize` is declared without an initializer above;
// confirm it is zeroed in the lines not shown here before this loop runs.
762 for (int i = 0; i < communicator_size; i++) {
763 total_dsize += rcounts[i];
764 // if (0 == rcounts[i]) {
// Scale the summed element count by `dsize` (presumably the size of `dtype`,
// set in a line not shown here — TODO confirm).
768 total_dsize *= dsize;
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
769 int (*funcs[])(const void*, void*, const int*, MPI_Datatype, MPI_Op, MPI_Comm) = {
770 &reduce_scatter__default,
771 &reduce_scatter__ompi_basic_recursivehalving,
772 &reduce_scatter__ompi_ring,
773 &reduce_scatter__ompi_butterfly,
776 * {1, "non-overlapping"},
777 * {2, "recursive_halving"},
781 * Non commutative algorithm capability needs re-investigation.
782 * Defaulting to non overlapping for non commutative ops.
// Special case: a non-commutative operation, or `zerocounts` being set.
784 if (((op != MPI_OP_NULL) && not op->is_commutative()) || (zerocounts)) {
// General decision tree: outer level on communicator size, inner level on
// message size.
787 if (communicator_size < 4) {
788 if (total_dsize < 65536) {
790 } else if (total_dsize < 131072) {
795 } else if (communicator_size < 8) {
796 if (total_dsize < 8) {
798 } else if (total_dsize < 262144) {
803 } else if (communicator_size < 32) {
804 if (total_dsize < 262144) {
809 } else if (communicator_size < 64) {
810 if (total_dsize < 64) {
812 } else if (total_dsize < 2048) {
814 } else if (total_dsize < 524288) {
819 } else if (communicator_size < 128) {
820 if (total_dsize < 256) {
822 } else if (total_dsize < 512) {
824 } else if (total_dsize < 2048) {
826 } else if (total_dsize < 4096) {
831 } else if (communicator_size < 256) {
832 if (total_dsize < 256) {
834 } else if (total_dsize < 512) {
839 } else if (communicator_size < 512) {
840 if (total_dsize < 256) {
842 } else if (total_dsize < 1024) {
847 } else if (communicator_size < 1024) {
848 if (total_dsize < 512) {
850 } else if (total_dsize < 2048) {
852 } else if (total_dsize < 8192) {
854 } else if (total_dsize < 16384) {
859 } else if (communicator_size < 2048) {
860 if (total_dsize < 512) {
862 } else if (total_dsize < 4096) {
864 } else if (total_dsize < 16384) {
866 } else if (total_dsize < 32768) {
871 } else if (communicator_size < 4096) {
872 if (total_dsize < 512) {
874 } else if (total_dsize < 4096) {
880 if (total_dsize < 1024) {
882 } else if (total_dsize < 8192) {
// Convert the 1-based algorithm id into a table index and delegate.
890 return funcs[alg-1] (sbuf, rbuf, rcounts, dtype, op, comm);
// Picks an allgather implementation from a local table of function pointers
// and forwards the call to it.
//
// The choice depends on the communicator size and on the message size
// (`dsize * scount`).
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): parts of the signature, some table entries and every
// `alg = ...` assignment are not visible in this excerpt.
893 int allgather__ompi(const void *sbuf, int scount,
895 void* rbuf, int rcount,
900 int communicator_size;
901 size_t dsize, total_dsize;
903 communicator_size = comm->size();
// The element size comes from the send datatype when a real send buffer is
// given; the receive datatype is the alternative (presumably in the elided
// `else` branch, i.e. for MPI_IN_PLACE — TODO confirm).
904 if (MPI_IN_PLACE != sbuf) {
905 dsize = sdtype->size();
907 dsize = rdtype->size();
// Note that `scount` is used in both cases.
909 total_dsize = dsize * (ptrdiff_t)scount;
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
910 int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm) = {
911 &allgather__NTSLR_NB,
915 &allgather__ompi_neighborexchange,
921 * {3, "recursive_doubling"},
// Decision tree: outer level on communicator size, inner level on message
// size. Two-process communicators have their own branch.
926 if (communicator_size == 2) {
928 } else if (communicator_size < 32) {
930 } else if (communicator_size < 64) {
931 if (total_dsize < 1024) {
933 } else if (total_dsize < 65536) {
938 } else if (communicator_size < 128) {
939 if (total_dsize < 512) {
941 } else if (total_dsize < 65536) {
946 } else if (communicator_size < 256) {
947 if (total_dsize < 512) {
949 } else if (total_dsize < 131072) {
951 } else if (total_dsize < 524288) {
953 } else if (total_dsize < 1048576) {
958 } else if (communicator_size < 512) {
959 if (total_dsize < 32) {
961 } else if (total_dsize < 128) {
963 } else if (total_dsize < 1024) {
965 } else if (total_dsize < 131072) {
967 } else if (total_dsize < 524288) {
969 } else if (total_dsize < 1048576) {
974 } else if (communicator_size < 1024) {
975 if (total_dsize < 64) {
977 } else if (total_dsize < 256) {
979 } else if (total_dsize < 2048) {
984 } else if (communicator_size < 2048) {
985 if (total_dsize < 4) {
987 } else if (total_dsize < 8) {
989 } else if (total_dsize < 16) {
991 } else if (total_dsize < 32) {
993 } else if (total_dsize < 256) {
995 } else if (total_dsize < 512) {
997 } else if (total_dsize < 4096) {
1002 } else if (communicator_size < 4096) {
1003 if (total_dsize < 32) {
1005 } else if (total_dsize < 128) {
1007 } else if (total_dsize < 512) {
1009 } else if (total_dsize < 4096) {
1015 if (total_dsize < 2) {
1017 } else if (total_dsize < 8) {
1019 } else if (total_dsize < 16) {
1021 } else if (total_dsize < 512) {
1023 } else if (total_dsize < 4096) {
// Convert the 1-based algorithm id into a table index and delegate.
1030 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
// Picks an allgatherv implementation from a local table of function pointers
// and forwards the call to it.
//
// The choice depends on the communicator size and on the average per-rank
// message size: the sum of `dsize * rcounts[i]` over all ranks, divided by
// the communicator size.
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): parts of the signature (including the `rdispls` parameter
// used in the final call), the declaration of `i`, the initial value of
// `total_dsize`, some table entries and every `alg = ...` assignment are not
// visible in this excerpt.
1034 int allgatherv__ompi(const void *sbuf, int scount,
1035 MPI_Datatype sdtype,
1036 void* rbuf, const int *rcounts,
1038 MPI_Datatype rdtype,
1043 int communicator_size;
1044 size_t dsize, total_dsize;
1046 communicator_size = comm->size();
// The element size comes from the send datatype when a real send buffer is
// given; the receive datatype is the alternative (presumably in the elided
// `else` branch, i.e. for MPI_IN_PLACE — TODO confirm).
1047 if (MPI_IN_PLACE != sbuf) {
1048 dsize = sdtype->size();
1050 dsize = rdtype->size();
// Sum the contribution of every rank.
// NOTE(review): `total_dsize` is declared without an initializer above;
// confirm it is zeroed in the lines not shown here before this loop runs.
1054 for (i = 0; i < communicator_size; i++) {
1055 total_dsize += dsize * rcounts[i];
1058 /* use the per-rank data size as basis, similar to allgather */
// Integer division: the average is truncated.
1059 size_t per_rank_dsize = total_dsize / communicator_size;
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
1061 int (*funcs[])(const void*, int, MPI_Datatype, void*, const int*, const int*, MPI_Datatype, MPI_Comm) = {
1063 &allgatherv__ompi_bruck,
1064 &allgatherv__mpich_ring,
1065 &allgatherv__ompi_neighborexchange,
// Decision tree: outer level on communicator size, inner level on the
// per-rank size. Two-process communicators have their own branch.
1075 if (communicator_size == 2) {
1076 if (per_rank_dsize < 2048) {
1078 } else if (per_rank_dsize < 4096) {
1080 } else if (per_rank_dsize < 8192) {
1085 } else if (communicator_size < 8) {
1086 if (per_rank_dsize < 256) {
1088 } else if (per_rank_dsize < 4096) {
1090 } else if (per_rank_dsize < 8192) {
1092 } else if (per_rank_dsize < 16384) {
1094 } else if (per_rank_dsize < 262144) {
1099 } else if (communicator_size < 16) {
1100 if (per_rank_dsize < 1024) {
1105 } else if (communicator_size < 32) {
1106 if (per_rank_dsize < 128) {
1108 } else if (per_rank_dsize < 262144) {
1113 } else if (communicator_size < 64) {
1114 if (per_rank_dsize < 256) {
1116 } else if (per_rank_dsize < 8192) {
1121 } else if (communicator_size < 128) {
1122 if (per_rank_dsize < 256) {
1124 } else if (per_rank_dsize < 4096) {
1129 } else if (communicator_size < 256) {
1130 if (per_rank_dsize < 1024) {
1132 } else if (per_rank_dsize < 65536) {
1137 } else if (communicator_size < 512) {
1138 if (per_rank_dsize < 1024) {
1143 } else if (communicator_size < 1024) {
1144 if (per_rank_dsize < 512) {
1146 } else if (per_rank_dsize < 1024) {
1148 } else if (per_rank_dsize < 4096) {
1150 } else if (per_rank_dsize < 1048576) {
1156 if (per_rank_dsize < 4096) {
// Convert the 1-based algorithm id into a table index and delegate.
1163 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm);
// Picks a gather implementation from a local table of three function
// pointers and forwards the call to it.
//
// The choice depends on the communicator size and on the message size, which
// is computed either from the receive side (`rdtype`, `rcount`) or from the
// send side (`sdtype`, `scount`).
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): parts of the signature, the condition selecting between the
// two size computations, and every `alg = ...` assignment are not visible in
// this excerpt.
1166 int gather__ompi(const void *sbuf, int scount,
1167 MPI_Datatype sdtype,
1168 void* rbuf, int rcount,
1169 MPI_Datatype rdtype,
1174 int communicator_size, rank;
1175 size_t dsize, total_dsize;
1177 communicator_size = comm->size();
1178 rank = comm->rank();
// Two alternative size computations follow. The guarding condition is elided
// here; presumably it compares `rank` with `root` so that the root uses the
// receive parameters — TODO confirm.
1181 dsize = rdtype->size();
1182 total_dsize = dsize * rcount;
1184 dsize = sdtype->size();
1185 total_dsize = dsize * scount;
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
1187 int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
1188 &gather__ompi_basic_linear,
1189 &gather__ompi_binomial,
1190 &gather__ompi_linear_sync
1193 * {1, "basic_linear"},
1195 * {3, "linear_sync"},
1197 * We do not make any rank specific checks since the params
1198 * should be uniform across ranks.
// Decision tree: outer level on communicator size, inner level on message
// size.
1200 if (communicator_size < 4) {
1201 if (total_dsize < 2) {
1203 } else if (total_dsize < 4) {
1205 } else if (total_dsize < 32768) {
1207 } else if (total_dsize < 65536) {
1209 } else if (total_dsize < 131072) {
1214 } else if (communicator_size < 8) {
1215 if (total_dsize < 1024) {
1217 } else if (total_dsize < 8192) {
1219 } else if (total_dsize < 32768) {
1221 } else if (total_dsize < 262144) {
1226 } else if (communicator_size < 256) {
1228 } else if (communicator_size < 512) {
1229 if (total_dsize < 2048) {
1231 } else if (total_dsize < 8192) {
// Convert the 1-based algorithm id into a table index and delegate.
1240 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
// Picks a scatter implementation from a local table of three function
// pointers and forwards the call to it.
//
// The choice depends on the communicator size and on the message size, which
// is computed either from the send side (`sdtype`, `scount`) or from the
// receive side (`rdtype`, `rcount`).
//
// Returns whatever the selected implementation returns.
//
// NOTE(review): the condition selecting between the two size computations
// and every `alg = ...` assignment are not visible in this excerpt.
1244 int scatter__ompi(const void *sbuf, int scount,
1245 MPI_Datatype sdtype,
1246 void* rbuf, int rcount,
1247 MPI_Datatype rdtype,
1248 int root, MPI_Comm comm
1251 int communicator_size, rank;
1252 size_t dsize, total_dsize;
1255 communicator_size = comm->size();
1256 rank = comm->rank();
// Two alternative size computations follow. The guarding condition is elided
// here; presumably it compares `rank` with `root` so that the root uses the
// send parameters — TODO confirm.
1258 dsize=sdtype->size();
1259 total_dsize = dsize * scount;
1261 dsize=rdtype->size();
1262 total_dsize = dsize * rcount;
// Dispatch table, indexed with `alg-1` below (1-based algorithm ids).
1264 int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
1265 &scatter__ompi_basic_linear,
1266 &scatter__ompi_binomial,
1267 &scatter__ompi_linear_nb
1270 * {1, "basic_linear"},
1274 * We do not make any rank specific checks since the params
1275 * should be uniform across ranks.
// Decision tree: outer level on communicator size, inner level on message
// size.
1277 if (communicator_size < 4) {
1278 if (total_dsize < 2) {
1280 } else if (total_dsize < 131072) {
1282 } else if (total_dsize < 262144) {
1287 } else if (communicator_size < 8) {
1288 if (total_dsize < 2048) {
1290 } else if (total_dsize < 4096) {
1292 } else if (total_dsize < 8192) {
1294 } else if (total_dsize < 32768) {
1296 } else if (total_dsize < 1048576) {
1301 } else if (communicator_size < 16) {
1302 if (total_dsize < 16384) {
1304 } else if (total_dsize < 1048576) {
1309 } else if (communicator_size < 32) {
1310 if (total_dsize < 16384) {
1312 } else if (total_dsize < 32768) {
1317 } else if (communicator_size < 64) {
1318 if (total_dsize < 512) {
1320 } else if (total_dsize < 8192) {
1322 } else if (total_dsize < 16384) {
1328 if (total_dsize < 512) {
// Convert the 1-based algorithm id into a table index and delegate.
1335 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
1338 } // namespace simgrid::smpi