1 /* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector
4 /* Copyright (c) 2009-2022. The SimGrid Team.
5 * All rights reserved. */
7 /* This program is free software; you can redistribute it and/or modify it
8 * under the terms of the license (GNU LGPL) which comes with this package. */
10 #include "colls_private.hpp"
16 allreduce nonoverlapping, basic linear
/* Selects an allreduce implementation and runs it.
 * The choice is driven by three inputs that are visible here: whether op is
 * commutative, the communicator size, and total_dsize (datatype size times count).
 * The selected 1-based index alg is used to call funcs[alg-1] with the
 * unchanged arguments, and that call's result is returned.
 * NOTE(review): the alg assignments inside each branch are not visible in this excerpt. */
25 int allreduce__ompi(const void *sbuf, void *rbuf, int count,
26 MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
/* Message size used by every threshold below. */
28 size_t total_dsize = dtype->size() * (ptrdiff_t)count;
29 int communicator_size = comm->size();
/* Table of candidate implementations, indexed with alg-1 at the end of the function. */
31 int(*funcs[]) (const void*, void*, int, MPI_Datatype, MPI_Op, MPI_Comm)={
36 &allreduce__ompi_ring_segmented,
41 * {1, "basic_linear"},
42 * {2, "nonoverlapping"},
43 * {3, "recursive_doubling"},
45 * {5, "segmented_ring"},
48 * Currently, ring, segmented ring, and rabenseifner do not support
49 * non-commutative operations.
/* First decision tree: taken when op is set and is not commutative. */
51 if ((op != MPI_OP_NULL) && not op->is_commutative()) {
52 if (communicator_size < 4) {
53 if (total_dsize < 131072) {
58 } else if (communicator_size < 8) {
60 } else if (communicator_size < 16) {
61 if (total_dsize < 1048576) {
66 } else if (communicator_size < 128) {
68 } else if (communicator_size < 256) {
69 if (total_dsize < 131072) {
71 } else if (total_dsize < 524288) {
76 } else if (communicator_size < 512) {
77 if (total_dsize < 4096) {
79 } else if (total_dsize < 524288) {
85 if (total_dsize < 2048) {
/* Second decision tree: presumably the commutative case (the else line is not shown). */
92 if (communicator_size < 4) {
93 if (total_dsize < 8) {
95 } else if (total_dsize < 4096) {
97 } else if (total_dsize < 8192) {
99 } else if (total_dsize < 16384) {
101 } else if (total_dsize < 65536) {
103 } else if (total_dsize < 262144) {
108 } else if (communicator_size < 8) {
109 if (total_dsize < 16) {
111 } else if (total_dsize < 8192) {
116 } else if (communicator_size < 16) {
117 if (total_dsize < 8192) {
122 } else if (communicator_size < 32) {
123 if (total_dsize < 64) {
125 } else if (total_dsize < 4096) {
130 } else if (communicator_size < 64) {
131 if (total_dsize < 128) {
136 } else if (communicator_size < 128) {
137 if (total_dsize < 262144) {
142 } else if (communicator_size < 256) {
143 if (total_dsize < 131072) {
145 } else if (total_dsize < 262144) {
150 } else if (communicator_size < 512) {
151 if (total_dsize < 4096) {
156 } else if (communicator_size < 2048) {
157 if (total_dsize < 2048) {
159 } else if (total_dsize < 16384) {
164 } else if (communicator_size < 4096) {
165 if (total_dsize < 2048) {
167 } else if (total_dsize < 4096) {
169 } else if (total_dsize < 16384) {
175 if (total_dsize < 2048) {
177 } else if (total_dsize < 16384) {
179 } else if (total_dsize < 32768) {
/* alg is 1-based, hence the -1 when indexing the table. */
186 return funcs[alg-1](sbuf, rbuf, count, dtype, op, comm);
/* Selects an alltoall implementation and runs it.
 * The decision uses the communicator size and total_dsize, which is the
 * per-element datatype size multiplied by scount. The selected 1-based index
 * alg is used to call funcs[alg-1] and that call's result is returned.
 * NOTE(review): the alg assignments inside each branch are not visible in this excerpt. */
191 int alltoall__ompi(const void *sbuf, int scount,
193 void* rbuf, int rcount,
198 size_t dsize, total_dsize;
199 int communicator_size = comm->size();
/* Element size comes from sdtype when sbuf is not MPI_IN_PLACE; the rdtype line
 * below is presumably the else branch (the else line itself is not shown). */
201 if (MPI_IN_PLACE != sbuf) {
202 dsize = sdtype->size();
204 dsize = rdtype->size();
206 total_dsize = dsize * (ptrdiff_t)scount;
/* Table of candidate implementations; several visible slots point at the same basic linear routine. */
207 int (*funcs[])(const void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm) = {
208 &alltoall__basic_linear,
211 &alltoall__basic_linear,
212 &alltoall__basic_linear
217 * {3, "modified_bruck"},
218 * {4, "linear_sync"},
/* Two-rank communicators get their own threshold ladder. */
221 if (communicator_size == 2) {
222 if (total_dsize < 2) {
224 } else if (total_dsize < 4) {
226 } else if (total_dsize < 16) {
228 } else if (total_dsize < 64) {
230 } else if (total_dsize < 256) {
232 } else if (total_dsize < 4096) {
234 } else if (total_dsize < 32768) {
236 } else if (total_dsize < 262144) {
238 } else if (total_dsize < 1048576) {
243 } else if (communicator_size < 8) {
244 if (total_dsize < 8192) {
246 } else if (total_dsize < 16384) {
248 } else if (total_dsize < 65536) {
250 } else if (total_dsize < 524288) {
252 } else if (total_dsize < 1048576) {
257 } else if (communicator_size < 16) {
258 if (total_dsize < 262144) {
263 } else if (communicator_size < 32) {
264 if (total_dsize < 4) {
266 } else if (total_dsize < 512) {
268 } else if (total_dsize < 8192) {
270 } else if (total_dsize < 32768) {
272 } else if (total_dsize < 262144) {
274 } else if (total_dsize < 524288) {
279 } else if (communicator_size < 64) {
280 if (total_dsize < 512) {
282 } else if (total_dsize < 524288) {
287 } else if (communicator_size < 128) {
288 if (total_dsize < 1024) {
290 } else if (total_dsize < 2048) {
292 } else if (total_dsize < 4096) {
294 } else if (total_dsize < 262144) {
299 } else if (communicator_size < 256) {
300 if (total_dsize < 1024) {
302 } else if (total_dsize < 2048) {
304 } else if (total_dsize < 262144) {
309 } else if (communicator_size < 512) {
310 if (total_dsize < 1024) {
312 } else if (total_dsize < 8192) {
314 } else if (total_dsize < 32768) {
319 } else if (communicator_size < 1024) {
320 if (total_dsize < 512) {
322 } else if (total_dsize < 8192) {
324 } else if (total_dsize < 16384) {
326 } else if (total_dsize < 131072) {
328 } else if (total_dsize < 262144) {
333 } else if (communicator_size < 2048) {
334 if (total_dsize < 512) {
336 } else if (total_dsize < 1024) {
338 } else if (total_dsize < 2048) {
340 } else if (total_dsize < 16384) {
342 } else if (total_dsize < 262144) {
347 } else if (communicator_size < 4096) {
348 if (total_dsize < 1024) {
350 } else if (total_dsize < 4096) {
352 } else if (total_dsize < 8192) {
354 } else if (total_dsize < 131072) {
360 if (total_dsize < 2048) {
362 } else if (total_dsize < 8192) {
364 } else if (total_dsize < 16384) {
366 } else if (total_dsize < 32768) {
368 } else if (total_dsize < 65536) {
/* alg is 1-based, hence the -1 when indexing the table. */
375 return funcs[alg-1](sbuf, scount, sdtype,
376 rbuf, rcount, rdtype, comm);
/* Selects an alltoallv implementation and runs it.
 * Only the communicator size takes part in the visible decision; no message
 * size is computed in this function. The selected 1-based index alg is used
 * to call funcs[alg-1] and that call's result is returned.
 * NOTE(review): the alg assignments inside each branch are not visible in this excerpt. */
379 int alltoallv__ompi(const void *sbuf, const int *scounts, const int *sdisps,
381 void *rbuf, const int *rcounts, const int *rdisps,
386 int communicator_size = comm->size();
/* Table of candidate implementations, indexed with alg-1 below. */
388 int (*funcs[])(const void *, const int*, const int*, MPI_Datatype, void*, const int*, const int*, MPI_Datatype, MPI_Comm) = {
389 &alltoallv__ompi_basic_linear,
393 * {1, "basic_linear"},
396 * We can only optimize based on com size
398 if (communicator_size < 4) {
400 } else if (communicator_size < 64) {
402 } else if (communicator_size < 128) {
404 } else if (communicator_size < 256) {
406 } else if (communicator_size < 1024) {
/* alg is 1-based, hence the -1 when indexing the table. */
411 return funcs[alg-1](sbuf, scounts, sdisps, sdtype,
412 rbuf, rcounts, rdisps,rdtype,
/* Selects a barrier implementation and runs it on comm.
 * The visible decision depends only on the communicator size. The selected
 * 1-based index alg is used to call funcs[alg-1] and that call's result is returned.
 * NOTE(review): the alg assignments inside each branch are not visible in this excerpt. */
416 int barrier__ompi(MPI_Comm comm)
418 int communicator_size = comm->size();
/* Table of candidate implementations; the first two visible slots both hold the basic linear barrier. */
420 int (*funcs[])(MPI_Comm) = {
421 &barrier__ompi_basic_linear,
422 &barrier__ompi_basic_linear,
423 &barrier__ompi_recursivedoubling,
424 &barrier__ompi_bruck,
425 &barrier__ompi_two_procs,
430 * {2, "double_ring"},
431 * {3, "recursive_doubling"},
436 * We can only optimize based on com size
438 if (communicator_size < 4) {
440 } else if (communicator_size < 8) {
442 } else if (communicator_size < 64) {
444 } else if (communicator_size < 256) {
446 } else if (communicator_size < 512) {
448 } else if (communicator_size < 1024) {
450 } else if (communicator_size < 4096) {
/* alg is 1-based, hence the -1 when indexing the table. */
456 return funcs[alg-1](comm);
/* Selects a broadcast implementation and runs it.
 * The decision uses the communicator size and total_dsize (datatype size
 * times count). The selected 1-based index alg is used to call funcs[alg-1]
 * with the unchanged arguments, and that call's result is returned.
 * NOTE(review): the alg assignments inside each branch are not visible in this excerpt. */
459 int bcast__ompi(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
462 size_t total_dsize, dsize;
464 int communicator_size = comm->size();
/* Message size used by every threshold below. */
466 dsize = datatype->size();
467 total_dsize = dsize * (unsigned long)count;
/* Table of candidate implementations; two adjacent visible slots both hold the pipeline broadcast. */
468 int (*funcs[])(void*, int, MPI_Datatype, int, MPI_Comm) = {
470 &bcast__ompi_pipeline,
471 &bcast__ompi_pipeline,
472 &bcast__ompi_split_bintree,
474 &bcast__binomial_tree,
475 &bcast__mvapich2_knomial_intra_node,
476 &bcast__scatter_rdb_allgather,
477 &bcast__scatter_LR_allgather,
480 * {1, "basic_linear"},
483 * {4, "split_binary_tree"},
484 * {5, "binary_tree"},
487 * {8, "scatter_allgather"},
488 * {9, "scatter_allgather_ring"},
/* Outer ladder on communicator size, inner ladders on message size. */
490 if (communicator_size < 4) {
491 if (total_dsize < 32) {
493 } else if (total_dsize < 256) {
495 } else if (total_dsize < 512) {
497 } else if (total_dsize < 1024) {
499 } else if (total_dsize < 32768) {
501 } else if (total_dsize < 131072) {
503 } else if (total_dsize < 262144) {
505 } else if (total_dsize < 524288) {
507 } else if (total_dsize < 1048576) {
512 } else if (communicator_size < 8) {
513 if (total_dsize < 64) {
515 } else if (total_dsize < 128) {
517 } else if (total_dsize < 2048) {
519 } else if (total_dsize < 8192) {
521 } else if (total_dsize < 1048576) {
526 } else if (communicator_size < 16) {
527 if (total_dsize < 8) {
529 } else if (total_dsize < 64) {
531 } else if (total_dsize < 4096) {
533 } else if (total_dsize < 16384) {
535 } else if (total_dsize < 32768) {
540 } else if (communicator_size < 32) {
541 if (total_dsize < 4096) {
543 } else if (total_dsize < 1048576) {
548 } else if (communicator_size < 64) {
549 if (total_dsize < 2048) {
554 } else if (communicator_size < 128) {
556 } else if (communicator_size < 256) {
557 if (total_dsize < 2) {
559 } else if (total_dsize < 16384) {
561 } else if (total_dsize < 32768) {
563 } else if (total_dsize < 65536) {
568 } else if (communicator_size < 1024) {
569 if (total_dsize < 16384) {
571 } else if (total_dsize < 32768) {
576 } else if (communicator_size < 2048) {
577 if (total_dsize < 524288) {
582 } else if (communicator_size < 4096) {
583 if (total_dsize < 262144) {
589 if (total_dsize < 8192) {
591 } else if (total_dsize < 16384) {
593 } else if (total_dsize < 262144) {
/* alg is 1-based, hence the -1 when indexing the table. */
599 return funcs[alg-1](buff, count, datatype, root, comm);
/* Selects a reduce implementation and runs it.
 * The decision uses whether op is commutative, the communicator size and
 * total_dsize (datatype size times count). The selected 1-based index alg is
 * used to call funcs[alg-1] and that call's result is returned.
 * NOTE(review): the alg assignments inside each branch are not visible in this excerpt. */
602 int reduce__ompi(const void *sendbuf, void *recvbuf,
603 int count, MPI_Datatype datatype,
607 size_t total_dsize, dsize;
609 int communicator_size = comm->size();
/* Message size used by every threshold below. */
611 dsize=datatype->size();
612 total_dsize = dsize * count;
/* Table of candidate implementations. The last slot holds the basic linear
 * routine in place of the commented-out reduce__rab entry. */
613 int (*funcs[])(const void*, void*, int, MPI_Datatype, MPI_Op, int, MPI_Comm) = {
614 &reduce__ompi_basic_linear,
616 &reduce__ompi_pipeline,
617 &reduce__ompi_binary,
618 &reduce__ompi_binomial,
619 &reduce__ompi_in_order_binary,
620 //&reduce__rab our rab can't be used with all datatypes
621 &reduce__ompi_basic_linear
629 * {6, "in-order_binary"},
630 * {7, "rabenseifner"},
632 * Currently, only linear and in-order binary tree algorithms are
633 * capable of non commutative ops.
/* First decision tree: taken when op is set and is not commutative. */
635 if ((op != MPI_OP_NULL) && not op->is_commutative()) {
636 if (communicator_size < 4) {
637 if (total_dsize < 8) {
642 } else if (communicator_size < 8) {
644 } else if (communicator_size < 16) {
645 if (total_dsize < 1024) {
647 } else if (total_dsize < 8192) {
649 } else if (total_dsize < 16384) {
651 } else if (total_dsize < 262144) {
656 } else if (communicator_size < 128) {
658 } else if (communicator_size < 256) {
659 if (total_dsize < 512) {
661 } else if (total_dsize < 1024) {
/* Second decision tree: presumably the commutative case (the else line is not shown). */
670 if (communicator_size < 4) {
671 if (total_dsize < 8) {
673 } else if (total_dsize < 16) {
675 } else if (total_dsize < 32) {
677 } else if (total_dsize < 262144) {
679 } else if (total_dsize < 524288) {
681 } else if (total_dsize < 1048576) {
686 } else if (communicator_size < 8) {
687 if (total_dsize < 4096) {
689 } else if (total_dsize < 65536) {
691 } else if (total_dsize < 262144) {
693 } else if (total_dsize < 524288) {
695 } else if (total_dsize < 1048576) {
700 } else if (communicator_size < 16) {
701 if (total_dsize < 8192) {
706 } else if (communicator_size < 32) {
707 if (total_dsize < 4096) {
712 } else if (communicator_size < 256) {
714 } else if (communicator_size < 512) {
715 if (total_dsize < 8192) {
717 } else if (total_dsize < 16384) {
722 } else if (communicator_size < 2048) {
724 } else if (communicator_size < 4096) {
725 if (total_dsize < 512) {
727 } else if (total_dsize < 1024) {
729 } else if (total_dsize < 8192) {
731 } else if (total_dsize < 16384) {
737 if (total_dsize < 16) {
739 } else if (total_dsize < 32) {
741 } else if (total_dsize < 1024) {
743 } else if (total_dsize < 2048) {
745 } else if (total_dsize < 8192) {
747 } else if (total_dsize < 16384) {
/* alg is 1-based, hence the -1 when indexing the table. */
755 return funcs[alg-1] (sendbuf, recvbuf, count, datatype, op, root, comm);
/* Selects a reduce_scatter implementation and runs it.
 * total_dsize is built by summing rcounts over all ranks and multiplying by
 * dsize. Non-commutative operators, or a set zerocounts flag, take a separate
 * branch; otherwise the communicator size and total_dsize drive the choice.
 * The selected 1-based index alg is used to call funcs[alg-1] and that call's
 * result is returned.
 * NOTE(review): the initialisation of total_dsize, dsize and zerocounts is not
 * visible in this excerpt; confirm total_dsize starts at zero before the loop. */
758 int reduce_scatter__ompi(const void *sbuf, void *rbuf,
765 size_t total_dsize, dsize;
766 int communicator_size = comm->size();
/* Accumulate the element count over every rank of the communicator. */
771 for (int i = 0; i < communicator_size; i++) {
772 total_dsize += rcounts[i];
773 // if (0 == rcounts[i]) {
/* Convert the element count into a size by scaling with dsize. */
777 total_dsize *= dsize;
/* Table of candidate implementations, indexed with alg-1 below. */
778 int (*funcs[])(const void*, void*, const int*, MPI_Datatype, MPI_Op, MPI_Comm) = {
779 &reduce_scatter__default,
780 &reduce_scatter__ompi_basic_recursivehalving,
781 &reduce_scatter__ompi_ring,
782 &reduce_scatter__ompi_butterfly,
785 * {1, "non-overlapping"},
786 * {2, "recursive_halving"},
790 * Non commutative algorithm capability needs re-investigation.
791 * Defaulting to non overlapping for non commutative ops.
/* Special case: non-commutative op, or zerocounts is set. */
793 if (((op != MPI_OP_NULL) && not op->is_commutative()) || (zerocounts)) {
796 if (communicator_size < 4) {
797 if (total_dsize < 65536) {
799 } else if (total_dsize < 131072) {
804 } else if (communicator_size < 8) {
805 if (total_dsize < 8) {
807 } else if (total_dsize < 262144) {
812 } else if (communicator_size < 32) {
813 if (total_dsize < 262144) {
818 } else if (communicator_size < 64) {
819 if (total_dsize < 64) {
821 } else if (total_dsize < 2048) {
823 } else if (total_dsize < 524288) {
828 } else if (communicator_size < 128) {
829 if (total_dsize < 256) {
831 } else if (total_dsize < 512) {
833 } else if (total_dsize < 2048) {
835 } else if (total_dsize < 4096) {
840 } else if (communicator_size < 256) {
841 if (total_dsize < 256) {
843 } else if (total_dsize < 512) {
848 } else if (communicator_size < 512) {
849 if (total_dsize < 256) {
851 } else if (total_dsize < 1024) {
856 } else if (communicator_size < 1024) {
857 if (total_dsize < 512) {
859 } else if (total_dsize < 2048) {
861 } else if (total_dsize < 8192) {
863 } else if (total_dsize < 16384) {
868 } else if (communicator_size < 2048) {
869 if (total_dsize < 512) {
871 } else if (total_dsize < 4096) {
873 } else if (total_dsize < 16384) {
875 } else if (total_dsize < 32768) {
880 } else if (communicator_size < 4096) {
881 if (total_dsize < 512) {
883 } else if (total_dsize < 4096) {
889 if (total_dsize < 1024) {
891 } else if (total_dsize < 8192) {
/* alg is 1-based, hence the -1 when indexing the table. */
899 return funcs[alg-1] (sbuf, rbuf, rcounts, dtype, op, comm);
/* Selects an allgather implementation and runs it.
 * The decision uses the communicator size and total_dsize, which is the
 * per-element datatype size multiplied by scount. The selected 1-based index
 * alg is used to call funcs[alg-1] and that call's result is returned.
 * NOTE(review): the alg assignments inside each branch are not visible in this excerpt. */
902 int allgather__ompi(const void *sbuf, int scount,
904 void* rbuf, int rcount,
909 int communicator_size;
910 size_t dsize, total_dsize;
912 communicator_size = comm->size();
/* Element size comes from sdtype when sbuf is not MPI_IN_PLACE; the rdtype line
 * below is presumably the else branch (the else line itself is not shown). */
913 if (MPI_IN_PLACE != sbuf) {
914 dsize = sdtype->size();
916 dsize = rdtype->size();
918 total_dsize = dsize * (ptrdiff_t)scount;
/* Table of candidate implementations, indexed with alg-1 below. */
919 int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm) = {
920 &allgather__NTSLR_NB,
924 &allgather__ompi_neighborexchange,
930 * {3, "recursive_doubling"},
/* Two-rank communicators are handled as their own case. */
935 if (communicator_size == 2) {
937 } else if (communicator_size < 32) {
939 } else if (communicator_size < 64) {
940 if (total_dsize < 1024) {
942 } else if (total_dsize < 65536) {
947 } else if (communicator_size < 128) {
948 if (total_dsize < 512) {
950 } else if (total_dsize < 65536) {
955 } else if (communicator_size < 256) {
956 if (total_dsize < 512) {
958 } else if (total_dsize < 131072) {
960 } else if (total_dsize < 524288) {
962 } else if (total_dsize < 1048576) {
967 } else if (communicator_size < 512) {
968 if (total_dsize < 32) {
970 } else if (total_dsize < 128) {
972 } else if (total_dsize < 1024) {
974 } else if (total_dsize < 131072) {
976 } else if (total_dsize < 524288) {
978 } else if (total_dsize < 1048576) {
983 } else if (communicator_size < 1024) {
984 if (total_dsize < 64) {
986 } else if (total_dsize < 256) {
988 } else if (total_dsize < 2048) {
993 } else if (communicator_size < 2048) {
994 if (total_dsize < 4) {
996 } else if (total_dsize < 8) {
998 } else if (total_dsize < 16) {
1000 } else if (total_dsize < 32) {
1002 } else if (total_dsize < 256) {
1004 } else if (total_dsize < 512) {
1006 } else if (total_dsize < 4096) {
1011 } else if (communicator_size < 4096) {
1012 if (total_dsize < 32) {
1014 } else if (total_dsize < 128) {
1016 } else if (total_dsize < 512) {
1018 } else if (total_dsize < 4096) {
1024 if (total_dsize < 2) {
1026 } else if (total_dsize < 8) {
1028 } else if (total_dsize < 16) {
1030 } else if (total_dsize < 512) {
1032 } else if (total_dsize < 4096) {
/* alg is 1-based, hence the -1 when indexing the table. */
1039 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
/* Selects an allgatherv implementation and runs it.
 * total_dsize accumulates dsize times rcounts[i] over all ranks; the decision
 * then uses the communicator size and per_rank_dsize, the average obtained by
 * dividing total_dsize by the communicator size. The selected 1-based index
 * alg is used to call funcs[alg-1] and that call's result is returned.
 * NOTE(review): the initialisation of total_dsize and the declaration of i are
 * not visible in this excerpt; confirm total_dsize starts at zero before the loop. */
1043 int allgatherv__ompi(const void *sbuf, int scount,
1044 MPI_Datatype sdtype,
1045 void* rbuf, const int *rcounts,
1047 MPI_Datatype rdtype,
1052 int communicator_size;
1053 size_t dsize, total_dsize;
1055 communicator_size = comm->size();
/* Element size comes from sdtype when sbuf is not MPI_IN_PLACE; the rdtype line
 * below is presumably the else branch (the else line itself is not shown). */
1056 if (MPI_IN_PLACE != sbuf) {
1057 dsize = sdtype->size();
1059 dsize = rdtype->size();
/* Sum the receive sizes of every rank. */
1063 for (i = 0; i < communicator_size; i++) {
1064 total_dsize += dsize * rcounts[i];
1067 /* use the per-rank data size as basis, similar to allgather */
1068 size_t per_rank_dsize = total_dsize / communicator_size;
/* Table of candidate implementations, indexed with alg-1 below. */
1070 int (*funcs[])(const void*, int, MPI_Datatype, void*, const int*, const int*, MPI_Datatype, MPI_Comm) = {
1072 &allgatherv__ompi_bruck,
1073 &allgatherv__mpich_ring,
1074 &allgatherv__ompi_neighborexchange,
/* Two-rank communicators get their own threshold ladder. */
1084 if (communicator_size == 2) {
1085 if (per_rank_dsize < 2048) {
1087 } else if (per_rank_dsize < 4096) {
1089 } else if (per_rank_dsize < 8192) {
1094 } else if (communicator_size < 8) {
1095 if (per_rank_dsize < 256) {
1097 } else if (per_rank_dsize < 4096) {
1099 } else if (per_rank_dsize < 8192) {
1101 } else if (per_rank_dsize < 16384) {
1103 } else if (per_rank_dsize < 262144) {
1108 } else if (communicator_size < 16) {
1109 if (per_rank_dsize < 1024) {
1114 } else if (communicator_size < 32) {
1115 if (per_rank_dsize < 128) {
1117 } else if (per_rank_dsize < 262144) {
1122 } else if (communicator_size < 64) {
1123 if (per_rank_dsize < 256) {
1125 } else if (per_rank_dsize < 8192) {
1130 } else if (communicator_size < 128) {
1131 if (per_rank_dsize < 256) {
1133 } else if (per_rank_dsize < 4096) {
1138 } else if (communicator_size < 256) {
1139 if (per_rank_dsize < 1024) {
1141 } else if (per_rank_dsize < 65536) {
1146 } else if (communicator_size < 512) {
1147 if (per_rank_dsize < 1024) {
1152 } else if (communicator_size < 1024) {
1153 if (per_rank_dsize < 512) {
1155 } else if (per_rank_dsize < 1024) {
1157 } else if (per_rank_dsize < 4096) {
1159 } else if (per_rank_dsize < 1048576) {
1165 if (per_rank_dsize < 4096) {
/* alg is 1-based, hence the -1 when indexing the table. */
1172 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm);
/* Selects a gather implementation and runs it.
 * total_dsize is computed either from rdtype and rcount or from sdtype and
 * scount; the decision then uses the communicator size and total_dsize. The
 * selected 1-based index alg is used to call funcs[alg-1] and that call's
 * result is returned.
 * NOTE(review): the condition choosing between the two size computations is
 * not visible in this excerpt; presumably it compares rank with root - confirm. */
1175 int gather__ompi(const void *sbuf, int scount,
1176 MPI_Datatype sdtype,
1177 void* rbuf, int rcount,
1178 MPI_Datatype rdtype,
1183 int communicator_size, rank;
1184 size_t dsize, total_dsize;
1186 communicator_size = comm->size();
1187 rank = comm->rank();
/* Size derived from the receive signature. */
1190 dsize = rdtype->size();
1191 total_dsize = dsize * rcount;
/* Size derived from the send signature. */
1193 dsize = sdtype->size();
1194 total_dsize = dsize * scount;
/* Table of candidate implementations, indexed with alg-1 below. */
1196 int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
1197 &gather__ompi_basic_linear,
1198 &gather__ompi_binomial,
1199 &gather__ompi_linear_sync
1202 * {1, "basic_linear"},
1204 * {3, "linear_sync"},
1206 * We do not make any rank specific checks since the params
1207 * should be uniform across ranks.
1209 if (communicator_size < 4) {
1210 if (total_dsize < 2) {
1212 } else if (total_dsize < 4) {
1214 } else if (total_dsize < 32768) {
1216 } else if (total_dsize < 65536) {
1218 } else if (total_dsize < 131072) {
1223 } else if (communicator_size < 8) {
1224 if (total_dsize < 1024) {
1226 } else if (total_dsize < 8192) {
1228 } else if (total_dsize < 32768) {
1230 } else if (total_dsize < 262144) {
1235 } else if (communicator_size < 256) {
1237 } else if (communicator_size < 512) {
1238 if (total_dsize < 2048) {
1240 } else if (total_dsize < 8192) {
/* alg is 1-based, hence the -1 when indexing the table. */
1249 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
/* Selects a scatter implementation and runs it.
 * total_dsize is computed either from sdtype and scount or from rdtype and
 * rcount; the decision then uses the communicator size and total_dsize. The
 * selected 1-based index alg is used to call funcs[alg-1] and that call's
 * result is returned.
 * NOTE(review): the condition choosing between the two size computations is
 * not visible in this excerpt; presumably it compares rank with root - confirm. */
1253 int scatter__ompi(const void *sbuf, int scount,
1254 MPI_Datatype sdtype,
1255 void* rbuf, int rcount,
1256 MPI_Datatype rdtype,
1257 int root, MPI_Comm comm
1260 int communicator_size, rank;
1261 size_t dsize, total_dsize;
1264 communicator_size = comm->size();
1265 rank = comm->rank();
/* Size derived from the send signature. */
1267 dsize=sdtype->size();
1268 total_dsize = dsize * scount;
/* Size derived from the receive signature. */
1270 dsize=rdtype->size();
1271 total_dsize = dsize * rcount;
/* Table of candidate implementations; the first and third slots both hold the basic linear scatter. */
1273 int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
1274 &scatter__ompi_basic_linear,
1275 &scatter__ompi_binomial,
1276 &scatter__ompi_basic_linear
1279 * {1, "basic_linear"},
1283 * We do not make any rank specific checks since the params
1284 * should be uniform across ranks.
1286 if (communicator_size < 4) {
1287 if (total_dsize < 2) {
1289 } else if (total_dsize < 131072) {
1291 } else if (total_dsize < 262144) {
1296 } else if (communicator_size < 8) {
1297 if (total_dsize < 2048) {
1299 } else if (total_dsize < 4096) {
1301 } else if (total_dsize < 8192) {
1303 } else if (total_dsize < 32768) {
1305 } else if (total_dsize < 1048576) {
1310 } else if (communicator_size < 16) {
1311 if (total_dsize < 16384) {
1313 } else if (total_dsize < 1048576) {
1318 } else if (communicator_size < 32) {
1319 if (total_dsize < 16384) {
1321 } else if (total_dsize < 32768) {
1326 } else if (communicator_size < 64) {
1327 if (total_dsize < 512) {
1329 } else if (total_dsize < 8192) {
1331 } else if (total_dsize < 16384) {
1337 if (total_dsize < 512) {
/* alg is 1-based, hence the -1 when indexing the table. */
1344 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);