1 /* Selector for collective algorithms, based on Open MPI's default coll_tuned_decision_fixed selector
4 /* Copyright (c) 2009-2022. The SimGrid Team.
5 * All rights reserved. */
7 /* This program is free software; you can redistribute it and/or modify it
8 * under the terms of the license (GNU LGPL) which comes with this package. */
10 #include "colls_private.hpp"
/* Pick and run an allreduce implementation.
 *
 * The decision uses three inputs: whether op is commutative, comm->size(),
 * and the total data size (dtype->size() * count). The chosen entry of the
 * local table funcs is called with the caller arguments unchanged and its
 * result is returned as is.
 */
17 int allreduce__ompi(const void *sbuf, void *rbuf, int count,
18 MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
// Total data size: element size times element count. count is cast to
// ptrdiff_t before the multiplication.
20 size_t total_dsize = dtype->size() * (ptrdiff_t)count;
21 int communicator_size = comm->size();
// Table of candidate implementations. It is indexed with alg-1 at the final
// call, so alg is a 1-based algorithm number.
23 int(*funcs[]) (const void*, void*, int, MPI_Datatype, MPI_Op, MPI_Comm)={
28 &allreduce__ompi_ring_segmented,
33 * {1, "basic_linear"},
34 * {2, "nonoverlapping"},
35 * {3, "recursive_doubling"},
37 * {5, "segmented_ring"},
40 * Currently, ring, segmented ring, and rabenseifner do not support
41 * non-commutative operations.
// Non-commutative operations (op set and not commutative) are handled by a
// dedicated tree keyed on communicator size, then on total data size.
43 if ((op != MPI_OP_NULL) && not op->is_commutative()) {
44 if (communicator_size < 4) {
45 if (total_dsize < 131072) {
50 } else if (communicator_size < 8) {
52 } else if (communicator_size < 16) {
53 if (total_dsize < 1048576) {
58 } else if (communicator_size < 128) {
60 } else if (communicator_size < 256) {
61 if (total_dsize < 131072) {
63 } else if (total_dsize < 524288) {
68 } else if (communicator_size < 512) {
69 if (total_dsize < 4096) {
71 } else if (total_dsize < 524288) {
77 if (total_dsize < 2048) {
// NOTE(review): the tree below presumably covers the remaining case
// (op is MPI_OP_NULL or commutative) - confirm.
84 if (communicator_size < 4) {
85 if (total_dsize < 8) {
87 } else if (total_dsize < 4096) {
89 } else if (total_dsize < 8192) {
91 } else if (total_dsize < 16384) {
93 } else if (total_dsize < 65536) {
95 } else if (total_dsize < 262144) {
100 } else if (communicator_size < 8) {
101 if (total_dsize < 16) {
103 } else if (total_dsize < 8192) {
108 } else if (communicator_size < 16) {
109 if (total_dsize < 8192) {
114 } else if (communicator_size < 32) {
115 if (total_dsize < 64) {
117 } else if (total_dsize < 4096) {
122 } else if (communicator_size < 64) {
123 if (total_dsize < 128) {
128 } else if (communicator_size < 128) {
129 if (total_dsize < 262144) {
134 } else if (communicator_size < 256) {
135 if (total_dsize < 131072) {
137 } else if (total_dsize < 262144) {
142 } else if (communicator_size < 512) {
143 if (total_dsize < 4096) {
148 } else if (communicator_size < 2048) {
149 if (total_dsize < 2048) {
151 } else if (total_dsize < 16384) {
156 } else if (communicator_size < 4096) {
157 if (total_dsize < 2048) {
159 } else if (total_dsize < 4096) {
161 } else if (total_dsize < 16384) {
167 if (total_dsize < 2048) {
169 } else if (total_dsize < 16384) {
171 } else if (total_dsize < 32768) {
// Dispatch: alg is 1-based, the table is 0-based.
178 return funcs[alg-1](sbuf, rbuf, count, dtype, op, comm);
/* Pick and run an alltoall implementation.
 *
 * The decision is keyed on comm->size() and on total_dsize, computed as a
 * datatype size times scount. The chosen entry of funcs is called with the
 * caller arguments unchanged and its result is returned as is.
 */
183 int alltoall__ompi(const void *sbuf, int scount,
185 void* rbuf, int rcount,
190 size_t dsize, total_dsize;
191 int communicator_size = comm->size();
// The element size is taken from sdtype when sbuf is not MPI_IN_PLACE.
// NOTE(review): the rdtype assignment is presumably the MPI_IN_PLACE
// fallback - confirm.
193 if (MPI_IN_PLACE != sbuf) {
194 dsize = sdtype->size();
196 dsize = rdtype->size();
// scount is cast to ptrdiff_t before the multiplication.
198 total_dsize = dsize * (ptrdiff_t)scount;
// Table of candidate implementations, indexed with alg-1 at the final call.
// Several slots point at alltoall__basic_linear.
199 int (*funcs[])(const void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm) = {
200 &alltoall__basic_linear,
203 &alltoall__basic_linear,
204 &alltoall__basic_linear
209 * {3, "modified_bruck"},
210 * {4, "linear_sync"},
// Decision tree: first on communicator size (the two-rank case is tested
// with ==, the rest with upper bounds), then on total data size.
213 if (communicator_size == 2) {
214 if (total_dsize < 2) {
216 } else if (total_dsize < 4) {
218 } else if (total_dsize < 16) {
220 } else if (total_dsize < 64) {
222 } else if (total_dsize < 256) {
224 } else if (total_dsize < 4096) {
226 } else if (total_dsize < 32768) {
228 } else if (total_dsize < 262144) {
230 } else if (total_dsize < 1048576) {
235 } else if (communicator_size < 8) {
236 if (total_dsize < 8192) {
238 } else if (total_dsize < 16384) {
240 } else if (total_dsize < 65536) {
242 } else if (total_dsize < 524288) {
244 } else if (total_dsize < 1048576) {
249 } else if (communicator_size < 16) {
250 if (total_dsize < 262144) {
255 } else if (communicator_size < 32) {
256 if (total_dsize < 4) {
258 } else if (total_dsize < 512) {
260 } else if (total_dsize < 8192) {
262 } else if (total_dsize < 32768) {
264 } else if (total_dsize < 262144) {
266 } else if (total_dsize < 524288) {
271 } else if (communicator_size < 64) {
272 if (total_dsize < 512) {
274 } else if (total_dsize < 524288) {
279 } else if (communicator_size < 128) {
280 if (total_dsize < 1024) {
282 } else if (total_dsize < 2048) {
284 } else if (total_dsize < 4096) {
286 } else if (total_dsize < 262144) {
291 } else if (communicator_size < 256) {
292 if (total_dsize < 1024) {
294 } else if (total_dsize < 2048) {
296 } else if (total_dsize < 262144) {
301 } else if (communicator_size < 512) {
302 if (total_dsize < 1024) {
304 } else if (total_dsize < 8192) {
306 } else if (total_dsize < 32768) {
311 } else if (communicator_size < 1024) {
312 if (total_dsize < 512) {
314 } else if (total_dsize < 8192) {
316 } else if (total_dsize < 16384) {
318 } else if (total_dsize < 131072) {
320 } else if (total_dsize < 262144) {
325 } else if (communicator_size < 2048) {
326 if (total_dsize < 512) {
328 } else if (total_dsize < 1024) {
330 } else if (total_dsize < 2048) {
332 } else if (total_dsize < 16384) {
334 } else if (total_dsize < 262144) {
339 } else if (communicator_size < 4096) {
340 if (total_dsize < 1024) {
342 } else if (total_dsize < 4096) {
344 } else if (total_dsize < 8192) {
346 } else if (total_dsize < 131072) {
352 if (total_dsize < 2048) {
354 } else if (total_dsize < 8192) {
356 } else if (total_dsize < 16384) {
358 } else if (total_dsize < 32768) {
360 } else if (total_dsize < 65536) {
// Dispatch: alg is 1-based, the table is 0-based.
367 return funcs[alg-1](sbuf, scount, sdtype,
368 rbuf, rcount, rdtype, comm);
/* Pick and run an alltoallv implementation.
 *
 * Only comm->size() drives the decision here; no data size is computed.
 * The chosen entry of funcs is called with the caller arguments unchanged
 * and its result is returned as is.
 */
371 int alltoallv__ompi(const void *sbuf, const int *scounts, const int *sdisps,
373 void *rbuf, const int *rcounts, const int *rdisps,
378 int communicator_size = comm->size();
// Table of candidate implementations, indexed with alg-1 at the final call.
380 int (*funcs[])(const void *, const int*, const int*, MPI_Datatype, void*, const int*, const int*, MPI_Datatype, MPI_Comm) = {
381 &alltoallv__ompi_basic_linear,
385 * {1, "basic_linear"},
388 * We can only optimize based on com size
// Single-level decision on the communicator size.
390 if (communicator_size < 4) {
392 } else if (communicator_size < 64) {
394 } else if (communicator_size < 128) {
396 } else if (communicator_size < 256) {
398 } else if (communicator_size < 1024) {
// Dispatch: alg is 1-based, the table is 0-based.
403 return funcs[alg-1](sbuf, scounts, sdisps, sdtype,
404 rbuf, rcounts, rdisps,rdtype,
/* Pick and run a barrier implementation.
 *
 * A barrier carries no payload, so comm->size() is the only input to the
 * decision. The chosen entry of funcs is called on comm and its result is
 * returned as is.
 */
408 int barrier__ompi(MPI_Comm comm)
410 int communicator_size = comm->size();
// Table of candidate implementations, indexed with alg-1 at the final call.
// The first two slots both hold the basic linear barrier, so algorithm 2
// (listed as double_ring below) runs the basic linear code.
412 int (*funcs[])(MPI_Comm) = {
413 &barrier__ompi_basic_linear,
414 &barrier__ompi_basic_linear,
415 &barrier__ompi_recursivedoubling,
416 &barrier__ompi_bruck,
417 &barrier__ompi_two_procs,
422 * {2, "double_ring"},
423 * {3, "recursive_doubling"},
428 * We can only optimize based on com size
// Single-level decision on the communicator size.
430 if (communicator_size < 4) {
432 } else if (communicator_size < 8) {
434 } else if (communicator_size < 64) {
436 } else if (communicator_size < 256) {
438 } else if (communicator_size < 512) {
440 } else if (communicator_size < 1024) {
442 } else if (communicator_size < 4096) {
// Dispatch: alg is 1-based, the table is 0-based.
448 return funcs[alg-1](comm);
/* Pick and run a broadcast implementation.
 *
 * The decision is keyed on comm->size() and on total_dsize, computed as
 * datatype->size() * count. The chosen entry of funcs is called with the
 * caller arguments unchanged and its result is returned as is.
 */
451 int bcast__ompi(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
454 size_t total_dsize, dsize;
456 int communicator_size = comm->size();
// Total data size: element size times element count. count is cast to
// unsigned long before the multiplication.
458 dsize = datatype->size();
459 total_dsize = dsize * (unsigned long)count;
// Table of candidate implementations, indexed with alg-1 at the final call.
// bcast__ompi_pipeline fills two consecutive slots.
460 int (*funcs[])(void*, int, MPI_Datatype, int, MPI_Comm) = {
462 &bcast__ompi_pipeline,
463 &bcast__ompi_pipeline,
464 &bcast__ompi_split_bintree,
466 &bcast__binomial_tree,
467 &bcast__mvapich2_knomial_intra_node,
468 &bcast__scatter_rdb_allgather,
469 &bcast__scatter_LR_allgather,
472 * {1, "basic_linear"},
475 * {4, "split_binary_tree"},
476 * {5, "binary_tree"},
479 * {8, "scatter_allgather"},
480 * {9, "scatter_allgather_ring"},
// Decision tree: first on communicator size, then on total data size.
482 if (communicator_size < 4) {
483 if (total_dsize < 32) {
485 } else if (total_dsize < 256) {
487 } else if (total_dsize < 512) {
489 } else if (total_dsize < 1024) {
491 } else if (total_dsize < 32768) {
493 } else if (total_dsize < 131072) {
495 } else if (total_dsize < 262144) {
497 } else if (total_dsize < 524288) {
499 } else if (total_dsize < 1048576) {
504 } else if (communicator_size < 8) {
505 if (total_dsize < 64) {
507 } else if (total_dsize < 128) {
509 } else if (total_dsize < 2048) {
511 } else if (total_dsize < 8192) {
513 } else if (total_dsize < 1048576) {
518 } else if (communicator_size < 16) {
519 if (total_dsize < 8) {
521 } else if (total_dsize < 64) {
523 } else if (total_dsize < 4096) {
525 } else if (total_dsize < 16384) {
527 } else if (total_dsize < 32768) {
532 } else if (communicator_size < 32) {
533 if (total_dsize < 4096) {
535 } else if (total_dsize < 1048576) {
540 } else if (communicator_size < 64) {
541 if (total_dsize < 2048) {
546 } else if (communicator_size < 128) {
548 } else if (communicator_size < 256) {
549 if (total_dsize < 2) {
551 } else if (total_dsize < 16384) {
553 } else if (total_dsize < 32768) {
555 } else if (total_dsize < 65536) {
560 } else if (communicator_size < 1024) {
561 if (total_dsize < 16384) {
563 } else if (total_dsize < 32768) {
568 } else if (communicator_size < 2048) {
569 if (total_dsize < 524288) {
574 } else if (communicator_size < 4096) {
575 if (total_dsize < 262144) {
581 if (total_dsize < 8192) {
583 } else if (total_dsize < 16384) {
585 } else if (total_dsize < 262144) {
// Dispatch: alg is 1-based, the table is 0-based.
591 return funcs[alg-1](buff, count, datatype, root, comm);
/* Pick and run a reduce implementation.
 *
 * The decision uses three inputs: whether op is commutative, comm->size(),
 * and total_dsize (datatype->size() * count). The chosen entry of funcs is
 * called with the caller arguments unchanged and its result is returned
 * as is.
 */
594 int reduce__ompi(const void *sendbuf, void *recvbuf,
595 int count, MPI_Datatype datatype,
599 size_t total_dsize, dsize;
601 int communicator_size = comm->size();
// Total data size: element size times element count. count is converted
// implicitly here (no explicit cast, unlike some sibling selectors).
603 dsize=datatype->size();
604 total_dsize = dsize * count;
// Table of candidate implementations, indexed with alg-1 at the final call.
// The last slot holds reduce__ompi_basic_linear because the reduce__rab entry
// is commented out (see the remark on that line).
605 int (*funcs[])(const void*, void*, int, MPI_Datatype, MPI_Op, int, MPI_Comm) = {
606 &reduce__ompi_basic_linear,
608 &reduce__ompi_pipeline,
609 &reduce__ompi_binary,
610 &reduce__ompi_binomial,
611 &reduce__ompi_in_order_binary,
612 //&reduce__rab our rab can't be used with all datatypes
613 &reduce__ompi_basic_linear
621 * {6, "in-order_binary"},
622 * {7, "rabenseifner"},
624 * Currently, only linear and in-order binary tree algorithms are
625 * capable of non commutative ops.
// Non-commutative operations (op set and not commutative) are handled by a
// dedicated tree keyed on communicator size, then on total data size.
627 if ((op != MPI_OP_NULL) && not op->is_commutative()) {
628 if (communicator_size < 4) {
629 if (total_dsize < 8) {
634 } else if (communicator_size < 8) {
636 } else if (communicator_size < 16) {
637 if (total_dsize < 1024) {
639 } else if (total_dsize < 8192) {
641 } else if (total_dsize < 16384) {
643 } else if (total_dsize < 262144) {
648 } else if (communicator_size < 128) {
650 } else if (communicator_size < 256) {
651 if (total_dsize < 512) {
653 } else if (total_dsize < 1024) {
// NOTE(review): the tree below presumably covers the remaining case
// (op is MPI_OP_NULL or commutative) - confirm.
662 if (communicator_size < 4) {
663 if (total_dsize < 8) {
665 } else if (total_dsize < 16) {
667 } else if (total_dsize < 32) {
669 } else if (total_dsize < 262144) {
671 } else if (total_dsize < 524288) {
673 } else if (total_dsize < 1048576) {
678 } else if (communicator_size < 8) {
679 if (total_dsize < 4096) {
681 } else if (total_dsize < 65536) {
683 } else if (total_dsize < 262144) {
685 } else if (total_dsize < 524288) {
687 } else if (total_dsize < 1048576) {
692 } else if (communicator_size < 16) {
693 if (total_dsize < 8192) {
698 } else if (communicator_size < 32) {
699 if (total_dsize < 4096) {
704 } else if (communicator_size < 256) {
706 } else if (communicator_size < 512) {
707 if (total_dsize < 8192) {
709 } else if (total_dsize < 16384) {
714 } else if (communicator_size < 2048) {
716 } else if (communicator_size < 4096) {
717 if (total_dsize < 512) {
719 } else if (total_dsize < 1024) {
721 } else if (total_dsize < 8192) {
723 } else if (total_dsize < 16384) {
729 if (total_dsize < 16) {
731 } else if (total_dsize < 32) {
733 } else if (total_dsize < 1024) {
735 } else if (total_dsize < 2048) {
737 } else if (total_dsize < 8192) {
739 } else if (total_dsize < 16384) {
// Dispatch: alg is 1-based, the table is 0-based.
747 return funcs[alg-1] (sendbuf, recvbuf, count, datatype, op, root, comm);
/* Pick and run a reduce_scatter implementation.
 *
 * total_dsize is built from the sum of rcounts[0 .. comm->size()-1] scaled
 * by dsize. The decision then depends on op commutativity, the zerocounts
 * flag, comm->size() and total_dsize. The chosen entry of funcs is called
 * with the caller arguments unchanged and its result is returned as is.
 */
750 int reduce_scatter__ompi(const void *sbuf, void *rbuf,
757 size_t total_dsize, dsize;
758 int communicator_size = comm->size();
// Sum the per-rank element counts, then scale the sum by dsize.
// NOTE(review): total_dsize is only accumulated here, so it must be zeroed
// (and dsize set) before this loop - confirm.
763 for (int i = 0; i < communicator_size; i++) {
764 total_dsize += rcounts[i];
765 // if (0 == rcounts[i]) {
769 total_dsize *= dsize;
// Table of candidate implementations, indexed with alg-1 at the final call.
770 int (*funcs[])(const void*, void*, const int*, MPI_Datatype, MPI_Op, MPI_Comm) = {
771 &reduce_scatter__default,
772 &reduce_scatter__ompi_basic_recursivehalving,
773 &reduce_scatter__ompi_ring,
774 &reduce_scatter__ompi_butterfly,
777 * {1, "non-overlapping"},
778 * {2, "recursive_halving"},
782 * Non commutative algorithm capability needs re-investigation.
783 * Defaulting to non overlapping for non commutative ops.
// Non-commutative operations and a set zerocounts flag share one branch.
785 if (((op != MPI_OP_NULL) && not op->is_commutative()) || (zerocounts)) {
// Otherwise decide on communicator size first, then on total data size.
788 if (communicator_size < 4) {
789 if (total_dsize < 65536) {
791 } else if (total_dsize < 131072) {
796 } else if (communicator_size < 8) {
797 if (total_dsize < 8) {
799 } else if (total_dsize < 262144) {
804 } else if (communicator_size < 32) {
805 if (total_dsize < 262144) {
810 } else if (communicator_size < 64) {
811 if (total_dsize < 64) {
813 } else if (total_dsize < 2048) {
815 } else if (total_dsize < 524288) {
820 } else if (communicator_size < 128) {
821 if (total_dsize < 256) {
823 } else if (total_dsize < 512) {
825 } else if (total_dsize < 2048) {
827 } else if (total_dsize < 4096) {
832 } else if (communicator_size < 256) {
833 if (total_dsize < 256) {
835 } else if (total_dsize < 512) {
840 } else if (communicator_size < 512) {
841 if (total_dsize < 256) {
843 } else if (total_dsize < 1024) {
848 } else if (communicator_size < 1024) {
849 if (total_dsize < 512) {
851 } else if (total_dsize < 2048) {
853 } else if (total_dsize < 8192) {
855 } else if (total_dsize < 16384) {
860 } else if (communicator_size < 2048) {
861 if (total_dsize < 512) {
863 } else if (total_dsize < 4096) {
865 } else if (total_dsize < 16384) {
867 } else if (total_dsize < 32768) {
872 } else if (communicator_size < 4096) {
873 if (total_dsize < 512) {
875 } else if (total_dsize < 4096) {
881 if (total_dsize < 1024) {
883 } else if (total_dsize < 8192) {
// Dispatch: alg is 1-based, the table is 0-based.
891 return funcs[alg-1] (sbuf, rbuf, rcounts, dtype, op, comm);
/* Pick and run an allgather implementation.
 *
 * The decision is keyed on comm->size() and on total_dsize, computed as a
 * datatype size times scount. The chosen entry of funcs is called with the
 * caller arguments unchanged and its result is returned as is.
 */
894 int allgather__ompi(const void *sbuf, int scount,
896 void* rbuf, int rcount,
901 int communicator_size;
902 size_t dsize, total_dsize;
904 communicator_size = comm->size();
// The element size is taken from sdtype when sbuf is not MPI_IN_PLACE.
// NOTE(review): the rdtype assignment is presumably the MPI_IN_PLACE
// fallback - confirm.
905 if (MPI_IN_PLACE != sbuf) {
906 dsize = sdtype->size();
908 dsize = rdtype->size();
// scount is cast to ptrdiff_t before the multiplication.
910 total_dsize = dsize * (ptrdiff_t)scount;
// Table of candidate implementations, indexed with alg-1 at the final call.
911 int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm) = {
912 &allgather__NTSLR_NB,
916 &allgather__ompi_neighborexchange,
922 * {3, "recursive_doubling"},
// Decision tree: first on communicator size (the two-rank case is tested
// with ==, the rest with upper bounds), then on total data size.
927 if (communicator_size == 2) {
929 } else if (communicator_size < 32) {
931 } else if (communicator_size < 64) {
932 if (total_dsize < 1024) {
934 } else if (total_dsize < 65536) {
939 } else if (communicator_size < 128) {
940 if (total_dsize < 512) {
942 } else if (total_dsize < 65536) {
947 } else if (communicator_size < 256) {
948 if (total_dsize < 512) {
950 } else if (total_dsize < 131072) {
952 } else if (total_dsize < 524288) {
954 } else if (total_dsize < 1048576) {
959 } else if (communicator_size < 512) {
960 if (total_dsize < 32) {
962 } else if (total_dsize < 128) {
964 } else if (total_dsize < 1024) {
966 } else if (total_dsize < 131072) {
968 } else if (total_dsize < 524288) {
970 } else if (total_dsize < 1048576) {
975 } else if (communicator_size < 1024) {
976 if (total_dsize < 64) {
978 } else if (total_dsize < 256) {
980 } else if (total_dsize < 2048) {
985 } else if (communicator_size < 2048) {
986 if (total_dsize < 4) {
988 } else if (total_dsize < 8) {
990 } else if (total_dsize < 16) {
992 } else if (total_dsize < 32) {
994 } else if (total_dsize < 256) {
996 } else if (total_dsize < 512) {
998 } else if (total_dsize < 4096) {
1003 } else if (communicator_size < 4096) {
1004 if (total_dsize < 32) {
1006 } else if (total_dsize < 128) {
1008 } else if (total_dsize < 512) {
1010 } else if (total_dsize < 4096) {
1016 if (total_dsize < 2) {
1018 } else if (total_dsize < 8) {
1020 } else if (total_dsize < 16) {
1022 } else if (total_dsize < 512) {
1024 } else if (total_dsize < 4096) {
// Dispatch: alg is 1-based, the table is 0-based.
1031 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
/* Pick and run an allgatherv implementation.
 *
 * total_dsize is the sum of dsize * rcounts[i] over all ranks of comm. The
 * decision itself is keyed on comm->size() and on the per-rank average
 * (total_dsize / comm->size()). The chosen entry of funcs is called with
 * the caller arguments unchanged and its result is returned as is.
 */
1035 int allgatherv__ompi(const void *sbuf, int scount,
1036 MPI_Datatype sdtype,
1037 void* rbuf, const int *rcounts,
1039 MPI_Datatype rdtype,
1044 int communicator_size;
1045 size_t dsize, total_dsize;
1047 communicator_size = comm->size();
// The element size is taken from sdtype when sbuf is not MPI_IN_PLACE.
// NOTE(review): the rdtype assignment is presumably the MPI_IN_PLACE
// fallback - confirm.
1048 if (MPI_IN_PLACE != sbuf) {
1049 dsize = sdtype->size();
1051 dsize = rdtype->size();
// Accumulate the size contributed by every rank.
// NOTE(review): total_dsize is only accumulated here, so it must be zeroed
// before this loop - confirm.
1055 for (i = 0; i < communicator_size; i++) {
1056 total_dsize += dsize * rcounts[i];
1059 /* use the per-rank data size as basis, similar to allgather */
// Integer division: the average is truncated.
1060 size_t per_rank_dsize = total_dsize / communicator_size;
// Table of candidate implementations, indexed with alg-1 at the final call.
1062 int (*funcs[])(const void*, int, MPI_Datatype, void*, const int*, const int*, MPI_Datatype, MPI_Comm) = {
1064 &allgatherv__ompi_bruck,
1065 &allgatherv__mpich_ring,
1066 &allgatherv__ompi_neighborexchange,
// Decision tree: first on communicator size (the two-rank case is tested
// with ==, the rest with upper bounds), then on the per-rank data size.
1076 if (communicator_size == 2) {
1077 if (per_rank_dsize < 2048) {
1079 } else if (per_rank_dsize < 4096) {
1081 } else if (per_rank_dsize < 8192) {
1086 } else if (communicator_size < 8) {
1087 if (per_rank_dsize < 256) {
1089 } else if (per_rank_dsize < 4096) {
1091 } else if (per_rank_dsize < 8192) {
1093 } else if (per_rank_dsize < 16384) {
1095 } else if (per_rank_dsize < 262144) {
1100 } else if (communicator_size < 16) {
1101 if (per_rank_dsize < 1024) {
1106 } else if (communicator_size < 32) {
1107 if (per_rank_dsize < 128) {
1109 } else if (per_rank_dsize < 262144) {
1114 } else if (communicator_size < 64) {
1115 if (per_rank_dsize < 256) {
1117 } else if (per_rank_dsize < 8192) {
1122 } else if (communicator_size < 128) {
1123 if (per_rank_dsize < 256) {
1125 } else if (per_rank_dsize < 4096) {
1130 } else if (communicator_size < 256) {
1131 if (per_rank_dsize < 1024) {
1133 } else if (per_rank_dsize < 65536) {
1138 } else if (communicator_size < 512) {
1139 if (per_rank_dsize < 1024) {
1144 } else if (communicator_size < 1024) {
1145 if (per_rank_dsize < 512) {
1147 } else if (per_rank_dsize < 1024) {
1149 } else if (per_rank_dsize < 4096) {
1151 } else if (per_rank_dsize < 1048576) {
1157 if (per_rank_dsize < 4096) {
// Dispatch: alg is 1-based, the table is 0-based.
1164 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm);
/* Pick and run a gather implementation.
 *
 * The decision is keyed on comm->size() and on total_dsize, which is a
 * datatype size times an element count taken either from the receive side
 * (rdtype, rcount) or from the send side (sdtype, scount). The chosen entry
 * of funcs is called with the caller arguments unchanged and its result is
 * returned as is.
 */
1167 int gather__ompi(const void *sbuf, int scount,
1168 MPI_Datatype sdtype,
1169 void* rbuf, int rcount,
1170 MPI_Datatype rdtype,
1175 int communicator_size, rank;
1176 size_t dsize, total_dsize;
1178 communicator_size = comm->size();
1179 rank = comm->rank();
// Two alternative size computations: receive side first, send side second.
// NOTE(review): which one applies presumably depends on whether this rank is
// the root (rank is fetched just above) - confirm.
1182 dsize = rdtype->size();
1183 total_dsize = dsize * rcount;
1185 dsize = sdtype->size();
1186 total_dsize = dsize * scount;
// Table of candidate implementations, indexed with alg-1 at the final call.
// The three entries line up with algorithms 1 to 3 of the list below.
1188 int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
1189 &gather__ompi_basic_linear,
1190 &gather__ompi_binomial,
1191 &gather__ompi_linear_sync
1194 * {1, "basic_linear"},
1196 * {3, "linear_sync"},
1198 * We do not make any rank specific checks since the params
1199 * should be uniform across ranks.
// Decision tree: first on communicator size, then on total data size.
1201 if (communicator_size < 4) {
1202 if (total_dsize < 2) {
1204 } else if (total_dsize < 4) {
1206 } else if (total_dsize < 32768) {
1208 } else if (total_dsize < 65536) {
1210 } else if (total_dsize < 131072) {
1215 } else if (communicator_size < 8) {
1216 if (total_dsize < 1024) {
1218 } else if (total_dsize < 8192) {
1220 } else if (total_dsize < 32768) {
1222 } else if (total_dsize < 262144) {
1227 } else if (communicator_size < 256) {
1229 } else if (communicator_size < 512) {
1230 if (total_dsize < 2048) {
1232 } else if (total_dsize < 8192) {
// Dispatch: alg is 1-based, the table is 0-based.
1241 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);
/* Pick and run a scatter implementation.
 *
 * The decision is keyed on comm->size() and on total_dsize, which is a
 * datatype size times an element count taken either from the send side
 * (sdtype, scount) or from the receive side (rdtype, rcount). The chosen
 * entry of funcs is called with the caller arguments unchanged and its
 * result is returned as is.
 */
1245 int scatter__ompi(const void *sbuf, int scount,
1246 MPI_Datatype sdtype,
1247 void* rbuf, int rcount,
1248 MPI_Datatype rdtype,
1249 int root, MPI_Comm comm
1252 int communicator_size, rank;
1253 size_t dsize, total_dsize;
1256 communicator_size = comm->size();
1257 rank = comm->rank();
// Two alternative size computations: send side first, receive side second.
// NOTE(review): which one applies presumably depends on whether this rank is
// the root (rank is fetched just above) - confirm.
1259 dsize=sdtype->size();
1260 total_dsize = dsize * scount;
1262 dsize=rdtype->size();
1263 total_dsize = dsize * rcount;
// Table of candidate implementations, indexed with alg-1 at the final call.
1265 int (*funcs[])(const void*, int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm) = {
1266 &scatter__ompi_basic_linear,
1267 &scatter__ompi_binomial,
1268 &scatter__ompi_linear_nb
1271 * {1, "basic_linear"},
1275 * We do not make any rank specific checks since the params
1276 * should be uniform across ranks.
// Decision tree: first on communicator size, then on total data size.
1278 if (communicator_size < 4) {
1279 if (total_dsize < 2) {
1281 } else if (total_dsize < 131072) {
1283 } else if (total_dsize < 262144) {
1288 } else if (communicator_size < 8) {
1289 if (total_dsize < 2048) {
1291 } else if (total_dsize < 4096) {
1293 } else if (total_dsize < 8192) {
1295 } else if (total_dsize < 32768) {
1297 } else if (total_dsize < 1048576) {
1302 } else if (communicator_size < 16) {
1303 if (total_dsize < 16384) {
1305 } else if (total_dsize < 1048576) {
1310 } else if (communicator_size < 32) {
1311 if (total_dsize < 16384) {
1313 } else if (total_dsize < 32768) {
1318 } else if (communicator_size < 64) {
1319 if (total_dsize < 512) {
1321 } else if (total_dsize < 8192) {
1323 } else if (total_dsize < 16384) {
1329 if (total_dsize < 512) {
// Dispatch: alg is 1-based, the table is 0-based.
1336 return funcs[alg-1](sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm);