From: zianekhodja Date: Wed, 6 Jan 2016 22:11:05 +0000 (+0100) Subject: figure 9 X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/kahina_paper2.git/commitdiff_plain/9b5105b4df54f7d4b03b2a93dd954fff21ff231b?ds=inline figure 9 --- diff --git a/Simulations/BIG/sparse_openmp_4GPU.o140798 b/Simulations/BIG/sparse_openmp_4GPU.o140798 new file mode 100644 index 0000000..2324c09 --- /dev/null +++ b/Simulations/BIG/sparse_openmp_4GPU.o140798 @@ -0,0 +1,419 @@ +REMAINING: 4 +Taking GPU #0 +Taking GPU #1 +Taking GPU #2 +Taking GPU #3 +(1 +i*0)*x^0 + (-0.1 +i*0)*x^450000 + (-10 +i*0)*x^750000 + (1 +i*0)*x^1200000 + +(-45000 +i*0)*x^449999 + (-7.5E+06 +i*0)*x^749999 + (1.2E+06 +i*0)*x^1199999 + +zone limite de 'log-exp' 1.0003 +CPU thread 2 (of 4) uses CUDA device 2 +CPU thread 1 (of 4) uses CUDA device 1 +CPU thread 0 (of 4) uses CUDA device 0 +CPU thread 3 (of 4) uses CUDA device 3 +gpu 2 dimgrid 4688 dimblock 256 degrePoly 1200000 +gpu 2 dimgrid2 1172 start 600064 size 300032 +gpu 0 dimgrid 4688 dimblock 256 degrePoly 1200000 +gpu 0 dimgrid2 1172 start 0 size 300032 +gpu 1 dimgrid 4688 dimblock 256 degrePoly 1200000 +gpu 1 dimgrid2 1172 start 300032 size 300032 +gpu 3 dimgrid 4688 dimblock 256 degrePoly 1200000 +gpu 3 dimgrid2 1172 start 900096 size 300032 +gpu 0 iter : 1 Arret : 0.00258962 s/iter 26.827899 +gpu 0 iter : 2 Arret : 0.00986733 s/iter 26.734262 +gpu 0 iter : 3 Arret : 1.00309 s/iter 26.875480 +gpu 0 iter : 4 Arret : 1.00888 s/iter 26.839520 +gpu 0 iter : 5 Arret : 2.5903 s/iter 27.934856 +gpu 0 iter : 6 Arret : 2.37402 s/iter 27.441899 +gpu 0 iter : 7 Arret : 0.328667 s/iter 27.604107 +gpu 0 iter : 8 Arret : 0.229095 s/iter 27.860597 +gpu 0 iter : 9 Arret : 0.19497 s/iter 27.971677 +gpu 0 iter : 10 Arret : 0.164003 s/iter 28.895103 +gpu 0 iter : 11 Arret : 0.146769 s/iter 28.154589 +gpu 0 iter : 12 Arret : 0.141542 s/iter 27.787744 +gpu 0 iter : 13 Arret : 0.13035 s/iter 27.722678 +gpu 0 iter : 14 Arret : 0.125184 s/iter 27.642161 +gpu 0 iter : 15 Arret : 0.123971 s/iter 27.676039 +gpu 0 iter : 16 Arret : 0.124793 s/iter 27.541685 +gpu 0 iter : 17 Arret : 0.12594 s/iter 27.614293 +gpu 0 iter : 18 Arret : 0.128039 s/iter 27.665275 +gpu 0 iter : 19 Arret : 0.132043 s/iter 27.743370 +gpu 0 iter : 20 Arret : 0.138844 s/iter 27.682937 +gpu 0 iter : 21 Arret : 0.134995 s/iter 27.574432 +gpu 0 iter : 22 Arret : 0.146227 s/iter 27.683216 +gpu 0 iter : 23 Arret : 0.101316 s/iter 27.685417 +gpu 0 iter : 24 Arret : 0.0965641 s/iter 27.616963 +gpu 0 iter : 25 Arret : 0.0956392 s/iter 27.639845 +gpu 0 iter : 26 Arret : 0.107239 s/iter 27.629365 +gpu 0 iter : 27 Arret : 0.107475 s/iter 27.699761 +gpu 0 iter : 28 Arret : 0.0953323 s/iter 27.656435 +gpu 0 iter : 29 Arret : 0.09591 s/iter 27.588976 +gpu 0 iter : 30 Arret : 0.0966778 s/iter 27.661109 +gpu 0 iter : 31 Arret : 0.0977275 s/iter 27.762631 +gpu 0 iter : 32 Arret : 0.0990466 s/iter 27.686399 +gpu 0 iter : 33 Arret : 0.100221 s/iter 27.746296 +gpu 0 iter : 34 Arret : 0.0990477 s/iter 27.734201 +gpu 0 iter : 35 Arret : 0.0908046 s/iter 27.800281 +gpu 0 iter : 36 Arret : 0.0868311 s/iter 27.708798 +gpu 0 iter : 37 Arret : 0.0863073 s/iter 27.667143 +gpu 0 iter : 38 Arret : 0.0861844 s/iter 27.628118 +gpu 0 iter : 39 Arret : 0.0862547 s/iter 27.577567 +gpu 0 iter : 40 Arret : 0.0865435 s/iter 27.695190 +gpu 0 iter : 41 Arret : 0.087011 s/iter 27.693141 +gpu 0 iter : 42 Arret : 0.0874144 s/iter 27.785899 +gpu 0 iter : 43 Arret : 0.0874339 s/iter 27.639840 +gpu 0 iter : 44 Arret : 0.0859151 s/iter 27.652549 +gpu 0 iter : 45 Arret : 0.081442 s/iter 27.706480 +gpu 0 iter : 46 Arret : 0.0811917 s/iter 27.670721 +gpu 0 iter : 47 Arret : 0.0822112 s/iter 27.609868 +gpu 0 iter : 48 Arret : 0.0835771 s/iter 27.597375 +gpu 0 iter : 49 Arret : 0.0857379 s/iter 27.729707 +gpu 0 iter : 50 Arret : 0.0897612 s/iter 27.623075 +gpu 0 iter : 51 Arret : 0.0997583 s/iter 27.644660 +gpu 0 iter : 52 Arret : 0.148009 s/iter 27.641711 +gpu 0 iter : 53 Arret : 0.0810578 s/iter 27.809112 +gpu 0 iter : 54 Arret : 0.0761754 s/iter 27.726821 +gpu 0 iter : 55 Arret : 0.0753544 s/iter 27.713916 +gpu 0 iter : 56 Arret : 0.0747394 s/iter 27.641510 +gpu 0 iter : 57 Arret : 0.0741459 s/iter 27.656637 +gpu 0 iter : 58 Arret : 0.0736156 s/iter 27.684889 +gpu 0 iter : 59 Arret : 0.0731337 s/iter 27.584138 +gpu 0 iter : 60 Arret : 0.0726841 s/iter 27.603977 +gpu 0 iter : 61 Arret : 0.0722627 s/iter 27.521752 +gpu 0 iter : 62 Arret : 0.0722214 s/iter 27.593495 +gpu 0 iter : 63 Arret : 0.072309 s/iter 27.716730 +gpu 0 iter : 64 Arret : 0.0724031 s/iter 27.644477 +gpu 0 iter : 65 Arret : 0.0725447 s/iter 27.570075 +gpu 0 iter : 66 Arret : 0.0730198 s/iter 27.654704 +gpu 0 iter : 67 Arret : 0.0734493 s/iter 27.670200 +gpu 0 iter : 68 Arret : 0.0735663 s/iter 27.707876 +gpu 0 iter : 69 Arret : 0.0730875 s/iter 27.631047 +gpu 0 iter : 70 Arret : 0.0733015 s/iter 27.768731 +gpu 0 iter : 71 Arret : 0.0735456 s/iter 27.639630 +gpu 0 iter : 72 Arret : 0.0738262 s/iter 27.692827 +gpu 0 iter : 73 Arret : 0.0741584 s/iter 27.694123 +gpu 0 iter : 74 Arret : 0.0745504 s/iter 27.657722 +gpu 0 iter : 75 Arret : 0.0750287 s/iter 27.530421 +gpu 0 iter : 76 Arret : 0.075606 s/iter 27.691534 +gpu 0 iter : 77 Arret : 0.0763381 s/iter 27.627515 +gpu 0 iter : 78 Arret : 0.0772609 s/iter 27.722162 +gpu 0 iter : 79 Arret : 0.0784986 s/iter 27.737349 +gpu 0 iter : 80 Arret : 0.0803216 s/iter 27.795140 +gpu 0 iter : 81 Arret : 0.0833763 s/iter 27.712014 +gpu 0 iter : 82 Arret : 0.0857804 s/iter 27.616624 +gpu 0 iter : 83 Arret : 0.0681289 s/iter 27.647584 +gpu 0 iter : 84 Arret : 0.0661738 s/iter 27.702826 +gpu 0 iter : 85 Arret : 0.065728 s/iter 27.680471 +gpu 0 iter : 86 Arret : 0.0656501 s/iter 27.749677 +gpu 0 iter : 87 Arret : 0.0660945 s/iter 27.586654 +gpu 0 iter : 88 Arret : 0.0666066 s/iter 27.651654 +gpu 0 iter : 89 Arret : 0.067212 s/iter 27.604856 +gpu 0 iter : 90 Arret : 0.0679323 s/iter 27.580859 +gpu 0 iter : 91 Arret : 0.0687486 s/iter 27.606257 +gpu 0 iter : 92 Arret : 0.0694555 s/iter 27.668065 +gpu 0 iter : 93 Arret : 0.0691873 s/iter 27.742445 +gpu 0 iter : 94 Arret : 0.0659672 s/iter 27.684775 +gpu 0 iter : 95 Arret : 0.0630197 s/iter 27.676622 +gpu 0 iter : 96 Arret : 0.0627875 s/iter 27.635821 +gpu 0 iter : 97 Arret : 0.0625551 s/iter 27.696600 +gpu 0 iter : 98 Arret : 0.06233 s/iter 27.657175 +gpu 0 iter : 99 Arret : 0.0621206 s/iter 27.636799 +gpu 0 iter : 100 Arret : 0.0619223 s/iter 27.496227 +gpu 0 iter : 101 Arret : 0.0617419 s/iter 27.653072 +gpu 0 iter : 102 Arret : 0.0615747 s/iter 27.605520 +gpu 0 iter : 103 Arret : 0.0614268 s/iter 27.557247 +gpu 0 iter : 104 Arret : 0.0613071 s/iter 27.617322 +gpu 0 iter : 105 Arret : 0.0612073 s/iter 27.596535 +gpu 0 iter : 106 Arret : 0.0611241 s/iter 27.742761 +gpu 0 iter : 107 Arret : 0.0610522 s/iter 27.637998 +gpu 0 iter : 108 Arret : 0.0609967 s/iter 27.717476 +gpu 0 iter : 109 Arret : 0.0609504 s/iter 27.724174 +gpu 0 iter : 110 Arret : 0.0609186 s/iter 27.653768 +gpu 0 iter : 111 Arret : 0.0608947 s/iter 27.659002 +gpu 0 iter : 112 Arret : 0.0608857 s/iter 27.686514 +gpu 0 iter : 113 Arret : 0.0608874 s/iter 27.566403 +gpu 0 iter : 114 Arret : 0.0608988 s/iter 27.624237 +gpu 0 iter : 115 Arret : 0.0609196 s/iter 27.621325 +gpu 0 iter : 116 Arret : 0.0609492 s/iter 27.581922 +gpu 0 iter : 117 Arret : 0.0609907 s/iter 27.632755 +gpu 0 iter : 118 Arret : 0.061041 s/iter 27.684812 +gpu 0 iter : 119 Arret : 0.0610987 s/iter 27.684929 +gpu 0 iter : 120 Arret : 0.0611635 s/iter 27.704854 +gpu 0 iter : 121 Arret : 0.0612368 s/iter 27.607966 +gpu 0 iter : 122 Arret : 0.0613124 s/iter 27.642532 +gpu 0 iter : 123 Arret : 0.0613912 s/iter 27.631656 +gpu 0 iter : 124 Arret : 0.0614667 s/iter 27.688838 +gpu 0 iter : 125 Arret : 0.0615356 s/iter 27.623137 +gpu 0 iter : 126 Arret : 0.0617597 s/iter 27.703026 +gpu 0 iter : 127 Arret : 0.0620478 s/iter 27.740887 +gpu 0 iter : 128 Arret : 0.0624229 s/iter 27.715521 +gpu 0 iter : 129 Arret : 0.0629074 s/iter 27.751329 +gpu 0 iter : 130 Arret : 0.0634698 s/iter 27.797828 +gpu 0 iter : 131 Arret : 0.0637699 s/iter 27.702156 +gpu 0 iter : 132 Arret : 0.0640308 s/iter 27.690555 +gpu 0 iter : 133 Arret : 0.0659287 s/iter 27.524283 +gpu 0 iter : 134 Arret : 0.0693681 s/iter 27.638889 +gpu 0 iter : 135 Arret : 0.0710344 s/iter 27.618419 +gpu 0 iter : 136 Arret : 0.0694396 s/iter 27.711971 +gpu 0 iter : 137 Arret : 0.0733943 s/iter 27.615772 +gpu 0 iter : 138 Arret : 0.0810003 s/iter 27.621519 +gpu 0 iter : 139 Arret : 0.100414 s/iter 27.753471 +gpu 0 iter : 140 Arret : 0.537557 s/iter 27.638308 +gpu 0 iter : 141 Arret : 0.507225 s/iter 27.575889 +gpu 0 iter : 142 Arret : 2.33889 s/iter 27.726919 +gpu 0 iter : 143 Arret : 1.54355 s/iter 27.538023 +gpu 0 iter : 144 Arret : 0.563172 s/iter 27.484199 +gpu 0 iter : 145 Arret : 0.226945 s/iter 27.442553 +gpu 0 iter : 146 Arret : 0.171912 s/iter 27.309655 +gpu 0 iter : 147 Arret : 0.156097 s/iter 27.270619 +gpu 0 iter : 148 Arret : 0.0971293 s/iter 26.735380 +gpu 0 iter : 149 Arret : 0.0428118 s/iter 26.655854 +gpu 0 iter : 150 Arret : 0.0142023 s/iter 26.581889 +gpu 0 iter : 151 Arret : 0.0051282 s/iter 26.579230 +gpu 0 iter : 152 Arret : 0.00148395 s/iter 26.561289 +gpu 0 iter : 153 Arret : 0.000175276 s/iter 26.561155 +gpu 0 iter : 154 Arret : 5.37195E-09 s/iter 26.561401 +gpu 0 iter : 155 Arret : 1.16023E-11 s/iter 26.562237 +gpu 0 iter : 156 Arret : 2.50282E-14 s/iter 26.561680 +temps : 4310.13 seconde(s) +(1 +i*0)*x^0 + (-0.1 +i*0)*x^550000 + (-10 +i*0)*x^850000 + (1 +i*0)*x^1400000 + +(-55000 +i*0)*x^549999 + (-8.5E+06 +i*0)*x^849999 + (1.4E+06 +i*0)*x^1399999 + +zone limite de 'log-exp' 1.00025 +CPU thread 3 (of 4) uses CUDA device 3 +CPU thread 2 (of 4) uses CUDA device 2 +CPU thread 1 (of 4) uses CUDA device 1 +CPU thread 0 (of 4) uses CUDA device 0 +gpu 2 dimgrid 5469 dimblock 256 degrePoly 1400000 +gpu 2 dimgrid2 1368 start 700032 size 350016 +gpu 1 dimgrid 5469 dimblock 256 degrePoly 1400000 +gpu 1 dimgrid2 1368 start 350016 size 350016 +gpu 0 dimgrid 5469 dimblock 256 degrePoly 1400000 +gpu 0 dimgrid2 1368 start 0 size 350016 +gpu 3 dimgrid 5469 dimblock 256 degrePoly 1400000 +gpu 3 dimgrid2 1368 start 1050048 size 350016 +gpu 0 iter : 1 Arret : 0.00301134 s/iter 36.150724 +gpu 0 iter : 2 Arret : 1.0164 s/iter 36.798604 +gpu 0 iter : 3 Arret : 0.527057 s/iter 36.500356 +gpu 0 iter : 4 Arret : 0.812989 s/iter 37.737377 +gpu 0 iter : 5 Arret : 1.19856 s/iter 38.078910 +gpu 0 iter : 6 Arret : 1.31385 s/iter 37.969050 +gpu 0 iter : 7 Arret : 0.771729 s/iter 38.556733 +gpu 0 iter : 8 Arret : 0.33112 s/iter 38.122633 +gpu 0 iter : 9 Arret : 1.31175 s/iter 38.633804 +gpu 0 iter : 10 Arret : 2.14622 s/iter 39.213451 +gpu 0 iter : 11 Arret : 2.04392 s/iter 39.361824 +gpu 0 iter : 12 Arret : 1.60414 s/iter 40.017241 +gpu 0 iter : 13 Arret : 3.65838 s/iter 39.438270 +gpu 0 iter : 14 Arret : 2.89151 s/iter 39.215504 +gpu 0 iter : 15 Arret : 1.05476 s/iter 39.289893 +gpu 0 iter : 16 Arret : 2.33471 s/iter 38.697660 +gpu 0 iter : 17 Arret : 1.31967 s/iter 38.803148 +gpu 0 iter : 18 Arret : 1.72042 s/iter 38.333798 +gpu 0 iter : 19 Arret : 1.26041 s/iter 38.223915 +gpu 0 iter : 20 Arret : 2.02686 s/iter 38.118103 +gpu 0 iter : 21 Arret : 1.94287 s/iter 38.143924 +gpu 0 iter : 22 Arret : 0.758266 s/iter 38.160562 +gpu 0 iter : 23 Arret : 0.662841 s/iter 38.091311 +gpu 0 iter : 24 Arret : 0.451559 s/iter 38.111037 +gpu 0 iter : 25 Arret : 0.890363 s/iter 38.178186 +gpu 0 iter : 26 Arret : 0.558147 s/iter 37.880066 +gpu 0 iter : 27 Arret : 0.980988 s/iter 37.969720 +gpu 0 iter : 28 Arret : 0.650408 s/iter 37.971275 +gpu 0 iter : 29 Arret : 0.408451 s/iter 37.801977 +gpu 0 iter : 30 Arret : 0.325854 s/iter 37.001443 +gpu 0 iter : 31 Arret : 0.557985 s/iter 36.776558 +gpu 0 iter : 32 Arret : 0.657544 s/iter 36.678739 +gpu 0 iter : 33 Arret : 0.064597 s/iter 36.383532 +gpu 0 iter : 34 Arret : 0.0379355 s/iter 36.309674 +gpu 0 iter : 35 Arret : 0.0111125 s/iter 36.242018 +gpu 0 iter : 36 Arret : 0.00419826 s/iter 36.128804 +gpu 0 iter : 37 Arret : 0.0004051 s/iter 36.128298 +gpu 0 iter : 38 Arret : 2.6764E-06 s/iter 36.132280 +gpu 0 iter : 39 Arret : 4.06209E-09 s/iter 36.130923 +gpu 0 iter : 40 Arret : 1.15654E-11 s/iter 36.137012 +gpu 0 iter : 41 Arret : 3.29406E-14 s/iter 36.136398 +temps : 1548.53 seconde(s) +(1 +i*0)*x^0 + (-0.1 +i*0)*x^200000 + (-10 +i*0)*x^1800000 + (1 +i*0)*x^2000000 + +(-20000 +i*0)*x^199999 + (-1.8E+07 +i*0)*x^1799999 + (2E+06 +i*0)*x^1999999 + +zone limite de 'log-exp' 1.00018 +CPU thread 0 (of 4) uses CUDA device 0 +CPU thread 1 (of 4) uses CUDA device 1 +CPU thread 3 (of 4) uses CUDA device 3 +CPU thread 2 (of 4) uses CUDA device 2 +gpu 0 dimgrid 7813 dimblock 256 degrePoly 2000000 +gpu 0 dimgrid2 1954 start 0 size 500032 +gpu 1 dimgrid 7813 dimblock 256 degrePoly 2000000 +gpu 1 dimgrid2 1954 start 500032 size 500032 +gpu 3 dimgrid 7813 dimblock 256 degrePoly 2000000 +gpu 3 dimgrid2 1954 start 1500096 size 500032 +gpu 2 dimgrid 7813 dimblock 256 degrePoly 2000000 +gpu 2 dimgrid2 1954 start 1000064 size 500032 +gpu 0 iter : 1 Arret : 2.36057E-06 s/iter 73.200206 +gpu 0 iter : 2 Arret : 3.44757E-06 s/iter 73.206619 +gpu 0 iter : 3 Arret : 2.67755E-05 s/iter 73.218838 +gpu 0 iter : 4 Arret : 0.000276623 s/iter 73.273064 +gpu 0 iter : 5 Arret : 0.00217412 s/iter 73.135160 +gpu 0 iter : 6 Arret : 0.00328038 s/iter 73.985536 +gpu 0 iter : 7 Arret : 0.00254268 s/iter 73.908764 +gpu 0 iter : 8 Arret : 0.00225769 s/iter 73.920152 +gpu 0 iter : 9 Arret : 0.000786152 s/iter 73.138282 +gpu 0 iter : 10 Arret : 0.000588392 s/iter 73.290807 +gpu 0 iter : 11 Arret : 0.00102007 s/iter 73.081068 +gpu 0 iter : 12 Arret : 0.000979946 s/iter 73.082770 +gpu 0 iter : 13 Arret : 0.000114646 s/iter 73.173739 +gpu 0 iter : 14 Arret : 7.57524E-05 s/iter 73.274915 +gpu 0 iter : 15 Arret : 5.05796E-05 s/iter 73.063632 +gpu 0 iter : 16 Arret : 8.64598E-06 s/iter 73.110540 +gpu 0 iter : 17 Arret : 2.69825E-07 s/iter 73.168646 +gpu 0 iter : 18 Arret : 1.52775E-11 s/iter 73.173578 +gpu 0 iter : 19 Arret : 4.67252E-15 s/iter 73.286665 +temps : 1394.75 seconde(s) +(1 +i*0)*x^0 + (-0.1 +i*0)*x^1200000 + (-10 +i*0)*x^1800000 + (1 +i*0)*x^3000000 + +(-120000 +i*0)*x^1199999 + (-1.8E+07 +i*0)*x^1799999 + (3E+06 +i*0)*x^2999999 + +zone limite de 'log-exp' 1.00012 +CPU thread 1 (of 4) uses CUDA device 1 +CPU thread 3 (of 4) uses CUDA device 3 +CPU thread 0 (of 4) uses CUDA device 0 +CPU thread 2 (of 4) uses CUDA device 2 +gpu 0 dimgrid 11719 dimblock 256 degrePoly 3000000 +gpu 0 dimgrid2 2930 start 0 size 750016 +gpu 3 dimgrid 11719 dimblock 256 degrePoly 3000000 +gpu 3 dimgrid2 2930 start 2250048 size 750016 +gpu 2 dimgrid 11719 dimblock 256 degrePoly 3000000 +gpu 2 dimgrid2 2930 start 1500032 size 750016 +gpu 1 dimgrid 11719 dimblock 256 degrePoly 3000000 +gpu 1 dimgrid2 2930 start 750016 size 750016 +gpu 0 iter : 1 Arret : 2.1478E-05 s/iter 164.876819 +gpu 0 iter : 2 Arret : 0.000420737 s/iter 164.787431 +gpu 0 iter : 3 Arret : 0.00357277 s/iter 165.005356 +gpu 0 iter : 4 Arret : 0.0572092 s/iter 165.495652 +gpu 0 iter : 5 Arret : 0.761627 s/iter 165.743492 +gpu 0 iter : 6 Arret : 1.01141 s/iter 167.164569 +gpu 0 iter : 7 Arret : 2.07506 s/iter 169.847618 +gpu 0 iter : 8 Arret : 2.06513 s/iter 170.981840 +gpu 0 iter : 9 Arret : 1.85771 s/iter 172.907405 +gpu 0 iter : 10 Arret : 1.32021 s/iter 173.682887 +gpu 0 iter : 11 Arret : 0.653246 s/iter 174.990375 +gpu 0 iter : 12 Arret : 1.13868 s/iter 177.748152 +gpu 0 iter : 13 Arret : 0.659181 s/iter 181.308007 +gpu 0 iter : 14 Arret : 0.790133 s/iter 180.509792 +gpu 0 iter : 15 Arret : 0.987422 s/iter 180.644018 +gpu 0 iter : 16 Arret : 0.76864 s/iter 178.428579 +gpu 0 iter : 17 Arret : 1.75305 s/iter 173.449056 +gpu 0 iter : 18 Arret : 0.94182 s/iter 172.561179 +gpu 0 iter : 19 Arret : 1.12708 s/iter 171.091505 +gpu 0 iter : 20 Arret : 1.73067 s/iter 169.907975 +gpu 0 iter : 21 Arret : 0.362559 s/iter 169.765571 +gpu 0 iter : 22 Arret : 0.268441 s/iter 167.991027 +gpu 0 iter : 23 Arret : 0.129326 s/iter 167.939620 +gpu 0 iter : 24 Arret : 0.168344 s/iter 166.717733 +gpu 0 iter : 25 Arret : 0.157303 s/iter 165.967129 +gpu 0 iter : 26 Arret : 0.191584 s/iter 165.597470 +gpu 0 iter : 27 Arret : 0.0858363 s/iter 165.564472 +gpu 0 iter : 28 Arret : 0.15835 s/iter 165.543554 +gpu 0 iter : 29 Arret : 0.180216 s/iter 165.510484 +gpu 0 iter : 30 Arret : 0.151569 s/iter 165.401908 +gpu 0 iter : 31 Arret : 0.137137 s/iter 165.511141 +gpu 0 iter : 32 Arret : 0.150851 s/iter 165.427653 +gpu 0 iter : 33 Arret : 0.0650185 s/iter 165.387441 +gpu 0 iter : 34 Arret : 0.0343198 s/iter 165.406594 +gpu 0 iter : 35 Arret : 0.0106375 s/iter 165.392004 +gpu 0 iter : 36 Arret : 0.003294 s/iter 165.417487 +gpu 0 iter : 37 Arret : 0.000595952 s/iter 164.958632 +gpu 0 iter : 38 Arret : 2.78741E-06 s/iter 164.828188 +gpu 0 iter : 39 Arret : 7.24205E-10 s/iter 164.853101 +gpu 0 iter : 40 Arret : 2.28814E-12 s/iter 164.824094 +gpu 0 iter : 41 Arret : 7.24967E-15 s/iter 164.839113 +temps : 6926.85 seconde(s) +(1 +i*0)*x^0 + (-0.1 +i*0)*x^1800000 + (-10 +i*0)*x^2200000 + (1 +i*0)*x^4000000 + +(-180000 +i*0)*x^1799999 + (-2.2E+07 +i*0)*x^2199999 + (4E+06 +i*0)*x^3999999 + +zone limite de 'log-exp' 1.00009 +CPU thread 0 (of 4) uses CUDA device 0 +CPU thread 1 (of 4) uses CUDA device 1 +CPU thread 2 (of 4) uses CUDA device 2 +CPU thread 3 (of 4) uses CUDA device 3 +gpu 0 dimgrid 15625 dimblock 256 degrePoly 4000000 +gpu 2 dimgrid 15625 dimblock 256 degrePoly 4000000 +gpu 2 dimgrid2 3907 start 2000000 size 1000000 +gpu 1 dimgrid 15625 dimblock 256 degrePoly 4000000 +gpu 1 dimgrid2 3907 start 1000000 size 1000000 +gpu 0 dimgrid2 3907 start 0 size 1000000 +gpu 3 dimgrid 15625 dimblock 256 degrePoly 4000000 +gpu 3 dimgrid2 3907 start 3000000 size 1000000 +gpu 0 iter : 1 Arret : 0.000123591 s/iter 291.935400 +gpu 0 iter : 2 Arret : 0.00230327 s/iter 292.141703 +gpu 0 iter : 3 Arret : 0.0286795 s/iter 292.393594 +gpu 0 iter : 4 Arret : 0.0285429 s/iter 294.939645 +gpu 0 iter : 5 Arret : 0.015126 s/iter 295.052179 +gpu 0 iter : 6 Arret : 0.00914977 s/iter 299.530354 +gpu 0 iter : 7 Arret : 0.0118855 s/iter 299.463510 +gpu 0 iter : 8 Arret : 0.0162261 s/iter 300.249822 +gpu 0 iter : 9 Arret : 0.0117884 s/iter 301.899531 +gpu 0 iter : 10 Arret : 0.026364 s/iter 302.404244 +gpu 0 iter : 11 Arret : 0.0103138 s/iter 301.564423 +gpu 0 iter : 12 Arret : 0.00848671 s/iter 297.523873 +gpu 0 iter : 13 Arret : 0.00411255 s/iter 294.825185 +gpu 0 iter : 14 Arret : 0.00247994 s/iter 292.686873 +gpu 0 iter : 15 Arret : 0.0023979 s/iter 291.952790 +gpu 0 iter : 16 Arret : 0.00136324 s/iter 291.600549 +gpu 0 iter : 17 Arret : 0.000779307 s/iter 291.963112 +gpu 0 iter : 18 Arret : 0.000313181 s/iter 292.084410 +gpu 0 iter : 19 Arret : 0.000378682 s/iter 291.784771 +gpu 0 iter : 20 Arret : 6.22512E-05 s/iter 291.868967 +gpu 0 iter : 21 Arret : 2.33182E-07 s/iter 291.862188 +gpu 0 iter : 22 Arret : 1.11637E-09 s/iter 291.843744 +gpu 0 iter : 23 Arret : 6.70081E-12 s/iter 291.918387 +gpu 0 iter : 24 Arret : 4.01469E-14 s/iter 291.751401 +temps : 7077.91 seconde(s) +(1 +i*0)*x^0 + (-0.1 +i*0)*x^2200000 + (-10 +i*0)*x^2800000 + (1 +i*0)*x^5000000 + +(-220000 +i*0)*x^2199999 + (-2.8E+07 +i*0)*x^2799999 + (5E+06 +i*0)*x^4999999 + +zone limite de 'log-exp' 1.00007 +CPU thread 2 (of 4) uses CUDA device 2 +CPU thread 1 (of 4) uses CUDA device 1 +CPU thread 3 (of 4) uses CUDA device 3 +CPU thread 0 (of 4) uses CUDA device 0 +gpu 3 dimgrid 19532 dimblock 256 degrePoly 5000000 +gpu 3 dimgrid2 4883 start 3750144 size 1250048 +gpu 1 dimgrid 19532 dimblock 256 degrePoly 5000000 +gpu 1 dimgrid2 4883 start 1250048 size 1250048 +gpu 2 dimgrid 19532 dimblock 256 degrePoly 5000000 +gpu 2 dimgrid2 4883 start 2500096 size 1250048 +gpu 0 dimgrid 19532 dimblock 256 degrePoly 5000000 +gpu 0 dimgrid2 4883 start 0 size 1250048 +gpu 0 iter : 1 Arret : 1.19865E-05 s/iter 455.001790 +gpu 0 iter : 2 Arret : 0.000937715 s/iter 455.137301 +gpu 0 iter : 3 Arret : 0.00669372 s/iter 456.599961 +gpu 0 iter : 4 Arret : 0.0061142 s/iter 458.992297 +gpu 0 iter : 5 Arret : 0.00333273 s/iter 465.500240 +gpu 0 iter : 6 Arret : 0.007957 s/iter 471.496241 +gpu 0 iter : 7 Arret : 0.00765568 s/iter 472.594002 +gpu 0 iter : 8 Arret : 0.00597041 s/iter 469.414085 +gpu 0 iter : 9 Arret : 0.0045263 s/iter 466.933629 +gpu 0 iter : 10 Arret : 0.00304707 s/iter 461.362014 +gpu 0 iter : 11 Arret : 0.00147222 s/iter 459.819568 +gpu 0 iter : 12 Arret : 0.00149084 s/iter 456.851360 +gpu 0 iter : 13 Arret : 0.000983199 s/iter 455.191156 +gpu 0 iter : 14 Arret : 0.000936713 s/iter 454.782453 +gpu 0 iter : 15 Arret : 0.000273346 s/iter 454.693120 +gpu 0 iter : 16 Arret : 0.000154751 s/iter 454.812950 +gpu 0 iter : 17 Arret : 0.00012654 s/iter 454.677231 +gpu 0 iter : 18 Arret : 1.18763E-05 s/iter 454.639549 +gpu 0 iter : 19 Arret : 2.69153E-05 s/iter 454.756226 +gpu 0 iter : 20 Arret : 3.82756E-05 s/iter 454.574410 +gpu 0 iter : 21 Arret : 1.41624E-06 s/iter 454.528836 +gpu 0 iter : 22 Arret : 3.26409E-07 s/iter 454.527899 +gpu 0 iter : 23 Arret : 1.84482E-09 s/iter 455.142443 +gpu 0 iter : 24 Arret : 9.8703E-12 s/iter 454.502871 +gpu 0 iter : 25 Arret : 5.2671E-14 s/iter 454.997696 +temps : 11464.3 seconde(s) diff --git a/Simulations/big.eps b/Simulations/big.eps index cb11d57..21b0383 100644 --- a/Simulations/big.eps +++ b/Simulations/big.eps @@ -1,7 +1,7 @@ %!PS-Adobe-2.0 %%Title: big.eps %%Creator: gnuplot 4.6 patchlevel 0 -%%CreationDate: Tue Jan 5 12:09:44 2016 +%%CreationDate: Wed Jan 6 23:09:38 2016 %%DocumentFonts: (atend) %%BoundingBox: 50 50 554 770 %%Orientation: Landscape @@ -462,7 +462,7 @@ SDict begin [ /Author (lilia) % /Producer (gnuplot) % /Keywords () - /CreationDate (Tue Jan 5 12:09:44 2016) + /CreationDate (Wed Jan 6 23:09:38 2016) /DOCINFO pdfmark end } ifelse @@ -690,11 +690,11 @@ LTb 3.000 UL LT0 LCb setrgbcolor -3482 4738 M +2075 4738 M [ [(Helvetica) 140.0 0.0 true true 0 (Full MPI)] ] -46.7 MRshow LT0 -3566 4738 M +2159 4738 M 399 0 V 938 1723 M 2042 3459 L @@ -708,18 +708,18 @@ LT0 4494 4127 Pls 5721 4349 Pls 6947 4507 Pls -3765 4738 Pls +2358 4738 Pls % End plot #1 % Begin plot #2 1.000 UP 3.000 UL LT1 LCb setrgbcolor -4889 4738 M +3482 4738 M [ [(Helvetica) 140.0 0.0 true true 0 (Full OMP)] ] -46.7 MRshow LT1 -4973 4738 M +3566 4738 M 399 0 V 938 1719 M 2042 3452 L @@ -733,18 +733,18 @@ LT1 4494 4101 Crs 5721 4361 Crs 6947 4543 Crs -5172 4738 Crs +3765 4738 Crs % End plot #2 % Begin plot #3 1.000 UP 3.000 UL LT2 LCb setrgbcolor -6296 4738 M +4889 4738 M [ [(Helvetica) 140.0 0.0 true true 0 (Sparse MPI)] ] -46.7 MRshow LT2 -6380 4738 M +4973 4738 M 399 0 V 938 1035 M 2042 2786 L @@ -758,8 +758,33 @@ LT2 4494 4102 Star 5721 3694 Star 6947 3931 Star -6579 4738 Star +5172 4738 Star % End plot #3 +% Begin plot #4 +1.000 UP +3.000 UL +LT3 +LCb setrgbcolor +6296 4738 M +[ [(Helvetica) 140.0 0.0 true true 0 (Sparse OMP)] +] -46.7 MRshow +LT3 +6380 4738 M +399 0 V +938 1128 M +2042 2771 L +1226 459 V +1226 615 V +1227 9 V +1226 185 V +938 1128 Box +2042 2771 Box +3268 3230 Box +4494 3845 Box +5721 3854 Box +6947 4039 Box +6579 4738 Box +% End plot #4 1.000 UL LTb 938 4871 N diff --git a/Simulations/big.txt b/Simulations/big.txt index f707cd8..98eaaff 100644 --- a/Simulations/big.txt +++ b/Simulations/big.txt @@ -1,7 +1,7 @@ #degree FMPI FOMP SMPI SOMP -100000 27.6039 27.3746 4.60588 -1000000 2532.79 2489.06 440.06 -2000000 6652.78 6105.16 2771.4 -3000000 14415.7 13488 13526.2 -4000000 25693.2 26495 4672 -5000000 38787.6 42620.5 8647.11 +100000 27.6039 27.3746 4.60588 5.86914 +1000000 2532.79 2489.06 440.06 422.324 +2000000 6652.78 6105.16 2771.4 1394.75 +3000000 14415.7 13488 13526.2 6926.85 +4000000 25693.2 26495 4672 7077.91 +5000000 38787.6 42620.5 8647.11 11464.3 diff --git a/big.pdf b/big.pdf index 833ab1c..c8edc16 100644 Binary files a/big.pdf and b/big.pdf differ