From: zianekhodja Date: Mon, 4 Jan 2016 00:57:21 +0000 (+0100) Subject: corrections et ajout des figures X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/kahina_paper2.git/commitdiff_plain/13cfd00767389ce6a0bcc1e192ff84f8e5ead47c corrections et ajout des figures --- diff --git a/Simulations/BIG/full_mpi_4GPU.o140664 b/Simulations/BIG/full_mpi_4GPU.o140664 new file mode 100644 index 0000000..5a8a896 --- /dev/null +++ b/Simulations/BIG/full_mpi_4GPU.o140664 @@ -0,0 +1,293 @@ +REMAINING: 4 +Taking GPU #0 +Taking GPU #1 +Taking GPU #2 +Taking GPU #3 +proc 2 +proc 3 +proc 0 +proc 1 +ici +ici +ici +ici +ici2 +ici2 +ici2 +ici2 +ici3 +zone limite de 'log-exp' 1.00018 +ici3 +zone limite de 'log-exp' 1.00018 +ici3 +zone limite de 'log-exp' 1.00018 +ici3 +ici4 +ici3 +ici4 +ici3 +ici4 +ici3 +zone limite de 'log-exp' 1.00018 +ici3 +ici4 +dimgrid 7813 dimblock 256 degrePoly 2000000 +proc 1, start 500032 size 500032 +dimgrid 7813 dimblock 256 degrePoly 2000000 +proc 3, start 1500096 size 500032 +dimgrid 7813 dimblock 256 degrePoly 2000000 +proc 2, start 1000064 size 500032 +dimgrid 7813 dimblock 256 degrePoly 2000000 +proc 0, start 0 size 500032 +proc 0 start 0 size 500032 +proc 2 start 1000064 size 500032 +proc 1 start 500032 size 500032 +proc 3 start 1500096 size 500032 +iter : 1 Arret : 1.44834E-06 s/iter 205.940825 +iter : 2 Arret : 1.34957E-06 s/iter 206.359114 +iter : 3 Arret : 1.28543E-06 s/iter 206.338404 +iter : 4 Arret : 1.32123E-06 s/iter 205.569801 +iter : 5 Arret : 1.28108E-06 s/iter 206.362388 +iter : 6 Arret : 1.52528E-06 s/iter 205.947635 +iter : 7 Arret : 1.44796E-06 s/iter 205.570036 +iter : 8 Arret : 1.4197E-06 s/iter 206.274854 +iter : 9 Arret : 1.36981E-06 s/iter 205.523999 +iter : 10 Arret : 1.78699E-06 s/iter 205.517166 +iter : 11 Arret : 0.000120828 s/iter 206.080759 +iter : 12 Arret : 0.000388623 s/iter 205.612439 +iter : 13 Arret : 0.00226391 s/iter 206.088028 +iter : 14 Arret : 0.00536308 s/iter 206.777804 +iter : 15 Arret : 0.00505109 s/iter 
207.995774 +iter : 16 Arret : 1.09453 s/iter 208.693534 +iter : 17 Arret : 0.00718604 s/iter 212.802838 +iter : 18 Arret : 0.0462084 s/iter 220.362911 +iter : 19 Arret : 0.067973 s/iter 213.850652 +iter : 20 Arret : 0.0352674 s/iter 211.078385 +iter : 21 Arret : 0.0256645 s/iter 210.310228 +iter : 22 Arret : 0.0042868 s/iter 207.901863 +iter : 23 Arret : 0.00261662 s/iter 208.696215 +iter : 24 Arret : 0.000777403 s/iter 207.126076 +iter : 25 Arret : 0.000262332 s/iter 207.347948 +iter : 26 Arret : 0.000480261 s/iter 207.491371 +iter : 27 Arret : 0.000843968 s/iter 207.287417 +iter : 28 Arret : 0.00107575 s/iter 206.757688 +iter : 29 Arret : 0.00102761 s/iter 207.166812 +iter : 30 Arret : 2.61225E-05 s/iter 207.625209 +iter : 31 Arret : 5.54935E-10 s/iter 207.334182 +iter : 32 Arret : 3.51083E-16 s/iter 207.140239 +temps : 6652.78 seconde(s) +proc 0 +proc 1 +proc 2 +proc 3 +ici +ici +ici +ici +ici2 +ici2 +ici2 +ici2 +ici3 +zone limite de 'log-exp' 1.00012 +ici3 +zone limite de 'log-exp' 1.00012 +ici3 +zone limite de 'log-exp' 1.00012 +ici3 +ici3 +ici4 +ici4 +ici3 +ici4 +ici3 +zone limite de 'log-exp' 1.00012 +ici3 +ici4 +dimgrid 11719 dimblock 256 degrePoly 3000000 +proc 1, start 750016 size 750016 +dimgrid 11719 dimblock 256 degrePoly 3000000 +proc 2, start 1500032 size 750016 +dimgrid 11719 dimblock 256 degrePoly 3000000 +proc 3, start 2250048 size 750016 +proc 2 start 1500032 size 750016 +dimgrid 11719 dimblock 256 degrePoly 3000000 +proc 0, start 0 size 750016 +proc 0 start 0 size 750016 +proc 1 start 750016 size 750016 +proc 3 start 2250048 size 750016 +iter : 1 Arret : 9.53204E-07 s/iter 461.459712 +iter : 2 Arret : 9.37007E-07 s/iter 461.131045 +iter : 3 Arret : 9.0577E-07 s/iter 460.922963 +iter : 4 Arret : 1.01222E-06 s/iter 460.829574 +iter : 5 Arret : 9.44944E-07 s/iter 460.829019 +iter : 6 Arret : 9.58987E-07 s/iter 461.290910 +iter : 7 Arret : 9.72984E-07 s/iter 461.358879 +iter : 8 Arret : 9.83283E-07 s/iter 461.123318 +iter : 9 Arret : 1.04215E-06 
s/iter 460.693998 +iter : 10 Arret : 1.44009E-06 s/iter 460.466568 +iter : 11 Arret : 8.73152E-05 s/iter 460.624768 +iter : 12 Arret : 0.000206674 s/iter 460.586929 +iter : 13 Arret : 0.000928336 s/iter 462.302008 +iter : 14 Arret : 0.967664 s/iter 464.620100 +iter : 15 Arret : 2.70874 s/iter 469.114186 +iter : 16 Arret : 0.0755852 s/iter 475.379134 +iter : 17 Arret : 0.203816 s/iter 481.768376 +iter : 18 Arret : 0.262828 s/iter 485.444654 +iter : 19 Arret : 0.235532 s/iter 477.928214 +iter : 20 Arret : 0.190572 s/iter 472.097443 +iter : 21 Arret : 0.0446409 s/iter 469.191689 +iter : 22 Arret : 0.0217248 s/iter 469.351025 +iter : 23 Arret : 0.026835 s/iter 466.587422 +iter : 24 Arret : 0.0102186 s/iter 463.265856 +iter : 25 Arret : 0.0072299 s/iter 461.804314 +iter : 26 Arret : 0.00195646 s/iter 460.242651 +iter : 27 Arret : 0.00073504 s/iter 460.262518 +iter : 28 Arret : 0.000350821 s/iter 461.440941 +iter : 29 Arret : 3.63887E-05 s/iter 460.152519 +iter : 30 Arret : 3.54794E-08 s/iter 460.122122 +iter : 31 Arret : 2.23894E-14 s/iter 460.715122 +temps : 14415.7 seconde(s) +proc 2 +proc 3 +proc 0 +proc 1 +ici +ici +ici +ici +ici2 +ici2 +ici2 +ici2 +ici3 +zone limite de 'log-exp' 1.00009 +ici3 +zone limite de 'log-exp' 1.00009 +ici3 +ici4 +ici3 +ici4 +ici3 +zone limite de 'log-exp' 1.00009 +ici3 +ici4 +dimgrid 15625 dimblock 256 degrePoly 4000000 +proc 3, start 3000000 size 1000000 +dimgrid 15625 dimblock 256 degrePoly 4000000 +proc 1, start 1000000 size 1000000 +dimgrid 15625 dimblock 256 degrePoly 4000000 +proc 2, start 2000000 size 1000000 +ici3 +zone limite de 'log-exp' 1.00009 +ici3 +ici4 +dimgrid 15625 dimblock 256 degrePoly 4000000 +proc 0, start 0 size 1000000 +proc 2 start 2000000 size 1000000 +proc 0 start 0 size 1000000 +proc 3 start 3000000 size 1000000 +proc 1 start 1000000 size 1000000 +iter : 1 Arret : 6.88912E-07 s/iter 819.306625 +iter : 2 Arret : 7.14016E-07 s/iter 818.197087 +iter : 3 Arret : 7.3014E-07 s/iter 818.470548 +iter : 4 Arret : 
7.21743E-07 s/iter 818.730362 +iter : 5 Arret : 7.35418E-07 s/iter 818.173934 +iter : 6 Arret : 7.52455E-07 s/iter 817.866927 +iter : 7 Arret : 7.34859E-07 s/iter 818.095046 +iter : 8 Arret : 7.31974E-07 s/iter 818.164256 +iter : 9 Arret : 7.48569E-07 s/iter 819.044387 +iter : 10 Arret : 7.74048E-07 s/iter 819.333033 +iter : 11 Arret : 3.48688E-05 s/iter 819.133268 +iter : 12 Arret : 0.000807717 s/iter 818.591881 +iter : 13 Arret : 0.000926508 s/iter 818.950799 +iter : 14 Arret : 0.00303398 s/iter 819.147222 +iter : 15 Arret : 0.0183154 s/iter 830.118271 +iter : 16 Arret : 0.0285894 s/iter 855.852233 +iter : 17 Arret : 0.0236243 s/iter 851.970029 +iter : 18 Arret : 0.731842 s/iter 853.621906 +iter : 19 Arret : 0.542969 s/iter 851.697002 +iter : 20 Arret : 0.327675 s/iter 853.423224 +iter : 21 Arret : 0.0744866 s/iter 837.707807 +iter : 22 Arret : 0.038791 s/iter 853.159762 +iter : 23 Arret : 0.0426196 s/iter 822.647946 +iter : 24 Arret : 0.055558 s/iter 823.298812 +iter : 25 Arret : 0.0472359 s/iter 818.676729 +iter : 26 Arret : 0.00981971 s/iter 818.659135 +iter : 27 Arret : 0.00185408 s/iter 819.292362 +iter : 28 Arret : 0.000122169 s/iter 818.157766 +iter : 29 Arret : 2.6396E-07 s/iter 817.164218 +iter : 30 Arret : 2.38327E-11 s/iter 817.419948 +iter : 31 Arret : 1.57009E-16 s/iter 817.311017 +temps : 25693.2 seconde(s) +proc 2 +proc 3 +proc 0 +proc 1 +ici +ici +ici +ici +ici2 +ici2 +ici2 +ici2 +ici3 +zone limite de 'log-exp' 1.00007 +ici3 +zone limite de 'log-exp' 1.00007 +ici3 +zone limite de 'log-exp' 1.00007 +ici3 +ici4 +ici3 +ici4 +ici3 +ici4 +ici3 +zone limite de 'log-exp' 1.00007 +ici3 +ici4 +dimgrid 19532 dimblock 256 degrePoly 5000000 +proc 2, start 2500096 size 1250048 +dimgrid 19532 dimblock 256 degrePoly 5000000 +proc 1, start 1250048 size 1250048 +dimgrid 19532 dimblock 256 degrePoly 5000000 +proc 3, start 3750144 size 1250048 +proc 3 start 3750144 size 1250048 +proc 1 start 1250048 size 1250048 +proc 2 start 2500096 size 1250048 +dimgrid 19532 
dimblock 256 degrePoly 5000000 +proc 0, start 0 size 1250048 +proc 0 start 0 size 1250048 +iter : 1 Arret : 5.52441E-07 s/iter 1278.740665 +iter : 2 Arret : 5.75419E-07 s/iter 1277.699605 +iter : 3 Arret : 5.51509E-07 s/iter 1276.810889 +iter : 4 Arret : 5.69267E-07 s/iter 1279.877073 +iter : 5 Arret : 6.08585E-07 s/iter 1279.500027 +iter : 6 Arret : 5.83038E-07 s/iter 1278.799718 +iter : 7 Arret : 5.79436E-07 s/iter 1277.284006 +iter : 8 Arret : 5.90821E-07 s/iter 1276.630395 +iter : 9 Arret : 6.21271E-07 s/iter 1277.007910 +iter : 10 Arret : 7.74213E-07 s/iter 1277.019126 +iter : 11 Arret : 4.88001E-05 s/iter 1276.118288 +iter : 12 Arret : 0.000260225 s/iter 1276.399074 +iter : 13 Arret : 0.000428267 s/iter 1281.985363 +iter : 14 Arret : 0.00230591 s/iter 1325.812692 +iter : 15 Arret : 0.00294045 s/iter 1297.918923 +iter : 16 Arret : 0.0104684 s/iter 1346.312760 +iter : 17 Arret : 0.0168898 s/iter 1350.545849 +iter : 18 Arret : 0.0175923 s/iter 1339.305912 +iter : 19 Arret : 0.0143619 s/iter 1335.131532 +iter : 20 Arret : 0.0170721 s/iter 1328.137450 +iter : 21 Arret : 0.0384741 s/iter 1315.141047 +iter : 22 Arret : 0.0257741 s/iter 1301.408144 +iter : 23 Arret : 0.0223754 s/iter 1290.526097 +iter : 24 Arret : 0.0384305 s/iter 1284.924772 +iter : 25 Arret : 0.013928 s/iter 1275.931436 +iter : 26 Arret : 0.00315086 s/iter 1275.660735 +iter : 27 Arret : 0.000282398 s/iter 1275.643767 +iter : 28 Arret : 4.02652E-07 s/iter 1275.656268 +iter : 29 Arret : 2.8924E-11 s/iter 1275.459590 +iter : 30 Arret : 1.57009E-16 s/iter 1276.129079 +temps : 38787.6 seconde(s) diff --git a/Simulations/BIG/full_openmp_4GPU.o140684 b/Simulations/BIG/full_openmp_4GPU.o140684 new file mode 100644 index 0000000..5de3b1f --- /dev/null +++ b/Simulations/BIG/full_openmp_4GPU.o140684 @@ -0,0 +1,277 @@ +REMAINING: 4 +Taking GPU #0 +Taking GPU #1 +Taking GPU #2 +Taking GPU #3 +zone limite de 'log-exp' 1.0003 +CPU thread 3 (of 4) uses CUDA device 3 +CPU thread 2 (of 4) uses CUDA device 2 +CPU 
thread 0 (of 4) uses CUDA device 0 +CPU thread 1 (of 4) uses CUDA device 1 +gpu 0 dimgrid 4688 dimblock 256 degrePoly 1200000 +gpu 2 dimgrid 4688 dimblock 256 degrePoly 1200000 +gpu 2 dimgrid2 1172 start 600064 size 300032 +gpu 0 dimgrid2 1172 start 0 size 300032 +gpu 1 dimgrid 4688 dimblock 256 degrePoly 1200000 +gpu 1 dimgrid2 1172 start 300032 size 300032 +gpu 3 dimgrid 4688 dimblock 256 degrePoly 1200000 +gpu 3 dimgrid2 1172 start 900096 size 300032 +gpu 0 iter : 1 Arret : 2.44482E-06 s/iter 74.639875 +gpu 0 iter : 2 Arret : 2.30873E-06 s/iter 74.564382 +gpu 0 iter : 3 Arret : 2.11145E-06 s/iter 74.751331 +gpu 0 iter : 4 Arret : 2.21313E-06 s/iter 74.600699 +gpu 0 iter : 5 Arret : 2.19564E-06 s/iter 74.570504 +gpu 0 iter : 6 Arret : 2.11756E-06 s/iter 74.733217 +gpu 0 iter : 7 Arret : 2.14117E-06 s/iter 74.506427 +gpu 0 iter : 8 Arret : 2.20119E-06 s/iter 74.544657 +gpu 0 iter : 9 Arret : 2.37346E-06 s/iter 74.562161 +gpu 0 iter : 10 Arret : 2.64946E-06 s/iter 74.853449 +gpu 0 iter : 11 Arret : 0.000186918 s/iter 74.525156 +gpu 0 iter : 12 Arret : 0.00138474 s/iter 74.516126 +gpu 0 iter : 13 Arret : 0.00258201 s/iter 74.587762 +gpu 0 iter : 14 Arret : 0.0010761 s/iter 84.238307 +gpu 0 iter : 15 Arret : 0.00188521 s/iter 86.686536 +gpu 0 iter : 16 Arret : 0.00499919 s/iter 75.862454 +gpu 0 iter : 17 Arret : 0.00360609 s/iter 76.568609 +gpu 0 iter : 18 Arret : 1.06553 s/iter 94.954529 +gpu 0 iter : 19 Arret : 0.0393766 s/iter 92.481432 +gpu 0 iter : 20 Arret : 0.0223315 s/iter 94.476898 +gpu 0 iter : 21 Arret : 0.0410657 s/iter 94.178787 +gpu 0 iter : 22 Arret : 0.0381889 s/iter 85.335835 +gpu 0 iter : 23 Arret : 0.0874342 s/iter 85.783340 +gpu 0 iter : 24 Arret : 0.203536 s/iter 89.450930 +gpu 0 iter : 25 Arret : 0.120518 s/iter 86.419200 +gpu 0 iter : 26 Arret : 0.0247685 s/iter 74.681052 +gpu 0 iter : 27 Arret : 0.0082978 s/iter 74.762347 +gpu 0 iter : 28 Arret : 0.0042467 s/iter 74.824984 +gpu 0 iter : 29 Arret : 0.000439949 s/iter 74.307768 +gpu 0 iter : 30 
Arret : 0.000137259 s/iter 74.423240 +gpu 0 iter : 31 Arret : 7.309E-05 s/iter 74.324788 +gpu 0 iter : 32 Arret : 8.35608E-06 s/iter 74.355819 +gpu 0 iter : 33 Arret : 1.88149E-08 s/iter 74.258167 +gpu 0 iter : 34 Arret : 1.77927E-14 s/iter 74.313275 +temps : 2689.68 seconde(s) +zone limite de 'log-exp' 1.00025 +CPU thread 2 (of 4) uses CUDA device 2 +CPU thread 0 (of 4) uses CUDA device 0 +CPU thread 3 (of 4) uses CUDA device 3 +CPU thread 1 (of 4) uses CUDA device 1 +gpu 3 dimgrid 5469 dimblock 256 degrePoly 1400000 +gpu 3 dimgrid2 1368 start 1050048 size 350016 +gpu 0 dimgrid 5469 dimblock 256 degrePoly 1400000 +gpu 0 dimgrid2 1368 start 0 size 350016 +gpu 1 dimgrid 5469 dimblock 256 degrePoly 1400000 +gpu 1 dimgrid2 1368 start 350016 size 350016 +gpu 2 dimgrid 5469 dimblock 256 degrePoly 1400000 +gpu 2 dimgrid2 1368 start 700032 size 350016 +gpu 0 iter : 1 Arret : 2.10282E-06 s/iter 101.078414 +gpu 0 iter : 2 Arret : 2.0946E-06 s/iter 101.041217 +gpu 0 iter : 3 Arret : 2.13477E-06 s/iter 100.998517 +gpu 0 iter : 4 Arret : 2.10394E-06 s/iter 100.989310 +gpu 0 iter : 5 Arret : 2.00366E-06 s/iter 101.050293 +gpu 0 iter : 6 Arret : 2.09791E-06 s/iter 100.993768 +gpu 0 iter : 7 Arret : 2.04507E-06 s/iter 101.097449 +gpu 0 iter : 8 Arret : 1.84181E-06 s/iter 100.925116 +gpu 0 iter : 9 Arret : 1.99862E-06 s/iter 101.253105 +gpu 0 iter : 10 Arret : 2.47959E-06 s/iter 100.921977 +gpu 0 iter : 11 Arret : 0.000783744 s/iter 100.977066 +gpu 0 iter : 12 Arret : 0.00804452 s/iter 101.068138 +gpu 0 iter : 13 Arret : 0.00729316 s/iter 101.335318 +gpu 0 iter : 14 Arret : 0.00213 s/iter 102.383645 +gpu 0 iter : 15 Arret : 0.159477 s/iter 107.708354 +gpu 0 iter : 16 Arret : 0.847468 s/iter 109.172629 +gpu 0 iter : 17 Arret : 0.811732 s/iter 107.915741 +gpu 0 iter : 18 Arret : 3.93575 s/iter 111.916735 +gpu 0 iter : 19 Arret : 0.121284 s/iter 116.713178 +gpu 0 iter : 20 Arret : 0.0192941 s/iter 120.572398 +gpu 0 iter : 21 Arret : 0.0182609 s/iter 121.039853 +gpu 0 iter : 22 Arret 
: 0.0172062 s/iter 122.036788 +gpu 0 iter : 23 Arret : 0.0107296 s/iter 120.368267 +gpu 0 iter : 24 Arret : 0.0238956 s/iter 119.713485 +gpu 0 iter : 25 Arret : 0.0585596 s/iter 113.854843 +gpu 0 iter : 26 Arret : 0.0166418 s/iter 100.980212 +gpu 0 iter : 27 Arret : 0.000530953 s/iter 101.008612 +gpu 0 iter : 28 Arret : 0.000564291 s/iter 100.812084 +gpu 0 iter : 29 Arret : 8.45556E-07 s/iter 100.855989 +gpu 0 iter : 30 Arret : 1.25696E-12 s/iter 100.729882 +gpu 0 iter : 31 Arret : 1.57009E-16 s/iter 100.862921 +temps : 3295.39 seconde(s) +zone limite de 'log-exp' 1.00018 +CPU thread 1 (of 4) uses CUDA device 1 +CPU thread 2 (of 4) uses CUDA device 2 +CPU thread 0 (of 4) uses CUDA device 0 +CPU thread 3 (of 4) uses CUDA device 3 +gpu 0 dimgrid 7813 dimblock 256 degrePoly 2000000 +gpu 0 dimgrid2 1954 start 0 size 500032 +gpu 1 dimgrid 7813 dimblock 256 degrePoly 2000000 +gpu 1 dimgrid2 1954 start 500032 size 500032 +gpu 2 dimgrid 7813 dimblock 256 degrePoly 2000000 +gpu 2 dimgrid2 1954 start 1000064 size 500032 +gpu 3 dimgrid 7813 dimblock 256 degrePoly 2000000 +gpu 3 dimgrid2 1954 start 1500096 size 500032 +gpu 0 iter : 1 Arret : 1.41783E-06 s/iter 205.699797 +gpu 0 iter : 2 Arret : 1.3616E-06 s/iter 205.388330 +gpu 0 iter : 3 Arret : 1.32097E-06 s/iter 205.527548 +gpu 0 iter : 4 Arret : 1.33335E-06 s/iter 206.148644 +gpu 0 iter : 5 Arret : 1.31806E-06 s/iter 206.270502 +gpu 0 iter : 6 Arret : 1.52658E-06 s/iter 206.476330 +gpu 0 iter : 7 Arret : 1.53874E-06 s/iter 206.070477 +gpu 0 iter : 8 Arret : 1.36502E-06 s/iter 205.982843 +gpu 0 iter : 9 Arret : 1.36688E-06 s/iter 205.493588 +gpu 0 iter : 10 Arret : 1.7716E-06 s/iter 206.082545 +gpu 0 iter : 11 Arret : 0.000713789 s/iter 206.598775 +gpu 0 iter : 12 Arret : 0.00129587 s/iter 206.013463 +gpu 0 iter : 13 Arret : 0.00356853 s/iter 206.828948 +gpu 0 iter : 14 Arret : 0.00567563 s/iter 214.007985 +gpu 0 iter : 15 Arret : 0.00554453 s/iter 232.252817 +gpu 0 iter : 16 Arret : 1.04921 s/iter 228.088790 +gpu 0 iter : 
17 Arret : 0.121352 s/iter 209.740568 +gpu 0 iter : 18 Arret : 0.177027 s/iter 220.589742 +gpu 0 iter : 19 Arret : 0.142704 s/iter 217.273146 +gpu 0 iter : 20 Arret : 0.030086 s/iter 220.609907 +gpu 0 iter : 21 Arret : 0.0270766 s/iter 209.003185 +gpu 0 iter : 22 Arret : 0.0185505 s/iter 218.854174 +gpu 0 iter : 23 Arret : 0.00830722 s/iter 207.800890 +gpu 0 iter : 24 Arret : 0.0125614 s/iter 207.676175 +gpu 0 iter : 25 Arret : 0.0017746 s/iter 207.822734 +gpu 0 iter : 26 Arret : 3.36244E-05 s/iter 207.869291 +gpu 0 iter : 27 Arret : 2.30077E-06 s/iter 207.532544 +gpu 0 iter : 28 Arret : 4.08095E-08 s/iter 207.329257 +gpu 0 iter : 29 Arret : 9.32731E-13 s/iter 206.858689 +temps : 6105.16 seconde(s) +zone limite de 'log-exp' 1.00012 +CPU thread 2 (of 4) uses CUDA device 2 +CPU thread 3 (of 4) uses CUDA device 3 +CPU thread 0 (of 4) uses CUDA device 0 +CPU thread 1 (of 4) uses CUDA device 1 +gpu 0 dimgrid 11719 dimblock 256 degrePoly 3000000 +gpu 0 dimgrid2 2930 start 0 size 750016 +gpu 2 dimgrid 11719 dimblock 256 degrePoly 3000000 +gpu 2 dimgrid2 2930 start 1500032 size 750016 +gpu 3 dimgrid 11719 dimblock 256 degrePoly 3000000 +gpu 3 dimgrid2 2930 start 2250048 size 750016 +gpu 1 dimgrid 11719 dimblock 256 degrePoly 3000000 +gpu 1 dimgrid2 2930 start 750016 size 750016 +gpu 0 iter : 1 Arret : 9.65662E-07 s/iter 461.902449 +gpu 0 iter : 2 Arret : 9.76206E-07 s/iter 461.818288 +gpu 0 iter : 3 Arret : 9.88763E-07 s/iter 461.410666 +gpu 0 iter : 4 Arret : 9.97989E-07 s/iter 461.587599 +gpu 0 iter : 5 Arret : 9.31223E-07 s/iter 461.314767 +gpu 0 iter : 6 Arret : 9.48351E-07 s/iter 461.753957 +gpu 0 iter : 7 Arret : 1.00829E-06 s/iter 461.809276 +gpu 0 iter : 8 Arret : 9.72791E-07 s/iter 461.539355 +gpu 0 iter : 9 Arret : 9.87121E-07 s/iter 461.458642 +gpu 0 iter : 10 Arret : 1.19479E-06 s/iter 461.810796 +gpu 0 iter : 11 Arret : 0.000221282 s/iter 461.821476 +gpu 0 iter : 12 Arret : 0.000344341 s/iter 460.985545 +gpu 0 iter : 13 Arret : 0.00110164 s/iter 461.528817 
+gpu 0 iter : 14 Arret : 0.964677 s/iter 466.427092 +gpu 0 iter : 15 Arret : 3.82939 s/iter 467.570894 +gpu 0 iter : 16 Arret : 0.381779 s/iter 468.615707 +gpu 0 iter : 17 Arret : 0.211695 s/iter 480.267149 +gpu 0 iter : 18 Arret : 0.0742952 s/iter 476.977879 +gpu 0 iter : 19 Arret : 0.164849 s/iter 473.636002 +gpu 0 iter : 20 Arret : 0.159823 s/iter 472.866587 +gpu 0 iter : 21 Arret : 0.0472861 s/iter 474.015677 +gpu 0 iter : 22 Arret : 0.0323264 s/iter 466.289093 +gpu 0 iter : 23 Arret : 0.0485177 s/iter 466.379497 +gpu 0 iter : 24 Arret : 0.0595915 s/iter 463.698664 +gpu 0 iter : 25 Arret : 0.0164616 s/iter 462.422176 +gpu 0 iter : 26 Arret : 0.000451002 s/iter 460.400840 +gpu 0 iter : 27 Arret : 1.45653E-06 s/iter 460.573741 +gpu 0 iter : 28 Arret : 1.41903E-11 s/iter 462.724001 +gpu 0 iter : 29 Arret : 1.57009E-16 s/iter 460.548684 +temps : 13488 seconde(s) +zone limite de 'log-exp' 1.00009 +CPU thread 3 (of 4) uses CUDA device 3 +CPU thread 1 (of 4) uses CUDA device 1 +CPU thread 2 (of 4) uses CUDA device 2 +CPU thread 0 (of 4) uses CUDA device 0 +gpu 1 dimgrid 15625 dimblock 256 degrePoly 4000000 +gpu 2 dimgrid 15625 dimblock 256 degrePoly 4000000 +gpu 3 dimgrid 15625 dimblock 256 degrePoly 4000000 +gpu 3 dimgrid2 3907 start 3000000 size 1000000 +gpu 1 dimgrid2 3907 start 1000000 size 1000000 +gpu 2 dimgrid2 3907 start 2000000 size 1000000 +gpu 0 dimgrid 15625 dimblock 256 degrePoly 4000000 +gpu 0 dimgrid2 3907 start 0 size 1000000 +gpu 0 iter : 1 Arret : 7.13507E-07 s/iter 817.508617 +gpu 0 iter : 2 Arret : 7.20094E-07 s/iter 818.908125 +gpu 0 iter : 3 Arret : 7.28751E-07 s/iter 820.265865 +gpu 0 iter : 4 Arret : 6.51165E-07 s/iter 818.432643 +gpu 0 iter : 5 Arret : 7.28105E-07 s/iter 817.730901 +gpu 0 iter : 6 Arret : 7.38098E-07 s/iter 819.571359 +gpu 0 iter : 7 Arret : 7.04284E-07 s/iter 818.758532 +gpu 0 iter : 8 Arret : 7.5334E-07 s/iter 818.100543 +gpu 0 iter : 9 Arret : 6.79044E-07 s/iter 818.476009 +gpu 0 iter : 10 Arret : 7.73741E-07 s/iter 
819.462739 +gpu 0 iter : 11 Arret : 2.82143E-05 s/iter 818.518901 +gpu 0 iter : 12 Arret : 0.000443039 s/iter 818.516432 +gpu 0 iter : 13 Arret : 0.000886057 s/iter 823.390814 +gpu 0 iter : 14 Arret : 0.00205987 s/iter 819.345638 +gpu 0 iter : 15 Arret : 0.00937057 s/iter 825.773513 +gpu 0 iter : 16 Arret : 0.00932936 s/iter 844.157309 +gpu 0 iter : 17 Arret : 0.194265 s/iter 854.703102 +gpu 0 iter : 18 Arret : 0.0698451 s/iter 852.222049 +gpu 0 iter : 19 Arret : 0.283545 s/iter 867.758665 +gpu 0 iter : 20 Arret : 1.19617 s/iter 847.010405 +gpu 0 iter : 21 Arret : 0.0451126 s/iter 840.429949 +gpu 0 iter : 22 Arret : 0.0159319 s/iter 831.098682 +gpu 0 iter : 23 Arret : 0.0102042 s/iter 821.563847 +gpu 0 iter : 24 Arret : 0.00390116 s/iter 818.546315 +gpu 0 iter : 25 Arret : 0.000294328 s/iter 820.074389 +gpu 0 iter : 26 Arret : 7.774E-05 s/iter 818.592072 +gpu 0 iter : 27 Arret : 2.91292E-06 s/iter 818.542294 +gpu 0 iter : 28 Arret : 3.27025E-06 s/iter 818.997322 +gpu 0 iter : 29 Arret : 2.38632E-06 s/iter 818.581842 +gpu 0 iter : 30 Arret : 1.44132E-07 s/iter 819.569590 +gpu 0 iter : 31 Arret : 1.1039E-10 s/iter 818.574265 +gpu 0 iter : 32 Arret : 1.57009E-16 s/iter 818.740279 +temps : 26495 seconde(s) +zone limite de 'log-exp' 1.00007 +CPU thread 3 (of 4) uses CUDA device 3 +CPU thread 1 (of 4) uses CUDA device 1 +CPU thread 2 (of 4) uses CUDA device 2 +CPU thread 0 (of 4) uses CUDA device 0 +gpu 3 dimgrid 19532 dimblock 256 degrePoly 5000000 +gpu 3 dimgrid2 4883 start 3750144 size 1250048 +gpu 1 dimgrid 19532 dimblock 256 degrePoly 5000000 +gpu 1 dimgrid2 4883 start 1250048 size 1250048 +gpu 2 dimgrid 19532 dimblock 256 degrePoly 5000000 +gpu 2 dimgrid2 4883 start 2500096 size 1250048 +gpu 0 dimgrid 19532 dimblock 256 degrePoly 5000000 +gpu 0 dimgrid2 4883 start 0 size 1250048 +gpu 0 iter : 1 Arret : 5.5526E-07 s/iter 1277.818322 +gpu 0 iter : 2 Arret : 5.61731E-07 s/iter 1277.992626 +gpu 0 iter : 3 Arret : 5.64964E-07 s/iter 1277.611125 +gpu 0 iter : 4 Arret : 
5.72211E-07 s/iter 1276.532785 +gpu 0 iter : 5 Arret : 5.52232E-07 s/iter 1277.275538 +gpu 0 iter : 6 Arret : 5.84776E-07 s/iter 1277.220058 +gpu 0 iter : 7 Arret : 5.78912E-07 s/iter 1275.509197 +gpu 0 iter : 8 Arret : 5.79479E-07 s/iter 1278.916489 +gpu 0 iter : 9 Arret : 6.18115E-07 s/iter 1274.895894 +gpu 0 iter : 10 Arret : 7.95575E-07 s/iter 1278.799546 +gpu 0 iter : 11 Arret : 0.000144509 s/iter 1274.000706 +gpu 0 iter : 12 Arret : 0.00033866 s/iter 1279.802690 +gpu 0 iter : 13 Arret : 0.000459288 s/iter 1277.437917 +gpu 0 iter : 14 Arret : 0.00271507 s/iter 1285.725652 +gpu 0 iter : 15 Arret : 0.00416423 s/iter 1306.932716 +gpu 0 iter : 16 Arret : 0.0203828 s/iter 1359.882166 +gpu 0 iter : 17 Arret : 0.0178872 s/iter 1347.000432 +gpu 0 iter : 18 Arret : 0.0127105 s/iter 1348.523047 +gpu 0 iter : 19 Arret : 0.0128704 s/iter 1338.221357 +gpu 0 iter : 20 Arret : 0.0222235 s/iter 1333.615666 +gpu 0 iter : 21 Arret : 0.031043 s/iter 1321.029250 +gpu 0 iter : 22 Arret : 0.0293537 s/iter 1307.821391 +gpu 0 iter : 23 Arret : 0.0147864 s/iter 1292.696397 +gpu 0 iter : 24 Arret : 0.00622656 s/iter 1277.914185 +gpu 0 iter : 25 Arret : 0.00544242 s/iter 1276.236495 +gpu 0 iter : 26 Arret : 0.00645191 s/iter 1275.391733 +gpu 0 iter : 27 Arret : 0.00753073 s/iter 1276.452405 +gpu 0 iter : 28 Arret : 0.000248698 s/iter 1280.456039 +gpu 0 iter : 29 Arret : 1.37858E-06 s/iter 1279.135441 +gpu 0 iter : 30 Arret : 4.93212E-07 s/iter 1274.588590 +gpu 0 iter : 31 Arret : 1.80451E-07 s/iter 1278.396215 +gpu 0 iter : 32 Arret : 3.28612E-09 s/iter 1274.750792 +gpu 0 iter : 33 Arret : 1.99852E-14 s/iter 1276.780371 +temps : 42620.5 seconde(s) diff --git a/Simulations/big.eps b/Simulations/big.eps new file mode 100644 index 0000000..3c479c8 --- /dev/null +++ b/Simulations/big.eps @@ -0,0 +1,823 @@ +%!PS-Adobe-2.0 +%%Title: big.eps +%%Creator: gnuplot 4.6 patchlevel 0 +%%CreationDate: Mon Jan 4 01:37:53 2016 +%%DocumentFonts: (atend) +%%BoundingBox: 50 50 554 770 +%%Orientation: 
Landscape +%%Pages: (atend) +%%EndComments +%%BeginProlog +/gnudict 256 dict def +gnudict begin +% +% The following true/false flags may be edited by hand if desired. +% The unit line width and grayscale image gamma correction may also be changed. +% +/Color true def +/Blacktext false def +/Solid false def +/Dashlength 1 def +/Landscape true def +/Level1 false def +/Rounded false def +/ClipToBoundingBox false def +/SuppressPDFMark false def +/TransparentPatterns false def +/gnulinewidth 5.000 def +/userlinewidth gnulinewidth def +/Gamma 1.0 def +/BackgroundColor {-1.000 -1.000 -1.000} def +% +/vshift -46 def +/dl1 { + 10.0 Dashlength mul mul + Rounded { currentlinewidth 0.75 mul sub dup 0 le { pop 0.01 } if } if +} def +/dl2 { + 10.0 Dashlength mul mul + Rounded { currentlinewidth 0.75 mul add } if +} def +/hpt_ 31.5 def +/vpt_ 31.5 def +/hpt hpt_ def +/vpt vpt_ def +/doclip { + ClipToBoundingBox { + newpath 50 50 moveto 554 50 lineto 554 770 lineto 50 770 lineto closepath + clip + } if +} def +% +% Gnuplot Prolog Version 4.4 (August 2010) +% +%/SuppressPDFMark true def +% +/M {moveto} bind def +/L {lineto} bind def +/R {rmoveto} bind def +/V {rlineto} bind def +/N {newpath moveto} bind def +/Z {closepath} bind def +/C {setrgbcolor} bind def +/f {rlineto fill} bind def +/g {setgray} bind def +/Gshow {show} def % May be redefined later in the file to support UTF-8 +/vpt2 vpt 2 mul def +/hpt2 hpt 2 mul def +/Lshow {currentpoint stroke M 0 vshift R + Blacktext {gsave 0 setgray show grestore} {show} ifelse} def +/Rshow {currentpoint stroke M dup stringwidth pop neg vshift R + Blacktext {gsave 0 setgray show grestore} {show} ifelse} def +/Cshow {currentpoint stroke M dup stringwidth pop -2 div vshift R + Blacktext {gsave 0 setgray show grestore} {show} ifelse} def +/UP {dup vpt_ mul /vpt exch def hpt_ mul /hpt exch def + /hpt2 hpt 2 mul def /vpt2 vpt 2 mul def} def +/DL {Color {setrgbcolor Solid {pop []} if 0 setdash} + {pop pop pop 0 setgray Solid {pop []} if 0 
setdash} ifelse} def +/BL {stroke userlinewidth 2 mul setlinewidth + Rounded {1 setlinejoin 1 setlinecap} if} def +/AL {stroke userlinewidth 2 div setlinewidth + Rounded {1 setlinejoin 1 setlinecap} if} def +/UL {dup gnulinewidth mul /userlinewidth exch def + dup 1 lt {pop 1} if 10 mul /udl exch def} def +/PL {stroke userlinewidth setlinewidth + Rounded {1 setlinejoin 1 setlinecap} if} def +3.8 setmiterlimit +% Default Line colors +/LCw {1 1 1} def +/LCb {0 0 0} def +/LCa {0 0 0} def +/LC0 {1 0 0} def +/LC1 {0 1 0} def +/LC2 {0 0 1} def +/LC3 {1 0 1} def +/LC4 {0 1 1} def +/LC5 {1 1 0} def +/LC6 {0 0 0} def +/LC7 {1 0.3 0} def +/LC8 {0.5 0.5 0.5} def +% Default Line Types +/LTw {PL [] 1 setgray} def +/LTb {BL [] LCb DL} def +/LTa {AL [1 udl mul 2 udl mul] 0 setdash LCa setrgbcolor} def +/LT0 {PL [] LC0 DL} def +/LT1 {PL [4 dl1 2 dl2] LC1 DL} def +/LT2 {PL [2 dl1 3 dl2] LC2 DL} def +/LT3 {PL [1 dl1 1.5 dl2] LC3 DL} def +/LT4 {PL [6 dl1 2 dl2 1 dl1 2 dl2] LC4 DL} def +/LT5 {PL [3 dl1 3 dl2 1 dl1 3 dl2] LC5 DL} def +/LT6 {PL [2 dl1 2 dl2 2 dl1 6 dl2] LC6 DL} def +/LT7 {PL [1 dl1 2 dl2 6 dl1 2 dl2 1 dl1 2 dl2] LC7 DL} def +/LT8 {PL [2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 4 dl2] LC8 DL} def +/Pnt {stroke [] 0 setdash gsave 1 setlinecap M 0 0 V stroke grestore} def +/Dia {stroke [] 0 setdash 2 copy vpt add M + hpt neg vpt neg V hpt vpt neg V + hpt vpt V hpt neg vpt V closepath stroke + Pnt} def +/Pls {stroke [] 0 setdash vpt sub M 0 vpt2 V + currentpoint stroke M + hpt neg vpt neg R hpt2 0 V stroke + } def +/Box {stroke [] 0 setdash 2 copy exch hpt sub exch vpt add M + 0 vpt2 neg V hpt2 0 V 0 vpt2 V + hpt2 neg 0 V closepath stroke + Pnt} def +/Crs {stroke [] 0 setdash exch hpt sub exch vpt add M + hpt2 vpt2 neg V currentpoint stroke M + hpt2 neg 0 R hpt2 vpt2 V stroke} def +/TriU {stroke [] 0 setdash 2 copy vpt 1.12 mul add M + hpt neg vpt -1.62 mul V + hpt 2 mul 0 V + hpt neg vpt 1.62 mul V closepath stroke + Pnt} def +/Star {2 copy Pls Crs} def +/BoxF {stroke [] 0 
setdash exch hpt sub exch vpt add M + 0 vpt2 neg V hpt2 0 V 0 vpt2 V + hpt2 neg 0 V closepath fill} def +/TriUF {stroke [] 0 setdash vpt 1.12 mul add M + hpt neg vpt -1.62 mul V + hpt 2 mul 0 V + hpt neg vpt 1.62 mul V closepath fill} def +/TriD {stroke [] 0 setdash 2 copy vpt 1.12 mul sub M + hpt neg vpt 1.62 mul V + hpt 2 mul 0 V + hpt neg vpt -1.62 mul V closepath stroke + Pnt} def +/TriDF {stroke [] 0 setdash vpt 1.12 mul sub M + hpt neg vpt 1.62 mul V + hpt 2 mul 0 V + hpt neg vpt -1.62 mul V closepath fill} def +/DiaF {stroke [] 0 setdash vpt add M + hpt neg vpt neg V hpt vpt neg V + hpt vpt V hpt neg vpt V closepath fill} def +/Pent {stroke [] 0 setdash 2 copy gsave + translate 0 hpt M 4 {72 rotate 0 hpt L} repeat + closepath stroke grestore Pnt} def +/PentF {stroke [] 0 setdash gsave + translate 0 hpt M 4 {72 rotate 0 hpt L} repeat + closepath fill grestore} def +/Circle {stroke [] 0 setdash 2 copy + hpt 0 360 arc stroke Pnt} def +/CircleF {stroke [] 0 setdash hpt 0 360 arc fill} def +/C0 {BL [] 0 setdash 2 copy moveto vpt 90 450 arc} bind def +/C1 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 90 arc closepath fill + vpt 0 360 arc closepath} bind def +/C2 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 90 180 arc closepath fill + vpt 0 360 arc closepath} bind def +/C3 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 180 arc closepath fill + vpt 0 360 arc closepath} bind def +/C4 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 180 270 arc closepath fill + vpt 0 360 arc closepath} bind def +/C5 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 90 arc + 2 copy moveto + 2 copy vpt 180 270 arc closepath fill + vpt 0 360 arc} bind def +/C6 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 90 270 arc closepath fill + vpt 0 360 arc closepath} bind def +/C7 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 270 arc closepath fill + vpt 0 360 arc closepath} bind def +/C8 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 270 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/C9 {BL [] 0 
setdash 2 copy moveto + 2 copy vpt 270 450 arc closepath fill + vpt 0 360 arc closepath} bind def +/C10 {BL [] 0 setdash 2 copy 2 copy moveto vpt 270 360 arc closepath fill + 2 copy moveto + 2 copy vpt 90 180 arc closepath fill + vpt 0 360 arc closepath} bind def +/C11 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 180 arc closepath fill + 2 copy moveto + 2 copy vpt 270 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/C12 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 180 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/C13 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 90 arc closepath fill + 2 copy moveto + 2 copy vpt 180 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/C14 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 90 360 arc closepath fill + vpt 0 360 arc} bind def +/C15 {BL [] 0 setdash 2 copy vpt 0 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/Rec {newpath 4 2 roll moveto 1 index 0 rlineto 0 exch rlineto + neg 0 rlineto closepath} bind def +/Square {dup Rec} bind def +/Bsquare {vpt sub exch vpt sub exch vpt2 Square} bind def +/S0 {BL [] 0 setdash 2 copy moveto 0 vpt rlineto BL Bsquare} bind def +/S1 {BL [] 0 setdash 2 copy vpt Square fill Bsquare} bind def +/S2 {BL [] 0 setdash 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def +/S3 {BL [] 0 setdash 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare} bind def +/S4 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def +/S5 {BL [] 0 setdash 2 copy 2 copy vpt Square fill + exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def +/S6 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill Bsquare} bind def +/S7 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill + 2 copy vpt Square fill Bsquare} bind def +/S8 {BL [] 0 setdash 2 copy vpt sub vpt Square fill Bsquare} bind def +/S9 {BL [] 0 setdash 2 copy vpt sub vpt vpt2 Rec fill Bsquare} bind def +/S10 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 
2 copy exch vpt sub exch vpt Square fill + Bsquare} bind def +/S11 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt2 vpt Rec fill + Bsquare} bind def +/S12 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill Bsquare} bind def +/S13 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill + 2 copy vpt Square fill Bsquare} bind def +/S14 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill + 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def +/S15 {BL [] 0 setdash 2 copy Bsquare fill Bsquare} bind def +/D0 {gsave translate 45 rotate 0 0 S0 stroke grestore} bind def +/D1 {gsave translate 45 rotate 0 0 S1 stroke grestore} bind def +/D2 {gsave translate 45 rotate 0 0 S2 stroke grestore} bind def +/D3 {gsave translate 45 rotate 0 0 S3 stroke grestore} bind def +/D4 {gsave translate 45 rotate 0 0 S4 stroke grestore} bind def +/D5 {gsave translate 45 rotate 0 0 S5 stroke grestore} bind def +/D6 {gsave translate 45 rotate 0 0 S6 stroke grestore} bind def +/D7 {gsave translate 45 rotate 0 0 S7 stroke grestore} bind def +/D8 {gsave translate 45 rotate 0 0 S8 stroke grestore} bind def +/D9 {gsave translate 45 rotate 0 0 S9 stroke grestore} bind def +/D10 {gsave translate 45 rotate 0 0 S10 stroke grestore} bind def +/D11 {gsave translate 45 rotate 0 0 S11 stroke grestore} bind def +/D12 {gsave translate 45 rotate 0 0 S12 stroke grestore} bind def +/D13 {gsave translate 45 rotate 0 0 S13 stroke grestore} bind def +/D14 {gsave translate 45 rotate 0 0 S14 stroke grestore} bind def +/D15 {gsave translate 45 rotate 0 0 S15 stroke grestore} bind def +/DiaE {stroke [] 0 setdash vpt add M + hpt neg vpt neg V hpt vpt neg V + hpt vpt V hpt neg vpt V closepath stroke} def +/BoxE {stroke [] 0 setdash exch hpt sub exch vpt add M + 0 vpt2 neg V hpt2 0 V 0 vpt2 V + hpt2 neg 0 V closepath stroke} def +/TriUE {stroke [] 0 setdash vpt 1.12 mul add M + hpt neg vpt -1.62 mul V + hpt 2 mul 0 V + hpt neg vpt 1.62 mul V 
closepath stroke} def +/TriDE {stroke [] 0 setdash vpt 1.12 mul sub M + hpt neg vpt 1.62 mul V + hpt 2 mul 0 V + hpt neg vpt -1.62 mul V closepath stroke} def +/PentE {stroke [] 0 setdash gsave + translate 0 hpt M 4 {72 rotate 0 hpt L} repeat + closepath stroke grestore} def +/CircE {stroke [] 0 setdash + hpt 0 360 arc stroke} def +/Opaque {gsave closepath 1 setgray fill grestore 0 setgray closepath} def +/DiaW {stroke [] 0 setdash vpt add M + hpt neg vpt neg V hpt vpt neg V + hpt vpt V hpt neg vpt V Opaque stroke} def +/BoxW {stroke [] 0 setdash exch hpt sub exch vpt add M + 0 vpt2 neg V hpt2 0 V 0 vpt2 V + hpt2 neg 0 V Opaque stroke} def +/TriUW {stroke [] 0 setdash vpt 1.12 mul add M + hpt neg vpt -1.62 mul V + hpt 2 mul 0 V + hpt neg vpt 1.62 mul V Opaque stroke} def +/TriDW {stroke [] 0 setdash vpt 1.12 mul sub M + hpt neg vpt 1.62 mul V + hpt 2 mul 0 V + hpt neg vpt -1.62 mul V Opaque stroke} def +/PentW {stroke [] 0 setdash gsave + translate 0 hpt M 4 {72 rotate 0 hpt L} repeat + Opaque stroke grestore} def +/CircW {stroke [] 0 setdash + hpt 0 360 arc Opaque stroke} def +/BoxFill {gsave Rec 1 setgray fill grestore} def +/Density { + /Fillden exch def + currentrgbcolor + /ColB exch def /ColG exch def /ColR exch def + /ColR ColR Fillden mul Fillden sub 1 add def + /ColG ColG Fillden mul Fillden sub 1 add def + /ColB ColB Fillden mul Fillden sub 1 add def + ColR ColG ColB setrgbcolor} def +/BoxColFill {gsave Rec PolyFill} def +/PolyFill {gsave Density fill grestore grestore} def +/h {rlineto rlineto rlineto gsave closepath fill grestore} bind def +% +% PostScript Level 1 Pattern Fill routine for rectangles +% Usage: x y w h s a XX PatternFill +% x,y = lower left corner of box to be filled +% w,h = width and height of box +% a = angle in degrees between lines and x-axis +% XX = 0/1 for no/yes cross-hatch +% +/PatternFill {gsave /PFa [ 9 2 roll ] def + PFa 0 get PFa 2 get 2 div add PFa 1 get PFa 3 get 2 div add translate + PFa 2 get -2 div PFa 3 get -2 div PFa 2 
get PFa 3 get Rec + gsave 1 setgray fill grestore clip + currentlinewidth 0.5 mul setlinewidth + /PFs PFa 2 get dup mul PFa 3 get dup mul add sqrt def + 0 0 M PFa 5 get rotate PFs -2 div dup translate + 0 1 PFs PFa 4 get div 1 add floor cvi + {PFa 4 get mul 0 M 0 PFs V} for + 0 PFa 6 get ne { + 0 1 PFs PFa 4 get div 1 add floor cvi + {PFa 4 get mul 0 2 1 roll M PFs 0 V} for + } if + stroke grestore} def +% +/languagelevel where + {pop languagelevel} {1} ifelse + 2 lt + {/InterpretLevel1 true def} + {/InterpretLevel1 Level1 def} + ifelse +% +% PostScript level 2 pattern fill definitions +% +/Level2PatternFill { +/Tile8x8 {/PaintType 2 /PatternType 1 /TilingType 1 /BBox [0 0 8 8] /XStep 8 /YStep 8} + bind def +/KeepColor {currentrgbcolor [/Pattern /DeviceRGB] setcolorspace} bind def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke} +>> matrix makepattern +/Pat1 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke + 0 4 M 4 8 L 8 4 L 4 0 L 0 4 L stroke} +>> matrix makepattern +/Pat2 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 0 0 M 0 8 L + 8 8 L 8 0 L 0 0 L fill} +>> matrix makepattern +/Pat3 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop -4 8 M 8 -4 L + 0 12 M 12 0 L stroke} +>> matrix makepattern +/Pat4 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop -4 0 M 8 12 L + 0 -4 M 12 8 L stroke} +>> matrix makepattern +/Pat5 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop -2 8 M 4 -4 L + 0 12 M 8 -4 L 4 12 M 10 0 L stroke} +>> matrix makepattern +/Pat6 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop -2 0 M 4 12 L + 0 -4 M 8 12 L 4 -4 M 10 8 L stroke} +>> matrix makepattern +/Pat7 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 8 -2 M -4 4 L + 12 0 M -4 8 L 12 4 M 0 10 L stroke} +>> matrix makepattern +/Pat8 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 0 -2 M 12 4 L + -4 0 M 12 8 L -4 4 M 8 10 L stroke} +>> matrix makepattern +/Pat9 exch def 
+/Pattern1 {PatternBgnd KeepColor Pat1 setpattern} bind def +/Pattern2 {PatternBgnd KeepColor Pat2 setpattern} bind def +/Pattern3 {PatternBgnd KeepColor Pat3 setpattern} bind def +/Pattern4 {PatternBgnd KeepColor Landscape {Pat5} {Pat4} ifelse setpattern} bind def +/Pattern5 {PatternBgnd KeepColor Landscape {Pat4} {Pat5} ifelse setpattern} bind def +/Pattern6 {PatternBgnd KeepColor Landscape {Pat9} {Pat6} ifelse setpattern} bind def +/Pattern7 {PatternBgnd KeepColor Landscape {Pat8} {Pat7} ifelse setpattern} bind def +} def +% +% +%End of PostScript Level 2 code +% +/PatternBgnd { + TransparentPatterns {} {gsave 1 setgray fill grestore} ifelse +} def +% +% Substitute for Level 2 pattern fill codes with +% grayscale if Level 2 support is not selected. +% +/Level1PatternFill { +/Pattern1 {0.250 Density} bind def +/Pattern2 {0.500 Density} bind def +/Pattern3 {0.750 Density} bind def +/Pattern4 {0.125 Density} bind def +/Pattern5 {0.375 Density} bind def +/Pattern6 {0.625 Density} bind def +/Pattern7 {0.875 Density} bind def +} def +% +% Now test for support of Level 2 code +% +Level1 {Level1PatternFill} {Level2PatternFill} ifelse +% +/Symbol-Oblique /Symbol findfont [1 0 .167 1 0 0] makefont +dup length dict begin {1 index /FID eq {pop pop} {def} ifelse} forall +currentdict end definefont pop +/MFshow { + { dup 5 get 3 ge + { 5 get 3 eq {gsave} {grestore} ifelse } + {dup dup 0 get findfont exch 1 get scalefont setfont + [ currentpoint ] exch dup 2 get 0 exch R dup 5 get 2 ne {dup dup 6 + get exch 4 get {Gshow} {stringwidth pop 0 R} ifelse }if dup 5 get 0 eq + {dup 3 get {2 get neg 0 exch R pop} {pop aload pop M} ifelse} {dup 5 + get 1 eq {dup 2 get exch dup 3 get exch 6 get stringwidth pop -2 div + dup 0 R} {dup 6 get stringwidth pop -2 div 0 R 6 get + show 2 index {aload pop M neg 3 -1 roll neg R pop pop} {pop pop pop + pop aload pop M} ifelse }ifelse }ifelse } + ifelse } + forall} def +/Gswidth {dup type /stringtype eq {stringwidth} {pop (n) stringwidth} ifelse} 
def +/MFwidth {0 exch { dup 5 get 3 ge { 5 get 3 eq { 0 } { pop } ifelse } + {dup 3 get{dup dup 0 get findfont exch 1 get scalefont setfont + 6 get Gswidth pop add} {pop} ifelse} ifelse} forall} def +/MLshow { currentpoint stroke M + 0 exch R + Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def +/MRshow { currentpoint stroke M + exch dup MFwidth neg 3 -1 roll R + Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def +/MCshow { currentpoint stroke M + exch dup MFwidth -2 div 3 -1 roll R + Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def +/XYsave { [( ) 1 2 true false 3 ()] } bind def +/XYrestore { [( ) 1 2 true false 4 ()] } bind def +Level1 SuppressPDFMark or +{} { +/SDict 10 dict def +systemdict /pdfmark known not { + userdict /pdfmark systemdict /cleartomark get put +} if +SDict begin [ + /Title (big.eps) + /Subject (gnuplot plot) + /Creator (gnuplot 4.6 patchlevel 0) + /Author (lilia) +% /Producer (gnuplot) +% /Keywords () + /CreationDate (Mon Jan 4 01:37:53 2016) + /DOCINFO pdfmark +end +} ifelse +end +%%EndProlog +%%Page: 1 1 +gnudict begin +gsave +doclip +50 50 translate +0.100 0.100 scale +90 rotate +0 -5040 translate +0 setgray +newpath +(Helvetica) findfont 140 scalefont setfont +BackgroundColor 0 lt 3 1 roll 0 lt exch 0 lt or or not {gsave BackgroundColor C clippath fill grestore} if +1.000 UL +LTb +938 448 M +63 0 V +5946 0 R +-63 0 V +stroke +854 448 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 10)] +] -46.7 MRshow +1.000 UL +LTb +938 781 M +31 0 V +5978 0 R +-31 0 V +938 976 M +31 0 V +5978 0 R +-31 0 V +938 1114 M +31 0 V +5978 0 R +-31 0 V +938 1221 M +31 0 V +5978 0 R +-31 0 V +938 1308 M +31 0 V +5978 0 R +-31 0 V +938 1382 M +31 0 V +5978 0 R +-31 0 V +938 1447 M +31 0 V +5978 0 R +-31 0 V +938 1503 M +31 0 V +5978 0 R +-31 0 V +938 1554 M +63 0 V +5946 0 R +-63 0 V +stroke +854 1554 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 100)] +] -46.7 MRshow +1.000 UL +LTb +938 1887 M +31 0 V +5978 0 R 
+-31 0 V +938 2081 M +31 0 V +5978 0 R +-31 0 V +938 2219 M +31 0 V +5978 0 R +-31 0 V +938 2327 M +31 0 V +5978 0 R +-31 0 V +938 2414 M +31 0 V +5978 0 R +-31 0 V +938 2488 M +31 0 V +5978 0 R +-31 0 V +938 2552 M +31 0 V +5978 0 R +-31 0 V +938 2609 M +31 0 V +5978 0 R +-31 0 V +938 2660 M +63 0 V +5946 0 R +-63 0 V +stroke +854 2660 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 1000)] +] -46.7 MRshow +1.000 UL +LTb +938 2992 M +31 0 V +5978 0 R +-31 0 V +938 3187 M +31 0 V +5978 0 R +-31 0 V +938 3325 M +31 0 V +5978 0 R +-31 0 V +938 3432 M +31 0 V +5978 0 R +-31 0 V +938 3520 M +31 0 V +5978 0 R +-31 0 V +938 3594 M +31 0 V +5978 0 R +-31 0 V +938 3658 M +31 0 V +5978 0 R +-31 0 V +938 3715 M +31 0 V +5978 0 R +-31 0 V +938 3765 M +63 0 V +5946 0 R +-63 0 V +stroke +854 3765 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 10000)] +] -46.7 MRshow +1.000 UL +LTb +938 4098 M +31 0 V +5978 0 R +-31 0 V +938 4293 M +31 0 V +5978 0 R +-31 0 V +938 4431 M +31 0 V +5978 0 R +-31 0 V +938 4538 M +31 0 V +5978 0 R +-31 0 V +938 4626 M +31 0 V +5978 0 R +-31 0 V +938 4700 M +31 0 V +5978 0 R +-31 0 V +938 4764 M +31 0 V +5978 0 R +-31 0 V +938 4820 M +31 0 V +5978 0 R +-31 0 V +938 4871 M +63 0 V +5946 0 R +-63 0 V +stroke +854 4871 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 100000)] +] -46.7 MRshow +1.000 UL +LTb +938 448 M +0 63 V +0 4360 R +0 -63 V +stroke +938 308 M +[ [(Helvetica) 140.0 0.0 true true 0 (0)] +] -46.7 MCshow +1.000 UL +LTb +1058 448 M +0 63 V +0 4360 R +0 -63 V +stroke +1058 308 M +[ [(Helvetica) 140.0 0.0 true true 0 (100000)] +] -46.7 MCshow +1.000 UL +LTb +2140 448 M +0 63 V +0 4360 R +0 -63 V +stroke +2140 308 M +[ [(Helvetica) 140.0 0.0 true true 0 (1e+06)] +] -46.7 MCshow +1.000 UL +LTb +3342 448 M +0 63 V +0 4360 R +0 -63 V +stroke +3342 308 M +[ [(Helvetica) 140.0 0.0 true true 0 (2e+06)] +] -46.7 MCshow +1.000 UL +LTb +4543 448 M +0 63 V +0 4360 R +0 -63 V +stroke +4543 308 M +[ [(Helvetica) 140.0 0.0 true true 0 (3e+06)] +] -46.7 MCshow +1.000 UL +LTb 
+5745 448 M +0 63 V +0 4360 R +0 -63 V +stroke +5745 308 M +[ [(Helvetica) 140.0 0.0 true true 0 (4e+06)] +] -46.7 MCshow +1.000 UL +LTb +6947 448 M +0 63 V +0 4360 R +0 -63 V +stroke +6947 308 M +[ [(Helvetica) 140.0 0.0 true true 0 (5e+06)] +] -46.7 MCshow +1.000 UL +LTb +1.000 UL +LTb +938 4871 N +938 448 L +6009 0 V +0 4423 V +-6009 0 V +Z stroke +LCb setrgbcolor +112 2659 M +currentpoint gsave translate -270 rotate 0 0 moveto +[ [(Helvetica) 140.0 0.0 true true 0 (Execution time in seconds)] +] -46.7 MCshow +grestore +LTb +LCb setrgbcolor +3942 98 M +[ [(Helvetica) 140.0 0.0 true true 0 (Polynomial's degrees)] +] -46.7 MCshow +LTb +1.000 UP +1.000 UL +LTb +% Begin plot #1 +1.000 UP +3.000 UL +LT0 +LCb setrgbcolor +6296 4738 M +[ [(Helvetica) 140.0 0.0 true true 0 (Full MPI)] +] -46.7 MRshow +LT0 +6380 4738 M +399 0 V +1058 936 M +2140 3106 L +1202 464 V +1201 371 V +1202 277 V +1202 198 V +1058 936 Pls +2140 3106 Pls +3342 3570 Pls +4543 3941 Pls +5745 4218 Pls +6947 4416 Pls +6579 4738 Pls +% End plot #1 +% Begin plot #2 +1.000 UP +3.000 UL +LT1 +LCb setrgbcolor +6296 4598 M +[ [(Helvetica) 140.0 0.0 true true 0 (Full OMP)] +] -46.7 MRshow +LT1 +6380 4598 M +399 0 V +1058 932 M +2140 3097 L +1202 431 V +1201 381 V +1202 324 V +1202 228 V +1058 932 Crs +2140 3097 Crs +3342 3528 Crs +4543 3909 Crs +5745 4233 Crs +6947 4461 Crs +6579 4598 Crs +% End plot #2 +1.000 UL +LTb +938 4871 N +938 448 L +6009 0 V +0 4423 V +-6009 0 V +Z stroke +1.000 UP +1.000 UL +LTb +stroke +grestore +end +showpage +%%Trailer +%%DocumentFonts: Helvetica +%%Pages: 1 diff --git a/Simulations/big.txt b/Simulations/big.txt new file mode 100644 index 0000000..f43d1af --- /dev/null +++ b/Simulations/big.txt @@ -0,0 +1,7 @@ +#degree FMPI FOMP SMPI SOMP +100000 27.6039 27.3746 +1000000 2532.79 2489.06 +2000000 6652.78 6105.16 +3000000 14415.7 13488 +4000000 25693.2 26495 +5000000 38787.6 42620.5 diff --git a/big.pdf b/big.pdf new file mode 100644 index 0000000..c4b846a Binary files /dev/null 
and b/big.pdf differ diff --git a/paper.tex b/paper.tex index 9ebca6f..7064995 100644 --- a/paper.tex +++ b/paper.tex @@ -466,7 +466,8 @@ Finding polynomial roots rapidly and accurately is the main objective of our wor %This paper is organized as follows. In Section~\ref{sec2} we recall the Ehrlich-Aberth method. In section~\ref{sec3} we present EA algorithm on single GPU. In section~\ref{sec4} we propose the EA algorithm implementation on Multi-GPU for (OpenMP-CUDA) approach and (MPI-CUDA) approach. In sectioné\ref{sec5} we present our experiments and discus it. Finally, Section~\ref{sec6} concludes this paper and gives some hints for future research directions in this topic.} -The paper is organized as follows. In Section~\ref{sec2} we present three different parallel programming models OpenMP, MPI and CUDA. +The paper is organized as follows. In Section~\ref{sec2} we present three different parallel programming models OpenMP, MPI and CUDA. In Section~\ref{sec3} we present the Ehrlich-Aberth algorithm on a single GPU. In Section~\ref{sec4} we propose the parallel implementations of the Ehrlich-Aberth algorithm using the OpenMP and MPI approaches. + \LZK{A revoir toute cette organization} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -505,6 +506,9 @@ CUDA (Compute Unified Device Architecture) is a parallel computing architecture %We introduced three paradigms of parallel programming. Our objective consists in implementing a root finding polynomial algorithm on multiple GPUs. To this end, it is primordial to know how to manage CUDA contexts of different GPUs. A direct method for controlling the various GPUs is to use as many threads or processes as GPU devices. We can choose the GPU index based on the identifier of OpenMP thread or the rank of the MPI process. Both approaches will be investigated. 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + \section{The EA algorithm on a single GPU} \label{sec3} \subsection{The EA method} @@ -860,33 +864,40 @@ This is due to the use of MPI parallel paradigm that divides the problem computa -%\begin{figure}[htbp] -%\centering - % \includegraphics[angle=-90,width=0.5\textwidth]{Sparse} -%\caption{Comparaison between MPI and OpenMP versions of the Ehrlich-Aberth method for solving sparse plynomials on GPUs} -%\label{fig:05} -%\end{figure} +\begin{figure}[htbp] +\centering + \includegraphics[angle=-90,width=0.5\textwidth]{Sparse} +\caption{Comparison between MPI and OpenMP versions of the Ehrlich-Aberth method for solving sparse polynomials on GPUs.} +\label{fig:05} +\end{figure} -%\begin{figure}[htbp] -%\centering - % \includegraphics[angle=-90,width=0.5\textwidth]{Full} -%\caption{Comparaison between MPI and OpenMP versions of the Ehrlich-Aberth method for solving full polynomials on GPUs} -%\label{fig:06} -%\end{figure} +\begin{figure}[htbp] +\centering + \includegraphics[angle=-90,width=0.5\textwidth]{Full} +\caption{Comparison between MPI and OpenMP versions of the Ehrlich-Aberth method for solving full polynomials on GPUs.} +\label{fig:06} +\end{figure} -%\begin{figure}[htbp] -%\centering - % \includegraphics[angle=-90,width=0.5\textwidth]{MPI} -%\caption{Comparaison of execution times of the Ehrlich-Aberth method for solving sparse and full polynomials on GPUs with distributed memory paradigm using MPI} -%\label{fig:07} -%\end{figure} +\begin{figure}[htbp] +\centering + \includegraphics[angle=-90,width=0.5\textwidth]{MPI} +\caption{Comparison of execution times of the Ehrlich-Aberth method for solving sparse and full polynomials on GPUs with distributed memory paradigm using MPI.} +\label{fig:07} +\end{figure} -%\begin{figure}[htbp] -%\centering - % \includegraphics[angle=-90,width=0.5\textwidth]{OMP} -%\caption{Comparaison of execution times of the 
Ehrlich-Aberth method for solving sparse and full polynomials on GPUs with shared memory paradigm using OpenMP} -%\label{fig:08} -%\end{figure} +\begin{figure}[htbp] +\centering + \includegraphics[angle=-90,width=0.5\textwidth]{OMP} +\caption{Comparison of execution times of the Ehrlich-Aberth method for solving sparse and full polynomials on GPUs with shared memory paradigm using OpenMP.} +\label{fig:08} +\end{figure} + +\begin{figure}[htbp] +\centering + \includegraphics[angle=-90,width=0.5\textwidth]{big} +\caption{Execution times in seconds of the Ehrlich-Aberth method for solving full polynomials of high degrees on 4 GPUs.} +\label{fig:09} +\end{figure} % An example of a floating figure using the graphicx package. % Note that \label must occur AFTER (or within) \caption.