X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/beb4b32a38394d2c847ba5a733a4906781d95881..2373d6731790822c6e738cfa54aec1ccaf802222:/BookGPU/Chapters/chapter6/ch6.aux?ds=sidebyside diff --git a/BookGPU/Chapters/chapter6/ch6.aux b/BookGPU/Chapters/chapter6/ch6.aux index 2be0894..af524e9 100644 --- a/BookGPU/Chapters/chapter6/ch6.aux +++ b/BookGPU/Chapters/chapter6/ch6.aux @@ -21,13 +21,13 @@ \@writefile{toc}{\contentsline {subsection}{\numberline {6.2.3}Overlapping with sequences of transfers and computations}{88}} \newlabel{algo:ch6p1overlapseqsequence}{{6.2}{89}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.2}Generic scheme explicitly overlapping MPI communications with sequences of CUDA CPU/GPU transfers and CUDA GPU computations}{89}} -\@writefile{lof}{\contentsline {figure}{\numberline {6.3}{\ignorespaces Overlap of internode CPU communications with a streamed sequence of CPU/GPU data transfers and GPU computations.\relax }}{90}} -\newlabel{fig:ch6p1overlapstreamsequence}{{6.3}{90}} +\@writefile{lof}{\contentsline {figure}{\numberline {6.3}{\ignorespaces Overlap of internode CPU communications with a streamed sequence of CPU/GPU data transfers and GPU computations.\relax }}{91}} +\newlabel{fig:ch6p1overlapstreamsequence}{{6.3}{91}} \newlabel{algo:ch6p1overlapstreamsequence}{{6.3}{91}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.3}Generic scheme explicitly overlapping MPI communications with streamed sequences of CUDA CPU/GPU transfers and CUDA GPU computations}{91}} -\@writefile{lof}{\contentsline {figure}{\numberline {6.4}{\ignorespaces Complete overlap of internode CPU communications, CPU/GPU data transfers and GPU computations, interleaving computation-communication iterations\relax }}{93}} -\newlabel{fig:ch6p1overlapinterleaved}{{6.4}{93}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.2.4}Interleaved communications-transfers-computations overlapping}{93}} +\@writefile{lof}{\contentsline {figure}{\numberline {6.4}{\ignorespaces Complete overlap of internode CPU communications, CPU/GPU data transfers and GPU computations, interleaving computation-communication iterations\relax }}{94}} +\newlabel{fig:ch6p1overlapinterleaved}{{6.4}{94}} \newlabel{algo:ch6p1overlapinterleaved}{{6.4}{94}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.4}Generic scheme explicitly overlapping MPI communications, CUDA CPU/GPU transfers and CUDA GPU computations, interleaving computation-communication iterations}{94}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.2.5}Experimental validation}{96}} @@ -39,74 +39,74 @@ \newlabel{ch6:part2}{{6.3}{98}} \@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Synchronous iterative scheme\relax }}{98}} \newlabel{algo:ch6p2sync}{{3}{98}} -\@writefile{loa}{\contentsline {algocf}{\numberline {4}{\ignorespaces Asynchronous iterative scheme\relax }}{98}} -\newlabel{algo:ch6p2async}{{4}{98}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.1}A basic asynchronous scheme}{99}} -\newlabel{ch6:p2BasicAsync}{{6.3.1}{99}} +\@writefile{loa}{\contentsline {algocf}{\numberline {4}{\ignorespaces Asynchronous iterative scheme\relax }}{99}} +\newlabel{algo:ch6p2async}{{4}{99}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.1}A basic asynchronous scheme}{100}} +\newlabel{ch6:p2BasicAsync}{{6.3.1}{100}} \newlabel{algo:ch6p2BasicAsync}{{6.5}{100}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.5}Initialization of the basic asynchronous scheme}{100}} \newlabel{algo:ch6p2BasicAsyncComp}{{6.6}{101}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.6}Computing function in the basic asynchronous scheme}{101}} -\newlabel{algo:ch6p2BasicAsyncSendings}{{6.7}{102}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.7}Sending function in the basic asynchronous scheme}{102}} +\newlabel{algo:ch6p2BasicAsyncSendings}{{6.7}{103}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.7}Sending function in the basic asynchronous scheme}{103}} \newlabel{algo:ch6p2BasicAsyncReceptions}{{6.8}{103}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.8}Reception function in the basic asynchronous scheme}{103}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.2}Synchronization of the asynchronous scheme}{104}} -\newlabel{ch6:p2SsyncOverAsync}{{6.3.2}{104}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.2}Synchronization of the asynchronous scheme}{105}} +\newlabel{ch6:p2SsyncOverAsync}{{6.3.2}{105}} \newlabel{algo:ch6p2Sync}{{6.9}{105}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.9}Initialization of the synchronized scheme}{105}} \newlabel{algo:ch6p2SyncComp}{{6.10}{106}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.10}Computing function in the synchronized scheme}{106}} -\newlabel{algo:ch6p2SyncReceptions}{{6.11}{107}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.11}Reception function in the synchronized scheme}{107}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.3}Asynchronous scheme using MPI, OpenMP and CUDA}{108}} -\newlabel{ch6:p2GPUAsync}{{6.3.3}{108}} -\newlabel{algo:ch6p2AsyncSyncComp}{{6.12}{109}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.12}Computing function in the final asynchronous scheme}{109}} +\newlabel{algo:ch6p2SyncReceptions}{{6.11}{108}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.11}Reception function in the synchronized scheme}{108}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.3}Asynchronous scheme using MPI, OpenMP and CUDA}{109}} +\newlabel{ch6:p2GPUAsync}{{6.3.3}{109}} +\newlabel{algo:ch6p2AsyncSyncComp}{{6.12}{110}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.12}Computing function in the final asynchronous scheme}{110}} \newlabel{algo:ch6p2syncGPU}{{6.13}{111}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.13}Computing function in the final asynchronous scheme}{111}} -\newlabel{algo:ch6p2FullOverAsyncMain}{{6.14}{113}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.14}Initialization of the main process of complete overlap with asynchronism}{113}} -\newlabel{algo:ch6p2FullOverAsyncComp1}{{6.15}{114}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.15}Computing function in the final asynchronous scheme with CPU/GPU overlap}{114}} -\newlabel{algo:ch6p2FullOverAsyncComp2}{{6.16}{115}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.16}Auxiliary computing function in the final asynchronous scheme with CPU/GPU overlap}{115}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.4}Experimental validation}{116}} -\newlabel{sec:ch6p2expes}{{6.3.4}{116}} -\@writefile{lof}{\contentsline {figure}{\numberline {6.6}{\ignorespaces Computation times of the test application in synchronous and asynchronous modes.\relax }}{117}} -\newlabel{fig:ch6p2syncasync}{{6.6}{117}} -\@writefile{lof}{\contentsline {figure}{\numberline {6.7}{\ignorespaces Computation times with or without overlap of Jacobian updatings in asynchronous mode.\relax }}{118}} -\newlabel{fig:ch6p2aux}{{6.7}{118}} -\@writefile{toc}{\contentsline {section}{\numberline {6.4}Perspective: A unifying programming model}{119}} -\newlabel{sec:ch6p3unify}{{6.4}{119}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.1}Resources}{119}} -\newlabel{sec:ch6p3resources}{{6.4.1}{119}} -\newlabel{algo:ch6p3ORWLresources}{{6.17}{120}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.17}Declaration of ORWL resources for a block-cyclic matrix multiplication}{120}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.2}Control}{120}} -\newlabel{sec:ch6p3ORWLcontrol}{{6.4.2}{120}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.3}Example: block-cyclic matrix multiplication (MM)}{121}} -\newlabel{sec:ch6p3ORWLMM}{{6.4.3}{121}} -\newlabel{algo:ch6p3ORWLBCCMM}{{6.18}{121}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.18}Block-cyclic matrix multiplication, high level per task view}{121}} -\newlabel{algo:ch6p3ORWLlcopy}{{6.19}{122}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.19}An iterative local copy operation}{122}} -\newlabel{algo:ch6p3ORWLrcopy}{{6.20}{122}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.20}An iterative remote copy operation as part of a block cyclic matrix multiplication task}{122}} -\newlabel{algo:ch6p3ORWLtrans}{{6.21}{122}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.21}An iterative GPU transfer and compute operation as part of a block cyclic matrix multiplication task}{122}} -\newlabel{algo:ch6p3ORWLdecl}{{6.22}{123}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.22}Dynamic declaration of handles to represent the resources}{123}} +\newlabel{algo:ch6p2FullOverAsyncMain}{{6.14}{114}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.14}Initialization of the main process of complete overlap with asynchronism}{114}} +\newlabel{algo:ch6p2FullOverAsyncComp1}{{6.15}{115}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.15}Computing function in the final asynchronous scheme with CPU/GPU overlap}{115}} +\newlabel{algo:ch6p2FullOverAsyncComp2}{{6.16}{116}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.16}Auxiliary computing function in the final asynchronous scheme with CPU/GPU overlap}{116}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.4}Experimental validation}{117}} +\newlabel{sec:ch6p2expes}{{6.3.4}{117}} +\@writefile{lof}{\contentsline {figure}{\numberline {6.6}{\ignorespaces Computation times of the test application in synchronous and asynchronous modes.\relax }}{118}} +\newlabel{fig:ch6p2syncasync}{{6.6}{118}} +\@writefile{lof}{\contentsline {figure}{\numberline {6.7}{\ignorespaces Computation times with or without overlap of Jacobian updatings in asynchronous mode.\relax }}{119}} +\newlabel{fig:ch6p2aux}{{6.7}{119}} +\@writefile{toc}{\contentsline {section}{\numberline {6.4}Perspective: A unifying programming model}{120}} +\newlabel{sec:ch6p3unify}{{6.4}{120}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.1}Resources}{120}} +\newlabel{sec:ch6p3resources}{{6.4.1}{120}} +\newlabel{algo:ch6p3ORWLresources}{{6.17}{121}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.17}Declaration of ORWL resources for a block-cyclic matrix multiplication}{121}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.2}Control}{121}} +\newlabel{sec:ch6p3ORWLcontrol}{{6.4.2}{121}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.3}Example: block-cyclic matrix multiplication (MM)}{122}} +\newlabel{sec:ch6p3ORWLMM}{{6.4.3}{122}} +\newlabel{algo:ch6p3ORWLBCCMM}{{6.18}{122}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.18}Block-cyclic matrix multiplication, high level per task view}{122}} +\newlabel{algo:ch6p3ORWLlcopy}{{6.19}{123}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.19}An iterative local copy operation}{123}} +\newlabel{algo:ch6p3ORWLrcopy}{{6.20}{123}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.20}An iterative remote copy operation as part of a block cyclic matrix multiplication task}{123}} +\newlabel{algo:ch6p3ORWLtrans}{{6.21}{123}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.21}An iterative GPU transfer and compute operation as part of a block cyclic matrix multiplication task}{123}} +\newlabel{algo:ch6p3ORWLdecl}{{6.22}{124}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.22}Dynamic declaration of handles to represent the resources}{124}} \newlabel{algo:ch6p3ORWLinit}{{6.23}{124}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.23}Dynamic initialization of access mode and priorities}{124}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.4}Tasks and operations}{124}} -\newlabel{sec:ch6p3tasks}{{6.4.4}{124}} -\@writefile{toc}{\contentsline {section}{\numberline {6.5}Conclusion}{125}} -\newlabel{ch6:conclu}{{6.5}{125}} -\@writefile{toc}{\contentsline {section}{\numberline {6.6}Glossary}{125}} -\@writefile{toc}{\contentsline {section}{Bibliography}{126}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.4}Tasks and operations}{125}} +\newlabel{sec:ch6p3tasks}{{6.4.4}{125}} +\@writefile{toc}{\contentsline {section}{\numberline {6.5}Conclusion}{126}} +\newlabel{ch6:conclu}{{6.5}{126}} +\@writefile{toc}{\contentsline {section}{\numberline {6.6}Glossary}{126}} +\@writefile{toc}{\contentsline {section}{Bibliography}{127}} \@setckpt{Chapters/chapter6/ch6}{ -\setcounter{page}{128} +\setcounter{page}{129} \setcounter{equation}{0} \setcounter{enumi}{4} \setcounter{enumii}{0} @@ -131,7 +131,7 @@ \setcounter{lotdepth}{1} \setcounter{lstnumber}{17} \setcounter{ContinuedFloat}{0} -\setcounter{AlgoLine}{0} +\setcounter{AlgoLine}{6} \setcounter{algocfline}{4} \setcounter{algocfproc}{4} \setcounter{algocf}{4}