\@writefile{toc}{\contentsline {subsection}{\numberline {6.2.3}Overlapping with sequences of transfers and computations}{88}}
\newlabel{algo:ch6p1overlapseqsequence}{{6.2}{89}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.2}Generic scheme explicitly overlapping MPI communications with sequences of CUDA CPU/GPU transfers and CUDA GPU computations}{89}}
-\@writefile{lof}{\contentsline {figure}{\numberline {6.3}{\ignorespaces Overlap of internode CPU communications with a streamed sequence of CPU/GPU data transfers and GPU computations.\relax }}{90}}
-\newlabel{fig:ch6p1overlapstreamsequence}{{6.3}{90}}
+\@writefile{lof}{\contentsline {figure}{\numberline {6.3}{\ignorespaces Overlap of internode CPU communications with a streamed sequence of CPU/GPU data transfers and GPU computations.\relax }}{91}}
+\newlabel{fig:ch6p1overlapstreamsequence}{{6.3}{91}}
\newlabel{algo:ch6p1overlapstreamsequence}{{6.3}{91}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.3}Generic scheme explicitly overlapping MPI communications with streamed sequences of CUDA CPU/GPU transfers and CUDA GPU computations}{91}}
-\@writefile{lof}{\contentsline {figure}{\numberline {6.4}{\ignorespaces Complete overlap of internode CPU communications, CPU/GPU data transfers and GPU computations, interleaving computation-communication iterations\relax }}{93}}
-\newlabel{fig:ch6p1overlapinterleaved}{{6.4}{93}}
\@writefile{toc}{\contentsline {subsection}{\numberline {6.2.4}Interleaved communications-transfers-computations overlapping}{93}}
+\@writefile{lof}{\contentsline {figure}{\numberline {6.4}{\ignorespaces Complete overlap of internode CPU communications, CPU/GPU data transfers and GPU computations, interleaving computation-communication iterations\relax }}{94}}
+\newlabel{fig:ch6p1overlapinterleaved}{{6.4}{94}}
\newlabel{algo:ch6p1overlapinterleaved}{{6.4}{94}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.4}Generic scheme explicitly overlapping MPI communications, CUDA CPU/GPU transfers and CUDA GPU computations, interleaving computation-communication iterations}{94}}
\@writefile{toc}{\contentsline {subsection}{\numberline {6.2.5}Experimental validation}{96}}
\newlabel{ch6:part2}{{6.3}{98}}
\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Synchronous iterative scheme\relax }}{98}}
\newlabel{algo:ch6p2sync}{{3}{98}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {4}{\ignorespaces Asynchronous iterative scheme\relax }}{98}}
-\newlabel{algo:ch6p2async}{{4}{98}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.1}A basic asynchronous scheme}{99}}
-\newlabel{ch6:p2BasicAsync}{{6.3.1}{99}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {4}{\ignorespaces Asynchronous iterative scheme\relax }}{99}}
+\newlabel{algo:ch6p2async}{{4}{99}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.1}A basic asynchronous scheme}{100}}
+\newlabel{ch6:p2BasicAsync}{{6.3.1}{100}}
\newlabel{algo:ch6p2BasicAsync}{{6.5}{100}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.5}Initialization of the basic asynchronous scheme}{100}}
\newlabel{algo:ch6p2BasicAsyncComp}{{6.6}{101}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.6}Computing function in the basic asynchronous scheme}{101}}
-\newlabel{algo:ch6p2BasicAsyncSendings}{{6.7}{102}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.7}Sending function in the basic asynchronous scheme}{102}}
+\newlabel{algo:ch6p2BasicAsyncSendings}{{6.7}{103}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.7}Sending function in the basic asynchronous scheme}{103}}
\newlabel{algo:ch6p2BasicAsyncReceptions}{{6.8}{103}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.8}Reception function in the basic asynchronous scheme}{103}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.2}Synchronization of the asynchronous scheme}{104}}
-\newlabel{ch6:p2SsyncOverAsync}{{6.3.2}{104}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.2}Synchronization of the asynchronous scheme}{105}}
+\newlabel{ch6:p2SsyncOverAsync}{{6.3.2}{105}}
\newlabel{algo:ch6p2Sync}{{6.9}{105}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.9}Initialization of the synchronized scheme}{105}}
\newlabel{algo:ch6p2SyncComp}{{6.10}{106}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.10}Computing function in the synchronized scheme}{106}}
-\newlabel{algo:ch6p2SyncReceptions}{{6.11}{107}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.11}Reception function in the synchronized scheme}{107}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.3}Asynchronous scheme using MPI, OpenMP and CUDA}{108}}
-\newlabel{ch6:p2GPUAsync}{{6.3.3}{108}}
-\newlabel{algo:ch6p2AsyncSyncComp}{{6.12}{109}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.12}Computing function in the final asynchronous scheme}{109}}
+\newlabel{algo:ch6p2SyncReceptions}{{6.11}{108}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.11}Reception function in the synchronized scheme}{108}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.3}Asynchronous scheme using MPI, OpenMP and CUDA}{109}}
+\newlabel{ch6:p2GPUAsync}{{6.3.3}{109}}
+\newlabel{algo:ch6p2AsyncSyncComp}{{6.12}{110}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.12}Computing function in the final asynchronous scheme}{110}}
\newlabel{algo:ch6p2syncGPU}{{6.13}{111}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.13}Computing function in the final asynchronous scheme}{111}}
-\newlabel{algo:ch6p2FullOverAsyncMain}{{6.14}{113}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.14}Initialization of the main process of complete overlap with asynchronism}{113}}
-\newlabel{algo:ch6p2FullOverAsyncComp1}{{6.15}{114}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.15}Computing function in the final asynchronous scheme with CPU/GPU overlap}{114}}
-\newlabel{algo:ch6p2FullOverAsyncComp2}{{6.16}{115}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.16}Auxiliary computing function in the final asynchronous scheme with CPU/GPU overlap}{115}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.4}Experimental validation}{116}}
-\newlabel{sec:ch6p2expes}{{6.3.4}{116}}
-\@writefile{lof}{\contentsline {figure}{\numberline {6.6}{\ignorespaces Computation times of the test application in synchronous and asynchronous modes.\relax }}{117}}
-\newlabel{fig:ch6p2syncasync}{{6.6}{117}}
-\@writefile{lof}{\contentsline {figure}{\numberline {6.7}{\ignorespaces Computation times with or without overlap of Jacobian updatings in asynchronous mode.\relax }}{118}}
-\newlabel{fig:ch6p2aux}{{6.7}{118}}
-\@writefile{toc}{\contentsline {section}{\numberline {6.4}Perspective: A unifying programming model}{119}}
-\newlabel{sec:ch6p3unify}{{6.4}{119}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.1}Resources}{119}}
-\newlabel{sec:ch6p3resources}{{6.4.1}{119}}
-\newlabel{algo:ch6p3ORWLresources}{{6.17}{120}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.17}Declaration of ORWL resources for a block-cyclic matrix multiplication}{120}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.2}Control}{120}}
-\newlabel{sec:ch6p3ORWLcontrol}{{6.4.2}{120}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.3}Example: block-cyclic matrix multiplication (MM)}{121}}
-\newlabel{sec:ch6p3ORWLMM}{{6.4.3}{121}}
-\newlabel{algo:ch6p3ORWLBCCMM}{{6.18}{121}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.18}Block-cyclic matrix multiplication, high level per task view}{121}}
-\newlabel{algo:ch6p3ORWLlcopy}{{6.19}{122}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.19}An iterative local copy operation}{122}}
-\newlabel{algo:ch6p3ORWLrcopy}{{6.20}{122}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.20}An iterative remote copy operation as part of a block cyclic matrix multiplication task}{122}}
-\newlabel{algo:ch6p3ORWLtrans}{{6.21}{122}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.21}An iterative GPU transfer and compute operation as part of a block cyclic matrix multiplication task}{122}}
-\newlabel{algo:ch6p3ORWLdecl}{{6.22}{123}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.22}Dynamic declaration of handles to represent the resources}{123}}
+\newlabel{algo:ch6p2FullOverAsyncMain}{{6.14}{114}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.14}Initialization of the main process of complete overlap with asynchronism}{114}}
+\newlabel{algo:ch6p2FullOverAsyncComp1}{{6.15}{115}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.15}Computing function in the final asynchronous scheme with CPU/GPU overlap}{115}}
+\newlabel{algo:ch6p2FullOverAsyncComp2}{{6.16}{116}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.16}Auxiliary computing function in the final asynchronous scheme with CPU/GPU overlap}{116}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.4}Experimental validation}{117}}
+\newlabel{sec:ch6p2expes}{{6.3.4}{117}}
+\@writefile{lof}{\contentsline {figure}{\numberline {6.6}{\ignorespaces Computation times of the test application in synchronous and asynchronous modes.\relax }}{118}}
+\newlabel{fig:ch6p2syncasync}{{6.6}{118}}
+\@writefile{lof}{\contentsline {figure}{\numberline {6.7}{\ignorespaces Computation times with or without overlap of Jacobian updatings in asynchronous mode.\relax }}{119}}
+\newlabel{fig:ch6p2aux}{{6.7}{119}}
+\@writefile{toc}{\contentsline {section}{\numberline {6.4}Perspective: A unifying programming model}{120}}
+\newlabel{sec:ch6p3unify}{{6.4}{120}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.1}Resources}{120}}
+\newlabel{sec:ch6p3resources}{{6.4.1}{120}}
+\newlabel{algo:ch6p3ORWLresources}{{6.17}{121}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.17}Declaration of ORWL resources for a block-cyclic matrix multiplication}{121}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.2}Control}{121}}
+\newlabel{sec:ch6p3ORWLcontrol}{{6.4.2}{121}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.3}Example: block-cyclic matrix multiplication (MM)}{122}}
+\newlabel{sec:ch6p3ORWLMM}{{6.4.3}{122}}
+\newlabel{algo:ch6p3ORWLBCCMM}{{6.18}{122}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.18}Block-cyclic matrix multiplication, high level per task view}{122}}
+\newlabel{algo:ch6p3ORWLlcopy}{{6.19}{123}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.19}An iterative local copy operation}{123}}
+\newlabel{algo:ch6p3ORWLrcopy}{{6.20}{123}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.20}An iterative remote copy operation as part of a block cyclic matrix multiplication task}{123}}
+\newlabel{algo:ch6p3ORWLtrans}{{6.21}{123}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.21}An iterative GPU transfer and compute operation as part of a block cyclic matrix multiplication task}{123}}
+\newlabel{algo:ch6p3ORWLdecl}{{6.22}{124}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.22}Dynamic declaration of handles to represent the resources}{124}}
\newlabel{algo:ch6p3ORWLinit}{{6.23}{124}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.23}Dynamic initialization of access mode and priorities}{124}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.4}Tasks and operations}{124}}
-\newlabel{sec:ch6p3tasks}{{6.4.4}{124}}
-\@writefile{toc}{\contentsline {section}{\numberline {6.5}Conclusion}{125}}
-\newlabel{ch6:conclu}{{6.5}{125}}
-\@writefile{toc}{\contentsline {section}{\numberline {6.6}Glossary}{125}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{126}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.4.4}Tasks and operations}{125}}
+\newlabel{sec:ch6p3tasks}{{6.4.4}{125}}
+\@writefile{toc}{\contentsline {section}{\numberline {6.5}Conclusion}{126}}
+\newlabel{ch6:conclu}{{6.5}{126}}
+\@writefile{toc}{\contentsline {section}{\numberline {6.6}Glossary}{126}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{127}}
\@setckpt{Chapters/chapter6/ch6}{
-\setcounter{page}{128}
+\setcounter{page}{129}
\setcounter{equation}{0}
\setcounter{enumi}{4}
\setcounter{enumii}{0}
\setcounter{lotdepth}{1}
\setcounter{lstnumber}{17}
\setcounter{ContinuedFloat}{0}
-\setcounter{AlgoLine}{0}
+\setcounter{AlgoLine}{6}
\setcounter{algocfline}{4}
\setcounter{algocfproc}{4}
\setcounter{algocf}{4}