]> AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter6/ch6.aux
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
e79cde5ff57ddb56f118c3b1afc41b8a0f7ed12c
[book_gpu.git] / BookGPU / Chapters / chapter6 / ch6.aux
1 \relax 
2 \@writefile{toc}{\author{Sylvain Contassot-Vivier}{}}
3 \@writefile{toc}{\author{Stephane Vialle}{}}
4 \@writefile{toc}{\author{Jens Gustedt}{}}
5 \@writefile{loa}{\addvspace {10\p@ }}
6 \@writefile{toc}{\contentsline {chapter}{\numberline {6}Development methodologies for GPU and cluster of GPUs}{81}}
7 \@writefile{lof}{\addvspace {10\p@ }}
8 \@writefile{lot}{\addvspace {10\p@ }}
9 \@writefile{toc}{\contentsline {section}{\numberline {6.1}Introduction}{82}}
10 \newlabel{ch6:intro}{{6.1}{82}}
11 \@writefile{toc}{\contentsline {section}{\numberline {6.2}General scheme of synchronous code with computation/communication overlapping in GPU clusters}{82}}
12 \newlabel{ch6:part1}{{6.2}{82}}
13 \@writefile{toc}{\contentsline {subsection}{\numberline {6.2.1}Synchronous parallel algorithms on GPU clusters}{82}}
14 \@writefile{lof}{\contentsline {figure}{\numberline {6.1}{\ignorespaces Native overlap of internode CPU communications with GPU computations.\relax }}{84}}
15 \newlabel{fig:ch6p1overlapnative}{{6.1}{84}}
16 \@writefile{toc}{\contentsline {subsection}{\numberline {6.2.2}Native overlap of CPU communications and GPU computations}{84}}
17 \newlabel{algo:ch6p1overlapnative}{{6.1}{85}}
18 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.1}Generic scheme implicitly overlapping MPI communications with CUDA GPU computations}{85}}
19 \@writefile{lof}{\contentsline {figure}{\numberline {6.2}{\ignorespaces Overlap of internode CPU communications with a sequence of CPU/GPU data transfers and GPU computations.\relax }}{86}}
20 \newlabel{fig:ch6p1overlapseqsequence}{{6.2}{86}}
21 \@writefile{toc}{\contentsline {subsection}{\numberline {6.2.3}Overlapping with sequences of transfers and computations}{86}}
22 \newlabel{algo:ch6p1overlapseqsequence}{{6.2}{87}}
23 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.2}Generic scheme explicitly overlapping MPI communications with sequences of CUDA CPU/GPU transfers and CUDA GPU computations}{87}}
24 \@writefile{lof}{\contentsline {figure}{\numberline {6.3}{\ignorespaces Overlap of internode CPU communications with a streamed sequence of CPU/GPU data transfers and GPU computations.\relax }}{88}}
25 \newlabel{fig:ch6p1overlapstreamsequence}{{6.3}{88}}
26 \newlabel{algo:ch6p1overlapstreamsequence}{{6.3}{89}}
27 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.3}Generic scheme explicitly overlapping MPI communications with streamed sequences of CUDA CPU/GPU transfers and CUDA GPU computations}{89}}
28 \@writefile{lof}{\contentsline {figure}{\numberline {6.4}{\ignorespaces Complete overlap of internode CPU communications, CPU/GPU data transfers and GPU computations, interleaving computation-communication iterations\relax }}{91}}
29 \newlabel{fig:ch6p1overlapinterleaved}{{6.4}{91}}
30 \@writefile{toc}{\contentsline {subsection}{\numberline {6.2.4}Interleaved communications-transfers-computations overlapping}{91}}
31 \newlabel{algo:ch6p1overlapinterleaved}{{6.4}{92}}
32 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.4}Generic scheme explicitly overlapping MPI communications, CUDA CPU/GPU transfers and CUDA GPU computations, interleaving computation-communication iterations}{92}}
33 \@writefile{toc}{\contentsline {subsection}{\numberline {6.2.5}Experimental validation}{94}}
34 \newlabel{ch6:p1expes}{{6.2.5}{94}}
35 \newlabel{ch6:p1block-cyclic}{{6.2.5}{94}}
36 \@writefile{lof}{\contentsline {figure}{\numberline {6.5}{\ignorespaces Experimental performances of different synchronous algorithms computing a dense matrix product\relax }}{95}}
37 \newlabel{fig:ch6p1syncexpematrixprod}{{6.5}{95}}
38 \@writefile{toc}{\contentsline {section}{\numberline {6.3}General scheme of asynchronous parallel code with computation/communication overlapping}{96}}
39 \newlabel{ch6:part2}{{6.3}{96}}
40 \@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces Synchronous iterative scheme\relax }}{96}}
41 \newlabel{algo:ch6p2sync}{{3}{96}}
42 \@writefile{loa}{\contentsline {algorithm}{\numberline {4}{\ignorespaces Asynchronous iterative scheme\relax }}{96}}
43 \newlabel{algo:ch6p2async}{{4}{96}}
44 \@writefile{toc}{\contentsline {subsection}{\numberline {6.3.1}A basic asynchronous scheme}{98}}
45 \newlabel{ch6:p2BasicAsync}{{6.3.1}{98}}
46 \newlabel{algo:ch6p2BasicAsync}{{6.5}{98}}
47 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.5}Initialization of the basic asynchronous scheme}{98}}
48 \newlabel{algo:ch6p2BasicAsyncComp}{{6.6}{99}}
49 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.6}Computing function in the basic asynchronous scheme}{99}}
50 \newlabel{algo:ch6p2BasicAsyncSendings}{{6.7}{100}}
51 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.7}Sending function in the basic asynchronous scheme}{100}}
52 \newlabel{algo:ch6p2BasicAsyncReceptions}{{6.8}{101}}
53 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.8}Reception function in the basic asynchronous scheme}{101}}
54 \@writefile{toc}{\contentsline {subsection}{\numberline {6.3.2}Synchronization of the asynchronous scheme}{102}}
55 \newlabel{ch6:p2SsyncOverAsync}{{6.3.2}{102}}
56 \newlabel{algo:ch6p2Sync}{{6.9}{103}}
57 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.9}Initialization of the synchronized scheme}{103}}
58 \newlabel{algo:ch6p2SyncComp}{{6.10}{104}}
59 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.10}Computing function in the synchronized scheme}{104}}
60 \newlabel{algo:ch6p2SyncReceptions}{{6.11}{105}}
61 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.11}Reception function in the synchronized scheme}{105}}
62 \@writefile{toc}{\contentsline {subsection}{\numberline {6.3.3}Asynchronous scheme using MPI, OpenMP and CUDA}{106}}
63 \newlabel{ch6:p2GPUAsync}{{6.3.3}{106}}
64 \newlabel{algo:ch6p2AsyncSyncComp}{{6.12}{108}}
65 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.12}Computing function in the final asynchronous scheme}{108}}
66 \newlabel{algo:ch6p2syncGPU}{{6.13}{109}}
67 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.13}Computing function in the final asynchronous scheme}{109}}
68 \newlabel{algo:ch6p2FullOverAsyncMain}{{6.14}{111}}
69 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.14}Initialization of the main process of complete overlap with asynchronism}{111}}
70 \newlabel{algo:ch6p2FullOverAsyncComp1}{{6.15}{112}}
71 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.15}Computing function in the final asynchronous scheme with CPU/GPU overlap}{112}}
72 \newlabel{algo:ch6p2FullOverAsyncComp2}{{6.16}{113}}
73 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.16}Auxiliary computing function in the final asynchronous scheme with CPU/GPU overlap}{113}}
74 \@writefile{toc}{\contentsline {subsection}{\numberline {6.3.4}Experimental validation}{114}}
75 \newlabel{sec:ch6p2expes}{{6.3.4}{114}}
76 \@writefile{lof}{\contentsline {figure}{\numberline {6.6}{\ignorespaces Computation times of the test application in synchronous and asynchronous modes.\relax }}{115}}
77 \newlabel{fig:ch6p2syncasync}{{6.6}{115}}
78 \@writefile{lof}{\contentsline {figure}{\numberline {6.7}{\ignorespaces Computation times with or without overlap of Jacobian updatings in asynchronous mode.\relax }}{116}}
79 \newlabel{fig:ch6p2aux}{{6.7}{116}}
80 \@writefile{toc}{\contentsline {section}{\numberline {6.4}Perspective: A unifying programming model}{117}}
81 \newlabel{sec:ch6p3unify}{{6.4}{117}}
82 \@writefile{toc}{\contentsline {subsection}{\numberline {6.4.1}Resources}{117}}
83 \newlabel{sec:ch6p3resources}{{6.4.1}{117}}
84 \newlabel{algo:ch6p3ORWLresources}{{6.17}{118}}
85 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.17}Declaration of ORWL resources for a block-cyclic matrix multiplication}{118}}
86 \@writefile{toc}{\contentsline {subsection}{\numberline {6.4.2}Control}{118}}
87 \newlabel{sec:ch6p3ORWLcontrol}{{6.4.2}{118}}
88 \@writefile{toc}{\contentsline {subsection}{\numberline {6.4.3}Example: block-cyclic matrix multiplication (MM)}{119}}
89 \newlabel{sec:ch6p3ORWLMM}{{6.4.3}{119}}
90 \newlabel{algo:ch6p3ORWLBCCMM}{{6.18}{119}}
91 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.18}Block-cyclic matrix multiplication, high level per task view}{119}}
92 \newlabel{algo:ch6p3ORWLlcopy}{{6.19}{120}}
93 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.19}An iterative local copy operation}{120}}
94 \newlabel{algo:ch6p3ORWLrcopy}{{6.20}{120}}
95 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.20}An iterative remote copy operation as part of a block cyclic matrix multiplication task}{120}}
96 \newlabel{algo:ch6p3ORWLtrans}{{6.21}{120}}
97 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.21}An iterative GPU transfer and compute operation as part of a block cyclic matrix multiplication task}{120}}
98 \newlabel{algo:ch6p3ORWLdecl}{{6.22}{121}}
99 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.22}Dynamic declaration of handles to represent the resources}{121}}
100 \newlabel{algo:ch6p3ORWLinit}{{6.23}{122}}
101 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.23}Dynamic initialization of access mode and priorities}{122}}
102 \@writefile{toc}{\contentsline {subsection}{\numberline {6.4.4}Tasks and operations}{122}}
103 \newlabel{sec:ch6p3tasks}{{6.4.4}{122}}
104 \@writefile{toc}{\contentsline {section}{\numberline {6.5}Conclusion}{123}}
105 \newlabel{ch6:conclu}{{6.5}{123}}
106 \@writefile{toc}{\contentsline {section}{\numberline {6.6}Glossary}{123}}
107 \@writefile{toc}{\contentsline {section}{Bibliography}{124}}
108 \@setckpt{Chapters/chapter6/ch6}{
109 \setcounter{page}{126}
110 \setcounter{equation}{0}
111 \setcounter{enumi}{4}
112 \setcounter{enumii}{0}
113 \setcounter{enumiii}{0}
114 \setcounter{enumiv}{21}
115 \setcounter{footnote}{0}
116 \setcounter{mpfootnote}{0}
117 \setcounter{part}{1}
118 \setcounter{chapter}{6}
119 \setcounter{section}{6}
120 \setcounter{subsection}{0}
121 \setcounter{subsubsection}{0}
122 \setcounter{paragraph}{0}
123 \setcounter{subparagraph}{0}
124 \setcounter{figure}{7}
125 \setcounter{table}{0}
126 \setcounter{numauthors}{0}
127 \setcounter{parentequation}{8}
128 \setcounter{subfigure}{0}
129 \setcounter{lofdepth}{1}
130 \setcounter{subtable}{0}
131 \setcounter{lotdepth}{1}
132 \setcounter{lstnumber}{17}
133 \setcounter{ContinuedFloat}{0}
134 \setcounter{float@type}{16}
135 \setcounter{algorithm}{4}
136 \setcounter{ALC@unique}{0}
137 \setcounter{ALC@line}{0}
138 \setcounter{ALC@rem}{0}
139 \setcounter{ALC@depth}{0}
140 \setcounter{AlgoLine}{0}
141 \setcounter{algocfline}{0}
142 \setcounter{algocfproc}{0}
143 \setcounter{algocf}{0}
144 \setcounter{proposition}{0}
145 \setcounter{theorem}{0}
146 \setcounter{exercise}{0}
147 \setcounter{example}{0}
148 \setcounter{definition}{0}
149 \setcounter{proof}{0}
150 \setcounter{lstlisting}{23}
151 }