X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/18cb85fd3967a4c46c9582e8515cef9af3448269..bc21a4e60ae5e4ef525ce6933905824c6597401d:/BookGPU/Chapters/chapter13/ch13.aux diff --git a/BookGPU/Chapters/chapter13/ch13.aux b/BookGPU/Chapters/chapter13/ch13.aux index 140e1c4..9460acd 100644 --- a/BookGPU/Chapters/chapter13/ch13.aux +++ b/BookGPU/Chapters/chapter13/ch13.aux @@ -1,82 +1,87 @@ \relax -\@writefile{toc}{\author{}{}} +\@writefile{toc}{\author{Lilia Ziane Khodja}{}} +\@writefile{toc}{\author{Ming Chau}{}} +\@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}} +\@writefile{toc}{\author{Pierre Spit\IeC {\'e}ri}{}} +\@writefile{toc}{\author{Jacques Bahi}{}} \@writefile{loa}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse nonlinear systems of obstacle problems on GPU clusters}{277}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} +\newlabel{ch13}{{12}{277}} \@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{277}} -\newlabel{sec:01}{{12.1}{277}} +\newlabel{ch13:sec:01}{{12.1}{277}} \@writefile{toc}{\contentsline {section}{\numberline {12.2}Obstacle problems}{278}} -\newlabel{sec:02}{{12.2}{278}} +\newlabel{ch13:sec:02}{{12.2}{278}} \@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}Mathematical model}{278}} -\newlabel{sec:02.01}{{12.2.1}{278}} -\newlabel{eq:01}{{12.1}{278}} -\newlabel{eq:02}{{12.2}{279}} +\newlabel{ch13:sec:02.01}{{12.2.1}{278}} +\newlabel{ch13:eq:01}{{12.1}{278}} +\newlabel{ch13:eq:02}{{12.2}{278}} \@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}Discretization}{279}} -\newlabel{sec:02.02}{{12.2.2}{279}} -\newlabel{eq:03}{{12.3}{279}} -\newlabel{eq:04}{{12.4}{279}} -\newlabel{eq:05}{{12.5}{280}} +\newlabel{ch13:sec:02.02}{{12.2.2}{279}} +\newlabel{ch13:eq:03}{{12.3}{279}} +\newlabel{ch13:eq:04}{{12.4}{279}} +\newlabel{ch13:eq:05}{{12.5}{279}} \@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel iterative method}{280}} -\newlabel{sec:03}{{12.3}{280}} -\newlabel{eq:06}{{12.6}{280}} -\newlabel{eq:07}{{12.7}{280}} -\newlabel{eq:08}{{12.8}{281}} -\newlabel{eq:09}{{12.9}{281}} -\newlabel{eq:10}{{12.10}{281}} -\newlabel{eq:11}{{12.11}{281}} -\newlabel{eq:12}{{12.12}{281}} -\newlabel{eq:13}{{12.13}{282}} -\newlabel{eq:14}{{12.14}{282}} -\newlabel{eq:15}{{12.15}{282}} -\newlabel{eq:16}{{12.16}{282}} +\newlabel{ch13:sec:03}{{12.3}{280}} +\newlabel{ch13:eq:06}{{12.6}{280}} +\newlabel{ch13:eq:07}{{12.7}{280}} +\newlabel{ch13:eq:08}{{12.8}{280}} +\newlabel{ch13:eq:09}{{12.9}{280}} +\newlabel{ch13:eq:10}{{12.10}{281}} +\newlabel{ch13:eq:11}{{12.11}{281}} +\newlabel{ch13:eq:12}{{12.12}{281}} +\newlabel{ch13:eq:13}{{12.13}{282}} +\newlabel{ch13:eq:14}{{12.14}{282}} +\newlabel{ch13:eq:15}{{12.15}{282}} +\newlabel{ch13:eq:16}{{12.16}{282}} \@writefile{toc}{\contentsline {section}{\numberline {12.4}Parallel implementation on a GPU cluster}{283}} -\newlabel{sec:04}{{12.4}{283}} +\newlabel{ch13:sec:04}{{12.4}{283}} \@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces Data partitioning of a problem to be solved among $S=3\times 4$ computing nodes.\relax }}{283}} -\newlabel{fig:01}{{12.1}{283}} +\newlabel{ch13:fig:01}{{12.1}{283}} \@writefile{loa}{\contentsline {algocf}{\numberline {11}{\ignorespaces Parallel solving of the obstacle problem on a GPU cluster\relax }}{284}} -\newlabel{alg:01}{{11}{284}} -\newlabel{eq:18}{{12.17}{284}} +\newlabel{ch13:alg:01}{{11}{284}} +\newlabel{ch13:eq:18}{{12.17}{284}} \@writefile{loa}{\contentsline {algocf}{\numberline {12}{\ignorespaces Parallel iterative solving of the nonlinear systems on a GPU cluster ($Solve()$ function)\relax }}{285}} -\newlabel{alg:02}{{12}{285}} +\newlabel{ch13:alg:02}{{12}{285}} \@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Decomposition of a sub-problem in a GPU into $nz$ slices.\relax }}{286}} -\newlabel{fig:02}{{12.2}{286}} -\newlabel{list:01}{{12.1}{286}} +\newlabel{ch13:fig:02}{{12.2}{286}} +\newlabel{ch13:list:01}{{12.1}{286}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {12.1}Skeleton codes of a GPU kernel and a CPU function}{286}} \@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Matrix constant coefficients in a three-dimensional domain.\relax }}{288}} -\newlabel{fig:03}{{12.3}{288}} -\newlabel{eq:17}{{12.18}{288}} -\newlabel{list:02}{{12.2}{289}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.2}GPU kernels of the projected Richardson method}{289}} +\newlabel{ch13:fig:03}{{12.3}{288}} +\newlabel{ch13:eq:17}{{12.18}{288}} +\newlabel{ch13:list:02}{{12.2}{288}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.2}GPU kernels of the projected Richardson method}{288}} \@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces Computation of a vector element with the projected Richardson method.\relax }}{290}} -\newlabel{fig:04}{{12.4}{290}} -\newlabel{list:03}{{12.3}{290}} +\newlabel{ch13:fig:04}{{12.4}{290}} +\newlabel{ch13:list:03}{{12.3}{290}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {12.3}Memory access to the cache texture memory}{290}} \@writefile{toc}{\contentsline {section}{\numberline {12.5}Experimental tests on a GPU cluster}{291}} -\newlabel{sec:05}{{12.5}{291}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces GPU cluster of tests composed of 12 computing nodes (six machines, each with two GPUs.\relax }}{292}} -\newlabel{fig:05}{{12.5}{292}} +\newlabel{ch13:sec:05}{{12.5}{291}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces GPU cluster of tests composed of 12 computing nodes (six machines, each with two GPUs.\relax }}{293}} +\newlabel{ch13:fig:05}{{12.5}{293}} \@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 24 CPU cores.\relax }}{293}} -\newlabel{tab:01}{{12.1}{293}} -\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 12 GPUs.\relax }}{293}} -\newlabel{tab:02}{{12.2}{293}} +\newlabel{ch13:tab:01}{{12.1}{293}} +\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 12 GPUs.\relax }}{294}} +\newlabel{ch13:tab:02}{{12.2}{294}} \@writefile{toc}{\contentsline {section}{\numberline {12.6}Red-Black ordering technique}{294}} -\newlabel{sec:06}{{12.6}{294}} -\newlabel{fig:06.01}{{12.6(a)}{295}} -\newlabel{sub@fig:06.01}{{(a)}{295}} -\newlabel{fig:06.02}{{12.6(b)}{295}} -\newlabel{sub@fig:06.02}{{(b)}{295}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Red-black ordering for computing the iterate vector elements in a three-dimensional space.\relax }}{295}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Red-black ordering on x, y and z axises}}}{295}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Red-black ordering on y axis}}}{295}} -\newlabel{list:04}{{12.4}{296}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.4}GPU kernels of the projected Richardson method using the red-black technique}{296}} +\newlabel{ch13:sec:06}{{12.6}{294}} +\newlabel{ch13:list:04}{{12.4}{295}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.4}GPU kernels of the projected Richardson method using the red-black technique}{295}} +\newlabel{ch13:fig:06.01}{{12.6(a)}{296}} +\newlabel{sub@ch13:fig:06.01}{{(a)}{296}} +\newlabel{ch13:fig:06.02}{{12.6(b)}{296}} +\newlabel{sub@ch13:fig:06.02}{{(b)}{296}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Red-Black ordering for computing the iterate vector elements in a three-dimensional space.\relax }}{296}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Red-Black ordering on x, y and z axises}}}{296}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Red-Black ordering on y axis}}}{296}} \@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Execution times in seconds of the parallel projected Richardson method using read-black ordering technique implemented on a cluster of 12 GPUs.\relax }}{297}} -\newlabel{tab:03}{{12.3}{297}} +\newlabel{ch13:tab:03}{{12.3}{297}} \@writefile{lof}{\contentsline {figure}{\numberline {12.7}{\ignorespaces Weak scaling of both synchronous and asynchronous algorithms of the projected Richardson method using red-black ordering technique.\relax }}{298}} -\newlabel{fig:07}{{12.7}{298}} -\@writefile{toc}{\contentsline {section}{\numberline {12.7}Conclusion}{299}} -\newlabel{sec:07}{{12.7}{299}} +\newlabel{ch13:fig:07}{{12.7}{298}} +\@writefile{toc}{\contentsline {section}{\numberline {12.7}Conclusion}{298}} +\newlabel{ch13:sec:07}{{12.7}{298}} \@writefile{toc}{\contentsline {section}{Bibliography}{299}} \@setckpt{Chapters/chapter13/ch13}{ \setcounter{page}{301}