X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/bc21a4e60ae5e4ef525ce6933905824c6597401d..2373d6731790822c6e738cfa54aec1ccaf802222:/BookGPU/Chapters/chapter13/ch13.aux

diff --git a/BookGPU/Chapters/chapter13/ch13.aux b/BookGPU/Chapters/chapter13/ch13.aux
index 9460acd..750fdc1 100644
--- a/BookGPU/Chapters/chapter13/ch13.aux
+++ b/BookGPU/Chapters/chapter13/ch13.aux
@@ -5,86 +5,86 @@
 \@writefile{toc}{\author{Pierre Spit\IeC {\'e}ri}{}}
 \@writefile{toc}{\author{Jacques Bahi}{}}
 \@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse nonlinear systems of obstacle problems on GPU clusters}{277}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse nonlinear systems of obstacle problems on GPU clusters}{271}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
-\newlabel{ch13}{{12}{277}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{277}}
-\newlabel{ch13:sec:01}{{12.1}{277}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.2}Obstacle problems}{278}}
-\newlabel{ch13:sec:02}{{12.2}{278}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}Mathematical model}{278}}
-\newlabel{ch13:sec:02.01}{{12.2.1}{278}}
-\newlabel{ch13:eq:01}{{12.1}{278}}
-\newlabel{ch13:eq:02}{{12.2}{278}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}Discretization}{279}}
-\newlabel{ch13:sec:02.02}{{12.2.2}{279}}
-\newlabel{ch13:eq:03}{{12.3}{279}}
-\newlabel{ch13:eq:04}{{12.4}{279}}
-\newlabel{ch13:eq:05}{{12.5}{279}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel iterative method}{280}}
-\newlabel{ch13:sec:03}{{12.3}{280}}
-\newlabel{ch13:eq:06}{{12.6}{280}}
-\newlabel{ch13:eq:07}{{12.7}{280}}
-\newlabel{ch13:eq:08}{{12.8}{280}}
-\newlabel{ch13:eq:09}{{12.9}{280}}
-\newlabel{ch13:eq:10}{{12.10}{281}}
-\newlabel{ch13:eq:11}{{12.11}{281}}
-\newlabel{ch13:eq:12}{{12.12}{281}}
-\newlabel{ch13:eq:13}{{12.13}{282}}
-\newlabel{ch13:eq:14}{{12.14}{282}}
-\newlabel{ch13:eq:15}{{12.15}{282}}
-\newlabel{ch13:eq:16}{{12.16}{282}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.4}Parallel implementation on a GPU cluster}{283}}
-\newlabel{ch13:sec:04}{{12.4}{283}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces Data partitioning of a problem to be solved among $S=3\times 4$ computing nodes.\relax }}{283}}
-\newlabel{ch13:fig:01}{{12.1}{283}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {11}{\ignorespaces Parallel solving of the obstacle problem on a GPU cluster\relax }}{284}}
-\newlabel{ch13:alg:01}{{11}{284}}
-\newlabel{ch13:eq:18}{{12.17}{284}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {12}{\ignorespaces Parallel iterative solving of the nonlinear systems on a GPU cluster ($Solve()$ function)\relax }}{285}}
-\newlabel{ch13:alg:02}{{12}{285}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Decomposition of a sub-problem in a GPU into $nz$ slices.\relax }}{286}}
-\newlabel{ch13:fig:02}{{12.2}{286}}
-\newlabel{ch13:list:01}{{12.1}{286}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.1}Skeleton codes of a GPU kernel and a CPU function}{286}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Matrix constant coefficients in a three-dimensional domain.\relax }}{288}}
-\newlabel{ch13:fig:03}{{12.3}{288}}
-\newlabel{ch13:eq:17}{{12.18}{288}}
-\newlabel{ch13:list:02}{{12.2}{288}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.2}GPU kernels of the projected Richardson method}{288}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces Computation of a vector element with the projected Richardson method.\relax }}{290}}
-\newlabel{ch13:fig:04}{{12.4}{290}}
-\newlabel{ch13:list:03}{{12.3}{290}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.3}Memory access to the cache texture memory}{290}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.5}Experimental tests on a GPU cluster}{291}}
-\newlabel{ch13:sec:05}{{12.5}{291}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces GPU cluster of tests composed of 12 computing nodes (six machines, each with two GPUs.\relax }}{293}}
-\newlabel{ch13:fig:05}{{12.5}{293}}
-\@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 24 CPU cores.\relax }}{293}}
-\newlabel{ch13:tab:01}{{12.1}{293}}
-\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 12 GPUs.\relax }}{294}}
-\newlabel{ch13:tab:02}{{12.2}{294}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.6}Red-Black ordering technique}{294}}
-\newlabel{ch13:sec:06}{{12.6}{294}}
-\newlabel{ch13:list:04}{{12.4}{295}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.4}GPU kernels of the projected Richardson method using the red-black technique}{295}}
-\newlabel{ch13:fig:06.01}{{12.6(a)}{296}}
-\newlabel{sub@ch13:fig:06.01}{{(a)}{296}}
-\newlabel{ch13:fig:06.02}{{12.6(b)}{296}}
-\newlabel{sub@ch13:fig:06.02}{{(b)}{296}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Red-Black ordering for computing the iterate vector elements in a three-dimensional space.\relax }}{296}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Red-Black ordering on x, y and z axises}}}{296}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Red-Black ordering on y axis}}}{296}}
-\@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Execution times in seconds of the parallel projected Richardson method using read-black ordering technique implemented on a cluster of 12 GPUs.\relax }}{297}}
-\newlabel{ch13:tab:03}{{12.3}{297}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.7}{\ignorespaces Weak scaling of both synchronous and asynchronous algorithms of the projected Richardson method using red-black ordering technique.\relax }}{298}}
-\newlabel{ch13:fig:07}{{12.7}{298}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.7}Conclusion}{298}}
-\newlabel{ch13:sec:07}{{12.7}{298}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{299}}
+\newlabel{ch13}{{12}{271}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{271}}
+\newlabel{ch13:sec:01}{{12.1}{271}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.2}Obstacle problems}{272}}
+\newlabel{ch13:sec:02}{{12.2}{272}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}Mathematical model}{272}}
+\newlabel{ch13:sec:02.01}{{12.2.1}{272}}
+\newlabel{ch13:eq:01}{{12.1}{272}}
+\newlabel{ch13:eq:02}{{12.2}{272}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}Discretization}{273}}
+\newlabel{ch13:sec:02.02}{{12.2.2}{273}}
+\newlabel{ch13:eq:03}{{12.3}{273}}
+\newlabel{ch13:eq:04}{{12.4}{273}}
+\newlabel{ch13:eq:05}{{12.5}{273}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel iterative method}{274}}
+\newlabel{ch13:sec:03}{{12.3}{274}}
+\newlabel{ch13:eq:06}{{12.6}{274}}
+\newlabel{ch13:eq:07}{{12.7}{274}}
+\newlabel{ch13:eq:08}{{12.8}{274}}
+\newlabel{ch13:eq:09}{{12.9}{274}}
+\newlabel{ch13:eq:10}{{12.10}{275}}
+\newlabel{ch13:eq:11}{{12.11}{275}}
+\newlabel{ch13:eq:12}{{12.12}{275}}
+\newlabel{ch13:eq:13}{{12.13}{276}}
+\newlabel{ch13:eq:14}{{12.14}{276}}
+\newlabel{ch13:eq:15}{{12.15}{276}}
+\newlabel{ch13:eq:16}{{12.16}{276}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.4}Parallel implementation on a GPU cluster}{277}}
+\newlabel{ch13:sec:04}{{12.4}{277}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces Data partitioning of a problem to be solved among $S=3\times 4$ computing nodes.\relax }}{277}}
+\newlabel{ch13:fig:01}{{12.1}{277}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {11}{\ignorespaces Parallel solving of the obstacle problem on a GPU cluster\relax }}{278}}
+\newlabel{ch13:alg:01}{{11}{278}}
+\newlabel{ch13:eq:18}{{12.17}{278}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {12}{\ignorespaces Parallel iterative solving of the nonlinear systems on a GPU cluster ($Solve()$ function)\relax }}{279}}
+\newlabel{ch13:alg:02}{{12}{279}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Decomposition of a sub-problem in a GPU into $nz$ slices.\relax }}{280}}
+\newlabel{ch13:fig:02}{{12.2}{280}}
+\newlabel{ch13:list:01}{{12.1}{280}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.1}Skeleton codes of a GPU kernel and a CPU function}{280}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Matrix constant coefficients in a three-dimensional domain.\relax }}{282}}
+\newlabel{ch13:fig:03}{{12.3}{282}}
+\newlabel{ch13:eq:17}{{12.18}{282}}
+\newlabel{ch13:list:02}{{12.2}{282}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.2}GPU kernels of the projected Richardson method}{282}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces Computation of a vector element with the projected Richardson method.\relax }}{284}}
+\newlabel{ch13:fig:04}{{12.4}{284}}
+\newlabel{ch13:list:03}{{12.3}{284}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.3}Memory access to the cache texture memory}{284}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.5}Experimental tests on a GPU cluster}{285}}
+\newlabel{ch13:sec:05}{{12.5}{285}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces GPU cluster of tests composed of 12 computing nodes (six machines, each with two GPUs.\relax }}{287}}
+\newlabel{ch13:fig:05}{{12.5}{287}}
+\@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 24 CPU cores.\relax }}{287}}
+\newlabel{ch13:tab:01}{{12.1}{287}}
+\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 12 GPUs.\relax }}{288}}
+\newlabel{ch13:tab:02}{{12.2}{288}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.6}Red-Black ordering technique}{288}}
+\newlabel{ch13:sec:06}{{12.6}{288}}
+\newlabel{ch13:list:04}{{12.4}{289}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.4}GPU kernels of the projected Richardson method using the red-black technique}{289}}
+\newlabel{ch13:fig:06.01}{{12.6(a)}{290}}
+\newlabel{sub@ch13:fig:06.01}{{(a)}{290}}
+\newlabel{ch13:fig:06.02}{{12.6(b)}{290}}
+\newlabel{sub@ch13:fig:06.02}{{(b)}{290}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Red-Black ordering for computing the iterate vector elements in a three-dimensional space.\relax }}{290}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Red-Black ordering on x, y and z axises}}}{290}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Red-Black ordering on y axis}}}{290}}
+\@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Execution times in seconds of the parallel projected Richardson method using read-black ordering technique implemented on a cluster of 12 GPUs.\relax }}{291}}
+\newlabel{ch13:tab:03}{{12.3}{291}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.7}{\ignorespaces Weak scaling of both synchronous and asynchronous algorithms of the projected Richardson method using red-black ordering technique.\relax }}{292}}
+\newlabel{ch13:fig:07}{{12.7}{292}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.7}Conclusion}{292}}
+\newlabel{ch13:sec:07}{{12.7}{292}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{293}}
 \@setckpt{Chapters/chapter13/ch13}{
-\setcounter{page}{301}
+\setcounter{page}{295}
 \setcounter{equation}{18}
 \setcounter{enumi}{4}
 \setcounter{enumii}{0}