X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/c8b308b236018000b34d6d58d5bdc4cb8313111b..390b92667ac7e43b5bbd34c7fd7b8c8314a05b3d:/BookGPU/Chapters/chapter13/ch13.aux?ds=inline diff --git a/BookGPU/Chapters/chapter13/ch13.aux b/BookGPU/Chapters/chapter13/ch13.aux index 750fdc1..f830fe0 100644 --- a/BookGPU/Chapters/chapter13/ch13.aux +++ b/BookGPU/Chapters/chapter13/ch13.aux @@ -5,86 +5,86 @@ \@writefile{toc}{\author{Pierre Spit\IeC {\'e}ri}{}} \@writefile{toc}{\author{Jacques Bahi}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse nonlinear systems of obstacle problems on GPU clusters}{271}} +\@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse nonlinear systems of obstacle problems on GPU clusters}{279}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{ch13}{{12}{271}} -\@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{271}} -\newlabel{ch13:sec:01}{{12.1}{271}} -\@writefile{toc}{\contentsline {section}{\numberline {12.2}Obstacle problems}{272}} -\newlabel{ch13:sec:02}{{12.2}{272}} -\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}Mathematical model}{272}} -\newlabel{ch13:sec:02.01}{{12.2.1}{272}} -\newlabel{ch13:eq:01}{{12.1}{272}} -\newlabel{ch13:eq:02}{{12.2}{272}} -\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}Discretization}{273}} -\newlabel{ch13:sec:02.02}{{12.2.2}{273}} -\newlabel{ch13:eq:03}{{12.3}{273}} -\newlabel{ch13:eq:04}{{12.4}{273}} -\newlabel{ch13:eq:05}{{12.5}{273}} -\@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel iterative method}{274}} -\newlabel{ch13:sec:03}{{12.3}{274}} -\newlabel{ch13:eq:06}{{12.6}{274}} -\newlabel{ch13:eq:07}{{12.7}{274}} -\newlabel{ch13:eq:08}{{12.8}{274}} -\newlabel{ch13:eq:09}{{12.9}{274}} -\newlabel{ch13:eq:10}{{12.10}{275}} -\newlabel{ch13:eq:11}{{12.11}{275}} -\newlabel{ch13:eq:12}{{12.12}{275}} -\newlabel{ch13:eq:13}{{12.13}{276}} -\newlabel{ch13:eq:14}{{12.14}{276}} -\newlabel{ch13:eq:15}{{12.15}{276}} -\newlabel{ch13:eq:16}{{12.16}{276}} -\@writefile{toc}{\contentsline {section}{\numberline {12.4}Parallel implementation on a GPU cluster}{277}} -\newlabel{ch13:sec:04}{{12.4}{277}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces Data partitioning of a problem to be solved among $S=3\times 4$ computing nodes.\relax }}{277}} -\newlabel{ch13:fig:01}{{12.1}{277}} -\@writefile{loa}{\contentsline {algocf}{\numberline {11}{\ignorespaces Parallel solving of the obstacle problem on a GPU cluster\relax }}{278}} -\newlabel{ch13:alg:01}{{11}{278}} -\newlabel{ch13:eq:18}{{12.17}{278}} -\@writefile{loa}{\contentsline {algocf}{\numberline {12}{\ignorespaces Parallel iterative solving of the nonlinear systems on a GPU cluster ($Solve()$ function)\relax }}{279}} -\newlabel{ch13:alg:02}{{12}{279}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Decomposition of a sub-problem in a GPU into $nz$ slices.\relax }}{280}} -\newlabel{ch13:fig:02}{{12.2}{280}} -\newlabel{ch13:list:01}{{12.1}{280}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.1}Skeleton codes of a GPU kernel and a CPU function}{280}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Matrix constant coefficients in a three-dimensional domain.\relax }}{282}} -\newlabel{ch13:fig:03}{{12.3}{282}} -\newlabel{ch13:eq:17}{{12.18}{282}} -\newlabel{ch13:list:02}{{12.2}{282}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.2}GPU kernels of the projected Richardson method}{282}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces Computation of a vector element with the projected Richardson method.\relax }}{284}} -\newlabel{ch13:fig:04}{{12.4}{284}} -\newlabel{ch13:list:03}{{12.3}{284}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.3}Memory access to the cache texture memory}{284}} -\@writefile{toc}{\contentsline {section}{\numberline {12.5}Experimental tests on a GPU cluster}{285}} -\newlabel{ch13:sec:05}{{12.5}{285}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces GPU cluster of tests composed of 12 computing nodes (six machines, each with two GPUs.\relax }}{287}} -\newlabel{ch13:fig:05}{{12.5}{287}} -\@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 24 CPU cores.\relax }}{287}} -\newlabel{ch13:tab:01}{{12.1}{287}} -\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 12 GPUs.\relax }}{288}} -\newlabel{ch13:tab:02}{{12.2}{288}} -\@writefile{toc}{\contentsline {section}{\numberline {12.6}Red-Black ordering technique}{288}} -\newlabel{ch13:sec:06}{{12.6}{288}} -\newlabel{ch13:list:04}{{12.4}{289}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.4}GPU kernels of the projected Richardson method using the red-black technique}{289}} -\newlabel{ch13:fig:06.01}{{12.6(a)}{290}} -\newlabel{sub@ch13:fig:06.01}{{(a)}{290}} -\newlabel{ch13:fig:06.02}{{12.6(b)}{290}} -\newlabel{sub@ch13:fig:06.02}{{(b)}{290}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Red-Black ordering for computing the iterate vector elements in a three-dimensional space.\relax }}{290}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Red-Black ordering on x, y and z axises}}}{290}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Red-Black ordering on y axis}}}{290}} -\@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Execution times in seconds of the parallel projected Richardson method using read-black ordering technique implemented on a cluster of 12 GPUs.\relax }}{291}} -\newlabel{ch13:tab:03}{{12.3}{291}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.7}{\ignorespaces Weak scaling of both synchronous and asynchronous algorithms of the projected Richardson method using red-black ordering technique.\relax }}{292}} -\newlabel{ch13:fig:07}{{12.7}{292}} -\@writefile{toc}{\contentsline {section}{\numberline {12.7}Conclusion}{292}} -\newlabel{ch13:sec:07}{{12.7}{292}} -\@writefile{toc}{\contentsline {section}{Bibliography}{293}} +\newlabel{ch13}{{12}{279}} +\@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{279}} +\newlabel{ch13:sec:01}{{12.1}{279}} +\@writefile{toc}{\contentsline {section}{\numberline {12.2}Obstacle problems}{280}} +\newlabel{ch13:sec:02}{{12.2}{280}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}Mathematical model}{280}} +\newlabel{ch13:sec:02.01}{{12.2.1}{280}} +\newlabel{ch13:eq:01}{{12.1}{280}} +\newlabel{ch13:eq:02}{{12.2}{280}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}Discretization}{281}} +\newlabel{ch13:sec:02.02}{{12.2.2}{281}} +\newlabel{ch13:eq:03}{{12.3}{281}} +\newlabel{ch13:eq:04}{{12.4}{281}} +\newlabel{ch13:eq:05}{{12.5}{281}} +\@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel iterative method}{282}} +\newlabel{ch13:sec:03}{{12.3}{282}} +\newlabel{ch13:eq:06}{{12.6}{282}} +\newlabel{ch13:eq:07}{{12.7}{282}} +\newlabel{ch13:eq:08}{{12.8}{282}} +\newlabel{ch13:eq:09}{{12.9}{282}} +\newlabel{ch13:eq:10}{{12.10}{283}} +\newlabel{ch13:eq:11}{{12.11}{283}} +\newlabel{ch13:eq:12}{{12.12}{283}} +\newlabel{ch13:eq:13}{{12.13}{284}} +\newlabel{ch13:eq:14}{{12.14}{284}} +\newlabel{ch13:eq:15}{{12.15}{284}} +\newlabel{ch13:eq:16}{{12.16}{284}} +\@writefile{toc}{\contentsline {section}{\numberline {12.4}Parallel implementation on a GPU cluster}{285}} +\newlabel{ch13:sec:04}{{12.4}{285}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces Data partitioning of a problem to be solved among $S=3\times 4$ computing nodes.\relax }}{285}} +\newlabel{ch13:fig:01}{{12.1}{285}} +\@writefile{loa}{\contentsline {algocf}{\numberline {11}{\ignorespaces Parallel solving of the obstacle problem on a GPU cluster\relax }}{286}} +\newlabel{ch13:alg:01}{{11}{286}} +\newlabel{ch13:eq:18}{{12.17}{286}} +\@writefile{loa}{\contentsline {algocf}{\numberline {12}{\ignorespaces Parallel iterative solving of the nonlinear systems on a GPU cluster ($Solve()$ function)\relax }}{287}} +\newlabel{ch13:alg:02}{{12}{287}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Decomposition of a sub-problem in a GPU into $nz$ slices.\relax }}{288}} +\newlabel{ch13:fig:02}{{12.2}{288}} +\newlabel{ch13:list:01}{{12.1}{288}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.1}Skeleton codes of a GPU kernel and a CPU function}{288}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Matrix constant coefficients in a three-dimensional domain.\relax }}{290}} +\newlabel{ch13:fig:03}{{12.3}{290}} +\newlabel{ch13:eq:17}{{12.18}{290}} +\newlabel{ch13:list:02}{{12.2}{290}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.2}GPU kernels of the projected Richardson method}{290}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces Computation of a vector element with the projected Richardson method.\relax }}{292}} +\newlabel{ch13:fig:04}{{12.4}{292}} +\newlabel{ch13:list:03}{{12.3}{292}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.3}Memory access to the cache texture memory}{292}} +\@writefile{toc}{\contentsline {section}{\numberline {12.5}Experimental tests on a GPU cluster}{293}} +\newlabel{ch13:sec:05}{{12.5}{293}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces GPU cluster of tests composed of 12 computing nodes (six machines, each with two GPUs.\relax }}{295}} +\newlabel{ch13:fig:05}{{12.5}{295}} +\@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 24 CPU cores.\relax }}{295}} +\newlabel{ch13:tab:01}{{12.1}{295}} +\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 12 GPUs.\relax }}{296}} +\newlabel{ch13:tab:02}{{12.2}{296}} +\@writefile{toc}{\contentsline {section}{\numberline {12.6}Red-Black ordering technique}{296}} +\newlabel{ch13:sec:06}{{12.6}{296}} +\newlabel{ch13:list:04}{{12.4}{297}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {12.4}GPU kernels of the projected Richardson method using the red-black technique}{297}} +\newlabel{ch13:fig:06.01}{{12.6(a)}{298}} +\newlabel{sub@ch13:fig:06.01}{{(a)}{298}} +\newlabel{ch13:fig:06.02}{{12.6(b)}{298}} +\newlabel{sub@ch13:fig:06.02}{{(b)}{298}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Red-Black ordering for computing the iterate vector elements in a three-dimensional space.\relax }}{298}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Red-Black ordering on x, y and z axises}}}{298}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Red-Black ordering on y axis}}}{298}} +\@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Execution times in seconds of the parallel projected Richardson method using read-black ordering technique implemented on a cluster of 12 GPUs.\relax }}{299}} +\newlabel{ch13:tab:03}{{12.3}{299}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.7}{\ignorespaces Weak scaling of both synchronous and asynchronous algorithms of the projected Richardson method using red-black ordering technique.\relax }}{300}} +\newlabel{ch13:fig:07}{{12.7}{300}} +\@writefile{toc}{\contentsline {section}{\numberline {12.7}Conclusion}{300}} +\newlabel{ch13:sec:07}{{12.7}{300}} +\@writefile{toc}{\contentsline {section}{Bibliography}{301}} \@setckpt{Chapters/chapter13/ch13}{ -\setcounter{page}{295} +\setcounter{page}{303} \setcounter{equation}{18} \setcounter{enumi}{4} \setcounter{enumii}{0} @@ -92,7 +92,7 @@ \setcounter{enumiv}{15} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} -\setcounter{part}{1} +\setcounter{part}{5} \setcounter{chapter}{12} \setcounter{section}{7} \setcounter{subsection}{0}