\relax \@writefile{toc}{\author{}{}} \@writefile{loa}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {chapter}{\numberline {11}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{249}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {section}{\numberline {11.1}Introduction}{249}} \newlabel{sec:01}{{11.1}{249}} \@writefile{toc}{\contentsline {section}{\numberline {11.2}Krylov iterative methods}{250}} \newlabel{sec:02}{{11.2}{250}} \newlabel{eq:01}{{11.1}{250}} \newlabel{eq:02}{{11.2}{250}} \newlabel{eq:03}{{11.3}{250}} \newlabel{eq:11}{{11.4}{251}} \@writefile{toc}{\contentsline {subsection}{\numberline {11.2.1}CG method}{251}} \newlabel{sec:02.01}{{11.2.1}{251}} \newlabel{eq:04}{{11.5}{251}} \newlabel{eq:05}{{11.6}{251}} \newlabel{eq:06}{{11.7}{251}} \newlabel{eq:07}{{11.8}{251}} \newlabel{eq:08}{{11.9}{251}} \newlabel{eq:09}{{11.10}{251}} \@writefile{loa}{\contentsline {algocf}{\numberline {9}{\ignorespaces Left-preconditioned CG method\relax }}{252}} \newlabel{alg:01}{{9}{252}} \newlabel{eq:10}{{11.11}{252}} \@writefile{toc}{\contentsline {subsection}{\numberline {11.2.2}GMRES method}{253}} \newlabel{sec:02.02}{{11.2.2}{253}} \newlabel{eq:12}{{11.12}{253}} \newlabel{eq:13}{{11.13}{253}} \newlabel{eq:14}{{11.14}{253}} \newlabel{eq:15}{{11.15}{253}} \newlabel{eq:16}{{11.16}{253}} \newlabel{eq:17}{{11.17}{253}} \newlabel{eq:18}{{11.18}{253}} \newlabel{eq:19}{{11.19}{253}} \@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{254}} \newlabel{alg:02}{{10}{254}} \@writefile{toc}{\contentsline {section}{\numberline {11.3}Parallel implementation on a GPU cluster}{255}} \newlabel{sec:03}{{11.3}{255}} \@writefile{toc}{\contentsline {subsection}{\numberline {11.3.1}Data partitioning}{255}} \newlabel{sec:03.01}{{11.3.1}{255}} \@writefile{lof}{\contentsline {figure}{\numberline {11.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{256}} \newlabel{fig:01}{{11.1}{256}} \@writefile{toc}{\contentsline {subsection}{\numberline {11.3.2}GPU computing}{256}} \newlabel{sec:03.02}{{11.3.2}{256}} \@writefile{toc}{\contentsline {subsection}{\numberline {11.3.3}Data communications}{257}} \newlabel{sec:03.03}{{11.3.3}{257}} \@writefile{lof}{\contentsline {figure}{\numberline {11.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{258}} \newlabel{fig:02}{{11.2}{258}} \@writefile{lof}{\contentsline {figure}{\numberline {11.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{259}} \newlabel{fig:03}{{11.3}{259}} \@writefile{lof}{\contentsline {figure}{\numberline {11.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{260}} \newlabel{fig:04}{{11.4}{260}} \@writefile{toc}{\contentsline {section}{\numberline {11.4}Experimental results}{260}} \newlabel{sec:04}{{11.4}{260}} \@writefile{lof}{\contentsline {figure}{\numberline {11.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis's collection.\relax }}{261}} \newlabel{fig:05}{{11.5}{261}} \@writefile{lot}{\contentsline {table}{\numberline {11.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis's collection.\relax }}{262}} \newlabel{tab:01}{{11.1}{262}} \@writefile{lot}{\contentsline {table}{\numberline {11.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{262}} \newlabel{tab:02}{{11.2}{262}} \@writefile{lot}{\contentsline {table}{\numberline {11.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{263}} \newlabel{tab:03}{{11.3}{263}} \newlabel{eq:20}{{11.20}{263}} \@writefile{lof}{\contentsline {figure}{\numberline {11.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{264}} \newlabel{fig:06}{{11.6}{264}} \@writefile{lot}{\contentsline {table}{\numberline {11.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis's collection.\relax }}{265}} \newlabel{tab:04}{{11.4}{265}} \@writefile{lot}{\contentsline {table}{\numberline {11.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{265}} \newlabel{tab:05}{{11.5}{265}} \@writefile{toc}{\contentsline {section}{\numberline {11.5}Hypergraph partitioning}{265}} \newlabel{sec:05}{{11.5}{265}} \@writefile{lot}{\contentsline {table}{\numberline {11.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{266}} \newlabel{tab:06}{{11.6}{266}} \@writefile{lot}{\contentsline {table}{\numberline {11.7}{\ignorespaces Main characteristics of sparse five-bands matrices generated from those of the Davis's collection.\relax }}{266}} \newlabel{tab:07}{{11.7}{266}} \@writefile{lof}{\contentsline {figure}{\numberline {11.7}{\ignorespaces Parallel generation of a large sparse five-bands matrix by four computing nodes.\relax }}{267}} \newlabel{fig:07}{{11.7}{267}} \@writefile{lot}{\contentsline {table}{\numberline {11.8}{\ignorespaces Performances of parallel CG solver for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs\relax }}{267}} \newlabel{tab:08}{{11.8}{267}} \@writefile{lot}{\contentsline {table}{\numberline {11.9}{\ignorespaces Performances of parallel GMRES solver for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs\relax }}{268}} \newlabel{tab:09}{{11.9}{268}} \@writefile{lof}{\contentsline {figure}{\numberline {11.8}{\ignorespaces An example of the hypergraph partitioning of a sparse matrix decomposed between three computing nodes.\relax }}{269}} \newlabel{fig:08}{{11.8}{269}} \@writefile{lot}{\contentsline {table}{\numberline {11.10}{\ignorespaces Performances of the parallel CG solver using hypergraph partitioning for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPU.\relax }}{270}} \newlabel{tab:10}{{11.10}{270}} \@writefile{lot}{\contentsline {table}{\numberline {11.11}{\ignorespaces Performances of the parallel GMRES solver using hypergraph partitioning for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPU.\relax }}{271}} \newlabel{tab:11}{{11.11}{271}} \@writefile{lot}{\contentsline {table}{\numberline {11.12}{\ignorespaces The total communication volume between 12 GPU computing nodes without and with the hypergraph partitioning method.\relax }}{272}} \newlabel{tab:12}{{11.12}{272}} \@writefile{lof}{\contentsline {figure}{\numberline {11.9}{\ignorespaces Weak-scaling of the parallel CG and GMRES solvers on a GPU cluster for solving large sparse linear systems.\relax }}{273}} \newlabel{fig:09}{{11.9}{273}} \@writefile{toc}{\contentsline {section}{\numberline {11.6}Conclusion}{273}} \newlabel{sec:06}{{11.6}{273}} \@writefile{toc}{\contentsline {section}{Bibliography}{274}} \@setckpt{Chapters/chapter12/ch12}{ \setcounter{page}{276} \setcounter{equation}{25} \setcounter{enumi}{4} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{15} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{1} \setcounter{chapter}{11} \setcounter{section}{6} \setcounter{subsection}{0} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{9} \setcounter{table}{12} \setcounter{numauthors}{0} \setcounter{parentequation}{46} \setcounter{subfigure}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{lotdepth}{1} \setcounter{lstnumber}{50} \setcounter{ContinuedFloat}{0} \setcounter{AlgoLine}{29} \setcounter{algocfline}{10} \setcounter{algocfproc}{10} \setcounter{algocf}{10} \setcounter{proposition}{1} \setcounter{theorem}{0} \setcounter{exercise}{0} \setcounter{example}{0} \setcounter{definition}{0} \setcounter{proof}{1} \setcounter{lstlisting}{0} }