X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/bc21a4e60ae5e4ef525ce6933905824c6597401d..4083a275e6f2d901d47cfaf5bc5a21043366778f:/BookGPU/Chapters/chapter12/ch12.aux diff --git a/BookGPU/Chapters/chapter12/ch12.aux b/BookGPU/Chapters/chapter12/ch12.aux index 26d263a..d3b6ca3 100644 --- a/BookGPU/Chapters/chapter12/ch12.aux +++ b/BookGPU/Chapters/chapter12/ch12.aux @@ -3,107 +3,107 @@ \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}} \@writefile{toc}{\author{Jacques Bahi}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {11}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{249}} +\@writefile{toc}{\contentsline {chapter}{\numberline {11}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{251}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{ch12}{{11}{249}} -\@writefile{toc}{\contentsline {section}{\numberline {11.1}Introduction}{249}} -\newlabel{ch12:sec:01}{{11.1}{249}} -\@writefile{toc}{\contentsline {section}{\numberline {11.2}Krylov iterative methods}{250}} -\newlabel{ch12:sec:02}{{11.2}{250}} -\newlabel{ch12:eq:01}{{11.1}{250}} -\newlabel{ch12:eq:02}{{11.2}{250}} -\newlabel{ch12:eq:03}{{11.3}{250}} -\newlabel{ch12:eq:11}{{11.4}{251}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.1}CG method}{251}} -\newlabel{ch12:sec:02.01}{{11.2.1}{251}} -\newlabel{ch12:eq:04}{{11.5}{251}} -\newlabel{ch12:eq:05}{{11.6}{251}} -\newlabel{ch12:eq:06}{{11.7}{251}} -\newlabel{ch12:eq:07}{{11.8}{251}} -\newlabel{ch12:eq:08}{{11.9}{251}} -\newlabel{ch12:eq:09}{{11.10}{251}} -\@writefile{loa}{\contentsline {algocf}{\numberline {9}{\ignorespaces Left-preconditioned CG method\relax }}{252}} -\newlabel{ch12:alg:01}{{9}{252}} -\newlabel{ch12:eq:10}{{11.11}{252}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.2}GMRES method}{253}} -\newlabel{ch12:sec:02.02}{{11.2.2}{253}} -\newlabel{ch12:eq:12}{{11.12}{253}} -\newlabel{ch12:eq:13}{{11.13}{253}} -\newlabel{ch12:eq:14}{{11.14}{253}} -\newlabel{ch12:eq:15}{{11.15}{253}} -\newlabel{ch12:eq:16}{{11.16}{253}} -\newlabel{ch12:eq:17}{{11.17}{253}} -\newlabel{ch12:eq:18}{{11.18}{253}} -\newlabel{ch12:eq:19}{{11.19}{253}} -\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{254}} -\newlabel{ch12:alg:02}{{10}{254}} -\@writefile{toc}{\contentsline {section}{\numberline {11.3}Parallel implementation on a GPU cluster}{255}} -\newlabel{ch12:sec:03}{{11.3}{255}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.1}Data partitioning}{255}} -\newlabel{ch12:sec:03.01}{{11.3.1}{255}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{256}} -\newlabel{ch12:fig:01}{{11.1}{256}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.2}GPU computing}{256}} -\newlabel{ch12:sec:03.02}{{11.3.2}{256}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.3}Data communications}{257}} -\newlabel{ch12:sec:03.03}{{11.3.3}{257}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{258}} -\newlabel{ch12:fig:02}{{11.2}{258}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{259}} -\newlabel{ch12:fig:03}{{11.3}{259}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{260}} -\newlabel{ch12:fig:04}{{11.4}{260}} -\@writefile{toc}{\contentsline {section}{\numberline {11.4}Experimental results}{260}} -\newlabel{ch12:sec:04}{{11.4}{260}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis's collection.\relax }}{261}} -\newlabel{ch12:fig:05}{{11.5}{261}} -\@writefile{lot}{\contentsline {table}{\numberline {11.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis's collection.\relax }}{262}} -\newlabel{ch12:tab:01}{{11.1}{262}} -\@writefile{lot}{\contentsline {table}{\numberline {11.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{262}} -\newlabel{ch12:tab:02}{{11.2}{262}} -\@writefile{lot}{\contentsline {table}{\numberline {11.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{263}} -\newlabel{ch12:tab:03}{{11.3}{263}} -\newlabel{ch12:eq:20}{{11.20}{263}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{264}} -\newlabel{ch12:fig:06}{{11.6}{264}} -\@writefile{lot}{\contentsline {table}{\numberline {11.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis's collection.\relax }}{265}} -\newlabel{ch12:tab:04}{{11.4}{265}} -\@writefile{lot}{\contentsline {table}{\numberline {11.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{265}} -\newlabel{ch12:tab:05}{{11.5}{265}} -\@writefile{toc}{\contentsline {section}{\numberline {11.5}Hypergraph partitioning}{265}} -\newlabel{ch12:sec:05}{{11.5}{265}} -\@writefile{lot}{\contentsline {table}{\numberline {11.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{266}} -\newlabel{ch12:tab:06}{{11.6}{266}} -\@writefile{lot}{\contentsline {table}{\numberline {11.7}{\ignorespaces Main characteristics of sparse five-bands matrices generated from those of the Davis's collection.\relax }}{266}} -\newlabel{ch12:tab:07}{{11.7}{266}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.7}{\ignorespaces Parallel generation of a large sparse five-bands matrix by four computing nodes.\relax }}{267}} -\newlabel{ch12:fig:07}{{11.7}{267}} -\@writefile{lot}{\contentsline {table}{\numberline {11.8}{\ignorespaces Performances of parallel CG solver for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs\relax }}{267}} -\newlabel{ch12:tab:08}{{11.8}{267}} -\@writefile{lot}{\contentsline {table}{\numberline {11.9}{\ignorespaces Performances of parallel GMRES solver for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs\relax }}{268}} -\newlabel{ch12:tab:09}{{11.9}{268}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.8}{\ignorespaces An example of the hypergraph partitioning of a sparse matrix decomposed between three computing nodes.\relax }}{269}} -\newlabel{ch12:fig:08}{{11.8}{269}} -\@writefile{lot}{\contentsline {table}{\numberline {11.10}{\ignorespaces Performances of the parallel CG solver using hypergraph partitioning for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPU.\relax }}{270}} -\newlabel{ch12:tab:10}{{11.10}{270}} -\@writefile{lot}{\contentsline {table}{\numberline {11.11}{\ignorespaces Performances of the parallel GMRES solver using hypergraph partitioning for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPU.\relax }}{271}} -\newlabel{ch12:tab:11}{{11.11}{271}} -\@writefile{lot}{\contentsline {table}{\numberline {11.12}{\ignorespaces The total communication volume between 12 GPU computing nodes without and with the hypergraph partitioning method.\relax }}{272}} -\newlabel{ch12:tab:12}{{11.12}{272}} -\newlabel{ch12:fig:09.01}{{11.9(a)}{273}} -\newlabel{sub@ch12:fig:09.01}{{(a)}{273}} -\newlabel{ch12:fig:09.02}{{11.9(b)}{273}} -\newlabel{sub@ch12:fig:09.02}{{(b)}{273}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.9}{\ignorespaces Weak-scaling of the parallel CG and GMRES solvers on a GPU cluster for solving large sparse linear systems.\relax }}{273}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Sparse band matrices}}}{273}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Sparse five-bands matrices}}}{273}} -\newlabel{ch12:fig:09}{{11.9}{273}} -\@writefile{toc}{\contentsline {section}{\numberline {11.6}Conclusion}{273}} -\newlabel{ch12:sec:06}{{11.6}{273}} -\@writefile{toc}{\contentsline {section}{Bibliography}{274}} +\newlabel{ch12}{{11}{251}} +\@writefile{toc}{\contentsline {section}{\numberline {11.1}Introduction}{251}} +\newlabel{ch12:sec:01}{{11.1}{251}} +\@writefile{toc}{\contentsline {section}{\numberline {11.2}Krylov iterative methods}{252}} +\newlabel{ch12:sec:02}{{11.2}{252}} +\newlabel{ch12:eq:01}{{11.1}{252}} +\newlabel{ch12:eq:02}{{11.2}{252}} +\newlabel{ch12:eq:03}{{11.3}{252}} +\newlabel{ch12:eq:11}{{11.4}{253}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.1}CG method}{253}} +\newlabel{ch12:sec:02.01}{{11.2.1}{253}} +\newlabel{ch12:eq:04}{{11.5}{253}} +\newlabel{ch12:eq:05}{{11.6}{253}} +\newlabel{ch12:eq:06}{{11.7}{253}} +\newlabel{ch12:eq:07}{{11.8}{253}} +\newlabel{ch12:eq:08}{{11.9}{253}} +\newlabel{ch12:eq:09}{{11.10}{253}} +\@writefile{loa}{\contentsline {algocf}{\numberline {9}{\ignorespaces Left-preconditioned CG method\relax }}{254}} +\newlabel{ch12:alg:01}{{9}{254}} +\newlabel{ch12:eq:10}{{11.11}{254}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.2}GMRES method}{255}} +\newlabel{ch12:sec:02.02}{{11.2.2}{255}} +\newlabel{ch12:eq:12}{{11.12}{255}} +\newlabel{ch12:eq:13}{{11.13}{255}} +\newlabel{ch12:eq:14}{{11.14}{255}} +\newlabel{ch12:eq:15}{{11.15}{255}} +\newlabel{ch12:eq:16}{{11.16}{255}} +\newlabel{ch12:eq:17}{{11.17}{255}} +\newlabel{ch12:eq:18}{{11.18}{255}} +\newlabel{ch12:eq:19}{{11.19}{255}} +\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{256}} +\newlabel{ch12:alg:02}{{10}{256}} +\@writefile{toc}{\contentsline {section}{\numberline {11.3}Parallel implementation on a GPU cluster}{257}} +\newlabel{ch12:sec:03}{{11.3}{257}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.1}Data partitioning}{257}} +\newlabel{ch12:sec:03.01}{{11.3.1}{257}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{258}} +\newlabel{ch12:fig:01}{{11.1}{258}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.2}GPU computing}{258}} +\newlabel{ch12:sec:03.02}{{11.3.2}{258}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.3}Data communications}{259}} +\newlabel{ch12:sec:03.03}{{11.3.3}{259}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{260}} +\newlabel{ch12:fig:02}{{11.2}{260}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{261}} +\newlabel{ch12:fig:03}{{11.3}{261}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{262}} +\newlabel{ch12:fig:04}{{11.4}{262}} +\@writefile{toc}{\contentsline {section}{\numberline {11.4}Experimental results}{262}} +\newlabel{ch12:sec:04}{{11.4}{262}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis's collection.\relax }}{263}} +\newlabel{ch12:fig:05}{{11.5}{263}} +\@writefile{lot}{\contentsline {table}{\numberline {11.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis's collection.\relax }}{264}} +\newlabel{ch12:tab:01}{{11.1}{264}} +\@writefile{lot}{\contentsline {table}{\numberline {11.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{264}} +\newlabel{ch12:tab:02}{{11.2}{264}} +\@writefile{lot}{\contentsline {table}{\numberline {11.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{265}} +\newlabel{ch12:tab:03}{{11.3}{265}} +\newlabel{ch12:eq:20}{{11.20}{265}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{266}} +\newlabel{ch12:fig:06}{{11.6}{266}} +\@writefile{lot}{\contentsline {table}{\numberline {11.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis's collection.\relax }}{267}} +\newlabel{ch12:tab:04}{{11.4}{267}} +\@writefile{lot}{\contentsline {table}{\numberline {11.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{267}} +\newlabel{ch12:tab:05}{{11.5}{267}} +\@writefile{toc}{\contentsline {section}{\numberline {11.5}Hypergraph partitioning}{267}} +\newlabel{ch12:sec:05}{{11.5}{267}} +\@writefile{lot}{\contentsline {table}{\numberline {11.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{268}} +\newlabel{ch12:tab:06}{{11.6}{268}} +\@writefile{lot}{\contentsline {table}{\numberline {11.7}{\ignorespaces Main characteristics of sparse five-bands matrices generated from those of the Davis's collection.\relax }}{268}} +\newlabel{ch12:tab:07}{{11.7}{268}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.7}{\ignorespaces Parallel generation of a large sparse five-bands matrix by four computing nodes.\relax }}{269}} +\newlabel{ch12:fig:07}{{11.7}{269}} +\@writefile{lot}{\contentsline {table}{\numberline {11.8}{\ignorespaces Performances of parallel CG solver for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs\relax }}{269}} +\newlabel{ch12:tab:08}{{11.8}{269}} +\@writefile{lot}{\contentsline {table}{\numberline {11.9}{\ignorespaces Performances of parallel GMRES solver for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs\relax }}{270}} +\newlabel{ch12:tab:09}{{11.9}{270}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.8}{\ignorespaces An example of the hypergraph partitioning of a sparse matrix decomposed between three computing nodes.\relax }}{271}} +\newlabel{ch12:fig:08}{{11.8}{271}} +\@writefile{lot}{\contentsline {table}{\numberline {11.10}{\ignorespaces Performances of the parallel CG solver using hypergraph partitioning for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPU.\relax }}{272}} +\newlabel{ch12:tab:10}{{11.10}{272}} +\@writefile{lot}{\contentsline {table}{\numberline {11.11}{\ignorespaces Performances of the parallel GMRES solver using hypergraph partitioning for solving linear systems associated to sparse five-bands matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPU.\relax }}{273}} +\newlabel{ch12:tab:11}{{11.11}{273}} +\@writefile{lot}{\contentsline {table}{\numberline {11.12}{\ignorespaces The total communication volume between 12 GPU computing nodes without and with the hypergraph partitioning method.\relax }}{274}} +\newlabel{ch12:tab:12}{{11.12}{274}} +\newlabel{ch12:fig:09.01}{{11.9(a)}{275}} +\newlabel{sub@ch12:fig:09.01}{{(a)}{275}} +\newlabel{ch12:fig:09.02}{{11.9(b)}{275}} +\newlabel{sub@ch12:fig:09.02}{{(b)}{275}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.9}{\ignorespaces Weak-scaling of the parallel CG and GMRES solvers on a GPU cluster for solving large sparse linear systems.\relax }}{275}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Sparse band matrices}}}{275}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Sparse five-bands matrices}}}{275}} +\newlabel{ch12:fig:09}{{11.9}{275}} +\@writefile{toc}{\contentsline {section}{\numberline {11.6}Conclusion}{275}} +\newlabel{ch12:sec:06}{{11.6}{275}} +\@writefile{toc}{\contentsline {section}{Bibliography}{276}} \@setckpt{Chapters/chapter12/ch12}{ -\setcounter{page}{276} +\setcounter{page}{278} \setcounter{equation}{25} \setcounter{enumi}{4} \setcounter{enumii}{0}