From: couturie Date: Wed, 24 Apr 2013 14:05:47 +0000 (+0200) Subject: correction embedded fonts X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/commitdiff_plain/d6c0eb8af968b2b5d3562240345777b17776520b?ds=sidebyside;hp=-c correction embedded fonts --- d6c0eb8af968b2b5d3562240345777b17776520b diff --git a/BookGPU/BookGPU.tex b/BookGPU/BookGPU.tex index 5895e6e..23267c5 100755 --- a/BookGPU/BookGPU.tex +++ b/BookGPU/BookGPU.tex @@ -147,7 +147,7 @@ \makeindex -% \includeonly{Chapters/chapter10/ch10} +% \includeonly{Chapters/chapter7/ch7} \begin{document} @@ -166,7 +166,7 @@ \frontmatter %\include{frontmatter/Foreword} -%\include{frontmatter/preface} +\include{frontmatter/preface} \listoffigures \listoftables @@ -183,23 +183,23 @@ \part{Image processing} \include{Chapters/chapter3/ch3} \part{Software development} -\include{Chapters/chapter5/ch5} +\include{Chapters/chapter5/ch5}% HeatSolution0.049307.pdf HeatSolution0.pdf ... \include{Chapters/chapter6/ch6} \part{Optimization} \include{Chapters/chapter8/ch8} \include{Chapters/chapter9/ch9} -\include{Chapters/chapter10/ch10} +\include{Chapters/chapter10/ch10} %pb fonts tree2.pdf \part{Numerical applications} -\include{Chapters/chapter7/ch7} %pb fonts +\include{Chapters/chapter7/ch7} \include{Chapters/chapter11/ch11} \include{Chapters/chapter12/ch12} \include{Chapters/chapter13/ch13} -\include{Chapters/chapter14/ch14} +\include{Chapters/chapter14/ch14} %index \include{Chapters/chapter15/ch15} \include{Chapters/chapter16/ch16} \part{Other} -\include{Chapters/chapter17/ch17} +\include{Chapters/chapter17/ch17} %index \include{Chapters/chapter18/ch18} \include{Chapters/chapter19/ch19} diff --git a/BookGPU/Chapters/chapter1/figures/scalability.pdf b/BookGPU/Chapters/chapter1/figures/scalability.pdf index e484f59..43ee856 100644 Binary files a/BookGPU/Chapters/chapter1/figures/scalability.pdf and b/BookGPU/Chapters/chapter1/figures/scalability.pdf differ diff --git a/BookGPU/Chapters/chapter10/biblio.bib b/BookGPU/Chapters/chapter10/biblio.bib index 380d5f6..f5bc1eb 100644 --- a/BookGPU/Chapters/chapter10/biblio.bib +++ b/BookGPU/Chapters/chapter10/biblio.bib @@ -89,20 +89,20 @@ year = {2010}, } @Book{WRIGHT, -author = {S.J. Wright}, +author = {S. J. Wright}, title = {Primal-dual interior-point methods}, publisher = {SIAM}, year = {1998} } @Misc{THOMAD, -author = {M.E. Thomadakis and J.-C. Liu}, +author = {M. E. Thomadakis and J.-C. Liu}, title = {An efficient steepest-edge simplex algorithm for {SIMD} computers}, year = {1996} } @Misc{HALL, -author = {J.A.J. Hall}, +author = {J. A. J. Hall}, title = {Towards a practical parallelisation of the simplex method}, year = {2007} } @@ -134,7 +134,7 @@ year = {2011} } @TechReport{DANTZIG80, -author = {G.B. Dantzig}, +author = {G. B. Dantzig}, title = {Expected number of steps of the simplex method for a linear program with a convexity constraint}, institution = {Stanford {U}niversity}, year = {1980} @@ -148,7 +148,7 @@ year = {2011}, } @Article{LALAMI11, -author = {M.E. Lalami{,} D. El-Baz and V. Boyer}, +author = {M. E. Lalami{,} D. El-Baz and V. Boyer}, title = {Multi GPU Implementation of the Simplex Algorithm}, journal = {IEEE International Conference on High Performance Computing and Communications}, year = {2011}, @@ -175,7 +175,7 @@ volume = {19} @book{dantzig1953product, title={The Product Form for the Inverse in the Simplex Method}, - author={DANTZIG, G.B. and Orchard-Hays, W. and RAND CORP SANTA MONICA CALIF.}, + author={Dantzig, G. B. and Orchard-Hays, W. and RAND Corp. Santa Monica Calif.}, url={http://books.google.ch/books?id=XLuttgAACAAJ}, year={1953}, publisher={Defense Technical Information Center} @@ -204,14 +204,14 @@ volume = {19} } @PhdThesis{WOLTER06, -author = {Kati Wolter}, +author = {K. Wolter}, title = {Implementation of cutting plane separators for mixed integer programs}, school = {TU Berlin}, year = {2006} } @PhdThesis{ACHTERBERG07, -author = {Tobias Achterberg}, +author = {T. Achterberg}, title = {Constraint integer programming}, school = {TU Berlin}, year = {2007} @@ -227,7 +227,7 @@ doi={10.1007/BF02025534}, title={A fast LU update for linear programming}, url={http://dx.doi.org/10.1007/BF02025534}, publisher={Baltzer Science Publishers, Baarn/Kluwer Academic Publishers}, -author={Suhl, LeenaM. and Suhl, UweH.}, +author={Suhl, L. M. and Suhl, U. H.}, pages={33-47}, language={English} } @@ -254,13 +254,13 @@ doi={10.1007/BF01584074}, title={Experiments in mixed-integer linear programming}, url={http://dx.doi.org/10.1007/BF01584074}, publisher={Springer-Verlag}, -author={Benichou, M. and Gauthier, J.M. and Girodet, P. and Hentges, G. and Ribiere, G. and Vincent, O.}, +author={Benichou, M. and Gauthier, J. M. and Girodet, P. and Hentges, G. and Ribiere, G. and Vincent, O.}, pages={76-94}, language={English} } @article{Achterberg05, - author = {Achterberg, Tobias and Koch, Thorsten and Martin, Alexander}, + author = {Achterberg, T. and Koch, T. and Martin, A.}, title = {Branching rules revisited}, journal = {Oper. Res. Lett.}, issue_date = {January, 2005}, diff --git a/BookGPU/Chapters/chapter12/ch12.aux b/BookGPU/Chapters/chapter12/ch12.aux index 65b67a8..5841c60 100644 --- a/BookGPU/Chapters/chapter12/ch12.aux +++ b/BookGPU/Chapters/chapter12/ch12.aux @@ -3,81 +3,81 @@ \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}} \@writefile{toc}{\author{Jacques Bahi}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{291}} +\@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{293}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{ch12}{{12}{291}} -\@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{291}} -\newlabel{ch12:sec:01}{{12.1}{291}} -\@writefile{toc}{\contentsline {section}{\numberline {12.2}Krylov iterative methods}{292}} -\newlabel{ch12:sec:02}{{12.2}{292}} -\newlabel{ch12:eq:01}{{12.1}{292}} -\newlabel{ch12:eq:02}{{12.2}{292}} -\newlabel{ch12:eq:03}{{12.3}{292}} -\newlabel{ch12:eq:11}{{12.4}{293}} -\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}CG method}{293}} -\newlabel{ch12:sec:02.01}{{12.2.1}{293}} -\newlabel{ch12:eq:04}{{12.5}{293}} -\newlabel{ch12:eq:05}{{12.6}{293}} -\newlabel{ch12:eq:06}{{12.7}{293}} -\newlabel{ch12:eq:07}{{12.8}{293}} -\newlabel{ch12:eq:08}{{12.9}{293}} -\newlabel{ch12:eq:09}{{12.10}{293}} -\@writefile{loa}{\contentsline {algocf}{\numberline {12}{\ignorespaces Left-preconditioned CG method\relax }}{294}} -\newlabel{ch12:alg:01}{{12}{294}} -\newlabel{ch12:eq:10}{{12.11}{294}} -\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}GMRES method}{295}} -\newlabel{ch12:sec:02.02}{{12.2.2}{295}} -\newlabel{ch12:eq:12}{{12.12}{295}} -\newlabel{ch12:eq:13}{{12.13}{295}} -\newlabel{ch12:eq:14}{{12.14}{295}} -\newlabel{ch12:eq:15}{{12.15}{295}} -\newlabel{ch12:eq:16}{{12.16}{295}} -\newlabel{ch12:eq:17}{{12.17}{295}} -\newlabel{ch12:eq:18}{{12.18}{295}} -\newlabel{ch12:eq:19}{{12.19}{295}} -\@writefile{loa}{\contentsline {algocf}{\numberline {13}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{296}} -\newlabel{ch12:alg:02}{{13}{296}} -\@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel implementation on a GPU cluster}{297}} -\newlabel{ch12:sec:03}{{12.3}{297}} -\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.1}Data partitioning}{297}} -\newlabel{ch12:sec:03.01}{{12.3.1}{297}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{298}} -\newlabel{ch12:fig:01}{{12.1}{298}} -\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.2}GPU computing}{298}} -\newlabel{ch12:sec:03.02}{{12.3.2}{298}} -\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.3}Data communications}{299}} -\newlabel{ch12:sec:03.03}{{12.3.3}{299}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{300}} -\newlabel{ch12:fig:02}{{12.2}{300}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{301}} -\newlabel{ch12:fig:03}{{12.3}{301}} -\@writefile{toc}{\contentsline {section}{\numberline {12.4}Experimental results}{302}} -\newlabel{ch12:sec:04}{{12.4}{302}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{302}} -\newlabel{ch12:fig:04}{{12.4}{302}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{303}} -\newlabel{ch12:fig:05}{{12.5}{303}} -\@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{303}} -\newlabel{ch12:tab:01}{{12.1}{303}} -\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{304}} -\newlabel{ch12:tab:02}{{12.2}{304}} -\@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{304}} -\newlabel{ch12:tab:03}{{12.3}{304}} -\newlabel{ch12:eq:20}{{12.20}{305}} -\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{306}} -\newlabel{ch12:fig:06}{{12.6}{306}} -\@writefile{lot}{\contentsline {table}{\numberline {12.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{306}} -\newlabel{ch12:tab:04}{{12.4}{306}} -\@writefile{lot}{\contentsline {table}{\numberline {12.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{307}} -\newlabel{ch12:tab:05}{{12.5}{307}} -\@writefile{toc}{\contentsline {section}{\numberline {12.5}Conclusion}{307}} -\newlabel{ch12:sec:05}{{12.5}{307}} -\@writefile{lot}{\contentsline {table}{\numberline {12.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{308}} -\newlabel{ch12:tab:06}{{12.6}{308}} -\@writefile{toc}{\contentsline {section}{Bibliography}{308}} +\newlabel{ch12}{{12}{293}} +\@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{293}} +\newlabel{ch12:sec:01}{{12.1}{293}} +\@writefile{toc}{\contentsline {section}{\numberline {12.2}Krylov iterative methods}{294}} +\newlabel{ch12:sec:02}{{12.2}{294}} +\newlabel{ch12:eq:01}{{12.1}{294}} +\newlabel{ch12:eq:02}{{12.2}{294}} +\newlabel{ch12:eq:03}{{12.3}{294}} +\newlabel{ch12:eq:11}{{12.4}{295}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}CG method}{295}} +\newlabel{ch12:sec:02.01}{{12.2.1}{295}} +\newlabel{ch12:eq:04}{{12.5}{295}} +\newlabel{ch12:eq:05}{{12.6}{295}} +\newlabel{ch12:eq:06}{{12.7}{295}} +\newlabel{ch12:eq:07}{{12.8}{295}} +\newlabel{ch12:eq:08}{{12.9}{295}} +\newlabel{ch12:eq:09}{{12.10}{295}} +\@writefile{loa}{\contentsline {algocf}{\numberline {14}{\ignorespaces Left-preconditioned CG method\relax }}{296}} +\newlabel{ch12:alg:01}{{14}{296}} +\newlabel{ch12:eq:10}{{12.11}{296}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}GMRES method}{297}} +\newlabel{ch12:sec:02.02}{{12.2.2}{297}} +\newlabel{ch12:eq:12}{{12.12}{297}} +\newlabel{ch12:eq:13}{{12.13}{297}} +\newlabel{ch12:eq:14}{{12.14}{297}} +\newlabel{ch12:eq:15}{{12.15}{297}} +\newlabel{ch12:eq:16}{{12.16}{297}} +\newlabel{ch12:eq:17}{{12.17}{297}} +\newlabel{ch12:eq:18}{{12.18}{297}} +\newlabel{ch12:eq:19}{{12.19}{297}} +\@writefile{loa}{\contentsline {algocf}{\numberline {15}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{298}} +\newlabel{ch12:alg:02}{{15}{298}} +\@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel implementation on a GPU cluster}{299}} +\newlabel{ch12:sec:03}{{12.3}{299}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.1}Data partitioning}{299}} +\newlabel{ch12:sec:03.01}{{12.3.1}{299}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{300}} +\newlabel{ch12:fig:01}{{12.1}{300}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.2}GPU computing}{300}} +\newlabel{ch12:sec:03.02}{{12.3.2}{300}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.3}Data communications}{301}} +\newlabel{ch12:sec:03.03}{{12.3.3}{301}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{302}} +\newlabel{ch12:fig:02}{{12.2}{302}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{303}} +\newlabel{ch12:fig:03}{{12.3}{303}} +\@writefile{toc}{\contentsline {section}{\numberline {12.4}Experimental results}{304}} +\newlabel{ch12:sec:04}{{12.4}{304}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{304}} +\newlabel{ch12:fig:04}{{12.4}{304}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{305}} +\newlabel{ch12:fig:05}{{12.5}{305}} +\@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{305}} +\newlabel{ch12:tab:01}{{12.1}{305}} +\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{306}} +\newlabel{ch12:tab:02}{{12.2}{306}} +\@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{306}} +\newlabel{ch12:tab:03}{{12.3}{306}} +\newlabel{ch12:eq:20}{{12.20}{307}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{308}} +\newlabel{ch12:fig:06}{{12.6}{308}} +\@writefile{lot}{\contentsline {table}{\numberline {12.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{308}} +\newlabel{ch12:tab:04}{{12.4}{308}} +\@writefile{lot}{\contentsline {table}{\numberline {12.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{309}} +\newlabel{ch12:tab:05}{{12.5}{309}} +\@writefile{toc}{\contentsline {section}{\numberline {12.5}Conclusion}{309}} +\newlabel{ch12:sec:05}{{12.5}{309}} +\@writefile{lot}{\contentsline {table}{\numberline {12.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{310}} +\newlabel{ch12:tab:06}{{12.6}{310}} +\@writefile{toc}{\contentsline {section}{Bibliography}{310}} \@setckpt{Chapters/chapter12/ch12}{ -\setcounter{page}{310} +\setcounter{page}{312} \setcounter{equation}{22} \setcounter{enumi}{2} \setcounter{enumii}{0} @@ -103,9 +103,9 @@ \setcounter{lstnumber}{50} \setcounter{ContinuedFloat}{0} \setcounter{AlgoLine}{29} -\setcounter{algocfline}{13} -\setcounter{algocfproc}{13} -\setcounter{algocf}{13} +\setcounter{algocfline}{15} +\setcounter{algocfproc}{15} +\setcounter{algocf}{15} \setcounter{nprt@mantissa@digitsbefore}{0} \setcounter{nprt@mantissa@digitsafter}{0} \setcounter{nprt@exponent@digitsbefore}{0} diff --git a/BookGPU/Chapters/chapter16/ch16.aux b/BookGPU/Chapters/chapter16/ch16.aux index 75d0256..3a5b76e 100644 --- a/BookGPU/Chapters/chapter16/ch16.aux +++ b/BookGPU/Chapters/chapter16/ch16.aux @@ -4,72 +4,72 @@ \@writefile{toc}{\author{H. Wang}{}} \@writefile{toc}{\author{H. Yu}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {16}GPU-Accelerated Envelope-Following Method}{375}} +\@writefile{toc}{\contentsline {chapter}{\numberline {16}GPU-Accelerated Envelope-Following Method}{377}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {16.1}Introduction}{375}} -\newlabel{fig:ef1}{{16.1(a)}{377}} -\newlabel{sub@fig:ef1}{{(a)}{377}} -\newlabel{fig:ef2}{{16.1(b)}{377}} -\newlabel{sub@fig:ef2}{{(b)}{377}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{377}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{377}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{377}} -\newlabel{fig:ef_intro}{{16.1}{377}} -\@writefile{toc}{\contentsline {section}{\numberline {16.2}The envelope-following method in a nutshell}{378}} -\newlabel{sec:ef}{{16.2}{378}} -\newlabel{eq:dae}{{16.1}{378}} -\newlabel{eq:Newton}{{16.2}{379}} -\newlabel{eq:A}{{16.3}{379}} -\@writefile{toc}{\contentsline {section}{\numberline {16.3}New parallel envelope-following method}{380}} -\newlabel{sec:gmres}{{16.3}{380}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.1}GMRES solver for Newton update equation}{380}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.2}{\ignorespaces The flow of envelope-following method.\relax }}{381}} -\newlabel{fig:ef_flow}{{16.2}{381}} -\@writefile{loa}{\contentsline {algocf}{\numberline {17}{\ignorespaces Standard GMRES algorithm.\relax }}{382}} -\newlabel{alg:GMRES}{{17}{382}} -\newlabel{line:mvp}{{5}{382}} -\newlabel{line:newnorm}{{11}{382}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.2}Parallelization on GPU platforms}{382}} -\newlabel{sec:gpu}{{16.3.2}{382}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{383}} -\newlabel{fig:gmres}{{16.3}{383}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.3}Gear-2 based sensitivity calculation}{384}} -\newlabel{sec:gear}{{16.3.3}{384}} -\newlabel{eq:BE}{{16.4}{384}} -\newlabel{eq:sens1}{{16.5}{384}} -\newlabel{eq:Gear_t2}{{16.6}{385}} -\newlabel{eq:sens2}{{16.7}{385}} -\newlabel{eq:Gear_t3}{{16.8}{385}} -\newlabel{eq:sensM}{{16.9}{385}} -\@writefile{loa}{\contentsline {algocf}{\numberline {18}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{386}} -\newlabel{alg:mf_Gear}{{18}{386}} -\newlabel{line:mf_Gear_loop}{{4}{386}} -\newlabel{line:shift}{{8}{386}} -\@writefile{toc}{\contentsline {section}{\numberline {16.4}Numerical examples}{386}} -\newlabel{sec:exp}{{16.4}{386}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{387}} -\newlabel{fig:flyback}{{16.4}{387}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.5}{\ignorespaces Illustration of power/ground network model.\relax }}{387}} -\newlabel{fig:pg}{{16.5}{387}} -\newlabel{fig:flybackWhole}{{16.6(a)}{388}} -\newlabel{sub@fig:flybackWhole}{{(a)}{388}} -\newlabel{fig:flybackZoom}{{16.6(b)}{388}} -\newlabel{sub@fig:flybackZoom}{{(b)}{388}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{388}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{388}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{388}} -\newlabel{fig:flyback_wave}{{16.6}{388}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{389}} -\newlabel{fig:buck_wave}{{16.7}{389}} -\@writefile{lot}{\contentsline {table}{\numberline {16.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{389}} -\newlabel{table:circuit}{{16.1}{389}} -\@writefile{toc}{\contentsline {section}{\numberline {16.5}Summary}{390}} -\newlabel{sec:summary}{{16.5}{390}} -\@writefile{toc}{\contentsline {section}{\numberline {16.6}Glossary}{390}} -\@writefile{toc}{\contentsline {section}{Bibliography}{390}} +\@writefile{toc}{\contentsline {section}{\numberline {16.1}Introduction}{377}} +\newlabel{fig:ef1}{{16.1(a)}{379}} +\newlabel{sub@fig:ef1}{{(a)}{379}} +\newlabel{fig:ef2}{{16.1(b)}{379}} +\newlabel{sub@fig:ef2}{{(b)}{379}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{379}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{379}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{379}} +\newlabel{fig:ef_intro}{{16.1}{379}} +\@writefile{toc}{\contentsline {section}{\numberline {16.2}The envelope-following method in a nutshell}{380}} +\newlabel{sec:ef}{{16.2}{380}} +\newlabel{eq:dae}{{16.1}{380}} +\newlabel{eq:Newton}{{16.2}{381}} +\newlabel{eq:A}{{16.3}{381}} +\@writefile{toc}{\contentsline {section}{\numberline {16.3}New parallel envelope-following method}{382}} +\newlabel{sec:gmres}{{16.3}{382}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.1}GMRES solver for Newton update equation}{382}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.2}{\ignorespaces The flow of envelope-following method.\relax }}{383}} +\newlabel{fig:ef_flow}{{16.2}{383}} +\@writefile{loa}{\contentsline {algocf}{\numberline {19}{\ignorespaces Standard GMRES algorithm.\relax }}{384}} +\newlabel{alg:GMRES}{{19}{384}} +\newlabel{line:mvp}{{5}{384}} +\newlabel{line:newnorm}{{11}{384}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.2}Parallelization on GPU platforms}{384}} +\newlabel{sec:gpu}{{16.3.2}{384}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{385}} +\newlabel{fig:gmres}{{16.3}{385}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.3}Gear-2 based sensitivity calculation}{386}} +\newlabel{sec:gear}{{16.3.3}{386}} +\newlabel{eq:BE}{{16.4}{386}} +\newlabel{eq:sens1}{{16.5}{386}} +\newlabel{eq:Gear_t2}{{16.6}{387}} +\newlabel{eq:sens2}{{16.7}{387}} +\newlabel{eq:Gear_t3}{{16.8}{387}} +\newlabel{eq:sensM}{{16.9}{387}} +\@writefile{loa}{\contentsline {algocf}{\numberline {20}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{388}} +\newlabel{alg:mf_Gear}{{20}{388}} +\newlabel{line:mf_Gear_loop}{{4}{388}} +\newlabel{line:shift}{{8}{388}} +\@writefile{toc}{\contentsline {section}{\numberline {16.4}Numerical examples}{388}} +\newlabel{sec:exp}{{16.4}{388}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{389}} +\newlabel{fig:flyback}{{16.4}{389}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.5}{\ignorespaces Illustration of power/ground network model.\relax }}{389}} +\newlabel{fig:pg}{{16.5}{389}} +\newlabel{fig:flybackWhole}{{16.6(a)}{390}} +\newlabel{sub@fig:flybackWhole}{{(a)}{390}} +\newlabel{fig:flybackZoom}{{16.6(b)}{390}} +\newlabel{sub@fig:flybackZoom}{{(b)}{390}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{390}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{390}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{390}} +\newlabel{fig:flyback_wave}{{16.6}{390}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{391}} +\newlabel{fig:buck_wave}{{16.7}{391}} +\@writefile{lot}{\contentsline {table}{\numberline {16.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{391}} +\newlabel{table:circuit}{{16.1}{391}} +\@writefile{toc}{\contentsline {section}{\numberline {16.5}Summary}{392}} +\newlabel{sec:summary}{{16.5}{392}} +\@writefile{toc}{\contentsline {section}{\numberline {16.6}Glossary}{392}} +\@writefile{toc}{\contentsline {section}{Bibliography}{392}} \@setckpt{Chapters/chapter16/ch16}{ -\setcounter{page}{392} +\setcounter{page}{394} \setcounter{equation}{9} \setcounter{enumi}{2} \setcounter{enumii}{0} @@ -95,9 +95,9 @@ \setcounter{lstnumber}{9} \setcounter{ContinuedFloat}{0} \setcounter{AlgoLine}{8} -\setcounter{algocfline}{18} -\setcounter{algocfproc}{18} -\setcounter{algocf}{18} +\setcounter{algocfline}{20} +\setcounter{algocfproc}{20} +\setcounter{algocf}{20} \setcounter{nprt@mantissa@digitsbefore}{0} \setcounter{nprt@mantissa@digitsafter}{0} \setcounter{nprt@exponent@digitsbefore}{0} diff --git a/BookGPU/Chapters/chapter17/ch17.aux b/BookGPU/Chapters/chapter17/ch17.aux index 005df6d..1745216 100644 --- a/BookGPU/Chapters/chapter17/ch17.aux +++ b/BookGPU/Chapters/chapter17/ch17.aux @@ -6,76 +6,76 @@ \@writefile{toc}{\author{B\IeC {\'e}n\IeC {\'e}dicte Herrmann}{}} \@writefile{toc}{\author{Laurent Philippe}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {17}Implementing Multi-Agent Systems on GPU}{395}} +\@writefile{toc}{\contentsline {chapter}{\numberline {17}Implementing Multi-Agent Systems on GPU}{397}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{chapter17}{{17}{396}} -\@writefile{toc}{\contentsline {section}{\numberline {17.1}Introduction}{396}} -\newlabel{ch17:intro}{{17.1}{396}} -\@writefile{toc}{\contentsline {section}{\numberline {17.2}Running Agent-Based Simulations}{397}} -\newlabel{ch17:ABM}{{17.2}{397}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.1}Multi-agent systems and parallelism}{397}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.2}MAS Implementation on GPU}{399}} -\newlabel{ch17:subsec:gpu}{{17.2.2}{399}} -\@writefile{toc}{\contentsline {section}{\numberline {17.3}A first practical example}{400}} -\newlabel{ch17:sec:1stmodel}{{17.3}{400}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.1}The Collembola model}{400}} -\newlabel{ch17:subsec:collembolamodel}{{17.3.1}{400}} -\@writefile{lof}{\contentsline {figure}{\numberline {17.1}{\ignorespaces Evolution algorithm of Collembola model\relax }}{401}} -\newlabel{ch17:fig:collem_algorithm}{{17.1}{401}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.2}Collembola Implementation}{401}} -\newlabel{ch17:listing:collembola-diffuse}{{17.1}{402}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.1}Collembola OpenCL Diffusion kernel}{402}} -\newlabel{ch17:listing:collembola-reduc}{{17.2}{402}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.2}Collembola OpenCL reduction kernel}{402}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.3}Collembola performance}{403}} -\@writefile{lof}{\contentsline {figure}{\numberline {17.2}{\ignorespaces Performance of the Collembola model on CPU and GPU\relax }}{404}} -\newlabel{ch17:fig:mior_perfs_collem}{{17.2}{404}} -\@writefile{toc}{\contentsline {section}{\numberline {17.4}Second example}{404}} -\newlabel{ch17:sec:2ndmodel}{{17.4}{404}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.1}The MIOR model}{404}} -\newlabel{ch17:subsec:miormodel}{{17.4.1}{404}} -\@writefile{loa}{\contentsline {algocf}{\numberline {19}{\ignorespaces Evolution step of each Meta-Mior (microbial colony) agent\relax }}{405}} -\newlabel{ch17:seqalgo}{{19}{405}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.2}MIOR Implementation}{405}} -\@writefile{lof}{\contentsline {figure}{\numberline {17.3}{\ignorespaces Execution distribution retained on GPU\relax }}{406}} -\newlabel{ch17:fig:gpu_distribution}{{17.3}{406}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.1}Execution mapping on GPU}{406}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.2}Data structures translation}{407}} -\newlabel{ch17:subsec:datastructures}{{17.4.2.2}{407}} -\newlabel{ch17:listing:mior_data_structures}{{17.3}{407}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.3}Main data structures used in a MIOR simulation}{407}} -\@writefile{lof}{\contentsline {figure}{\numberline {17.4}{\ignorespaces Compact representation of the topology of a MIOR simulation\relax }}{408}} -\newlabel{ch17:fig:csr_representation}{{17.4}{408}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.3}Critical resources access management}{408}} -\newlabel{ch17:subsec:concurrency}{{17.4.2.3}{408}} -\newlabel{ch17:listing:mior_kernels}{{17.4}{409}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.4}Main MIOR kernel}{409}} -\newlabel{ch17:fig:mior_launcher}{{17.5}{410}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.5}MIOR simulation launcher}{410}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.4}Termination detection}{410}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.3}Performance of MIOR implementations}{411}} -\newlabel{ch17:subsec:miorexperiments}{{17.4.3}{411}} -\@writefile{lof}{\contentsline {figure}{\numberline {17.5}{\ignorespaces CPU and GPU performance on a Tesla C1060 node\relax }}{412}} -\newlabel{ch17:fig:mior_perfs_tesla}{{17.5}{412}} -\@writefile{lof}{\contentsline {figure}{\numberline {17.6}{\ignorespaces CPU and GPU performance on a personal computer with a Geforce 8800GT\relax }}{413}} -\newlabel{ch17:fig:mior_perfs_8800gt}{{17.6}{413}} -\@writefile{toc}{\contentsline {section}{\numberline {17.5}Analysis and recommendations}{413}} -\newlabel{ch17:analysis}{{17.5}{413}} -\@writefile{lof}{\contentsline {figure}{\numberline {17.7}{\ignorespaces Execution time of one multi-simulation kernel on the Tesla platform\relax }}{414}} -\newlabel{ch17:fig:monokernel_graph}{{17.7}{414}} -\@writefile{lof}{\contentsline {figure}{\numberline {17.8}{\ignorespaces Total execution time for 1000 simulations on the Tesla platform, while varying the number of simulations for each kernel\relax }}{414}} -\newlabel{ch17:fig:multikernel_graph}{{17.8}{414}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.1}Analysis}{414}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.2}MAS execution workflow}{415}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.3}Implementation challenges}{416}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.4}MCSMA}{416}} -\newlabel{ch17:Mcsma}{{17.5.4}{416}} -\@writefile{toc}{\contentsline {section}{\numberline {17.6}Conclusion}{417}} -\newlabel{ch17:conclusion}{{17.6}{417}} -\@writefile{toc}{\contentsline {section}{Bibliography}{418}} +\newlabel{chapter17}{{17}{398}} +\@writefile{toc}{\contentsline {section}{\numberline {17.1}Introduction}{398}} +\newlabel{ch17:intro}{{17.1}{398}} +\@writefile{toc}{\contentsline {section}{\numberline {17.2}Running Agent-Based Simulations}{399}} +\newlabel{ch17:ABM}{{17.2}{399}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.1}Multi-agent systems and parallelism}{399}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.2}MAS Implementation on GPU}{401}} +\newlabel{ch17:subsec:gpu}{{17.2.2}{401}} +\@writefile{toc}{\contentsline {section}{\numberline {17.3}A first practical example}{402}} +\newlabel{ch17:sec:1stmodel}{{17.3}{402}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.1}The Collembola model}{402}} +\newlabel{ch17:subsec:collembolamodel}{{17.3.1}{402}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.1}{\ignorespaces Evolution algorithm of Collembola model\relax }}{403}} +\newlabel{ch17:fig:collem_algorithm}{{17.1}{403}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.2}Collembola Implementation}{403}} +\newlabel{ch17:listing:collembola-diffuse}{{17.1}{404}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.1}Collembola OpenCL Diffusion kernel}{404}} +\newlabel{ch17:listing:collembola-reduc}{{17.2}{404}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.2}Collembola OpenCL reduction kernel}{404}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.3}Collembola performance}{405}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.2}{\ignorespaces Performance of the Collembola model on CPU and GPU\relax }}{406}} +\newlabel{ch17:fig:mior_perfs_collem}{{17.2}{406}} +\@writefile{toc}{\contentsline {section}{\numberline {17.4}Second example}{406}} +\newlabel{ch17:sec:2ndmodel}{{17.4}{406}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.1}The MIOR model}{406}} +\newlabel{ch17:subsec:miormodel}{{17.4.1}{406}} +\@writefile{loa}{\contentsline {algocf}{\numberline {21}{\ignorespaces Evolution step of each Meta-Mior (microbial colony) agent\relax }}{407}} +\newlabel{ch17:seqalgo}{{21}{407}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.2}MIOR Implementation}{407}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.3}{\ignorespaces Execution distribution retained on GPU\relax }}{408}} +\newlabel{ch17:fig:gpu_distribution}{{17.3}{408}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.1}Execution mapping on GPU}{408}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.2}Data structures translation}{409}} +\newlabel{ch17:subsec:datastructures}{{17.4.2.2}{409}} +\newlabel{ch17:listing:mior_data_structures}{{17.3}{409}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.3}Main data structures used in a MIOR simulation}{409}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.4}{\ignorespaces Compact representation of the topology of a MIOR simulation\relax }}{410}} +\newlabel{ch17:fig:csr_representation}{{17.4}{410}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.3}Critical resources access management}{410}} +\newlabel{ch17:subsec:concurrency}{{17.4.2.3}{410}} +\newlabel{ch17:listing:mior_kernels}{{17.4}{411}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.4}Main MIOR kernel}{411}} +\newlabel{ch17:fig:mior_launcher}{{17.5}{412}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.5}MIOR simulation launcher}{412}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.4}Termination detection}{412}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.3}Performance of MIOR implementations}{413}} +\newlabel{ch17:subsec:miorexperiments}{{17.4.3}{413}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.5}{\ignorespaces CPU and GPU performance on a Tesla C1060 node\relax }}{414}} +\newlabel{ch17:fig:mior_perfs_tesla}{{17.5}{414}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.6}{\ignorespaces CPU and GPU performance on a personal computer with a Geforce 8800GT\relax }}{415}} +\newlabel{ch17:fig:mior_perfs_8800gt}{{17.6}{415}} +\@writefile{toc}{\contentsline {section}{\numberline {17.5}Analysis and recommendations}{415}} +\newlabel{ch17:analysis}{{17.5}{415}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.7}{\ignorespaces Execution time of one multi-simulation kernel on the Tesla platform\relax }}{416}} +\newlabel{ch17:fig:monokernel_graph}{{17.7}{416}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.8}{\ignorespaces Total execution time for 1000 simulations on the Tesla platform, while varying the number of simulations for each kernel\relax }}{416}} +\newlabel{ch17:fig:multikernel_graph}{{17.8}{416}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.1}Analysis}{416}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.2}MAS execution workflow}{417}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.3}Implementation challenges}{418}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.4}MCSMA}{418}} +\newlabel{ch17:Mcsma}{{17.5.4}{418}} +\@writefile{toc}{\contentsline {section}{\numberline {17.6}Conclusion}{419}} +\newlabel{ch17:conclusion}{{17.6}{419}} +\@writefile{toc}{\contentsline {section}{Bibliography}{420}} \@setckpt{Chapters/chapter17/ch17}{ -\setcounter{page}{422} +\setcounter{page}{424} \setcounter{equation}{0} \setcounter{enumi}{3} \setcounter{enumii}{0} @@ -101,9 +101,9 @@ \setcounter{lstnumber}{21} \setcounter{ContinuedFloat}{0} \setcounter{AlgoLine}{17} -\setcounter{algocfline}{19} -\setcounter{algocfproc}{19} -\setcounter{algocf}{19} +\setcounter{algocfline}{21} +\setcounter{algocfproc}{21} +\setcounter{algocf}{21} \setcounter{nprt@mantissa@digitsbefore}{0} \setcounter{nprt@mantissa@digitsafter}{0} \setcounter{nprt@exponent@digitsbefore}{0} diff --git a/BookGPU/Chapters/chapter18/ch18.aux b/BookGPU/Chapters/chapter18/ch18.aux index 884902a..4420c4d 100644 --- a/BookGPU/Chapters/chapter18/ch18.aux +++ b/BookGPU/Chapters/chapter18/ch18.aux @@ -2,45 +2,45 @@ \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}} \@writefile{toc}{\author{Christophe Guyeux}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {18}Pseudorandom Number Generator on GPU}{423}} +\@writefile{toc}{\contentsline {chapter}{\numberline {18}Pseudorandom Number Generator on GPU}{425}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{chapter18}{{18}{423}} -\@writefile{toc}{\contentsline {section}{\numberline {18.1}Introduction}{423}} -\@writefile{toc}{\contentsline {section}{\numberline {18.2}Basic Remindees}{425}} -\newlabel{section:BASIC RECALLS}{{18.2}{425}} -\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.1}A Short Presentation of Chaos}{425}} -\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.2}On Devaney's Definition of Chaos}{425}} -\newlabel{sec:dev}{{18.2.2}{425}} -\newlabel{Devaney}{{18.1}{425}} -\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.3}Chaotic iterations}{426}} -\newlabel{subsection:Chaotic iterations}{{18.2.3}{426}} -\newlabel{Chaotic iterations}{{2}{426}} -\newlabel{eq:generalIC}{{18.4}{427}} -\newlabel{equation Oplus}{{18.5}{427}} -\@writefile{toc}{\contentsline {section}{\numberline {18.3}Toward Efficiency and Improvement for CI PRNG}{427}} -\newlabel{sec:efficient PRNG}{{18.3}{427}} -\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{427}} -\newlabel{algo:seqCIPRNG}{{18.1}{427}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {18.1}C code of the sequential PRNG based on chaotic iterations}{427}} -\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{428}} -\newlabel{sec:efficient PRNG gpu}{{18.3.2}{428}} -\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.3}Naive Version for GPU}{428}} -\@writefile{loa}{\contentsline {algocf}{\numberline {20}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{429}} -\newlabel{algo:gpu_kernel}{{20}{429}} -\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.4}Improved Version for GPU}{429}} -\newlabel{IR}{{21}{430}} -\@writefile{loa}{\contentsline {algocf}{\numberline {21}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{430}} -\newlabel{algo:gpu_kernel2}{{21}{430}} -\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.5}Chaos Evaluation of the Improved Version}{430}} -\@writefile{toc}{\contentsline {section}{\numberline {18.4}Experiments}{431}} -\newlabel{sec:experiments}{{18.4}{431}} -\@writefile{toc}{\contentsline {section}{\numberline {18.5}Summary}{431}} -\@writefile{lof}{\contentsline {figure}{\numberline {18.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{432}} -\newlabel{fig:time_xorlike_gpu}{{18.1}{432}} -\@writefile{toc}{\contentsline {section}{Bibliography}{433}} +\newlabel{chapter18}{{18}{425}} +\@writefile{toc}{\contentsline {section}{\numberline {18.1}Introduction}{425}} +\@writefile{toc}{\contentsline {section}{\numberline {18.2}Basic Remindees}{427}} +\newlabel{section:BASIC RECALLS}{{18.2}{427}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.1}A Short Presentation of Chaos}{427}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.2}On Devaney's Definition of Chaos}{427}} +\newlabel{sec:dev}{{18.2.2}{427}} +\newlabel{Devaney}{{18.1}{427}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.3}Chaotic iterations}{428}} +\newlabel{subsection:Chaotic iterations}{{18.2.3}{428}} +\newlabel{Chaotic iterations}{{2}{428}} +\newlabel{eq:generalIC}{{18.4}{429}} +\newlabel{equation Oplus}{{18.5}{429}} +\@writefile{toc}{\contentsline {section}{\numberline {18.3}Toward Efficiency and Improvement for CI PRNG}{429}} +\newlabel{sec:efficient PRNG}{{18.3}{429}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{429}} +\newlabel{algo:seqCIPRNG}{{18.1}{429}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {18.1}C code of the sequential PRNG based on chaotic iterations}{429}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{430}} +\newlabel{sec:efficient PRNG gpu}{{18.3.2}{430}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.3}Naive Version for GPU}{430}} +\@writefile{loa}{\contentsline {algocf}{\numberline {22}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{431}} +\newlabel{algo:gpu_kernel}{{22}{431}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.4}Improved Version for GPU}{431}} +\newlabel{IR}{{23}{432}} +\@writefile{loa}{\contentsline {algocf}{\numberline {23}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{432}} +\newlabel{algo:gpu_kernel2}{{23}{432}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.5}Chaos Evaluation of the Improved Version}{432}} +\@writefile{toc}{\contentsline {section}{\numberline {18.4}Experiments}{433}} +\newlabel{sec:experiments}{{18.4}{433}} +\@writefile{toc}{\contentsline {section}{\numberline {18.5}Summary}{433}} +\@writefile{lof}{\contentsline {figure}{\numberline {18.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{434}} +\newlabel{fig:time_xorlike_gpu}{{18.1}{434}} +\@writefile{toc}{\contentsline {section}{Bibliography}{435}} \@setckpt{Chapters/chapter18/ch18}{ -\setcounter{page}{435} +\setcounter{page}{437} \setcounter{equation}{5} \setcounter{enumi}{3} \setcounter{enumii}{0} @@ -66,9 +66,9 @@ \setcounter{lstnumber}{15} \setcounter{ContinuedFloat}{0} \setcounter{AlgoLine}{14} -\setcounter{algocfline}{21} -\setcounter{algocfproc}{21} -\setcounter{algocf}{21} +\setcounter{algocfline}{23} +\setcounter{algocfproc}{23} +\setcounter{algocf}{23} \setcounter{nprt@mantissa@digitsbefore}{0} \setcounter{nprt@mantissa@digitsafter}{0} \setcounter{nprt@exponent@digitsbefore}{0} diff --git a/BookGPU/Chapters/chapter7/ch7.tex b/BookGPU/Chapters/chapter7/ch7.tex index e4ffb0a..c18d417 100644 --- a/BookGPU/Chapters/chapter7/ch7.tex +++ b/BookGPU/Chapters/chapter7/ch7.tex @@ -819,10 +819,12 @@ Ideally, the ratio $\mathcal{C}_\mathcal{G}/\mathcal{C}_\mathcal{F}$ is small an \setlength\figureheight{0.35\textwidth} \setlength\figurewidth{0.37\textwidth} \subfigure[Performance scaling]{ - {\small\input{Chapters/chapter7/figures/PararealScaletestGTX590.tikz}} +% {\small\input{Chapters/chapter7/figures/PararealScaletestGTX590.tikz}} + \includegraphics[width=0.5\textwidth]{Chapters/chapter7/figures/PararealScaletestGTX590_conv.pdf} } \subfigure[Speedup]{ - {\small\input{Chapters/chapter7/figures/PararealSpeedupGTX590.tikz}} + % {\small\input{Chapters/chapter7/figures/PararealSpeedupGTX590.tikz}} + \includegraphics[width=0.5\textwidth]{Chapters/chapter7/figures/PararealSpeedupGTX590_conv.pdf} } \end{center} \caption{(a) Parareal absolute timings for an increasingly number of water waves traveling one wave length, each wave resolution is ($33\times 9$). (b) Parareal speedup for two to sixteen compute nodes compared to the purely sequential single GPU solver. Notice how insensitive the parareal scheme is to the size of the problem solved. Test environment 2.}\label{ch7:fig:DDPA_SPEEDUP} diff --git a/BookGPU/Chapters/chapter7/figures/PararealScaletestGTX590_conv.pdf b/BookGPU/Chapters/chapter7/figures/PararealScaletestGTX590_conv.pdf new file mode 100644 index 0000000..1da9279 Binary files /dev/null and b/BookGPU/Chapters/chapter7/figures/PararealScaletestGTX590_conv.pdf differ diff --git a/BookGPU/Chapters/chapter7/figures/PararealSpeedupGTX590_conv.pdf b/BookGPU/Chapters/chapter7/figures/PararealSpeedupGTX590_conv.pdf new file mode 100644 index 0000000..603b629 Binary files /dev/null and b/BookGPU/Chapters/chapter7/figures/PararealSpeedupGTX590_conv.pdf differ diff --git a/BookGPU/Chapters/chapter8/ch8.tex b/BookGPU/Chapters/chapter8/ch8.tex index 9352b56..101a552 100644 --- a/BookGPU/Chapters/chapter8/ch8.tex +++ b/BookGPU/Chapters/chapter8/ch8.tex @@ -3,7 +3,7 @@ \chapterauthor{Nouredine Melab}{Universit\'e Lille 1 CNRS/LIFL, INRIA Lille Nord Europe, Cit\'e scientifique - 59655, Villeneuve d'Ascq cedex, France\\} \chapter{GPU-accelerated Tree-based Exact Optimization Methods} - +\label{ch8:GPU-accelerated-tree-based-exact-optimization-methods} \section{Introduction} \label{ch8:introduction} @@ -858,4 +858,4 @@ Experiments show also that the proposed refactoring approach improves the parall In the near future, we plan to extend this work to a cluster of GPU-accelerated multi-core processors. From the application point of view, the objective is to optimally solve challenging and unsolved Flow-Shop instances as we did it for one 50$\times$20 problem instance with grid computing \cite{ch8:Mezmaz_2007}. Finally, we plan to investigate other lower bound functions to deal with other combinatorial optimization problems. -\putbib[Chapters/chapter8/biblio8] \ No newline at end of file +\putbib[Chapters/chapter8/biblio8]