\makeindex
-% \includeonly{Chapters/chapter10/ch10}
+% \includeonly{Chapters/chapter7/ch7}
\begin{document}
\frontmatter
%\include{frontmatter/Foreword}
-%\include{frontmatter/preface}
+\include{frontmatter/preface}
\listoffigures
\listoftables
\part{Image processing}
\include{Chapters/chapter3/ch3}
\part{Software development}
-\include{Chapters/chapter5/ch5}
+\include{Chapters/chapter5/ch5}% HeatSolution0.049307.pdf HeatSolution0.pdf ...
\include{Chapters/chapter6/ch6}
\part{Optimization}
\include{Chapters/chapter8/ch8}
\include{Chapters/chapter9/ch9}
-\include{Chapters/chapter10/ch10}
+\include{Chapters/chapter10/ch10} %pb fonts tree2.pdf
\part{Numerical applications}
-\include{Chapters/chapter7/ch7} %pb fonts
+\include{Chapters/chapter7/ch7}
\include{Chapters/chapter11/ch11}
\include{Chapters/chapter12/ch12}
\include{Chapters/chapter13/ch13}
-\include{Chapters/chapter14/ch14}
+\include{Chapters/chapter14/ch14} %index
\include{Chapters/chapter15/ch15}
\include{Chapters/chapter16/ch16}
\part{Other}
-\include{Chapters/chapter17/ch17}
+\include{Chapters/chapter17/ch17} %index
\include{Chapters/chapter18/ch18}
\include{Chapters/chapter19/ch19}
}
@Book{WRIGHT,
-author = {S.J. Wright},
+author = {S. J. Wright},
title = {Primal-dual interior-point methods},
publisher = {SIAM},
year = {1998}
}
@Misc{THOMAD,
-author = {M.E. Thomadakis and J.-C. Liu},
+author = {M. E. Thomadakis and J.-C. Liu},
title = {An efficient steepest-edge simplex algorithm for {SIMD} computers},
year = {1996}
}
@Misc{HALL,
-author = {J.A.J. Hall},
+author = {J. A. J. Hall},
title = {Towards a practical parallelisation of the simplex method},
year = {2007}
}
}
@TechReport{DANTZIG80,
-author = {G.B. Dantzig},
+author = {G. B. Dantzig},
title = {Expected number of steps of the simplex method for a linear program with a convexity constraint},
institution = {Stanford {U}niversity},
year = {1980}
}
@Article{LALAMI11,
-author = {M.E. Lalami{,} D. El-Baz and V. Boyer},
+author = {M. E. Lalami{,} D. El-Baz and V. Boyer},
title = {Multi GPU Implementation of the Simplex Algorithm},
journal = {IEEE International Conference on High Performance Computing and Communications},
year = {2011},
@book{dantzig1953product,
title={The Product Form for the Inverse in the Simplex Method},
- author={DANTZIG, G.B. and Orchard-Hays, W. and RAND CORP SANTA MONICA CALIF.},
+ author={Dantzig, G. B. and Orchard-Hays, W. and RAND Corp. Santa Monica Calif.},
url={http://books.google.ch/books?id=XLuttgAACAAJ},
year={1953},
publisher={Defense Technical Information Center}
}
@PhdThesis{WOLTER06,
-author = {Kati Wolter},
+author = {K. Wolter},
title = {Implementation of cutting plane separators for mixed integer programs},
school = {TU Berlin},
year = {2006}
}
@PhdThesis{ACHTERBERG07,
-author = {Tobias Achterberg},
+author = {T. Achterberg},
title = {Constraint integer programming},
school = {TU Berlin},
year = {2007}
title={A fast LU update for linear programming},
url={http://dx.doi.org/10.1007/BF02025534},
publisher={Baltzer Science Publishers, Baarn/Kluwer Academic Publishers},
-author={Suhl, LeenaM. and Suhl, UweH.},
+author={Suhl, L. M. and Suhl, U. H.},
pages={33-47},
language={English}
}
title={Experiments in mixed-integer linear programming},
url={http://dx.doi.org/10.1007/BF01584074},
publisher={Springer-Verlag},
-author={Benichou, M. and Gauthier, J.M. and Girodet, P. and Hentges, G. and Ribiere, G. and Vincent, O.},
+author={Benichou, M. and Gauthier, J. M. and Girodet, P. and Hentges, G. and Ribiere, G. and Vincent, O.},
pages={76-94},
language={English}
}
@article{Achterberg05,
- author = {Achterberg, Tobias and Koch, Thorsten and Martin, Alexander},
+ author = {Achterberg, T. and Koch, T. and Martin, A.},
title = {Branching rules revisited},
journal = {Oper. Res. Lett.},
issue_date = {January, 2005},
\@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}}
\@writefile{toc}{\author{Jacques Bahi}{}}
\@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{291}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{293}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
-\newlabel{ch12}{{12}{291}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{291}}
-\newlabel{ch12:sec:01}{{12.1}{291}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.2}Krylov iterative methods}{292}}
-\newlabel{ch12:sec:02}{{12.2}{292}}
-\newlabel{ch12:eq:01}{{12.1}{292}}
-\newlabel{ch12:eq:02}{{12.2}{292}}
-\newlabel{ch12:eq:03}{{12.3}{292}}
-\newlabel{ch12:eq:11}{{12.4}{293}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}CG method}{293}}
-\newlabel{ch12:sec:02.01}{{12.2.1}{293}}
-\newlabel{ch12:eq:04}{{12.5}{293}}
-\newlabel{ch12:eq:05}{{12.6}{293}}
-\newlabel{ch12:eq:06}{{12.7}{293}}
-\newlabel{ch12:eq:07}{{12.8}{293}}
-\newlabel{ch12:eq:08}{{12.9}{293}}
-\newlabel{ch12:eq:09}{{12.10}{293}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {12}{\ignorespaces Left-preconditioned CG method\relax }}{294}}
-\newlabel{ch12:alg:01}{{12}{294}}
-\newlabel{ch12:eq:10}{{12.11}{294}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}GMRES method}{295}}
-\newlabel{ch12:sec:02.02}{{12.2.2}{295}}
-\newlabel{ch12:eq:12}{{12.12}{295}}
-\newlabel{ch12:eq:13}{{12.13}{295}}
-\newlabel{ch12:eq:14}{{12.14}{295}}
-\newlabel{ch12:eq:15}{{12.15}{295}}
-\newlabel{ch12:eq:16}{{12.16}{295}}
-\newlabel{ch12:eq:17}{{12.17}{295}}
-\newlabel{ch12:eq:18}{{12.18}{295}}
-\newlabel{ch12:eq:19}{{12.19}{295}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {13}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{296}}
-\newlabel{ch12:alg:02}{{13}{296}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel implementation on a GPU cluster}{297}}
-\newlabel{ch12:sec:03}{{12.3}{297}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.1}Data partitioning}{297}}
-\newlabel{ch12:sec:03.01}{{12.3.1}{297}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{298}}
-\newlabel{ch12:fig:01}{{12.1}{298}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.2}GPU computing}{298}}
-\newlabel{ch12:sec:03.02}{{12.3.2}{298}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.3}Data communications}{299}}
-\newlabel{ch12:sec:03.03}{{12.3.3}{299}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{300}}
-\newlabel{ch12:fig:02}{{12.2}{300}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{301}}
-\newlabel{ch12:fig:03}{{12.3}{301}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.4}Experimental results}{302}}
-\newlabel{ch12:sec:04}{{12.4}{302}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{302}}
-\newlabel{ch12:fig:04}{{12.4}{302}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{303}}
-\newlabel{ch12:fig:05}{{12.5}{303}}
-\@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{303}}
-\newlabel{ch12:tab:01}{{12.1}{303}}
-\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{304}}
-\newlabel{ch12:tab:02}{{12.2}{304}}
-\@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{304}}
-\newlabel{ch12:tab:03}{{12.3}{304}}
-\newlabel{ch12:eq:20}{{12.20}{305}}
-\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{306}}
-\newlabel{ch12:fig:06}{{12.6}{306}}
-\@writefile{lot}{\contentsline {table}{\numberline {12.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{306}}
-\newlabel{ch12:tab:04}{{12.4}{306}}
-\@writefile{lot}{\contentsline {table}{\numberline {12.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{307}}
-\newlabel{ch12:tab:05}{{12.5}{307}}
-\@writefile{toc}{\contentsline {section}{\numberline {12.5}Conclusion}{307}}
-\newlabel{ch12:sec:05}{{12.5}{307}}
-\@writefile{lot}{\contentsline {table}{\numberline {12.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{308}}
-\newlabel{ch12:tab:06}{{12.6}{308}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{308}}
+\newlabel{ch12}{{12}{293}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{293}}
+\newlabel{ch12:sec:01}{{12.1}{293}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.2}Krylov iterative methods}{294}}
+\newlabel{ch12:sec:02}{{12.2}{294}}
+\newlabel{ch12:eq:01}{{12.1}{294}}
+\newlabel{ch12:eq:02}{{12.2}{294}}
+\newlabel{ch12:eq:03}{{12.3}{294}}
+\newlabel{ch12:eq:11}{{12.4}{295}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}CG method}{295}}
+\newlabel{ch12:sec:02.01}{{12.2.1}{295}}
+\newlabel{ch12:eq:04}{{12.5}{295}}
+\newlabel{ch12:eq:05}{{12.6}{295}}
+\newlabel{ch12:eq:06}{{12.7}{295}}
+\newlabel{ch12:eq:07}{{12.8}{295}}
+\newlabel{ch12:eq:08}{{12.9}{295}}
+\newlabel{ch12:eq:09}{{12.10}{295}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {14}{\ignorespaces Left-preconditioned CG method\relax }}{296}}
+\newlabel{ch12:alg:01}{{14}{296}}
+\newlabel{ch12:eq:10}{{12.11}{296}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}GMRES method}{297}}
+\newlabel{ch12:sec:02.02}{{12.2.2}{297}}
+\newlabel{ch12:eq:12}{{12.12}{297}}
+\newlabel{ch12:eq:13}{{12.13}{297}}
+\newlabel{ch12:eq:14}{{12.14}{297}}
+\newlabel{ch12:eq:15}{{12.15}{297}}
+\newlabel{ch12:eq:16}{{12.16}{297}}
+\newlabel{ch12:eq:17}{{12.17}{297}}
+\newlabel{ch12:eq:18}{{12.18}{297}}
+\newlabel{ch12:eq:19}{{12.19}{297}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {15}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{298}}
+\newlabel{ch12:alg:02}{{15}{298}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel implementation on a GPU cluster}{299}}
+\newlabel{ch12:sec:03}{{12.3}{299}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.1}Data partitioning}{299}}
+\newlabel{ch12:sec:03.01}{{12.3.1}{299}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{300}}
+\newlabel{ch12:fig:01}{{12.1}{300}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.2}GPU computing}{300}}
+\newlabel{ch12:sec:03.02}{{12.3.2}{300}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.3}Data communications}{301}}
+\newlabel{ch12:sec:03.03}{{12.3.3}{301}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{302}}
+\newlabel{ch12:fig:02}{{12.2}{302}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{303}}
+\newlabel{ch12:fig:03}{{12.3}{303}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.4}Experimental results}{304}}
+\newlabel{ch12:sec:04}{{12.4}{304}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{304}}
+\newlabel{ch12:fig:04}{{12.4}{304}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{305}}
+\newlabel{ch12:fig:05}{{12.5}{305}}
+\@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{305}}
+\newlabel{ch12:tab:01}{{12.1}{305}}
+\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{306}}
+\newlabel{ch12:tab:02}{{12.2}{306}}
+\@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{306}}
+\newlabel{ch12:tab:03}{{12.3}{306}}
+\newlabel{ch12:eq:20}{{12.20}{307}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{308}}
+\newlabel{ch12:fig:06}{{12.6}{308}}
+\@writefile{lot}{\contentsline {table}{\numberline {12.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{308}}
+\newlabel{ch12:tab:04}{{12.4}{308}}
+\@writefile{lot}{\contentsline {table}{\numberline {12.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{309}}
+\newlabel{ch12:tab:05}{{12.5}{309}}
+\@writefile{toc}{\contentsline {section}{\numberline {12.5}Conclusion}{309}}
+\newlabel{ch12:sec:05}{{12.5}{309}}
+\@writefile{lot}{\contentsline {table}{\numberline {12.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{310}}
+\newlabel{ch12:tab:06}{{12.6}{310}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{310}}
\@setckpt{Chapters/chapter12/ch12}{
-\setcounter{page}{310}
+\setcounter{page}{312}
\setcounter{equation}{22}
\setcounter{enumi}{2}
\setcounter{enumii}{0}
\setcounter{lstnumber}{50}
\setcounter{ContinuedFloat}{0}
\setcounter{AlgoLine}{29}
-\setcounter{algocfline}{13}
-\setcounter{algocfproc}{13}
-\setcounter{algocf}{13}
+\setcounter{algocfline}{15}
+\setcounter{algocfproc}{15}
+\setcounter{algocf}{15}
\setcounter{nprt@mantissa@digitsbefore}{0}
\setcounter{nprt@mantissa@digitsafter}{0}
\setcounter{nprt@exponent@digitsbefore}{0}
\@writefile{toc}{\author{H. Wang}{}}
\@writefile{toc}{\author{H. Yu}{}}
\@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {16}GPU-Accelerated Envelope-Following Method}{375}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {16}GPU-Accelerated Envelope-Following Method}{377}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {section}{\numberline {16.1}Introduction}{375}}
-\newlabel{fig:ef1}{{16.1(a)}{377}}
-\newlabel{sub@fig:ef1}{{(a)}{377}}
-\newlabel{fig:ef2}{{16.1(b)}{377}}
-\newlabel{sub@fig:ef2}{{(b)}{377}}
-\@writefile{lof}{\contentsline {figure}{\numberline {16.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{377}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{377}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{377}}
-\newlabel{fig:ef_intro}{{16.1}{377}}
-\@writefile{toc}{\contentsline {section}{\numberline {16.2}The envelope-following method in a nutshell}{378}}
-\newlabel{sec:ef}{{16.2}{378}}
-\newlabel{eq:dae}{{16.1}{378}}
-\newlabel{eq:Newton}{{16.2}{379}}
-\newlabel{eq:A}{{16.3}{379}}
-\@writefile{toc}{\contentsline {section}{\numberline {16.3}New parallel envelope-following method}{380}}
-\newlabel{sec:gmres}{{16.3}{380}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.1}GMRES solver for Newton update equation}{380}}
-\@writefile{lof}{\contentsline {figure}{\numberline {16.2}{\ignorespaces The flow of envelope-following method.\relax }}{381}}
-\newlabel{fig:ef_flow}{{16.2}{381}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {17}{\ignorespaces Standard GMRES algorithm.\relax }}{382}}
-\newlabel{alg:GMRES}{{17}{382}}
-\newlabel{line:mvp}{{5}{382}}
-\newlabel{line:newnorm}{{11}{382}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.2}Parallelization on GPU platforms}{382}}
-\newlabel{sec:gpu}{{16.3.2}{382}}
-\@writefile{lof}{\contentsline {figure}{\numberline {16.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{383}}
-\newlabel{fig:gmres}{{16.3}{383}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.3}Gear-2 based sensitivity calculation}{384}}
-\newlabel{sec:gear}{{16.3.3}{384}}
-\newlabel{eq:BE}{{16.4}{384}}
-\newlabel{eq:sens1}{{16.5}{384}}
-\newlabel{eq:Gear_t2}{{16.6}{385}}
-\newlabel{eq:sens2}{{16.7}{385}}
-\newlabel{eq:Gear_t3}{{16.8}{385}}
-\newlabel{eq:sensM}{{16.9}{385}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {18}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{386}}
-\newlabel{alg:mf_Gear}{{18}{386}}
-\newlabel{line:mf_Gear_loop}{{4}{386}}
-\newlabel{line:shift}{{8}{386}}
-\@writefile{toc}{\contentsline {section}{\numberline {16.4}Numerical examples}{386}}
-\newlabel{sec:exp}{{16.4}{386}}
-\@writefile{lof}{\contentsline {figure}{\numberline {16.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{387}}
-\newlabel{fig:flyback}{{16.4}{387}}
-\@writefile{lof}{\contentsline {figure}{\numberline {16.5}{\ignorespaces Illustration of power/ground network model.\relax }}{387}}
-\newlabel{fig:pg}{{16.5}{387}}
-\newlabel{fig:flybackWhole}{{16.6(a)}{388}}
-\newlabel{sub@fig:flybackWhole}{{(a)}{388}}
-\newlabel{fig:flybackZoom}{{16.6(b)}{388}}
-\newlabel{sub@fig:flybackZoom}{{(b)}{388}}
-\@writefile{lof}{\contentsline {figure}{\numberline {16.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{388}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{388}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{388}}
-\newlabel{fig:flyback_wave}{{16.6}{388}}
-\@writefile{lof}{\contentsline {figure}{\numberline {16.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{389}}
-\newlabel{fig:buck_wave}{{16.7}{389}}
-\@writefile{lot}{\contentsline {table}{\numberline {16.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{389}}
-\newlabel{table:circuit}{{16.1}{389}}
-\@writefile{toc}{\contentsline {section}{\numberline {16.5}Summary}{390}}
-\newlabel{sec:summary}{{16.5}{390}}
-\@writefile{toc}{\contentsline {section}{\numberline {16.6}Glossary}{390}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{390}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.1}Introduction}{377}}
+\newlabel{fig:ef1}{{16.1(a)}{379}}
+\newlabel{sub@fig:ef1}{{(a)}{379}}
+\newlabel{fig:ef2}{{16.1(b)}{379}}
+\newlabel{sub@fig:ef2}{{(b)}{379}}
+\@writefile{lof}{\contentsline {figure}{\numberline {16.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{379}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{379}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{379}}
+\newlabel{fig:ef_intro}{{16.1}{379}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.2}The envelope-following method in a nutshell}{380}}
+\newlabel{sec:ef}{{16.2}{380}}
+\newlabel{eq:dae}{{16.1}{380}}
+\newlabel{eq:Newton}{{16.2}{381}}
+\newlabel{eq:A}{{16.3}{381}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.3}New parallel envelope-following method}{382}}
+\newlabel{sec:gmres}{{16.3}{382}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.1}GMRES solver for Newton update equation}{382}}
+\@writefile{lof}{\contentsline {figure}{\numberline {16.2}{\ignorespaces The flow of envelope-following method.\relax }}{383}}
+\newlabel{fig:ef_flow}{{16.2}{383}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {19}{\ignorespaces Standard GMRES algorithm.\relax }}{384}}
+\newlabel{alg:GMRES}{{19}{384}}
+\newlabel{line:mvp}{{5}{384}}
+\newlabel{line:newnorm}{{11}{384}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.2}Parallelization on GPU platforms}{384}}
+\newlabel{sec:gpu}{{16.3.2}{384}}
+\@writefile{lof}{\contentsline {figure}{\numberline {16.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{385}}
+\newlabel{fig:gmres}{{16.3}{385}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.3}Gear-2 based sensitivity calculation}{386}}
+\newlabel{sec:gear}{{16.3.3}{386}}
+\newlabel{eq:BE}{{16.4}{386}}
+\newlabel{eq:sens1}{{16.5}{386}}
+\newlabel{eq:Gear_t2}{{16.6}{387}}
+\newlabel{eq:sens2}{{16.7}{387}}
+\newlabel{eq:Gear_t3}{{16.8}{387}}
+\newlabel{eq:sensM}{{16.9}{387}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {20}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{388}}
+\newlabel{alg:mf_Gear}{{20}{388}}
+\newlabel{line:mf_Gear_loop}{{4}{388}}
+\newlabel{line:shift}{{8}{388}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.4}Numerical examples}{388}}
+\newlabel{sec:exp}{{16.4}{388}}
+\@writefile{lof}{\contentsline {figure}{\numberline {16.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{389}}
+\newlabel{fig:flyback}{{16.4}{389}}
+\@writefile{lof}{\contentsline {figure}{\numberline {16.5}{\ignorespaces Illustration of power/ground network model.\relax }}{389}}
+\newlabel{fig:pg}{{16.5}{389}}
+\newlabel{fig:flybackWhole}{{16.6(a)}{390}}
+\newlabel{sub@fig:flybackWhole}{{(a)}{390}}
+\newlabel{fig:flybackZoom}{{16.6(b)}{390}}
+\newlabel{sub@fig:flybackZoom}{{(b)}{390}}
+\@writefile{lof}{\contentsline {figure}{\numberline {16.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{390}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{390}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{390}}
+\newlabel{fig:flyback_wave}{{16.6}{390}}
+\@writefile{lof}{\contentsline {figure}{\numberline {16.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{391}}
+\newlabel{fig:buck_wave}{{16.7}{391}}
+\@writefile{lot}{\contentsline {table}{\numberline {16.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{391}}
+\newlabel{table:circuit}{{16.1}{391}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.5}Summary}{392}}
+\newlabel{sec:summary}{{16.5}{392}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.6}Glossary}{392}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{392}}
\@setckpt{Chapters/chapter16/ch16}{
-\setcounter{page}{392}
+\setcounter{page}{394}
\setcounter{equation}{9}
\setcounter{enumi}{2}
\setcounter{enumii}{0}
\setcounter{lstnumber}{9}
\setcounter{ContinuedFloat}{0}
\setcounter{AlgoLine}{8}
-\setcounter{algocfline}{18}
-\setcounter{algocfproc}{18}
-\setcounter{algocf}{18}
+\setcounter{algocfline}{20}
+\setcounter{algocfproc}{20}
+\setcounter{algocf}{20}
\setcounter{nprt@mantissa@digitsbefore}{0}
\setcounter{nprt@mantissa@digitsafter}{0}
\setcounter{nprt@exponent@digitsbefore}{0}
\@writefile{toc}{\author{B\IeC {\'e}n\IeC {\'e}dicte Herrmann}{}}
\@writefile{toc}{\author{Laurent Philippe}{}}
\@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {17}Implementing Multi-Agent Systems on GPU}{395}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {17}Implementing Multi-Agent Systems on GPU}{397}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
-\newlabel{chapter17}{{17}{396}}
-\@writefile{toc}{\contentsline {section}{\numberline {17.1}Introduction}{396}}
-\newlabel{ch17:intro}{{17.1}{396}}
-\@writefile{toc}{\contentsline {section}{\numberline {17.2}Running Agent-Based Simulations}{397}}
-\newlabel{ch17:ABM}{{17.2}{397}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.1}Multi-agent systems and parallelism}{397}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.2}MAS Implementation on GPU}{399}}
-\newlabel{ch17:subsec:gpu}{{17.2.2}{399}}
-\@writefile{toc}{\contentsline {section}{\numberline {17.3}A first practical example}{400}}
-\newlabel{ch17:sec:1stmodel}{{17.3}{400}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.1}The Collembola model}{400}}
-\newlabel{ch17:subsec:collembolamodel}{{17.3.1}{400}}
-\@writefile{lof}{\contentsline {figure}{\numberline {17.1}{\ignorespaces Evolution algorithm of Collembola model\relax }}{401}}
-\newlabel{ch17:fig:collem_algorithm}{{17.1}{401}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.2}Collembola Implementation}{401}}
-\newlabel{ch17:listing:collembola-diffuse}{{17.1}{402}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.1}Collembola OpenCL Diffusion kernel}{402}}
-\newlabel{ch17:listing:collembola-reduc}{{17.2}{402}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.2}Collembola OpenCL reduction kernel}{402}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.3}Collembola performance}{403}}
-\@writefile{lof}{\contentsline {figure}{\numberline {17.2}{\ignorespaces Performance of the Collembola model on CPU and GPU\relax }}{404}}
-\newlabel{ch17:fig:mior_perfs_collem}{{17.2}{404}}
-\@writefile{toc}{\contentsline {section}{\numberline {17.4}Second example}{404}}
-\newlabel{ch17:sec:2ndmodel}{{17.4}{404}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.1}The MIOR model}{404}}
-\newlabel{ch17:subsec:miormodel}{{17.4.1}{404}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {19}{\ignorespaces Evolution step of each Meta-Mior (microbial colony) agent\relax }}{405}}
-\newlabel{ch17:seqalgo}{{19}{405}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.2}MIOR Implementation}{405}}
-\@writefile{lof}{\contentsline {figure}{\numberline {17.3}{\ignorespaces Execution distribution retained on GPU\relax }}{406}}
-\newlabel{ch17:fig:gpu_distribution}{{17.3}{406}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.1}Execution mapping on GPU}{406}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.2}Data structures translation}{407}}
-\newlabel{ch17:subsec:datastructures}{{17.4.2.2}{407}}
-\newlabel{ch17:listing:mior_data_structures}{{17.3}{407}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.3}Main data structures used in a MIOR simulation}{407}}
-\@writefile{lof}{\contentsline {figure}{\numberline {17.4}{\ignorespaces Compact representation of the topology of a MIOR simulation\relax }}{408}}
-\newlabel{ch17:fig:csr_representation}{{17.4}{408}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.3}Critical resources access management}{408}}
-\newlabel{ch17:subsec:concurrency}{{17.4.2.3}{408}}
-\newlabel{ch17:listing:mior_kernels}{{17.4}{409}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.4}Main MIOR kernel}{409}}
-\newlabel{ch17:fig:mior_launcher}{{17.5}{410}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.5}MIOR simulation launcher}{410}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.4}Termination detection}{410}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.3}Performance of MIOR implementations}{411}}
-\newlabel{ch17:subsec:miorexperiments}{{17.4.3}{411}}
-\@writefile{lof}{\contentsline {figure}{\numberline {17.5}{\ignorespaces CPU and GPU performance on a Tesla C1060 node\relax }}{412}}
-\newlabel{ch17:fig:mior_perfs_tesla}{{17.5}{412}}
-\@writefile{lof}{\contentsline {figure}{\numberline {17.6}{\ignorespaces CPU and GPU performance on a personal computer with a Geforce 8800GT\relax }}{413}}
-\newlabel{ch17:fig:mior_perfs_8800gt}{{17.6}{413}}
-\@writefile{toc}{\contentsline {section}{\numberline {17.5}Analysis and recommendations}{413}}
-\newlabel{ch17:analysis}{{17.5}{413}}
-\@writefile{lof}{\contentsline {figure}{\numberline {17.7}{\ignorespaces Execution time of one multi-simulation kernel on the Tesla platform\relax }}{414}}
-\newlabel{ch17:fig:monokernel_graph}{{17.7}{414}}
-\@writefile{lof}{\contentsline {figure}{\numberline {17.8}{\ignorespaces Total execution time for 1000 simulations on the Tesla platform, while varying the number of simulations for each kernel\relax }}{414}}
-\newlabel{ch17:fig:multikernel_graph}{{17.8}{414}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.1}Analysis}{414}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.2}MAS execution workflow}{415}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.3}Implementation challenges}{416}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.4}MCSMA}{416}}
-\newlabel{ch17:Mcsma}{{17.5.4}{416}}
-\@writefile{toc}{\contentsline {section}{\numberline {17.6}Conclusion}{417}}
-\newlabel{ch17:conclusion}{{17.6}{417}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{418}}
+\newlabel{chapter17}{{17}{398}}
+\@writefile{toc}{\contentsline {section}{\numberline {17.1}Introduction}{398}}
+\newlabel{ch17:intro}{{17.1}{398}}
+\@writefile{toc}{\contentsline {section}{\numberline {17.2}Running Agent-Based Simulations}{399}}
+\newlabel{ch17:ABM}{{17.2}{399}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.1}Multi-agent systems and parallelism}{399}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.2}MAS Implementation on GPU}{401}}
+\newlabel{ch17:subsec:gpu}{{17.2.2}{401}}
+\@writefile{toc}{\contentsline {section}{\numberline {17.3}A first practical example}{402}}
+\newlabel{ch17:sec:1stmodel}{{17.3}{402}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.1}The Collembola model}{402}}
+\newlabel{ch17:subsec:collembolamodel}{{17.3.1}{402}}
+\@writefile{lof}{\contentsline {figure}{\numberline {17.1}{\ignorespaces Evolution algorithm of Collembola model\relax }}{403}}
+\newlabel{ch17:fig:collem_algorithm}{{17.1}{403}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.2}Collembola Implementation}{403}}
+\newlabel{ch17:listing:collembola-diffuse}{{17.1}{404}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.1}Collembola OpenCL Diffusion kernel}{404}}
+\newlabel{ch17:listing:collembola-reduc}{{17.2}{404}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.2}Collembola OpenCL reduction kernel}{404}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.3}Collembola performance}{405}}
+\@writefile{lof}{\contentsline {figure}{\numberline {17.2}{\ignorespaces Performance of the Collembola model on CPU and GPU\relax }}{406}}
+\newlabel{ch17:fig:mior_perfs_collem}{{17.2}{406}}
+\@writefile{toc}{\contentsline {section}{\numberline {17.4}Second example}{406}}
+\newlabel{ch17:sec:2ndmodel}{{17.4}{406}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.1}The MIOR model}{406}}
+\newlabel{ch17:subsec:miormodel}{{17.4.1}{406}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {21}{\ignorespaces Evolution step of each Meta-Mior (microbial colony) agent\relax }}{407}}
+\newlabel{ch17:seqalgo}{{21}{407}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.2}MIOR Implementation}{407}}
+\@writefile{lof}{\contentsline {figure}{\numberline {17.3}{\ignorespaces Execution distribution retained on GPU\relax }}{408}}
+\newlabel{ch17:fig:gpu_distribution}{{17.3}{408}}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.1}Execution mapping on GPU}{408}}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.2}Data structures translation}{409}}
+\newlabel{ch17:subsec:datastructures}{{17.4.2.2}{409}}
+\newlabel{ch17:listing:mior_data_structures}{{17.3}{409}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.3}Main data structures used in a MIOR simulation}{409}}
+\@writefile{lof}{\contentsline {figure}{\numberline {17.4}{\ignorespaces Compact representation of the topology of a MIOR simulation\relax }}{410}}
+\newlabel{ch17:fig:csr_representation}{{17.4}{410}}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.3}Critical resources access management}{410}}
+\newlabel{ch17:subsec:concurrency}{{17.4.2.3}{410}}
+\newlabel{ch17:listing:mior_kernels}{{17.4}{411}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.4}Main MIOR kernel}{411}}
+\newlabel{ch17:fig:mior_launcher}{{17.5}{412}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.5}MIOR simulation launcher}{412}}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.4}Termination detection}{412}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.3}Performance of MIOR implementations}{413}}
+\newlabel{ch17:subsec:miorexperiments}{{17.4.3}{413}}
+\@writefile{lof}{\contentsline {figure}{\numberline {17.5}{\ignorespaces CPU and GPU performance on a Tesla C1060 node\relax }}{414}}
+\newlabel{ch17:fig:mior_perfs_tesla}{{17.5}{414}}
+\@writefile{lof}{\contentsline {figure}{\numberline {17.6}{\ignorespaces CPU and GPU performance on a personal computer with a Geforce 8800GT\relax }}{415}}
+\newlabel{ch17:fig:mior_perfs_8800gt}{{17.6}{415}}
+\@writefile{toc}{\contentsline {section}{\numberline {17.5}Analysis and recommendations}{415}}
+\newlabel{ch17:analysis}{{17.5}{415}}
+\@writefile{lof}{\contentsline {figure}{\numberline {17.7}{\ignorespaces Execution time of one multi-simulation kernel on the Tesla platform\relax }}{416}}
+\newlabel{ch17:fig:monokernel_graph}{{17.7}{416}}
+\@writefile{lof}{\contentsline {figure}{\numberline {17.8}{\ignorespaces Total execution time for 1000 simulations on the Tesla platform, while varying the number of simulations for each kernel\relax }}{416}}
+\newlabel{ch17:fig:multikernel_graph}{{17.8}{416}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.1}Analysis}{416}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.2}MAS execution workflow}{417}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.3}Implementation challenges}{418}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.4}MCSMA}{418}}
+\newlabel{ch17:Mcsma}{{17.5.4}{418}}
+\@writefile{toc}{\contentsline {section}{\numberline {17.6}Conclusion}{419}}
+\newlabel{ch17:conclusion}{{17.6}{419}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{420}}
\@setckpt{Chapters/chapter17/ch17}{
-\setcounter{page}{422}
+\setcounter{page}{424}
\setcounter{equation}{0}
\setcounter{enumi}{3}
\setcounter{enumii}{0}
\setcounter{lstnumber}{21}
\setcounter{ContinuedFloat}{0}
\setcounter{AlgoLine}{17}
-\setcounter{algocfline}{19}
-\setcounter{algocfproc}{19}
-\setcounter{algocf}{19}
+\setcounter{algocfline}{21}
+\setcounter{algocfproc}{21}
+\setcounter{algocf}{21}
\setcounter{nprt@mantissa@digitsbefore}{0}
\setcounter{nprt@mantissa@digitsafter}{0}
\setcounter{nprt@exponent@digitsbefore}{0}
\@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}}
\@writefile{toc}{\author{Christophe Guyeux}{}}
\@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {18}Pseudorandom Number Generator on GPU}{423}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {18}Pseudorandom Number Generator on GPU}{425}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
-\newlabel{chapter18}{{18}{423}}
-\@writefile{toc}{\contentsline {section}{\numberline {18.1}Introduction}{423}}
-\@writefile{toc}{\contentsline {section}{\numberline {18.2}Basic Remindees}{425}}
-\newlabel{section:BASIC RECALLS}{{18.2}{425}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.1}A Short Presentation of Chaos}{425}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.2}On Devaney's Definition of Chaos}{425}}
-\newlabel{sec:dev}{{18.2.2}{425}}
-\newlabel{Devaney}{{18.1}{425}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.3}Chaotic iterations}{426}}
-\newlabel{subsection:Chaotic iterations}{{18.2.3}{426}}
-\newlabel{Chaotic iterations}{{2}{426}}
-\newlabel{eq:generalIC}{{18.4}{427}}
-\newlabel{equation Oplus}{{18.5}{427}}
-\@writefile{toc}{\contentsline {section}{\numberline {18.3}Toward Efficiency and Improvement for CI PRNG}{427}}
-\newlabel{sec:efficient PRNG}{{18.3}{427}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{427}}
-\newlabel{algo:seqCIPRNG}{{18.1}{427}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {18.1}C code of the sequential PRNG based on chaotic iterations}{427}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{428}}
-\newlabel{sec:efficient PRNG gpu}{{18.3.2}{428}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.3}Naive Version for GPU}{428}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {20}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{429}}
-\newlabel{algo:gpu_kernel}{{20}{429}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.4}Improved Version for GPU}{429}}
-\newlabel{IR}{{21}{430}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {21}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{430}}
-\newlabel{algo:gpu_kernel2}{{21}{430}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.5}Chaos Evaluation of the Improved Version}{430}}
-\@writefile{toc}{\contentsline {section}{\numberline {18.4}Experiments}{431}}
-\newlabel{sec:experiments}{{18.4}{431}}
-\@writefile{toc}{\contentsline {section}{\numberline {18.5}Summary}{431}}
-\@writefile{lof}{\contentsline {figure}{\numberline {18.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{432}}
-\newlabel{fig:time_xorlike_gpu}{{18.1}{432}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{433}}
+\newlabel{chapter18}{{18}{425}}
+\@writefile{toc}{\contentsline {section}{\numberline {18.1}Introduction}{425}}
+\@writefile{toc}{\contentsline {section}{\numberline {18.2}Basic Remindees}{427}}
+\newlabel{section:BASIC RECALLS}{{18.2}{427}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.1}A Short Presentation of Chaos}{427}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.2}On Devaney's Definition of Chaos}{427}}
+\newlabel{sec:dev}{{18.2.2}{427}}
+\newlabel{Devaney}{{18.1}{427}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.3}Chaotic iterations}{428}}
+\newlabel{subsection:Chaotic iterations}{{18.2.3}{428}}
+\newlabel{Chaotic iterations}{{2}{428}}
+\newlabel{eq:generalIC}{{18.4}{429}}
+\newlabel{equation Oplus}{{18.5}{429}}
+\@writefile{toc}{\contentsline {section}{\numberline {18.3}Toward Efficiency and Improvement for CI PRNG}{429}}
+\newlabel{sec:efficient PRNG}{{18.3}{429}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{429}}
+\newlabel{algo:seqCIPRNG}{{18.1}{429}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {18.1}C code of the sequential PRNG based on chaotic iterations}{429}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{430}}
+\newlabel{sec:efficient PRNG gpu}{{18.3.2}{430}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.3}Naive Version for GPU}{430}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {22}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{431}}
+\newlabel{algo:gpu_kernel}{{22}{431}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.4}Improved Version for GPU}{431}}
+\newlabel{IR}{{23}{432}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {23}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{432}}
+\newlabel{algo:gpu_kernel2}{{23}{432}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.5}Chaos Evaluation of the Improved Version}{432}}
+\@writefile{toc}{\contentsline {section}{\numberline {18.4}Experiments}{433}}
+\newlabel{sec:experiments}{{18.4}{433}}
+\@writefile{toc}{\contentsline {section}{\numberline {18.5}Summary}{433}}
+\@writefile{lof}{\contentsline {figure}{\numberline {18.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{434}}
+\newlabel{fig:time_xorlike_gpu}{{18.1}{434}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{435}}
\@setckpt{Chapters/chapter18/ch18}{
-\setcounter{page}{435}
+\setcounter{page}{437}
\setcounter{equation}{5}
\setcounter{enumi}{3}
\setcounter{enumii}{0}
\setcounter{lstnumber}{15}
\setcounter{ContinuedFloat}{0}
\setcounter{AlgoLine}{14}
-\setcounter{algocfline}{21}
-\setcounter{algocfproc}{21}
-\setcounter{algocf}{21}
+\setcounter{algocfline}{23}
+\setcounter{algocfproc}{23}
+\setcounter{algocf}{23}
\setcounter{nprt@mantissa@digitsbefore}{0}
\setcounter{nprt@mantissa@digitsafter}{0}
\setcounter{nprt@exponent@digitsbefore}{0}
\setlength\figureheight{0.35\textwidth}
\setlength\figurewidth{0.37\textwidth}
\subfigure[Performance scaling]{
- {\small\input{Chapters/chapter7/figures/PararealScaletestGTX590.tikz}}
+% {\small\input{Chapters/chapter7/figures/PararealScaletestGTX590.tikz}}
+ \includegraphics[width=0.5\textwidth]{Chapters/chapter7/figures/PararealScaletestGTX590_conv.pdf}
}
\subfigure[Speedup]{
- {\small\input{Chapters/chapter7/figures/PararealSpeedupGTX590.tikz}}
+ % {\small\input{Chapters/chapter7/figures/PararealSpeedupGTX590.tikz}}
+ \includegraphics[width=0.5\textwidth]{Chapters/chapter7/figures/PararealSpeedupGTX590_conv.pdf}
}
\end{center}
\caption{(a) Parareal absolute timings for an increasingly number of water waves traveling one wave length, each wave resolution is ($33\times 9$). (b) Parareal speedup for two to sixteen compute nodes compared to the purely sequential single GPU solver. Notice how insensitive the parareal scheme is to the size of the problem solved. Test environment 2.}\label{ch7:fig:DDPA_SPEEDUP}
\chapterauthor{Nouredine Melab}{Universit\'e Lille 1 CNRS/LIFL, INRIA Lille Nord Europe, Cit\'e scientifique - 59655, Villeneuve d'Ascq cedex, France\\}
\chapter{GPU-accelerated Tree-based Exact Optimization Methods}
-
+\label{ch8:GPU-accelerated-tree-based-exact-optimization-methods}
\section{Introduction}
\label{ch8:introduction}
In the near future, we plan to extend this work to a cluster of GPU-accelerated multi-core processors. From the application point of view, the objective is to optimally solve challenging and unsolved Flow-Shop instances as we did it for one 50$\times$20 problem instance with grid computing \cite{ch8:Mezmaz_2007}. Finally, we plan to investigate other lower bound functions to deal with other combinatorial optimization problems.
-\putbib[Chapters/chapter8/biblio8]
\ No newline at end of file
+\putbib[Chapters/chapter8/biblio8]