From: couturie Date: Tue, 23 Apr 2013 18:50:21 +0000 (+0200) Subject: new X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/commitdiff_plain/620e57fe130fbf0a4aa2cba23938159c7ab14719?hp=--cc new --- 620e57fe130fbf0a4aa2cba23938159c7ab14719 diff --git a/BookGPU/BookGPU.tex b/BookGPU/BookGPU.tex index 0d2d4e8..5895e6e 100755 --- a/BookGPU/BookGPU.tex +++ b/BookGPU/BookGPU.tex @@ -147,7 +147,7 @@ \makeindex - \includeonly{Chapters/chapter10/ch10} +% \includeonly{Chapters/chapter10/ch10} \begin{document} diff --git a/BookGPU/Chapters/chapter12/ch12.aux b/BookGPU/Chapters/chapter12/ch12.aux index 2252156..65b67a8 100644 --- a/BookGPU/Chapters/chapter12/ch12.aux +++ b/BookGPU/Chapters/chapter12/ch12.aux @@ -3,90 +3,90 @@ \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}} \@writefile{toc}{\author{Jacques Bahi}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {11}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{259}} +\@writefile{toc}{\contentsline {chapter}{\numberline {12}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{291}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{ch12}{{11}{259}} -\@writefile{toc}{\contentsline {section}{\numberline {11.1}Introduction}{259}} -\newlabel{ch12:sec:01}{{11.1}{259}} -\@writefile{toc}{\contentsline {section}{\numberline {11.2}Krylov iterative methods}{260}} -\newlabel{ch12:sec:02}{{11.2}{260}} -\newlabel{ch12:eq:01}{{11.1}{260}} -\newlabel{ch12:eq:02}{{11.2}{260}} -\newlabel{ch12:eq:03}{{11.3}{260}} -\newlabel{ch12:eq:11}{{11.4}{261}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.1}CG method}{261}} -\newlabel{ch12:sec:02.01}{{11.2.1}{261}} -\newlabel{ch12:eq:04}{{11.5}{261}} -\newlabel{ch12:eq:05}{{11.6}{261}} -\newlabel{ch12:eq:06}{{11.7}{261}} -\newlabel{ch12:eq:07}{{11.8}{261}} -\newlabel{ch12:eq:08}{{11.9}{261}} -\newlabel{ch12:eq:09}{{11.10}{261}} -\@writefile{loa}{\contentsline {algocf}{\numberline {9}{\ignorespaces Left-preconditioned CG method\relax }}{262}} -\newlabel{ch12:alg:01}{{9}{262}} -\newlabel{ch12:eq:10}{{11.11}{262}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.2}GMRES method}{263}} -\newlabel{ch12:sec:02.02}{{11.2.2}{263}} -\newlabel{ch12:eq:12}{{11.12}{263}} -\newlabel{ch12:eq:13}{{11.13}{263}} -\newlabel{ch12:eq:14}{{11.14}{263}} -\newlabel{ch12:eq:15}{{11.15}{263}} -\newlabel{ch12:eq:16}{{11.16}{263}} -\newlabel{ch12:eq:17}{{11.17}{263}} -\newlabel{ch12:eq:18}{{11.18}{263}} -\newlabel{ch12:eq:19}{{11.19}{263}} -\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{264}} -\newlabel{ch12:alg:02}{{10}{264}} -\@writefile{toc}{\contentsline {section}{\numberline {11.3}Parallel implementation on a GPU cluster}{265}} -\newlabel{ch12:sec:03}{{11.3}{265}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.1}Data partitioning}{265}} -\newlabel{ch12:sec:03.01}{{11.3.1}{265}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{266}} -\newlabel{ch12:fig:01}{{11.1}{266}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.2}GPU computing}{266}} -\newlabel{ch12:sec:03.02}{{11.3.2}{266}} -\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.3}Data communications}{267}} -\newlabel{ch12:sec:03.03}{{11.3.3}{267}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{268}} -\newlabel{ch12:fig:02}{{11.2}{268}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{269}} -\newlabel{ch12:fig:03}{{11.3}{269}} -\@writefile{toc}{\contentsline {section}{\numberline {11.4}Experimental results}{270}} -\newlabel{ch12:sec:04}{{11.4}{270}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{270}} -\newlabel{ch12:fig:04}{{11.4}{270}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{271}} -\newlabel{ch12:fig:05}{{11.5}{271}} -\@writefile{lot}{\contentsline {table}{\numberline {11.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{271}} -\newlabel{ch12:tab:01}{{11.1}{271}} -\@writefile{lot}{\contentsline {table}{\numberline {11.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{272}} -\newlabel{ch12:tab:02}{{11.2}{272}} -\@writefile{lot}{\contentsline {table}{\numberline {11.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{272}} -\newlabel{ch12:tab:03}{{11.3}{272}} -\newlabel{ch12:eq:20}{{11.20}{273}} -\@writefile{lof}{\contentsline {figure}{\numberline {11.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{274}} -\newlabel{ch12:fig:06}{{11.6}{274}} -\@writefile{lot}{\contentsline {table}{\numberline {11.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{274}} -\newlabel{ch12:tab:04}{{11.4}{274}} -\@writefile{lot}{\contentsline {table}{\numberline {11.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{275}} -\newlabel{ch12:tab:05}{{11.5}{275}} -\@writefile{toc}{\contentsline {section}{\numberline {11.5}Conclusion}{275}} -\newlabel{ch12:sec:05}{{11.5}{275}} -\@writefile{lot}{\contentsline {table}{\numberline {11.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{276}} -\newlabel{ch12:tab:06}{{11.6}{276}} -\@writefile{toc}{\contentsline {section}{Bibliography}{276}} +\newlabel{ch12}{{12}{291}} +\@writefile{toc}{\contentsline {section}{\numberline {12.1}Introduction}{291}} +\newlabel{ch12:sec:01}{{12.1}{291}} +\@writefile{toc}{\contentsline {section}{\numberline {12.2}Krylov iterative methods}{292}} +\newlabel{ch12:sec:02}{{12.2}{292}} +\newlabel{ch12:eq:01}{{12.1}{292}} +\newlabel{ch12:eq:02}{{12.2}{292}} +\newlabel{ch12:eq:03}{{12.3}{292}} +\newlabel{ch12:eq:11}{{12.4}{293}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.1}CG method}{293}} +\newlabel{ch12:sec:02.01}{{12.2.1}{293}} +\newlabel{ch12:eq:04}{{12.5}{293}} +\newlabel{ch12:eq:05}{{12.6}{293}} +\newlabel{ch12:eq:06}{{12.7}{293}} +\newlabel{ch12:eq:07}{{12.8}{293}} +\newlabel{ch12:eq:08}{{12.9}{293}} +\newlabel{ch12:eq:09}{{12.10}{293}} +\@writefile{loa}{\contentsline {algocf}{\numberline {12}{\ignorespaces Left-preconditioned CG method\relax }}{294}} +\newlabel{ch12:alg:01}{{12}{294}} +\newlabel{ch12:eq:10}{{12.11}{294}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.2.2}GMRES method}{295}} +\newlabel{ch12:sec:02.02}{{12.2.2}{295}} +\newlabel{ch12:eq:12}{{12.12}{295}} +\newlabel{ch12:eq:13}{{12.13}{295}} +\newlabel{ch12:eq:14}{{12.14}{295}} +\newlabel{ch12:eq:15}{{12.15}{295}} +\newlabel{ch12:eq:16}{{12.16}{295}} +\newlabel{ch12:eq:17}{{12.17}{295}} +\newlabel{ch12:eq:18}{{12.18}{295}} +\newlabel{ch12:eq:19}{{12.19}{295}} +\@writefile{loa}{\contentsline {algocf}{\numberline {13}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{296}} +\newlabel{ch12:alg:02}{{13}{296}} +\@writefile{toc}{\contentsline {section}{\numberline {12.3}Parallel implementation on a GPU cluster}{297}} +\newlabel{ch12:sec:03}{{12.3}{297}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.1}Data partitioning}{297}} +\newlabel{ch12:sec:03.01}{{12.3.1}{297}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{298}} +\newlabel{ch12:fig:01}{{12.1}{298}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.2}GPU computing}{298}} +\newlabel{ch12:sec:03.02}{{12.3.2}{298}} +\@writefile{toc}{\contentsline {subsection}{\numberline {12.3.3}Data communications}{299}} +\newlabel{ch12:sec:03.03}{{12.3.3}{299}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{300}} +\newlabel{ch12:fig:02}{{12.2}{300}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{301}} +\newlabel{ch12:fig:03}{{12.3}{301}} +\@writefile{toc}{\contentsline {section}{\numberline {12.4}Experimental results}{302}} +\newlabel{ch12:sec:04}{{12.4}{302}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{302}} +\newlabel{ch12:fig:04}{{12.4}{302}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{303}} +\newlabel{ch12:fig:05}{{12.5}{303}} +\@writefile{lot}{\contentsline {table}{\numberline {12.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{303}} +\newlabel{ch12:tab:01}{{12.1}{303}} +\@writefile{lot}{\contentsline {table}{\numberline {12.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{304}} +\newlabel{ch12:tab:02}{{12.2}{304}} +\@writefile{lot}{\contentsline {table}{\numberline {12.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{304}} +\newlabel{ch12:tab:03}{{12.3}{304}} +\newlabel{ch12:eq:20}{{12.20}{305}} +\@writefile{lof}{\contentsline {figure}{\numberline {12.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{306}} +\newlabel{ch12:fig:06}{{12.6}{306}} +\@writefile{lot}{\contentsline {table}{\numberline {12.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{306}} +\newlabel{ch12:tab:04}{{12.4}{306}} +\@writefile{lot}{\contentsline {table}{\numberline {12.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{307}} +\newlabel{ch12:tab:05}{{12.5}{307}} +\@writefile{toc}{\contentsline {section}{\numberline {12.5}Conclusion}{307}} +\newlabel{ch12:sec:05}{{12.5}{307}} +\@writefile{lot}{\contentsline {table}{\numberline {12.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{308}} +\newlabel{ch12:tab:06}{{12.6}{308}} +\@writefile{toc}{\contentsline {section}{Bibliography}{308}} \@setckpt{Chapters/chapter12/ch12}{ -\setcounter{page}{278} +\setcounter{page}{310} \setcounter{equation}{22} -\setcounter{enumi}{4} +\setcounter{enumi}{2} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{10} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{5} -\setcounter{chapter}{11} +\setcounter{chapter}{12} \setcounter{section}{5} \setcounter{subsection}{0} \setcounter{subsubsection}{0} @@ -103,9 +103,9 @@ \setcounter{lstnumber}{50} \setcounter{ContinuedFloat}{0} \setcounter{AlgoLine}{29} -\setcounter{algocfline}{10} -\setcounter{algocfproc}{10} -\setcounter{algocf}{10} +\setcounter{algocfline}{13} +\setcounter{algocfproc}{13} +\setcounter{algocf}{13} \setcounter{nprt@mantissa@digitsbefore}{0} \setcounter{nprt@mantissa@digitsafter}{0} \setcounter{nprt@exponent@digitsbefore}{0} diff --git a/BookGPU/Chapters/chapter16/ch16.aux b/BookGPU/Chapters/chapter16/ch16.aux index a0d90ea..75d0256 100644 --- a/BookGPU/Chapters/chapter16/ch16.aux +++ b/BookGPU/Chapters/chapter16/ch16.aux @@ -4,72 +4,72 @@ \@writefile{toc}{\author{H. Wang}{}} \@writefile{toc}{\author{H. Yu}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {15}GPU-Accelerated Envelope-Following Method}{343}} +\@writefile{toc}{\contentsline {chapter}{\numberline {16}GPU-Accelerated Envelope-Following Method}{375}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {15.1}Introduction}{343}} -\newlabel{fig:ef1}{{15.1(a)}{345}} -\newlabel{sub@fig:ef1}{{(a)}{345}} -\newlabel{fig:ef2}{{15.1(b)}{345}} -\newlabel{sub@fig:ef2}{{(b)}{345}} -\@writefile{lof}{\contentsline {figure}{\numberline {15.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{345}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{345}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{345}} -\newlabel{fig:ef_intro}{{15.1}{345}} -\@writefile{toc}{\contentsline {section}{\numberline {15.2}The envelope-following method in a nutshell}{346}} -\newlabel{sec:ef}{{15.2}{346}} -\newlabel{eq:dae}{{15.1}{346}} -\newlabel{eq:Newton}{{15.2}{347}} -\newlabel{eq:A}{{15.3}{347}} -\@writefile{toc}{\contentsline {section}{\numberline {15.3}New parallel envelope-following method}{348}} -\newlabel{sec:gmres}{{15.3}{348}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.1}GMRES solver for Newton update equation}{348}} -\@writefile{lof}{\contentsline {figure}{\numberline {15.2}{\ignorespaces The flow of envelope-following method.\relax }}{349}} -\newlabel{fig:ef_flow}{{15.2}{349}} -\@writefile{loa}{\contentsline {algocf}{\numberline {14}{\ignorespaces Standard GMRES algorithm.\relax }}{350}} -\newlabel{alg:GMRES}{{14}{350}} -\newlabel{line:mvp}{{5}{350}} -\newlabel{line:newnorm}{{11}{350}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.2}Parallelization on GPU platforms}{350}} -\newlabel{sec:gpu}{{15.3.2}{350}} -\@writefile{lof}{\contentsline {figure}{\numberline {15.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{351}} -\newlabel{fig:gmres}{{15.3}{351}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.3}Gear-2 based sensitivity calculation}{352}} -\newlabel{sec:gear}{{15.3.3}{352}} -\newlabel{eq:BE}{{15.4}{352}} -\newlabel{eq:sens1}{{15.5}{352}} -\newlabel{eq:Gear_t2}{{15.6}{353}} -\newlabel{eq:sens2}{{15.7}{353}} -\newlabel{eq:Gear_t3}{{15.8}{353}} -\newlabel{eq:sensM}{{15.9}{353}} -\@writefile{loa}{\contentsline {algocf}{\numberline {15}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{354}} -\newlabel{alg:mf_Gear}{{15}{354}} -\newlabel{line:mf_Gear_loop}{{4}{354}} -\newlabel{line:shift}{{8}{354}} -\@writefile{toc}{\contentsline {section}{\numberline {15.4}Numerical examples}{354}} -\newlabel{sec:exp}{{15.4}{354}} -\@writefile{lof}{\contentsline {figure}{\numberline {15.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{355}} -\newlabel{fig:flyback}{{15.4}{355}} -\@writefile{lof}{\contentsline {figure}{\numberline {15.5}{\ignorespaces Illustration of power/ground network model.\relax }}{355}} -\newlabel{fig:pg}{{15.5}{355}} -\newlabel{fig:flybackWhole}{{15.6(a)}{356}} -\newlabel{sub@fig:flybackWhole}{{(a)}{356}} -\newlabel{fig:flybackZoom}{{15.6(b)}{356}} -\newlabel{sub@fig:flybackZoom}{{(b)}{356}} -\@writefile{lof}{\contentsline {figure}{\numberline {15.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{356}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{356}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{356}} -\newlabel{fig:flyback_wave}{{15.6}{356}} -\@writefile{lof}{\contentsline {figure}{\numberline {15.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{357}} -\newlabel{fig:buck_wave}{{15.7}{357}} -\@writefile{lot}{\contentsline {table}{\numberline {15.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{357}} -\newlabel{table:circuit}{{15.1}{357}} -\@writefile{toc}{\contentsline {section}{\numberline {15.5}Summary}{358}} -\newlabel{sec:summary}{{15.5}{358}} -\@writefile{toc}{\contentsline {section}{\numberline {15.6}Glossary}{358}} -\@writefile{toc}{\contentsline {section}{Bibliography}{358}} +\@writefile{toc}{\contentsline {section}{\numberline {16.1}Introduction}{375}} +\newlabel{fig:ef1}{{16.1(a)}{377}} +\newlabel{sub@fig:ef1}{{(a)}{377}} +\newlabel{fig:ef2}{{16.1(b)}{377}} +\newlabel{sub@fig:ef2}{{(b)}{377}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{377}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{377}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{377}} +\newlabel{fig:ef_intro}{{16.1}{377}} +\@writefile{toc}{\contentsline {section}{\numberline {16.2}The envelope-following method in a nutshell}{378}} +\newlabel{sec:ef}{{16.2}{378}} +\newlabel{eq:dae}{{16.1}{378}} +\newlabel{eq:Newton}{{16.2}{379}} +\newlabel{eq:A}{{16.3}{379}} +\@writefile{toc}{\contentsline {section}{\numberline {16.3}New parallel envelope-following method}{380}} +\newlabel{sec:gmres}{{16.3}{380}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.1}GMRES solver for Newton update equation}{380}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.2}{\ignorespaces The flow of envelope-following method.\relax }}{381}} +\newlabel{fig:ef_flow}{{16.2}{381}} +\@writefile{loa}{\contentsline {algocf}{\numberline {17}{\ignorespaces Standard GMRES algorithm.\relax }}{382}} +\newlabel{alg:GMRES}{{17}{382}} +\newlabel{line:mvp}{{5}{382}} +\newlabel{line:newnorm}{{11}{382}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.2}Parallelization on GPU platforms}{382}} +\newlabel{sec:gpu}{{16.3.2}{382}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{383}} +\newlabel{fig:gmres}{{16.3}{383}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.3}Gear-2 based sensitivity calculation}{384}} +\newlabel{sec:gear}{{16.3.3}{384}} +\newlabel{eq:BE}{{16.4}{384}} +\newlabel{eq:sens1}{{16.5}{384}} +\newlabel{eq:Gear_t2}{{16.6}{385}} +\newlabel{eq:sens2}{{16.7}{385}} +\newlabel{eq:Gear_t3}{{16.8}{385}} +\newlabel{eq:sensM}{{16.9}{385}} +\@writefile{loa}{\contentsline {algocf}{\numberline {18}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{386}} +\newlabel{alg:mf_Gear}{{18}{386}} +\newlabel{line:mf_Gear_loop}{{4}{386}} +\newlabel{line:shift}{{8}{386}} +\@writefile{toc}{\contentsline {section}{\numberline {16.4}Numerical examples}{386}} +\newlabel{sec:exp}{{16.4}{386}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{387}} +\newlabel{fig:flyback}{{16.4}{387}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.5}{\ignorespaces Illustration of power/ground network model.\relax }}{387}} +\newlabel{fig:pg}{{16.5}{387}} +\newlabel{fig:flybackWhole}{{16.6(a)}{388}} +\newlabel{sub@fig:flybackWhole}{{(a)}{388}} +\newlabel{fig:flybackZoom}{{16.6(b)}{388}} +\newlabel{sub@fig:flybackZoom}{{(b)}{388}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{388}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{388}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{388}} +\newlabel{fig:flyback_wave}{{16.6}{388}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{389}} +\newlabel{fig:buck_wave}{{16.7}{389}} +\@writefile{lot}{\contentsline {table}{\numberline {16.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{389}} +\newlabel{table:circuit}{{16.1}{389}} +\@writefile{toc}{\contentsline {section}{\numberline {16.5}Summary}{390}} +\newlabel{sec:summary}{{16.5}{390}} +\@writefile{toc}{\contentsline {section}{\numberline {16.6}Glossary}{390}} +\@writefile{toc}{\contentsline {section}{Bibliography}{390}} \@setckpt{Chapters/chapter16/ch16}{ -\setcounter{page}{360} +\setcounter{page}{392} \setcounter{equation}{9} \setcounter{enumi}{2} \setcounter{enumii}{0} @@ -78,7 +78,7 @@ \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{5} -\setcounter{chapter}{15} +\setcounter{chapter}{16} \setcounter{section}{6} \setcounter{subsection}{0} \setcounter{subsubsection}{0} @@ -95,9 +95,9 @@ \setcounter{lstnumber}{9} \setcounter{ContinuedFloat}{0} \setcounter{AlgoLine}{8} -\setcounter{algocfline}{15} -\setcounter{algocfproc}{15} -\setcounter{algocf}{15} +\setcounter{algocfline}{18} +\setcounter{algocfproc}{18} +\setcounter{algocf}{18} \setcounter{nprt@mantissa@digitsbefore}{0} \setcounter{nprt@mantissa@digitsafter}{0} \setcounter{nprt@exponent@digitsbefore}{0} diff --git a/BookGPU/Chapters/chapter17/ch17.aux b/BookGPU/Chapters/chapter17/ch17.aux index c515b8c..005df6d 100644 --- a/BookGPU/Chapters/chapter17/ch17.aux +++ b/BookGPU/Chapters/chapter17/ch17.aux @@ -6,76 +6,76 @@ \@writefile{toc}{\author{B\IeC {\'e}n\IeC {\'e}dicte Herrmann}{}} \@writefile{toc}{\author{Laurent Philippe}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {16}Implementing MAS on GPU}{363}} +\@writefile{toc}{\contentsline {chapter}{\numberline {17}Implementing Multi-Agent Systems on GPU}{395}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{chapter17}{{16}{364}} -\@writefile{toc}{\contentsline {section}{\numberline {16.1}Introduction}{364}} -\newlabel{ch17:intro}{{16.1}{364}} -\@writefile{toc}{\contentsline {section}{\numberline {16.2}Running Agent-Based Simulations}{365}} -\newlabel{ch17:ABM}{{16.2}{365}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.2.1}Multi-agent systems and parallelism}{365}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.2.2}MAS Implementation on GPU}{367}} -\newlabel{ch17:subsec:gpu}{{16.2.2}{367}} -\@writefile{toc}{\contentsline {section}{\numberline {16.3}A first practical example}{368}} -\newlabel{ch17:sec:1stmodel}{{16.3}{368}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.1}The Collembola model}{368}} -\newlabel{ch17:subsec:collembolamodel}{{16.3.1}{368}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.1}{\ignorespaces Evolution algorithm of Collembola model\relax }}{369}} -\newlabel{ch17:fig:collem_algorithm}{{16.1}{369}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.2}Collembola Implementation}{369}} -\newlabel{ch17:listing:collembola-diffuse}{{16.1}{370}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {16.1}Collembola OpenCL Diffusion kernel}{370}} -\newlabel{ch17:listing:collembola-reduc}{{16.2}{370}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {16.2}Collembola OpenCL reduction kernel}{370}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.3}Collembola performance}{371}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.2}{\ignorespaces Performance of the Collembola model on CPU and GPU\relax }}{372}} -\newlabel{ch17:fig:mior_perfs_collem}{{16.2}{372}} -\@writefile{toc}{\contentsline {section}{\numberline {16.4}Second example}{372}} -\newlabel{ch17:sec:2ndmodel}{{16.4}{372}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.4.1}The MIOR model}{372}} -\newlabel{ch17:subsec:miormodel}{{16.4.1}{372}} -\@writefile{loa}{\contentsline {algocf}{\numberline {16}{\ignorespaces Evolution step of each Meta-Mior (microbial colony) agent\relax }}{373}} -\newlabel{ch17:seqalgo}{{16}{373}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.4.2}MIOR Implementation}{373}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.3}{\ignorespaces Execution distribution retained on GPU\relax }}{374}} -\newlabel{ch17:fig:gpu_distribution}{{16.3}{374}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {16.4.2.1}Execution mapping on GPU}{374}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {16.4.2.2}Data structures translation}{375}} -\newlabel{ch17:subsec:datastructures}{{16.4.2.2}{375}} -\newlabel{ch17:listing:mior_data_structures}{{16.3}{375}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {16.3}Main data structures used in a MIOR simulation}{375}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.4}{\ignorespaces Compact representation of the topology of a MIOR simulation\relax }}{376}} -\newlabel{ch17:fig:csr_representation}{{16.4}{376}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {16.4.2.3}Critical resources access management}{376}} -\newlabel{ch17:subsec:concurrency}{{16.4.2.3}{376}} -\newlabel{ch17:listing:mior_kernels}{{16.4}{377}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {16.4}Main MIOR kernel}{377}} -\newlabel{ch17:fig:mior_launcher}{{16.5}{378}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {16.5}MIOR simulation launcher}{378}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {16.4.2.4}Termination detection}{378}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.4.3}Performance of MIOR implementations}{379}} -\newlabel{ch17:subsec:miorexperiments}{{16.4.3}{379}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.5}{\ignorespaces CPU and GPU performance on a Tesla C1060 node\relax }}{380}} -\newlabel{ch17:fig:mior_perfs_tesla}{{16.5}{380}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.6}{\ignorespaces CPU and GPU performance on a personal computer with a Geforce 8800GT\relax }}{381}} -\newlabel{ch17:fig:mior_perfs_8800gt}{{16.6}{381}} -\@writefile{toc}{\contentsline {section}{\numberline {16.5}Analysis and recommendations}{381}} -\newlabel{ch17:analysis}{{16.5}{381}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.7}{\ignorespaces Execution time of one multi-simulation kernel on the Tesla platform\relax }}{382}} -\newlabel{ch17:fig:monokernel_graph}{{16.7}{382}} -\@writefile{lof}{\contentsline {figure}{\numberline {16.8}{\ignorespaces Total execution time for 1000 simulations on the Tesla platform, while varying the number of simulations for each kernel\relax }}{382}} -\newlabel{ch17:fig:multikernel_graph}{{16.8}{382}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.5.1}Analysis}{382}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.5.2}MAS execution workflow}{383}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.5.3}Implementation challenges}{384}} -\@writefile{toc}{\contentsline {subsection}{\numberline {16.5.4}MCSMA}{384}} -\newlabel{ch17:Mcsma}{{16.5.4}{384}} -\@writefile{toc}{\contentsline {section}{\numberline {16.6}Conclusion}{385}} -\newlabel{ch17:conclusion}{{16.6}{385}} -\@writefile{toc}{\contentsline {section}{Bibliography}{386}} +\newlabel{chapter17}{{17}{396}} +\@writefile{toc}{\contentsline {section}{\numberline {17.1}Introduction}{396}} +\newlabel{ch17:intro}{{17.1}{396}} +\@writefile{toc}{\contentsline {section}{\numberline {17.2}Running Agent-Based Simulations}{397}} +\newlabel{ch17:ABM}{{17.2}{397}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.1}Multi-agent systems and parallelism}{397}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.2}MAS Implementation on GPU}{399}} +\newlabel{ch17:subsec:gpu}{{17.2.2}{399}} +\@writefile{toc}{\contentsline {section}{\numberline {17.3}A first practical example}{400}} +\newlabel{ch17:sec:1stmodel}{{17.3}{400}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.1}The Collembola model}{400}} +\newlabel{ch17:subsec:collembolamodel}{{17.3.1}{400}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.1}{\ignorespaces Evolution algorithm of Collembola model\relax }}{401}} +\newlabel{ch17:fig:collem_algorithm}{{17.1}{401}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.2}Collembola Implementation}{401}} +\newlabel{ch17:listing:collembola-diffuse}{{17.1}{402}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.1}Collembola OpenCL Diffusion kernel}{402}} +\newlabel{ch17:listing:collembola-reduc}{{17.2}{402}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.2}Collembola OpenCL reduction kernel}{402}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.3}Collembola performance}{403}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.2}{\ignorespaces Performance of the Collembola model on CPU and GPU\relax }}{404}} +\newlabel{ch17:fig:mior_perfs_collem}{{17.2}{404}} +\@writefile{toc}{\contentsline {section}{\numberline {17.4}Second example}{404}} +\newlabel{ch17:sec:2ndmodel}{{17.4}{404}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.1}The MIOR model}{404}} +\newlabel{ch17:subsec:miormodel}{{17.4.1}{404}} +\@writefile{loa}{\contentsline {algocf}{\numberline {19}{\ignorespaces Evolution step of each Meta-Mior (microbial colony) agent\relax }}{405}} +\newlabel{ch17:seqalgo}{{19}{405}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.2}MIOR Implementation}{405}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.3}{\ignorespaces Execution distribution retained on GPU\relax }}{406}} +\newlabel{ch17:fig:gpu_distribution}{{17.3}{406}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.1}Execution mapping on GPU}{406}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.2}Data structures translation}{407}} +\newlabel{ch17:subsec:datastructures}{{17.4.2.2}{407}} +\newlabel{ch17:listing:mior_data_structures}{{17.3}{407}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.3}Main data structures used in a MIOR simulation}{407}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.4}{\ignorespaces Compact representation of the topology of a MIOR simulation\relax }}{408}} +\newlabel{ch17:fig:csr_representation}{{17.4}{408}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.3}Critical resources access management}{408}} +\newlabel{ch17:subsec:concurrency}{{17.4.2.3}{408}} +\newlabel{ch17:listing:mior_kernels}{{17.4}{409}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.4}Main MIOR kernel}{409}} +\newlabel{ch17:fig:mior_launcher}{{17.5}{410}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.5}MIOR simulation launcher}{410}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {17.4.2.4}Termination detection}{410}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.4.3}Performance of MIOR implementations}{411}} +\newlabel{ch17:subsec:miorexperiments}{{17.4.3}{411}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.5}{\ignorespaces CPU and GPU performance on a Tesla C1060 node\relax }}{412}} +\newlabel{ch17:fig:mior_perfs_tesla}{{17.5}{412}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.6}{\ignorespaces CPU and GPU performance on a personal computer with a Geforce 8800GT\relax }}{413}} +\newlabel{ch17:fig:mior_perfs_8800gt}{{17.6}{413}} +\@writefile{toc}{\contentsline {section}{\numberline {17.5}Analysis and recommendations}{413}} +\newlabel{ch17:analysis}{{17.5}{413}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.7}{\ignorespaces Execution time of one multi-simulation kernel on the Tesla platform\relax }}{414}} +\newlabel{ch17:fig:monokernel_graph}{{17.7}{414}} +\@writefile{lof}{\contentsline {figure}{\numberline {17.8}{\ignorespaces Total execution time for 1000 simulations on the Tesla platform, while varying the number of simulations for each kernel\relax }}{414}} +\newlabel{ch17:fig:multikernel_graph}{{17.8}{414}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.1}Analysis}{414}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.2}MAS execution workflow}{415}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.3}Implementation challenges}{416}} +\@writefile{toc}{\contentsline {subsection}{\numberline {17.5.4}MCSMA}{416}} +\newlabel{ch17:Mcsma}{{17.5.4}{416}} +\@writefile{toc}{\contentsline {section}{\numberline {17.6}Conclusion}{417}} +\newlabel{ch17:conclusion}{{17.6}{417}} +\@writefile{toc}{\contentsline {section}{Bibliography}{418}} \@setckpt{Chapters/chapter17/ch17}{ -\setcounter{page}{390} +\setcounter{page}{422} \setcounter{equation}{0} \setcounter{enumi}{3} \setcounter{enumii}{0} @@ -84,7 +84,7 @@ \setcounter{footnote}{1} \setcounter{mpfootnote}{0} \setcounter{part}{6} -\setcounter{chapter}{16} +\setcounter{chapter}{17} \setcounter{section}{6} \setcounter{subsection}{0} \setcounter{subsubsection}{0} @@ -101,9 +101,9 @@ \setcounter{lstnumber}{21} \setcounter{ContinuedFloat}{0} \setcounter{AlgoLine}{17} -\setcounter{algocfline}{16} -\setcounter{algocfproc}{16} -\setcounter{algocf}{16} +\setcounter{algocfline}{19} +\setcounter{algocfproc}{19} +\setcounter{algocf}{19} \setcounter{nprt@mantissa@digitsbefore}{0} \setcounter{nprt@mantissa@digitsafter}{0} \setcounter{nprt@exponent@digitsbefore}{0} diff --git a/BookGPU/Chapters/chapter18/ch18.aux b/BookGPU/Chapters/chapter18/ch18.aux index 230f20a..884902a 100644 --- a/BookGPU/Chapters/chapter18/ch18.aux +++ b/BookGPU/Chapters/chapter18/ch18.aux @@ -2,45 +2,45 @@ \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}} \@writefile{toc}{\author{Christophe Guyeux}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {17}Pseudorandom Number Generator on GPU}{391}} +\@writefile{toc}{\contentsline {chapter}{\numberline {18}Pseudorandom Number Generator on GPU}{423}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{chapter18}{{17}{391}} -\@writefile{toc}{\contentsline {section}{\numberline {17.1}Introduction}{391}} -\@writefile{toc}{\contentsline {section}{\numberline {17.2}Basic Remindees}{393}} -\newlabel{section:BASIC RECALLS}{{17.2}{393}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.1}A Short Presentation of Chaos}{393}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.2}On Devaney's Definition of Chaos}{393}} -\newlabel{sec:dev}{{17.2.2}{393}} -\newlabel{Devaney}{{17.1}{393}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.2.3}Chaotic iterations}{394}} -\newlabel{subsection:Chaotic iterations}{{17.2.3}{394}} -\newlabel{Chaotic iterations}{{2}{394}} -\newlabel{eq:generalIC}{{17.4}{395}} -\newlabel{equation Oplus}{{17.5}{395}} -\@writefile{toc}{\contentsline {section}{\numberline {17.3}Toward Efficiency and Improvement for CI PRNG}{395}} -\newlabel{sec:efficient PRNG}{{17.3}{395}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{395}} -\newlabel{algo:seqCIPRNG}{{17.1}{395}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {17.1}C code of the sequential PRNG based on chaotic iterations}{395}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{396}} -\newlabel{sec:efficient PRNG gpu}{{17.3.2}{396}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.3}Naive Version for GPU}{396}} -\@writefile{loa}{\contentsline {algocf}{\numberline {17}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{397}} -\newlabel{algo:gpu_kernel}{{17}{397}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.4}Improved Version for GPU}{397}} -\newlabel{IR}{{18}{398}} -\@writefile{loa}{\contentsline {algocf}{\numberline {18}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{398}} -\newlabel{algo:gpu_kernel2}{{18}{398}} -\@writefile{toc}{\contentsline {subsection}{\numberline {17.3.5}Chaos Evaluation of the Improved Version}{398}} -\@writefile{toc}{\contentsline {section}{\numberline {17.4}Experiments}{399}} -\newlabel{sec:experiments}{{17.4}{399}} -\@writefile{toc}{\contentsline {section}{\numberline {17.5}Summary}{399}} -\@writefile{lof}{\contentsline {figure}{\numberline {17.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{400}} -\newlabel{fig:time_xorlike_gpu}{{17.1}{400}} -\@writefile{toc}{\contentsline {section}{Bibliography}{401}} +\newlabel{chapter18}{{18}{423}} +\@writefile{toc}{\contentsline {section}{\numberline {18.1}Introduction}{423}} +\@writefile{toc}{\contentsline {section}{\numberline {18.2}Basic Remindees}{425}} +\newlabel{section:BASIC RECALLS}{{18.2}{425}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.1}A Short Presentation of Chaos}{425}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.2}On Devaney's Definition of Chaos}{425}} +\newlabel{sec:dev}{{18.2.2}{425}} +\newlabel{Devaney}{{18.1}{425}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.2.3}Chaotic iterations}{426}} +\newlabel{subsection:Chaotic iterations}{{18.2.3}{426}} +\newlabel{Chaotic iterations}{{2}{426}} +\newlabel{eq:generalIC}{{18.4}{427}} +\newlabel{equation Oplus}{{18.5}{427}} +\@writefile{toc}{\contentsline {section}{\numberline {18.3}Toward Efficiency and Improvement for CI PRNG}{427}} +\newlabel{sec:efficient PRNG}{{18.3}{427}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{427}} +\newlabel{algo:seqCIPRNG}{{18.1}{427}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {18.1}C code of the sequential PRNG based on chaotic iterations}{427}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{428}} +\newlabel{sec:efficient PRNG gpu}{{18.3.2}{428}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.3}Naive Version for GPU}{428}} +\@writefile{loa}{\contentsline {algocf}{\numberline {20}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{429}} +\newlabel{algo:gpu_kernel}{{20}{429}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.4}Improved Version for GPU}{429}} +\newlabel{IR}{{21}{430}} +\@writefile{loa}{\contentsline {algocf}{\numberline {21}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{430}} +\newlabel{algo:gpu_kernel2}{{21}{430}} +\@writefile{toc}{\contentsline {subsection}{\numberline {18.3.5}Chaos Evaluation of the Improved Version}{430}} +\@writefile{toc}{\contentsline {section}{\numberline {18.4}Experiments}{431}} +\newlabel{sec:experiments}{{18.4}{431}} +\@writefile{toc}{\contentsline {section}{\numberline {18.5}Summary}{431}} +\@writefile{lof}{\contentsline {figure}{\numberline {18.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{432}} +\newlabel{fig:time_xorlike_gpu}{{18.1}{432}} +\@writefile{toc}{\contentsline {section}{Bibliography}{433}} \@setckpt{Chapters/chapter18/ch18}{ -\setcounter{page}{403} +\setcounter{page}{435} \setcounter{equation}{5} \setcounter{enumi}{3} \setcounter{enumii}{0} @@ -49,7 +49,7 @@ \setcounter{footnote}{2} \setcounter{mpfootnote}{0} \setcounter{part}{6} -\setcounter{chapter}{17} +\setcounter{chapter}{18} \setcounter{section}{5} \setcounter{subsection}{0} \setcounter{subsubsection}{0} @@ -66,9 +66,9 @@ \setcounter{lstnumber}{15} \setcounter{ContinuedFloat}{0} \setcounter{AlgoLine}{14} -\setcounter{algocfline}{18} -\setcounter{algocfproc}{18} -\setcounter{algocf}{18} +\setcounter{algocfline}{21} +\setcounter{algocfproc}{21} +\setcounter{algocf}{21} \setcounter{nprt@mantissa@digitsbefore}{0} \setcounter{nprt@mantissa@digitsafter}{0} \setcounter{nprt@exponent@digitsbefore}{0} diff --git a/BookGPU/Chapters/chapter6/ch6.tex b/BookGPU/Chapters/chapter6/ch6.tex index bc5ea3f..0cf6f84 100755 --- a/BookGPU/Chapters/chapter6/ch6.tex +++ b/BookGPU/Chapters/chapter6/ch6.tex @@ -23,7 +23,7 @@ label=#1, caption={#2}} }{} -\def\N{$\mathbb N$ } +%\def\N{$\mathbb N$ } \def\R{$\mathbb R$ } \def\Z{$\mathbb Z$ } \def\Q{$\mathbb Q$ }