From 04efb8c2e1780f52f54ac8d64ee67c68eb104a36 Mon Sep 17 00:00:00 2001 From: couturie Date: Sat, 30 Mar 2013 10:07:43 +0100 Subject: [PATCH] embedded font pb solved --- BookGPU/BookGPU.tex | 39 ++--- BookGPU/Chapters/chapter12/ch12.aux | 148 +++++++++--------- BookGPU/Chapters/chapter16/ch16.aux | 128 +++++++-------- BookGPU/Chapters/chapter18/ch18.aux | 74 ++++----- .../figures/PararealSpeedupGTX590.tikz | 3 +- .../figures/scalingNx25-eps-converted-to.pdf | Bin 5592 -> 7648 bytes 6 files changed, 196 insertions(+), 196 deletions(-) diff --git a/BookGPU/BookGPU.tex b/BookGPU/BookGPU.tex index e744be8..4c27d29 100755 --- a/BookGPU/BookGPU.tex +++ b/BookGPU/BookGPU.tex @@ -35,6 +35,7 @@ \usepackage{moreverb} \usepackage{commath} \usepackage{numprint} +%\usepackage{lmodern} %% \usepackage{listings} %% \usepackage{subfigure} @@ -165,29 +166,29 @@ \include{Chapters/symbollist} \setcounter{page}{1} -%\part{Presentation of GPUs} -%\include{Chapters/chapter1/ch1} -%\include{Chapters/chapter2/ch2} -%\part{Image processing} -%\include{Chapters/chapter3/ch3} -%\part{Software development} -%\include{Chapters/chapter5/ch5} -%\include{Chapters/chapter6/ch6} -%\part{Optimization} -%\include{Chapters/chapter8/ch8} -%\include{Chapters/chapter9/ch9} +\part{Presentation of GPUs} +\include{Chapters/chapter1/ch1} +\include{Chapters/chapter2/ch2} +\part{Image processing} +\include{Chapters/chapter3/ch3} +\part{Software development} +\include{Chapters/chapter5/ch5} +\include{Chapters/chapter6/ch6} +\part{Optimization} +\include{Chapters/chapter8/ch8} +\include{Chapters/chapter9/ch9} \part{Numerical applications} \include{Chapters/chapter7/ch7} %pb fonts -%\include{Chapters/chapter11/ch11} -%\include{Chapters/chapter12/ch12} -%\include{Chapters/chapter13/ch13} -%\include{Chapters/chapter14/ch14} -%\include{Chapters/chapter15/ch15} -%\include{Chapters/chapter16/ch16} +\include{Chapters/chapter11/ch11} +\include{Chapters/chapter12/ch12} +\include{Chapters/chapter13/ch13} +\include{Chapters/chapter14/ch14} +\include{Chapters/chapter15/ch15} +\include{Chapters/chapter16/ch16} \part{Other} -%\include{Chapters/chapter18/ch18} -%\include{Chapters/chapter19/ch19} +\include{Chapters/chapter18/ch18} +\include{Chapters/chapter19/ch19} \bibliographystyle{hep} %%%\bibliography{biblio} diff --git a/BookGPU/Chapters/chapter12/ch12.aux b/BookGPU/Chapters/chapter12/ch12.aux index 97242d0..2252156 100644 --- a/BookGPU/Chapters/chapter12/ch12.aux +++ b/BookGPU/Chapters/chapter12/ch12.aux @@ -3,81 +3,81 @@ \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}} \@writefile{toc}{\author{Jacques Bahi}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {10}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{215}} +\@writefile{toc}{\contentsline {chapter}{\numberline {11}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{259}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{ch12}{{10}{215}} -\@writefile{toc}{\contentsline {section}{\numberline {10.1}Introduction}{215}} -\newlabel{ch12:sec:01}{{10.1}{215}} -\@writefile{toc}{\contentsline {section}{\numberline {10.2}Krylov iterative methods}{216}} -\newlabel{ch12:sec:02}{{10.2}{216}} -\newlabel{ch12:eq:01}{{10.1}{216}} -\newlabel{ch12:eq:02}{{10.2}{216}} -\newlabel{ch12:eq:03}{{10.3}{216}} -\newlabel{ch12:eq:11}{{10.4}{217}} -\@writefile{toc}{\contentsline {subsection}{\numberline {10.2.1}CG method}{217}} -\newlabel{ch12:sec:02.01}{{10.2.1}{217}} -\newlabel{ch12:eq:04}{{10.5}{217}} -\newlabel{ch12:eq:05}{{10.6}{217}} -\newlabel{ch12:eq:06}{{10.7}{217}} -\newlabel{ch12:eq:07}{{10.8}{217}} -\newlabel{ch12:eq:08}{{10.9}{217}} -\newlabel{ch12:eq:09}{{10.10}{217}} -\@writefile{loa}{\contentsline {algocf}{\numberline {9}{\ignorespaces Left-preconditioned CG method\relax }}{218}} -\newlabel{ch12:alg:01}{{9}{218}} -\newlabel{ch12:eq:10}{{10.11}{218}} -\@writefile{toc}{\contentsline {subsection}{\numberline {10.2.2}GMRES method}{219}} -\newlabel{ch12:sec:02.02}{{10.2.2}{219}} -\newlabel{ch12:eq:12}{{10.12}{219}} -\newlabel{ch12:eq:13}{{10.13}{219}} -\newlabel{ch12:eq:14}{{10.14}{219}} -\newlabel{ch12:eq:15}{{10.15}{219}} -\newlabel{ch12:eq:16}{{10.16}{219}} -\newlabel{ch12:eq:17}{{10.17}{219}} -\newlabel{ch12:eq:18}{{10.18}{219}} -\newlabel{ch12:eq:19}{{10.19}{219}} -\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{220}} -\newlabel{ch12:alg:02}{{10}{220}} -\@writefile{toc}{\contentsline {section}{\numberline {10.3}Parallel implementation on a GPU cluster}{221}} -\newlabel{ch12:sec:03}{{10.3}{221}} -\@writefile{toc}{\contentsline {subsection}{\numberline {10.3.1}Data partitioning}{221}} -\newlabel{ch12:sec:03.01}{{10.3.1}{221}} -\@writefile{lof}{\contentsline {figure}{\numberline {10.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{222}} -\newlabel{ch12:fig:01}{{10.1}{222}} -\@writefile{toc}{\contentsline {subsection}{\numberline {10.3.2}GPU computing}{222}} -\newlabel{ch12:sec:03.02}{{10.3.2}{222}} -\@writefile{toc}{\contentsline {subsection}{\numberline {10.3.3}Data communications}{223}} -\newlabel{ch12:sec:03.03}{{10.3.3}{223}} -\@writefile{lof}{\contentsline {figure}{\numberline {10.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{224}} -\newlabel{ch12:fig:02}{{10.2}{224}} -\@writefile{lof}{\contentsline {figure}{\numberline {10.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{225}} -\newlabel{ch12:fig:03}{{10.3}{225}} -\@writefile{toc}{\contentsline {section}{\numberline {10.4}Experimental results}{226}} -\newlabel{ch12:sec:04}{{10.4}{226}} -\@writefile{lof}{\contentsline {figure}{\numberline {10.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{226}} -\newlabel{ch12:fig:04}{{10.4}{226}} -\@writefile{lof}{\contentsline {figure}{\numberline {10.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{227}} -\newlabel{ch12:fig:05}{{10.5}{227}} -\@writefile{lot}{\contentsline {table}{\numberline {10.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{227}} -\newlabel{ch12:tab:01}{{10.1}{227}} -\@writefile{lot}{\contentsline {table}{\numberline {10.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{228}} -\newlabel{ch12:tab:02}{{10.2}{228}} -\@writefile{lot}{\contentsline {table}{\numberline {10.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{228}} -\newlabel{ch12:tab:03}{{10.3}{228}} -\newlabel{ch12:eq:20}{{10.20}{229}} -\@writefile{lof}{\contentsline {figure}{\numberline {10.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{230}} -\newlabel{ch12:fig:06}{{10.6}{230}} -\@writefile{lot}{\contentsline {table}{\numberline {10.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{230}} -\newlabel{ch12:tab:04}{{10.4}{230}} -\@writefile{lot}{\contentsline {table}{\numberline {10.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{231}} -\newlabel{ch12:tab:05}{{10.5}{231}} -\@writefile{toc}{\contentsline {section}{\numberline {10.5}Conclusion}{231}} -\newlabel{ch12:sec:05}{{10.5}{231}} -\@writefile{lot}{\contentsline {table}{\numberline {10.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{232}} -\newlabel{ch12:tab:06}{{10.6}{232}} -\@writefile{toc}{\contentsline {section}{Bibliography}{232}} +\newlabel{ch12}{{11}{259}} +\@writefile{toc}{\contentsline {section}{\numberline {11.1}Introduction}{259}} +\newlabel{ch12:sec:01}{{11.1}{259}} +\@writefile{toc}{\contentsline {section}{\numberline {11.2}Krylov iterative methods}{260}} +\newlabel{ch12:sec:02}{{11.2}{260}} +\newlabel{ch12:eq:01}{{11.1}{260}} +\newlabel{ch12:eq:02}{{11.2}{260}} +\newlabel{ch12:eq:03}{{11.3}{260}} +\newlabel{ch12:eq:11}{{11.4}{261}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.1}CG method}{261}} +\newlabel{ch12:sec:02.01}{{11.2.1}{261}} +\newlabel{ch12:eq:04}{{11.5}{261}} +\newlabel{ch12:eq:05}{{11.6}{261}} +\newlabel{ch12:eq:06}{{11.7}{261}} +\newlabel{ch12:eq:07}{{11.8}{261}} +\newlabel{ch12:eq:08}{{11.9}{261}} +\newlabel{ch12:eq:09}{{11.10}{261}} +\@writefile{loa}{\contentsline {algocf}{\numberline {9}{\ignorespaces Left-preconditioned CG method\relax }}{262}} +\newlabel{ch12:alg:01}{{9}{262}} +\newlabel{ch12:eq:10}{{11.11}{262}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.2}GMRES method}{263}} +\newlabel{ch12:sec:02.02}{{11.2.2}{263}} +\newlabel{ch12:eq:12}{{11.12}{263}} +\newlabel{ch12:eq:13}{{11.13}{263}} +\newlabel{ch12:eq:14}{{11.14}{263}} +\newlabel{ch12:eq:15}{{11.15}{263}} +\newlabel{ch12:eq:16}{{11.16}{263}} +\newlabel{ch12:eq:17}{{11.17}{263}} +\newlabel{ch12:eq:18}{{11.18}{263}} +\newlabel{ch12:eq:19}{{11.19}{263}} +\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{264}} +\newlabel{ch12:alg:02}{{10}{264}} +\@writefile{toc}{\contentsline {section}{\numberline {11.3}Parallel implementation on a GPU cluster}{265}} +\newlabel{ch12:sec:03}{{11.3}{265}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.1}Data partitioning}{265}} +\newlabel{ch12:sec:03.01}{{11.3.1}{265}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{266}} +\newlabel{ch12:fig:01}{{11.1}{266}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.2}GPU computing}{266}} +\newlabel{ch12:sec:03.02}{{11.3.2}{266}} +\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.3}Data communications}{267}} +\newlabel{ch12:sec:03.03}{{11.3.3}{267}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.2}{\ignorespaces Data exchanges between \textit {Node 1} and its neighbors \textit {Node 0}, \textit {Node 2} and \textit {Node 3}.\relax }}{268}} +\newlabel{ch12:fig:02}{{11.2}{268}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{269}} +\newlabel{ch12:fig:03}{{11.3}{269}} +\@writefile{toc}{\contentsline {section}{\numberline {11.4}Experimental results}{270}} +\newlabel{ch12:sec:04}{{11.4}{270}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{270}} +\newlabel{ch12:fig:04}{{11.4}{270}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{271}} +\newlabel{ch12:fig:05}{{11.5}{271}} +\@writefile{lot}{\contentsline {table}{\numberline {11.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{271}} +\newlabel{ch12:tab:01}{{11.1}{271}} +\@writefile{lot}{\contentsline {table}{\numberline {11.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{272}} +\newlabel{ch12:tab:02}{{11.2}{272}} +\@writefile{lot}{\contentsline {table}{\numberline {11.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{272}} +\newlabel{ch12:tab:03}{{11.3}{272}} +\newlabel{ch12:eq:20}{{11.20}{273}} +\@writefile{lof}{\contentsline {figure}{\numberline {11.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{274}} +\newlabel{ch12:fig:06}{{11.6}{274}} +\@writefile{lot}{\contentsline {table}{\numberline {11.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{274}} +\newlabel{ch12:tab:04}{{11.4}{274}} +\@writefile{lot}{\contentsline {table}{\numberline {11.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{275}} +\newlabel{ch12:tab:05}{{11.5}{275}} +\@writefile{toc}{\contentsline {section}{\numberline {11.5}Conclusion}{275}} +\newlabel{ch12:sec:05}{{11.5}{275}} +\@writefile{lot}{\contentsline {table}{\numberline {11.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{276}} +\newlabel{ch12:tab:06}{{11.6}{276}} +\@writefile{toc}{\contentsline {section}{Bibliography}{276}} \@setckpt{Chapters/chapter12/ch12}{ -\setcounter{page}{234} +\setcounter{page}{278} \setcounter{equation}{22} \setcounter{enumi}{4} \setcounter{enumii}{0} @@ -86,7 +86,7 @@ \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{5} -\setcounter{chapter}{10} +\setcounter{chapter}{11} \setcounter{section}{5} \setcounter{subsection}{0} \setcounter{subsubsection}{0} @@ -95,7 +95,7 @@ \setcounter{figure}{6} \setcounter{table}{6} \setcounter{numauthors}{0} -\setcounter{parentequation}{8} +\setcounter{parentequation}{46} \setcounter{subfigure}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} diff --git a/BookGPU/Chapters/chapter16/ch16.aux b/BookGPU/Chapters/chapter16/ch16.aux index 690208a..a0d90ea 100644 --- a/BookGPU/Chapters/chapter16/ch16.aux +++ b/BookGPU/Chapters/chapter16/ch16.aux @@ -4,72 +4,72 @@ \@writefile{toc}{\author{H. Wang}{}} \@writefile{toc}{\author{H. Yu}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {14}GPU-Accelerated Envelope-Following Method}{299}} +\@writefile{toc}{\contentsline {chapter}{\numberline {15}GPU-Accelerated Envelope-Following Method}{343}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {14.1}Introduction}{299}} -\newlabel{fig:ef1}{{14.1(a)}{301}} -\newlabel{sub@fig:ef1}{{(a)}{301}} -\newlabel{fig:ef2}{{14.1(b)}{301}} -\newlabel{sub@fig:ef2}{{(b)}{301}} -\@writefile{lof}{\contentsline {figure}{\numberline {14.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{301}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{301}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{301}} -\newlabel{fig:ef_intro}{{14.1}{301}} -\@writefile{toc}{\contentsline {section}{\numberline {14.2}The envelope-following method in a nutshell}{302}} -\newlabel{sec:ef}{{14.2}{302}} -\newlabel{eq:dae}{{14.1}{302}} -\newlabel{eq:Newton}{{14.2}{303}} -\newlabel{eq:A}{{14.3}{303}} -\@writefile{toc}{\contentsline {section}{\numberline {14.3}New parallel envelope-following method}{304}} -\newlabel{sec:gmres}{{14.3}{304}} -\@writefile{toc}{\contentsline {subsection}{\numberline {14.3.1}GMRES solver for Newton update equation}{304}} -\@writefile{lof}{\contentsline {figure}{\numberline {14.2}{\ignorespaces The flow of envelope-following method.\relax }}{305}} -\newlabel{fig:ef_flow}{{14.2}{305}} -\@writefile{loa}{\contentsline {algocf}{\numberline {14}{\ignorespaces Standard GMRES algorithm.\relax }}{306}} -\newlabel{alg:GMRES}{{14}{306}} -\newlabel{line:mvp}{{5}{306}} -\newlabel{line:newnorm}{{11}{306}} -\@writefile{toc}{\contentsline {subsection}{\numberline {14.3.2}Parallelization on GPU platforms}{306}} -\newlabel{sec:gpu}{{14.3.2}{306}} -\@writefile{lof}{\contentsline {figure}{\numberline {14.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{307}} -\newlabel{fig:gmres}{{14.3}{307}} -\@writefile{toc}{\contentsline {subsection}{\numberline {14.3.3}Gear-2 based sensitivity calculation}{308}} -\newlabel{sec:gear}{{14.3.3}{308}} -\newlabel{eq:BE}{{14.4}{308}} -\newlabel{eq:sens1}{{14.5}{308}} -\newlabel{eq:Gear_t2}{{14.6}{309}} -\newlabel{eq:sens2}{{14.7}{309}} -\newlabel{eq:Gear_t3}{{14.8}{309}} -\newlabel{eq:sensM}{{14.9}{309}} -\@writefile{loa}{\contentsline {algocf}{\numberline {15}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{310}} -\newlabel{alg:mf_Gear}{{15}{310}} -\newlabel{line:mf_Gear_loop}{{4}{310}} -\newlabel{line:shift}{{8}{310}} -\@writefile{toc}{\contentsline {section}{\numberline {14.4}Numerical examples}{310}} -\newlabel{sec:exp}{{14.4}{310}} -\@writefile{lof}{\contentsline {figure}{\numberline {14.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{311}} -\newlabel{fig:flyback}{{14.4}{311}} -\@writefile{lof}{\contentsline {figure}{\numberline {14.5}{\ignorespaces Illustration of power/ground network model.\relax }}{311}} -\newlabel{fig:pg}{{14.5}{311}} -\newlabel{fig:flybackWhole}{{14.6(a)}{312}} -\newlabel{sub@fig:flybackWhole}{{(a)}{312}} -\newlabel{fig:flybackZoom}{{14.6(b)}{312}} -\newlabel{sub@fig:flybackZoom}{{(b)}{312}} -\@writefile{lof}{\contentsline {figure}{\numberline {14.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{312}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{312}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{312}} -\newlabel{fig:flyback_wave}{{14.6}{312}} -\@writefile{lof}{\contentsline {figure}{\numberline {14.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{313}} -\newlabel{fig:buck_wave}{{14.7}{313}} -\@writefile{lot}{\contentsline {table}{\numberline {14.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{313}} -\newlabel{table:circuit}{{14.1}{313}} -\@writefile{toc}{\contentsline {section}{\numberline {14.5}Summary}{314}} -\newlabel{sec:summary}{{14.5}{314}} -\@writefile{toc}{\contentsline {section}{\numberline {14.6}Glossary}{314}} -\@writefile{toc}{\contentsline {section}{Bibliography}{314}} +\@writefile{toc}{\contentsline {section}{\numberline {15.1}Introduction}{343}} +\newlabel{fig:ef1}{{15.1(a)}{345}} +\newlabel{sub@fig:ef1}{{(a)}{345}} +\newlabel{fig:ef2}{{15.1(b)}{345}} +\newlabel{sub@fig:ef2}{{(b)}{345}} +\@writefile{lof}{\contentsline {figure}{\numberline {15.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{345}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{345}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{345}} +\newlabel{fig:ef_intro}{{15.1}{345}} +\@writefile{toc}{\contentsline {section}{\numberline {15.2}The envelope-following method in a nutshell}{346}} +\newlabel{sec:ef}{{15.2}{346}} +\newlabel{eq:dae}{{15.1}{346}} +\newlabel{eq:Newton}{{15.2}{347}} +\newlabel{eq:A}{{15.3}{347}} +\@writefile{toc}{\contentsline {section}{\numberline {15.3}New parallel envelope-following method}{348}} +\newlabel{sec:gmres}{{15.3}{348}} +\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.1}GMRES solver for Newton update equation}{348}} +\@writefile{lof}{\contentsline {figure}{\numberline {15.2}{\ignorespaces The flow of envelope-following method.\relax }}{349}} +\newlabel{fig:ef_flow}{{15.2}{349}} +\@writefile{loa}{\contentsline {algocf}{\numberline {14}{\ignorespaces Standard GMRES algorithm.\relax }}{350}} +\newlabel{alg:GMRES}{{14}{350}} +\newlabel{line:mvp}{{5}{350}} +\newlabel{line:newnorm}{{11}{350}} +\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.2}Parallelization on GPU platforms}{350}} +\newlabel{sec:gpu}{{15.3.2}{350}} +\@writefile{lof}{\contentsline {figure}{\numberline {15.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{351}} +\newlabel{fig:gmres}{{15.3}{351}} +\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.3}Gear-2 based sensitivity calculation}{352}} +\newlabel{sec:gear}{{15.3.3}{352}} +\newlabel{eq:BE}{{15.4}{352}} +\newlabel{eq:sens1}{{15.5}{352}} +\newlabel{eq:Gear_t2}{{15.6}{353}} +\newlabel{eq:sens2}{{15.7}{353}} +\newlabel{eq:Gear_t3}{{15.8}{353}} +\newlabel{eq:sensM}{{15.9}{353}} +\@writefile{loa}{\contentsline {algocf}{\numberline {15}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{354}} +\newlabel{alg:mf_Gear}{{15}{354}} +\newlabel{line:mf_Gear_loop}{{4}{354}} +\newlabel{line:shift}{{8}{354}} +\@writefile{toc}{\contentsline {section}{\numberline {15.4}Numerical examples}{354}} +\newlabel{sec:exp}{{15.4}{354}} +\@writefile{lof}{\contentsline {figure}{\numberline {15.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{355}} +\newlabel{fig:flyback}{{15.4}{355}} +\@writefile{lof}{\contentsline {figure}{\numberline {15.5}{\ignorespaces Illustration of power/ground network model.\relax }}{355}} +\newlabel{fig:pg}{{15.5}{355}} +\newlabel{fig:flybackWhole}{{15.6(a)}{356}} +\newlabel{sub@fig:flybackWhole}{{(a)}{356}} +\newlabel{fig:flybackZoom}{{15.6(b)}{356}} +\newlabel{sub@fig:flybackZoom}{{(b)}{356}} +\@writefile{lof}{\contentsline {figure}{\numberline {15.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{356}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{356}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{356}} +\newlabel{fig:flyback_wave}{{15.6}{356}} +\@writefile{lof}{\contentsline {figure}{\numberline {15.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{357}} +\newlabel{fig:buck_wave}{{15.7}{357}} +\@writefile{lot}{\contentsline {table}{\numberline {15.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{357}} +\newlabel{table:circuit}{{15.1}{357}} +\@writefile{toc}{\contentsline {section}{\numberline {15.5}Summary}{358}} +\newlabel{sec:summary}{{15.5}{358}} +\@writefile{toc}{\contentsline {section}{\numberline {15.6}Glossary}{358}} +\@writefile{toc}{\contentsline {section}{Bibliography}{358}} \@setckpt{Chapters/chapter16/ch16}{ -\setcounter{page}{316} +\setcounter{page}{360} \setcounter{equation}{9} \setcounter{enumi}{2} \setcounter{enumii}{0} @@ -78,7 +78,7 @@ \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{5} -\setcounter{chapter}{14} +\setcounter{chapter}{15} \setcounter{section}{6} \setcounter{subsection}{0} \setcounter{subsubsection}{0} diff --git a/BookGPU/Chapters/chapter18/ch18.aux b/BookGPU/Chapters/chapter18/ch18.aux index 4d67663..b69311d 100644 --- a/BookGPU/Chapters/chapter18/ch18.aux +++ b/BookGPU/Chapters/chapter18/ch18.aux @@ -2,45 +2,45 @@ \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}} \@writefile{toc}{\author{Christophe Guyeux}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {15}Pseudorandom Number Generator on GPU}{319}} +\@writefile{toc}{\contentsline {chapter}{\numberline {16}Pseudorandom Number Generator on GPU}{363}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{chapter18}{{15}{319}} -\@writefile{toc}{\contentsline {section}{\numberline {15.1}Introduction}{319}} -\@writefile{toc}{\contentsline {section}{\numberline {15.2}Basic Remindees}{321}} -\newlabel{section:BASIC RECALLS}{{15.2}{321}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.2.1}A Short Presentation of Chaos}{321}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.2.2}On Devaney's Definition of Chaos}{321}} -\newlabel{sec:dev}{{15.2.2}{321}} -\newlabel{Devaney}{{15.1}{321}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.2.3}Chaotic iterations}{322}} -\newlabel{subsection:Chaotic iterations}{{15.2.3}{322}} -\newlabel{Chaotic iterations}{{2}{322}} -\newlabel{eq:generalIC}{{15.4}{323}} -\newlabel{equation Oplus}{{15.5}{323}} -\@writefile{toc}{\contentsline {section}{\numberline {15.3}Toward Efficiency and Improvement for CI PRNG}{323}} -\newlabel{sec:efficient PRNG}{{15.3}{323}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{323}} -\newlabel{algo:seqCIPRNG}{{15.1}{323}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {15.1}C code of the sequential PRNG based on chaotic iterations}{323}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{324}} -\newlabel{sec:efficient PRNG gpu}{{15.3.2}{324}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.3}Naive Version for GPU}{324}} -\@writefile{loa}{\contentsline {algocf}{\numberline {16}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{325}} -\newlabel{algo:gpu_kernel}{{16}{325}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.4}Improved Version for GPU}{325}} -\newlabel{IR}{{17}{326}} -\@writefile{loa}{\contentsline {algocf}{\numberline {17}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{326}} -\newlabel{algo:gpu_kernel2}{{17}{326}} -\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.5}Chaos Evaluation of the Improved Version}{326}} -\@writefile{toc}{\contentsline {section}{\numberline {15.4}Experiments}{327}} -\newlabel{sec:experiments}{{15.4}{327}} -\@writefile{toc}{\contentsline {section}{\numberline {15.5}Summary}{327}} -\@writefile{lof}{\contentsline {figure}{\numberline {15.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{328}} -\newlabel{fig:time_xorlike_gpu}{{15.1}{328}} -\@writefile{toc}{\contentsline {section}{Bibliography}{329}} +\newlabel{chapter18}{{16}{363}} +\@writefile{toc}{\contentsline {section}{\numberline {16.1}Introduction}{363}} +\@writefile{toc}{\contentsline {section}{\numberline {16.2}Basic Remindees}{365}} +\newlabel{section:BASIC RECALLS}{{16.2}{365}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.2.1}A Short Presentation of Chaos}{365}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.2.2}On Devaney's Definition of Chaos}{365}} +\newlabel{sec:dev}{{16.2.2}{365}} +\newlabel{Devaney}{{16.1}{365}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.2.3}Chaotic iterations}{366}} +\newlabel{subsection:Chaotic iterations}{{16.2.3}{366}} +\newlabel{Chaotic iterations}{{2}{366}} +\newlabel{eq:generalIC}{{16.4}{367}} +\newlabel{equation Oplus}{{16.5}{367}} +\@writefile{toc}{\contentsline {section}{\numberline {16.3}Toward Efficiency and Improvement for CI PRNG}{367}} +\newlabel{sec:efficient PRNG}{{16.3}{367}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{367}} +\newlabel{algo:seqCIPRNG}{{16.1}{367}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {16.1}C code of the sequential PRNG based on chaotic iterations}{367}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{368}} +\newlabel{sec:efficient PRNG gpu}{{16.3.2}{368}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.3}Naive Version for GPU}{368}} +\@writefile{loa}{\contentsline {algocf}{\numberline {16}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{369}} +\newlabel{algo:gpu_kernel}{{16}{369}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.4}Improved Version for GPU}{369}} +\newlabel{IR}{{17}{370}} +\@writefile{loa}{\contentsline {algocf}{\numberline {17}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{370}} +\newlabel{algo:gpu_kernel2}{{17}{370}} +\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.5}Chaos Evaluation of the Improved Version}{370}} +\@writefile{toc}{\contentsline {section}{\numberline {16.4}Experiments}{371}} +\newlabel{sec:experiments}{{16.4}{371}} +\@writefile{toc}{\contentsline {section}{\numberline {16.5}Summary}{371}} +\@writefile{lof}{\contentsline {figure}{\numberline {16.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{372}} +\newlabel{fig:time_xorlike_gpu}{{16.1}{372}} +\@writefile{toc}{\contentsline {section}{Bibliography}{373}} \@setckpt{Chapters/chapter18/ch18}{ -\setcounter{page}{331} +\setcounter{page}{375} \setcounter{equation}{5} \setcounter{enumi}{2} \setcounter{enumii}{0} @@ -49,7 +49,7 @@ \setcounter{footnote}{2} \setcounter{mpfootnote}{0} \setcounter{part}{6} -\setcounter{chapter}{15} +\setcounter{chapter}{16} \setcounter{section}{5} \setcounter{subsection}{0} \setcounter{subsubsection}{0} diff --git a/BookGPU/Chapters/chapter7/figures/PararealSpeedupGTX590.tikz b/BookGPU/Chapters/chapter7/figures/PararealSpeedupGTX590.tikz index 0395bfe..ab3475a 100644 --- a/BookGPU/Chapters/chapter7/figures/PararealSpeedupGTX590.tikz +++ b/BookGPU/Chapters/chapter7/figures/PararealSpeedupGTX590.tikz @@ -8,8 +8,7 @@ % % % -\begin{tikzpicture} - +\begin{tikzpicture}%[font={\sffamily}] \begin{semilogxaxis}[% view={0}{90}, width=\figurewidth, diff --git a/BookGPU/Chapters/chapter7/figures/scalingNx25-eps-converted-to.pdf b/BookGPU/Chapters/chapter7/figures/scalingNx25-eps-converted-to.pdf index 8329ef645485aa91ac95c6cec348634ccd3c489c..7a77b7fa109309f1bff22d4fa82f61ae8c9d7dae 100644 GIT binary patch delta 2782 zcma)83piBi8_%WN!BBKz8xE}^P3O# zt#dQ8L-~=YgvVh|*|A2&91$-}A{2o%#b1znbQn6Nk^A9^#1d98TLcnGkOO;qjwis- zMjlrZEcO9OR3b>AP(c_Vk;ve`ztTSeiX5gN;IG*y$(P^n&?{X2i0f@y8vqO17!P85H1BDJp zkRkxIPQvE%I1E7$ACvLXvGlC<=-}MMBf5+@!WV zK8k>Nf}DoWx0PZ&O^X~MK}CwZB&aniCl{5+7!e>*Y$gR|coyV<3W6lTa>i*yI$bOg zq3o?d)G-fP=mxiqS`Dv_`$Zl%nYD=tKQ46xM&nnQuQ44xW(}J-nCM7VQnco3uAAW^ z3CZ0Xv*xD)CU!}%R`><1QnabR5-z!uVx&|=#(T~ySvov~VhmzjEOO(!X+dpyJ(Yia z#Ad*hrFI#q=)Do{O$n>^+)ZRNZ>8H&`X8 z^knSZL$S;eJJH&oi<2j$iT9Zvo>iXxSG==_UcP$u=6C~zO7%MAGWI$pxX!hpu~EA^ zY+pc;`BiXZ)@i@xd;9VqhIpJZS?{+g{p`HE3xJ}4$xRu)XPf8Q6nB&)CQbw#3{A?p z)7H@NNQUhA!M$47M$aYR(s}9peT9vsMi*;Gq)v%oX}^W?PS3KrAKpnv-@JdT2_{2x zbSw6k8xJM_oMaWA>BaQYCMP9$*~2&IJ7E%yBsWb4o%0O$w2-s>kOw)Aqt~NE&m!K2 zy)~kyHJ|O;JpAzvyECZcRLhKq+Es~8ADEQ4E%l-c2KSO7vQMYm zDLb*QdbVt^_YCJ!cH7`Qsnz{VNPD68iJ=$gyW*nQ-n{76JdItyxa&BoMMgb6@W*&$ z**W30b?sS_3ycT<-LSL7tWCe5Ld_a6k@mJSaf?oQqC@uK$!>YChlL@rn{h5=4 zN1uw>8?ZjE>zT_tkaGP4qgJN;okd-<3S%_^qmj8Q>n~2M12S29UD6lCNgYz}L4Ea@ z5pw#Fxpo@v=wZA#(Ea{X!lZV|#qrbG@z&0m+h-?;j2HUZj|rPszB^q)&bw3gB%b${ z^`4qv|9gvisq(=`p+hE3Ph~lV{3IQ^B+em@wc6#^n!KSX%&_l=k9=&U_CWdeqiYOJ zTum39UVB&-YzAuwBwo^-%iiXNFS*KliYcI~Meh+Wt?FXX+q5)cB($tI->~D{Z}jZj zoz(Q`&YZsIiwrK2y+pt63Ln|cxU-;N=O{V-NVlcF57KM5d4}1EusM@tpk~?go_A&? zGwt(O1l5GSlOxyE($*AQ;t_6T^=Sq1XS$WD)FkM>I&(GoRr@tHX}j2H)A%0$&HT8| z8)e5-Uw5t`Xfuc2oab%P+7bL%b-&CXNJTNKm)ei+mNnaLjB)4N$3#WhH{{mzVAGN= zH~5aUQjcB^jEKz_`cJ%1NvRLDeZFVR)k-}zqvcd|T#R9Ias9jhn1Jc}R>6Y0?&z}4 zgak(UnRgWvr(Rs?kCo2JueedtR<89VtJZ%o_tn^B({&cr@?Ou(b?up%J==I*jRH;{ z#%B9$e!A)`V-1JwS<<-chqo)AAMZD=N^GcN;*gE=?s{(UT=EDhWbdAxg6yw++dN*O zQ5j>^El6CZt*J-Da$I7cbsi$7bgHm@^j`napgfY`;mnv(Q(r6{NbB%%^4UILkE4YR zphw@z<*dTBJZLeC(8~Y^8~gjm?=?5togs{#*n5`yaz$$8`P79W4w;A84Y?P(0_fMT zRi$R|;fA|x735Na`DD|sWj7vD;v%IV{IIsOh$jY*P?~aN&JExe9a%v#8x@$OQ zb*ztkdvCRYdFx#mb5b2cVh(KKn04dA7e;ZN8h<^XaJ*r$uN}kiX}=FIg71g33%O-( ze|4nW=;OA`%(eqQtCl58XJidu@ldn})93J~MuGVEi9U7FIieCamo1~K>nc-7mEP)B zN=pd@Jc-DGNjNSZA>r_Nl#7Gq0~d$}uwgcrLgt{dY7KMQm@q$z=Jh{UsV9(7uU_geS`A3$RKgnePGvKiC?v z#oH1IcovzAu*e93N@g)(5(B2OC@dQ)m2UZe5q^NY9~Dd3B1x184Fo878VN8qwsE!v F{s%}v92x)s delta 782 zcma))L2DC16vs&~g^4W{lC*}34@xT%)|of6yR#G1NZTZ)NGZe|1nFT-X3Mr=x5P!% zOAlVv%Ur#CRq`QvDzrC``WgHJ?ncGZ70f%${N8^a|2O=L3@(OOxA1?9St*>D&H0vboJ<^PQrmt`>#**bO@pCXb=y> z=nxRi*^S#v_;=7$Wo|Cqn?r7vOZVqIK6zByEUalMedeLIgg;akqUIJ$%aQ{=3CI;4;Sx{hSzPuJ>q6F1n0J+04o=Q0bT%8eR=OKA*j0C_ z@ylzoxSX7?TokUXRx)3zd$1@nv=S>V5K~$_iaUvM>5o;5v?QAb{VDd(4Ar z3!*>FmXcj(T-wSUukN3UTt~@&FtqbJJEs%O$uq<>2w7#0lCd9-^f(O{&?n&=eF`Xm z_To4VVU{$YyHOAW&(4JIw%}P~OQ2wO$8EP%$5z;C?Qo?$cYDWWw&S8~{$FuG(geRq U{BiPXtOMepRAha