]> AND Private Git Repository - book_gpu.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
embedded font pb solved
authorcouturie <couturie@extinction>
Sat, 30 Mar 2013 09:07:43 +0000 (10:07 +0100)
committercouturie <couturie@extinction>
Sat, 30 Mar 2013 09:07:43 +0000 (10:07 +0100)
BookGPU/BookGPU.tex
BookGPU/Chapters/chapter12/ch12.aux
BookGPU/Chapters/chapter16/ch16.aux
BookGPU/Chapters/chapter18/ch18.aux
BookGPU/Chapters/chapter7/figures/PararealSpeedupGTX590.tikz
BookGPU/Chapters/chapter7/figures/scalingNx25-eps-converted-to.pdf

index e744be88063b87b7480d756e4c14f699fe4d73ad..4c27d29bfb6c4daccf489c6cc4114932c61fb975 100755 (executable)
@@ -35,6 +35,7 @@
 \usepackage{moreverb}
 \usepackage{commath}
 \usepackage{numprint}
 \usepackage{moreverb}
 \usepackage{commath}
 \usepackage{numprint}
+%\usepackage{lmodern}
 %% \usepackage{listings}
 %% \usepackage{subfigure}
 
 %% \usepackage{listings}
 %% \usepackage{subfigure}
 
 \include{Chapters/symbollist}
 
 \setcounter{page}{1}
 \include{Chapters/symbollist}
 
 \setcounter{page}{1}
-%\part{Presentation of GPUs}
-%\include{Chapters/chapter1/ch1}
-%\include{Chapters/chapter2/ch2}
-%\part{Image processing}
-%\include{Chapters/chapter3/ch3}
-%\part{Software development}
-%\include{Chapters/chapter5/ch5}
-%\include{Chapters/chapter6/ch6}
-%\part{Optimization}
-%\include{Chapters/chapter8/ch8}
-%\include{Chapters/chapter9/ch9}
+\part{Presentation of GPUs}
+\include{Chapters/chapter1/ch1}
+\include{Chapters/chapter2/ch2}
+\part{Image processing}
+\include{Chapters/chapter3/ch3}
+\part{Software development}
+\include{Chapters/chapter5/ch5}
+\include{Chapters/chapter6/ch6}
+\part{Optimization}
+\include{Chapters/chapter8/ch8}
+\include{Chapters/chapter9/ch9}
 
 \part{Numerical applications}
 \include{Chapters/chapter7/ch7} %pb fonts
 
 \part{Numerical applications}
 \include{Chapters/chapter7/ch7} %pb fonts
-%\include{Chapters/chapter11/ch11}
-%\include{Chapters/chapter12/ch12}
-%\include{Chapters/chapter13/ch13}
-%\include{Chapters/chapter14/ch14}
-%\include{Chapters/chapter15/ch15}
-%\include{Chapters/chapter16/ch16}
+\include{Chapters/chapter11/ch11}
+\include{Chapters/chapter12/ch12}
+\include{Chapters/chapter13/ch13}
+\include{Chapters/chapter14/ch14}
+\include{Chapters/chapter15/ch15}
+\include{Chapters/chapter16/ch16}
 \part{Other}
 \part{Other}
-%\include{Chapters/chapter18/ch18}
-%\include{Chapters/chapter19/ch19}
+\include{Chapters/chapter18/ch18}
+\include{Chapters/chapter19/ch19}
 
 \bibliographystyle{hep}
 %%%\bibliography{biblio}
 
 \bibliographystyle{hep}
 %%%\bibliography{biblio}
index 97242d0baec3706797d5ba8153c4b2b459ac68a3..225215655e5c03f4f88b475c641ecf625df65b43 100644 (file)
@@ -3,81 +3,81 @@
 \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}}
 \@writefile{toc}{\author{Jacques Bahi}{}}
 \@writefile{loa}{\addvspace {10\p@ }}
 \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}}
 \@writefile{toc}{\author{Jacques Bahi}{}}
 \@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {10}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{215}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {11}Solving sparse linear systems with GMRES and CG methods on GPU clusters}{259}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
-\newlabel{ch12}{{10}{215}}
-\@writefile{toc}{\contentsline {section}{\numberline {10.1}Introduction}{215}}
-\newlabel{ch12:sec:01}{{10.1}{215}}
-\@writefile{toc}{\contentsline {section}{\numberline {10.2}Krylov iterative methods}{216}}
-\newlabel{ch12:sec:02}{{10.2}{216}}
-\newlabel{ch12:eq:01}{{10.1}{216}}
-\newlabel{ch12:eq:02}{{10.2}{216}}
-\newlabel{ch12:eq:03}{{10.3}{216}}
-\newlabel{ch12:eq:11}{{10.4}{217}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {10.2.1}CG method}{217}}
-\newlabel{ch12:sec:02.01}{{10.2.1}{217}}
-\newlabel{ch12:eq:04}{{10.5}{217}}
-\newlabel{ch12:eq:05}{{10.6}{217}}
-\newlabel{ch12:eq:06}{{10.7}{217}}
-\newlabel{ch12:eq:07}{{10.8}{217}}
-\newlabel{ch12:eq:08}{{10.9}{217}}
-\newlabel{ch12:eq:09}{{10.10}{217}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {9}{\ignorespaces Left-preconditioned CG method\relax }}{218}}
-\newlabel{ch12:alg:01}{{9}{218}}
-\newlabel{ch12:eq:10}{{10.11}{218}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {10.2.2}GMRES method}{219}}
-\newlabel{ch12:sec:02.02}{{10.2.2}{219}}
-\newlabel{ch12:eq:12}{{10.12}{219}}
-\newlabel{ch12:eq:13}{{10.13}{219}}
-\newlabel{ch12:eq:14}{{10.14}{219}}
-\newlabel{ch12:eq:15}{{10.15}{219}}
-\newlabel{ch12:eq:16}{{10.16}{219}}
-\newlabel{ch12:eq:17}{{10.17}{219}}
-\newlabel{ch12:eq:18}{{10.18}{219}}
-\newlabel{ch12:eq:19}{{10.19}{219}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{220}}
-\newlabel{ch12:alg:02}{{10}{220}}
-\@writefile{toc}{\contentsline {section}{\numberline {10.3}Parallel implementation on a GPU cluster}{221}}
-\newlabel{ch12:sec:03}{{10.3}{221}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {10.3.1}Data partitioning}{221}}
-\newlabel{ch12:sec:03.01}{{10.3.1}{221}}
-\@writefile{lof}{\contentsline {figure}{\numberline {10.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{222}}
-\newlabel{ch12:fig:01}{{10.1}{222}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {10.3.2}GPU computing}{222}}
-\newlabel{ch12:sec:03.02}{{10.3.2}{222}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {10.3.3}Data communications}{223}}
-\newlabel{ch12:sec:03.03}{{10.3.3}{223}}
-\@writefile{lof}{\contentsline {figure}{\numberline {10.2}{\ignorespaces Data exchanges between \textit  {Node 1} and its neighbors \textit  {Node 0}, \textit  {Node 2} and \textit  {Node 3}.\relax }}{224}}
-\newlabel{ch12:fig:02}{{10.2}{224}}
-\@writefile{lof}{\contentsline {figure}{\numberline {10.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{225}}
-\newlabel{ch12:fig:03}{{10.3}{225}}
-\@writefile{toc}{\contentsline {section}{\numberline {10.4}Experimental results}{226}}
-\newlabel{ch12:sec:04}{{10.4}{226}}
-\@writefile{lof}{\contentsline {figure}{\numberline {10.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{226}}
-\newlabel{ch12:fig:04}{{10.4}{226}}
-\@writefile{lof}{\contentsline {figure}{\numberline {10.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{227}}
-\newlabel{ch12:fig:05}{{10.5}{227}}
-\@writefile{lot}{\contentsline {table}{\numberline {10.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{227}}
-\newlabel{ch12:tab:01}{{10.1}{227}}
-\@writefile{lot}{\contentsline {table}{\numberline {10.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{228}}
-\newlabel{ch12:tab:02}{{10.2}{228}}
-\@writefile{lot}{\contentsline {table}{\numberline {10.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{228}}
-\newlabel{ch12:tab:03}{{10.3}{228}}
-\newlabel{ch12:eq:20}{{10.20}{229}}
-\@writefile{lof}{\contentsline {figure}{\numberline {10.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{230}}
-\newlabel{ch12:fig:06}{{10.6}{230}}
-\@writefile{lot}{\contentsline {table}{\numberline {10.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{230}}
-\newlabel{ch12:tab:04}{{10.4}{230}}
-\@writefile{lot}{\contentsline {table}{\numberline {10.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{231}}
-\newlabel{ch12:tab:05}{{10.5}{231}}
-\@writefile{toc}{\contentsline {section}{\numberline {10.5}Conclusion}{231}}
-\newlabel{ch12:sec:05}{{10.5}{231}}
-\@writefile{lot}{\contentsline {table}{\numberline {10.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{232}}
-\newlabel{ch12:tab:06}{{10.6}{232}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{232}}
+\newlabel{ch12}{{11}{259}}
+\@writefile{toc}{\contentsline {section}{\numberline {11.1}Introduction}{259}}
+\newlabel{ch12:sec:01}{{11.1}{259}}
+\@writefile{toc}{\contentsline {section}{\numberline {11.2}Krylov iterative methods}{260}}
+\newlabel{ch12:sec:02}{{11.2}{260}}
+\newlabel{ch12:eq:01}{{11.1}{260}}
+\newlabel{ch12:eq:02}{{11.2}{260}}
+\newlabel{ch12:eq:03}{{11.3}{260}}
+\newlabel{ch12:eq:11}{{11.4}{261}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.1}CG method}{261}}
+\newlabel{ch12:sec:02.01}{{11.2.1}{261}}
+\newlabel{ch12:eq:04}{{11.5}{261}}
+\newlabel{ch12:eq:05}{{11.6}{261}}
+\newlabel{ch12:eq:06}{{11.7}{261}}
+\newlabel{ch12:eq:07}{{11.8}{261}}
+\newlabel{ch12:eq:08}{{11.9}{261}}
+\newlabel{ch12:eq:09}{{11.10}{261}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {9}{\ignorespaces Left-preconditioned CG method\relax }}{262}}
+\newlabel{ch12:alg:01}{{9}{262}}
+\newlabel{ch12:eq:10}{{11.11}{262}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {11.2.2}GMRES method}{263}}
+\newlabel{ch12:sec:02.02}{{11.2.2}{263}}
+\newlabel{ch12:eq:12}{{11.12}{263}}
+\newlabel{ch12:eq:13}{{11.13}{263}}
+\newlabel{ch12:eq:14}{{11.14}{263}}
+\newlabel{ch12:eq:15}{{11.15}{263}}
+\newlabel{ch12:eq:16}{{11.16}{263}}
+\newlabel{ch12:eq:17}{{11.17}{263}}
+\newlabel{ch12:eq:18}{{11.18}{263}}
+\newlabel{ch12:eq:19}{{11.19}{263}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Left-preconditioned GMRES method with restarts\relax }}{264}}
+\newlabel{ch12:alg:02}{{10}{264}}
+\@writefile{toc}{\contentsline {section}{\numberline {11.3}Parallel implementation on a GPU cluster}{265}}
+\newlabel{ch12:sec:03}{{11.3}{265}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.1}Data partitioning}{265}}
+\newlabel{ch12:sec:03.01}{{11.3.1}{265}}
+\@writefile{lof}{\contentsline {figure}{\numberline {11.1}{\ignorespaces A data partitioning of the sparse matrix $A$, the solution vector $x$ and the right-hand side $b$ into four portions.\relax }}{266}}
+\newlabel{ch12:fig:01}{{11.1}{266}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.2}GPU computing}{266}}
+\newlabel{ch12:sec:03.02}{{11.3.2}{266}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {11.3.3}Data communications}{267}}
+\newlabel{ch12:sec:03.03}{{11.3.3}{267}}
+\@writefile{lof}{\contentsline {figure}{\numberline {11.2}{\ignorespaces Data exchanges between \textit  {Node 1} and its neighbors \textit  {Node 0}, \textit  {Node 2} and \textit  {Node 3}.\relax }}{268}}
+\newlabel{ch12:fig:02}{{11.2}{268}}
+\@writefile{lof}{\contentsline {figure}{\numberline {11.3}{\ignorespaces Columns reordering of a sparse sub-matrix.\relax }}{269}}
+\newlabel{ch12:fig:03}{{11.3}{269}}
+\@writefile{toc}{\contentsline {section}{\numberline {11.4}Experimental results}{270}}
+\newlabel{ch12:sec:04}{{11.4}{270}}
+\@writefile{lof}{\contentsline {figure}{\numberline {11.4}{\ignorespaces General scheme of the GPU cluster of tests composed of six machines, each with two GPUs.\relax }}{270}}
+\newlabel{ch12:fig:04}{{11.4}{270}}
+\@writefile{lof}{\contentsline {figure}{\numberline {11.5}{\ignorespaces Sketches of sparse matrices chosen from the Davis collection.\relax }}{271}}
+\newlabel{ch12:fig:05}{{11.5}{271}}
+\@writefile{lot}{\contentsline {table}{\numberline {11.1}{\ignorespaces Main characteristics of sparse matrices chosen from the Davis collection.\relax }}{271}}
+\newlabel{ch12:tab:01}{{11.1}{271}}
+\@writefile{lot}{\contentsline {table}{\numberline {11.2}{\ignorespaces Performances of the parallel CG method on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{272}}
+\newlabel{ch12:tab:02}{{11.2}{272}}
+\@writefile{lot}{\contentsline {table}{\numberline {11.3}{\ignorespaces Performances of the parallel GMRES method on a cluster 24 CPU cores vs. on cluster of 12 GPUs.\relax }}{272}}
+\newlabel{ch12:tab:03}{{11.3}{272}}
+\newlabel{ch12:eq:20}{{11.20}{273}}
+\@writefile{lof}{\contentsline {figure}{\numberline {11.6}{\ignorespaces Parallel generation of a large sparse matrix by four computing nodes.\relax }}{274}}
+\newlabel{ch12:fig:06}{{11.6}{274}}
+\@writefile{lot}{\contentsline {table}{\numberline {11.4}{\ignorespaces Main characteristics of sparse banded matrices generated from those of the Davis collection.\relax }}{274}}
+\newlabel{ch12:tab:04}{{11.4}{274}}
+\@writefile{lot}{\contentsline {table}{\numberline {11.5}{\ignorespaces Performances of the parallel CG method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{275}}
+\newlabel{ch12:tab:05}{{11.5}{275}}
+\@writefile{toc}{\contentsline {section}{\numberline {11.5}Conclusion}{275}}
+\newlabel{ch12:sec:05}{{11.5}{275}}
+\@writefile{lot}{\contentsline {table}{\numberline {11.6}{\ignorespaces Performances of the parallel GMRES method for solving linear systems associated to sparse banded matrices on a cluster of 24 CPU cores vs. on a cluster of 12 GPUs.\relax }}{276}}
+\newlabel{ch12:tab:06}{{11.6}{276}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{276}}
 \@setckpt{Chapters/chapter12/ch12}{
 \@setckpt{Chapters/chapter12/ch12}{
-\setcounter{page}{234}
+\setcounter{page}{278}
 \setcounter{equation}{22}
 \setcounter{enumi}{4}
 \setcounter{enumii}{0}
 \setcounter{equation}{22}
 \setcounter{enumi}{4}
 \setcounter{enumii}{0}
@@ -86,7 +86,7 @@
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{5}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{5}
-\setcounter{chapter}{10}
+\setcounter{chapter}{11}
 \setcounter{section}{5}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{section}{5}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
@@ -95,7 +95,7 @@
 \setcounter{figure}{6}
 \setcounter{table}{6}
 \setcounter{numauthors}{0}
 \setcounter{figure}{6}
 \setcounter{table}{6}
 \setcounter{numauthors}{0}
-\setcounter{parentequation}{8}
+\setcounter{parentequation}{46}
 \setcounter{subfigure}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
 \setcounter{subfigure}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
index 690208a46ef13c94472fa025b01d1cd86f0c2bec..a0d90ea1718299fa134b689e2eaf0b5814e35f07 100644 (file)
@@ -4,72 +4,72 @@
 \@writefile{toc}{\author{H. Wang}{}}
 \@writefile{toc}{\author{H. Yu}{}}
 \@writefile{loa}{\addvspace {10\p@ }}
 \@writefile{toc}{\author{H. Wang}{}}
 \@writefile{toc}{\author{H. Yu}{}}
 \@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {14}GPU-Accelerated Envelope-Following Method}{299}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {15}GPU-Accelerated Envelope-Following Method}{343}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {section}{\numberline {14.1}Introduction}{299}}
-\newlabel{fig:ef1}{{14.1(a)}{301}}
-\newlabel{sub@fig:ef1}{{(a)}{301}}
-\newlabel{fig:ef2}{{14.1(b)}{301}}
-\newlabel{sub@fig:ef2}{{(b)}{301}}
-\@writefile{lof}{\contentsline {figure}{\numberline {14.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{301}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{301}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{301}}
-\newlabel{fig:ef_intro}{{14.1}{301}}
-\@writefile{toc}{\contentsline {section}{\numberline {14.2}The envelope-following method in a nutshell}{302}}
-\newlabel{sec:ef}{{14.2}{302}}
-\newlabel{eq:dae}{{14.1}{302}}
-\newlabel{eq:Newton}{{14.2}{303}}
-\newlabel{eq:A}{{14.3}{303}}
-\@writefile{toc}{\contentsline {section}{\numberline {14.3}New parallel envelope-following method}{304}}
-\newlabel{sec:gmres}{{14.3}{304}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {14.3.1}GMRES solver for Newton update equation}{304}}
-\@writefile{lof}{\contentsline {figure}{\numberline {14.2}{\ignorespaces The flow of envelope-following method.\relax }}{305}}
-\newlabel{fig:ef_flow}{{14.2}{305}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {14}{\ignorespaces Standard GMRES algorithm.\relax }}{306}}
-\newlabel{alg:GMRES}{{14}{306}}
-\newlabel{line:mvp}{{5}{306}}
-\newlabel{line:newnorm}{{11}{306}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {14.3.2}Parallelization on GPU platforms}{306}}
-\newlabel{sec:gpu}{{14.3.2}{306}}
-\@writefile{lof}{\contentsline {figure}{\numberline {14.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{307}}
-\newlabel{fig:gmres}{{14.3}{307}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {14.3.3}Gear-2 based sensitivity calculation}{308}}
-\newlabel{sec:gear}{{14.3.3}{308}}
-\newlabel{eq:BE}{{14.4}{308}}
-\newlabel{eq:sens1}{{14.5}{308}}
-\newlabel{eq:Gear_t2}{{14.6}{309}}
-\newlabel{eq:sens2}{{14.7}{309}}
-\newlabel{eq:Gear_t3}{{14.8}{309}}
-\newlabel{eq:sensM}{{14.9}{309}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {15}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{310}}
-\newlabel{alg:mf_Gear}{{15}{310}}
-\newlabel{line:mf_Gear_loop}{{4}{310}}
-\newlabel{line:shift}{{8}{310}}
-\@writefile{toc}{\contentsline {section}{\numberline {14.4}Numerical examples}{310}}
-\newlabel{sec:exp}{{14.4}{310}}
-\@writefile{lof}{\contentsline {figure}{\numberline {14.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{311}}
-\newlabel{fig:flyback}{{14.4}{311}}
-\@writefile{lof}{\contentsline {figure}{\numberline {14.5}{\ignorespaces Illustration of power/ground network model.\relax }}{311}}
-\newlabel{fig:pg}{{14.5}{311}}
-\newlabel{fig:flybackWhole}{{14.6(a)}{312}}
-\newlabel{sub@fig:flybackWhole}{{(a)}{312}}
-\newlabel{fig:flybackZoom}{{14.6(b)}{312}}
-\newlabel{sub@fig:flybackZoom}{{(b)}{312}}
-\@writefile{lof}{\contentsline {figure}{\numberline {14.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{312}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{312}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{312}}
-\newlabel{fig:flyback_wave}{{14.6}{312}}
-\@writefile{lof}{\contentsline {figure}{\numberline {14.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{313}}
-\newlabel{fig:buck_wave}{{14.7}{313}}
-\@writefile{lot}{\contentsline {table}{\numberline {14.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{313}}
-\newlabel{table:circuit}{{14.1}{313}}
-\@writefile{toc}{\contentsline {section}{\numberline {14.5}Summary}{314}}
-\newlabel{sec:summary}{{14.5}{314}}
-\@writefile{toc}{\contentsline {section}{\numberline {14.6}Glossary}{314}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{314}}
+\@writefile{toc}{\contentsline {section}{\numberline {15.1}Introduction}{343}}
+\newlabel{fig:ef1}{{15.1(a)}{345}}
+\newlabel{sub@fig:ef1}{{(a)}{345}}
+\newlabel{fig:ef2}{{15.1(b)}{345}}
+\newlabel{sub@fig:ef2}{{(b)}{345}}
+\@writefile{lof}{\contentsline {figure}{\numberline {15.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{345}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{345}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{345}}
+\newlabel{fig:ef_intro}{{15.1}{345}}
+\@writefile{toc}{\contentsline {section}{\numberline {15.2}The envelope-following method in a nutshell}{346}}
+\newlabel{sec:ef}{{15.2}{346}}
+\newlabel{eq:dae}{{15.1}{346}}
+\newlabel{eq:Newton}{{15.2}{347}}
+\newlabel{eq:A}{{15.3}{347}}
+\@writefile{toc}{\contentsline {section}{\numberline {15.3}New parallel envelope-following method}{348}}
+\newlabel{sec:gmres}{{15.3}{348}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.1}GMRES solver for Newton update equation}{348}}
+\@writefile{lof}{\contentsline {figure}{\numberline {15.2}{\ignorespaces The flow of envelope-following method.\relax }}{349}}
+\newlabel{fig:ef_flow}{{15.2}{349}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {14}{\ignorespaces Standard GMRES algorithm.\relax }}{350}}
+\newlabel{alg:GMRES}{{14}{350}}
+\newlabel{line:mvp}{{5}{350}}
+\newlabel{line:newnorm}{{11}{350}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.2}Parallelization on GPU platforms}{350}}
+\newlabel{sec:gpu}{{15.3.2}{350}}
+\@writefile{lof}{\contentsline {figure}{\numberline {15.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{351}}
+\newlabel{fig:gmres}{{15.3}{351}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.3}Gear-2 based sensitivity calculation}{352}}
+\newlabel{sec:gear}{{15.3.3}{352}}
+\newlabel{eq:BE}{{15.4}{352}}
+\newlabel{eq:sens1}{{15.5}{352}}
+\newlabel{eq:Gear_t2}{{15.6}{353}}
+\newlabel{eq:sens2}{{15.7}{353}}
+\newlabel{eq:Gear_t3}{{15.8}{353}}
+\newlabel{eq:sensM}{{15.9}{353}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {15}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{354}}
+\newlabel{alg:mf_Gear}{{15}{354}}
+\newlabel{line:mf_Gear_loop}{{4}{354}}
+\newlabel{line:shift}{{8}{354}}
+\@writefile{toc}{\contentsline {section}{\numberline {15.4}Numerical examples}{354}}
+\newlabel{sec:exp}{{15.4}{354}}
+\@writefile{lof}{\contentsline {figure}{\numberline {15.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{355}}
+\newlabel{fig:flyback}{{15.4}{355}}
+\@writefile{lof}{\contentsline {figure}{\numberline {15.5}{\ignorespaces Illustration of power/ground network model.\relax }}{355}}
+\newlabel{fig:pg}{{15.5}{355}}
+\newlabel{fig:flybackWhole}{{15.6(a)}{356}}
+\newlabel{sub@fig:flybackWhole}{{(a)}{356}}
+\newlabel{fig:flybackZoom}{{15.6(b)}{356}}
+\newlabel{sub@fig:flybackZoom}{{(b)}{356}}
+\@writefile{lof}{\contentsline {figure}{\numberline {15.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{356}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{356}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{356}}
+\newlabel{fig:flyback_wave}{{15.6}{356}}
+\@writefile{lof}{\contentsline {figure}{\numberline {15.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{357}}
+\newlabel{fig:buck_wave}{{15.7}{357}}
+\@writefile{lot}{\contentsline {table}{\numberline {15.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{357}}
+\newlabel{table:circuit}{{15.1}{357}}
+\@writefile{toc}{\contentsline {section}{\numberline {15.5}Summary}{358}}
+\newlabel{sec:summary}{{15.5}{358}}
+\@writefile{toc}{\contentsline {section}{\numberline {15.6}Glossary}{358}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{358}}
 \@setckpt{Chapters/chapter16/ch16}{
 \@setckpt{Chapters/chapter16/ch16}{
-\setcounter{page}{316}
+\setcounter{page}{360}
 \setcounter{equation}{9}
 \setcounter{enumi}{2}
 \setcounter{enumii}{0}
 \setcounter{equation}{9}
 \setcounter{enumi}{2}
 \setcounter{enumii}{0}
@@ -78,7 +78,7 @@
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{5}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{5}
-\setcounter{chapter}{14}
+\setcounter{chapter}{15}
 \setcounter{section}{6}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{section}{6}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
index 4d676634a2819dda4ded0b9c3590f4d231acc677..b69311d54668f1120350146c1ef6fb04ac39d7f6 100644 (file)
@@ -2,45 +2,45 @@
 \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}}
 \@writefile{toc}{\author{Christophe Guyeux}{}}
 \@writefile{loa}{\addvspace {10\p@ }}
 \@writefile{toc}{\author{Rapha\IeC {\"e}l Couturier}{}}
 \@writefile{toc}{\author{Christophe Guyeux}{}}
 \@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {15}Pseudorandom Number Generator on GPU}{319}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {16}Pseudorandom Number Generator on GPU}{363}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
-\newlabel{chapter18}{{15}{319}}
-\@writefile{toc}{\contentsline {section}{\numberline {15.1}Introduction}{319}}
-\@writefile{toc}{\contentsline {section}{\numberline {15.2}Basic Remindees}{321}}
-\newlabel{section:BASIC RECALLS}{{15.2}{321}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {15.2.1}A Short Presentation of Chaos}{321}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {15.2.2}On Devaney's Definition of Chaos}{321}}
-\newlabel{sec:dev}{{15.2.2}{321}}
-\newlabel{Devaney}{{15.1}{321}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {15.2.3}Chaotic iterations}{322}}
-\newlabel{subsection:Chaotic iterations}{{15.2.3}{322}}
-\newlabel{Chaotic iterations}{{2}{322}}
-\newlabel{eq:generalIC}{{15.4}{323}}
-\newlabel{equation Oplus}{{15.5}{323}}
-\@writefile{toc}{\contentsline {section}{\numberline {15.3}Toward Efficiency and Improvement for CI PRNG}{323}}
-\newlabel{sec:efficient PRNG}{{15.3}{323}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{323}}
-\newlabel{algo:seqCIPRNG}{{15.1}{323}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {15.1}C code of the sequential PRNG based on chaotic iterations}{323}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{324}}
-\newlabel{sec:efficient PRNG gpu}{{15.3.2}{324}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.3}Naive Version for GPU}{324}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {16}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{325}}
-\newlabel{algo:gpu_kernel}{{16}{325}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.4}Improved Version for GPU}{325}}
-\newlabel{IR}{{17}{326}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {17}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{326}}
-\newlabel{algo:gpu_kernel2}{{17}{326}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {15.3.5}Chaos Evaluation of the Improved Version}{326}}
-\@writefile{toc}{\contentsline {section}{\numberline {15.4}Experiments}{327}}
-\newlabel{sec:experiments}{{15.4}{327}}
-\@writefile{toc}{\contentsline {section}{\numberline {15.5}Summary}{327}}
-\@writefile{lof}{\contentsline {figure}{\numberline {15.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{328}}
-\newlabel{fig:time_xorlike_gpu}{{15.1}{328}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{329}}
+\newlabel{chapter18}{{16}{363}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.1}Introduction}{363}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.2}Basic Remindees}{365}}
+\newlabel{section:BASIC RECALLS}{{16.2}{365}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.2.1}A Short Presentation of Chaos}{365}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.2.2}On Devaney's Definition of Chaos}{365}}
+\newlabel{sec:dev}{{16.2.2}{365}}
+\newlabel{Devaney}{{16.1}{365}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.2.3}Chaotic iterations}{366}}
+\newlabel{subsection:Chaotic iterations}{{16.2.3}{366}}
+\newlabel{Chaotic iterations}{{2}{366}}
+\newlabel{eq:generalIC}{{16.4}{367}}
+\newlabel{equation Oplus}{{16.5}{367}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.3}Toward Efficiency and Improvement for CI PRNG}{367}}
+\newlabel{sec:efficient PRNG}{{16.3}{367}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.1}First Efficient Implementation of a PRNG based on Chaotic Iterations}{367}}
+\newlabel{algo:seqCIPRNG}{{16.1}{367}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {16.1}C code of the sequential PRNG based on chaotic iterations}{367}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.2}Efficient PRNGs based on Chaotic Iterations on GPU}{368}}
+\newlabel{sec:efficient PRNG gpu}{{16.3.2}{368}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.3}Naive Version for GPU}{368}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {16}{\ignorespaces Main kernel of the GPU ``naive'' version of the PRNG based on chaotic iterations\relax }}{369}}
+\newlabel{algo:gpu_kernel}{{16}{369}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.4}Improved Version for GPU}{369}}
+\newlabel{IR}{{17}{370}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {17}{\ignorespaces Main kernel for the chaotic iterations based PRNG GPU efficient version\relax }}{370}}
+\newlabel{algo:gpu_kernel2}{{17}{370}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {16.3.5}Chaos Evaluation of the Improved Version}{370}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.4}Experiments}{371}}
+\newlabel{sec:experiments}{{16.4}{371}}
+\@writefile{toc}{\contentsline {section}{\numberline {16.5}Summary}{371}}
+\@writefile{lof}{\contentsline {figure}{\numberline {16.1}{\ignorespaces Quantity of pseudorandom numbers generated per second with the xorlike-based PRNG\relax }}{372}}
+\newlabel{fig:time_xorlike_gpu}{{16.1}{372}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{373}}
 \@setckpt{Chapters/chapter18/ch18}{
 \@setckpt{Chapters/chapter18/ch18}{
-\setcounter{page}{331}
+\setcounter{page}{375}
 \setcounter{equation}{5}
 \setcounter{enumi}{2}
 \setcounter{enumii}{0}
 \setcounter{equation}{5}
 \setcounter{enumi}{2}
 \setcounter{enumii}{0}
@@ -49,7 +49,7 @@
 \setcounter{footnote}{2}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{6}
 \setcounter{footnote}{2}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{6}
-\setcounter{chapter}{15}
+\setcounter{chapter}{16}
 \setcounter{section}{5}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{section}{5}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
index 0395bfe1ce97b59166681b91630c7a085e015189..ab3475a650b207faba8c01e0ec5ed9caa46fc249 100644 (file)
@@ -8,8 +8,7 @@
 % 
 % 
 % 
 % 
 % 
 % 
-\begin{tikzpicture}
-
+\begin{tikzpicture}%[font={\sffamily}]
 \begin{semilogxaxis}[%
 view={0}{90},
 width=\figurewidth,
 \begin{semilogxaxis}[%
 view={0}{90},
 width=\figurewidth,
index 8329ef645485aa91ac95c6cec348634ccd3c489c..7a77b7fa109309f1bff22d4fa82f61ae8c9d7dae 100644 (file)
Binary files a/BookGPU/Chapters/chapter7/figures/scalingNx25-eps-converted-to.pdf and b/BookGPU/Chapters/chapter7/figures/scalingNx25-eps-converted-to.pdf differ