From: couturie Date: Wed, 6 Feb 2013 13:11:07 +0000 (+0100) Subject: suite X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/commitdiff_plain/e8ef95b33e7a3163998636257872682e926c913a?ds=sidebyside;hp=-c suite --- e8ef95b33e7a3163998636257872682e926c913a diff --git a/BookGPU/BookGPU.tex b/BookGPU/BookGPU.tex index fab2a67..92b09d1 100755 --- a/BookGPU/BookGPU.tex +++ b/BookGPU/BookGPU.tex @@ -105,10 +105,6 @@ \captionsetup[lstlisting]{format=listing,labelfont=white,textfont=white, singlelinecheck=false, margin=0pt, font={bf,footnotesize}} -\DeclareGraphicsRule{.pdftex}{pdf}{*}{} - -\DeclareGraphicsRule{.pstex}{eps}{*}{} - \makeindex @@ -142,19 +138,19 @@ \setcounter{page}{1} \part{This is a Part} -%% \include{Chapters/chapter1/ch1} -%% \include{Chapters/chapter2/ch2} -%% \include{Chapters/chapter3/ch3} -%% \include{Chapters/chapter5/ch5} -%% \include{Chapters/chapter6/ch6} -%% \include{Chapters/chapter7/ch7} -%% \include{Chapters/chapter8/ch8} -%% \include{Chapters/chapter9/ch9} -%% \include{Chapters/chapter11/ch11} -%% \include{Chapters/chapter14/ch14} -%% \include{Chapters/chapter15/ch15} - \include{Chapters/chapter16/ch16} -%% \include{Chapters/chapter18/ch18} +\include{Chapters/chapter1/ch1} +\include{Chapters/chapter2/ch2} +\include{Chapters/chapter3/ch3} +\include{Chapters/chapter5/ch5} +\include{Chapters/chapter6/ch6} +\include{Chapters/chapter7/ch7} +\include{Chapters/chapter8/ch8} +\include{Chapters/chapter9/ch9} +\include{Chapters/chapter11/ch11} +\include{Chapters/chapter14/ch14} +\include{Chapters/chapter15/ch15} +\include{Chapters/chapter16/ch16} +\include{Chapters/chapter18/ch18} \bibliographystyle{hep} %%%\bibliography{biblio} diff --git a/BookGPU/Chapters/chapter16/bdf.tex b/BookGPU/Chapters/chapter16/bdf.tex index dcf6313..bd4e5d5 100644 --- a/BookGPU/Chapters/chapter16/bdf.tex +++ b/BookGPU/Chapters/chapter16/bdf.tex @@ -116,6 +116,27 @@ J\! =\! \frac{\ud x_M}{\ud x_0} \! + \! \frac{\alpha_2}{h_M} C_{M-2} \frac{\ud x_{M-2}}{\ud x_0}\right]. \end{equation} +\begin{algorithm} +\caption{The matrix-free\index{matrix-free} method for + Krylov subspace\index{Krylov subspace} construction.} +\label{alg:mf_Gear} + \KwIn{ current Krylov subspace basis vector $v$, + time step lengths $h_i$, + saved $C_i$ matrices and LU\index{LU} factors of $J_i$, $i=0,\ldots,M$} + \KwOut{ matrix-vector product $r$, such that $r = A v$} + solve $J_1 p_2 = h_1^{-1} C_0 v$ for $p_2$\; + solve $J_2 p_1 = h_2^{-1} \left( \alpha_1 C_1 p_2 + \alpha_2 C_0 v \right)$ for $p_1$\; + \For{$i=3 \ldots M$}{ + solve + $J_i p_0 = \alpha_1 h_i^{-1} C_{i-1} p_1 + \alpha_2 h_i^{-1} C_{i-2} p_2$ + for $p_0$ \label{line:mf_Gear_loop}\; + $p_2 = p_1$\; + $p_1 = p_0$\; + } + $r = (k-1) p_0 - k v$ \label{line:shift}\; +\end{algorithm} + + %% \begin{algorithm} %% \caption{The matrix-free\index{matrix-free} method for %% Krylov subspace\index{Krylov subspace} construction.} @@ -138,7 +159,6 @@ J\! =\! \frac{\ud x_M}{\ud x_0} %% \end{algorithmic} %% \end{algorithm} - %It is easy to judge that, as $J$ is dense, explicitly %computing $J$ is expensive and impractical for large problems. %In addition, with a dense matrix, diff --git a/BookGPU/Chapters/chapter16/ch16.aux b/BookGPU/Chapters/chapter16/ch16.aux index 7d1293a..f3f5e6a 100644 --- a/BookGPU/Chapters/chapter16/ch16.aux +++ b/BookGPU/Chapters/chapter16/ch16.aux @@ -4,74 +4,81 @@ \@writefile{toc}{\author{H. Wang}{}} \@writefile{toc}{\author{H. Yu}{}} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {chapter}{\numberline {1}GPU-Accelerated Envelope-Following Method}{3}} +\@writefile{toc}{\contentsline {chapter}{\numberline {13}GPU-Accelerated Envelope-Following Method}{289}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {1.1}Introduction}{3}} -\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} -\newlabel{fig:ef1}{{1.1(a)}{5}} -\newlabel{sub@fig:ef1}{{(a)}{5}} -\newlabel{fig:ef2}{{1.1(b)}{5}} -\newlabel{sub@fig:ef2}{{(b)}{5}} -\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{5}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{5}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{5}} -\newlabel{fig:ef_intro}{{1.1}{5}} -\@writefile{toc}{\contentsline {section}{\numberline {1.2}The envelope-following method in a nutshell}{6}} -\newlabel{sec:ef}{{1.2}{6}} -\newlabel{eq:dae}{{1.1}{6}} -\newlabel{eq:Newton}{{1.2}{7}} -\newlabel{eq:A}{{1.3}{7}} -\@writefile{toc}{\contentsline {section}{\numberline {1.3}New parallel envelope-following method}{8}} -\newlabel{sec:gmres}{{1.3}{8}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.1}GMRES solver for Newton update equation}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {1.2}{\ignorespaces The flow of envelope-following method.\relax }}{9}} -\newlabel{fig:ef_flow}{{1.2}{9}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.2}Parallelization on GPU platforms}{10}} -\newlabel{sec:gpu}{{1.3.2}{10}} -\@writefile{lof}{\contentsline {figure}{\numberline {1.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{11}} -\newlabel{fig:gmres}{{1.3}{11}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.3}Gear-2 based sensitivity calculation}{12}} -\newlabel{sec:gear}{{1.3.3}{12}} -\newlabel{eq:BE}{{1.4}{12}} -\newlabel{eq:sens1}{{1.5}{12}} -\newlabel{eq:Gear_t2}{{1.6}{13}} -\newlabel{eq:sens2}{{1.7}{13}} -\newlabel{eq:Gear_t3}{{1.8}{13}} -\newlabel{eq:sensM}{{1.9}{13}} -\@writefile{toc}{\contentsline {section}{\numberline {1.4}Numerical examples}{14}} -\newlabel{sec:exp}{{1.4}{14}} -\@writefile{lof}{\contentsline {figure}{\numberline {1.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{15}} -\newlabel{fig:flyback}{{1.4}{15}} -\@writefile{lof}{\contentsline {figure}{\numberline {1.5}{\ignorespaces Illustration of power/ground network model.\relax }}{15}} -\newlabel{fig:pg}{{1.5}{15}} -\@writefile{lot}{\contentsline {table}{\numberline {1.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{15}} -\newlabel{table:circuit}{{1.1}{15}} -\newlabel{fig:flybackWhole}{{1.6(a)}{16}} -\newlabel{sub@fig:flybackWhole}{{(a)}{16}} -\newlabel{fig:flybackZoom}{{1.6(b)}{16}} -\newlabel{sub@fig:flybackZoom}{{(b)}{16}} -\@writefile{lof}{\contentsline {figure}{\numberline {1.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{16}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{16}} -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{16}} -\newlabel{fig:flyback_wave}{{1.6}{16}} -\@writefile{lof}{\contentsline {figure}{\numberline {1.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{17}} -\newlabel{fig:buck_wave}{{1.7}{17}} -\@writefile{toc}{\contentsline {section}{\numberline {1.5}Summary}{17}} -\newlabel{sec:summary}{{1.5}{17}} -\@writefile{toc}{\contentsline {section}{\numberline {1.6}Glossary}{18}} -\@writefile{toc}{\contentsline {section}{Bibliography}{18}} +\@writefile{toc}{\contentsline {section}{\numberline {13.1}Introduction}{289}} +\newlabel{fig:ef1}{{13.1(a)}{291}} +\newlabel{sub@fig:ef1}{{(a)}{291}} +\newlabel{fig:ef2}{{13.1(b)}{291}} +\newlabel{sub@fig:ef2}{{(b)}{291}} +\@writefile{lof}{\contentsline {figure}{\numberline {13.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{291}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{291}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{291}} +\newlabel{fig:ef_intro}{{13.1}{291}} +\@writefile{toc}{\contentsline {section}{\numberline {13.2}The envelope-following method in a nutshell}{292}} +\newlabel{sec:ef}{{13.2}{292}} +\newlabel{eq:dae}{{13.1}{292}} +\newlabel{eq:Newton}{{13.2}{293}} +\newlabel{eq:A}{{13.3}{293}} +\@writefile{toc}{\contentsline {section}{\numberline {13.3}New parallel envelope-following method}{294}} +\newlabel{sec:gmres}{{13.3}{294}} +\@writefile{toc}{\contentsline {subsection}{\numberline {13.3.1}GMRES solver for Newton update equation}{294}} +\@writefile{lof}{\contentsline {figure}{\numberline {13.2}{\ignorespaces The flow of envelope-following method.\relax }}{295}} +\newlabel{fig:ef_flow}{{13.2}{295}} +\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Standard GMRES algorithm.\relax }}{296}} +\newlabel{alg:GMRES}{{10}{296}} +\newlabel{line:mvp}{{5}{296}} +\newlabel{line:newnorm}{{11}{296}} +\@writefile{toc}{\contentsline {subsection}{\numberline {13.3.2}Parallelization on GPU platforms}{296}} +\newlabel{sec:gpu}{{13.3.2}{296}} +\@writefile{lof}{\contentsline {figure}{\numberline {13.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{297}} +\newlabel{fig:gmres}{{13.3}{297}} +\@writefile{toc}{\contentsline {subsection}{\numberline {13.3.3}Gear-2 based sensitivity calculation}{298}} +\newlabel{sec:gear}{{13.3.3}{298}} +\newlabel{eq:BE}{{13.4}{298}} +\newlabel{eq:sens1}{{13.5}{298}} +\newlabel{eq:Gear_t2}{{13.6}{299}} +\newlabel{eq:sens2}{{13.7}{299}} +\newlabel{eq:Gear_t3}{{13.8}{299}} +\newlabel{eq:sensM}{{13.9}{299}} +\@writefile{loa}{\contentsline {algocf}{\numberline {11}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{300}} +\newlabel{alg:mf_Gear}{{11}{300}} +\newlabel{line:mf_Gear_loop}{{4}{300}} +\newlabel{line:shift}{{8}{300}} +\@writefile{toc}{\contentsline {section}{\numberline {13.4}Numerical examples}{300}} +\newlabel{sec:exp}{{13.4}{300}} +\@writefile{lof}{\contentsline {figure}{\numberline {13.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{301}} +\newlabel{fig:flyback}{{13.4}{301}} +\@writefile{lof}{\contentsline {figure}{\numberline {13.5}{\ignorespaces Illustration of power/ground network model.\relax }}{301}} +\newlabel{fig:pg}{{13.5}{301}} +\newlabel{fig:flybackWhole}{{13.6(a)}{302}} +\newlabel{sub@fig:flybackWhole}{{(a)}{302}} +\newlabel{fig:flybackZoom}{{13.6(b)}{302}} +\newlabel{sub@fig:flybackZoom}{{(b)}{302}} +\@writefile{lof}{\contentsline {figure}{\numberline {13.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{302}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{302}} +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{302}} +\newlabel{fig:flyback_wave}{{13.6}{302}} +\@writefile{lof}{\contentsline {figure}{\numberline {13.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{303}} +\newlabel{fig:buck_wave}{{13.7}{303}} +\@writefile{lot}{\contentsline {table}{\numberline {13.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{303}} +\newlabel{table:circuit}{{13.1}{303}} +\@writefile{toc}{\contentsline {section}{\numberline {13.5}Summary}{304}} +\newlabel{sec:summary}{{13.5}{304}} +\@writefile{toc}{\contentsline {section}{\numberline {13.6}Glossary}{304}} +\@writefile{toc}{\contentsline {section}{Bibliography}{304}} \@setckpt{Chapters/chapter16/ch16}{ -\setcounter{page}{20} +\setcounter{page}{305} \setcounter{equation}{9} -\setcounter{enumi}{0} +\setcounter{enumi}{2} \setcounter{enumii}{0} \setcounter{enumiii}{0} -\setcounter{enumiv}{22} +\setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{1} -\setcounter{chapter}{1} +\setcounter{chapter}{13} \setcounter{section}{6} \setcounter{subsection}{0} \setcounter{subsubsection}{0} @@ -80,22 +87,22 @@ \setcounter{figure}{7} \setcounter{table}{1} \setcounter{numauthors}{0} -\setcounter{parentequation}{0} +\setcounter{parentequation}{4} \setcounter{subfigure}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{lotdepth}{1} -\setcounter{lstnumber}{1} +\setcounter{lstnumber}{9} \setcounter{ContinuedFloat}{0} -\setcounter{AlgoLine}{0} -\setcounter{algocfline}{0} -\setcounter{algocfproc}{0} -\setcounter{algocf}{0} -\setcounter{proposition}{0} +\setcounter{AlgoLine}{8} +\setcounter{algocfline}{11} +\setcounter{algocfproc}{11} +\setcounter{algocf}{11} +\setcounter{proposition}{1} \setcounter{theorem}{0} \setcounter{exercise}{0} \setcounter{example}{0} \setcounter{definition}{0} -\setcounter{proof}{0} +\setcounter{proof}{1} \setcounter{lstlisting}{0} } diff --git a/BookGPU/Chapters/chapter16/gpu.tex b/BookGPU/Chapters/chapter16/gpu.tex index bcfc686..f4f62f1 100644 --- a/BookGPU/Chapters/chapter16/gpu.tex +++ b/BookGPU/Chapters/chapter16/gpu.tex @@ -77,6 +77,36 @@ a preset tolerance~\cite{Golub:Book'96}. %% \end{algorithmic} %% \end{algorithm} +\begin{algorithm} +\caption{Standard GMRES\index{GMRES} algorithm.} \label{alg:GMRES} + \KwIn{ $ A \in \mathbb{R}^{N \times N}$, $b \in \mathbb{R}^N$, + and initial guess $x_0 \in \mathbb{R}^N$} + \KwOut{ $x \in \mathbb{R}^N$: $\| b - A x\|_2 < tol$} + + $r = b - A x_0$\; + $h_{1,0}=\left \| r \right \|_2$\; + $m=0$\; + + \While{$m < max\_iter$} { + $m = m+1$; + $v_{m} = r / h_{m,m-1}$\; + \label{line:mvp} $r = A v_m$\; + \For{$i = 1\ldots m$} { + $h_{i,m} = \langle v_i, r \rangle$\; + $r = r - h_{i,m} v_i$\; + } + $h_{m+1,m} = \left\| r \right\|_2$\label{line:newnorm} \; + %\STATE Generate Givens rotations to triangularize $\tilde{H}_m$ + %\STATE Apply Givens rotations on $h_{1,0}e_1$ to get residual $\epsilon$ + Compute the residual $\epsilon$\; + \If{$\epsilon < tol$} { + Solve the problem: minimize $\|b-Ax_m\|_2$\; + Return $x_m = x_0 + V_m y_m$\; + } + } +\end{algorithm} + + At a first glance, the cost of using standard GMRES directly to solve the Newton update in Eq.~\eqref{eq:Newton} seems to come mainly from two parts: the diff --git a/BookGPU/Makefile b/BookGPU/Makefile index 31bb997..e3aa930 100644 --- a/BookGPU/Makefile +++ b/BookGPU/Makefile @@ -5,17 +5,17 @@ all: pdflatex ${BOOK} pdflatex ${BOOK} bibtex bu1 -# bibtex bu2 -# bibtex bu3 -# bibtex bu4 -# bibtex bu5 -# bibtex bu6 -# bibtex bu7 -# bibtex bu8 -# bibtex bu9 -# #don't put chapter 14, refs are included -# bibtex bu11 -# # bibtex bu12 + bibtex bu2 + bibtex bu3 + bibtex bu4 + bibtex bu5 + bibtex bu6 + bibtex bu7 + bibtex bu8 + bibtex bu9 +#don't put chapter 14, refs are included + bibtex bu11 +# bibtex bu12 makeindex ${BOOK}.idx pdflatex ${BOOK}