]> AND Private Git Repository - book_gpu.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
suite
authorcouturie <couturie@extinction>
Wed, 6 Feb 2013 13:11:07 +0000 (14:11 +0100)
committercouturie <couturie@extinction>
Wed, 6 Feb 2013 13:11:07 +0000 (14:11 +0100)
BookGPU/BookGPU.tex
BookGPU/Chapters/chapter16/bdf.tex
BookGPU/Chapters/chapter16/ch16.aux
BookGPU/Chapters/chapter16/gpu.tex
BookGPU/Makefile

index fab2a67befe268b85bac975761b1bb7872004c6a..92b09d1afe0e488efa881aaa5f4e0d487c3c0f4e 100755 (executable)
 \captionsetup[lstlisting]{format=listing,labelfont=white,textfont=white, singlelinecheck=false, margin=0pt, font={bf,footnotesize}}
 
 
 \captionsetup[lstlisting]{format=listing,labelfont=white,textfont=white, singlelinecheck=false, margin=0pt, font={bf,footnotesize}}
 
 
-\DeclareGraphicsRule{.pdftex}{pdf}{*}{}
-
-\DeclareGraphicsRule{.pstex}{eps}{*}{}
-
 
 
 \makeindex
 
 
 \makeindex
 
 \setcounter{page}{1}
 \part{This is a Part}
 
 \setcounter{page}{1}
 \part{This is a Part}
-%% \include{Chapters/chapter1/ch1}
-%% \include{Chapters/chapter2/ch2}
-%% \include{Chapters/chapter3/ch3}
-%% \include{Chapters/chapter5/ch5}
-%% \include{Chapters/chapter6/ch6}
-%% \include{Chapters/chapter7/ch7}
-%% \include{Chapters/chapter8/ch8}
-%% \include{Chapters/chapter9/ch9}
-%% \include{Chapters/chapter11/ch11}
-%% \include{Chapters/chapter14/ch14}
-%% \include{Chapters/chapter15/ch15}
- \include{Chapters/chapter16/ch16}
-%% \include{Chapters/chapter18/ch18}
+\include{Chapters/chapter1/ch1}
+\include{Chapters/chapter2/ch2}
+\include{Chapters/chapter3/ch3}
+\include{Chapters/chapter5/ch5}
+\include{Chapters/chapter6/ch6}
+\include{Chapters/chapter7/ch7}
+\include{Chapters/chapter8/ch8}
+\include{Chapters/chapter9/ch9}
+\include{Chapters/chapter11/ch11}
+\include{Chapters/chapter14/ch14}
+\include{Chapters/chapter15/ch15}
+\include{Chapters/chapter16/ch16}
+\include{Chapters/chapter18/ch18}
 
 \bibliographystyle{hep}
 %%%\bibliography{biblio}
 
 \bibliographystyle{hep}
 %%%\bibliography{biblio}
index dcf6313e63745df36585ab1dd47c76b2e52ac0b8..bd4e5d5364b881f3044336428d7c82c7cfa959ac 100644 (file)
@@ -116,6 +116,27 @@ J\! =\! \frac{\ud x_M}{\ud x_0}
      \! + \! \frac{\alpha_2}{h_M} C_{M-2} \frac{\ud x_{M-2}}{\ud x_0}\right].
 \end{equation}
 
      \! + \! \frac{\alpha_2}{h_M} C_{M-2} \frac{\ud x_{M-2}}{\ud x_0}\right].
 \end{equation}
 
+\begin{algorithm}
+\caption{The matrix-free\index{matrix-free} method for
+ Krylov subspace\index{Krylov subspace} construction.}
+\label{alg:mf_Gear}
+  \KwIn{ current Krylov subspace basis vector $v$,
+           time step lengths $h_i$,
+           saved $C_i$ matrices and LU\index{LU} factors of $J_i$, $i=0,\ldots,M$}
+  \KwOut{ matrix-vector product $r$, such that $r = A v$}
+  solve $J_1 p_2 = h_1^{-1} C_0 v$ for $p_2$\;
+  solve $J_2 p_1 = h_2^{-1} \left( \alpha_1 C_1 p_2 + \alpha_2 C_0 v \right)$ for $p_1$\;
+  \For{$i=3 \ldots M$}{
+     solve
+       $J_i p_0 = \alpha_1 h_i^{-1} C_{i-1} p_1 + \alpha_2 h_i^{-1} C_{i-2} p_2$
+       for $p_0$ \label{line:mf_Gear_loop}\;
+     $p_2 = p_1$\;
+     $p_1 = p_0$\;
+  }
+   $r = (k-1) p_0 - k v$ \label{line:shift}\;
+\end{algorithm}
+
+
 %% \begin{algorithm}
 %% \caption{The matrix-free\index{matrix-free} method for
 %%  Krylov subspace\index{Krylov subspace} construction.}
 %% \begin{algorithm}
 %% \caption{The matrix-free\index{matrix-free} method for
 %%  Krylov subspace\index{Krylov subspace} construction.}
@@ -138,7 +159,6 @@ J\! =\! \frac{\ud x_M}{\ud x_0}
 %% \end{algorithmic}
 %% \end{algorithm}
 
 %% \end{algorithmic}
 %% \end{algorithm}
 
-
 %It is easy to judge that, as $J$ is dense, explicitly
 %computing $J$ is expensive and impractical for large problems.
 %In addition, with a dense matrix,
 %It is easy to judge that, as $J$ is dense, explicitly
 %computing $J$ is expensive and impractical for large problems.
 %In addition, with a dense matrix,
index 7d1293a246048ebf798972fb4edb952ea876dd28..f3f5e6a323774b9e259e560b8e917350733dea2c 100644 (file)
@@ -4,74 +4,81 @@
 \@writefile{toc}{\author{H. Wang}{}}
 \@writefile{toc}{\author{H. Yu}{}}
 \@writefile{loa}{\addvspace {10\p@ }}
 \@writefile{toc}{\author{H. Wang}{}}
 \@writefile{toc}{\author{H. Yu}{}}
 \@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {1}GPU-Accelerated Envelope-Following Method}{3}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {13}GPU-Accelerated Envelope-Following Method}{289}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {section}{\numberline {1.1}Introduction}{3}}
-\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
-\newlabel{fig:ef1}{{1.1(a)}{5}}
-\newlabel{sub@fig:ef1}{{(a)}{5}}
-\newlabel{fig:ef2}{{1.1(b)}{5}}
-\newlabel{sub@fig:ef2}{{(b)}{5}}
-\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{5}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{5}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{5}}
-\newlabel{fig:ef_intro}{{1.1}{5}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.2}The envelope-following method in a nutshell}{6}}
-\newlabel{sec:ef}{{1.2}{6}}
-\newlabel{eq:dae}{{1.1}{6}}
-\newlabel{eq:Newton}{{1.2}{7}}
-\newlabel{eq:A}{{1.3}{7}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.3}New parallel envelope-following method}{8}}
-\newlabel{sec:gmres}{{1.3}{8}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.1}GMRES solver for Newton update equation}{8}}
-\@writefile{lof}{\contentsline {figure}{\numberline {1.2}{\ignorespaces The flow of envelope-following method.\relax }}{9}}
-\newlabel{fig:ef_flow}{{1.2}{9}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.2}Parallelization on GPU platforms}{10}}
-\newlabel{sec:gpu}{{1.3.2}{10}}
-\@writefile{lof}{\contentsline {figure}{\numberline {1.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{11}}
-\newlabel{fig:gmres}{{1.3}{11}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {1.3.3}Gear-2 based sensitivity calculation}{12}}
-\newlabel{sec:gear}{{1.3.3}{12}}
-\newlabel{eq:BE}{{1.4}{12}}
-\newlabel{eq:sens1}{{1.5}{12}}
-\newlabel{eq:Gear_t2}{{1.6}{13}}
-\newlabel{eq:sens2}{{1.7}{13}}
-\newlabel{eq:Gear_t3}{{1.8}{13}}
-\newlabel{eq:sensM}{{1.9}{13}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.4}Numerical examples}{14}}
-\newlabel{sec:exp}{{1.4}{14}}
-\@writefile{lof}{\contentsline {figure}{\numberline {1.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{15}}
-\newlabel{fig:flyback}{{1.4}{15}}
-\@writefile{lof}{\contentsline {figure}{\numberline {1.5}{\ignorespaces Illustration of power/ground network model.\relax }}{15}}
-\newlabel{fig:pg}{{1.5}{15}}
-\@writefile{lot}{\contentsline {table}{\numberline {1.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{15}}
-\newlabel{table:circuit}{{1.1}{15}}
-\newlabel{fig:flybackWhole}{{1.6(a)}{16}}
-\newlabel{sub@fig:flybackWhole}{{(a)}{16}}
-\newlabel{fig:flybackZoom}{{1.6(b)}{16}}
-\newlabel{sub@fig:flybackZoom}{{(b)}{16}}
-\@writefile{lof}{\contentsline {figure}{\numberline {1.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{16}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{16}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{16}}
-\newlabel{fig:flyback_wave}{{1.6}{16}}
-\@writefile{lof}{\contentsline {figure}{\numberline {1.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{17}}
-\newlabel{fig:buck_wave}{{1.7}{17}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.5}Summary}{17}}
-\newlabel{sec:summary}{{1.5}{17}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.6}Glossary}{18}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{18}}
+\@writefile{toc}{\contentsline {section}{\numberline {13.1}Introduction}{289}}
+\newlabel{fig:ef1}{{13.1(a)}{291}}
+\newlabel{sub@fig:ef1}{{(a)}{291}}
+\newlabel{fig:ef2}{{13.1(b)}{291}}
+\newlabel{sub@fig:ef2}{{(b)}{291}}
+\@writefile{lof}{\contentsline {figure}{\numberline {13.1}{\ignorespaces Transient envelope-following analysis. (Both two figures reflect backward-Euler style envelope-following.)\relax }}{291}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Illustration of one envelope skip.}}}{291}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {The envelope changes in a slow time scale.}}}{291}}
+\newlabel{fig:ef_intro}{{13.1}{291}}
+\@writefile{toc}{\contentsline {section}{\numberline {13.2}The envelope-following method in a nutshell}{292}}
+\newlabel{sec:ef}{{13.2}{292}}
+\newlabel{eq:dae}{{13.1}{292}}
+\newlabel{eq:Newton}{{13.2}{293}}
+\newlabel{eq:A}{{13.3}{293}}
+\@writefile{toc}{\contentsline {section}{\numberline {13.3}New parallel envelope-following method}{294}}
+\newlabel{sec:gmres}{{13.3}{294}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {13.3.1}GMRES solver for Newton update equation}{294}}
+\@writefile{lof}{\contentsline {figure}{\numberline {13.2}{\ignorespaces The flow of envelope-following method.\relax }}{295}}
+\newlabel{fig:ef_flow}{{13.2}{295}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Standard GMRES algorithm.\relax }}{296}}
+\newlabel{alg:GMRES}{{10}{296}}
+\newlabel{line:mvp}{{5}{296}}
+\newlabel{line:newnorm}{{11}{296}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {13.3.2}Parallelization on GPU platforms}{296}}
+\newlabel{sec:gpu}{{13.3.2}{296}}
+\@writefile{lof}{\contentsline {figure}{\numberline {13.3}{\ignorespaces GPU parallel solver for envelope-following update.\relax }}{297}}
+\newlabel{fig:gmres}{{13.3}{297}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {13.3.3}Gear-2 based sensitivity calculation}{298}}
+\newlabel{sec:gear}{{13.3.3}{298}}
+\newlabel{eq:BE}{{13.4}{298}}
+\newlabel{eq:sens1}{{13.5}{298}}
+\newlabel{eq:Gear_t2}{{13.6}{299}}
+\newlabel{eq:sens2}{{13.7}{299}}
+\newlabel{eq:Gear_t3}{{13.8}{299}}
+\newlabel{eq:sensM}{{13.9}{299}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {11}{\ignorespaces The matrix-free method for Krylov subspace construction.\relax }}{300}}
+\newlabel{alg:mf_Gear}{{11}{300}}
+\newlabel{line:mf_Gear_loop}{{4}{300}}
+\newlabel{line:shift}{{8}{300}}
+\@writefile{toc}{\contentsline {section}{\numberline {13.4}Numerical examples}{300}}
+\newlabel{sec:exp}{{13.4}{300}}
+\@writefile{lof}{\contentsline {figure}{\numberline {13.4}{\ignorespaces Diagram of a zero-voltage quasi-resonant flyback converter.\relax }}{301}}
+\newlabel{fig:flyback}{{13.4}{301}}
+\@writefile{lof}{\contentsline {figure}{\numberline {13.5}{\ignorespaces Illustration of power/ground network model.\relax }}{301}}
+\newlabel{fig:pg}{{13.5}{301}}
+\newlabel{fig:flybackWhole}{{13.6(a)}{302}}
+\newlabel{sub@fig:flybackWhole}{{(a)}{302}}
+\newlabel{fig:flybackZoom}{{13.6(b)}{302}}
+\newlabel{sub@fig:flybackZoom}{{(b)}{302}}
+\@writefile{lof}{\contentsline {figure}{\numberline {13.6}{\ignorespaces Flyback converter solution calculated by envelope-following. The red curve is traditional SPICE simulation result, and the back curve is the envelope-following output with simulation points marked.\relax }}{302}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {The whole plot}}}{302}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Detail of one EF simulation period}}}{302}}
+\newlabel{fig:flyback_wave}{{13.6}{302}}
+\@writefile{lof}{\contentsline {figure}{\numberline {13.7}{\ignorespaces Buck converter solution calculated by envelope-following.\relax }}{303}}
+\newlabel{fig:buck_wave}{{13.7}{303}}
+\@writefile{lot}{\contentsline {table}{\numberline {13.1}{\ignorespaces CPU and GPU time comparisons (in seconds) for solving Newton update equation with the proposed Gear-2 sensitivity. \relax }}{303}}
+\newlabel{table:circuit}{{13.1}{303}}
+\@writefile{toc}{\contentsline {section}{\numberline {13.5}Summary}{304}}
+\newlabel{sec:summary}{{13.5}{304}}
+\@writefile{toc}{\contentsline {section}{\numberline {13.6}Glossary}{304}}
+\@writefile{toc}{\contentsline {section}{Bibliography}{304}}
 \@setckpt{Chapters/chapter16/ch16}{
 \@setckpt{Chapters/chapter16/ch16}{
-\setcounter{page}{20}
+\setcounter{page}{305}
 \setcounter{equation}{9}
 \setcounter{equation}{9}
-\setcounter{enumi}{0}
+\setcounter{enumi}{2}
 \setcounter{enumii}{0}
 \setcounter{enumiii}{0}
 \setcounter{enumii}{0}
 \setcounter{enumiii}{0}
-\setcounter{enumiv}{22}
+\setcounter{enumiv}{0}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{1}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{1}
-\setcounter{chapter}{1}
+\setcounter{chapter}{13}
 \setcounter{section}{6}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{section}{6}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{figure}{7}
 \setcounter{table}{1}
 \setcounter{numauthors}{0}
 \setcounter{figure}{7}
 \setcounter{table}{1}
 \setcounter{numauthors}{0}
-\setcounter{parentequation}{0}
+\setcounter{parentequation}{4}
 \setcounter{subfigure}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
 \setcounter{lotdepth}{1}
 \setcounter{subfigure}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
 \setcounter{lotdepth}{1}
-\setcounter{lstnumber}{1}
+\setcounter{lstnumber}{9}
 \setcounter{ContinuedFloat}{0}
 \setcounter{ContinuedFloat}{0}
-\setcounter{AlgoLine}{0}
-\setcounter{algocfline}{0}
-\setcounter{algocfproc}{0}
-\setcounter{algocf}{0}
-\setcounter{proposition}{0}
+\setcounter{AlgoLine}{8}
+\setcounter{algocfline}{11}
+\setcounter{algocfproc}{11}
+\setcounter{algocf}{11}
+\setcounter{proposition}{1}
 \setcounter{theorem}{0}
 \setcounter{exercise}{0}
 \setcounter{example}{0}
 \setcounter{definition}{0}
 \setcounter{theorem}{0}
 \setcounter{exercise}{0}
 \setcounter{example}{0}
 \setcounter{definition}{0}
-\setcounter{proof}{0}
+\setcounter{proof}{1}
 \setcounter{lstlisting}{0}
 }
 \setcounter{lstlisting}{0}
 }
index bcfc686529272c248c47bcd49b7f609553c839eb..f4f62f1611a2ddd1632077ad0daa8118d44d4b4d 100644 (file)
@@ -77,6 +77,36 @@ a preset tolerance~\cite{Golub:Book'96}.
 %% \end{algorithmic}
 %% \end{algorithm}
 
 %% \end{algorithmic}
 %% \end{algorithm}
 
+\begin{algorithm}
+\caption{Standard GMRES\index{GMRES} algorithm.} \label{alg:GMRES}
+  \KwIn{ $ A \in \mathbb{R}^{N \times N}$, $b \in \mathbb{R}^N$,
+      and initial guess $x_0 \in \mathbb{R}^N$}
+  \KwOut{ $x \in \mathbb{R}^N$: $\| b - A x\|_2 < tol$}
+
+  $r = b - A x_0$\;
+  $h_{1,0}=\left \| r \right \|_2$\;
+  $m=0$\;
+
+  \While{$m < max\_iter$} {
+    $m = m+1$;
+    $v_{m} = r / h_{m,m-1}$\;
+    \label{line:mvp} $r = A v_m$\;
+    \For{$i = 1\ldots m$} {
+      $h_{i,m} = \langle v_i, r \rangle$\;
+      $r = r - h_{i,m} v_i$\;
+    }
+    $h_{m+1,m} = \left\| r \right\|_2$\label{line:newnorm} \;
+    %\STATE Generate Givens rotations to triangularize $\tilde{H}_m$
+    %\STATE Apply Givens rotations on $h_{1,0}e_1$ to get residual $\epsilon$
+    Compute the residual $\epsilon$\;
+    \If{$\epsilon < tol$} {
+      Solve the problem: minimize $\|b-Ax_m\|_2$\;
+      Return $x_m = x_0 + V_m y_m$\;
+    }
+  }
+\end{algorithm}
+
+
 At a first glance, the cost of using standard GMRES directly to
 solve the Newton update in Eq.~\eqref{eq:Newton}
 seems to come mainly from two parts: the
 At a first glance, the cost of using standard GMRES directly to
 solve the Newton update in Eq.~\eqref{eq:Newton}
 seems to come mainly from two parts: the
index 31bb997020509eaa2002f8e5a38e241dcfe22850..e3aa930190fa1ada41deffb9c8f6d89735813542 100644 (file)
@@ -5,17 +5,17 @@ all:
        pdflatex ${BOOK}
        pdflatex ${BOOK}
        bibtex bu1
        pdflatex ${BOOK}
        pdflatex ${BOOK}
        bibtex bu1
-#      bibtex bu2
-#      bibtex bu3
-#      bibtex bu4
-#      bibtex bu5
-#      bibtex bu6
-#      bibtex bu7
-#      bibtex bu8
-#      bibtex bu9
-# #don't put chapter 14, refs are included     
-#      bibtex bu11     
-# #    bibtex bu12
+       bibtex bu2
+       bibtex bu3
+       bibtex bu4
+       bibtex bu5
+       bibtex bu6
+       bibtex bu7
+       bibtex bu8
+       bibtex bu9
+#don't put chapter 14, refs are included       
+       bibtex bu11     
+#      bibtex bu12
 
        makeindex  ${BOOK}.idx
        pdflatex ${BOOK}
 
        makeindex  ${BOOK}.idx
        pdflatex ${BOOK}