new

[book_gpu.git] / BookGPU / Chapters / chapter16 / gpu.tex
diff --git a/BookGPU/Chapters/chapter16/gpu.tex b/BookGPU/Chapters/chapter16/gpu.tex

index 623ac81d27112085ea12e9ff159e7c2683a84449..4d4d6ef365353105617ca6b9bde55f864738f997 100644 (file)
--- a/BookGPU/Chapters/chapter16/gpu.tex
+++ b/BookGPU/Chapters/chapter16/gpu.tex
@@ -5,7 +5,7 @@ In this section, we explain how to efficiently
  use matrix-free GMRES to solve
  the Newton update problems with implicit sensitivity calculation,
  i.e., the steps enclosed by the double dashed block
  use matrix-free GMRES to solve
  the Newton update problems with implicit sensitivity calculation,
  i.e., the steps enclosed by the double dashed block
-in Fig.~\ref{fig:ef_flow}.
+in Figure~\ref{fig:ef_flow}.
  Then implementation issues of GPU acceleration
  will be discussed in detail. 
  Finally,  the Gear-2 integration is briefly introduced.
  Then implementation issues of GPU acceleration
  will be discussed in detail. 
  Finally,  the Gear-2 integration is briefly introduced.
@@ -78,7 +78,7 @@ a preset tolerance~\cite{Golub:Book'96}.
  %% \end{algorithm}
  
  \begin{algorithm}
  %% \end{algorithm}
  
  \begin{algorithm}
-\caption{Standard GMRES\index{iterative method!GMRES} algorithm.} \label{alg:GMRES}
+\caption{standard GMRES\index{iterative method!GMRES} algorithm} \label{alg:GMRES}
    \KwIn{ $ A \in \mathbb{R}^{N \times N}$, $b \in \mathbb{R}^N$,
        and initial guess $x_0 \in \mathbb{R}^N$}
    \KwOut{ $x \in \mathbb{R}^N$: $\| b - A x\|_2 < tol$}
    \KwIn{ $ A \in \mathbb{R}^{N \times N}$, $b \in \mathbb{R}^N$,
        and initial guess $x_0 \in \mathbb{R}^N$}
    \KwOut{ $x \in \mathbb{R}^N$: $\| b - A x\|_2 < tol$}
@@ -225,7 +225,7 @@ Hence, in consideration of the serial nature of the triangularization,
  the small size of the Hessenberg matrix,
  and the frequent inspection of values by the host, it is
  preferable to allocate $\tilde{H}$ in CPU (host) memory.
  the small size of the Hessenberg matrix,
  and the frequent inspection of values by the host, it is
  preferable to allocate $\tilde{H}$ in CPU (host) memory.
-As shown in Fig.~\ref{fig:gmres}, the memory copy from device to host
+As shown in Figure~\ref{fig:gmres}, the memory copy from device to host
  is called each time the Arnoldi iteration generates a new vector
  and the orthogonalization produces the vector $h$.
  
  is called each time the Arnoldi iteration generates a new vector
  and the orthogonalization produces the vector $h$.