new

[book_gpu.git] / BookGPU / Chapters / chapter7 / ch7.tex
diff --git a/BookGPU/Chapters/chapter7/ch7.tex b/BookGPU/Chapters/chapter7/ch7.tex

index 585515861281423bec92a004543c496983cbe3a9..c18d417bf1e84650ab1605d1a22e9bb926009c8b 100644 (file)
--- a/BookGPU/Chapters/chapter7/ch7.tex
+++ b/BookGPU/Chapters/chapter7/ch7.tex
@@ -9,7 +9,7 @@
  
  \begin{figure}[!htb]
  \centering
  
  \begin{figure}[!htb]
  \centering
-\includegraphics[width=0.95\textwidth]{Chapters/chapter7/figures/figSeries60CB06Type7StedaySnapshot.eps}
+\includegraphics[width=0.95\textwidth]{Chapters/chapter7/figures/figSeries60CB06Type7StedaySnapshot-eps-converted-to.pdf}
  %\caption{Snapshot of steady state wave field generated by a Series 60 ship hull.}
  \end{figure}
  
  %\caption{Snapshot of steady state wave field generated by a Series 60 ship hull.}
  \end{figure}
  
@@ -313,19 +313,19 @@ Similar results were reported for the first time in the context of high-order Bo
  \centering
  \subfigure[Grid scaling, $x=(1-a)\xi^3+a\xi$.]{
  % MainLaplace2D_ex03.m
  \centering
  \subfigure[Grid scaling, $x=(1-a)\xi^3+a\xi$.]{
  % MainLaplace2D_ex03.m
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/scalingNx25.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/scalingNx25-eps-converted-to.pdf}
  }
  \subfigure[High-order spatial discretisation and stable explicit time-stepping with large time steps for a nonlinear standing wave. Scaling based on $a=0$. ]{
  % MainLaplace2D_ex03.m
  }
  \subfigure[High-order spatial discretisation and stable explicit time-stepping with large time steps for a nonlinear standing wave. Scaling based on $a=0$. ]{
  % MainLaplace2D_ex03.m
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/standingwaveglozman.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/standingwaveglozman-eps-converted-to.pdf}
  }
  \subfigure[Uniform grid ($a=1$).]{
  % MainLaplace2D_ex035_nonlinearLaplace.m
  }
  \subfigure[Uniform grid ($a=1$).]{
  % MainLaplace2D_ex035_nonlinearLaplace.m
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/SFwaves_snapshots_uniform.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/SFwaves_snapshots_uniform-eps-converted-to.pdf}
  }
  \subfigure[Clustered grid ($a=0.05$).]{
  % MainLaplace2D_ex035_nonlinearLaplace.m
  }
  \subfigure[Clustered grid ($a=0.05$).]{
  % MainLaplace2D_ex035_nonlinearLaplace.m
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/SFwaves_snapshots_clustered.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/SFwaves_snapshots_clustered-eps-converted-to.pdf}
  }
  \caption{Numerical experiments to assess stability properties of numerical wave model. In three cases, computed snapshots are taken of the wave elevation over one wave period of time. In a) the grid distribution of nodes in a one-parameter mapping for the grid is illustrated. Results from changes in wave elevation are illustrated for b) a mildly nonlinear standing wave on a highly clustered grid, c) regular stream function wave of medium steepness in shallow water $(kh,H/L)=(0.5,0.0292)$ on a uniform grid ($N_x=80$) and d) for a nonuniform grid with a minimal grid spacing 20 times smaller(!). In every case the step size remains fixed at $\Delta t = T/160$ s corresponding to a Courant number $C_r=c\tfrac{\Delta t}{\Delta x}=0.5$ for the uniform grid. A 6'$th$ order scheme and explicit EKR4 time-stepping is used in each test case.}
  \label{ch7:numexp}
  }
  \caption{Numerical experiments to assess stability properties of numerical wave model. In three cases, computed snapshots are taken of the wave elevation over one wave period of time. In a) the grid distribution of nodes in a one-parameter mapping for the grid is illustrated. Results from changes in wave elevation are illustrated for b) a mildly nonlinear standing wave on a highly clustered grid, c) regular stream function wave of medium steepness in shallow water $(kh,H/L)=(0.5,0.0292)$ on a uniform grid ($N_x=80$) and d) for a nonuniform grid with a minimal grid spacing 20 times smaller(!). In every case the step size remains fixed at $\Delta t = T/160$ s corresponding to a Courant number $C_r=c\tfrac{\Delta t}{\Delta x}=0.5$ for the uniform grid. A 6'$th$ order scheme and explicit EKR4 time-stepping is used in each test case.}
  \label{ch7:numexp}
@@ -376,12 +376,12 @@ The profiles can be reversed by a change of coordinate, i.e. $\Gamma(1-x)$, and
  \centering
  \subfigure[Wave generation, reflection and absorption of small-amplitude waves.]{
  % Script : MainLaplace2D_ex03penalityLINEAR_REFLECTEDWAVES.m
  \centering
  \subfigure[Wave generation, reflection and absorption of small-amplitude waves.]{
  % Script : MainLaplace2D_ex03penalityLINEAR_REFLECTEDWAVES.m
-\includegraphics[width=0.98\textwidth]{Chapters/chapter7/figures/standingwavespenalty.eps}
+\includegraphics[width=0.98\textwidth]{Chapters/chapter7/figures/standingwavespenalty-eps-converted-to.pdf}
  % Nx = 480, 6th order, vertical clustering, Nz=6;
  }
  \subfigure[Wave generation and absorption of steep finite-amplitude waves.]{
  % Script : MainLaplace2D_ex03penalityNONLINEAR_GENERATEWAVES.m
  % Nx = 480, 6th order, vertical clustering, Nz=6;
  }
  \subfigure[Wave generation and absorption of steep finite-amplitude waves.]{
  % Script : MainLaplace2D_ex03penalityNONLINEAR_GENERATEWAVES.m
-\includegraphics[width=0.98\textwidth]{Chapters/chapter7/figures/nonlinearwavespenalty.eps}
+\includegraphics[width=0.98\textwidth]{Chapters/chapter7/figures/nonlinearwavespenalty-eps-converted-to.pdf}
  % Nx = 540, 6th order, vertical clustering, Nz=6;
  }
  \caption{Snapshots at intervals $T/8$ over one wave period in time of computed a) small-amplitude $(kh,kH)=(0.63,0.005)$ and b) finite-amplitude $(kh,kH)=(1,0.41)$ stream function waves elevations having reached a steady state after transient startup. Combined wave generation and absorption zones in the western relaxation zone of both a) and b). In b) an absorption zone is positioned next to the eastern boundary and causes minor visible reflections. }
  % Nx = 540, 6th order, vertical clustering, Nz=6;
  }
  \caption{Snapshots at intervals $T/8$ over one wave period in time of computed a) small-amplitude $(kh,kH)=(0.63,0.005)$ and b) finite-amplitude $(kh,kH)=(1,0.41)$ stream function waves elevations having reached a steady state after transient startup. Combined wave generation and absorption zones in the western relaxation zone of both a) and b). In b) an absorption zone is positioned next to the eastern boundary and causes minor visible reflections. }
@@ -468,7 +468,9 @@ The CUDA-based numerical wave model has been developed based on all the numerica
  
  For the unified potential flow model the user will need to provide implementations of the following components; the right hand side operator for the semi-discrete free surface variables \eqref{ch7:FSorigin}, the matrix-vector operator for the discretized $\sigma$-transformed Laplace equation \eqref{ch7:TransformedLaplace}, a smoother for the multigrid relaxation step, and the potential flow solver itself, that reads initial data and advance the solution in time. In order to make the library as generic as possible, all components are template-based, which makes it possible to assemble the PDE solver by combining type definitions in the preamble of the application. An excerpt of the potential flow assembling is given in listing \ref{ch7:lst:solversetup}.
  
  
  For the unified potential flow model the user will need to provide implementations of the following components; the right hand side operator for the semi-discrete free surface variables \eqref{ch7:FSorigin}, the matrix-vector operator for the discretized $\sigma$-transformed Laplace equation \eqref{ch7:TransformedLaplace}, a smoother for the multigrid relaxation step, and the potential flow solver itself, that reads initial data and advance the solution in time. In order to make the library as generic as possible, all components are template-based, which makes it possible to assemble the PDE solver by combining type definitions in the preamble of the application. An excerpt of the potential flow assembling is given in listing \ref{ch7:lst:solversetup}.
  
-\lstset{label=ch7:lst:solversetup,caption={Generic assembling of the potential flow solver for fully nonlinear free surface water waves.},basicstyle=\scriptsize}
+\lstset{label=ch7:lst:solversetup,caption={Generic assembling of the potential flow solver for fully nonlinear free surface water waves.}
+%,basicstyle=\scriptsize
+}
  \begin{lstlisting}
  // Basics
  typedef double value_type;
  \begin{lstlisting}
  // Basics
  typedef double value_type;
@@ -501,7 +503,9 @@ typedef free_surface::potential_flow_solver_3d<potential_flow_types> potential_f
  
  Hereafter, the potential flow solver is aware of all component types that should be used to solve the entire PDE system, and it will be easy for developers to exchange parts at later times. The \texttt{laplace\_sigma\_stencil\_3d} class implements both the matrix-vector and right hand side operator. The flexible-order finite difference kernel for the matrix-free matrix-vector product for the two-dimensional Laplace problem is presented in listing \ref{ch7:lst:fd2d}. Library macros and reusable kernel routines are used throughout the implementations to enhance developer productivity and hide hardware specific details. This kernel can be used both for matrix-vector products for the original system and for the preconditioning.
  
  
  Hereafter, the potential flow solver is aware of all component types that should be used to solve the entire PDE system, and it will be easy for developers to exchange parts at later times. The \texttt{laplace\_sigma\_stencil\_3d} class implements both the matrix-vector and right hand side operator. The flexible-order finite difference kernel for the matrix-free matrix-vector product for the two-dimensional Laplace problem is presented in listing \ref{ch7:lst:fd2d}. Library macros and reusable kernel routines are used throughout the implementations to enhance developer productivity and hide hardware specific details. This kernel can be used both for matrix-vector products for the original system and for the preconditioning.
  
-\lstset{label=ch7:lst:fd2d,caption={CUDA kernel implementation for the two dimensional finite difference approximation to the transformed Laplace equation.},basicstyle=\scriptsize\ttfamily}
+\lstset{label=ch7:lst:fd2d,caption={CUDA kernel implementation for the two dimensional finite difference approximation to the transformed Laplace equation.}
+%,basicstyle=\scriptsize\ttfamily
+}
  \begin{lstlisting}
  template <typename value_type, typename size_type>
  __global__ void laplace_sigma_transformed(
  \begin{lstlisting}
  template <typename value_type, typename size_type>
  __global__ void laplace_sigma_transformed(
@@ -589,7 +593,8 @@ __global__ void laplace_sigma_transformed(
  
  In a similar template-based approach, the kernel for the right hand side operator of the two dimensional problem is implemented and listed in Listing \ref{ch7:lst:rhs2d}. The kernel computes the right hand side updates for both surface variables, $\eta$ and $\tilde{\phi}$, and applies an embedded penalty forcing \eqref{ch7:eq:penalty}, for all nodes within generation or absorption zones. The penalty forcing functions are computed based on linear or non-linear wave theory in a separate device function.
  
  
  In a similar template-based approach, the kernel for the right hand side operator of the two dimensional problem is implemented and listed in Listing \ref{ch7:lst:rhs2d}. The kernel computes the right hand side updates for both surface variables, $\eta$ and $\tilde{\phi}$, and applies an embedded penalty forcing \eqref{ch7:eq:penalty}, for all nodes within generation or absorption zones. The penalty forcing functions are computed based on linear or non-linear wave theory in a separate device function.
  
-\lstset{label=ch7:lst:rhs2d,caption={CUDA kernel implementation for the 2D right hand side.},basicstyle=\scriptsize\ttfamily}
+\lstset{label=ch7:lst:rhs2d,caption={CUDA kernel implementation for the 2D right hand side.}%,basicstyle=\scriptsize\ttfamily
+}
  \begin{lstlisting}
  template <typename value_type, typename size_type>
  __global__ void rhs(value_type const* p    , value_type const* p_surf
  \begin{lstlisting}
  template <typename value_type, typename size_type>
  __global__ void rhs(value_type const* p    , value_type const* p_surf
@@ -676,10 +681,11 @@ where $m$ is one of the scalar functions $\phi,u,w$ describing kinematics, $c$ i
  \begin{figure}[!htb]
  \begin{center}
  \subfigure[Uniform vertical grid.]{
  \begin{figure}[!htb]
  \begin{center}
  \subfigure[Uniform vertical grid.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/lineardispersion_Nx30-HL90-p6-vergrid0_Linear.eps}
+%\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/lineardispersion_Nx30-HL90-p6-vergrid0_Linear.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/lineardispersion_Nx30-HL90-p6-vergrid0_Linear-eps-converted-to.pdf}
  }
  \subfigure[Cosine-clustered vertical grid.]{
  }
  \subfigure[Cosine-clustered vertical grid.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/lineardispersion_Nx30-HL90-p6_Linear.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/lineardispersion_Nx30-HL90-p6_Linear-eps-converted-to.pdf}
  }
  \end{center}
  \caption{The accuracy in phase celerity $c$ determined by \eqref{ch7:errdisp} for small-amplitude (linear) wave.
  }
  \end{center}
  \caption{The accuracy in phase celerity $c$ determined by \eqref{ch7:errdisp} for small-amplitude (linear) wave.
@@ -691,16 +697,16 @@ $N_z\in[6,12]$. Sixth order scheme.}
  \begin{figure}[!htb]
  \begin{center}
  \subfigure[Linear]{
  \begin{figure}[!htb]
  \begin{center}
  \subfigure[Linear]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/kinematicsPHI_Nx30-HL90-p6_Linear.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/kinematicsPHI_Nx30-HL90-p6_Linear-eps-converted-to.pdf}
  }
  \subfigure[Linear]{
  }
  \subfigure[Linear]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/kinematicsW_Nx30-HL90-p6_Linear.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/kinematicsW_Nx30-HL90-p6_Linear-eps-converted-to.pdf}
  }
  \subfigure[Nonlinear]{
  }
  \subfigure[Nonlinear]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/kinematicsPHI_Nx30-HL90-p6_Nonlinear.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/kinematicsPHI_Nx30-HL90-p6_Nonlinear-eps-converted-to.pdf}
  }
  \subfigure[Nonlinear]{
  }
  \subfigure[Nonlinear]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/kinematicsW_Nx30-HL90-p6_Nonlinear.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/kinematicsW_Nx30-HL90-p6_Nonlinear-eps-converted-to.pdf}
  }
  \end{center}
  \caption{Assessment of kinematic error is presented in terms of the depth-averaged error determined by \eqref{ch7:errkin} for a) scalar velocity potential and b) vertical velocity for a small-amplitude (linear) wave, and c) scalar velocity potential and d) vertical velocity for a finite-amplitude (nonlinear) wave with wave height $H/L=90\%(H/L)_\textrm{max}$.
  }
  \end{center}
  \caption{Assessment of kinematic error is presented in terms of the depth-averaged error determined by \eqref{ch7:errkin} for a) scalar velocity potential and b) vertical velocity for a small-amplitude (linear) wave, and c) scalar velocity potential and d) vertical velocity for a finite-amplitude (nonlinear) wave with wave height $H/L=90\%(H/L)_\textrm{max}$.
@@ -725,10 +731,10 @@ Previously reported performance results for the wave model can be taken a step f
  \begin{center}
  % MainLaplace2D_ex025nonlinearLaplaceSINGLE.m
  \subfigure[Single precision.]{
  \begin{center}
  % MainLaplace2D_ex025nonlinearLaplaceSINGLE.m
  \subfigure[Single precision.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/PrecisionSINGLE.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/PrecisionSINGLE-eps-converted-to.pdf}
  }
  \subfigure[Double precision.]{
  }
  \subfigure[Double precision.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/PrecisionDOUBLE.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/PrecisionDOUBLE-eps-converted-to.pdf}
  }
  \end{center}
  \caption{Comparison between convergence histories for single and double precision computations using a PDC method for the solution of the transformed Laplace problem. Very steep nonlinear stream function wave in intermediate water $(kh,H/L)=(1,0.0903)$. Discretizaiton based on $(N_x,N_z)=(15,9)$ with 6'$th$ order stencils.}
  }
  \end{center}
  \caption{Comparison between convergence histories for single and double precision computations using a PDC method for the solution of the transformed Laplace problem. Very steep nonlinear stream function wave in intermediate water $(kh,H/L)=(1,0.0903)$. Discretizaiton based on $(N_x,N_z)=(15,9)$ with 6'$th$ order stencils.}
@@ -753,16 +759,16 @@ Results from numerical experiments are presented in figure \ref{ch7:filtering} a
  \begin{center}
  % DriverWavemodelDecomposition.m
  \subfigure[Direct solve without filter.]{
  \begin{center}
  % DriverWavemodelDecomposition.m
  \subfigure[Direct solve without filter.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/ComparisonLUNoFiltering.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/ComparisonLUNoFiltering-eps-converted-to.pdf}
  }
  \subfigure[Direct solve with filter.]{
  }
  \subfigure[Direct solve with filter.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/ComparisonLUWithFiltering.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/ComparisonLUWithFiltering-eps-converted-to.pdf}
  } \\
  \subfigure[Iterative PDC solve without filter.]{
  } \\
  \subfigure[Iterative PDC solve without filter.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/ComparisonDCNoFiltering.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/ComparisonDCNoFiltering-eps-converted-to.pdf}
  }
  \subfigure[Iterative PDC solve with filter.]{
  }
  \subfigure[Iterative PDC solve with filter.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/ComparisonDCWithFiltering.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/ComparisonDCWithFiltering-eps-converted-to.pdf}
  }
  \end{center}
  \caption{Comparison between accuracy as a function of time for double precision calculations vs. single precision with and without filtering. The double precision result are unfiltered in each comparison and shows to be less sensitive to roundoff-errors. Medium steep nonlinear stream function wave in intermediate water $(kh,H/L)=(1,0.0502)$. Discretization is based on $(N_x,N_z)=(30,6)$, A courant number of $C_r=0.5$ and 6'$th$ order stencils.}
  }
  \end{center}
  \caption{Comparison between accuracy as a function of time for double precision calculations vs. single precision with and without filtering. The double precision result are unfiltered in each comparison and shows to be less sensitive to roundoff-errors. Medium steep nonlinear stream function wave in intermediate water $(kh,H/L)=(1,0.0502)$. Discretization is based on $(N_x,N_z)=(30,6)$, A courant number of $C_r=0.5$ and 6'$th$ order stencils.}
@@ -813,10 +819,12 @@ Ideally, the ratio $\mathcal{C}_\mathcal{G}/\mathcal{C}_\mathcal{F}$ is small an
      \setlength\figureheight{0.35\textwidth}
      \setlength\figurewidth{0.37\textwidth}
      \subfigure[Performance scaling]{
      \setlength\figureheight{0.35\textwidth}
      \setlength\figurewidth{0.37\textwidth}
      \subfigure[Performance scaling]{
-        {\small\input{Chapters/chapter7/figures/PararealScaletestGTX590.tikz}}
+%        {\small\input{Chapters/chapter7/figures/PararealScaletestGTX590.tikz}}
+      \includegraphics[width=0.5\textwidth]{Chapters/chapter7/figures/PararealScaletestGTX590_conv.pdf}
      }
      \subfigure[Speedup]{
      }
      \subfigure[Speedup]{
-        {\small\input{Chapters/chapter7/figures/PararealSpeedupGTX590.tikz}}
+       % {\small\input{Chapters/chapter7/figures/PararealSpeedupGTX590.tikz}}
+ \includegraphics[width=0.5\textwidth]{Chapters/chapter7/figures/PararealSpeedupGTX590_conv.pdf}
      }
      \end{center}
      \caption{(a) Parareal absolute timings for an increasingly number of water waves traveling one wave length, each wave resolution is ($33\times 9$). (b) Parareal speedup for two to sixteen compute nodes compared to the purely sequential single GPU solver. Notice how insensitive the parareal scheme is to the size of the problem solved. Test environment 2.}\label{ch7:fig:DDPA_SPEEDUP}
      }
      \end{center}
      \caption{(a) Parareal absolute timings for an increasingly number of water waves traveling one wave length, each wave resolution is ($33\times 9$). (b) Parareal speedup for two to sixteen compute nodes compared to the purely sequential single GPU solver. Notice how insensitive the parareal scheme is to the size of the problem solved. Test environment 2.}\label{ch7:fig:DDPA_SPEEDUP}
@@ -889,10 +897,10 @@ The modified numerical model can still be based on flexible-order finite differe
  \begin{figure}[!htb]
  \begin{center}
  \subfigure[Hydrodynamic force calculations.]{
  \begin{figure}[!htb]
  \begin{center}
  \subfigure[Hydrodynamic force calculations.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/figSeries60CB06Type7Resistance.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/figSeries60CB06Type7Resistance-eps-converted-to.pdf}
  }
  \subfigure[Kelvin pattern.]{
  }
  \subfigure[Kelvin pattern.]{
-\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/figSeries60CB06Type7kelvin.eps}
+\includegraphics[width=0.45\textwidth]{Chapters/chapter7/figures/figSeries60CB06Type7kelvin-eps-converted-to.pdf}
  }
  \end{center}
  \caption{Computed results. Comparison with experiments for hydrodynamics force calculations confirming engineering accuracy for low Froudes numbers.}
  }
  \end{center}
  \caption{Computed results. Comparison with experiments for hydrodynamics force calculations confirming engineering accuracy for low Froudes numbers.}