-\chapterauthor{Pierre Fortin}{Laboratoire d'Informatique de Paris 6, University Paris 6}
-\chapterauthor{Rachid Habel}{T\'el\'ecom SudParis}
-\chapterauthor{Fabienne J\'ez\'equel}{Laboratoire d'Informatique de Paris 6, University Paris 6}
-\chapterauthor{Jean-Luc Lamotte}{Laboratoire d'Informatique de Paris 6, University Paris 6}
+\chapterauthor{Rachid Habel}{T\'el\'ecom SudParis, France}
+\chapterauthor{Pierre Fortin, Fabienne J\'ez\'equel and Jean-Luc Lamotte}{Laboratoire d'Informatique de Paris 6, Universit\'e Pierre et Marie Curie, France}
+
+%\chapterauthor{Fabienne J\'ez\'equel}{Laboratoire d'Informatique de Paris 6, University Paris 6}
+%\chapterauthor{Jean-Luc Lamotte}{Laboratoire d'Informatique de Paris 6, University Paris 6}
\chapterauthor{Stan Scott}{School of Electronics, Electrical Engineering \& Computer Science,
-The Queen's University of Belfast}
+The Queen's University of Belfast, United Kingdom}
-\newcommand{\fixme}[1]{{\bf #1}}
+%\newcommand{\fixme}[1]{{\bf #1}}
-\chapter[Numerical validation and performance optimization on GPUs in atomic physics]{Numerical validation and performance optimization on GPUs of an application in atomic physics}
+\chapter[Numerical validation and GPU performance in atomic physics]{Numerical validation and performance optimization on GPUs of an application in atomic physics}
\label{chapter15}
\section{Introduction}\label{ch15:intro}
the output $R$-matrix becomes the input $R$-matrix
for the next evaluation.
+%% \begin{algorithm}
+%% \caption{\label{prop-algo}PROP algorithm}
+%% \begin{algorithmic}
+%% \FOR{all scattering energies}
+%% \FOR{all sectors}
+%% \STATE Read amplitude arrays
+%% \STATE Read correction data
+%% \STATE Construct local $R$-matrices
+%% \STATE From $\Re^{I}$ and local $R$-matrices, compute $\Re^{O}$
+%% \STATE $\Re^{O}$ becomes $\Re^{I}$ for the next sector
+%% \ENDFOR
+%% \STATE Compute physical $R$-Matrix
+%% \ENDFOR
+%% \end{algorithmic}
+%% \end{algorithm}
+
\begin{algorithm}
\caption{\label{prop-algo}PROP algorithm}
-\begin{algorithmic}
-\FOR{all scattering energies}
- \FOR{all sectors}
- \STATE Read amplitude arrays
- \STATE Read correction data
-\STATE Construct local $R$-matrices
-\STATE From $\Re^{I}$ and local $R$-matrices, compute $\Re^{O}$
-\STATE $\Re^{O}$ becomes $\Re^{I}$ for the next sector
- \ENDFOR
- \STATE Compute physical $R$-Matrix
-\ENDFOR
-\end{algorithmic}
+%\begin{algorithmic}
+\For{all scattering energies} {
+ \For{all sectors}{
+ Read amplitude arrays\;
+ Read correction data\;
+ Construct local $R$-matrices\;
+ From $\Re^{I}$ and local $R$-matrices, compute $\Re^{O}$\;
+ $\Re^{O}$ becomes $\Re^{I}$ for the next sector\;
+ }
+ Compute physical $R$-matrix\;
+}
+%\end{algorithmic}
\end{algorithm}
\section{Performance results}
\subsection{PROP deployment on GPU}
-\begin{table*}[ht]
+\begin{table}[ht]
\begin{center}
\begin{tabular}{|c||c|c||}
\hline
GPU V5 (\S~\ref{gpuv5}) & 24m27s & 12m39s \\
\hline
\end{tabular}
-\caption{\label{table:time}
-Execution time of PROP on CPU and GPU}
\end{center}
-\end{table*}
+\caption{Execution time of PROP on CPU and GPU}
+\label{table:time}
+\end{table}
-\begin{comment}
-\begin{table*}[ht]
-\begin{center}
-\begin{tabular}{|c||c|c||}
- \hline
- PROP version & \multicolumn{2}{c|}{Execution time} \\
- \hline \hline
-CPU version & 1 core & 4 cores \\\hline
-& {201m32s} & {113m28s} \\ \hline \hline
-GPU version & C1060 & C2050 \\
- \hline\hline
- GPU V1 (\ref{gpuv1}) & 79m25s & 66m22s \\
- \hline
- GPU V2 (\ref{gpuv2}) & 47m58s & 29m52s \\
- \hline
- GPU V3 (\ref{gpuv3}) & 41m28s & 23m46s \\
- \hline
- GPU V4 (\ref{gpuv4}) & 27m21s & 13m55s\\
- \hline
- GPU V5 (\ref{gpuv5}) & 24m27s & 12m39s \\
- \hline
-\end{tabular}
-\caption{\label{table:time}
-Execution time of the successive GPU versions}
-\end{center}
-\end{table*}
-\end{comment}
+
+%% \begin{table}[ht]
+%% \begin{center}
+%% \begin{tabular}{|c||c|c||}
+%% \hline
+%% PROP version & \multicolumn{2}{c|}{Execution time} \\
+%% \hline \hline
+%% CPU version & 1 core & 4 cores \\\hline
+%% & {201m32s} & {113m28s} \\ \hline \hline
+%% GPU version & C1060 & C2050 \\
+%% \hline\hline
+%% GPU V1 (\ref{gpuv1}) & 79m25s & 66m22s \\
+%% \hline
+%% GPU V2 (\ref{gpuv2}) & 47m58s & 29m52s \\
+%% \hline
+%% GPU V3 (\ref{gpuv3}) & 41m28s & 23m46s \\
+%% \hline
+%% GPU V4 (\ref{gpuv4}) & 27m21s & 13m55s\\
+%% \hline
+%% GPU V5 (\ref{gpuv5}) & 24m27s & 12m39s \\
+%% \hline
+%% \end{tabular}
+%% \end{center}
+%% \caption{Execution time of the successive GPU versions}
+%% \label{table:time}
+%% \end{table}
\begin{figure}[h]
\centering