X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/8ed172dc793f429438af9d60c35ae52b85986b14..5dadaa83fd77112c7c6ace09f8ec840af2109d3e:/BookGPU/Chapters/chapter15/ch15.tex?ds=inline diff --git a/BookGPU/Chapters/chapter15/ch15.tex b/BookGPU/Chapters/chapter15/ch15.tex index 5dae9b2..1b3a580 100644 --- a/BookGPU/Chapters/chapter15/ch15.tex +++ b/BookGPU/Chapters/chapter15/ch15.tex @@ -1,13 +1,14 @@ -\chapterauthor{Pierre Fortin}{Laboratoire d'Informatique de Paris 6, University Paris 6} -\chapterauthor{Rachid Habel}{T\'el\'ecom SudParis} -\chapterauthor{Fabienne J\'ez\'equel}{Laboratoire d'Informatique de Paris 6, University Paris 6} -\chapterauthor{Jean-Luc Lamotte}{Laboratoire d'Informatique de Paris 6, University Paris 6} +\chapterauthor{Rachid Habel}{T\'el\'ecom SudParis, France} +\chapterauthor{Pierre Fortin, Fabienne J\'ez\'equel and Jean-Luc Lamotte}{Laboratoire d'Informatique de Paris 6, Université Pierre et Marie Curie, France} + +%\chapterauthor{Fabienne J\'ez\'equel}{Laboratoire d'Informatique de Paris 6, University Paris 6} +%\chapterauthor{Jean-Luc Lamotte}{Laboratoire d'Informatique de Paris 6, University Paris 6} \chapterauthor{Stan Scott}{School of Electronics, Electrical Engineering \& Computer Science, -The Queen's University of Belfast} +The Queen's University of Belfast, United Kingdom} -\newcommand{\fixme}[1]{{\bf #1}} +%\newcommand{\fixme}[1]{{\bf #1}} -\chapter[Numerical validation and performance optimization on GPUs in atomic physics]{Numerical validation and performance optimization on GPUs of an application in atomic physics} +\chapter[Numerical validation and GPU performance in atomic physics]{Numerical validation and performance optimization on GPUs of an application in atomic physics} \label{chapter15} \section{Introduction}\label{ch15:intro} @@ -275,20 +276,36 @@ $\Re^{O}$. the output $R$-matrix becomes the input $R$-matrix for the next evaluation. +%% \begin{algorithm} +%% \caption{\label{prop-algo}PROP algorithm} +%% \begin{algorithmic} +%% \FOR{all scattering energies} +%% \FOR{all sectors} +%% \STATE Read amplitude arrays +%% \STATE Read correction data +%% \STATE Construct local $R$-matrices +%% \STATE From $\Re^{I}$ and local $R$-matrices, compute $\Re^{O}$ +%% \STATE $\Re^{O}$ becomes $\Re^{I}$ for the next sector +%% \ENDFOR +%% \STATE Compute physical $R$-Matrix +%% \ENDFOR +%% \end{algorithmic} +%% \end{algorithm} + \begin{algorithm} \caption{\label{prop-algo}PROP algorithm} -\begin{algorithmic} -\FOR{all scattering energies} - \FOR{all sectors} - \STATE Read amplitude arrays - \STATE Read correction data -\STATE Construct local $R$-matrices -\STATE From $\Re^{I}$ and local $R$-matrices, compute $\Re^{O}$ -\STATE $\Re^{O}$ becomes $\Re^{I}$ for the next sector - \ENDFOR - \STATE Compute physical $R$-Matrix -\ENDFOR -\end{algorithmic} +%\begin{algorithmic} +\For{all scattering energies} { + \For{all sectors}{ + Read amplitude arrays\; + Read correction data\; + Construct local $R$-matrices\; + From $\Re^{I}$ and local $R$-matrices, compute $\Re^{O}$\; + $\Re^{O}$ becomes $\Re^{I}$ for the next sector\; + } + Compute physical $R$-Matrix \; +} +%\end{algorithmic} \end{algorithm} @@ -809,7 +826,7 @@ as the matrices increase in size during the propagation \section{Performance results} \subsection{PROP deployment on GPU} -\begin{table*}[ht] +\begin{table}[ht] \begin{center} \begin{tabular}{|c||c|c||} \hline @@ -833,38 +850,37 @@ GPU version & C1060 & C2050 \\ GPU V5 (\S~\ref{gpuv5}) & 24m27s & 12m39s \\ \hline \end{tabular} -\caption{\label{table:time} -Execution time of PROP on CPU and GPU} \end{center} -\end{table*} +\caption{Execution time of PROP on CPU and GPU} +\label{table:time} +\end{table} -\begin{comment} -\begin{table*}[ht] -\begin{center} -\begin{tabular}{|c||c|c||} - \hline - PROP version & \multicolumn{2}{c|}{Execution time} \\ - \hline \hline -CPU version & 1 core & 4 cores \\\hline -& {201m32s} & {113m28s} \\ \hline \hline -GPU version & C1060 & C2050 \\ - \hline\hline - GPU V1 (\ref{gpuv1}) & 79m25s & 66m22s \\ - \hline - GPU V2 (\ref{gpuv2}) & 47m58s & 29m52s \\ - \hline - GPU V3 (\ref{gpuv3}) & 41m28s & 23m46s \\ - \hline - GPU V4 (\ref{gpuv4}) & 27m21s & 13m55s\\ - \hline - GPU V5 (\ref{gpuv5}) & 24m27s & 12m39s \\ - \hline -\end{tabular} -\caption{\label{table:time} -Execution time of the successive GPU versions} -\end{center} -\end{table*} -\end{comment} + +%% \begin{table}[ht] +%% \begin{center} +%% \begin{tabular}{|c||c|c||} +%% \hline +%% PROP version & \multicolumn{2}{c|}{Execution time} \\ +%% \hline \hline +%% CPU version & 1 core & 4 cores \\\hline +%% & {201m32s} & {113m28s} \\ \hline \hline +%% GPU version & C1060 & C2050 \\ +%% \hline\hline +%% GPU V1 (\ref{gpuv1}) & 79m25s & 66m22s \\ +%% \hline +%% GPU V2 (\ref{gpuv2}) & 47m58s & 29m52s \\ +%% \hline +%% GPU V3 (\ref{gpuv3}) & 41m28s & 23m46s \\ +%% \hline +%% GPU V4 (\ref{gpuv4}) & 27m21s & 13m55s\\ +%% \hline +%% GPU V5 (\ref{gpuv5}) & 24m27s & 12m39s \\ +%% \hline +%% \end{tabular} +%% \end{center} +%% \caption{Execution time of the successive GPU versions} +%% \label{table:time} +%% \end{table} \begin{figure}[h] \centering