AND Private Git Repository - book_gpu.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
new
author Raphael Couturier <raphael.couturier@univ-fcomte.fr>
Tue, 22 Jan 2013 16:00:42 +0000 (17:00 +0100)
committer Raphael Couturier <raphael.couturier@univ-fcomte.fr>
Tue, 22 Jan 2013 16:00:42 +0000 (17:00 +0100)
BookGPU/BookGPU.tex
BookGPU/Chapters/chapter15/ch15.tex
BookGPU/Chapters/chapter3/ch3.aux
BookGPU/Chapters/chapter3/ch3.tex
BookGPU/Chapters/chapter6/ch6.aux
BookGPU/Makefile

index 8bb03d4a6bc52b93e108af983a97afd09c8f9f32..edd313e11bf3e095a15b65546402d4f76e35b955 100755 (executable)
@@ -15,7 +15,7 @@
 \usepackage{cite}
 %\usepackage{algorithm}
 %\usepackage{algorithmic}
-\usepackage[lined,boxed,commentsnumbered]{algorithm2e}
+\usepackage[ruled,lined,linesnumbered]{algorithm2e}
 \usepackage{epstopdf}
 \usepackage{url}
 \usepackage{multirow}
 
 \setcounter{page}{1}
 \part{This is a Part}
-%\include{Chapters/chapter1/ch1}
-%\include{Chapters/chapter2/ch2}
-%\include{Chapters/chapter3/ch3}
-%\include{Chapters/chapter5/ch5}
-%\include{Chapters/chapter6/ch6}
-%\include{Chapters/chapter7/ch7}
-%\include{Chapters/chapter8/ch8}
+\include{Chapters/chapter1/ch1}
+\include{Chapters/chapter2/ch2}
+\include{Chapters/chapter3/ch3}
+\include{Chapters/chapter5/ch5}
+\include{Chapters/chapter6/ch6}
+\include{Chapters/chapter7/ch7}
+\include{Chapters/chapter8/ch8}
 \include{Chapters/chapter9/ch9}
-%\include{Chapters/chapter11/ch11}
-%\include{Chapters/chapter14/ch14}
-%\include{Chapters/chapter15/ch15}
+\include{Chapters/chapter11/ch11}
+\include{Chapters/chapter14/ch14}
+\include{Chapters/chapter15/ch15}
 
 \bibliographystyle{hep}
 %%%\bibliography{biblio}
index 5dae9b22fdb97d2982d3e8f1eff38ec0cb84548d..9b0bf27bfa807183128f49b1abd9dc086e95f8b9 100644 (file)
@@ -5,7 +5,7 @@
 \chapterauthor{Stan Scott}{School of Electronics, Electrical Engineering \& Computer Science,
 The Queen's University of Belfast}
 
-\newcommand{\fixme}[1]{{\bf #1}}
+%\newcommand{\fixme}[1]{{\bf #1}}
 
 \chapter[Numerical validation and performance optimization on GPUs in atomic physics]{Numerical validation and performance optimization on GPUs of an application in atomic physics} 
 \label{chapter15}
@@ -275,20 +275,36 @@ $\Re^{O}$.
 the output $R$-matrix becomes  the input $R$-matrix 
 for the next evaluation.  
 
+%% \begin{algorithm}
+%% \caption{\label{prop-algo}PROP algorithm}
+%% \begin{algorithmic}
+%% \FOR{all scattering energies}
+%%  \FOR{all sectors}
+%%  \STATE Read amplitude arrays
+%%  \STATE Read correction data
+%% \STATE Construct local $R$-matrices
+%% \STATE From $\Re^{I}$ and local $R$-matrices, compute $\Re^{O}$
+%% \STATE $\Re^{O}$ becomes $\Re^{I}$ for the next sector 
+%%  \ENDFOR
+%%  \STATE Compute physical $R$-Matrix 
+%% \ENDFOR
+%% \end{algorithmic}
+%% \end{algorithm}
+
 \begin{algorithm}
 \caption{\label{prop-algo}PROP algorithm}
-\begin{algorithmic}
-\FOR{all scattering energies}
- \FOR{all sectors}
- \STATE Read amplitude arrays
- \STATE Read correction data
-\STATE Construct local $R$-matrices
-\STATE From $\Re^{I}$ and local $R$-matrices, compute $\Re^{O}$
-\STATE $\Re^{O}$ becomes $\Re^{I}$ for the next sector 
- \ENDFOR
- \STATE Compute physical $R$-Matrix 
-\ENDFOR
-\end{algorithmic}
+%\begin{algorithmic}
+\For{all scattering energies} {
+ \For{all sectors}{
+  Read amplitude arrays\;
+  Read correction data\;
+  Construct local $R$-matrices\;
+  From $\Re^{I}$ and local $R$-matrices, compute $\Re^{O}$\;
+ $\Re^{O}$ becomes $\Re^{I}$ for the next sector\;
+ }
+ Compute physical $R$-Matrix \;
+}
+%\end{algorithmic}
 \end{algorithm}
 
 
index 6cc370710ee767d746dcdc2192ebb9e7a4a8d701..0d1505e829ed1e791ee768fc7a77dc9cf5943892 100644 (file)
@@ -4,10 +4,10 @@
 \@writefile{toc}{\contentsline {chapter}{\numberline {3}Setting up the environnement.}{23}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
-\newlabel{algo:memcopy:H2D}{{\caption@xref {algo:memcopy:H2D}{ on input line 124}}{23}}
-\newlabel{algo:memcopy:kernel}{{\caption@xref {algo:memcopy:kernel}{ on input line 125}}{23}}
-\newlabel{algo:memcopy:D2H}{{\caption@xref {algo:memcopy:D2H}{ on input line 126}}{23}}
-\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces Global memory management on CPU and GPU sides.\relax }}{23}}
+\newlabel{algo:memcopy:H2D}{{7}{23}}
+\newlabel{algo:memcopy:kernel}{{8}{23}}
+\newlabel{algo:memcopy:D2H}{{9}{23}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Global memory management on CPU and GPU sides.\relax }}{23}}
 \newlabel{algo:memcopy}{{1}{23}}
 \@writefile{toc}{\contentsline {section}{\numberline {3.1}Data transfers, memory management.}{24}}
 \newlabel{lst:main1}{{3.1}{25}}
 \@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces Exemple of 5x5 median filtering\relax }}{33}}
 \newlabel{fig:median_1}{{4.2}{33}}
 \newlabel{algoMedianGeneric}{{2}{33}}
-\newlabel{algoMedianGeneric:memcpyH2D}{{\caption@xref {algoMedianGeneric:memcpyH2D}{ on input line 241}}{33}}
-\newlabel{algoMedianGeneric:cptstart}{{\caption@xref {algoMedianGeneric:cptstart}{ on input line 246}}{33}}
-\newlabel{algoMedianGeneric:cptend}{{\caption@xref {algoMedianGeneric:cptend}{ on input line 246}}{33}}
-\newlabel{algoMedianGeneric:memcpyD2H}{{\caption@xref {algoMedianGeneric:memcpyD2H}{ on input line 247}}{33}}
-\@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces generic n$\times $n median filter\relax }}{33}}
+\newlabel{algoMedianGeneric:memcpyH2D}{{1}{33}}
+\newlabel{algoMedianGeneric:cptstart}{{3}{33}}
+\newlabel{algoMedianGeneric:cptend}{{5}{33}}
+\newlabel{algoMedianGeneric:memcpyD2H}{{7}{33}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {2}{\ignorespaces generic n$\times $n median filter\relax }}{33}}
 \@writefile{toc}{\contentsline {section}{\numberline {4.3}NVidia GPU tuning recipes}{33}}
 \@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces Illustration of window overlapping in 5x5 median filtering\relax }}{34}}
 \newlabel{fig:median_overlap}{{4.3}{34}}
 \setcounter{lotdepth}{1}
 \setcounter{lstnumber}{70}
 \setcounter{ContinuedFloat}{0}
-\setcounter{float@type}{16}
-\setcounter{algorithm}{2}
-\setcounter{ALC@unique}{0}
-\setcounter{ALC@line}{0}
-\setcounter{ALC@rem}{0}
-\setcounter{ALC@depth}{0}
-\setcounter{AlgoLine}{0}
-\setcounter{algocfline}{0}
-\setcounter{algocfproc}{0}
-\setcounter{algocf}{0}
+\setcounter{AlgoLine}{7}
+\setcounter{algocfline}{2}
+\setcounter{algocfproc}{2}
+\setcounter{algocf}{2}
 \setcounter{proposition}{0}
 \setcounter{theorem}{0}
 \setcounter{exercise}{0}
index 83bb339aa5b82758ac064c9c4facb472005d00b8..e04a36ccb522e86badd570bbd206cd9f73ac0667 100755 (executable)
@@ -115,15 +115,15 @@ However, so as to propose concise and more readable code, we will assume the fol
 
 \begin{algorithm}
 \SetNlSty{}{}{:}
- allocate and populate CPU memory \textbf{h\_in}\;\\
- allocate CPU pinned-memory \textbf{h\_out}\;\\
- allocate GPU global memory \textbf{d\_out}\;\\
- declare GPU texture reference \textbf{tex\_img\_in}\;\\
- allocate GPU array in global memory \textbf{array\_img\_in}\;\\
- bind GPU array \textbf{array\_img\_in} to texture \textbf{tex\_img\_in}\;\\
- copy data from \textbf{h\_in} to \textbf{array\_img\_in}\label{algo:memcopy:H2D}\;\\
- kernel\kl gridDim,blockDim\kr()\tcc*[f]{outputs to d\_out}\label{algo:memcopy:kernel}\;\\
- copy data from \textbf{d\_out} to \textbf{h\_out} \label{algo:memcopy:D2H}\;\\
+ allocate and populate CPU memory \textbf{h\_in}\;
+ allocate CPU pinned-memory \textbf{h\_out}\;
+ allocate GPU global memory \textbf{d\_out}\;
+ declare GPU texture reference \textbf{tex\_img\_in}\;
+ allocate GPU array in global memory \textbf{array\_img\_in}\;
+ bind GPU array \textbf{array\_img\_in} to texture \textbf{tex\_img\_in}\;
+ copy data from \textbf{h\_in} to \textbf{array\_img\_in}\label{algo:memcopy:H2D}\;
+ kernel\kl gridDim,blockDim\kr()\tcc*[f]{outputs to d\_out}\label{algo:memcopy:kernel}\;
+ copy data from \textbf{d\_out} to \textbf{h\_out} \label{algo:memcopy:D2H}\;
 \caption{Global memory management on CPU and GPU sides.}
 \label{algo:memcopy}
 \end{algorithm}
@@ -238,13 +238,13 @@ On the GPU's side, we note high dependence on window size due to the redundancy
  %\SetNlSty{}{}{:} 
   % \SetLine
   %\linesnumbered
-  copy data from CPU to GPU texture memory\label{algoMedianGeneric:memcpyH2D}\;\\ 
+  copy data from CPU to GPU texture memory\label{algoMedianGeneric:memcpyH2D}\; 
   \ForEach(\tcc*[f]{in parallel}){pixel at position $(x, y)$}{
-    Read gray-level values of the n$\times$n neighborhood\label{algoMedianGeneric:cptstart}\;\\
-    Selects the median value among those n$\times$n values\;\\
-    Outputs the new gray-level value \label{algoMedianGeneric:cptend}\;\\
+    Read gray-level values of the n$\times$n neighborhood\label{algoMedianGeneric:cptstart}\;
+    Selects the median value among those n$\times$n values\;
+    Outputs the new gray-level value \label{algoMedianGeneric:cptend}\;
   }
-copy data from GPU global memory to CPU memory\label{algoMedianGeneric:memcpyD2H}\;\\
+copy data from GPU global memory to CPU memory\label{algoMedianGeneric:memcpyD2H}\;
 \caption{\label{algoMedianGeneric}generic n$\times$n median filter}
 \end{algorithm}
 
index e79cde5ff57ddb56f118c3b1afc41b8a0f7ed12c..ef830ac922e48a980678abc037aeff2569fa7106 100644 (file)
 \newlabel{fig:ch6p1syncexpematrixprod}{{6.5}{95}}
 \@writefile{toc}{\contentsline {section}{\numberline {6.3}General scheme of asynchronous parallel code with computation/communication overlapping}{96}}
 \newlabel{ch6:part2}{{6.3}{96}}
-\@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces Synchronous iterative scheme\relax }}{96}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Synchronous iterative scheme\relax }}{96}}
 \newlabel{algo:ch6p2sync}{{3}{96}}
-\@writefile{loa}{\contentsline {algorithm}{\numberline {4}{\ignorespaces Asynchronous iterative scheme\relax }}{96}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {4}{\ignorespaces Asynchronous iterative scheme\relax }}{96}}
 \newlabel{algo:ch6p2async}{{4}{96}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.1}A basic asynchronous scheme}{98}}
-\newlabel{ch6:p2BasicAsync}{{6.3.1}{98}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.3.1}A basic asynchronous scheme}{97}}
+\newlabel{ch6:p2BasicAsync}{{6.3.1}{97}}
 \newlabel{algo:ch6p2BasicAsync}{{6.5}{98}}
 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.5}Initialization of the basic asynchronous scheme}{98}}
 \newlabel{algo:ch6p2BasicAsyncComp}{{6.6}{99}}
@@ -61,8 +61,8 @@
 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.11}Reception function in the synchronized scheme}{105}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {6.3.3}Asynchronous scheme using MPI, OpenMP and CUDA}{106}}
 \newlabel{ch6:p2GPUAsync}{{6.3.3}{106}}
-\newlabel{algo:ch6p2AsyncSyncComp}{{6.12}{108}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.12}Computing function in the final asynchronous scheme}{108}}
+\newlabel{algo:ch6p2AsyncSyncComp}{{6.12}{107}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.12}Computing function in the final asynchronous scheme}{107}}
 \newlabel{algo:ch6p2syncGPU}{{6.13}{109}}
 \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.13}Computing function in the final asynchronous scheme}{109}}
 \newlabel{algo:ch6p2FullOverAsyncMain}{{6.14}{111}}
 \setcounter{lotdepth}{1}
 \setcounter{lstnumber}{17}
 \setcounter{ContinuedFloat}{0}
-\setcounter{float@type}{16}
-\setcounter{algorithm}{4}
-\setcounter{ALC@unique}{0}
-\setcounter{ALC@line}{0}
-\setcounter{ALC@rem}{0}
-\setcounter{ALC@depth}{0}
 \setcounter{AlgoLine}{0}
-\setcounter{algocfline}{0}
-\setcounter{algocfproc}{0}
-\setcounter{algocf}{0}
+\setcounter{algocfline}{4}
+\setcounter{algocfproc}{4}
+\setcounter{algocf}{4}
 \setcounter{proposition}{0}
 \setcounter{theorem}{0}
 \setcounter{exercise}{0}
index 13717041b8e2b123e157c6fa79c512b4524a14af..9280d0bfa0d5337258a87149ac2aee56bd14ed78 100644 (file)
@@ -12,7 +12,9 @@ all:
        bibtex bu6
        bibtex bu7
        bibtex bu8
-       bibtex bu9      
+       bibtex bu9
+#don't put chapter 14, refs are included       
+       bibtex bu11     
        makeindex  ${BOOK}.idx
        pdflatex ${BOOK}
        pdflatex ${BOOK}