ajout du code du kernel

[kahina_paper2.git] / maj.tex
diff --git a/maj.tex b/maj.tex

index dd8b15edd712af315681ce2cf4b0053eb9889174..49a7448bd1e5b1edeebe085005c3c29ed399f9d8 100644 (file)
--- a/maj.tex
+++ b/maj.tex
@@ -9,7 +9,8 @@
  \usepackage[textsize=footnotesize]{todonotes}
  \usepackage{amsmath}
  \usepackage{amssymb}
  \usepackage[textsize=footnotesize]{todonotes}
  \usepackage{amsmath}
  \usepackage{amssymb}
-\usepackage{float} 
+\usepackage{float}
+\usepackage{listings}
  \newcommand{\LZK}[2][inline]{%
    \todo[color=red!10,#1]{\sffamily\textbf{LZK:} #2}\xspace}
  \newcommand{\RC}[2][inline]{%
  \newcommand{\LZK}[2][inline]{%
    \todo[color=red!10,#1]{\sffamily\textbf{LZK:} #2}\xspace}
  \newcommand{\RC}[2][inline]{%
@@ -326,13 +327,20 @@ Copy $P$, $P'$ and $Z$ from CPU to GPU\;
  Copy $Z$ from GPU to CPU\;
  \end{algorithm}
  
  Copy $Z$ from GPU to CPU\;
  \end{algorithm}
  
-\RC{A revoir (c'est de la blague en l'état) : Figure~\ref{fig:00} shows the second kernel code}
-\begin{figure}[htbp]
-\centering
-\includegraphics[angle=+0,width=0.4\textwidth]{code}
-\caption{The Kernel Update code}
-\label{fig:00}
-\end{figure}
+Listing~\ref{lst:01} shows the second kernel code. As can be seen, this
+kernel calls multiple kernels; the operations on complex numbers
+are not detailed.
+
+\begin{footnotesize}
+\lstinputlisting[label=lst:01,caption=Kernels to update the roots]{code.c}
+\end{footnotesize}
+
+%\begin{figure}[htbp]
+%\centering
+%\includegraphics[angle=+0,width=0.4\textwidth]{code}
+%\caption{The Kernel Update code}
+%\label{fig:00}
+%\end{figure}
  
  %We noticed that the code is executed by a large number of GPU threads organized as a grid with two dimensions (number of blocks per grid (NbBlock), number of threads per block (Nbthread)); Nbthread is fixed initially, and NbBlock is computed as follows: 
  %$ NbBlocks = \frac{N+Nbthreads-1}{Nbthreads}$, where $N$ is the number of roots
  
  %We noticed that the code is executed by a large number of GPU threads organized as a grid with two dimensions (number of blocks per grid (NbBlock), number of threads per block (Nbthread)); Nbthread is fixed initially, and NbBlock is computed as follows: 
  %$ NbBlocks = \frac{N+Nbthreads-1}{Nbthreads}$, where $N$ is the number of roots