suite

author couturie <couturie@carcariass.(none)>

Sat, 22 Sep 2012 18:28:31 +0000 (20:28 +0200)

committer couturie <couturie@carcariass.(none)>

Sat, 22 Sep 2012 18:28:31 +0000 (20:28 +0200)
author couturie <couturie@carcariass.(none)>
Sat, 22 Sep 2012 18:28:31 +0000 (20:28 +0200)
committer couturie <couturie@carcariass.(none)>
Sat, 22 Sep 2012 18:28:31 +0000 (20:28 +0200)
diff --git a/BookGPU/BookGPU.tex b/BookGPU/BookGPU.tex

index 1d122ea0ae215aa5515b5dce775d698ba3072cd2..a144a18a8f476d9a6ae4164349c5316c98701e84 100755 (executable)
--- a/BookGPU/BookGPU.tex
+++ b/BookGPU/BookGPU.tex
@@ -6,6 +6,10 @@
  \usepackage{subfigure}
  %\usepackage{epsfig}
  \usepackage{makeidx}
  \usepackage{subfigure}
  %\usepackage{epsfig}
  \usepackage{makeidx}
+\usepackage{listings}
+\usepackage{caption}
+\usepackage{courier}
+\usepackage{color}
  \usepackage[sectionbib]{bibunits}
  \usepackage{multicol}
  \frenchspacing
  \usepackage[sectionbib]{bibunits}
  \usepackage{multicol}
  \frenchspacing
@@ -18,6 +22,52 @@
  
  \makeatother
  
  
  \makeatother
  
+
+
+
+ \lstset{ % default settings applied to all listings
+         basicstyle=\footnotesize\ttfamily, % default font
+         %numbers=left,               % position of the line numbers
+         numberstyle=\tiny,          % style of the line numbers
+         %stepnumber=2,               % step between printed line numbers
+         numbersep=5pt,              % distance between the numbers and the text
+         tabsize=2,                  % tab width
+         extendedchars=true,         % accept 8-bit (non-ASCII) characters in listings
+         breaklines=true,            % long lines are wrapped
+         keywordstyle=\color{red},
+               frame=b,         % single rule at the bottom of the listing
+ %        keywordstyle=[1]\textbf,    % keyword style
+ %        keywordstyle=[2]\textbf,    %
+ %        keywordstyle=[3]\textbf,    %
+ %        keywordstyle=[4]\textbf,   \sqrt{\sqrt{}} %
+         stringstyle=\color{white}\ttfamily, % string color; NOTE(review): white strings would be invisible on a white page once a language is selected -- confirm
+         showspaces=false,           % show spaces?
+         showtabs=false,             % show tabs?
+         xleftmargin=17pt,
+         framexleftmargin=17pt,
+         framexrightmargin=5pt,
+         framexbottommargin=4pt,
+         %backgroundcolor=\color{lightgray},
+         showstringspaces=false      % show spaces inside strings?
+ }
+ \lstloadlanguages{% Check the documentation for further languages ...
+         %[Visual]Basic
+         %Pascal
+         C
+         %C++
+         %XML
+         %HTML
+         %Java
+ }
+  %\DeclareCaptionFont{blue}{\color{blue}} 
+
+  %\captionsetup[lstlisting]{singlelinecheck=false, labelfont={blue}, textfont={blue}}
+\DeclareCaptionFont{white}{\color{white}} % caption font named "white": white text
+\DeclareCaptionFormat{listing}{\colorbox[cmyk]{0.43, 0.35, 0.35,0.01}{\parbox{\textwidth}{\hspace{15pt}#1#2#3}}} % caption format "listing": label, separator and text inside a text-wide colored box
+\captionsetup[lstlisting]{format=listing,labelfont=white,textfont=white, singlelinecheck=false, margin=0pt, font={bf,footnotesize}} % apply the format and the white font to lstlisting captions
+
+
+
  \makeindex
  
  \begin{document}
  \makeindex
  
  \begin{document}
diff --git a/BookGPU/Chapters/chapter2/biblio.bib b/BookGPU/Chapters/chapter2/biblio.bib

new file mode 100644 (file)

index 0000000..f52ff92
--- /dev/null
+++ b/BookGPU/Chapters/chapter2/biblio.bib
@@ -0,0 +1,10 @@
+@Book{Sanders:2010:CEI,
+  author =     "J. Sanders and E. Kandrot",
+  title =      "{CUDA} by example: an introduction to general-purpose
+                {GPU} programming",
+  publisher =  "Ad{\-d}i{\-s}on-Wes{\-l}ey",
+  address =    "Upper Saddle River, NJ, USA",
+  pages =      "xix + 290",
+  year =       "2010",
+  LCCN =       "QA76.76.A65",
+}
+\ No newline at end of file
diff --git a/BookGPU/Chapters/chapter2/ch2.tex b/BookGPU/Chapters/chapter2/ch2.tex

index b06e9be3400ac32d4bc5ae254597feda71d39a40..0640708ecb3fed538952560cf313bc8c5a5e6223 100755 (executable)
--- a/BookGPU/Chapters/chapter2/ch2.tex
+++ b/BookGPU/Chapters/chapter2/ch2.tex
@@ -9,7 +9,7 @@
  In this chapter  we give some simple examples of CUDA  programming.  The goal is
  not to provide an exhaustive presentation of all the functionalities of CUDA but
  rather to give some basic elements. Of course, readers who do not know CUDA are
  In this chapter  we give some simple examples of CUDA  programming.  The goal is
  not to provide an exhaustive presentation of all the functionalities of CUDA but
  rather to give some basic elements. Of course, readers who do not know CUDA are
-invited to read other books that are specialized on CUDA programming.
+invited to read other books that are specialized in CUDA programming (for example, \cite{Sanders:2010:CEI}).
  
  
  \section{First example}
  
  
  \section{First example}
@@ -17,7 +17,7 @@ invited to read other books that are specialized on CUDA programming.
  This first example is  intended to show how to build a  very simple example with
  CUDA.   The goal  of this  example is  to perform  the sum  of two  arrays and
  put the  result into a  third array.   A CUDA program  consists of a  C code
  This first example is  intended to show how to build a  very simple example with
  CUDA.   The goal  of this  example is  to perform  the sum  of two  arrays and
  put the  result into a  third array.   A CUDA program  consists of a  C code
-which calls CUDA kernels that are executed on a GPU.
+which calls CUDA kernels that are executed on a GPU. The complete code is given in Listing~\ref{ch2:lst:ex1}.
  
  
  As GPUs have  their own memory, the first step consists  in allocating memory on
  
  
  As GPUs have  their own memory, the first step consists  in allocating memory on
@@ -41,5 +41,29 @@ parameter is set to  \texttt{cudaMemcpyHostToDevice}. The first parameter of the
  function is the destination array, the  second is the source array and the third
  is the number of elements to copy (expressed in bytes).
  
  function is the destination array, the  second is the source array and the third
  is the number of elements to copy (expressed in bytes).
  
-\putbib[biblio]
+Now the GPU contains the data needed to perform the addition. In a sequential
+program, such an addition is carried out with a loop over all the elements. With
+a GPU, it is possible to perform the addition of all the elements of the arrays
+in parallel (if the number of blocks and of threads per block is sufficient). At
+the beginning of Listing~\ref{ch2:lst:ex1}, a simple kernel
+called \texttt{addition} is defined to compute in parallel the sum of the
+two arrays. With CUDA, a kernel starts with the keyword \texttt{\_\_global\_\_},
+which indicates that this kernel can be called from the C code. The first
+instruction in this kernel computes \texttt{tid}, which
+represents the thread index. This thread index is computed according to the
+value of the block index (a built-in CUDA variable
+called \texttt{blockIdx}\index{CUDA~keywords!blockIdx}). Blocks of threads can
+be decomposed into 1, 2 or 3 dimensions. Depending on the
+dimensionality of the data being manipulated, choosing the appropriate number of
+dimensions can be useful. In our example, only one dimension is used. Then, using
+the notation \texttt{.x}, we can access the first dimension (\texttt{.y}
+and \texttt{.z} give access to the second and third dimensions, respectively).
+The variable \texttt{blockDim}\index{CUDA~keywords!blockDim} gives the size of
+each block.
+
+
+
+\lstinputlisting[label=ch2:lst:ex1,caption=A simple example]{Chapters/chapter2/ex1.cu}
+
+\putbib[Chapters/chapter2/biblio]
  
  
diff --git a/BookGPU/Chapters/chapter2/ex1.cu b/BookGPU/Chapters/chapter2/ex1.cu

new file mode 100644 (file)

index 0000000..e182349
--- /dev/null
+++ b/BookGPU/Chapters/chapter2/ex1.cu
@@ -0,0 +1,92 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+#include "cutil_inline.h"
+
+const int nbThreadsPerBloc=256;
+
+// Adds d_A and d_B element-wise into d_C; each thread handles one index.
+__global__ void addition(int size, int *d_C, int *d_A, int *d_B) {
+       int tid = blockIdx.x * blockDim.x + threadIdx.x;
+       if (tid >= size)
+               return;  // index past the end of the arrays: nothing to do
+       d_C[tid] = d_A[tid] + d_B[tid];
+}
+
+
+// Host driver: adds two integer arrays of nb_components elements on the CPU
+// and on the GPU, prints both timings and checks that the results agree.
+int main( int argc, char** argv)
+{
+       if(argc!=2) {
+               printf("usage: ex1 nb_components\n");
+               exit(0);
+       }
+
+       int size=atoi(argv[1]);
+       // A non-positive size would wrap around in size*sizeof(int) or lead
+       // to a grid of zero blocks, which is not a valid launch configuration.
+       if(size<=0) {
+               printf("nb_components must be a positive integer\n");
+               exit(1);
+       }
+
+       int i;
+       // Host buffers: the two inputs, the CPU result and the GPU result.
+       int *h_arrayA=(int*)malloc(size*sizeof(int));
+       int *h_arrayB=(int*)malloc(size*sizeof(int));
+       int *h_arrayC=(int*)malloc(size*sizeof(int));
+       int *h_arrayCgpu=(int*)malloc(size*sizeof(int));
+       // Device buffers for the two inputs and the result.
+       int *d_arrayA, *d_arrayB, *d_arrayC;
+
+       cudaMalloc((void**)&d_arrayA,size*sizeof(int));
+       cudaMalloc((void**)&d_arrayB,size*sizeof(int));
+       cudaMalloc((void**)&d_arrayC,size*sizeof(int));
+
+       for(i=0;i<size;i++) {
+               h_arrayA[i]=i;
+               h_arrayB[i]=2*i;
+       }
+
+       // Reference computation on the CPU, timed on its own.
+       unsigned int timer_cpu = 0;
+       cutilCheckError(cutCreateTimer(&timer_cpu));
+       cutilCheckError(cutStartTimer(timer_cpu));
+       for(i=0;i<size;i++) {
+               h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
+       }
+       cutilCheckError(cutStopTimer(timer_cpu));
+       printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
+       cutDeleteTimer(timer_cpu);
+
+       // GPU version: the timed region covers the two host-to-device copies,
+       // the kernel launch and the copy of the result back to the host.
+       unsigned int timer_gpu = 0;
+       cutilCheckError(cutCreateTimer(&timer_gpu));
+       cutilCheckError(cutStartTimer(timer_gpu));
+       cudaMemcpy(d_arrayA,h_arrayA, size * sizeof(int), cudaMemcpyHostToDevice);
+       cudaMemcpy(d_arrayB,h_arrayB, size * sizeof(int), cudaMemcpyHostToDevice);
+
+       // Round up so that the last, possibly partial, block is included.
+       int nbBlocs=(size+nbThreadsPerBloc-1)/nbThreadsPerBloc;
+
+       addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
+
+       cudaMemcpy(h_arrayCgpu,d_arrayC, size * sizeof(int), cudaMemcpyDeviceToHost);
+
+       cutilCheckError(cutStopTimer(timer_gpu));
+       printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
+       cutDeleteTimer(timer_gpu);
+
+       // The GPU result must match the CPU reference element by element.
+       for(i=0;i<size;i++)
+               assert(h_arrayC[i]==h_arrayCgpu[i]);
+
+       cudaFree(d_arrayA);
+       cudaFree(d_arrayB);
+       cudaFree(d_arrayC);
+       free(h_arrayA);
+       free(h_arrayB);
+       free(h_arrayC);
+       // This buffer was allocated above but never released in the original.
+       free(h_arrayCgpu);
+
+       return 0;
+}
diff --git a/BookGPU/Makefile b/BookGPU/Makefile

index 472850fe57c50e56eb90aa7cc88655772d897d45..a1e80b8da6df429942fea0dd6395bf50cc4a97e3 100644 (file)
--- a/BookGPU/Makefile
+++ b/BookGPU/Makefile
@@ -6,7 +6,6 @@ all:
         pdflatex ${BOOK}
         bibtex bu1
         bibtex bu2
         pdflatex ${BOOK}
         bibtex bu1
         bibtex bu2
-       bibtex bu3
         makeindex  ${BOOK}.idx
         pdflatex ${BOOK}
         pdflatex ${BOOK}
         makeindex  ${BOOK}.idx
         pdflatex ${BOOK}
         pdflatex ${BOOK}
author	couturie <couturie@carcariass.(none)>
	Sat, 22 Sep 2012 18:28:31 +0000 (20:28 +0200)
committer	couturie <couturie@carcariass.(none)>
	Sat, 22 Sep 2012 18:28:31 +0000 (20:28 +0200)
BookGPU/BookGPU.tex		patch \| blob \| history
BookGPU/Chapters/chapter2/biblio.bib	[new file with mode: 0644]	patch \| blob
BookGPU/Chapters/chapter2/ch2.tex		patch \| blob \| history
BookGPU/Chapters/chapter2/ex1.cu	[new file with mode: 0644]	patch \| blob
BookGPU/Makefile		patch \| blob \| history