AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter2/ex1.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
correct ch 10
[book_gpu.git] / BookGPU / Chapters / chapter2 / ex1.cu
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "cutil_inline.h"
7
/*
 * Number of threads in each block of the kernel launch. The host code also
 * divides the array size by this value (rounding up) to get the block count.
 */
const int nbThreadsPerBloc = 256;
9
/*
 * Element-wise integer addition: d_C[i] = d_A[i] + d_B[i] for i in [0, size).
 *
 * Each thread derives one index from its block and thread coordinates along
 * the x dimension and processes that single element. Threads whose index is
 * not below `size` leave memory untouched, so the launch may contain more
 * threads than there are elements.
 *
 *   size : number of elements to process
 *   d_C  : output array, written at indices [0, size)
 *   d_A  : first input array, read at indices [0, size)
 *   d_B  : second input array, read at indices [0, size)
 */
__global__
void addition(int size, int *d_C, int *d_A, int *d_B) {
        const int idx = threadIdx.x + blockDim.x * blockIdx.x;

        /* Surplus threads of the last block have nothing to do. */
        if (idx >= size) {
                return;
        }

        d_C[idx] = d_A[idx] + d_B[idx];
}
17
18
/* Print a diagnostic and terminate the program if a CUDA runtime call failed. */
static void checkCudaCall(cudaError_t status, const char *what)
{
        if (status != cudaSuccess) {
                fprintf(stderr, "CUDA error during %s: %s\n", what,
                        cudaGetErrorString(status));
                exit(EXIT_FAILURE);
        }
}

/*
 * Adds two integer vectors of nb_components elements on the CPU and on the
 * GPU, prints the time taken by each, and checks that both results agree.
 *
 * Usage: ex1 nb_components
 *
 * Returns EXIT_SUCCESS when the two results match. Returns EXIT_FAILURE on a
 * bad command line, a host allocation failure or a result mismatch; a failing
 * CUDA call terminates the program with EXIT_FAILURE as well.
 */
int main( int argc, char** argv)
{
        if(argc!=2) {
                fprintf(stderr, "usage: ex1 nb_components\n");
                return EXIT_FAILURE;
        }

        /*
         * strtol instead of atoi so that non-numeric input is rejected.
         * The upper bound keeps 2*i and i + 2*i (the largest values stored
         * below) within the range of int.
         */
        char *end = NULL;
        errno = 0;
        long requested = strtol(argv[1], &end, 10);
        if(errno != 0 || end == argv[1] || *end != '\0' ||
           requested < 1 || requested > INT_MAX / 3) {
                fprintf(stderr, "nb_components must be an integer between 1 and %d\n",
                        INT_MAX / 3);
                return EXIT_FAILURE;
        }

        int size=(int)requested;
        size_t nbBytes=(size_t)size*sizeof(int);
        int i;
        int *h_arrayA=(int*)malloc(nbBytes);
        int *h_arrayB=(int*)malloc(nbBytes);
        int *h_arrayC=(int*)malloc(nbBytes);
        int *h_arrayCgpu=(int*)malloc(nbBytes);
        int *d_arrayA=NULL, *d_arrayB=NULL, *d_arrayC=NULL;

        if(h_arrayA==NULL || h_arrayB==NULL || h_arrayC==NULL || h_arrayCgpu==NULL) {
                fprintf(stderr, "host allocation of %d components failed\n", size);
                free(h_arrayA);
                free(h_arrayB);
                free(h_arrayC);
                free(h_arrayCgpu);
                return EXIT_FAILURE;
        }

        checkCudaCall(cudaMalloc((void**)&d_arrayA,nbBytes), "cudaMalloc of d_arrayA");
        checkCudaCall(cudaMalloc((void**)&d_arrayB,nbBytes), "cudaMalloc of d_arrayB");
        checkCudaCall(cudaMalloc((void**)&d_arrayC,nbBytes), "cudaMalloc of d_arrayC");

        /* Input data: A[i] = i and B[i] = 2*i. */
        for(i=0;i<size;i++) {
                h_arrayA[i]=i;
                h_arrayB[i]=2*i;
        }

        /* Reference result computed and timed on the CPU. */
        unsigned int timer_cpu = 0;
        cutilCheckError(cutCreateTimer(&timer_cpu));
        cutilCheckError(cutStartTimer(timer_cpu));
        for(i=0;i<size;i++) {
                h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
        }
        cutilCheckError(cutStopTimer(timer_cpu));
        printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
        cutDeleteTimer(timer_cpu);

        /*
         * GPU version. The timed region covers the two host-to-device copies,
         * the kernel launch and the device-to-host copy of the result.
         */
        unsigned int timer_gpu = 0;
        cutilCheckError(cutCreateTimer(&timer_gpu));
        cutilCheckError(cutStartTimer(timer_gpu));
        checkCudaCall(cudaMemcpy(d_arrayA,h_arrayA, nbBytes, cudaMemcpyHostToDevice),
                      "copy of h_arrayA to the device");
        checkCudaCall(cudaMemcpy(d_arrayB,h_arrayB, nbBytes, cudaMemcpyHostToDevice),
                      "copy of h_arrayB to the device");

        /* Round up so that a partially filled last block covers the tail. */
        int nbBlocs=(size+nbThreadsPerBloc-1)/nbThreadsPerBloc;
        addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
        /* A launch does not return a status; a bad configuration shows up here. */
        checkCudaCall(cudaGetLastError(), "launch of the addition kernel");
        /* Errors raised while the kernel runs are reported by this copy. */
        checkCudaCall(cudaMemcpy(h_arrayCgpu,d_arrayC, nbBytes, cudaMemcpyDeviceToHost),
                      "copy of d_arrayC to the host");

        cutilCheckError(cutStopTimer(timer_gpu));
        printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
        cutDeleteTimer(timer_gpu);

        /*
         * Explicit comparison rather than assert(), so the verification is
         * still performed when the program is built with NDEBUG.
         */
        int status=EXIT_SUCCESS;
        for(i=0;i<size;i++) {
                if(h_arrayC[i]!=h_arrayCgpu[i]) {
                        fprintf(stderr, "result mismatch at index %d: CPU %d, GPU %d\n",
                                i, h_arrayC[i], h_arrayCgpu[i]);
                        status=EXIT_FAILURE;
                        break;
                }
        }

        checkCudaCall(cudaFree(d_arrayA), "cudaFree of d_arrayA");
        checkCudaCall(cudaFree(d_arrayB), "cudaFree of d_arrayB");
        checkCudaCall(cudaFree(d_arrayC), "cudaFree of d_arrayC");
        free(h_arrayA);
        free(h_arrayB);
        free(h_arrayC);
        free(h_arrayCgpu);
        return status;
}