AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter2/ex1.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
spell check ch1
[book_gpu.git] / BookGPU / Chapters / chapter2 / ex1.cu
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <math.h>
5 #include <assert.h>
6 #include "cutil_inline.h"
7
/* Number of threads in each block of the kernel launch configuration. */
static const int nbThreadsPerBloc = 256;
9
/*
 * Element-wise integer vector addition: d_C[i] = d_A[i] + d_B[i]
 * for every index 0 <= i < size.
 *
 * Launch layout: 1D grid of 1D blocks. The loop below strides by the total
 * number of launched threads, so the result is complete for any grid size;
 * when the launch provides at least `size` threads each thread handles at
 * most one element. No shared memory is used.
 *
 * size : number of elements to process; nothing is written when size <= 0.
 * d_C  : output array, must hold at least `size` ints.
 * d_A  : first input array, at least `size` ints.
 * d_B  : second input array, at least `size` ints.
 */
__global__
void addition(int size, int *d_C, int *d_A, int *d_B) {
        /* 64-bit index arithmetic: the product blockIdx.x * blockDim.x and the
           repeated "+= stride" cannot wrap around for large sizes or grids. */
        const long long stride = (long long)gridDim.x * blockDim.x;
        for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
             i < size;
             i += stride) {
                d_C[i] = d_A[i] + d_B[i];
        }
}
17
18
/*
 * Terminate the program with a readable message when a CUDA runtime call
 * fails.
 *
 * status : value returned by the CUDA runtime call.
 * what   : short description of the call, printed in the error message.
 *
 * The process exits immediately on failure; outstanding host and device
 * allocations are left for the operating system / driver to reclaim.
 */
static void checkCuda(cudaError_t status, const char *what)
{
        if (status != cudaSuccess) {
                fprintf(stderr, "CUDA error during %s: %s\n",
                        what, cudaGetErrorString(status));
                exit(EXIT_FAILURE);
        }
}

/*
 * Adds two integer vectors of nb_components elements on the CPU and on the
 * GPU, prints the time taken by each version and asserts that both produce
 * the same result.
 *
 * Usage: ex1 nb_components
 *
 * Returns 0 on success. Exits with EXIT_FAILURE on a usage error, an invalid
 * component count, a host allocation failure or any CUDA runtime error.
 */
int main( int argc, char** argv)
{

        if(argc!=2) {
                printf("usage: ex1 nb_components\n");
                exit(EXIT_FAILURE);
        }

        /* atoi() yields 0 for non-numeric text; reject that and negative
           values, which would otherwise become empty or huge allocation
           sizes once converted to size_t. */
        int size=atoi(argv[1]);
        if(size<=0) {
                fprintf(stderr, "ex1: nb_components must be a positive integer\n");
                exit(EXIT_FAILURE);
        }

        /* Byte count shared by every host/device buffer and every copy. */
        const size_t nbBytes=(size_t)size*sizeof(int);

        int i;
        int *h_arrayA=(int*)malloc(nbBytes);
        int *h_arrayB=(int*)malloc(nbBytes);
        int *h_arrayC=(int*)malloc(nbBytes);
        int *h_arrayCgpu=(int*)malloc(nbBytes);
        if(h_arrayA==NULL || h_arrayB==NULL || h_arrayC==NULL || h_arrayCgpu==NULL) {
                fprintf(stderr, "ex1: cannot allocate host memory for %d components\n", size);
                /* free(NULL) is a no-op, so releasing all four is safe. */
                free(h_arrayA);
                free(h_arrayB);
                free(h_arrayC);
                free(h_arrayCgpu);
                exit(EXIT_FAILURE);
        }

        int *d_arrayA=NULL, *d_arrayB=NULL, *d_arrayC=NULL;
        checkCuda(cudaMalloc((void**)&d_arrayA,nbBytes), "cudaMalloc(d_arrayA)");
        checkCuda(cudaMalloc((void**)&d_arrayB,nbBytes), "cudaMalloc(d_arrayB)");
        checkCuda(cudaMalloc((void**)&d_arrayC,nbBytes), "cudaMalloc(d_arrayC)");

        /* Input data: A[i] = i, B[i] = 2*i. */
        for(i=0;i<size;i++) {
                h_arrayA[i]=i;
                h_arrayB[i]=2*i;
        }

        /* Reference result computed and timed on the CPU. */
        unsigned int timer_cpu = 0;
        cutilCheckError(cutCreateTimer(&timer_cpu));
        cutilCheckError(cutStartTimer(timer_cpu));
        for(i=0;i<size;i++) {
                h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
        }
        cutilCheckError(cutStopTimer(timer_cpu));
        printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
        cutilCheckError(cutDeleteTimer(timer_cpu));

        /* GPU version. The timer covers both host<->device transfers as well
           as the kernel: the device-to-host cudaMemcpy below only returns
           once the kernel has finished and the data has arrived. */
        unsigned int timer_gpu = 0;
        cutilCheckError(cutCreateTimer(&timer_gpu));
        cutilCheckError(cutStartTimer(timer_gpu));
        checkCuda(cudaMemcpy(d_arrayA,h_arrayA, nbBytes, cudaMemcpyHostToDevice),
                  "copy of h_arrayA to the device");
        checkCuda(cudaMemcpy(d_arrayB,h_arrayB, nbBytes, cudaMemcpyHostToDevice),
                  "copy of h_arrayB to the device");

        /* Ceiling division written so that it cannot overflow int
           (size > 0 is guaranteed by the check above). */
        int nbBlocs=(size-1)/nbThreadsPerBloc+1;

        addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
        /* A kernel launch returns nothing; an invalid launch configuration
           is only visible through cudaGetLastError(). */
        checkCuda(cudaGetLastError(), "launch of the addition kernel");

        checkCuda(cudaMemcpy(h_arrayCgpu,d_arrayC, nbBytes, cudaMemcpyDeviceToHost),
                  "copy of d_arrayC to the host");

        cutilCheckError(cutStopTimer(timer_gpu));
        printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
        cutilCheckError(cutDeleteTimer(timer_gpu));

        /* The GPU result must match the CPU reference exactly. */
        for(i=0;i<size;i++)
                assert(h_arrayC[i]==h_arrayCgpu[i]);

        checkCuda(cudaFree(d_arrayA), "cudaFree(d_arrayA)");
        checkCuda(cudaFree(d_arrayB), "cudaFree(d_arrayB)");
        checkCuda(cudaFree(d_arrayC), "cudaFree(d_arrayC)");
        free(h_arrayA);
        free(h_arrayB);
        free(h_arrayC);
        free(h_arrayCgpu);

        return 0;

}