6 #include "cutil_inline.h"
// Number of threads in each block of the 1D launch of the addition kernel;
// main() derives the block count from this value by ceiling division.
const int nbThreadsPerBloc = 256;
/**
 * Element-wise integer vector addition: d_C[i] = d_A[i] + d_B[i]
 * for every i in [0, size).
 *
 * Expected launch layout: 1D grid of 1D blocks, one thread per element.
 * The grid may hold more threads than there are elements (the caller rounds
 * the block count up); surplus threads return without touching memory.
 *
 * @param size number of elements to add
 * @param d_C  output array, at least `size` ints, addressable from the device
 * @param d_A  first input array, at least `size` ints, addressable from the device
 * @param d_B  second input array, at least `size` ints, addressable from the device
 */
__global__ void addition(int size, int *d_C, int *d_A, int *d_B) {
    // Widen before multiplying so the global index cannot wrap in 32 bits
    // when the rounded-up grid extends past `size`.
    const long long tid = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    // Tail guard: the last block is usually only partially filled.
    if (tid < size) {
        d_C[tid] = d_A[tid] + d_B[tid];
    }
}
/* Stops the program with a diagnostic when a CUDA runtime call failed. */
static void checkCudaCall(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Adds two integer vectors of `nb_components` elements on the CPU and on the
 * GPU, prints the time taken by each, and checks that both results agree.
 *
 * Usage: ex1 nb_components
 *
 * @return EXIT_SUCCESS when both results match, EXIT_FAILURE on bad arguments
 *         or allocation failure (CUDA failures terminate via checkCudaCall).
 */
int main( int argc, char** argv)
{
    if (argc < 2) {
        printf("usage: ex1 nb_components\n");
        return EXIT_FAILURE;
    }

    int size=atoi(argv[1]);
    if (size <= 0) {
        // atoi yields 0 for non-numeric input; a zero-block launch is invalid.
        fprintf(stderr, "nb_components must be a positive integer\n");
        return EXIT_FAILURE;
    }
    // Byte count computed in size_t so large element counts do not overflow int.
    const size_t nbBytes = (size_t)size * sizeof(int);

    int *h_arrayA=(int*)malloc(nbBytes);
    int *h_arrayB=(int*)malloc(nbBytes);
    int *h_arrayC=(int*)malloc(nbBytes);
    int *h_arrayCgpu=(int*)malloc(nbBytes);
    if (!h_arrayA || !h_arrayB || !h_arrayC || !h_arrayCgpu) {
        fprintf(stderr, "host allocation of %d components failed\n", size);
        free(h_arrayA);
        free(h_arrayB);
        free(h_arrayC);
        free(h_arrayCgpu);
        return EXIT_FAILURE;
    }

    int *d_arrayA, *d_arrayB, *d_arrayC;
    checkCudaCall(cudaMalloc((void**)&d_arrayA, nbBytes), "cudaMalloc(d_arrayA)");
    checkCudaCall(cudaMalloc((void**)&d_arrayB, nbBytes), "cudaMalloc(d_arrayB)");
    checkCudaCall(cudaMalloc((void**)&d_arrayC, nbBytes), "cudaMalloc(d_arrayC)");

    // Give the inputs defined contents before they are read. Values are kept
    // small so that the sum of two elements can never overflow an int.
    for (int i = 0; i < size; ++i) {
        h_arrayA[i] = i & 0xFFFF;
        h_arrayB[i] = (i >> 3) & 0xFFFF;
    }

    // ---- CPU reference ----
    unsigned int timer_cpu = 0;
    cutilCheckError(cutCreateTimer(&timer_cpu));
    cutilCheckError(cutStartTimer(timer_cpu));
    for (int i = 0; i < size; ++i) {
        h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
    }
    cutilCheckError(cutStopTimer(timer_cpu));
    printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
    cutilCheckError(cutDeleteTimer(timer_cpu));

    // ---- GPU version (timing includes both transfers) ----
    unsigned int timer_gpu = 0;
    cutilCheckError(cutCreateTimer(&timer_gpu));
    cutilCheckError(cutStartTimer(timer_gpu));
    checkCudaCall(cudaMemcpy(d_arrayA, h_arrayA, nbBytes, cudaMemcpyHostToDevice),
                  "copy of A to device");
    checkCudaCall(cudaMemcpy(d_arrayB, h_arrayB, nbBytes, cudaMemcpyHostToDevice),
                  "copy of B to device");

    // Ceiling division written so that it cannot overflow for size close to INT_MAX.
    int nbBlocs = (size - 1) / nbThreadsPerBloc + 1;
    addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
    // A launch returns no status itself; configuration errors show up here.
    checkCudaCall(cudaGetLastError(), "addition kernel launch");
    // Blocking copy: waits for the kernel, so the timer below covers its execution,
    // and reports any error raised while the kernel ran.
    checkCudaCall(cudaMemcpy(h_arrayCgpu, d_arrayC, nbBytes, cudaMemcpyDeviceToHost),
                  "copy of C to host");

    cutilCheckError(cutStopTimer(timer_gpu));
    printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
    cutilCheckError(cutDeleteTimer(timer_gpu));

    // ---- Verification: the GPU result must equal the CPU reference ----
    for (int i = 0; i < size; ++i) {
        assert(h_arrayC[i]==h_arrayCgpu[i]);
    }

    checkCudaCall(cudaFree(d_arrayA), "cudaFree(d_arrayA)");
    checkCudaCall(cudaFree(d_arrayB), "cudaFree(d_arrayB)");
    checkCudaCall(cudaFree(d_arrayC), "cudaFree(d_arrayC)");
    free(h_arrayA);
    free(h_arrayB);
    free(h_arrayC);
    free(h_arrayCgpu);

    return EXIT_SUCCESS;
}