6 #include "cutil_inline.h"
// Threads per block for the `addition` kernel launch; it is also the divisor
// used when the number of blocks is derived from the element count.
8 const int nbThreadsPerBloc=256;
// Element-wise integer addition: each thread computes one element,
// d_C[tid] = d_A[tid] + d_B[tid].
//
// Parameters:
//   size - passed by the caller as the element count; it is not read in the
//          lines visible here.
//   d_C  - output array, written at index tid.
//   d_A  - first input array, read at index tid.
//   d_B  - second input array, read at index tid.
//
// NOTE(review): this listing has gaps (the embedded line numbers skip values).
// No `__global__` qualifier is visible although the function is launched with
// <<<...>>> from main - presumably it sits on a line missing here; confirm.
11 void addition(int size, int *d_C, int *d_A, int *d_B) {
// Flat 1D global thread index (x dimension only).
12 int tid = blockIdx.x * blockDim.x + threadIdx.x;
// NOTE(review): no `tid < size` guard is visible before this access. The caller
// rounds the block count up, so when size is not a multiple of the block size
// some threads have tid >= size and would index past the end of the arrays.
// A guard may exist on a line missing from this excerpt - confirm.
14 d_C[tid]=d_A[tid]+d_B[tid];
// Entry point: adds two integer arrays of `size` elements on the CPU and on the
// GPU, prints the time measured for each, and compares the two results.
//
// NOTE(review): this listing has gaps (the embedded line numbers skip values),
// so braces, loop headers, branch conditions, input initialisation and any
// cleanup are not visible here. Comments below describe only the visible lines.
19 int main( int argc, char** argv)
// Usage message; the condition that triggers it is not visible in this excerpt.
23 printf("usage: ex1 nb_components\n");
// Element count taken from the first command-line argument. atoi reports no
// errors, and no range check on `size` is visible in this excerpt.
29 int size=atoi(argv[1]);
// Host buffers: A and B are the inputs, C holds the CPU result, and Cgpu
// receives the result copied back from the device.
// NOTE(review): the malloc return values are not checked in the visible lines.
32 int *h_arrayA=(int*)malloc(size*sizeof(int));
33 int *h_arrayB=(int*)malloc(size*sizeof(int));
34 int *h_arrayC=(int*)malloc(size*sizeof(int));
35 int *h_arrayCgpu=(int*)malloc(size*sizeof(int));
36 int *d_arrayA, *d_arrayB, *d_arrayC;
// Device buffers, same byte size as the host buffers.
// NOTE(review): the cudaMalloc return codes are ignored here.
39 cudaMalloc((void**)&d_arrayA,size*sizeof(int));
40 cudaMalloc((void**)&d_arrayB,size*sizeof(int));
41 cudaMalloc((void**)&d_arrayC,size*sizeof(int));
// NOTE(review): initialisation of h_arrayA / h_arrayB is not visible in this
// excerpt - presumably done on the missing lines; confirm.
// CPU reference: timer created and started around the host-side addition.
49 unsigned int timer_cpu = 0;
50 cutilCheckError(cutCreateTimer(&timer_cpu));
51 cutilCheckError(cutStartTimer(timer_cpu));
// Host-side element addition; the loop that defines `i` is not visible here.
53 h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
55 cutilCheckError(cutStopTimer(timer_cpu));
56 printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
57 cutDeleteTimer(timer_cpu);
// GPU path: the timer is started before the host-to-device copies and stopped
// after the device-to-host copy, so the printed GPU time covers both transfers
// as well as the kernel.
60 unsigned int timer_gpu = 0;
61 cutilCheckError(cutCreateTimer(&timer_gpu));
62 cutilCheckError(cutStartTimer(timer_gpu));
// Upload both inputs to the device.
// NOTE(review): the cudaMemcpy return codes are ignored here.
63 cudaMemcpy(d_arrayA,h_arrayA, size * sizeof(int), cudaMemcpyHostToDevice);
64 cudaMemcpy(d_arrayB,h_arrayB, size * sizeof(int), cudaMemcpyHostToDevice);
// Ceiling division: enough blocks that nbBlocs * nbThreadsPerBloc >= size.
69 int nbBlocs=(size+nbThreadsPerBloc-1)/nbThreadsPerBloc;
// 1D launch: nbBlocs blocks of nbThreadsPerBloc threads.
// NOTE(review): no launch error check (e.g. cudaGetLastError) is visible after
// this line.
71 addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
// Copy the device result into h_arrayCgpu; the return code is ignored here.
73 cudaMemcpy(h_arrayCgpu,d_arrayC, size * sizeof(int), cudaMemcpyDeviceToHost);
75 cutilCheckError(cutStopTimer(timer_gpu));
76 printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
77 cutDeleteTimer(timer_gpu);
// Compares the CPU and GPU results at index i; the loop that defines `i` is not
// visible here. The check is an assert, so it is compiled out under NDEBUG.
// NOTE(review): no free / cudaFree calls are visible in this excerpt - confirm
// the buffers are released on the lines that follow.
80 assert(h_arrayC[i]==h_arrayCgpu[i]);