// Allocate the host buffer that receives the GPU result and the three device
// buffers handed to the addition kernel. Each buffer holds `size` ints.
// Every buffer is declared and allocated exactly once: repeating the
// declarations is a redefinition error, and repeating the allocations
// overwrites (and therefore leaks) the first set of pointers.
const size_t allocBytes = size * sizeof(int);

int *h_arrayCgpu = (int*)malloc(allocBytes);
if (h_arrayCgpu == NULL && allocBytes != 0) {
    fprintf(stderr, "Host allocation of %lu bytes failed\n",
            (unsigned long)allocBytes);
    exit(EXIT_FAILURE);
}

int *d_arrayA = NULL, *d_arrayB = NULL, *d_arrayC = NULL;

// Stop at the first failing device allocation so the reported error is the
// one that actually caused the failure.
cudaError_t allocStatus = cudaMalloc((void**)&d_arrayA, allocBytes);
if (allocStatus == cudaSuccess)
    allocStatus = cudaMalloc((void**)&d_arrayB, allocBytes);
if (allocStatus == cudaSuccess)
    allocStatus = cudaMalloc((void**)&d_arrayC, allocBytes);
if (allocStatus != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(allocStatus));
    exit(EXIT_FAILURE);
}
// CPU timing: create and start a cutil timer, report its value in
// milliseconds, then release it. The timer is created, read and deleted
// exactly once; a second read/delete would operate on an already-deleted
// timer handle.
unsigned int timer_cpu = 0;
cutilCheckError(cutCreateTimer(&timer_cpu));
cutilCheckError(cutStartTimer(timer_cpu));

// NOTE(review): no CPU workload and no cutStopTimer call are visible between
// starting the timer and reading it in this excerpt -- confirm that the
// reference computation being measured sits here in the full file.

printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
// Check the delete result the same way as the other cut* timer calls.
cutilCheckError(cutDeleteTimer(timer_cpu));
// GPU timing: the measured interval covers uploading both input arrays,
// launching the addition kernel, and downloading the result into
// h_arrayCgpu. Each step runs exactly once; repeating the uploads, the launch
// or the download would double the work and distort the measurement.
unsigned int timer_gpu = 0;
cutilCheckError(cutCreateTimer(&timer_gpu));
cutilCheckError(cutStartTimer(timer_gpu));

const size_t xferBytes = size * sizeof(int);

// Chain the steps on a single status so that the first failure is the one
// reported and later steps are skipped once something has gone wrong.
cudaError_t gpuStatus =
    cudaMemcpy(d_arrayA, h_arrayA, xferBytes, cudaMemcpyHostToDevice);
if (gpuStatus == cudaSuccess)
    gpuStatus = cudaMemcpy(d_arrayB, h_arrayB, xferBytes, cudaMemcpyHostToDevice);
if (gpuStatus == cudaSuccess) {
    addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
    // A kernel launch returns no status of its own; launch-configuration
    // errors have to be fetched explicitly.
    gpuStatus = cudaGetLastError();
}
if (gpuStatus == cudaSuccess)
    gpuStatus = cudaMemcpy(h_arrayCgpu, d_arrayC, xferBytes, cudaMemcpyDeviceToHost);

// Stop the timer right after the download, as the original sequence did.
cutilCheckError(cutStopTimer(timer_gpu));

if (gpuStatus != cudaSuccess) {
    fprintf(stderr, "GPU addition failed: %s\n", cudaGetErrorString(gpuStatus));
    exit(EXIT_FAILURE);
}