int main( int argc, char** argv)
{
-
if(argc!=2) {
printf("usage: ex1 nb_components\n");
exit(0);
}
-
-
int size=atoi(argv[1]);
-
int i;
int *h_arrayA=(int*)malloc(size*sizeof(int));
int *h_arrayB=(int*)malloc(size*sizeof(int));
int *h_arrayCgpu=(int*)malloc(size*sizeof(int));
int *d_arrayA, *d_arrayB, *d_arrayC;
-
cudaMalloc((void**)&d_arrayA,size*sizeof(int));
cudaMalloc((void**)&d_arrayB,size*sizeof(int));
cudaMalloc((void**)&d_arrayC,size*sizeof(int));
h_arrayB[i]=2*i;
}
-
unsigned int timer_cpu = 0;
cutilCheckError(cutCreateTimer(&timer_cpu));
- cutilCheckError(cutStartTimer(timer_cpu));
+ cutilCheckError(cutStartTimer(timer_cpu));
for(i=0;i<size;i++) {
h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
}
printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
cutDeleteTimer(timer_cpu);
-
unsigned int timer_gpu = 0;
cutilCheckError(cutCreateTimer(&timer_gpu));
- cutilCheckError(cutStartTimer(timer_gpu));
+ cutilCheckError(cutStartTimer(timer_gpu));
cudaMemcpy(d_arrayA,h_arrayA, size * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_arrayB,h_arrayB, size * sizeof(int), cudaMemcpyHostToDevice);
-
-
-
int nbBlocs=(size+nbThreadsPerBloc-1)/nbThreadsPerBloc;
-
addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
-
cudaMemcpy(h_arrayCgpu,d_arrayC, size * sizeof(int), cudaMemcpyDeviceToHost);
cutilCheckError(cutStopTimer(timer_gpu));
printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
cutDeleteTimer(timer_gpu);
- for(i=0;i<size;i++)
+ for(i=0;i<size;i++) {
assert(h_arrayC[i]==h_arrayCgpu[i]);
-
+ }
cudaFree(d_arrayA);
cudaFree(d_arrayB);
cudaFree(d_arrayC);
free(h_arrayA);
free(h_arrayB);
free(h_arrayC);
-
-
return 0;
-
}