AND Private Git Repository - book_gpu.git/blobdiff - BookGPU/Chapters/chapter2/ex1.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
last version
[book_gpu.git] / BookGPU / Chapters / chapter2 / ex1.cu
index e182349437ce28a33b00884be74209ec5dcbc820..64c08dd68b68857d8ae2a15e9d3c8147eb18ec52 100644 (file)
@@ -18,16 +18,12 @@ void addition(int size, int *d_C, int *d_A, int *d_B) {
 
 int main( int argc, char** argv) 
 {
-
        if(argc!=2) { 
                printf("usage: ex1 nb_components\n");
                exit(0);
        }
 
-       
-
        int size=atoi(argv[1]);
-
        int i;
        int *h_arrayA=(int*)malloc(size*sizeof(int));
        int *h_arrayB=(int*)malloc(size*sizeof(int));
@@ -35,7 +31,6 @@ int main( int argc, char** argv)
        int *h_arrayCgpu=(int*)malloc(size*sizeof(int));
        int *d_arrayA, *d_arrayB, *d_arrayC;
 
-
        cudaMalloc((void**)&d_arrayA,size*sizeof(int));
        cudaMalloc((void**)&d_arrayB,size*sizeof(int));
        cudaMalloc((void**)&d_arrayC,size*sizeof(int));
@@ -45,10 +40,9 @@ int main( int argc, char** argv)
                h_arrayB[i]=2*i;
        }
 
-
        unsigned int timer_cpu = 0;
        cutilCheckError(cutCreateTimer(&timer_cpu));
-  cutilCheckError(cutStartTimer(timer_cpu));
+       cutilCheckError(cutStartTimer(timer_cpu));
        for(i=0;i<size;i++) {
                h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
        }
@@ -56,37 +50,28 @@ int main( int argc, char** argv)
        printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
        cutDeleteTimer(timer_cpu);
 
-
        unsigned int timer_gpu = 0;
        cutilCheckError(cutCreateTimer(&timer_gpu));
-  cutilCheckError(cutStartTimer(timer_gpu));
+       cutilCheckError(cutStartTimer(timer_gpu));
        cudaMemcpy(d_arrayA,h_arrayA, size * sizeof(int), cudaMemcpyHostToDevice);
        cudaMemcpy(d_arrayB,h_arrayB, size * sizeof(int), cudaMemcpyHostToDevice);
-
-       
-
        
        int nbBlocs=(size+nbThreadsPerBloc-1)/nbThreadsPerBloc;
-
        addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
-
        cudaMemcpy(h_arrayCgpu,d_arrayC, size * sizeof(int), cudaMemcpyDeviceToHost);
 
        cutilCheckError(cutStopTimer(timer_gpu));
        printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
        cutDeleteTimer(timer_gpu);
 
-       for(i=0;i<size;i++)
+       for(i=0;i<size;i++) {
                assert(h_arrayC[i]==h_arrayCgpu[i]);
-
+       }
        cudaFree(d_arrayA);
        cudaFree(d_arrayB);
        cudaFree(d_arrayC);
        free(h_arrayA);
        free(h_arrayB);
        free(h_arrayC);
-
-
        return 0;
-
 }