AND Private Git Repository - book_gpu.git/blobdiff - BookGPU/Chapters/chapter2/ex2.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
last version
[book_gpu.git] / BookGPU / Chapters / chapter2 / ex2.cu
index 762654ca88ba6d5346274e86d5bc168ba3f20b04..62931cdce0dc439a8bca3de46d8926d21723ce5a 100644 (file)
@@ -6,7 +6,6 @@
 #include "cutil_inline.h"
 #include <cublas_v2.h>
 
 #include "cutil_inline.h"
 #include <cublas_v2.h>
 
-
 const int nbThreadsPerBloc=256;
 
 __global__ 
 const int nbThreadsPerBloc=256;
 
 __global__ 
@@ -28,19 +27,15 @@ void inverse(int size, double *d_x) {
 
 int main( int argc, char** argv) 
 {
 
 int main( int argc, char** argv) 
 {
-
        if(argc!=2) { 
                printf("usage: ex2 nb_components\n");
                exit(0);
        }
 
        int size=atoi(argv[1]);
        if(argc!=2) { 
                printf("usage: ex2 nb_components\n");
                exit(0);
        }
 
        int size=atoi(argv[1]);
-
        cublasStatus_t stat;
        cublasHandle_t handle; 
        stat=cublasCreate(&handle);
        cublasStatus_t stat;
        cublasHandle_t handle; 
        stat=cublasCreate(&handle);
-
-
        int i;
        double *h_arrayA=(double*)malloc(size*sizeof(double));
        double *h_arrayB=(double*)malloc(size*sizeof(double));
        int i;
        double *h_arrayA=(double*)malloc(size*sizeof(double));
        double *h_arrayB=(double*)malloc(size*sizeof(double));
@@ -48,7 +43,6 @@ int main( int argc, char** argv)
        double *h_arrayCgpu=(double*)malloc(size*sizeof(double));
        double *d_arrayA, *d_arrayB, *d_arrayC;
 
        double *h_arrayCgpu=(double*)malloc(size*sizeof(double));
        double *d_arrayA, *d_arrayB, *d_arrayC;
 
-
        cudaMalloc((void**)&d_arrayA,size*sizeof(double));
        cudaMalloc((void**)&d_arrayB,size*sizeof(double));
        cudaMalloc((void**)&d_arrayC,size*sizeof(double));
        cudaMalloc((void**)&d_arrayA,size*sizeof(double));
        cudaMalloc((void**)&d_arrayB,size*sizeof(double));
        cudaMalloc((void**)&d_arrayC,size*sizeof(double));
@@ -58,10 +52,9 @@ int main( int argc, char** argv)
                h_arrayB[i]=2*(i+1);
        }
 
                h_arrayB[i]=2*(i+1);
        }
 
-
        unsigned int timer_cpu = 0;
        cutilCheckError(cutCreateTimer(&timer_cpu));
        unsigned int timer_cpu = 0;
        cutilCheckError(cutCreateTimer(&timer_cpu));
-  cutilCheckError(cutStartTimer(timer_cpu));
+       cutilCheckError(cutStartTimer(timer_cpu));
        double dot=0;
        for(i=0;i<size;i++) {
                h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
        double dot=0;
        for(i=0;i<size;i++) {
                h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
@@ -71,10 +64,9 @@ int main( int argc, char** argv)
        printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
        cutDeleteTimer(timer_cpu);
 
        printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
        cutDeleteTimer(timer_cpu);
 
-
        unsigned int timer_gpu = 0;
        cutilCheckError(cutCreateTimer(&timer_gpu));
        unsigned int timer_gpu = 0;
        cutilCheckError(cutCreateTimer(&timer_gpu));
-  cutilCheckError(cutStartTimer(timer_gpu));
+       cutilCheckError(cutStartTimer(timer_gpu));
        stat = cublasSetVector(size,sizeof(double),h_arrayA,1,d_arrayA,1);
        stat = cublasSetVector(size,sizeof(double),h_arrayB,1,d_arrayB,1);
        int nbBlocs=(size+nbThreadsPerBloc-1)/nbThreadsPerBloc;
        stat = cublasSetVector(size,sizeof(double),h_arrayA,1,d_arrayA,1);
        stat = cublasSetVector(size,sizeof(double),h_arrayB,1,d_arrayB,1);
        int nbBlocs=(size+nbThreadsPerBloc-1)/nbThreadsPerBloc;
@@ -85,16 +77,11 @@ int main( int argc, char** argv)
        double dot_gpu=0;
        stat = cublasDdot(handle,size,d_arrayC,1,d_arrayA,1,&dot_gpu);
 
        double dot_gpu=0;
        stat = cublasDdot(handle,size,d_arrayC,1,d_arrayA,1,&dot_gpu);
 
-
        cutilCheckError(cutStopTimer(timer_gpu));
        printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
        cutDeleteTimer(timer_gpu);
        cutilCheckError(cutStopTimer(timer_gpu));
        printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
        cutDeleteTimer(timer_gpu);
-       
-       cublasGetVector(size,sizeof(double),d_arrayC,1,h_arrayCgpu,1);
-
        printf("cpu dot %e --- gpu dot %e\n",dot,dot_gpu);
 
        printf("cpu dot %e --- gpu dot %e\n",dot,dot_gpu);
 
-
        cudaFree(d_arrayA);
        cudaFree(d_arrayB);
        cudaFree(d_arrayC);
        cudaFree(d_arrayA);
        cudaFree(d_arrayB);
        cudaFree(d_arrayC);
@@ -102,8 +89,6 @@ int main( int argc, char** argv)
        free(h_arrayB);
        free(h_arrayC);
        free(h_arrayCgpu);
        free(h_arrayB);
        free(h_arrayC);
        free(h_arrayCgpu);
-
        cublasDestroy(handle);
        return 0;
        cublasDestroy(handle);
        return 0;
-
 }
 }