6 #include "cutil_inline.h"
// Number of threads per block used for every kernel launch in this file; it is
// also the divisor of the rounded-up division that computes the block count.
10 const int nbThreadsPerBloc=256;
/**
 * Element-wise vector sum on the device: d_C[i] = d_A[i] + d_B[i]
 * for every i with 0 <= i < size.
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per element. The host
 * rounds the block count up, so the last block may contain threads whose
 * index is >= size; those threads return without touching memory.
 *
 * @param size  number of elements in each vector
 * @param d_C   output vector (device pointer)
 * @param d_A   first operand (device pointer)
 * @param d_B   second operand (device pointer)
 */
__global__ void addition(int size, double *d_C, double *d_A, double *d_B) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    // Guard the grid tail: the grid is sized with a ceiling division, so it
    // can hold more threads than there are elements.
    if (tid < size) {
        d_C[tid] = d_A[tid] + d_B[tid];
    }
}
// Kernel launched from main on d_arrayC and on d_arrayA, once per array.
// Only the signature and the index computation are visible in this excerpt.
// NOTE(review): presumably replaces each d_x[i] with 1/d_x[i] -- the CPU
// reference accumulates (1./C[i])*(1./A[i]) and the GPU path takes the dot
// product of the two arrays after this kernel has run on each of them.
// Confirm against the full body.
// NOTE(review): no __global__ qualifier is visible on this line although the
// function is launched with <<<...>>>; verify it sits on a preceding line.
21 void inverse(int size, double *d_x) {
// Flat 1-D global thread index (block offset plus index within the block).
22 int tid = blockIdx.x * blockDim.x + threadIdx.x;
// NOTE(review): the launch rounds the block count up, so any access through
// tid needs a `tid < size` guard -- confirm one is present in the elided lines.
// Program entry point. From the visible lines: reads the vector length from
// argv[1], computes C = A + B and accumulates (1/C[i]) * (1/A[i]) into `dot`
// on the CPU, repeats the computation on the GPU with the addition/inverse
// kernels followed by cublasDdot, times both paths with the cutil timers and
// prints both dot products.
// NOTE(review): this excerpt omits lines (the declarations of stat, dot and
// dot_gpu, loop headers, array initialisation and cleanup are not visible),
// so the comments below describe only what is shown.
29 int main( int argc, char** argv)
// Usage message; the condition that triggers it is not visible here.
33 printf("usage: ex2 nb_components\n");
// NOTE(review): atoi returns 0 for non-numeric input and reports no error;
// no validation of `size` (zero / negative) is visible before it is used for
// allocation sizes and the grid computation.
37 int size=atoi(argv[1]);
// cuBLAS context used for the host<->device vector copies and the dot product.
40 cublasHandle_t handle;
// NOTE(review): the returned status is stored in `stat`, but no check of it
// is visible in this excerpt.
41 stat=cublasCreate(&handle);
// Host buffers of `size` doubles each: inputs A and B, the CPU result C, and
// a buffer that receives the GPU result for C.
// NOTE(review): the malloc results are not checked in the visible lines.
45 double *h_arrayA=(double*)malloc(size*sizeof(double));
46 double *h_arrayB=(double*)malloc(size*sizeof(double));
47 double *h_arrayC=(double*)malloc(size*sizeof(double));
48 double *h_arrayCgpu=(double*)malloc(size*sizeof(double));
49 double *d_arrayA, *d_arrayB, *d_arrayC;
// Device buffers of `size` doubles each.
// NOTE(review): the cudaError_t return values of these calls are discarded.
52 cudaMalloc((void**)&d_arrayA,size*sizeof(double));
53 cudaMalloc((void**)&d_arrayB,size*sizeof(double));
54 cudaMalloc((void**)&d_arrayC,size*sizeof(double));
// --- CPU reference, timed with a cutil timer ---
62 unsigned int timer_cpu = 0;
63 cutilCheckError(cutCreateTimer(&timer_cpu));
64 cutilCheckError(cutStartTimer(timer_cpu));
// Element-wise sum, then accumulation of the product of the reciprocals of
// C[i] and A[i]; the enclosing loop header is not visible in this excerpt.
67 h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
68 dot+=(1./h_arrayC[i])*(1./h_arrayA[i]);
70 cutilCheckError(cutStopTimer(timer_cpu));
71 printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
72 cutDeleteTimer(timer_cpu);
// --- GPU path ---
// The GPU timer is started before the host-to-device copies, so the reported
// GPU time includes the transfers of A and B.
75 unsigned int timer_gpu = 0;
76 cutilCheckError(cutCreateTimer(&timer_gpu));
77 cutilCheckError(cutStartTimer(timer_gpu));
// Copy `size` elements of sizeof(double) bytes, stride 1, from the host
// arrays to the device arrays.
// NOTE(review): `stat` is overwritten by the second call and no check is
// visible between or after the two copies.
78 stat = cublasSetVector(size,sizeof(double),h_arrayA,1,d_arrayA,1);
79 stat = cublasSetVector(size,sizeof(double),h_arrayB,1,d_arrayB,1);
// Rounded-up division: enough blocks to give every element one thread; the
// last block can contain threads with an index >= size.
80 int nbBlocs=(size+nbThreadsPerBloc-1)/nbThreadsPerBloc;
// d_arrayC = d_arrayA + d_arrayB, then `inverse` is applied to d_arrayC and
// to d_arrayA (d_arrayA is therefore modified after being used as an input).
// NOTE(review): no cudaGetLastError() after the launches is visible, so a
// failed launch would go unnoticed.
82 addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
83 inverse<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC);
84 inverse<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayA);
// Dot product of the two device arrays, result written through &dot_gpu.
// NOTE(review): the timer is stopped right after this call; the measurement
// is only meaningful if the call has completed by then -- presumably true if
// dot_gpu is a host variable and the handle uses the default pointer mode.
// Confirm against the declaration of dot_gpu.
86 stat = cublasDdot(handle,size,d_arrayC,1,d_arrayA,1,&dot_gpu);
89 cutilCheckError(cutStopTimer(timer_gpu));
90 printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
91 cutDeleteTimer(timer_gpu);
// Copy the GPU contents of d_arrayC back to the host. This happens after the
// GPU timer was stopped, so it is not part of the reported GPU time.
// NOTE(review): the returned cublasStatus_t is discarded.
93 cublasGetVector(size,sizeof(double),d_arrayC,1,h_arrayCgpu,1);
95 printf("cpu dot %e --- gpu dot %e\n",dot,dot_gpu);
// Release the cuBLAS context.
// NOTE(review): no free()/cudaFree() of the host and device buffers is
// visible in this excerpt -- confirm they are released in the elided lines.
106 cublasDestroy(handle);