6 #include "cutil_inline.h"
// Threads per block: used as the block dimension of the kernel launches in main
// and to derive the (rounded-up) number of blocks from the vector length.
9 const int nbThreadsPerBloc=256;
// Element-wise vector sum kernel: each thread computes its flat 1D global index
// `tid` and writes d_C[tid] = d_A[tid] + d_B[tid].
//
// Parameters:
//   size     - element count passed by the caller (its use is not visible in this excerpt)
//   d_C      - output array
//   d_A, d_B - input arrays
//
// NOTE(review): this listing skips lines around the body (the line before the
// signature and the line between the index computation and the store are not
// shown). Presumably a `tid < size` guard sits on the skipped line; confirm,
// because main rounds the block count up and may launch more threads than elements.
12 void addition(int size, double *d_C, double *d_A, double *d_B) {
// Flat global thread index for a 1D grid of 1D blocks.
13 int tid = blockIdx.x * blockDim.x + threadIdx.x;
15 d_C[tid]=d_A[tid]+d_B[tid];
// Kernel taking an element count and a single array d_x; main launches it on
// d_arrayC and then on d_arrayA before the cuBLAS dot product.
//
// NOTE(review): only the signature and the index computation are visible in this
// excerpt. Judging by the name and by the CPU reference in main (which multiplies
// reciprocals), this presumably replaces d_x[tid] with 1/d_x[tid] under a
// `tid < size` guard; confirm against the full body.
20 void inverse(int size, double *d_x) {
// Flat global thread index for a 1D grid of 1D blocks.
21 int tid = blockIdx.x * blockDim.x + threadIdx.x;
// Program entry point: computes sum_i (1/(A[i]+B[i])) * (1/A[i]) once on the CPU
// and once on the GPU (custom kernels + cuBLAS dot product), timing both paths
// and printing the two results side by side.
//
// NOTE(review): this listing skips many lines of main (declarations of `stat`,
// `dot`, `dot_gpu`, the loop headers, input initialisation, frees, the return).
// The comments below describe only what the visible lines do.
28 int main( int argc, char** argv)
// Usage message; the condition that triggers it is not shown in this excerpt.
31 printf("usage: ex2 nb_components\n");
// Vector length taken from the first command-line argument.
// NOTE(review): atoi gives no error indication for non-numeric input.
35 int size=atoi(argv[1]);
// Create the cuBLAS context used later for the dot product.
37 cublasHandle_t handle;
38 stat=cublasCreate(&handle);
// Host buffers of `size` doubles each. h_arrayA/h_arrayB are the inputs and
// h_arrayC holds the CPU sum; h_arrayCgpu is not used on any visible line.
// NOTE(review): the malloc results are not checked on the visible lines.
40 double *h_arrayA=(double*)malloc(size*sizeof(double));
41 double *h_arrayB=(double*)malloc(size*sizeof(double));
42 double *h_arrayC=(double*)malloc(size*sizeof(double));
43 double *h_arrayCgpu=(double*)malloc(size*sizeof(double));
44 double *d_arrayA, *d_arrayB, *d_arrayC;
// Device buffers matching the host vectors.
// NOTE(review): the cudaMalloc return codes are discarded here.
46 cudaMalloc((void**)&d_arrayA,size*sizeof(double));
47 cudaMalloc((void**)&d_arrayB,size*sizeof(double));
48 cudaMalloc((void**)&d_arrayC,size*sizeof(double));
// ---- CPU reference, timed with the cutil timer API ----
55 unsigned int timer_cpu = 0;
56 cutilCheckError(cutCreateTimer(&timer_cpu));
57 cutilCheckError(cutStartTimer(timer_cpu));
// Per-element work (the enclosing loop header is not shown): store the sum in
// h_arrayC, then accumulate the product of the reciprocals of C[i] and A[i].
60 h_arrayC[i]=h_arrayA[i]+h_arrayB[i];
61 dot+=(1./h_arrayC[i])*(1./h_arrayA[i]);
63 cutilCheckError(cutStopTimer(timer_cpu));
64 printf("CPU processing time : %f (ms) \n", cutGetTimerValue(timer_cpu));
65 cutDeleteTimer(timer_cpu);
// ---- GPU path ----
// The timer starts before the host-to-device copies, so the reported GPU time
// includes the transfers as well as the kernels and the dot product.
67 unsigned int timer_gpu = 0;
68 cutilCheckError(cutCreateTimer(&timer_gpu));
69 cutilCheckError(cutStartTimer(timer_gpu));
// Copy the two input vectors host -> device (unit stride on both sides).
// NOTE(review): `stat` is overwritten by each call and is not inspected on any
// visible line; confirm whether it is checked on the skipped lines.
70 stat = cublasSetVector(size,sizeof(double),h_arrayA,1,d_arrayA,1);
71 stat = cublasSetVector(size,sizeof(double),h_arrayB,1,d_arrayB,1);
// Ceiling division: enough blocks to cover `size` elements; the last block may
// contain threads whose index is >= size.
72 int nbBlocs=(size+nbThreadsPerBloc-1)/nbThreadsPerBloc;
// C = A + B, then `inverse` is applied to C and to A (d_arrayA is modified on the
// device). No launch-error check is visible after these launches.
74 addition<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC,d_arrayA,d_arrayB);
75 inverse<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayC);
76 inverse<<<nbBlocs,nbThreadsPerBloc>>>(size,d_arrayA);
// Dot product of the two device vectors; the scalar result is written to dot_gpu.
// NOTE(review): the timer is stopped right after this call, which is only an
// accurate measurement if cublasDdot returns after the result is available
// (presumably the default host pointer mode; confirm).
78 stat = cublasDdot(handle,size,d_arrayC,1,d_arrayA,1,&dot_gpu);
80 cutilCheckError(cutStopTimer(timer_gpu));
81 printf("GPU processing time : %f (ms) \n", cutGetTimerValue(timer_gpu));
82 cutDeleteTimer(timer_gpu);
// Print both results for a visual comparison (no tolerance check on the visible lines).
83 printf("cpu dot %e --- gpu dot %e\n",dot,dot_gpu);
// Release the cuBLAS context; frees of the host/device buffers are not visible here.
92 cublasDestroy(handle);