AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter11/code2.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
ch1
[book_gpu.git] / BookGPU / Chapters / chapter11 / code2.cu
// Computes the slope between each pair of consecutive points:
//   b[i] = (v[i+1] - v[i]) / max(1e-20, u[i+1] - u[i])    for i = 0 .. N-2
// The step in u is clamped from below by 1e-20, so the divisor is never zero
// or negative.
//
//   u, v : inputs; elements 0 .. N-1 are read
//   b    : output; elements 0 .. N-2 are written
//   N    : number of points
//
// Every thread starts at its global 1D index and advances by the total number
// of launched threads, so any 1D launch configuration covers the full range.
template<typename Tx, typename Ty>
__global__ void CalculateBeta(Tx *u, Ty *v, double *b, int N)
{
    const int stride = blockDim.x * gridDim.x;
    const int lastIdx = N - 2;
    for (int i = threadIdx.x + blockIdx.x * blockDim.x; i <= lastIdx; i += stride) {
        const double step = fmax(1e-20, double(u[i + 1] - u[i]));
        b[i] = (v[i + 1] - v[i]) / step;
    }
    // Block-wide barrier retained from the original; it is reached by every
    // thread because it sits outside the loop.
    __syncthreads();
}
11
// Computes the interior values of c from the two neighbouring slopes in b,
// for i = 1 .. N-2:
//   c[i] = 0                                      if b[i-1] * b[i] <= 0
//   c[i] = 2 * b[i-1] * b[i] / (b[i-1] + b[i])    otherwise (harmonic mean)
//
//   b : input; elements 0 .. N-2 are read
//   c : output; elements 1 .. N-2 are written, c[0] and c[N-1] are not touched
//   N : number of points
//
// Index 0 is skipped on purpose: it would need b[-1], which lies before the
// start of the array. Every thread starts at (global 1D index + 1) and
// advances by the total number of launched threads, so any 1D launch
// configuration covers the full range.
__global__ void CalculateDGeneral( double *b, double *c, int N)
{
    const int stride = blockDim.x * gridDim.x;
    for (int i = 1 + threadIdx.x + blockIdx.x * blockDim.x; i <= N - 2; i += stride) {
        const double left = b[i - 1];
        const double right = b[i];
        if (left * right <= 0) {
            // Slopes of opposite sign (or a flat segment): force a zero value.
            c[i] = 0;
        } else {
            c[i] = (2 * left * right) / (left + right);
        }
    }
}
23
// Fills the two end values of c from their neighbours:
//   c[0]   = 2*b[0]   - c[1]      or 0 when b[0]   * (2*b[0]   - c[1])   <= 0
//   c[N-1] = 2*b[N-2] - c[N-2]    or 0 when b[N-2] * (2*b[N-2] - c[N-2]) <= 0
//
//   b : input; b[0] and b[N-2] are read
//   c : c[1] and c[N-2] are read, c[0] and c[N-1] are written
//   N : number of points; nothing is done when N < 2
//
// The work is done by a single thread (x-index 0 of block 0); any other
// thread of the launch returns immediately.
//
// NOTE(review): for N == 2 the first formula reads c[1], which is the same
// element as c[N-1] and is only written further down in this kernel — confirm
// that callers either never pass N == 2 or initialise c beforehand.
__global__ void CalculateD( double *b, double *c, int N )
{
    if (threadIdx.x != 0 || blockIdx.x != 0) return;
    // With fewer than two points b[N-2] would index before the array start.
    if (N < 2) return;

    const double first = 2 * b[0] - c[1];
    c[0] = (b[0] * first <= 0) ? 0.0 : first;

    // Read c[N-2] only after c[0] has been stored, matching the original order.
    const double last = 2 * b[N - 2] - c[N - 2];
    c[N - 1] = (b[N - 2] * last <= 0) ? 0.0 : last;
}
32
// Host-side driver: allocates two temporary device buffers, launches the
// kernels CalculateBeta, CalculateDGeneral, CalculateD and
// CalculateCoefficientsKnots in that order on the default stream, then frees
// the temporaries.
//
//   d_X, d_Y           : input arrays of N elements; they are handed directly
//                        to the kernels
//   N                  : number of input points
//   t, alpha, beta,
//   gamma              : forwarded unchanged to CalculateCoefficientsKnots
//
// Returns (N-1)*2+1, the length of the output array, on success, or -1 when a
// device allocation, a kernel launch, or kernel execution reports a CUDA error.
template<typename Tx, typename Ty>
int BuildMonotonSpline(Tx *d_X, Ty *d_Y, int N, double *t, double *alpha, double *beta, double *gamma)
{
    const int T = (N - 1) * 2 + 1;  // length of the output array

    // Temporary device buffers: b holds N doubles, c holds 2*N doubles.
    double *b = 0;
    double *c = 0;
    if (cudaMalloc((void**)&b, sizeof(double) * N) != cudaSuccess) {
        return -1;
    }
    if (cudaMalloc((void**)&c, 2 * sizeof(double) * N) != cudaSuccess) {
        cudaFree(b);  // do not leak the first buffer
        return -1;
    }

    const int threads = 256;
    const int blocks = (N - 1) / threads + 1;

    CalculateBeta<<<blocks, threads>>>(d_X, d_Y, b, N);
    CalculateDGeneral<<<blocks, threads>>>(b, c, N);
    CalculateD<<<1, 1>>>(b, c, N);  // calculate d_1 and d_N
    // NOTE(review): the original passed an undeclared `h` as the fifth
    // argument; `t` is the only otherwise-unused double* parameter, so it is
    // assumed to be the intended one — confirm against the signature of
    // CalculateCoefficientsKnots.
    CalculateCoefficientsKnots<<<blocks, threads>>>(d_X, d_Y, b, c, t, alpha, beta, gamma, N);

    // Launch-configuration errors surface through cudaGetLastError();
    // faults raised while the kernels run surface at the synchronisation.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }

    cudaFree(b);
    cudaFree(c);
    return (status == cudaSuccess) ? T : -1;
}