1 template<typename Tx, typename Ty>
2 __global__ void CalculateBeta(Tx *u, Ty *v, double *b, int N)
4 int tid = threadIdx.x + blockIdx.x * blockDim.x;
6 b[tid]=(v[tid+1]-v[tid])/fmax(1e-20,double(u[tid+1]-u[tid]));
7 tid += blockDim.x * gridDim.x;
12 __global__ void CalculateDGeneral( double *b, double *c, int N)
14 int tid = threadIdx.x + blockIdx.x * blockDim.x;
16 if((b[tid-1]*b[tid])<=0) c[tid]=0;
17 else c[tid]=(2*b[tid-1]*b[tid])/(b[tid-1]+b[tid]);
19 tid += blockDim.x * gridDim.x;
24 __global__ void CalculateD( double *b, double *c, int N )
26 if((b[0]*(2*b[0]-c[1]))<=0) c[0]=0;
27 else c[0]=2*b[0] - c[1];
28 if((b[N-2]*(2*b[N-2]-c[N-2]))<=0) c[N-1]=0;
29 else c[N-1]=2*b[N-2] - c[N-2];
33 template<typename Tx, typename Ty>
34 int BuildMonotonSpline(Tx *d_X, Ty *d_Y, int N, double *t, double *alpha, double *beta, double *gamma)
36 int T = (N-1)*2+1; // length of the output array
37 double *b, *c; // temp variables
38 cudaMalloc( (void**)&b, 1*N*sizeof(double) );
39 cudaMalloc( (void**)&c, 2*N*sizeof(double) );
41 int blocks = (N-1)/threads + 1;
42 CalculateBeta<<<blocks,threads>>>(d_X,d_Y,b,N);
43 CalculateDGeneral<<<blocks,threads>>>(b,c,N);
44 CalculateD<<<1,1>>>(b,c,NN); // calculate d_1 and d_N
45 CalculateCoefficientsKnots<<<blocks,threads>>>(d_X,d_Y,b,c,h,alpha,beta,gamma,N);
46 cudaFree(b); cudaFree(c);