]> AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter11/code1.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
update
[book_gpu.git] / BookGPU / Chapters / chapter11 / code1.cu
1
2 template<typename Tx, typename Ty>
3 __global__ void CalculateCoefficientsKnots( Tx *u, Ty *v, double *b, double *c, double *t, double *alpha, double *beta, double *gamma, int N )
4 {
5   int tid = threadIdx.x + blockIdx.x * blockDim.x;
6   int s = tid*2;
7   while(tid<=(N-2))
8   {
9     // decide whether an additional knot is necessary
10     if(fabs(c[tid]+c[tid+1]- 2*b[tid])<=0.1e-5) // tolerance
11     { 
12       //no additional knot
13       h[s]=h[s+1]=u[tid];
14       alpha[s]=alpha[s+1]=v[tid];
15       beta[s]=beta[s+1]=c[tid];
16       gamma[s]=gamma[s+1]=(c[tid+1]-c[tid])/(2*(fmax(1e-10,u[tid+1]-u[tid])));
17     } 
18     else {  
19       //adding a knot
20       h[s]=u[tid];
21       //determine the position of the knot
22       if((c[tid+1] - b[tid])*(c[tid] - b[tid])<0)
23         h[s+1]=u[tid+1] + (c[tid] - b[tid])*(fmax(1e-10,u[tid+1]-u[tid]))/fmax(1e-10,(c[tid+1] - c[tid]));
24       else
25         h[s+1]=0.5*(u[tid+1] + u[tid]);
26       //calculate coefficients
27       double dtemp = (2*b[tid] - c[tid+1])+((c[tid+1] - c[tid])*
28               (h[s+1] - u[tid]))/ fmax(1e-10,(u[tid+1] - u[tid]));
29       alpha[s]=v[tid];   beta[s]=c[tid];
30       gamma[s]=(dtemp - c[tid])/(2*fmax(1e-10,(h[s+1] - u[tid])));
31       alpha[s+1]=v[tid] + c[tid]*(h[s+1] - u[tid]) +
32                 (dtemp - c[tid])*(h[s+1] - u[tid])/2;
33       gamma[s+1]=(c[tid+1]-dtemp)/(2*fmax(1e-10,(u[tid+1]-h[s+1])));
34       beta[s+1]=dtemp;
35     }
36     tid += blockDim.x * gridDim.x;   s = tid*2;
37   }
38   __syncthreads();
39   // Select a single thread  to perform the last operation
40   if((threadIdx.x  ) == 0)  {
41     s = (N-1) * 2;   h[s]=u[N-1];
42   }
43   __syncthreads();
44 }