Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
new
[book_gpu.git] / BookGPU / Chapters / chapter10 / updateBasis.cu
// Dynamically sized shared memory. updateBasisKernel only uses sData[0], so a
// launch must provide at least sizeof(double) bytes of dynamic shared memory.
extern __shared__ volatile double sData[];

/**
 * In-place update of B, one thread block per column.
 *
 * Block b works on the m elements starting at B[b * pitch_B]. For that column:
 *   - element l becomes      B[l] / d_l
 *   - element i != l becomes B[i] - d[i] * (B[l] / d_l)
 * where B[l] is the value of element l *before* the update.
 *
 * Launch layout: 1D grid with one block per column to update, 1D blocks of any
 * size (threads stride over the column by blockDim.x), and at least
 * sizeof(double) bytes of dynamic shared memory.
 *
 * @param m        number of elements processed in each column
 * @param l        index of the pivot element inside the column
 * @param d_l      divisor applied to the pivot element
 * @param B        matrix updated in place
 * @param pitch_B  distance between two consecutive columns of B, counted in
 *                 elements (doubles), not bytes
 * @param d        per-element multipliers, indexed like a column of B
 *
 * NOTE(review): the original source referenced the undeclared identifiers
 * `leave` and `q`; both are taken here to mean the parameter `l`, the only
 * index argument the kernel receives -- confirm against the caller.
 */
__global__ void
updateBasisKernel(int m, uint l, double d_l, double *B, uint pitch_B, double *d)
{
        // m is uniform across the block, so this early exit cannot split the
        // block around the barrier below. It also prevents a negative m from
        // being converted into a huge unsigned loop bound.
        if (m <= 0)
                return;

        const uint rows = (uint)m;
        // Widen before multiplying so the column offset cannot wrap in 32 bits.
        const size_t colStart = (size_t)blockIdx.x * pitch_B;

        // The first thread loads the pivot element and scales it, so that it
        // can be broadcast to every thread of the block via shared memory.
        if (threadIdx.x == 0)
                sData[0] = B[colStart + l] / d_l;
        __syncthreads();

        // Read once into a register: the value is never modified afterwards.
        const double scaledPivot = sData[0];

        // Each thread processes multiple elements, blockDim.x apart.
        // No barrier is needed (or allowed) inside this loop: threads may run
        // a different number of iterations, every thread writes only its own
        // elements, and the old B[l] was already consumed before the barrier.
        for (uint i = threadIdx.x; i < rows; i += blockDim.x) {
                double updated = scaledPivot;
                if (i != l) {
                        updated *= -d[i];
                        updated += B[colStart + i];
                }
                B[colStart + i] = updated;
        }
}