]> AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter10/updateBasis.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
last version
[book_gpu.git] / BookGPU / Chapters / chapter10 / updateBasis.cu
// Dynamically sized shared-memory scratch buffer. updateBasisKernel uses only
// sdata[0], so its launch must request at least sizeof(double) bytes of
// dynamic shared memory (third <<<...>>> argument).
extern __shared__ volatile double sdata[];

/**
 * In-place update of B driven by element l of each block's span.
 *
 * Every block works on the span of B that starts at blockIdx.x * pitch_B
 * (called a column by the original code). With Blj the element at offset l
 * of that span, the kernel stores
 *
 *     B[l] = Blj / d_l
 *     B[i] = B[i] - d[i] * (Blj / d_l)      for every i != l, 0 <= i < m
 *
 * Launch layout: 1-D grid with one block per span, 1-D blocks of any size
 * (threads stride over the m elements by blockDim.x), and at least
 * sizeof(double) bytes of dynamic shared memory.
 *
 * @param m        number of elements processed in each span; m <= 0 is a no-op
 *                 apart from the unused pivot load
 * @param l        offset of the pivot element inside each span
 *                 (presumably l < m -- confirm against the caller)
 * @param d_l      divisor applied to the pivot element (must be non-zero for
 *                 a finite result; presumably equal to d[l] -- confirm)
 * @param B        device array updated in place
 * @param pitch_B  distance between consecutive spans of B, counted in
 *                 elements (it is used directly as an index into B)
 * @param d        device array of at least m multipliers, read only
 */
__global__ void
updateBasisKernel(int m, uint l, double d_l, double *B,
                  uint pitch_B, double *d) {
    // Widen before multiplying so the offset cannot wrap in 32-bit arithmetic.
    const size_t colStart = (size_t)blockIdx.x * pitch_B;
    // A negative m would turn into a huge unsigned bound in the loop below.
    const uint rows = (m > 0) ? (uint)m : 0u;

    // First thread loads Blj / d_l so it can be broadcast via shared memory
    // to each thread of the block.
    if (threadIdx.x == 0)
        sdata[0] = B[colStart + l] / d_l;
    // Uniform barrier: publishes sdata[0] and guarantees the pivot element
    // was read before any thread overwrites B[colStart + l] below.
    __syncthreads();

    // sdata is volatile; keep the broadcast value in a register instead of
    // re-reading shared memory on every iteration.
    const double pivotScaled = sdata[0];

    // Each thread processes multiple elements, blockDim.x apart. There is
    // deliberately no barrier inside this loop: the trip count differs between
    // threads when m is not a multiple of blockDim.x, and every thread only
    // touches the elements it owns.
    for (uint row = threadIdx.x; row < rows; row += blockDim.x) {
        const size_t idx = colStart + row;
        B[idx] = (row == l) ? pivotScaled
                            : B[idx] - d[row] * pivotScaled;
    }
}