]> AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter10/optiSE.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
ch18
[book_gpu.git] / BookGPU / Chapters / chapter10 / optiSE.cu
// Dynamically sized shared-memory scratch buffer (size is given by the third
// kernel-launch parameter). It is not referenced directly in this listing;
// presumably reduceSum() uses it for the block-wide reduction -- TODO confirm.
extern __shared__ volatile double sData[];

/**
 * Entering-variable selection: for each candidate column of AN, compute the
 * score c[col] / sqrt(sum_i AN(i,col)^2) and keep, per block, the column whose
 * score has the largest absolute value.
 *
 * Work distribution (as written in the loops below):
 *   - block b handles columns b, b + gridDim.x, b + 2*gridDim.x, ...
 *   - inside a block, thread t handles rows t, t + blockDim.x, ...
 *
 * @param m        number of rows scanned in each column
 * @param n        number of columns
 * @param c        per-column coefficient used as the score numerator
 *                 (assumed to be indexed by column, i.e. c[col] -- the
 *                 original listing referred to an undeclared `cVal`)
 * @param AN       matrix stored column by column; element (i, col) is read
 *                 from AN[i + col*pitchAN]
 * @param pitchAN  distance between two consecutive columns, in elements
 * @param resIdx   out: one entry per block (indexed by blockIdx.x), index of
 *                 the best column seen by that block, or (uint)-1 if none
 * @param resVal   out: one entry per block, signed score of that column
 *                 (0.0 if none)
 *
 * Only thread 0 of each block tracks and writes the result. The per-block
 * results still have to be reduced by the caller.
 *
 * Relies on two helpers that are not defined in this file:
 *   - isPotentialEnteringVar(col): candidate filter; it is evaluated with a
 *     block-uniform argument, so the __syncthreads() inside the branch is
 *     reached by every thread provided the helper itself is uniform.
 *   - reduceSum(locSum): presumably takes locSum by reference and leaves the
 *     block-wide sum in it on thread 0 -- TODO confirm.
 */
__global__ void
selectInVar(int m, int n, double *c, double *AN, uint pitchAN, uint *resIdx, double *resVal)
{
        const uint tid = threadIdx.x;   // was used but never declared
        // (uint)-1 is the "no column selected" sentinel.
        uint i, maxIdx = (uint)-1, bid = blockIdx.x;
        double val, locSum, xScore, maxScore = 0.0;
        while(bid < n){ // Each block processes multiple columns
                i = tid;
                locSum = 0.0;
                if(isPotentialEnteringVar(bid)){ // Do the local processing
                        // Column offset computed in size_t so that
                        // bid*pitchAN cannot wrap around in 32-bit arithmetic.
                        const size_t colOffset = (size_t)bid * pitchAN;
                        while(i < m) { // Each thread processes multiple elements
                                val = AN[colOffset + i];
                                locSum += val*val;
                                i += blockDim.x;
                        }
                        // Reduce the partial sums of the block (shared memory)
                        reduceSum(locSum);
                        if (tid == 0){ // Is this the best variable encountered?
                                // On tid == 0, locSum equals the squared norm
                                // of the column (steepest-edge coefficient).
                                const double cVal = c[bid];
                                // NOTE: an all-zero column gives rsqrt(0) = +inf.
                                xScore = cVal*rsqrt(locSum);
                                if(fabs(maxScore) < fabs(xScore)){
                                        maxIdx = bid;
                                        maxScore = xScore;
                                }
                        }
                        // Keep the block together before the next column
                        // starts another reduction.
                        __syncthreads();
                }
                bid += gridDim.x;
        }
        // Write the per-block result into global memory
        if (tid == 0){
                resIdx[blockIdx.x] = maxIdx;
                resVal[blockIdx.x] = maxScore;
        }
}