AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter10/optiSE.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
new ch5 reread
[book_gpu.git] / BookGPU / Chapters / chapter10 / optiSE.cu
// Dynamically sized shared-memory buffer (size given at kernel launch).
// It is not referenced directly in this kernel; presumably it is the scratch
// space used by reduceSum -- TODO confirm against reduceSum's definition.
extern __shared__ volatile double sData[];

/*
 * selectInVar -- per-block selection of the best-scoring column of AN.
 *
 * Each block visits the columns bid = blockIdx.x, blockIdx.x + gridDim.x, ...
 * while bid < n. For every column accepted by isPotentialEnteringVar(bid),
 * the threads of the block accumulate the sum of squares of the column
 * entries (thread t handles rows t, t + blockDim.x, ... while row < m), the
 * partial sums are combined by reduceSum, and thread 0 computes the score
 *
 *     c[bid] * rsqrt(sum of squares)
 *
 * keeping the column whose score has the largest absolute value.
 *
 * Parameters:
 *   m        number of rows scanned in each column
 *   n        number of columns
 *   c        per-column coefficient, indexed by column (see NOTE(review) below)
 *   AN       matrix storage; entry (i, j) is read at AN[i + j * pitchAN]
 *   pitchAN  distance between two consecutive columns of AN, counted in
 *            elements (it is used directly as an element index stride)
 *   resIdx   output, one entry per block: index of the best column seen by
 *            the block, or (uint)-1 if the block accepted no column
 *   resVal   output, one entry per block: score of that column (0.0 if none)
 *
 * resIdx and resVal are written at index blockIdx.x, so they need at least
 * gridDim.x entries. The final choice across blocks is not made here.
 *
 * isPotentialEnteringVar and reduceSum are defined outside this listing.
 */
__global__ void
selectInVar(int m, int n, double *c, double *AN, uint pitchAN,
            uint *resIdx, double *resVal) {
        // (uint)-1 is the "no column selected" marker (all bits set).
        uint i, maxIdx = (uint)-1, bid = blockIdx.x;
        double val, locSum, xScore, maxScore = 0.0;

        while (bid < n) { // Processing multiple columns
                i = threadIdx.x;
                locSum = 0.0;
                // The predicate is evaluated on bid only, which is identical for
                // all threads of the block. It must stay block-uniform because
                // the branch contains a __syncthreads().
                if (isPotentialEnteringVar(bid)) { // Do the local processing
                        // Column offset computed in size_t so that bid * pitchAN
                        // cannot wrap around in 32-bit arithmetic.
                        const size_t colOffset = (size_t)bid * pitchAN;
                        while (i < m) { // Each thread processes multiple elements
                                val = AN[colOffset + i];
                                locSum += val * val;
                                i += blockDim.x;
                        }
                        // Reduce the value using shared memory.
                        // reduceSum is expected to leave the block-wide sum in
                        // locSum on thread 0 (that is what the code below relies
                        // on) -- confirm against its definition.
                        reduceSum(locSum);
                        if (threadIdx.x == 0) { // Is this the best variable encountered?
                                // On thread 0, locSum equals the steepest edge coefficient.
                                // NOTE(review): the original listing used an undeclared
                                // `cVal` here while the parameter `c` was never read;
                                // c[bid] is assumed to be the intended value -- confirm.
                                xScore = c[bid] * rsqrt(locSum);
                                if (fabs(maxScore) < fabs(xScore)) {
                                        maxIdx = bid;
                                        maxScore = xScore;
                                }
                        }
                        // Barrier before the next column is processed, so that no
                        // thread starts the next reduction while the current one
                        // is still being consumed.
                        __syncthreads();
                }
                bid += gridDim.x;
        }

        // Write the result into global memory (one entry per block).
        if (threadIdx.x == 0) {
                resIdx[blockIdx.x] = maxIdx;
                resVal[blockIdx.x] = maxScore;
        }
}