AND Private Git Repository - book_gpu.git/blobdiff - BookGPU/Chapters/chapter4/code/convoGeneSh1.cu~
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
last version
[book_gpu.git] / BookGPU / Chapters / chapter4 / code / convoGeneSh1.cu~
index 46e212063de266b19557ad3e1dac1f676104039a..dcb33793eda16bd3172b91819fea873283044342 100644 (file)
@@ -40,13 +40,13 @@ __global__ void kernel_convoNonSepSh_8p(unsigned char *output, int j_dim, int r)
                }
        }
   __syncthreads();
-  
   // computations
   for (ic=0 ; ic<k ; ic++)
        for( jc=0 ; jc<k ; jc++)
          {
                int baseRoi = __umul24(ic+threadIdx.y,(bdimX+k-1)) + jc+tidX ;
-               float valMask = masque[ __umul24(ic,k)+jc ] ;
+               float valMask = mask[ __umul24(ic,k)+jc ] ;
                outval0 += valMask*roi8p[ baseRoi ] ;
                outval1 += valMask*roi8p[ baseRoi +1 ] ;
                outval2 += valMask*roi8p[ baseRoi +2 ] ;
@@ -58,12 +58,12 @@ __global__ void kernel_convoNonSepSh_8p(unsigned char *output, int j_dim, int r)
          }
        
   // multiple output --> global mem
-  output[ idx   ] = outval0 ;
-  output[ idx+1 ] = outval1 ;
-  output[ idx+2 ] = outval2 ;
-  output[ idx+3 ] = outval3 ;
-  output[ idx+4 ] = outval4 ;
-  output[ idx+5 ] = outval5 ;
-  output[ idx+6 ] = outval6 ;
-  output[ idx+7 ] = outval7 ;
+  output[ idx++ ] = outval0 ;
+  output[ idx++ ] = outval1 ;
+  output[ idx++ ] = outval2 ;
+  output[ idx++ ] = outval3 ;
+  output[ idx++ ] = outval4 ;
+  output[ idx++ ] = outval5 ;
+  output[ idx++ ] = outval6 ;
+  output[ idx   ] = outval7 ;
 }