new ch5 reread

[book_gpu.git] / BookGPU / Chapters / chapter4 / code / convoGeneSh1.cu~
diff --git a/BookGPU/Chapters/chapter4/code/convoGeneSh1.cu~ b/BookGPU/Chapters/chapter4/code/convoGeneSh1.cu~

index 46e212063de266b19557ad3e1dac1f676104039a..dcb33793eda16bd3172b91819fea873283044342 100644 (file)
--- a/BookGPU/Chapters/chapter4/code/convoGeneSh1.cu~
+++ b/BookGPU/Chapters/chapter4/code/convoGeneSh1.cu~
@@ -40,13 +40,13 @@ __global__ void kernel_convoNonSepSh_8p(unsigned char *output, int j_dim, int r)
                 }
         }
    __syncthreads();
-  
+ 
    // computations
    for (ic=0 ; ic<k ; ic++)
         for( jc=0 ; jc<k ; jc++)
           {
                 int baseRoi = __umul24(ic+threadIdx.y,(bdimX+k-1)) + jc+tidX ;
-               float valMask = masque[ __umul24(ic,k)+jc ] ;
+               float valMask = mask[ __umul24(ic,k)+jc ] ;
                 outval0 += valMask*roi8p[ baseRoi ] ;
                 outval1 += valMask*roi8p[ baseRoi +1 ] ;
                 outval2 += valMask*roi8p[ baseRoi +2 ] ;
@@ -58,12 +58,12 @@ __global__ void kernel_convoNonSepSh_8p(unsigned char *output, int j_dim, int r)
           }
         
    // multiple output --> global mem
-  output[ idx   ] = outval0 ;
-  output[ idx+1 ] = outval1 ;
-  output[ idx+2 ] = outval2 ;
-  output[ idx+3 ] = outval3 ;
-  output[ idx+4 ] = outval4 ;
-  output[ idx+5 ] = outval5 ;
-  output[ idx+6 ] = outval6 ;
-  output[ idx+7 ] = outval7 ;
+  output[ idx++ ] = outval0 ;
+  output[ idx++ ] = outval1 ;
+  output[ idx++ ] = outval2 ;
+  output[ idx++ ] = outval3 ;
+  output[ idx++ ] = outval4 ;
+  output[ idx++ ] = outval5 ;
+  output[ idx++ ] = outval6 ;
+  output[ idx   ] = outval7 ;
  }