ch18

[book_gpu.git] / BookGPU / Chapters / chapter4 / code / convoSepShV.cu~
diff --git a/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ b/BookGPU/Chapters/chapter4/code/convoSepShV.cu~

index 4437a565eebdf3b2a73ffd915ebd0743a1315ba9..a8bf71ef58e4dc6698bd8708aefb214e28bf8a7f 100644 (file)
--- a/BookGPU/Chapters/chapter4/code/convoSepShV.cu~
+++ b/BookGPU/Chapters/chapter4/code/convoSepShV.cu~
@@ -2,9 +2,10 @@ __global__ void kernel_convoSepShx8pV(unsigned char *output, int j_dim, int r)
  {
    int ic, jc, p;
    int k = 2*r+1 ;
-  float outval0=0.0, outval1=0.0, outval2=0.0, outval3=0.0, outval4=0.0, outval5=0.0, outval6=0.0, outval7=0.0 ;
-  int bdimX = blockDim.x<<3 ;
-  int tidX = threadIdx.x<<3 ;
+  float outval0=0.0, outval1=0.0, outval2=0.0, outval3=0.0 ;
+  float outval4=0.0, outval5=0.0, outval6=0.0, outval7=0.0 ;
+  int bdimX = blockDim.x<<3 ; // total width, in pixels, of all 8-pixel packets handled by the block
+  int tidX = threadIdx.x<<3 ; // offset, in pixels, of this thread's 8-pixel packet within the block
      
    // absolute coordinates of the base point
    int j = (__umul24(blockIdx.x,blockDim.x) + threadIdx.x)<<3 ; 
@@ -44,7 +45,7 @@ __global__ void kernel_convoSepShx8pV(unsigned char *output, int j_dim, int r)
                 outval7 += valMask*roi8p[ baseRoi +7 ] ;
           }
         
-  // 8 pixel par thread --> global mem
+  // 8 pixels per thread --> global mem
    output[ idx++ ] = outval0 ;
    output[ idx++ ] = outval1 ;
    output[ idx++ ] = outval2 ;