X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/ecd2ddac55172a779e7e26d3bd5b1b2cb95033d6..f045ded06189b82188fcba9dd6ca383823e34aaa:/BookGPU/Chapters/chapter4/code/convoSepShV.cu~?ds=sidebyside diff --git a/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ b/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ index 4437a56..a8bf71e 100644 --- a/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ +++ b/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ @@ -2,9 +2,10 @@ __global__ void kernel_convoSepShx8pV(unsigned char *output, int j_dim, int r) { int ic, jc, p; int k = 2*r+1 ; - float outval0=0.0, outval1=0.0, outval2=0.0, outval3=0.0, outval4=0.0, outval5=0.0, outval6=0.0, outval7=0.0 ; - int bdimX = blockDim.x<<3 ; - int tidX = threadIdx.x<<3 ; + float outval0=0.0, outval1=0.0, outval2=0.0, outval3=0.0 ; + float outval4=0.0, outval5=0.0, outval6=0.0, outval7=0.0 ; + int bdimX = blockDim.x<<3 ; // all packets width + int tidX = threadIdx.x<<3 ; // one packet offset // absolute coordinates of the base point int j = (__umul24(blockIdx.x,blockDim.x) + threadIdx.x)<<3 ; @@ -44,7 +45,7 @@ __global__ void kernel_convoSepShx8pV(unsigned char *output, int j_dim, int r) outval7 += valMask*roi8p[ baseRoi +7 ] ; } - // 8 pixel par thread --> global mem + // 8 pixels per thread --> global mem output[ idx++ ] = outval0 ; output[ idx++ ] = outval1 ; output[ idx++ ] = outval2 ;