X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/32bc153a6a82be882b13679314a6f1e8021de074..1ac5b5a535d9154c4f080e94f2f9a49ab6e299b7:/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ diff --git a/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ b/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ index 4437a56..a8bf71e 100644 --- a/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ +++ b/BookGPU/Chapters/chapter4/code/convoSepShV.cu~ @@ -2,9 +2,10 @@ __global__ void kernel_convoSepShx8pV(unsigned char *output, int j_dim, int r) { int ic, jc, p; int k = 2*r+1 ; - float outval0=0.0, outval1=0.0, outval2=0.0, outval3=0.0, outval4=0.0, outval5=0.0, outval6=0.0, outval7=0.0 ; - int bdimX = blockDim.x<<3 ; - int tidX = threadIdx.x<<3 ; + float outval0=0.0, outval1=0.0, outval2=0.0, outval3=0.0 ; + float outval4=0.0, outval5=0.0, outval6=0.0, outval7=0.0 ; + int bdimX = blockDim.x<<3 ; // all packets width + int tidX = threadIdx.x<<3 ; // one packet offset // absolute coordinates of the base point int j = (__umul24(blockIdx.x,blockDim.x) + threadIdx.x)<<3 ; @@ -44,7 +45,7 @@ __global__ void kernel_convoSepShx8pV(unsigned char *output, int j_dim, int r) outval7 += valMask*roi8p[ baseRoi +7 ] ; } - // 8 pixel par thread --> global mem + // 8 pixels per thread --> global mem output[ idx++ ] = outval0 ; output[ idx++ ] = outval1 ; output[ idx++ ] = outval2 ;