// kernel_convoGene3Reg8: each thread evaluates a 3x3 weighted sum of texture
// samples centred on its own (j, i) coordinate and stores the result, cast to
// unsigned char, into `output`.
//
//   output : destination buffer, indexed as i * j_dim + j (see the store at
//            the end of the kernel).
//   i_dim  : not referenced in the lines visible in this excerpt
//            (presumably the extent along i -- TODO confirm).
//   j_dim  : used as the row stride of `output`.
//
// NOTE(review): this is a partial listing. The leading numbers are the
// listing's own line numbers and they skip (1, 4, 5, 16, ...), so the braces,
// the assignments to n0..n8 and the declarations of `outval0` and
// `tex_img_inc` are not visible here.
1 __global__ void kernel_convoGene3Reg8( unsigned char *output, int i_dim, int j_dim)
// The nine coefficients of the 3x3 mask; their values are set in lines that
// are not part of this excerpt.
4 float n0,n1,n2,n3,n4,n5,n6,n7,n8 ;
5 // convolution mask values
16 // absolute base point coordinates
// j is built from the x components of the block/thread indices and i from the
// y components. __mul24 is the 24-bit integer multiply intrinsic, so both
// operands are assumed to fit in 24 bits -- TODO confirm for the launch sizes used.
17 int j = __mul24(blockIdx.x, blockDim.x) + threadIdx.x ;
18 int i = __mul24(blockIdx.y, blockDim.y) + threadIdx.y ;
// Weighted sum over the 3x3 neighbourhood (j-1..j+1, i-1..i+1). The
// coefficients are applied in reverse index order: n8 multiplies the
// (j-1, i-1) sample and n0 multiplies the (j+1, i+1) sample.
// NOTE(review): neighbours of edge pixels fall outside [0, dim); what tex2D
// returns there depends on how tex_img_inc is configured elsewhere -- verify.
20 outval0 = n8*tex2D(tex_img_inc, j-1, i-1 )
21 + n7*tex2D(tex_img_inc, j , i-1 )
22 + n6*tex2D(tex_img_inc, j+1, i-1 )
23 + n5*tex2D(tex_img_inc, j-1, i )
24 + n4*tex2D(tex_img_inc, j , i )
25 + n3*tex2D(tex_img_inc, j+1, i )
26 + n2*tex2D(tex_img_inc, j-1, i+1 )
27 + n1*tex2D(tex_img_inc, j , i+1 )
28 + n0*tex2D(tex_img_inc, j+1, i+1 ) ;
// Row-major store: row i, column j, row stride j_dim. The sum is converted
// with a plain cast; no clamping to the unsigned char range is visible here.
// NOTE(review): no `i < i_dim && j < j_dim` guard is visible before this
// write, so the launch grid presumably has to match the image size exactly
// -- TODO confirm against the host-side launch code.
30 output[ __mul24(i, j_dim) + j ] = (unsigned char) outval0 ;