/**
 * 3x3 mean (box) filter: one thread produces one output pixel.
 *
 * Each thread fetches the 3x3 neighbourhood centred on its own (j, i)
 * position from the texture `tex_img_inc` (declared elsewhere in this file),
 * weights every sample by 1/9, sums them and stores the result, truncated to
 * an unsigned char, in `output`.
 *
 * Launch layout: 2D grid of 2D blocks; the x dimension maps to columns (j)
 * and the y dimension maps to rows (i).
 *
 * @param output  Destination buffer, written at offset i * j_dim + j.
 * @param j_dim   Number of elements per row of `output` (row stride).
 *
 * Preconditions:
 *  - The signature carries no row count, so rows cannot be bounds-checked
 *    here: the caller must make sure the grid's y extent does not exceed the
 *    number of rows allocated in `output`.
 *  - Samples at j-1 / j+1 / i-1 / i+1 fall outside the image on its border;
 *    what they return depends on how `tex_img_inc` was configured.
 *    NOTE(review): confirm the addressing mode set by the host code.
 */
__global__ void kernel_convoGene3Reg8( unsigned char *output, int j_dim)
{
    // Every tap of the mask carries the same weight; a float literal keeps
    // the arithmetic in single precision.
    const float w = 1.0f / 9.0f ;

    // Absolute base point coordinates. A plain multiply replaces the legacy
    // __mul24 intrinsic, which only used the low 24 bits of each operand.
    const int j = blockIdx.x * blockDim.x + threadIdx.x ;
    const int i = blockIdx.y * blockDim.y + threadIdx.y ;

    // Threads past the end of a row must not write: their offset would alias
    // pixels of the following row (or run past the buffer on the last row).
    if (j >= j_dim)
        return ;

    // Weighted sum over the 3x3 neighbourhood (row above, same row, row below).
    const float outval0 =
          w * tex2D(tex_img_inc, j-1, i-1 )
        + w * tex2D(tex_img_inc, j  , i-1 )
        + w * tex2D(tex_img_inc, j+1, i-1 )
        + w * tex2D(tex_img_inc, j-1, i   )
        + w * tex2D(tex_img_inc, j  , i   )
        + w * tex2D(tex_img_inc, j+1, i   )
        + w * tex2D(tex_img_inc, j-1, i+1 )
        + w * tex2D(tex_img_inc, j  , i+1 )
        + w * tex2D(tex_img_inc, j+1, i+1 ) ;

    // Offset computed in size_t so large images cannot overflow a 32-bit int.
    // The cast truncates toward zero, as the original code did.
    output[ (size_t)i * (size_t)j_dim + (size_t)j ] = (unsigned char) outval0 ;
}