/**
 * Generic 2D convolution with a square (2r+1) x (2r+1) mask, one output
 * element per thread.
 *
 * Thread (j, i) -- j from the x launch dimension, i from the y launch
 * dimension -- accumulates, in float,
 *     sum over ic, jc in [-r, r] of
 *         masque[(ic + r) * L + (jc + r)] * tex2D(tex_img_inc, j + jc, i + ic)
 * with L = 2r + 1, and stores the result at output[i * j_dim + j].
 *
 * Depends on two file-scope symbols that are declared outside this block:
 *   - masque      : mask coefficients, read here as a row-major L x L array
 *                   (NOTE(review): presumably __constant__ memory -- confirm).
 *   - tex_img_inc : 2D texture holding the input image. Reads that fall
 *                   outside the image are resolved by the texture's
 *                   addressing mode, which is configured by the host code
 *                   (not visible here).
 *
 * @param output  Device buffer receiving the result, indexed row-major with
 *                a row stride of j_dim elements.
 * @param j_dim   Row stride of `output` in elements; also used as the column
 *                bound (assumed to be the image width -- TODO confirm).
 * @param r       Mask radius; the mask side is 2r + 1.
 *
 * Launch layout: 2D grid of 2D blocks, x covering columns and y covering rows.
 * There is no height parameter, so rows cannot be bounds-checked here: the
 * caller must make sure the y extent of the launch does not exceed the number
 * of rows allocated in `output`.
 *
 * The float accumulator is converted to unsigned char by plain assignment;
 * no explicit rounding or clamping is applied.
 */
__global__ void kernel_convoGene8r( unsigned char *output, int j_dim, int r)
{
    const int L = 2*r + 1 ;      // mask side length
    float outval0 = 0.0f ;       // float literal: avoids a silent double promotion

    // absolute coordinates of base point
    const int j = blockIdx.x * blockDim.x + threadIdx.x ;
    const int i = blockIdx.y * blockDim.y + threadIdx.y ;

    // Threads past the end of a row would otherwise write into the next row.
    if (j >= j_dim)
        return ;

    // convolution computation
    for (int ic = -r ; ic <= r ; ic++)
    {
        // Shift the row offset by r so the mask index starts at 0; the
        // unshifted (negative) offset must never be used as an array index.
        const int maskRow = (ic + r) * L ;

        for (int jc = -r ; jc <= r ; jc++)
            outval0 += masque[ maskRow + jc + r ] * tex2D(tex_img_inc, j + jc, i + ic) ;
    }

    output[ i * j_dim + j ] = outval0 ;
}