/**
 * Compare-exchange: after the call *a <= *b.
 *
 * Written with min/max instead of a conditional swap so that all lanes of a
 * warp execute the same instruction stream. The result is identical to the
 * conditional swap, including when a and b point to the same int.
 */
__device__ inline void s(int* a, int* b)
{
    const int lo = min(*a, *b);
    const int hi = max(*a, *b);
    *a = lo;
    *b = hi;
}

/*
 * Partial sorting networks built from s(). Every argument is an int*.
 * Each macro is wrapped in do { } while (0) so that it expands to exactly one
 * statement and stays correct under an unbraced if/else/loop; invoke it with a
 * trailing semicolon.
 *
 *   min3    : smallest of the 3 values ends up in *a
 *   max3    : largest  of the 3 values ends up in *c
 *   minmaxN : smallest ends up in the first argument, largest in the last
 *             (for minmax3 the middle argument then holds the median)
 */
#define min3(a, b, c) do { s(a, b); s(a, c); } while (0)
#define max3(a, b, c) do { s(b, c); s(a, c); } while (0)
#define minmax3(a, b, c) do { max3(a, b, c); s(a, b); } while (0)
#define minmax4(a, b, c, d) do { s(a, b); s(c, d); s(a, c); s(b, d); } while (0)
#define minmax5(a, b, c, d, e) do { s(a, b); s(c, d); min3(a, c, e); max3(b, d, e); } while (0)
#define minmax6(a, b, c, d, e, f) do { s(a, d); s(b, e); s(c, f); min3(a, b, c); max3(d, e, f); } while (0)

/**
 * 3x3 median filter, one output pixel per thread.
 *
 * The nine neighbours of pixel (row i, column j) are fetched from the texture
 * tex_img_ins (declared outside this block). Starting from six values, each
 * step drops the current minimum and maximum - neither can be the median of
 * the nine - and replaces the dropped maximum slot with the next neighbour,
 * until three candidates remain and the middle one is the median.
 *
 * Launch layout: 2D grid of 2D blocks; x indexes columns (j), y indexes rows (i).
 *
 * @param output  row-major destination buffer, written at output[i * j_dim + j]
 * @param i_dim   number of rows; used only as the upper bound for i
 *                (NOTE(review): inferred from the name - confirm against the caller)
 * @param j_dim   row length of output in elements, and upper bound for j
 *
 * NOTE(review): fetches at j-1 / j+1 / i-1 / i+1 go outside the image on the
 * borders; the result there depends on the addressing mode configured for
 * tex_img_ins, which is not visible here.
 */
__global__ void kernel_medianForget1pix3(short *output, int i_dim, int j_dim)
{
    // Plain 32-bit multiplies: __mul24 only uses the low 24 bits of each operand.
    const int j = blockIdx.x * blockDim.x + threadIdx.x;   // column
    const int i = blockIdx.y * blockDim.y + threadIdx.y;   // row

    // Threads of a grid that overshoots the image must neither write past the
    // end of output nor wrap around into the following row.
    if (i >= i_dim || j >= j_dim)
        return;

    int a0, a1, a2, a3, a4, a5;

    // First six neighbours: the whole upper row and the whole middle row.
    a0 = tex2D(tex_img_ins, j - 1, i - 1);
    a1 = tex2D(tex_img_ins, j,     i - 1);
    a2 = tex2D(tex_img_ins, j + 1, i - 1);
    a3 = tex2D(tex_img_ins, j - 1, i);
    a4 = tex2D(tex_img_ins, j,     i);
    a5 = tex2D(tex_img_ins, j + 1, i);
    minmax6(&a0, &a1, &a2, &a3, &a4, &a5);   // min -> a0, max -> a5 (both dropped)

    // Same texture as every other fetch (one fetch used to read tex_img_in).
    a5 = tex2D(tex_img_ins, j - 1, i + 1);   // 7th value replaces the dropped max
    minmax5(&a1, &a2, &a3, &a4, &a5);        // min -> a1, max -> a5 (both dropped)

    a5 = tex2D(tex_img_ins, j, i + 1);       // 8th value
    minmax4(&a2, &a3, &a4, &a5);             // min -> a2, max -> a5 (both dropped)

    a5 = tex2D(tex_img_ins, j + 1, i + 1);   // 9th value
    minmax3(&a3, &a4, &a5);                  // min -> a3, median -> a4, max -> a5

    output[i * j_dim + j] = a4;              // median of the 3x3 neighbourhood
}