// kernel_median3_2pix: each thread produces two horizontally adjacent output
// values, output[i*j_dim + j] and output[i*j_dim + j + 1], where
// j = 2*(global x thread index) and i = global y thread index.
// Each value is selected from the 3x3 neighbourhood read through tex2D from
// tex_img_ins: columns j-1..j+1 for the left window, columns j..j+2 for the
// right window, rows i-1..i+1 for both. The six pixels in columns j and j+1
// belong to both windows, so they are fetched and pre-processed only once.
//
// NOTE(review): minmax6/minmax5/minmax4/minmax3 are defined outside this
// excerpt. After each call the first argument is never read again and the
// last one (a5/b5) is overwritten by a new pixel, so presumably each helper
// leaves the smallest value in its first argument and the largest in its
// last, making a4/b4 the window medians -- confirm against the helpers.
// NOTE(review): no bounds guard is visible; both stores are unconditional, so
// the launch grid must cover the image exactly. Reads at j-1, j+2, i-1 and
// i+1 go outside the image on the borders; the result there depends on the
// addressing mode of tex_img_ins, which is configured elsewhere -- confirm.
// NOTE(review): tex2D results are stored in int; the texel type of
// tex_img_ins is not visible here -- confirm.
1 __global__ void kernel_median3_2pix( short *output,
// NOTE(review): listing lines 2-3 are absent from this excerpt (the embedded
// numbering jumps from 1 to 4). They presumably hold the remaining
// parameters, including the j_dim used for the stores below, and the opening
// brace -- restore them from the original listing.
4 // j base coordinate = 2*(thread index)
// __mul24 is a 24-bit integer multiply intrinsic.
// NOTE(review): confirm the index ranges stay within its operand limits.
5 int j= __mul24(__mul24(blockIdx.x,blockDim.x) + threadIdx.x,2) ;
6 int i= __mul24(blockIdx.y,blockDim.y) + threadIdx.y ;
7 int a0, a1, a2, a3, a4, a5 ; // for left window
8 int b0, b1, b2, b3, b4, b5 ; // for right window
// Fetch the six pixels shared by both windows: columns j and j+1, rows i-1..i+1.
10 a0 = tex2D(tex_img_ins, j , i-1); // 6 common pixels
11 a1 = tex2D(tex_img_ins, j+1, i-1);
12 a2 = tex2D(tex_img_ins, j , i );
13 a3 = tex2D(tex_img_ins, j+1, i );
14 a4 = tex2D(tex_img_ins, j , i+1);
15 a5 = tex2D(tex_img_ins, j+1, i+1);
// One shared minmax pass over the common pixels, then copy the result so the
// two windows can continue independently. a0 and b0 are never read after this
// point, and a5/b5 are immediately overwritten by the next fetched pixels.
17 minmax6(&a0, &a1, &a2, &a3, &a4, &a5);// common minmax
18 b0=a0; b1=a1; b2=a2; b3=a3; b4=a4; b5=a5;// separation
// From here each window adds its own remaining column one pixel at a time
// (column j-1 for the left window, column j+2 for the right window), running
// a minmax over one fewer candidate after each insertion.
20 a5 = tex2D(tex_img_ins, j-1, i); //separate processes
21 b5 = tex2D(tex_img_ins, j+2, i);
22 minmax5(&a1, &a2, &a3, &a4, &a5);
23 minmax5(&b1, &b2, &b3, &b4, &b5);
24 a5 = tex2D(tex_img_ins, j-1, i-1);
25 b5 = tex2D(tex_img_ins, j+2, i-1);
26 minmax4(&a2, &a3, &a4, &a5);
27 minmax4(&b2, &b3, &b4, &b5);
28 a5 = tex2D(tex_img_ins, j-1, i+1);
29 b5 = tex2D(tex_img_ins, j+2, i+1);
30 minmax3(&a3, &a4, &a5);
31 minmax3(&b3, &b4, &b5);
// Store the middle element of each final triple; j_dim is the row stride of
// output in elements (row i starts at i*j_dim).
33 output[ __mul24(i, j_dim) +j ] = a4 ; //2 outputs
34 output[ __mul24(i, j_dim) +j+1 ] = b4 ; 
37 //grid dimensions to be set in main.cu file
// The x extent is halved relative to one block column per dimBlock.x pixels,
// presumably because each thread of kernel_median3_2pix writes two pixels
// along x -- confirm this launch belongs to that kernel.
// NOTE(review): assuming integer operands, both divisions truncate, so W must
// be a multiple of 2*dimBlock.x and H a multiple of dimBlock.y; otherwise the
// trailing columns/rows are never computed. W, H and dimBlock are declared
// outside this excerpt.
38 dimGrid = dim3( (W/dimBlock.x)/2, H/dimBlock.y, 1 ) ;