__global__ void kernel_medianV_sh( short *output, int i_dim, int j_dim, int r)
{
- int idc, val, min, max, inf, egal, sup, mxinf, minsup, estim ;
+ int idc, val, min, max, inf, equal, sup, mxinf, minsup, estim ;
//coordinates in the block
int ib = threadIdx.y ;
int jb = threadIdx.x ;
- int idx_h = __mul24(ib+r,blockDim.x) + jb ; // index pixel deans shmem (bloc+halo)
+ int idx_h = __mul24(ib+r,blockDim.x) +jb; // base pixel index
int offset = __mul24(blockDim.x,r) ;
- // coordonnees absolues du point
int j = __mul24(blockIdx.x,blockDim.x) + jb ;
int i = __mul24(blockIdx.y,blockDim.y) + ib ;
- extern __shared__ int buff[] ;
- /***********************************************************************************
- * CHARGEMENT DATA EN SHARED MEM
- ***********************************************************************************/
+ // DATA PREFETCHING INTO SHARED MEM
+ extern __shared__ int buff[] ;
buff[ idx_h ] = tex2D(tex_img_ins, j, i) ;
if (ib < r)
}
__syncthreads() ;
- /**********************************************************************************************
- * TRI VERTICAL par algo TORBEN MOGENSEN
- * (a little bit slow but saves memory => faster !)
- **********************************************************************************************/
+
+ // VERTICAL MEDIAN SELECTION (Torben Mogensen's algorithm: no sorting, no extra storage)
min = max = buff[ ib*blockDim.x +jb] ;
for (idc= 0 ; idc< 2*r+1 ; idc++ )
while (1)
{
estim = (min+max)/2 ;
- inf = sup = egal = 0 ;
+ inf = sup = equal = 0 ;
mxinf = min ;
minsup= max ;
for (idc =0; idc< 2*r+1 ; idc++)
{
sup++;
if( val < minsup) minsup = val ;
- } else egal++ ;
+ } else equal++ ;
}
if ( (inf <= (r+1))&&(sup <=(r+1)) ) break ;
else if (inf>sup) max = mxinf ;
}
if ( inf >= r+1 ) val = mxinf ;
- else if (inf+egal >= r+1) val = estim ;
+ else if (inf+equal >= r+1) val = estim ;
else val = minsup ;
output[ __mul24(j, i_dim) +i ] = val ;