// thread coordinates within the block (row ib, column jb)
int ib = threadIdx.y ;
int jb = threadIdx.x ;
- int idx_h = __mul24(ib+r,blockDim.x) + jb ; // index pixel deans shmem (bloc+halo)
+ int idx_h = __mul24(ib+r,blockDim.x) +jb; // base pixel index
int offset = __mul24(blockDim.x,r) ;
- // absolute coordinates of the pixel
int j = __mul24(blockIdx.x,blockDim.x) + jb ;
int i = __mul24(blockIdx.y,blockDim.y) + ib ;
- extern __shared__ int buff[] ;
- /***********************************************************************************
- * LOADING DATA INTO SHARED MEM
- ***********************************************************************************/
+ // DATA PREFETCHING INTO SHARED MEM
+ extern __shared__ int buff[] ;
buff[ idx_h ] = tex2D(tex_img_ins, j, i) ;
if (ib < r)
}
__syncthreads() ;
- /**********************************************************************************************
- * VERTICAL SORT using the TORBEN MOGENSEN algorithm
- * (a little bit slow, but saves memory => faster overall!)
- **********************************************************************************************/
+
+ // TORBEN MOGENSEN SORTING
min = max = buff[ ib*blockDim.x +jb] ;
for (idc= 0 ; idc< 2*r+1 ; idc++ )