// Row index of this thread: blockIdx.y * blockDim.y + threadIdx.y.
// __umul24 multiplies only the low 24 bits of each operand.
int i = __umul24( blockIdx.y, blockDim.y) + threadIdx.y ;
// Base column of this block: (blockIdx.x * blockDim.x) * 8, via the <<3 shift.
// NOTE(review): the factor 8 presumably means each thread covers 8 columns -- confirm.
int j0= __umul24(blockIdx.x,blockDim.x)<<3 ; // block's base column
// NOTE(review): `j` is not declared in the visible lines (only `j0` is) -- confirm where it is defined.
int idx = __umul24(i,j_dim) + j ; // flattened index: row i times j_dim, plus j
// NOTE(review): the two lines below start with '-' and '+' and are otherwise identical,
// which looks like leftover diff markers -- confirm, and keep a single declaration.
- int idrow = threadIdx.y*(bdimX+k-1) ; // start of this thread's row in shared memory (row pitch: bdimX+k-1)
+ int idrow = threadIdx.y*(bdimX+k-1) ; // start of this thread's row in shared memory (row pitch: bdimX+k-1)
// Dynamically sized shared-memory byte buffer; its size is supplied at kernel launch.
extern __shared__ unsigned char roi8p[];