new

[book_gpu.git] / BookGPU / Chapters / chapter3 / code / kernMedianSeparable.cu~
diff --git a/BookGPU/Chapters/chapter3/code/kernMedianSeparable.cu~ b/BookGPU/Chapters/chapter3/code/kernMedianSeparable.cu~

index 5c79c82849795ce9c79c891171c2078aaeda4951..f4ad2c6ad3d9534c5be6ed193f70656714e5f384 100755 (executable)
--- a/BookGPU/Chapters/chapter3/code/kernMedianSeparable.cu~
+++ b/BookGPU/Chapters/chapter3/code/kernMedianSeparable.cu~
@@ -6,17 +6,14 @@ __global__ void kernel_medianV_sh( short *output, int i_dim, int j_dim, int r)
    //coordinates in the block
    int ib = threadIdx.y ;
    int jb = threadIdx.x ;
    //coordinates in the block
    int ib = threadIdx.y ;
    int jb = threadIdx.x ;
-  int idx_h = __mul24(ib+r,blockDim.x) + jb ;   // index pixel deans shmem (bloc+halo)
+  int idx_h = __mul24(ib+r,blockDim.x) +jb; // base pixel index
    int offset = __mul24(blockDim.x,r) ;
    
    int offset = __mul24(blockDim.x,r) ;
    
-  // coordonnees absolues du point
    int j = __mul24(blockIdx.x,blockDim.x) + jb ; 
    int i = __mul24(blockIdx.y,blockDim.y) + ib ;
    
    int j = __mul24(blockIdx.x,blockDim.x) + jb ; 
    int i = __mul24(blockIdx.y,blockDim.y) + ib ;
    
-  extern __shared__ int buff[] ;
-  /***********************************************************************************
-   *              CHARGEMENT DATA EN SHARED MEM
-   ***********************************************************************************/
+  //      DATA PREFETCHING INTO SHARED MEM
+  extern __shared__ int buff[] ;              
    buff[ idx_h ] = tex2D(tex_img_ins, j, i) ;
                                   
    if (ib < r)
    buff[ idx_h ] = tex2D(tex_img_ins, j, i) ;
                                   
    if (ib < r)
@@ -29,10 +26,8 @@ __global__ void kernel_medianV_sh( short *output, int i_dim, int j_dim, int r)
           }
    
    __syncthreads() ;
           }
    
    __syncthreads() ;
-  /**********************************************************************************************
-   *               TRI VERTICAL par algo TORBEN MOGENSEN
-   *          (a little bit slow but saves memory => faster !)
-   **********************************************************************************************/
+
+  //      TORBEN MOGENSEN SORTING
    min = max = buff[ ib*blockDim.x +jb] ;
    
    for (idc= 0 ; idc< 2*r+1 ; idc++ )
    min = max = buff[ ib*blockDim.x +jb] ;
    
    for (idc= 0 ; idc< 2*r+1 ; idc++ )