From: Gilles Perrot <gilles.perrot@univ-fcomte.fr>
Date: Fri, 10 Jun 2011 11:58:57 +0000 (+0200)
Subject: generation de chemins de taille parametrable
X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/lniv_gpu.git/commitdiff_plain/cb7f1415a3a53d241784928e2a5d291f79a8e984

generation de chemins de taille parametrable
---

diff --git a/.gitignore b/.gitignore
index e69de29..94c2ef1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+obj/*.o
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 2801dda..20361ca 100644
--- a/Makefile
+++ b/Makefile
@@ -34,9 +34,9 @@
 ################################################################################
 
 # Add source files here
-EXECUTABLE	:= levelines
+EXECUTABLE	:= lniv
 # CUDA source files (compiled with cudacc)
-CUFILES		:= main_gmem.cu
+CUFILES		:= main.cu
 # CUDA dependency files
 CU_DEPS		:= levelines_common.h
 # C/C++ source files (compiled with gcc / c++)
diff --git a/image_cpu.pgm b/image_cpu.pgm
new file mode 100644
index 0000000..309cc8a
Binary files /dev/null and b/image_cpu.pgm differ
diff --git a/image_out.pgm b/image_out.pgm
new file mode 100644
index 0000000..690310e
Binary files /dev/null and b/image_out.pgm differ
diff --git a/levelines_common.c b/levelines_common.c
new file mode 100644
index 0000000..bb77107
--- /dev/null
+++ b/levelines_common.c
@@ -0,0 +1,131 @@
+#include "levelines_common.h"
+
+unsigned int lniv4_value(unsigned int **image, int i, int j, int idim, int jdim, int *dout)
+{
+  unsigned int value_c ;
+  unsigned int value2_c ;
+  unsigned int d, v, p, d_min, eq_min, eq, sum, sum2 ;
+  unsigned int sum_eq_min ;
+  unsigned int it, jt ;
+
+
+  /* mem */
+  value_c = image[ i*l + j ] ;
+  value2_c = value_c*value_c ;
+
+  // direction d=0
+  sum = value_c ;
+  sum2 = value2_c ;
+  it = i ;
+  jt = j ;
+  for (p=0; p<3; p++)
+    {
+      it += Di_Q1[0][p] ;
+      jt += Dj_Q1[0][p] ;
+      v = image[ it*l + jt ] ;
+      sum += v ;
+      sum2 += v*v ;
+    }
+  eq_min = sum2 - sum*sum/4 ; /* *4 */
+  sum_eq_min = sum ;
+  d_min = 0 ;
+
+  /* direction 1 a 5 */
+  for (d=1; d<6; d++)
+    {
+      sum = value_c ;
+      sum2 = value2_c ;
+      it = i ;
+      jt = j ;
+      for (p=0; p<3; p++)
+	{
+	  it += Di_Q1[d][p] ;
+	  jt += Dj_Q1[d][p] ;
+	  v = image[it*l + jt] ;
+	  sum += v ;
+	  sum2 += v*v ;
+	}
+      eq = sum2 - sum*sum/4 ; /* *4 */
+      if (eq < eq_min)
+	{
+	  eq_min = eq ;
+	  sum_eq_min = sum ;
+	  d_min = d ; /* pour info */	 
+	}
+    }
+
+  /* direction 6 a 11 */
+  for (d=0; d<6; d++)
+    {
+      sum = value_c ;
+      sum2 = value2_c ;
+      it = i ;
+      jt = j ;
+      for (p=0; p<3; p++)
+	{
+	  it += Dj_Q1[d][p] ;
+	  jt -= Di_Q1[d][p] ;
+	  v = image[it*l + jt] ;
+	  sum += v ;
+	  sum2 += v*v ;
+	}
+      eq = sum2 - sum*sum/4 ; /* *4 */
+      if (eq < eq_min)
+	{
+	  eq_min = eq ;
+	  sum_eq_min = sum ;
+	  d_min = d+6 ; /* pour info */	 
+	}
+    }
+
+  /* direction 12 a 17 */
+  for (d=0; d<6; d++)
+    {
+      sum = value_c ;
+      sum2 = value2_c ;
+      it = i ;
+      jt = j ;
+      for (p=0; p<3; p++)
+	{
+	  it -= Di_Q1[d][p] ;
+	  jt -= Dj_Q1[d][p] ;
+	  v = image[it*l + jt] ;
+	  sum += v ;
+	  sum2 += v*v ;
+	}
+      eq = sum2 - sum*sum/4 ; /* *4 */
+      if (eq < eq_min)
+	{
+	  eq_min = eq ;
+	  sum_eq_min = sum ;
+	  d_min = d+12 ; /* pour info */	 
+	}
+    }
+  
+  /* direction 18 a 23 */
+  for (d=0; d<6; d++)
+    {
+      sum = value_c ;
+      sum2 = value2_c ;
+      it = i ;
+      jt = j ;
+      for (p=0; p<3; p++)
+	{
+	  it -= Dj_Q1[d][p] ;
+	  jt += Di_Q1[d][p] ;
+	  v = image[it*l + jt] ;
+	  sum += v ;
+	  sum2 += v*v ;
+	}
+      eq = sum2 - sum*sum/4 ; /* *4 */
+      if (eq < eq_min)
+	{
+	     eq_min = eq ;
+	     sum_eq_min = sum ;
+	     d_min = d+18 ; /* pour info */	 
+	}
+    }
+  
+  *dout = d_min ;
+  return sum_eq_min/4 ;
+}
diff --git a/levelines_common.h b/levelines_common.h
index ddb0ea2..97d9bd7 100644
--- a/levelines_common.h
+++ b/levelines_common.h
@@ -3,17 +3,171 @@
 #ifndef LEVELINES_COMMON_H
 #define LEVELINES_COMMON_H
 
+#include <cuda_runtime.h>
 
 
-#include <cuda_runtime.h>
+/**
+ * \var Di_Q1
+ * \brief  table de coordonnees des segments pour le quadrant 1
+ *
+ */
+const int Di_Q1[6][3] = 
+  {{ -1, -1, -1},  /* 0 */
+   { -1, -1, -1},  /* 1 */
+   { -1, -1, -1},  /* 2 */
+   { -1, -1, -1},  /* 3 */
+   { -1,  0, -1},  /* 4 */
+   {  0, -1,  0}}; /* 5 */
+
 
+/**
+ * \var Dj_Q1
+ * \brief  table de coordonnees des segments pour le quadrant 1
+ *
+ */
+const int Dj_Q1[6][3] = 
+  {{  0,  0,  0},  /* 0 */
+   {  0,  1,  0},  /* 1 */
+   {  1,  0,  1},  /* 2 */
+   {  1,  1,  1},  /* 3 */
+   {  1,  1,  1},  /* 4 */
+   {  1,  1,  1}}; /* 5 */
 
 
 
 ////////////////////////////////////////////////////////////////////////////////
 // Reference CPU functions
 ////////////////////////////////////////////////////////////////////////////////
-//extern "C" void fonc(...);
+extern "C" unsigned int lniv_cpu(unsigned int *image, int i, int j, int idim, int l, int *dout){
+
+  unsigned int value_c ;
+  unsigned int value2_c ;
+  unsigned int d, v, p, d_min, eq_min, eq, sum, sum2 ;
+  unsigned int sum_eq_min ;
+  unsigned int it, jt ;
+
+
+  /* mem */
+  value_c = image[ i*l + j ] ;
+  value2_c = value_c*value_c ;
+
+  // direction d=0
+  sum = value_c ;
+  sum2 = value2_c ;
+  it = i ;
+  jt = j ;
+  for (p=0; p<3; p++)
+    {
+      it += Di_Q1[0][p] ;
+      jt += Dj_Q1[0][p] ;
+      v = image[ it*l + jt ] ;
+      sum += v ;
+      sum2 += v*v ;
+    }
+  eq_min = sum2 - sum*sum/4 ; /* *4 */
+  sum_eq_min = sum ;
+  d_min = 0 ;
+
+  /* direction 1 a 5 */
+  for (d=1; d<6; d++)
+    {
+      sum = value_c ;
+      sum2 = value2_c ;
+      it = i ;
+      jt = j ;
+      for (p=0; p<3; p++)
+	{
+	  it += Di_Q1[d][p] ;
+	  jt += Dj_Q1[d][p] ;
+	  v = image[it*l + jt] ;
+	  sum += v ;
+	  sum2 += v*v ;
+	}
+      eq = sum2 - sum*sum/4 ; /* *4 */
+      if (eq < eq_min)
+	{
+	  eq_min = eq ;
+	  sum_eq_min = sum ;
+	  d_min = d ; /* pour info */	 
+	}
+    }
+
+  /* direction 6 a 11 */
+  for (d=0; d<6; d++)
+    {
+      sum = value_c ;
+      sum2 = value2_c ;
+      it = i ;
+      jt = j ;
+      for (p=0; p<3; p++)
+	{
+	  it += Dj_Q1[d][p] ;
+	  jt -= Di_Q1[d][p] ;
+	  v = image[it*l + jt] ;
+	  sum += v ;
+	  sum2 += v*v ;
+	}
+      eq = sum2 - sum*sum/4 ; /* *4 */
+      if (eq < eq_min)
+	{
+	  eq_min = eq ;
+	  sum_eq_min = sum ;
+	  d_min = d+6 ; /* pour info */	 
+	}
+    }
+
+  /* direction 12 a 17 */
+  for (d=0; d<6; d++)
+    {
+      sum = value_c ;
+      sum2 = value2_c ;
+      it = i ;
+      jt = j ;
+      for (p=0; p<3; p++)
+	{
+	  it -= Di_Q1[d][p] ;
+	  jt -= Dj_Q1[d][p] ;
+	  v = image[it*l + jt] ;
+	  sum += v ;
+	  sum2 += v*v ;
+	}
+      eq = sum2 - sum*sum/4 ; /* *4 */
+      if (eq < eq_min)
+	{
+	  eq_min = eq ;
+	  sum_eq_min = sum ;
+	  d_min = d+12 ; /* pour info */	 
+	}
+    }
+  
+  /* direction 18 a 23 */
+  for (d=0; d<6; d++)
+    {
+      sum = value_c ;
+      sum2 = value2_c ;
+      it = i ;
+      jt = j ;
+      for (p=0; p<3; p++)
+	{
+	  it -= Dj_Q1[d][p] ;
+	  jt += Di_Q1[d][p] ;
+	  v = image[it*l + jt] ;
+	  sum += v ;
+	  sum2 += v*v ;
+	}
+      eq = sum2 - sum*sum/4 ; /* *4 */
+      if (eq < eq_min)
+	{
+	     eq_min = eq ;
+	     sum_eq_min = sum ;
+	     d_min = d+18 ; /* pour info */	 
+	}
+    }
+  
+  *dout = d_min ;
+  return sum_eq_min/4 ;
+ };
+
 
 ////////////////////////////////////////////////////////////////////////////////
 // GPU functions (in file.cu)
diff --git a/levelines_kernels.cu b/levelines_kernels.cu
index 491c600..754f3bf 100644
--- a/levelines_kernels.cu
+++ b/levelines_kernels.cu
@@ -6,65 +6,65 @@
 __constant__ int pathDi[PSIZE_I][PSIZE_J-1] =
   {
     // Q1
-	{  -1, -1, -1},       // 
-	{  -1, -1, -1},       //  
-	{  -1, -1, -1},       // 
-	{  -1, -1, -1},       // 
-	{  -1,  0,  1},       // 
-	{   0, -1,  0},
+	{  -1, -1, -1},       // 90
+	{  -1, -1, -1},       // 75
+	{  -1, -1, -1},       // 60
+	{  -1, -1, -1},       // 45
+	{  -1,  0, -1},       // 30
+	{   0, -1,  0},       // 15
 	// Q4
-	{   0,  0,  0},       // 
-	{   0,  1,  1},       //  
-	{   1,  0,  1},       // 
-	{   1,  1,  1},       // 
-	{   1,  1,  1},       // 
-	{   1,  1,  1},
+	{   0,  0,  0},       // 0
+	{   0,  1,  0},       // 345 
+	{   1,  0,  1},       // 330
+	{   1,  1,  1},       // 315
+	{   1,  1,  1},       // 300
+	{   1,  1,  1},       // 285
 	// Q3
-	{   1,  1,  1},       // 
-	{   1,  1,  1},       //  
-	{   1,  1,  1},       // 
-	{   1,  1,  1},       // 
-	{   1,  0, -1},       // 
-	{   0,  1,  0},
+	{   1,  1,  1},       // 270
+	{   1,  1,  1},       // 255 
+	{   1,  1,  1},       // 240
+	{   1,  1,  1},       // 225
+	{   1,  0,  1},       // 210
+	{   0,  1,  0},       // 195
 	// Q2
-	{   0,  0,  0},       // 
-	{   0, -1,  0},       //  
-	{  -1,  0, -1},       // 
-	{  -1, -1, -1},       // 
-	{  -1, -1, -1},       // 
-	{  -1, -1, -1}
+	{   0,  0,  0},       // 180
+	{   0, -1,  0},       // 165 
+	{  -1,  0, -1},       // 150
+	{  -1, -1, -1},       // 135
+	{  -1, -1, -1},       // 120
+	{  -1, -1, -1}        // 105
   } ;     // 
   
 __constant__ int pathDj[PSIZE_I][PSIZE_J-1] =
   {
 	// Q1
-	{  0,  0,  0},       // 
-	{  0,  1,  0},
-	{  1,  0,  1},  
-	{  1,  1,  1},  
-	{  1,  1,  1},  
-	{  1,  1,  1},
+	{  0,  0,  0},       // 90
+	{  0,  1,  0},       // 75
+	{  1,  0,  1},       // 60
+	{  1,  1,  1},       // 45
+	{  1,  1,  1},       // 30 
+	{  1,  1,  1},       // 15
 	// Q4
-	{  1,  1,  1},       // 
-	{  1,  1,  1},
-	{  1,  1,  1},  
-	{  1,  1,  1},  
-	{  1,  0, -1},  
-	{  0,  1,  0},
-	// Q3
-	{  0,  0,  0},       // 
-	{  0, -1,  0},
-	{ -1,  0, -1},  
-	{ -1, -1, -1},  
-	{ -1, -1, -1},  
-	{ -1, -1, -1},
+	{  1,  1,  1},       // 0
+	{  1,  1,  1},       // 345
+	{  1,  1,  1},       // 330
+	{  1,  1,  1},       // 315  
+	{  1,  0,  1},       // 300
+	{  0,  1,  0},       // 285
+	// Q3 
+	{  0,  0,  0},       // 270
+	{  0, -1,  0},       // 255
+	{ -1,  0, -1},       // 240
+	{ -1, -1, -1},       // 225
+	{ -1, -1, -1},       // 210
+	{ -1, -1, -1},       // 195
 	// Q2
-	{ -1, -1, -1},       // 
-	{ -1, -1, -1},
-	{ -1, -1, -1},  
-	{ -1, -1, -1},  
-	{ -1,  0,  1},  
-	{  0, -1,  0}
+	{ -1, -1, -1},       // 180
+	{ -1, -1, -1},       // 165 
+	{ -1, -1, -1},       // 150
+	{ -1, -1, -1},       // 135
+	{ -1,  0, -1},       // 120 
+	{  0, -1,  0}        // 105
   } ;     
 
   
@@ -74,6 +74,72 @@ texture<int, 2, cudaReadModeElementType> tex_img_estim ;
 texture<int, 2, cudaReadModeElementType> tex_img_lniv ;
 
 
+__constant__ float tangente[] = {0.000, 0.268, 0.577, 1.000} ;
+
+__global__ void kernel_calcul_paths( int2 * d_paths, unsigned int r){
+
+  unsigned int idpath = 0 ;
+  int ic, jc, iprec, jprec ;
+  float offset = 0.5 ;
+  unsigned int basepath = 0 ;
+
+  // Q1 inf
+  for (int a=0 ; a< 4 ; a++){        // les 4 angles 0,15,30 et 45
+	for (int p=0 ; p< r ; p++){      // les r points
+	  ic = r-1 - floor(tangente[a]*p + offset) ;
+	  if ( p > 0 ){
+		d_paths[idpath*(r-1)+p-1].x = ic - iprec ;
+		d_paths[idpath*(r-1)+p-1].y = 1 ;
+	  }
+	  iprec = ic ;
+	}
+	idpath++ ;
+  }
+  // Q1 sup
+  for (int a=2 ; a>0 ; a--){         // les 2 angles 60 et 75 
+	for (int p=0 ; p< r ; p++){      // les r points
+	  jc = floor(tangente[a]*p + offset) ; 
+	  if ( p > 0 ){
+		d_paths[idpath*(r-1)+p-1].x = -1 ;
+		d_paths[idpath*(r-1)+p-1].y = jc - jprec ;
+	  }
+	  jprec = jc ;
+	}
+	idpath++ ;
+  }
+  
+  // Q2
+  basepath += 6 ;
+  for (int a=0 ; a< 6 ; a++){         // les 6 angles 90,105,120,135,150,165
+	for (int p=0 ; p<r-1 ; p++){      // les r points
+	  d_paths[idpath*(r-1)+p].x = -d_paths[(idpath - basepath)*(r-1)+p].y ;
+	  d_paths[idpath*(r-1)+p].y =  d_paths[(idpath - basepath)*(r-1)+p].x ;
+	  }
+	idpath++ ;
+  }
+
+  // Q3
+  basepath += 6 ;
+  for (int a=0 ; a< 6 ; a++){         // les 6 angles 180,195,210,225,240,255
+	for (int p=0 ; p<r-1 ; p++){      // les r points
+	  d_paths[idpath*(r-1)+p].x = -d_paths[(idpath - basepath)*(r-1)+p].x ;
+	  d_paths[idpath*(r-1)+p].y = -d_paths[(idpath - basepath)*(r-1)+p].y ;
+	  }
+	idpath++ ;
+  }
+
+  // Q4
+  basepath += 6 ;
+  for (int a=0 ; a< 6 ; a++){         // les 6 angles 270,285,300,315,330,345
+	for (int p=0 ; p<r-1 ; p++){      // les r points
+	  d_paths[idpath*(r-1)+p].x =  d_paths[(idpath - basepath)*(r-1)+p].y ;
+	  d_paths[idpath*(r-1)+p].y = -d_paths[(idpath - basepath)*(r-1)+p].x ;
+	  }
+	idpath++ ;
+  }
+}
+
+
 __global__ void kernel_init_estim_from_img_in(unsigned int * d_estim, unsigned int L, unsigned int H, unsigned int r){
   // coordonnes du point dans l'image
   unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
@@ -96,6 +162,17 @@ __global__ void kernel_init_estim_from_img_in(unsigned int * d_estim, unsigned i
   */
 }
 
+__global__ void kernel_neutre_estim_from_img_in( unsigned int * d_estim, unsigned int L, unsigned int H ){
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+  unsigned int pos = i*L +j ;
+  
+  d_estim[ pos ] = tex2D(tex_img_in, j, i) ;
+ 
+
+}
+
 __global__ void kernel_init_estim_from_img_in_global_mem(unsigned int * d_data, unsigned int * d_estim,
 														 unsigned int L, unsigned int H, unsigned int r){
   // coordonnes du point dans l'image
@@ -209,7 +286,7 @@ __global__ void kernel_levelines_texture(unsigned int * img_out, unsigned int L,
   uint2 mse ;
   
   
-  if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+  if( (i >=lpath )&&( i<= (H-lpath) )&&( j>=lpath )&&( j<=(L-lpath) ) ){
 	for( idpath=0; idpath < PSIZE_I ; idpath++) {
 	  ic = i ;
 	  jc = j ;
@@ -225,7 +302,7 @@ __global__ void kernel_levelines_texture(unsigned int * img_out, unsigned int L,
 	  }
 	  // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
 	  // a ameliorer pour vitesse
-	  mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
+	  mse_cur = ( mse.y - ( mse.x*mse.x / lpath ) ) ;
 	  if (idpath == 0) {
 		mse_min = mse_cur ;
 		val = mse.x ;
@@ -238,56 +315,61 @@ __global__ void kernel_levelines_texture(unsigned int * img_out, unsigned int L,
 	}
 	img_out[ i*L + j ] = val / lpath ; 
   }
+  else img_out[ i*L + j ] = 0 ;
 }
 
 
-
-
-__global__ void kernel_levelines_only_texture(unsigned int * img_out, unsigned int L, unsigned int H)
+__global__ void kernel_dir_levelines_texture(unsigned int * dir, unsigned int L, unsigned int H)
 {
   // coordonnes du point dans l'image
   unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
-  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
   //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
   
   // nb de points par chemin
   int lpath =  PSIZE_J ;
-  unsigned int ic, jc ;
+  unsigned int ic, jc, zc ;
   int idpath, idpix ;
-  unsigned int mse_min, mse_cur ;
-  
-  //extern __shared__ uint2 mse[] ;
+  unsigned int mse_min, mse_cur, val ;
   uint2 mse ;
   
-  if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+  
+  if( (i >=lpath )&&( i<= (H-lpath) )&&( j>=lpath )&&( j<=(L-lpath) ) ){
 	for( idpath=0; idpath < PSIZE_I ; idpath++) {
 	  ic = i ;
 	  jc = j ;
-	  mse.x = tex2D(tex_img_in, i, j) ;
-	  mse.y = tex2D(tex_img_in, i, j)*tex2D(tex_img_in, i, j) ;
+	  zc = tex2D(tex_img_estim, j, i) ;
+	  mse.x = zc ;
+	  mse.y = zc*zc ;
 	  for( idpix=0; idpix < lpath-1 ; idpix++ ) {
 		ic += pathDi[idpath][idpix] ;
 		jc += pathDj[idpath][idpix] ;
-		mse.x += tex2D( tex_img_in, ic, jc ) ;
-		mse.y += tex2D( tex_img_in, ic, jc ) * tex2D( tex_img_in, ic, jc ) ; 
+		zc = tex2D(tex_img_estim, jc, ic) ;
+		mse.x += zc ;
+		mse.y += zc*zc ; 
 	  }
 	  // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
 	  // a ameliorer pour vitesse
-	  mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
-	  if (idpath > 0) {
+	  mse_cur = ( mse.y - ( mse.x*mse.x / lpath ) ) ;
+	  if (idpath == 0) {
+		mse_min = mse_cur ;
+		val = idpath ;
+	  } else {
 		if ( mse_cur < mse_min )  {
 		  mse_min = mse_cur ;
+		  val = idpath ; 
 		}
-	  } else {
-		mse_min = mse_cur ;
-	  }
+	  } 
 	}
-	img_out[ i*L + j ] = mse_min / lpath ; 
+	dir[ i*L + j ] = val ; 
   }
+  
 }
 
 
-__global__ void kernel_trace_levelines(unsigned int * img_in, unsigned int * dir, unsigned int * img_out,
+
+
+__global__ void kernel_trace_levelines_texture(unsigned int * dir, unsigned int * img_out,
 									   unsigned int L, unsigned int H, unsigned int pas, unsigned int ng){
   // coordonnes du point dans l'image
   unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
@@ -298,9 +380,9 @@ __global__ void kernel_trace_levelines(unsigned int * img_in, unsigned int * dir
   unsigned int ic, jc, idpix ;
   unsigned int idpath  ;
 
-  img_out[ i*L + j ] = img_in[ i*L + j ] ;
+  img_out[ i*L + j ] = tex2D(tex_img_estim, j, i ) ;
   
-  if ( !(i%pas+j%pas)&&(i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath) ){
+  if ( !(i%pas+j%pas)&&(i>=lpath)&&(i<=H-lpath)&&(j>=lpath)&&(j<=L-lpath) ){
 	ic = i ;
 	jc = j ;
 	idpath = dir[ic*L+jc] ;
diff --git a/main.cu b/main.cu
index d9db4f8..b2c1681 100644
--- a/main.cu
+++ b/main.cu
@@ -16,181 +16,232 @@
 
 #include "levelines_kernels.cu"
 
+const float tang[] = {0, 0.268, 0.577, 1} ;
 
-__global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){
-  
-  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
-  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
-  unsigned int pos = i*L +j ;
 
-  ptr2[pos] = val - ptr1[pos] ;
+void genPaths(unsigned int *h_paths, int *p_i, int *p_j, unsigned int r ){
+  unsigned int idpath = 0 ;
+  int ic, jc ;
+  float offset = 0.5 ;
+    
+  // Q1 inf
+  for (int a=0 ; a< 4 ; a++){        // les 4 angles 0,15,30 et 45
+	for (int p=0 ; p< r ; p++){      // les r points
+	  jc = p ;
+	  ic = r-1 - floor(tang[a]*p + offset) ;
+	  h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+	  if ( p > 0 ){
+		p_i[idpath*(r-1)+p-1] = ic ;
+		p_j[idpath*(r-1)+p-1] = jc ;
+	  }
+	}
+	idpath++ ;
+  }
+  // Q1 sup
+  for (int a=2 ; a>0 ; a--){         // les 2 angles 60 et 75 
+	for (int p=0 ; p< r ; p++){      // les r points
+	  ic = r-1 - p ;
+	  jc = floor(tang[a]*p + offset) ; 
+	  h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+	  if ( p > 0 ){
+		p_i[idpath*(r-1)+p-1] = ic ;
+		p_j[idpath*(r-1)+p-1] = jc ;
+	  }
+	}
+	idpath++ ;
+  }
+
+  // Q2 inf
+  for (int a=0 ; a< 4 ; a++){        // les 4 angles 90,105,130 et 145
+	for (int p=0 ; p< r ; p++){      // les r points
+	  ic = r-1 - p ;
+	  jc = r-1 - floor(tang[a]*p + offset) ;
+	  h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+	  if ( p > 0 ){
+		p_i[idpath*(r-1)+p-1] = ic ;
+		p_j[idpath*(r-1)+p-1] = jc ;
+	  }
+	}
+	idpath++ ;
+  }
+  // Q2 sup
+  for (int a=2 ; a>0 ; a--){         // les 2 angles 60 et 75 
+	for (int p=0 ; p< r ; p++){      // les r points
+	  jc = r-1 - p ;
+	  ic = r-1 - floor(tang[a]*p + offset) ; 
+	  h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+	  if ( p > 0 ){
+		p_i[idpath*(r-1)+p-1] = ic ;
+		p_j[idpath*(r-1)+p-1] = jc ;
+	  }
+	}
+	idpath++ ;
+  }
+
+
+  // Q3 inf
+  for (int a=0 ; a< 4 ; a++){        // les 4 angles 90,105,130 et 145
+	for (int p=0 ; p< r ; p++){      // les r points
+	  jc = r-1 - p ;
+	  ic = floor(tang[a]*p + offset) ;
+	  h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+	  if ( p > 0 ){
+		p_i[idpath*(r-1)+p-1] = ic ;
+		p_j[idpath*(r-1)+p-1] = jc ;
+	  }
+	}
+	idpath++ ;
+  }
+  // Q3 sup
+  for (int a=2 ; a>0 ; a--){         // les 2 angles 60 et 75 
+	for (int p=0 ; p< r ; p++){      // les r points
+	  ic = p ;
+	  jc = r-1 - floor(tang[a]*p + offset) ; 
+	  h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+	  if ( p > 0 ){
+		p_i[idpath*(r-1)+p-1] = ic ;
+		p_j[idpath*(r-1)+p-1] = jc ;
+	  }
+	}
+	idpath++ ;
+  }
+
+
+  // Q4 inf
+  for (int a=0 ; a< 4 ; a++){        // les 4 angles 90,105,130 et 145
+	for (int p=0 ; p< r ; p++){      // les r points
+	  ic = p ;
+	  jc = floor(tang[a]*p + offset) ;
+	  h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+	  if ( p > 0 ){
+		p_i[idpath*(r-1)+p-1] = ic ;
+		p_j[idpath*(r-1)+p-1] = jc ;
+	  }
+	}
+	idpath++ ;
+  }
+  // Q4 sup
+  for (int a=2 ; a>0 ; a--){         // les 2 angles 60 et 75 
+	for (int p=0 ; p< r ; p++){      // les r points
+	  jc = p ;
+	  ic = floor(tang[a]*p + offset) ; 
+	  h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+	  if ( p > 0 ){
+		p_i[idpath*(r-1)+p-1] = ic ;
+		p_j[idpath*(r-1)+p-1] = jc ;
+	  }
+	}
+	idpath++ ;
+  }
   
 }
 
-int main(int argc, char **argv){
 
+int main(int argc, char **argv){
 
-  //float coef_regul = atof( argv[1] ) ;
-
+  // use device with highest Gflops/s
+  cudaSetDevice( cutGetMaxGflopsDeviceId() );
   unsigned int timer ;
   cutilCheckError( cutCreateTimer(&timer) );
   cutilCheckError( cutResetTimer(timer) );
-  /*****************************
-   *    CHARGEMENT IMAGE
-   *****************************/
-  char* image_path = argv[argc-1];
-  char* image_out = "./image_out.pgm" ;
-  unsigned int * h_data = NULL ;
-  unsigned int * h_data_out = NULL ;
-  unsigned int H, L, size;
-
   cutilCheckError( cutStartTimer(timer) );
-  cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
-  cutilCheckError( cutStopTimer(timer) );
+  // une alloc debile pour initialiser la carte GPU
+  int * d_void ;
+  cutilSafeCall( cudaMalloc( (void**) &d_void, 4)) ;
   
-  size = H * L * sizeof( unsigned int ); 
-  printf("Loaded  %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path,  cutGetTimerValue(timer));
-
+  
+  
+  /*********************************
+   *    DEFINITION PARAMS CHEMINS
+   *********************************/
+  char* image_out = "./image_out.pgm" ;
+  int *p_i, *p_j ;
+  int2 * d_paths ;
+  unsigned int * h_paths ;
+  unsigned int R = atoi(argv[1]);
+ 
+  
+  //unsigned int size = R * R * sizeof( unsigned int ); 
 
-  //essai alloc mapped
-  /*
-  cutilCheckError( cutResetTimer(timer) );
-  cutilCheckError( cutStartTimer(timer) );
-  unsigned int * h_ptr1, * d_ptr1 ;
-  unsigned int * h_ptr2, * d_ptr2 ;
-  int  h =  ;
-  int l = h ;
-  int mem = h*l*sizeof(unsigned int) ;
-  cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
-  cutilCheckError( cutStopTimer(timer) );
-  printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ;
 
-  cutilCheckError( cutStartTimer(timer) );
-  cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped));
-  cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped));
-  cutilCheckError( cutStopTimer(timer) );
-  printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+  // allocation mem
+  int memsize = 24*(R-1)*sizeof(int2) ;
+  cutilSafeCall( cudaMalloc( (void**) &d_paths, memsize ) );
   
-  for (int i = 0; i<h*l ; i++) h_ptr1[i] = 200 ;
-
-  cutilCheckError( cutStartTimer(timer) );
-  cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr1, (void *)h_ptr1, 0));
-  cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr2, (void *)h_ptr2, 0));
-  cutilCheckError( cutStopTimer(timer) );
-  printf("Temps cumul get pointer  Mapped : %f ms\n", cutGetTimerValue(timer)) ;
   
-  cutilCheckError( cutStartTimer(timer) );
-  dim3 blocks(16,16,1) ;
-  dim3 grid( h / blocks.x, l / blocks.y, 1 ) ;
+  h_paths = new unsigned int [24*R*R] ;
+  p_i = new int [24*(R-1)] ;
+  p_j = new int [24*(R-1)] ;
   
-  kernel_debil<<< grid, blocks >>>(d_ptr1, d_ptr2, l, 255) ;
+  for (int j=0; j<24*R*R ; j++) h_paths[j] = 0 ;
 
-  cutilCheckError( cutStopTimer(timer) );
-  printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+  genPaths(h_paths, p_i, p_j, R) ;
+  
+  printf("Rayon : %d pixels \n", R) ;
+
+  //matrice p_i
+  printf("P_I\n");
+  for (int idpath=0; idpath < 24; idpath++){
+	printf("\n");
+	for (int idpix=0; idpix < R-1; idpix++){
+	  printf(" %d ", p_i[idpath*(R-1)+idpix]);
+	}
+  }
+  //matrice p_j
+  printf("\nP_J\n");
+  for (int idpath=0; idpath < 24; idpath++){
+	printf("\n");
+	for (int idpix=0; idpix < R-1; idpix++){
+	  printf(" %d ", p_j[idpath*(R-1)+idpix]);
+	}
+  }
   
-  char * image_1 = "./image_1.pgm" ;
-  char * image_2 = "./image_2.pgm" ;
   
-  cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ;
-  cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ;
-  */
   /*****************************
-   *     FIN CHARGEMENT IMAGE
+   * APPELS KERNELS et chronos
    *****************************/
   
+  dim3 dimBlock( 1, 1, 1 ) ;
+  dim3 dimGrid( 1, 1, 1 ) ;
+ 
+  kernel_calcul_paths<<< dimGrid, dimBlock, 0 >>>(d_paths, R) ;
   
-    
-  // use device with highest Gflops/s
-  cudaSetDevice( cutGetMaxGflopsDeviceId() );
-  
+  printf("\nGrille : %d x %d de Blocs : %d x %d \n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y) ;
   
-  /*
-	cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
-	cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
-	cutilSafeCall( cudaThreadSynchronize() );
-	cutilCheckError( cutResetTimer(hTimer) );
-	cutilCheckError( cutStartTimer(hTimer) );
-    
-	cutilSafeCall( cudaThreadSynchronize() );
-	cutilCheckError( cutStopTimer(hTimer) );
-	gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
-  */
-
-  cutilCheckError( cutResetTimer(timer) );
-  cutilCheckError( cutStartTimer(timer) );
-  // allocation mem GPU
-  unsigned int * d_directions =NULL ;
-  unsigned int * d_lniv, * d_estim = NULL ;
-
-  cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
-  cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
-  cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
+ 
+  /**************************
+   * VERIFS 
+   **************************/
 
+  int2 * paths = new int2[24*(R-1)] ;
+  cutilSafeCall( cudaMemcpy(paths , d_paths, memsize, cudaMemcpyDeviceToHost) );
   
-  // allocate array and copy image data
-  cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
-  cudaArray * array_img_in, *array_img_estim, *array_img_lniv;
-  cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H )); 
-  cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
-  cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
-  cutilCheckError( cutStopTimer(timer) );
-  
-  cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H )); 
-  cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
-
-  cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H )); 
-  cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
+  //matrice p_i
+  printf("P_I\n");
+  for (int idpath=0; idpath < 24; idpath++){
+	printf("\n");
+	for (int idpix=0; idpix < R-1; idpix++){
+	  printf(" %d ", paths[idpath*(R-1)+idpix].x);
+	}
+	printf("\t // %d", 15*idpath);
+  }
+  //matrice p_j
+  printf("\nP_J\n");
+  for (int idpath=0; idpath < 24; idpath++){
+	printf("\n");
+	for (int idpix=0; idpix < R-1; idpix++){
+	  printf(" %d ", paths[idpath*(R-1)+idpix].y);
+	}
+	printf("\t // %d", 15*idpath);
+  }
 
-  printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
-  /*****************************
-   * APPELS KERNELS et chronos
-   *****************************/
-  cutilCheckError( cutResetTimer(timer) );
-  cutilCheckError( cutStartTimer(timer) );
+  printf("\n");
   
-	unsigned int iter , nb_iter = 15 ;
-	unsigned int  poids = 15 ;
-	dim3 dimBlock(8,8,1) ;
-	dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
-	unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
-	// init image estimee avec image_in
-	kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
-	
-	printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
-	
-	for ( iter =0 ; iter < nb_iter ; iter++ )
-	  {
-		cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
-		kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H );
-		cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
-		kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
-	}
-	
-	cudaThreadSynchronize();
-	
-	cutilCheckError( cutStopTimer(timer) );
-	printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
-	printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
-
-	/**************************
-	 * VERIFS 
-	 **************************/
-	//trace des lniv sur grille de 'pas x pas'
-	//kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
-	//cudaThreadSynchronize();
-	
-	// enregistrement image lniv GPU
-	h_data_out = new unsigned int[H*L] ;
-	if ( h_data_out != NULL)
-	  cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
-        else
-	  printf("Echec allocation mem CPU\n"); 		
-
-	cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
-
-	// calcul lniv CPU
-	
-	
+	// enregistrement image des PATHS
+	//cutilSafeCall( cudaMemcpy(h_paths , d_paths, size, cudaMemcpyDeviceToHost) );
+     
+	cutilCheckError( cutSavePGMi(image_out, h_paths, R, 24*R) ) ;
+
 	// TODO verifier pourquoi les deux lignes suivantes produisent une erreur
 	//cutilExit(argc, argv);
 	//cudaThreadExit();
diff --git a/obj/release/main_gmem.cu.o b/obj/release/main_gmem.cu.o
new file mode 100644
index 0000000..264c1c6
Binary files /dev/null and b/obj/release/main_gmem.cu.o differ