From: Gilles Perrot <gilles.perrot@univ-fcomte.fr>
Date: Fri, 3 Jun 2011 14:24:59 +0000 (+0200)
Subject: initial commit for lniv
X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/lniv_gpu.git/commitdiff_plain/1a59c41c272e22861e759e660a64578b47147d85

initial commit for lniv
---

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2801dda
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,49 @@
+################################################################################
+#
+# Copyright 1993-2006 NVIDIA Corporation.  All rights reserved.
+#
+# NOTICE TO USER:   
+#
+# This source code is subject to NVIDIA ownership rights under U.S. and 
+# international Copyright laws.  
+#
+# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE 
+# CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR 
+# IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH 
+# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF 
+# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.   
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, 
+# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE 
+# OR PERFORMANCE OF THIS SOURCE CODE.  
+#
+# U.S. Government End Users.  This source code is a "commercial item" as 
+# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting  of 
+# "commercial computer software" and "commercial computer software 
+# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) 
+# and is provided to the U.S. Government only as a commercial end item.  
+# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 
+# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the 
+# source code with only those rights set forth herein.
+#
+################################################################################
+#
+# Build script for project
+#
+################################################################################
+
+# Add source files here
+EXECUTABLE	:= levelines
+# CUDA source files (compiled with cudacc)
+CUFILES		:= main_gmem.cu
+# CUDA dependency files
+CU_DEPS		:= levelines_common.h
+# C/C++ source files (compiled with gcc / c++)
+CCFILES		:= 
+
+
+################################################################################
+# Rules and targets
+
+include ../../common/common.mk
diff --git a/defines.h b/defines.h
new file mode 100644
index 0000000..cddf7c6
--- /dev/null
+++ b/defines.h
@@ -0,0 +1,29 @@
+#ifndef __DEFINES_H__
+#define __DEFINES_H__
+
+
+
+/**
+ * \def SIZE_NAME_FILE longueur maxi associee aux noms de fichiers
+ * \def SIZE_LINE_TEXT longueur maxi associee a une ligne de texte
+ */
+#define SIZE_NAME_FILE 256 
+#define SIZE_LINE_TEXT 256 
+
+#define COEF_DECROI 0.99999 
+#define INV_COEF_DECROI 1.00001
+
+#define BSMAX 512
+#define MAX(x,y) ( ( (x)>=(y) )?(x):(y) )
+#define ABS(x) ( ((x)>0)?(x):-(x))
+#define DEC 4
+#define DEC2 8 
+#define CONFLICT_FREE_OFFSET(index)  ( ((index) >>(DEC)) + ((index) >>(DEC2) ) )
+#define CFO(index)  ( ( (index) >>(DEC) ) + ( (index) >>(DEC2) ) ) 
+#define CFI(index)  ( (index) + (CFO(index)) )
+
+//dimension de la matrice definissant les chemins des lignes de niveaux
+#define PSIZE_I 24
+#define PSIZE_J 4
+
+#endif 
diff --git a/levelines_common.h b/levelines_common.h
new file mode 100644
index 0000000..ddb0ea2
--- /dev/null
+++ b/levelines_common.h
@@ -0,0 +1,24 @@
+
+
+#ifndef LEVELINES_COMMON_H
+#define LEVELINES_COMMON_H
+
+
+
+#include <cuda_runtime.h>
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Reference CPU functions
+////////////////////////////////////////////////////////////////////////////////
+//extern "C" void fonc(...);
+
+////////////////////////////////////////////////////////////////////////////////
+// GPU functions (in file.cu)
+////////////////////////////////////////////////////////////////////////////////
+//extern "C" void fonc(float *h_Kernel);
+
+
+#endif
diff --git a/levelines_kernels.cu b/levelines_kernels.cu
new file mode 100644
index 0000000..491c600
--- /dev/null
+++ b/levelines_kernels.cu
@@ -0,0 +1,315 @@
+
+// chemins des lignes de niveaux
+// longueur = 4 pixels
+// une ligne = un chemin
+
+__constant__ int pathDi[PSIZE_I][PSIZE_J-1] =
+  {
+    // Q1
+	{  -1, -1, -1},       // 
+	{  -1, -1, -1},       //  
+	{  -1, -1, -1},       // 
+	{  -1, -1, -1},       // 
+	{  -1,  0,  1},       // 
+	{   0, -1,  0},
+	// Q4
+	{   0,  0,  0},       // 
+	{   0,  1,  1},       //  
+	{   1,  0,  1},       // 
+	{   1,  1,  1},       // 
+	{   1,  1,  1},       // 
+	{   1,  1,  1},
+	// Q3
+	{   1,  1,  1},       // 
+	{   1,  1,  1},       //  
+	{   1,  1,  1},       // 
+	{   1,  1,  1},       // 
+	{   1,  0, -1},       // 
+	{   0,  1,  0},
+	// Q2
+	{   0,  0,  0},       // 
+	{   0, -1,  0},       //  
+	{  -1,  0, -1},       // 
+	{  -1, -1, -1},       // 
+	{  -1, -1, -1},       // 
+	{  -1, -1, -1}
+  } ;     // 
+  
+__constant__ int pathDj[PSIZE_I][PSIZE_J-1] =
+  {
+	// Q1
+	{  0,  0,  0},       // 
+	{  0,  1,  0},
+	{  1,  0,  1},  
+	{  1,  1,  1},  
+	{  1,  1,  1},  
+	{  1,  1,  1},
+	// Q4
+	{  1,  1,  1},       // 
+	{  1,  1,  1},
+	{  1,  1,  1},  
+	{  1,  1,  1},  
+	{  1,  0, -1},  
+	{  0,  1,  0},
+	// Q3
+	{  0,  0,  0},       // 
+	{  0, -1,  0},
+	{ -1,  0, -1},  
+	{ -1, -1, -1},  
+	{ -1, -1, -1},  
+	{ -1, -1, -1},
+	// Q2
+	{ -1, -1, -1},       // 
+	{ -1, -1, -1},
+	{ -1, -1, -1},  
+	{ -1, -1, -1},  
+	{ -1,  0,  1},  
+	{  0, -1,  0}
+  } ;     
+
+  
+// declare texture reference for 2D int texture
+texture<int, 2, cudaReadModeElementType> tex_img_in ;
+texture<int, 2, cudaReadModeElementType> tex_img_estim ;
+texture<int, 2, cudaReadModeElementType> tex_img_lniv ;
+
+
+__global__ void kernel_init_estim_from_img_in(unsigned int * d_estim, unsigned int L, unsigned int H, unsigned int r){
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+  unsigned int pos = i*L +j ;
+  unsigned int ic , jc, ng ;
+  
+  if( (i>r)&&(i<H-r)&&(j>r)&&(j<L-r) ){
+	ng = 0 ;
+	for (ic = i - r ; ic <= i + r ; ic++){
+	  for (jc = j - r ; jc <= j + r ; jc++){
+		ng += tex2D(tex_img_in, jc, ic ) ;
+	  }
+	}
+	d_estim[ pos ] = ng/((2*r+1)*(2*r+1)) ;
+  }
+  /*
+  else
+  d_estim[ pos ] = tex2D(tex_img_in, j, i) ;
+  */
+}
+
+__global__ void kernel_init_estim_from_img_in_global_mem(unsigned int * d_data, unsigned int * d_estim,
+														 unsigned int L, unsigned int H, unsigned int r){
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+  unsigned int pos = i*L +j ;
+  unsigned int ic , jc, ng ;
+  
+  if( (i>r)&&(i<H-r)&&(j>r)&&(j<L-r) ){
+	ng = 0 ;
+	for (ic = i - r ; ic <= i + r ; ic++){
+	  for (jc = j - r ; jc <= j + r ; jc++){
+		ng += d_data[ ic*L + jc ] ;
+	  }
+	}
+	d_estim[ pos ] = ng/((2*r+1)*(2*r+1)) ;
+  } 
+}
+
+__global__ void kernel_estim_next_step_global_mem(unsigned int * d_estim, unsigned int * d_lniv, unsigned int L, unsigned int H, unsigned int p){
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+  unsigned int pos = i*L +j ;
+
+  d_estim[ pos ] = ( tex2D(tex_img_in, j, i) + p*d_lniv[ pos ] )/(1+p) ;
+  
+}
+
+__global__ void kernel_estim_next_step_texture(unsigned int * d_estim, unsigned int L, unsigned int H, unsigned int p){
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+  unsigned int pos = i*L +j ;
+
+  d_estim[ pos ] = ( tex2D(tex_img_in, j, i) + p*tex2D(tex_img_lniv, j, i ))/(1+p) ;
+  
+}
+
+__global__ void kernel_estim_next_step_global_mem(unsigned int * d_estim, unsigned int * d_lniv, unsigned int * d_data,
+												  unsigned int L, unsigned int H, unsigned int p){
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+  unsigned int pos = i*L +j ;
+
+  d_estim[ pos ] = ( d_data[ pos ] + p*d_lniv[ pos ])/(1+p) ;
+  
+}
+
+__global__ void kernel_levelines_global_mem(unsigned int * img_in, unsigned int * img_out, unsigned int L, unsigned int H)
+{
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+  //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
+  
+  // nb de points par chemin
+  int lpath =  PSIZE_J ;
+  unsigned int ic, jc, zc, pos = i*L+j;
+  int idpath, idpix ;
+  unsigned int mse_min, mse_cur, val ;
+  uint2 mse ;
+  
+  
+  if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+	for( idpath=0; idpath < PSIZE_I ; idpath++) {
+	  ic = i ;
+	  jc = j ;
+	  pos = ic*L + jc ;
+	  zc = img_in[ pos ] ;
+	  mse.x = zc ;
+	  mse.y = zc*zc ;
+	  for( idpix=0; idpix < lpath-1 ; idpix++ ) {
+		ic += pathDi[idpath][idpix] ;
+		jc += pathDj[idpath][idpix] ;
+		pos = ic*L + jc ;
+		zc = img_in[ pos ] ;
+		mse.x += zc ;
+		mse.y += zc*zc ; 
+	  }
+	  // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
+	  // a ameliorer pour vitesse
+	  mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
+	  if (idpath == 0) {
+		mse_min = mse_cur ;
+		val = mse.x ;
+	  } else {
+		if ( mse_cur < mse_min )  {
+		  mse_min = mse_cur ;
+		  val = mse.x ; 
+		}
+	  } 
+	}
+	img_out[ i*L + j ] = val / lpath ; 
+  }
+}
+
+__global__ void kernel_levelines_texture(unsigned int * img_out, unsigned int L, unsigned int H)
+{
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+  //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
+  
+  // nb de points par chemin
+  int lpath =  PSIZE_J ;
+  unsigned int ic, jc, zc ;
+  int idpath, idpix ;
+  unsigned int mse_min, mse_cur, val ;
+  uint2 mse ;
+  
+  
+  if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+	for( idpath=0; idpath < PSIZE_I ; idpath++) {
+	  ic = i ;
+	  jc = j ;
+	  zc = tex2D(tex_img_estim, j, i) ;
+	  mse.x = zc ;
+	  mse.y = zc*zc ;
+	  for( idpix=0; idpix < lpath-1 ; idpix++ ) {
+		ic += pathDi[idpath][idpix] ;
+		jc += pathDj[idpath][idpix] ;
+		zc = tex2D(tex_img_estim, jc, ic) ;
+		mse.x += zc ;
+		mse.y += zc*zc ; 
+	  }
+	  // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
+	  // a ameliorer pour vitesse
+	  mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
+	  if (idpath == 0) {
+		mse_min = mse_cur ;
+		val = mse.x ;
+	  } else {
+		if ( mse_cur < mse_min )  {
+		  mse_min = mse_cur ;
+		  val = mse.x ; 
+		}
+	  } 
+	}
+	img_out[ i*L + j ] = val / lpath ; 
+  }
+}
+
+
+
+
+__global__ void kernel_levelines_only_texture(unsigned int * img_out, unsigned int L, unsigned int H)
+{
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;;
+  //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
+  
+  // nb de points par chemin
+  int lpath =  PSIZE_J ;
+  unsigned int ic, jc ;
+  int idpath, idpix ;
+  unsigned int mse_min, mse_cur ;
+  
+  //extern __shared__ uint2 mse[] ;
+  uint2 mse ;
+  
+  if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+	for( idpath=0; idpath < PSIZE_I ; idpath++) {
+	  ic = i ;
+	  jc = j ;
+	  mse.x = tex2D(tex_img_in, i, j) ;
+	  mse.y = tex2D(tex_img_in, i, j)*tex2D(tex_img_in, i, j) ;
+	  for( idpix=0; idpix < lpath-1 ; idpix++ ) {
+		ic += pathDi[idpath][idpix] ;
+		jc += pathDj[idpath][idpix] ;
+		mse.x += tex2D( tex_img_in, ic, jc ) ;
+		mse.y += tex2D( tex_img_in, ic, jc ) * tex2D( tex_img_in, ic, jc ) ; 
+	  }
+	  // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
+	  // a ameliorer pour vitesse
+	  mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
+	  if (idpath > 0) {
+		if ( mse_cur < mse_min )  {
+		  mse_min = mse_cur ;
+		}
+	  } else {
+		mse_min = mse_cur ;
+	  }
+	}
+	img_out[ i*L + j ] = mse_min / lpath ; 
+  }
+}
+
+
+__global__ void kernel_trace_levelines(unsigned int * img_in, unsigned int * dir, unsigned int * img_out,
+									   unsigned int L, unsigned int H, unsigned int pas, unsigned int ng){
+  // coordonnes du point dans l'image
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;;
+    
+  // nb de points par chemin
+  int lpath =  PSIZE_J ;
+  unsigned int ic, jc, idpix ;
+  unsigned int idpath  ;
+
+  img_out[ i*L + j ] = img_in[ i*L + j ] ;
+  
+  if ( !(i%pas+j%pas)&&(i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath) ){
+	ic = i ;
+	jc = j ;
+	idpath = dir[ic*L+jc] ;
+	img_out[ ic*L+jc ] = ng ;
+	for ( idpix=0 ; idpix < lpath-1 ; idpix++ ){
+	  ic += pathDi[idpath][idpix] ;
+	  jc += pathDj[idpath][idpix] ;
+	  img_out[ ic*L + jc ] = ng ;
+	  }
+  }
+  
+}
diff --git a/lib_alloc.c b/lib_alloc.c
new file mode 100644
index 0000000..a9a3670
--- /dev/null
+++ b/lib_alloc.c
@@ -0,0 +1,136 @@
+/**
+ * \file lib_alloc.c
+ * \brief routines d'allocation des differentes datas du snake2D3D
+ * \author NB - PhyTI 
+ * \version x.x
+ * \date 20 decembre 2009
+ *
+ */
+
+#include <malloc.h>
+#include <mm_malloc.h>
+
+#include "lib_alloc.h"
+
+/**
+ * \brief allocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in]  i_dim dimension verticale du tableau
+ * \param[in]  j_dim dimension horizontale du tableau 
+ *
+ * \return pointeur sur le tableau
+ *
+ */
+void ***new_matrix_ptr(int i_dim, int j_dim)
+{
+  /* allocation en ligne */
+  void ***matrice ;
+  void **vecteur ATT_ALIGN_SSE ;
+  int i ;
+  
+  vecteur = (void**)malloc(sizeof(void*)*i_dim*j_dim) ;
+  matrice = (void***)malloc(sizeof(void**)*i_dim) ;
+  for (i=0;i<i_dim;i++)
+    matrice[i] = &(vecteur[i*j_dim]) ;
+  return matrice;
+}
+
+/**
+ * \brief deallocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in]  image tableau int 2D avec allocation en ligne 
+ * \param[in]  i_dim dimension horizontale du tableau 
+ *
+ *
+ */
+void del_matrix_ptr(void ***image, int i_dim)
+{
+  /* allocation en ligne */
+  free(image[0]) ;
+  free(image) ;
+}
+
+
+
+/**
+ * \brief allocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in]  i_dim dimension verticale du tableau
+ * \param[in]  j_dim dimension horizontale du tableau 
+ *
+ * \return pointeur sur le tableau
+ *
+ */
+int **new_matrix_int(int i_dim, int j_dim)
+{
+  /* allocation en ligne */
+  int **matrice ;
+  int *vecteur ATT_ALIGN_SSE ;
+  int i ;
+  
+  vecteur = (int*)_mm_malloc(sizeof(int)*i_dim*j_dim, ALIGN_SSE) ;
+  matrice = (int**)malloc(sizeof(int*)*i_dim) ;
+  for (i=0;i<i_dim;i++)
+    matrice[i] = &(vecteur[i*j_dim]) ;
+  return matrice;
+}
+
+
+/**
+ * \fn void del_matrix_int(int **image, int i_dim)
+ * \brief deallocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in]  image tableau int 2D avec allocation en ligne 
+ * \param[in]  i_dim dimension horizontale du tableau 
+ *
+ *
+ */
+void del_matrix_int(int **image, int i_dim)
+{
+  /* allocation en ligne */
+  _mm_free(image[0]) ;
+  free(image) ;
+}
+
+
+
+/**
+ * \fn int **new_matrix_pixel_cumul_sse(int i_dim, int j_dim)
+ * \brief allocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])n et aligne(16)
+ *
+ * \param[in]  i_dim dimension verticale du tableau
+ * \param[in]  j_dim dimension horizontale du tableau 
+ *
+ * \return pointeur sur le tableau
+ *
+ */
+union pixel_cumul_sse **new_matrix_pixel_cumul_sse(int i_dim, int j_dim)
+{
+  /* allocation en ligne */
+  union pixel_cumul_sse **matrice ;
+  union pixel_cumul_sse *vecteur ATT_ALIGN_SSE ;
+  int i ;
+  
+  vecteur = (union pixel_cumul_sse *)_mm_malloc(sizeof(union pixel_cumul_sse)*i_dim*j_dim, ALIGN_SSE) ;
+  matrice = (union pixel_cumul_sse **)malloc(sizeof(union pixel_cumul_sse*)*i_dim) ;
+  for (i=0;i<i_dim;i++)
+    matrice[i] = &(vecteur[i*j_dim]) ;
+  return matrice;
+}
+
+
+/**
+ * \fn void del_matrix_pixel_cumul_sse(union pixel_cumul_sse **image, int i_dim)
+ * \brief deallocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in]  image tableau int 2D avec allocation en ligne 
+ * \param[in]  i_dim dimension horizontale du tableau 
+ *
+ *
+ */
+void del_matrix_pixel_cumul_sse(union pixel_cumul_sse **image, int i_dim)
+{
+  /* allocation en ligne */
+  _mm_free(image[0]) ;
+  free(image) ;
+}
diff --git a/lib_alloc.h b/lib_alloc.h
new file mode 100644
index 0000000..9eba19d
--- /dev/null
+++ b/lib_alloc.h
@@ -0,0 +1,6 @@
+
+int **new_matrix_int(int i_dim, int j_dim) ;
+unsigned short **new_matrix_ushort(int i_dim, int j_dim) ;
+void del_matrix_int(int **image, int i_dim) ;
+
+
diff --git a/lib_images.c b/lib_images.c
new file mode 100644
index 0000000..82aeb12
--- /dev/null
+++ b/lib_images.c
@@ -0,0 +1,306 @@
+/**
+ * \file lib_images.c
+ * \brief Librairie de lecture/ecriture d'image ppm/pgm 8/16 bits
+ * \author NB - PhyTI 
+ * \version x.x
+ * \date 20 decembre 2009
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <malloc.h>
+#include <unistd.h>
+
+#include "defines.h"
+#include "lib_images.h"
+#include "lib_math.h"
+
+
+/**
+ * \fn int type_image_ppm(int *prof, int *i_dim, int *j_dim, int *level, char *file_name)
+ * \brief Fonction qui renvoie le type de l'image ppm et des caracteristiques
+ *
+ * \param[out] prof profondeur de l'image 1 pour pgm 3 pour ppm, 0 sinon
+ * \param[out] i_dim renvoie la dimension verticale de l'image (si NULL, renvoie que prof)
+ * \param[out] j_dim renvoie la dimension horizontale de l'image
+ * \param[out] level renvoie la dynamique de l'image
+ * \param[in]  file_name fichier image
+ *
+ * \return 1 si ok O sinon
+ *
+ */
+int type_image_ppm(int *prof, int *i_dim, int *j_dim, int *level, char *file_name)
+{
+  char buffer[SIZE_LINE_TEXT] ;
+  FILE *file ;
+
+  *prof = 0 ;
+ 
+  file = fopen(file_name, "rb");
+  if (file == NULL) 
+      return 0 ;
+ 
+  // lecture de la premiere ligne
+  fgets(buffer, SIZE_LINE_TEXT, file);
+ 
+  /* pgm */
+  if ((buffer[0] == 'P') & (buffer[1] == '5'))
+    *prof = 1 ; // GGG
+  /* ppm */
+  if ((buffer[0] == 'P') & (buffer[1] == '6'))
+    *prof = 3 ; // RVBRVBRVB
+
+  /* type non gere */
+  if (*prof == 0) return 0 ;
+
+  /* pour une utilisation du type */
+  /* ret = type_image_ppm(&prof, NULL, NULL, NULL, file_name) */
+  if (i_dim == NULL)  
+    return 1 ;
+
+  /* on saute les lignes de commentaires */
+  fgets(buffer, SIZE_LINE_TEXT, file);
+  while ((buffer[0] == '#')|(buffer[0] == '\n')) 
+    fgets(buffer, SIZE_LINE_TEXT, file);
+  
+  /* on lit les dimensions de l'image */
+  sscanf(buffer, "%d %d", j_dim, i_dim) ;
+  fgets(buffer, SIZE_LINE_TEXT, file);
+  sscanf(buffer, "%d", level) ;
+  
+
+  fclose(file);
+  return 1 ;
+}
+
+
+
+
+/**
+ * \fn void load_pgm2int(int **image, int i_dim, int j_dim, 
+ *		         int nb_level, char *fichier_image)
+ * \brief lecture pgm 8 ou 16 bits
+ *
+ * \param[out] image 
+ * \param[in]  i_dim dimension verticale de l'image
+ * \param[in]  j_dim dimension horizontale de l'image
+ * \param[in]  nb_level dynamique de l'image
+ * \param[in]  fichier_image fichier image
+ *
+ *
+ */
+void load_pgm2int(int **image, int i_dim, int j_dim, 
+		  int nb_level, char *fichier_image)
+{
+  int i, j ;
+  char buffer[SIZE_LINE_TEXT] ;
+  unsigned char *ligne;
+  unsigned short *ligne2;
+  FILE *file = fopen(fichier_image, "rb");
+  
+  fgets(buffer, SIZE_LINE_TEXT, file); /* P5 */
+  /* on saute les lignes de commentaires */
+  fgets(buffer, SIZE_LINE_TEXT, file);
+  while ((buffer[0] == '#')|(buffer[0] == '\n')) 
+    fgets(buffer, SIZE_LINE_TEXT, file);
+  /* derniere ligne lue : dimensions */
+  fgets(buffer, SIZE_LINE_TEXT, file); /* dynamique */
+
+  /* data */
+
+  if (nb_level < 256)
+    {
+      // fichier en char, on converti au format int
+      ligne = (unsigned char*)malloc(sizeof(unsigned char)*j_dim) ;
+      
+      for (i=0;i<i_dim;i++)
+	{
+	  fread(ligne, 1, j_dim, file);
+	  for (j=0;j<j_dim;j++)    
+	    image[i][j] = (int)(ligne[j]) ; 
+	}
+      free(ligne) ;
+    }
+  else
+    {
+      // fichier en short, on converti au format int
+      ligne2 = (unsigned short*)malloc(sizeof(unsigned short)*j_dim) ;
+      
+      for (i=0;i<i_dim;i++)
+	{
+	  fread(ligne2, 2, j_dim, file);
+	  for (j=0;j<j_dim;j++)    
+	    image[i][j] = (int)(ligne2[j]) ; 
+	}
+      free(ligne2);
+    }  
+  fclose(file);
+}
+
+
+void load_pgm2ushort(unsigned short **image, int i_dim, int j_dim, 
+					 int nb_level, char *fichier_image)
+{
+  int i, j ;
+  char buffer[SIZE_LINE_TEXT] ;
+  unsigned char *ligne;
+  unsigned short *ligne2;
+  FILE *file = fopen(fichier_image, "rb");
+  
+  fgets(buffer, SIZE_LINE_TEXT, file); /* P5 */
+  /* on saute les lignes de commentaires */
+  fgets(buffer, SIZE_LINE_TEXT, file);
+  while ((buffer[0] == '#')|(buffer[0] == '\n')) 
+    fgets(buffer, SIZE_LINE_TEXT, file);
+  /* derniere ligne lue : dimensions */
+  fgets(buffer, SIZE_LINE_TEXT, file); /* dynamique */
+
+  /* data */
+
+  if (nb_level < 256)
+    {
+      // fichier en char, on converti au format ushort
+      ligne = (unsigned char*)malloc(sizeof(unsigned char)*j_dim) ;
+      
+      for (i=0;i<i_dim;i++)
+	{
+	  fread(ligne, 1, j_dim, file);
+	  for (j=0;j<j_dim;j++)    
+	    image[i][j] = (unsigned short)(ligne[j]) ; 
+	}
+      free(ligne) ;
+    }
+  else
+    {
+      // fichier en short,
+      ligne2 = (unsigned short*)malloc(sizeof(unsigned short)*j_dim) ;
+      
+      for (i=0;i<i_dim;i++)
+	{
+	  fread(ligne2, 2, j_dim, file);
+	  for (j=0;j<j_dim;j++)    
+	    image[i][j] = (unsigned short)(ligne2[j]) ; 
+	}
+      free(ligne2);
+    }  
+  fclose(file);
+}
+
+
+
+/**
+ * \fn void write_int2pgm(int **image, int i_dim, int j_dim, char *fichier_image, int recal)
+ * \brief copie au format pgm 8 bits
+ *
+ * \param[in]  image 
+ * \param[in]  i_dim dimension verticale de l'image
+ * \param[in]  j_dim dimension horizontale de l'image
+ * \param[in]  fichier_image fichier image
+ * \param[in]  recal recalage de l'image en 0-255
+ *
+ */
+void write_int2pgm(int **image, int i_dim, int j_dim, char *fichier_image, int recal)
+{
+  int i, j ;
+  int val_min, val_max ; 
+  double coef ;
+  unsigned char *ligne;
+  FILE *file=fopen(fichier_image,"wb");
+
+  // entete pgm
+  // format
+  fprintf(file, "P5\n") ;
+  fprintf(file, "# Physics and Images Processing Group\n") ;
+  fprintf(file, "# Fresnel Institut - Marseille - France\n") ;
+  // taille
+  fprintf(file, "%d %d\n", j_dim, i_dim) ;
+  // dynamique
+  fprintf(file, "%d\n" , 255 );
+  
+  min_max_int1d(&val_min, &val_max, image[0], i_dim*j_dim) ;
+  coef = 255.0 / (val_max - val_min) ;
+  
+  // on converti l'image en entier 8 bits (char)
+  ligne = (unsigned char*)malloc(sizeof(unsigned char)*j_dim );
+  for (i=0;i<i_dim;i++) 
+    {
+      if (recal == 1)
+	for (j=0;j<j_dim;j++) ligne[j] = (unsigned char)(coef * (image[i][j]-val_min)) ;
+      else
+	for (j=0;j<j_dim;j++) ligne[j] = (unsigned char)image[i][j] ;
+      fwrite(ligne, 1, j_dim, file);
+    }
+
+  fclose(file);
+  free(ligne) ;
+}
+
+void write_ushort2pgm(unsigned short **image, int i_dim, int j_dim, char *fichier_image, int recal)
+{
+  int i, j ;
+  int val_min, val_max ; 
+  double coef ;
+  unsigned char *ligne;
+  FILE *file=fopen(fichier_image,"wb");
+
+  // entete pgm
+  // format
+  fprintf(file, "P5\n") ;
+  fprintf(file, "# Physics and Images Processing Group\n") ;
+  fprintf(file, "# Fresnel Institute - Marseille - France\n") ;
+  // taille
+  fprintf(file, "%d %d\n", j_dim, i_dim) ;
+  // dynamique
+  fprintf(file, "%d\n" , 255 );
+  
+  min_max_ushort1d(&val_min, &val_max, image[0], i_dim*j_dim) ;
+  coef = 255.0 / (val_max - val_min) ;
+  
+  // on converti l'image en entier 8 bits (char)
+  ligne = (unsigned char*)malloc(sizeof(unsigned char)*j_dim );
+  for (i=0;i<i_dim;i++) 
+    {
+      if (recal == 1)
+	for (j=0;j<j_dim;j++) ligne[j] = (unsigned char)(coef * (image[i][j]-val_min)) ;
+      else
+	for (j=0;j<j_dim;j++) ligne[j] = (unsigned char)image[i][j] ;
+      fwrite(ligne, 1, j_dim, file);
+    }
+
+  fclose(file);
+  free(ligne) ;
+}
+
+
+/**
+ * \fn void imagesc(int **image, int i_dim, int j_dim)
+ * \brief affiche une image via xti
+ *
+ * \param[in]  image 
+ * \param[in]  i_dim dimension verticale de l'image
+ * \param[in]  j_dim dimension horizontale de l'image
+ *
+ */
+void imagesc(int **image, int i_dim, int j_dim)
+{
+  char nom[SIZE_NAME_FILE] ;
+  //char cmd[SIZE_LINE_TEXT] ;
+
+  sprintf(nom, "imagesc_%d.pgm", getuid());
+  write_int2pgm(image, i_dim, j_dim, nom, 1) ;
+
+  // affichage avec xti
+  //sprintf(cmd, "xti %s &", nom) ;
+  //system(cmd) ;
+}
+
+void imagesc_ushort(unsigned short **image, int i_dim, int j_dim)
+{
+  char nom[SIZE_NAME_FILE] ;
+  //char cmd[SIZE_LINE_TEXT] ;
+
+  sprintf(nom, "imagesc_%d.pgm", getuid());
+  write_ushort2pgm(image, i_dim, j_dim, nom, 1) ;
+  
+}
diff --git a/lib_images.h b/lib_images.h
new file mode 100644
index 0000000..81c04e1
--- /dev/null
+++ b/lib_images.h
@@ -0,0 +1,9 @@
+
+int type_image_ppm(int *prof, int *i_dim, int *j_dim, int *level, char *file_name) ;
+void load_pgm2int(int **image, int i_dim, int j_dim, int nb_level, char *fichier_image) ;
+void load_pgm2ushort(unsigned short **image, int i_dim, int j_dim, int nb_level, char *fichier_image) ;
+
+void write_int2pgm(int **image, int i_dim, int j_dim, char *fichier_image, int recal) ;
+void write_ushort2pgm(unsigned short **image, int i_dim, int j_dim, char *fichier_image, int recal) ;
+void imagesc(int **image, int i_dim, int j_dim) ;
+void imagesc_ushort(unsigned short **image, int i_dim, int j_dim) ;
diff --git a/lib_lniv.h b/lib_lniv.h
new file mode 100644
index 0000000..38bade6
--- /dev/null
+++ b/lib_lniv.h
@@ -0,0 +1,4 @@
+
+
+double mv_gaussien_lniv(int **image_in, int **image_out, int idim, int jdim,
+			double poids, int nb_iter) ;
diff --git a/lib_lniv_common.c b/lib_lniv_common.c
new file mode 100644
index 0000000..d8b27eb
--- /dev/null
+++ b/lib_lniv_common.c
@@ -0,0 +1,99 @@
+/**
+ * \file lib_lniv_common.c
+ * \brief routines pour la gestion des lignes de niveaux
+ * \author NB - PhyTI 
+ * \version x.x
+ * \date 8 mai 2011
+ *
+ */
+
+#include <stdlib.h>
+#include <math.h>
+#include "lib_lniv_common.h"
+
+
+/**
+ *
+ * \brief calcul du PSNR entre deux images int
+ * \author NB - PhyTI
+ *
+ * \param[in] im1  image d'entree
+ * \param[in] im2  image d'entree
+ * \param[in] idim   
+ * \param[in] jdim   
+ * \param[in] range dynamique de l'image   
+ *
+ */
+double psnr_image_int(int **im1, int **im2, int idim, int jdim, int range)
+{
+  int n ;
+  double eqm = 0.0 ;
+  double err, eq = 0.0 ;
+  for (n=0; n<idim*jdim; n++)
+    {
+      err = im1[0][n]-im2[0][n] ;
+      eq += err*err ;
+    }
+  eqm = eq / (idim*jdim) ;
+
+  return  10.0*log10((double)range*range/eqm) ;
+}
+
+
+
+
+
+/**
+ *
+ * \brief Filtrage d'une image par moyenne 
+ * \author NB - PhyTI
+ *
+ * \param[out] result  image de sortie
+ * \param[in] matrix  image d'entree
+ * \param[in] i_dim   
+ * \param[in] j_dim   
+ * \param[in] dim_wind largeur du noyau carre  
+ *
+ */
+void mean2ds(int **result, int **matrix, int dim_wind,
+	     int i_dim, int j_dim)
+{
+  int dim_wind2 = (int)floor(dim_wind/2);
+  double sqr_dim_wind = dim_wind*dim_wind;
+  double tmp_diff;
+  int i, j, iw, jw, iiw ;
+
+  // On calcul les bords en version lente
+  for (i=0;i<i_dim; i++)
+    for (j=0;j<j_dim; j++)
+      {
+	if ((i>dim_wind2+1)&&(i<(i_dim-dim_wind2-1)))
+	  if (j==dim_wind2+1) 
+	    j=j_dim-dim_wind2-1;
+	
+	result[i][j] = 0;
+	for (iw=i-dim_wind2;iw<i+dim_wind2+1;iw++)
+	  for (jw=j-dim_wind2;jw<j+dim_wind2+1;jw++)
+	    if ((iw>=0)&&(jw>=0)&&(iw<i_dim)&&(jw<j_dim))
+	      result[i][j] += matrix[iw][jw] ;
+      }
+
+// On calcul le centre en version rapide
+  for (i=dim_wind2+1;i<i_dim-dim_wind2; i++)
+    {
+      for (j=dim_wind2+1;j<j_dim-dim_wind2; j++)
+	{ 
+	  tmp_diff = 0;
+	  for (iiw=i-dim_wind2;iiw<=i+dim_wind2;iiw++)
+	  tmp_diff += matrix[iiw][j+dim_wind2] - matrix[iiw][j-dim_wind2-1] ;
+	  
+	  result[i][j] = result[i][j-1] + tmp_diff ;
+	}
+    }
+  
+  // On divise pour la moyenne
+  // pb sur les bords !!!
+  for (i=0;i<i_dim;i++)
+    for (j=0;j<j_dim;j++)
+      result[i][j] /= sqr_dim_wind; 
+}
diff --git a/lib_lniv_common.h b/lib_lniv_common.h
new file mode 100644
index 0000000..70c7c1b
--- /dev/null
+++ b/lib_lniv_common.h
@@ -0,0 +1,4 @@
+double psnr_image_int(int **im1, int **im2, int idim, int jdim, int range) ;
+
+void mean2ds(int **result, int **matrix, int dim_wind,
+	     int i_dim, int j_dim) ;
diff --git a/lib_math.c b/lib_math.c
new file mode 100644
index 0000000..7069071
--- /dev/null
+++ b/lib_math.c
@@ -0,0 +1,227 @@
+/**
+ * \file lib_math.c
+ * \brief routines 
+ * \author NB - PhyTI 
+ * \version x.x
+ * \date 20 decembre 2009
+ *
+ */
+
+#include <stdio.h>
+#include "defines.h"
+#include "lib_math.h"
+
+/**
+ * \fn void tic(struct timeval* temps, char* texte)
+ * \brief Initialise le compteur de temps
+ *
+ * \param[out]  temps  
+ * \param[in]   texte texte a afficher
+ *
+ */
+void tic(struct timeval* temps, char* texte)
+{
+  gettimeofday(temps, NULL);
+
+  if (texte != NULL)
+    printf("%s\n", texte) ;
+}
+
+/**
+ * \fn double toc(struct timeval start, char* texte)
+ * \brief Calcule le temps ecoule
+ *
+ * \param[in]  start temps de debut du chrono  
+ * \param[in]  texte texte a afficher
+ *
+ * \return le temps ecoule entre tic et toc
+ */
+double toc(struct timeval start, char* texte)
+{
+  struct timeval end ;
+  double elapsed ;
+
+  gettimeofday(&end, NULL);
+
+  elapsed = (double)(end.tv_sec-start.tv_sec) 
+    + 0.000001*(double)(end.tv_usec-start.tv_usec);
+  if (texte != NULL)
+    printf("%s : %f\n", texte, elapsed) ;
+
+  return elapsed ;
+}
+
+
+/**
+ * \fn void min_max_int1d(int *val_min, int *val_max, int *vect, int dim)
+ * \brief determine le min et max d'un vecteur de int
+ *
+ * \param[out]  val_min 
+ * \param[out]  val_max 
+ * \param[in]   vect
+ * \param[in]   dim dimension du vecteur
+ *
+ */
+
+void min_max_int1d(int *val_min, int *val_max, int *vect, int dim)
+{
+  int n, min, max ;
+
+  min = vect[1];
+  max = min;
+  
+  for (n=0;n<dim;n++)
+    {
+      if (vect[n] > max) max = vect[n];
+      if (vect[n] < min) min = vect[n];
+    }
+
+  *val_min = min ;
+  *val_max = max ;
+}
+
+void min_max_ushort1d(int *val_min, int *val_max, unsigned short *vect, int dim)
+{
+  int n ;
+  unsigned short min, max ;
+
+  min = vect[1];
+  max = min;
+  
+  for (n=0;n<dim;n++)
+    {
+      if (vect[n] > max) max = vect[n];
+      if (vect[n] < min) min = vect[n];
+    }
+
+  *val_min = min ;
+  *val_max = max ;
+}
+
+
+
+
+/**
+ * \fn inline int test_inf(double arg1, double arg2)
+ *
+ * \brief test (arg1 < arg2) inferieur a avec pourcentage minimum
+ *
+ * \param[in]   arg1
+ * \param[in]   arg2
+ *
+ * return test
+ */
+inline int test_inf(double arg1, double arg2)
+{
+  if (arg2 > 0)
+    return arg1 < (arg2*COEF_DECROI) ;
+  else
+    return arg1 < (arg2*INV_COEF_DECROI) ;
+}
+
+
+
+
+
+/**
+ * \fn inline int sign_diff_ou_egal_zero(int val1, int val2)
+ *
+ * \brief fonction qui test si les arguments sont de signes differents ou nuls
+ * \author NB - PhyTI
+ *
+ * \param[in] val1 
+ * \param[in] val2
+ *
+ * \return le test 0/1 
+ *
+ */
+inline int sign_diff_ou_egal_zero(int val1, int val2)
+{
+  if (val1 > 0) 
+    {
+      if (val2 > 0) return 0 ;
+      else return 1 ;
+    }
+  else 
+    if (val1 < 0)
+      {
+		if (val2 < 0) return 0 ;
+		else  return 1 ;
+      }
+    else
+      return 1 ;/* val1 == 0 */
+}
+
+/**
+ * \fn inline int sign_diff_strict(int val1, int val2)
+ *
+ * \brief fonction qui test si les arguments sont de signes differents strictement
+ * \author NB - PhyTI
+ *
+ * \param[in] val1 
+ * \param[in] val2
+ *
+ * \return le test 0/1 
+ *
+ */
+inline int sign_diff_strict(int val1, int val2)
+{
+  if (val1 > 0) 
+    {
+      if (val2 >= 0) return 0 ;
+      else return 1 ;
+    }
+  else 
+    if (val1 < 0)
+      {
+	if (val2 <= 0) return 0 ;
+	else  return 1 ;
+      }
+    else
+      return 0 ;/* val1 == 0 */
+}
+
+
+
+/**
+ * \fn inline int sinus_triangle(int Ai, int Aj, int Bi, int Bj, int Ci, int Cj)
+ *
+ * \brief calcul le "sinus" de l'angle du triangle ABC 
+ * \author NB - PhyTI
+ *
+ * \param[in] Ai les coordonnees
+ * \param[in] Aj
+ * \param[in] Bi
+ * \param[in] Bj
+ * \param[in] Ci
+ * \param[in] Cj
+ *
+ * \return le sinus non normalise 
+ *
+ * Cette fonction est utile pour determiner si un triangle ABC
+ * est donne dans l'ordre trigo. 
+ * Signe > 0: sens trigo,
+ * signe < 0: sens antitrigo
+ *       = 0: plat
+ */
+inline int sinus_triangle(int Ai, int Aj, int Bi, int Bj, int Ci, int Cj)
+{
+  return (((Bi-Ai)*(Cj-Aj)) - ((Ci-Ai)*(Bj-Aj))) ;
+}
+
+
+/**
+ * \fn void recopie_vecteur(int *in, int *out, int dim)
+ *
+ * \brief recopie le vecteur out vers in
+ * \author NB - PhyTI
+ *
+ * \param[in]  in  vecteur d'entree
+ * \param[out] out vecteur recopier
+ * \param[in]  dim longueur du vecteur
+ */
+void recopie_vecteur(int *in, int *out, int dim)
+{
+  int n ;
+  for (n=0; n<dim; n++) out[n] = in[n] ;
+}
diff --git a/lib_math.h b/lib_math.h
new file mode 100644
index 0000000..39b2af0
--- /dev/null
+++ b/lib_math.h
@@ -0,0 +1,27 @@
+#ifndef _LIB_MATH_H
+#define _LIB_MATH_H
+
+
+#include <sys/time.h>
+
+void tic(struct timeval* temps, char* texte) ;
+double toc(struct timeval start, char* texte) ;
+
+void min_max_int1d(int *val_min, int *val_max, int *vect, int dim) ;
+void min_max_ushort1d(int *val_min, int *val_max, unsigned short *vect, int dim) ;
+
+#define min(a,b) ((a)<(b)) ? (a) : (b) 
+#define max(a,b) ((a)>(b)) ? (a) : (b) 
+
+
+inline int test_inf(double arg1, double arg2);
+
+inline int sign_diff_ou_egal_zero(int val1, int val2);
+inline int sign_diff_strict(int val1, int val2);
+
+inline int sinus_triangle(int Ai, int Aj, int Bi, int Bj, int Ci, int Cj);
+
+void recopie_vecteur(int *in, int *out, int dim) ;
+
+
+#endif
diff --git a/lniv.c b/lniv.c
new file mode 100644
index 0000000..6757945
--- /dev/null
+++ b/lniv.c
@@ -0,0 +1,186 @@
+/**
+ * \file nliv.c
+ * \brief test de reduction par MV et contriante de ligne de niveaux
+ * \author NB - PhyTI 
+ * \version x.x
+ * \date 6 mai 2011
+ *
+ */
+
+// protection
+#ifdef __PROTECT
+#include <sys/ptrace.h>
+#endif
+
+
+#include <stdio.h>
+#ifdef __MULTI_THREAD
+#include <pthread.h>
+#endif
+
+#include "lib_alloc.h"
+#include "lib_images.h"
+#include "lib_math.h"
+#include "lib_pretraitement.h"
+
+#include "lib_lniv.h"
+
+
+int main(int argc, char **argv)
+{
+// protection light
+#ifdef __PROTECT
+  if (ptrace(PTRACE_TRACEME, 0, 1, 0) < 0) return(0) ;
+#endif
+
+  /* declaration des variables */
+  int ret ;
+  int Bin_file = 0 ;
+  int Prof ;        /* profondeur en octets */
+  int I_dim ;       /* hauteur de l'image */
+  int J_dim ;       /* largeur de l'image */
+  int Nb_level ;    /* dynamique de l'image */
+  char *File_name ;
+  char *PARAM = NULL ;
+ 
+  /* images */
+  int **Image_in1, **Image_out1 ;
+  int **Image_in2=NULL, **Image_in3=NULL ;
+  int **Image_out2=NULL, **Image_out3=NULL ;
+
+
+  /* variables de calculs */
+  struct timeval chrono ;
+
+  /* debug : affichage snake */
+  int Verbose = 1 ;
+  int Display = 1 ;
+ 
+
+  /* lecture argument entree (basique!) */
+  if (argc == 1)
+    {
+      fprintf(stderr, "USAGE : LNIV pgm_file <params>\n") ;
+      fprintf(stderr, 
+	      "\n"
+	      "\n"
+	      "\n"
+	      "\n") ;
+      return(0) ;
+    }
+  File_name = argv[1] ;
+
+  /* verif type image (pgm 8/16) */
+  ret = type_image_ppm(&Prof, &I_dim, &J_dim, &Nb_level, File_name) ;
+  Nb_level++ ;
+  
+  if (ret == 0)
+    {
+      /* tentative image bin */
+      ret = load_dim_image_bin(&I_dim, &J_dim, File_name) ;
+      if (ret != 1)
+	{
+	  printf("format non pris en charge ... exit\n") ;
+	  return(0) ;
+	}
+      Nb_level = 65536 ; // quantif 16 bits pour les bin
+      Bin_file = 1 ;
+      Prof = 1 ;
+    }
+
+  /* infos */
+  if (Verbose)
+    {
+      printf("Image : %s\n", File_name) ;
+      printf("lecture OK : %d\n", ret) ;
+      printf("Image (%d x %d) pixels\n", I_dim, J_dim) ;
+      printf("Dynamique : %d\n", Nb_level) ;
+      printf("Canaux : %d\n", Prof) ;
+    } ;
+
+  /* Allocation */
+  Image_in1  = new_matrix_int(I_dim, J_dim) ;
+  Image_out1 = new_matrix_int(I_dim, J_dim) ;
+  if (Prof == 3)
+    {
+      Image_in2 = new_matrix_int(I_dim, J_dim) ;
+      Image_out2 = new_matrix_int(I_dim, J_dim) ;
+      Image_in3 = new_matrix_int(I_dim, J_dim) ;
+      Image_out3 = new_matrix_int(I_dim, J_dim) ;
+     }
+
+  /* chargement image d'entree */  
+  tic(&chrono, NULL) ;
+  if (Bin_file)
+    load_bin2int(Image_in1, I_dim, J_dim, Nb_level, File_name) ;
+  else
+    if (Prof == 1)
+      load_pgm2int(Image_in1, I_dim, J_dim, Nb_level, File_name) ;
+    else /* (Prof == 3) */
+      {
+	load_ppm2int(Image_in1, Image_in2, Image_in3, I_dim, J_dim, File_name ) ;
+	//RGB2YUV(Image_in1[0], Image_in2[0], Image_in3[0], I_dim*J_dim) ;
+      }
+
+  toc(chrono, "temps chargement image") ;
+  if (Display) image16(Image_in1, I_dim, J_dim) ; 
+  if ((Display)&&(Prof == 3)) image16(Image_in2, I_dim, J_dim) ; 
+  if ((Display)&&(Prof == 3)) image16(Image_in3, I_dim, J_dim) ; 
+ 
+ /* sequence de parametre */
+  if (argc >= 3) PARAM = argv[2] ;
+
+  double poids = 15.0 ;
+  int nb_iter = 15 ;
+  if (Prof == 1)
+    {
+      if (Verbose)
+	{
+	  printf("poids contrainte : %.3f\n", poids) ;
+	  printf("nombre d'iteration GN : %d\n", nb_iter) ;
+	}
+      tic(&chrono, NULL) ;
+      mv_gaussien_lniv(Image_in1, Image_out1, I_dim, J_dim, poids, nb_iter) ;
+      toc(chrono, "temps lniv_image") ;
+      
+      wimage16(Image_out1, I_dim, J_dim, "output.pgm") ;
+      
+    }
+  else
+    {
+      if (Verbose)
+	{
+	  printf("image couleur (YUV)\n") ;
+	  printf("poids contrainte : %.3f\n", poids) ;
+	  printf("nombre d'iteration GN : %d\n", nb_iter) ;
+	}
+      tic(&chrono, NULL) ;
+      
+      mv_gaussien_lniv(Image_in1, Image_out1, I_dim, J_dim, poids, nb_iter) ;
+      mv_gaussien_lniv(Image_in2, Image_out2, I_dim, J_dim, poids, nb_iter) ;
+      mv_gaussien_lniv(Image_in3, Image_out3, I_dim, J_dim, poids, nb_iter) ;
+
+      toc(chrono, "temps lniv_image") ;
+      
+      image16(Image_out1, I_dim, J_dim) ;
+      image16(Image_out2, I_dim, J_dim) ;
+      image16(Image_out3, I_dim, J_dim) ;
+      write_intRGB2ppm8(Image_out1, Image_out2, Image_out3, I_dim, J_dim, "output.ppm") ;
+     
+    }
+
+
+  /* Delete */
+  del_matrix_int(Image_in1, I_dim) ;
+  del_matrix_int(Image_out1, I_dim) ;
+  if (Prof == 3)
+    {
+      del_matrix_int(Image_in2, I_dim) ;
+      del_matrix_int(Image_out2, I_dim) ;
+      del_matrix_int(Image_in3, I_dim) ;
+      del_matrix_int(Image_out3, I_dim) ;
+    }
+
+
+  return 1 ;
+}
diff --git a/main.cu b/main.cu
new file mode 100644
index 0000000..d9db4f8
--- /dev/null
+++ b/main.cu
@@ -0,0 +1,198 @@
+// libs C
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "lib_lniv.h"
+
+// libs NV
+#include <cuda_runtime.h>
+#include <cutil_inline.h>
+
+// lib spec
+#include "defines.h"
+#include "levelines_common.h"
+
+#include "levelines_kernels.cu"
+
+
+__global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){
+  
+  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+  unsigned int pos = i*L +j ;
+
+  ptr2[pos] = val - ptr1[pos] ;
+  
+}
+
+int main(int argc, char **argv){
+
+
+  //float coef_regul = atof( argv[1] ) ;
+
+  unsigned int timer ;
+  cutilCheckError( cutCreateTimer(&timer) );
+  cutilCheckError( cutResetTimer(timer) );
+  /*****************************
+   *    CHARGEMENT IMAGE
+   *****************************/
+  char* image_path = argv[argc-1];
+  char* image_out = "./image_out.pgm" ;
+  unsigned int * h_data = NULL ;
+  unsigned int * h_data_out = NULL ;
+  unsigned int H, L, size;
+
+  cutilCheckError( cutStartTimer(timer) );
+  cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
+  cutilCheckError( cutStopTimer(timer) );
+  
+  size = H * L * sizeof( unsigned int ); 
+  printf("Loaded  %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path,  cutGetTimerValue(timer));
+
+
+  //essai alloc mapped
+  /*
+  cutilCheckError( cutResetTimer(timer) );
+  cutilCheckError( cutStartTimer(timer) );
+  unsigned int * h_ptr1, * d_ptr1 ;
+  unsigned int * h_ptr2, * d_ptr2 ;
+  int  h =  ;
+  int l = h ;
+  int mem = h*l*sizeof(unsigned int) ;
+  cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
+  cutilCheckError( cutStopTimer(timer) );
+  printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+
+  cutilCheckError( cutStartTimer(timer) );
+  cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped));
+  cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped));
+  cutilCheckError( cutStopTimer(timer) );
+  printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+  
+  for (int i = 0; i<h*l ; i++) h_ptr1[i] = 200 ;
+
+  cutilCheckError( cutStartTimer(timer) );
+  cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr1, (void *)h_ptr1, 0));
+  cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr2, (void *)h_ptr2, 0));
+  cutilCheckError( cutStopTimer(timer) );
+  printf("Temps cumul get pointer  Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+  
+  cutilCheckError( cutStartTimer(timer) );
+  dim3 blocks(16,16,1) ;
+  dim3 grid( h / blocks.x, l / blocks.y, 1 ) ;
+  
+  kernel_debil<<< grid, blocks >>>(d_ptr1, d_ptr2, l, 255) ;
+
+  cutilCheckError( cutStopTimer(timer) );
+  printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+  
+  char * image_1 = "./image_1.pgm" ;
+  char * image_2 = "./image_2.pgm" ;
+  
+  cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ;
+  cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ;
+  */
+  /*****************************
+   *     FIN CHARGEMENT IMAGE
+   *****************************/
+  
+  
+    
+  // use device with highest Gflops/s
+  cudaSetDevice( cutGetMaxGflopsDeviceId() );
+  
+  
+  /*
+	cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
+	cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
+	cutilSafeCall( cudaThreadSynchronize() );
+	cutilCheckError( cutResetTimer(hTimer) );
+	cutilCheckError( cutStartTimer(hTimer) );
+    
+	cutilSafeCall( cudaThreadSynchronize() );
+	cutilCheckError( cutStopTimer(hTimer) );
+	gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
+  */
+
+  cutilCheckError( cutResetTimer(timer) );
+  cutilCheckError( cutStartTimer(timer) );
+  // allocation mem GPU
+  unsigned int * d_directions =NULL ;
+  unsigned int * d_lniv, * d_estim = NULL ;
+
+  cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
+  cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
+  cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
+
+  
+  // allocate array and copy image data
+  cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
+  cudaArray * array_img_in, *array_img_estim, *array_img_lniv;
+  cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H )); 
+  cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
+  cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
+  cutilCheckError( cutStopTimer(timer) );
+  
+  cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H )); 
+  cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
+
+  cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H )); 
+  cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
+
+  printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
+  /*****************************
+   * APPELS KERNELS et chronos
+   *****************************/
+  cutilCheckError( cutResetTimer(timer) );
+  cutilCheckError( cutStartTimer(timer) );
+  
+	unsigned int iter , nb_iter = 15 ;
+	unsigned int  poids = 15 ;
+	dim3 dimBlock(8,8,1) ;
+	dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
+	unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
+	// init image estimee avec image_in
+	kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
+	
+	printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
+	
+	for ( iter =0 ; iter < nb_iter ; iter++ )
+	  {
+		cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
+		kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H );
+		cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
+		kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
+	}
+	
+	cudaThreadSynchronize();
+	
+	cutilCheckError( cutStopTimer(timer) );
+	printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
+	printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
+
+	/**************************
+	 * VERIFS 
+	 **************************/
+	//trace des lniv sur grille de 'pas x pas'
+	//kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
+	//cudaThreadSynchronize();
+	
+	// enregistrement image lniv GPU
+	h_data_out = new unsigned int[H*L] ;
+	if ( h_data_out != NULL)
+	  cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
+        else
+	  printf("Echec allocation mem CPU\n"); 		
+
+	cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
+
+	// calcul lniv CPU
+	
+	
+	// TODO verifier pourquoi les deux lignes suivantes produisent une erreur
+	//cutilExit(argc, argv);
+	//cudaThreadExit();
+	return EXIT_SUCCESS ;
+}
diff --git a/main_gmem.cu b/main_gmem.cu
new file mode 100644
index 0000000..a33fbff
--- /dev/null
+++ b/main_gmem.cu
@@ -0,0 +1,143 @@
+// libs C
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "lib_lniv.h"
+
+// libs NV
+#include <cuda_runtime.h>
+#include <cutil_inline.h>
+
+// lib spec
+#include "defines.h"
+#include "levelines_common.h"
+
+#include "levelines_kernels.cu"
+
+
+int main(int argc, char **argv){
+
+
+  //float coef_regul = atof( argv[1] ) ;
+
+  unsigned int timer ;
+  float time_cumul = 0.0 ;
+  cutilCheckError( cutCreateTimer(&timer) );
+  cutilCheckError( cutResetTimer(timer) );
+
+  /*****************************
+   *    CHARGEMENT IMAGE
+   *****************************/
+  char* image_path = argv[argc-1];
+  char* image_out = "./image_out.pgm" ;
+  unsigned int * h_data = NULL ;
+  unsigned int * h_data_out = NULL ;
+  unsigned int H, L, size;
+
+  cutilCheckError( cutStartTimer(timer) );
+  cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
+  cutilCheckError( cutStopTimer(timer) );
+  
+  size = H * L * sizeof( unsigned int ); 
+  printf("Loaded  %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path,  cutGetTimerValue(timer));
+  time_cumul += cutGetTimerValue(timer) ;
+  /*****************************
+   *     FIN CHARGEMENT IMAGE
+   *****************************/
+  
+  
+    
+  // use device with highest Gflops/s
+  cudaSetDevice( cutGetMaxGflopsDeviceId() );
+    
+  /*
+	cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
+	cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
+	cutilSafeCall( cudaThreadSynchronize() );
+	cutilCheckError( cutResetTimer(hTimer) );
+	cutilCheckError( cutStartTimer(hTimer) );
+    
+	cutilSafeCall( cudaThreadSynchronize() );
+	cutilCheckError( cutStopTimer(hTimer) );
+	gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
+  */
+
+  cutilCheckError( cutResetTimer(timer) );
+  cutilCheckError( cutStartTimer(timer) );
+  // allocation mem GPU
+  unsigned int * d_directions =NULL ;
+  unsigned int * d_lniv, * d_estim, * d_data ;
+  cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
+  cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
+  cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
+  cutilSafeCall( cudaMalloc( (void**) &d_data, size ) );
+  cutilCheckError( cutStopTimer(timer) );
+  printf("Temps alloc global mem : %f ms\n", cutGetTimerValue(timer)) ;
+  time_cumul += cutGetTimerValue(timer) ;
+
+  // transfert data -> GPU global mem
+  cutilCheckError( cutStartTimer(timer) );
+  cutilSafeCall( cudaMemcpy( d_data , h_data, size, cudaMemcpyHostToDevice) );
+  cutilCheckError( cutStopTimer(timer) );
+  printf("Temps transferts en global mem : %f ms\n", cutGetTimerValue(timer)) ;
+  time_cumul += cutGetTimerValue(timer) ;
+  /*****************************
+   * APPELS KERNELS et chronos
+   *****************************/
+  cutilCheckError( cutResetTimer(timer) );
+  cutilCheckError( cutStartTimer(timer) );
+  
+	unsigned int iter , nb_iter = 15 ;
+	unsigned int  poids = 15 ;
+	dim3 dimBlock(8,8,1) ;
+	dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
+	unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
+	// init image estimee avec image_in
+	kernel_init_estim_from_img_in_global_mem<<< dimGrid, dimBlock, 0 >>>(d_data, d_estim, L, H, 7);
+	cutilCheckError( cutStopTimer(timer) );
+	printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)) ;
+	time_cumul += cutGetTimerValue(timer) ;	
+
+	
+	// iterations
+	cutilCheckError( cutStartTimer(timer) );
+	printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
+	for ( iter =0 ; iter < nb_iter ; iter++ )
+	  {
+		kernel_levelines_global_mem<<< dimGrid, dimBlock, 0 >>>( d_estim, d_lniv, L, H );
+		kernel_estim_next_step_global_mem<<< dimGrid, dimBlock, 0 >>>(d_estim, d_lniv, d_data, L, H, poids) ;
+	}
+	cutilCheckError( cutStopTimer(timer) );
+	printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)) ;
+	time_cumul += cutGetTimerValue(timer) ;	
+	
+	printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
+	printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
+	printf("Total execution : %f ms\n", time_cumul) ;
+	
+	/**************************
+	 * VERIFS 
+	 **************************/
+	//trace des lniv sur grille de 'pas x pas'
+	//kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
+	//cudaThreadSynchronize();
+	
+	// enregistrement image lniv GPU
+	h_data_out = new unsigned int[H*L] ;
+	if ( h_data_out != NULL)
+	  cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
+        else
+	  printf("Echec allocation mem CPU\n"); 		
+
+	cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
+
+	// calcul lniv CPU
+	
+	
+	// TODO verifier pourquoi les deux lignes suivantes produisent une erreur
+	//cutilExit(argc, argv);
+	//cudaThreadExit();
+	return EXIT_SUCCESS ;
+}
diff --git a/obj/release/levelines_kernels.cu.o b/obj/release/levelines_kernels.cu.o
new file mode 100644
index 0000000..69cdd7a
Binary files /dev/null and b/obj/release/levelines_kernels.cu.o differ
diff --git a/obj/release/main.cpp.o b/obj/release/main.cpp.o
new file mode 100644
index 0000000..da796c7
Binary files /dev/null and b/obj/release/main.cpp.o differ
diff --git a/obj/release/main.cu.o b/obj/release/main.cu.o
new file mode 100644
index 0000000..20ef00a
Binary files /dev/null and b/obj/release/main.cu.o differ