--- /dev/null
+################################################################################
+#
+# Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
+#
+# NOTICE TO USER:
+#
+# This source code is subject to NVIDIA ownership rights under U.S. and
+# international Copyright laws.
+#
+# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+# OR PERFORMANCE OF THIS SOURCE CODE.
+#
+# U.S. Government End Users. This source code is a "commercial item" as
+# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
+# "commercial computer software" and "commercial computer software
+# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
+# and is provided to the U.S. Government only as a commercial end item.
+# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+# source code with only those rights set forth herein.
+#
+################################################################################
+#
+# Build script for project
+#
+################################################################################
+
+# Add source files here
+EXECUTABLE := levelines
+# CUDA source files (compiled with cudacc)
+CUFILES := main_gmem.cu
+# CUDA dependency files
+CU_DEPS := levelines_common.h
+# C/C++ source files (compiled with gcc / c++)
+CCFILES :=
+
+
+################################################################################
+# Rules and targets
+
+include ../../common/common.mk
--- /dev/null
+#ifndef __DEFINES_H__
+#define __DEFINES_H__
+
+
+
+/**
+ * \def SIZE_NAME_FILE longueur maxi associee aux noms de fichiers
+ * \def SIZE_LINE_TEXT longueur maxi associee a une ligne de texte
+ */
+#define SIZE_NAME_FILE 256
+#define SIZE_LINE_TEXT 256
+
+#define COEF_DECROI 0.99999
+#define INV_COEF_DECROI 1.00001
+
+#define BSMAX 512
+#define MAX(x,y) ( ( (x)>=(y) )?(x):(y) )
+#define ABS(x) ( ((x)>0)?(x):-(x))
+#define DEC 4
+#define DEC2 8
+#define CONFLICT_FREE_OFFSET(index) ( ((index) >>(DEC)) + ((index) >>(DEC2) ) )
+#define CFO(index) ( ( (index) >>(DEC) ) + ( (index) >>(DEC2) ) )
+#define CFI(index) ( (index) + (CFO(index)) )
+
+//dimension de la matrice definissant les chemins des lignes de niveaux
+#define PSIZE_I 24
+#define PSIZE_J 4
+
+#endif
--- /dev/null
+
+
+#ifndef LEVELINES_COMMON_H
+#define LEVELINES_COMMON_H
+
+
+
+#include <cuda_runtime.h>
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Reference CPU functions
+////////////////////////////////////////////////////////////////////////////////
+//extern "C" void fonc(...);
+
+////////////////////////////////////////////////////////////////////////////////
+// GPU functions (in file.cu)
+////////////////////////////////////////////////////////////////////////////////
+//extern "C" void fonc(float *h_Kernel);
+
+
+#endif
--- /dev/null
+
+// chemins des lignes de niveaux
+// longueur = 4 pixels
+// une ligne = un chemin
+
+__constant__ int pathDi[PSIZE_I][PSIZE_J-1] =
+ {
+ // Q1
+ { -1, -1, -1}, //
+ { -1, -1, -1}, //
+ { -1, -1, -1}, //
+ { -1, -1, -1}, //
+ { -1, 0, 1}, //
+ { 0, -1, 0},
+ // Q4
+ { 0, 0, 0}, //
+ { 0, 1, 1}, //
+ { 1, 0, 1}, //
+ { 1, 1, 1}, //
+ { 1, 1, 1}, //
+ { 1, 1, 1},
+ // Q3
+ { 1, 1, 1}, //
+ { 1, 1, 1}, //
+ { 1, 1, 1}, //
+ { 1, 1, 1}, //
+ { 1, 0, -1}, //
+ { 0, 1, 0},
+ // Q2
+ { 0, 0, 0}, //
+ { 0, -1, 0}, //
+ { -1, 0, -1}, //
+ { -1, -1, -1}, //
+ { -1, -1, -1}, //
+ { -1, -1, -1}
+ } ; //
+
+__constant__ int pathDj[PSIZE_I][PSIZE_J-1] =
+ {
+ // Q1
+ { 0, 0, 0}, //
+ { 0, 1, 0},
+ { 1, 0, 1},
+ { 1, 1, 1},
+ { 1, 1, 1},
+ { 1, 1, 1},
+ // Q4
+ { 1, 1, 1}, //
+ { 1, 1, 1},
+ { 1, 1, 1},
+ { 1, 1, 1},
+ { 1, 0, -1},
+ { 0, 1, 0},
+ // Q3
+ { 0, 0, 0}, //
+ { 0, -1, 0},
+ { -1, 0, -1},
+ { -1, -1, -1},
+ { -1, -1, -1},
+ { -1, -1, -1},
+ // Q2
+ { -1, -1, -1}, //
+ { -1, -1, -1},
+ { -1, -1, -1},
+ { -1, -1, -1},
+ { -1, 0, 1},
+ { 0, -1, 0}
+ } ;
+
+
+// declare texture reference for 2D int texture
+texture<int, 2, cudaReadModeElementType> tex_img_in ;
+texture<int, 2, cudaReadModeElementType> tex_img_estim ;
+texture<int, 2, cudaReadModeElementType> tex_img_lniv ;
+
+
+__global__ void kernel_init_estim_from_img_in(unsigned int * d_estim, unsigned int L, unsigned int H, unsigned int r){
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+ unsigned int pos = i*L +j ;
+ unsigned int ic , jc, ng ;
+
+ if( (i>r)&&(i<H-r)&&(j>r)&&(j<L-r) ){
+ ng = 0 ;
+ for (ic = i - r ; ic <= i + r ; ic++){
+ for (jc = j - r ; jc <= j + r ; jc++){
+ ng += tex2D(tex_img_in, jc, ic ) ;
+ }
+ }
+ d_estim[ pos ] = ng/((2*r+1)*(2*r+1)) ;
+ }
+ /*
+ else
+ d_estim[ pos ] = tex2D(tex_img_in, j, i) ;
+ */
+}
+
+__global__ void kernel_init_estim_from_img_in_global_mem(unsigned int * d_data, unsigned int * d_estim,
+ unsigned int L, unsigned int H, unsigned int r){
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+ unsigned int pos = i*L +j ;
+ unsigned int ic , jc, ng ;
+
+ if( (i>r)&&(i<H-r)&&(j>r)&&(j<L-r) ){
+ ng = 0 ;
+ for (ic = i - r ; ic <= i + r ; ic++){
+ for (jc = j - r ; jc <= j + r ; jc++){
+ ng += d_data[ ic*L + jc ] ;
+ }
+ }
+ d_estim[ pos ] = ng/((2*r+1)*(2*r+1)) ;
+ }
+}
+
+__global__ void kernel_estim_next_step_global_mem(unsigned int * d_estim, unsigned int * d_lniv, unsigned int L, unsigned int H, unsigned int p){
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+ unsigned int pos = i*L +j ;
+
+ d_estim[ pos ] = ( tex2D(tex_img_in, j, i) + p*d_lniv[ pos ] )/(1+p) ;
+
+}
+
+__global__ void kernel_estim_next_step_texture(unsigned int * d_estim, unsigned int L, unsigned int H, unsigned int p){
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+ unsigned int pos = i*L +j ;
+
+ d_estim[ pos ] = ( tex2D(tex_img_in, j, i) + p*tex2D(tex_img_lniv, j, i ))/(1+p) ;
+
+}
+
+__global__ void kernel_estim_next_step_global_mem(unsigned int * d_estim, unsigned int * d_lniv, unsigned int * d_data,
+ unsigned int L, unsigned int H, unsigned int p){
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+ unsigned int pos = i*L +j ;
+
+ d_estim[ pos ] = ( d_data[ pos ] + p*d_lniv[ pos ])/(1+p) ;
+
+}
+
+__global__ void kernel_levelines_global_mem(unsigned int * img_in, unsigned int * img_out, unsigned int L, unsigned int H)
+{
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+ //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
+
+ // nb de points par chemin
+ int lpath = PSIZE_J ;
+ unsigned int ic, jc, zc, pos = i*L+j;
+ int idpath, idpix ;
+ unsigned int mse_min, mse_cur, val ;
+ uint2 mse ;
+
+
+ if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+ for( idpath=0; idpath < PSIZE_I ; idpath++) {
+ ic = i ;
+ jc = j ;
+ pos = ic*L + jc ;
+ zc = img_in[ pos ] ;
+ mse.x = zc ;
+ mse.y = zc*zc ;
+ for( idpix=0; idpix < lpath-1 ; idpix++ ) {
+ ic += pathDi[idpath][idpix] ;
+ jc += pathDj[idpath][idpix] ;
+ pos = ic*L + jc ;
+ zc = img_in[ pos ] ;
+ mse.x += zc ;
+ mse.y += zc*zc ;
+ }
+ // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
+ // a ameliorer pour vitesse
+ mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
+ if (idpath == 0) {
+ mse_min = mse_cur ;
+ val = mse.x ;
+ } else {
+ if ( mse_cur < mse_min ) {
+ mse_min = mse_cur ;
+ val = mse.x ;
+ }
+ }
+ }
+ img_out[ i*L + j ] = val / lpath ;
+ }
+}
+
+__global__ void kernel_levelines_texture(unsigned int * img_out, unsigned int L, unsigned int H)
+{
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+ //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
+
+ // nb de points par chemin
+ int lpath = PSIZE_J ;
+ unsigned int ic, jc, zc ;
+ int idpath, idpix ;
+ unsigned int mse_min, mse_cur, val ;
+ uint2 mse ;
+
+
+ if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+ for( idpath=0; idpath < PSIZE_I ; idpath++) {
+ ic = i ;
+ jc = j ;
+ zc = tex2D(tex_img_estim, j, i) ;
+ mse.x = zc ;
+ mse.y = zc*zc ;
+ for( idpix=0; idpix < lpath-1 ; idpix++ ) {
+ ic += pathDi[idpath][idpix] ;
+ jc += pathDj[idpath][idpix] ;
+ zc = tex2D(tex_img_estim, jc, ic) ;
+ mse.x += zc ;
+ mse.y += zc*zc ;
+ }
+ // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
+ // a ameliorer pour vitesse
+ mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
+ if (idpath == 0) {
+ mse_min = mse_cur ;
+ val = mse.x ;
+ } else {
+ if ( mse_cur < mse_min ) {
+ mse_min = mse_cur ;
+ val = mse.x ;
+ }
+ }
+ }
+ img_out[ i*L + j ] = val / lpath ;
+ }
+}
+
+
+
+
+__global__ void kernel_levelines_only_texture(unsigned int * img_out, unsigned int L, unsigned int H)
+{
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;;
+ //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
+
+ // nb de points par chemin
+ int lpath = PSIZE_J ;
+ unsigned int ic, jc ;
+ int idpath, idpix ;
+ unsigned int mse_min, mse_cur ;
+
+ //extern __shared__ uint2 mse[] ;
+ uint2 mse ;
+
+ if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+ for( idpath=0; idpath < PSIZE_I ; idpath++) {
+ ic = i ;
+ jc = j ;
+ mse.x = tex2D(tex_img_in, i, j) ;
+ mse.y = tex2D(tex_img_in, i, j)*tex2D(tex_img_in, i, j) ;
+ for( idpix=0; idpix < lpath-1 ; idpix++ ) {
+ ic += pathDi[idpath][idpix] ;
+ jc += pathDj[idpath][idpix] ;
+ mse.x += tex2D( tex_img_in, ic, jc ) ;
+ mse.y += tex2D( tex_img_in, ic, jc ) * tex2D( tex_img_in, ic, jc ) ;
+ }
+ // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
+ // a ameliorer pour vitesse
+ mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
+ if (idpath > 0) {
+ if ( mse_cur < mse_min ) {
+ mse_min = mse_cur ;
+ }
+ } else {
+ mse_min = mse_cur ;
+ }
+ }
+ img_out[ i*L + j ] = mse_min / lpath ;
+ }
+}
+
+
+__global__ void kernel_trace_levelines(unsigned int * img_in, unsigned int * dir, unsigned int * img_out,
+ unsigned int L, unsigned int H, unsigned int pas, unsigned int ng){
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;;
+
+ // nb de points par chemin
+ int lpath = PSIZE_J ;
+ unsigned int ic, jc, idpix ;
+ unsigned int idpath ;
+
+ img_out[ i*L + j ] = img_in[ i*L + j ] ;
+
+ if ( !(i%pas+j%pas)&&(i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath) ){
+ ic = i ;
+ jc = j ;
+ idpath = dir[ic*L+jc] ;
+ img_out[ ic*L+jc ] = ng ;
+ for ( idpix=0 ; idpix < lpath-1 ; idpix++ ){
+ ic += pathDi[idpath][idpix] ;
+ jc += pathDj[idpath][idpix] ;
+ img_out[ ic*L + jc ] = ng ;
+ }
+ }
+
+}
--- /dev/null
+/**
+ * \file lib_alloc.c
+ * \brief routines d'allocation des differentes datas du snake2D3D
+ * \author NB - PhyTI
+ * \version x.x
+ * \date 20 decembre 2009
+ *
+ */
+
+#include <malloc.h>
+#include <mm_malloc.h>
+
+#include "lib_alloc.h"
+
+/**
+ * \brief allocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in] i_dim dimension verticale du tableau
+ * \param[in] j_dim dimension horizontale du tableau
+ *
+ * \return pointeur sur le tableau
+ *
+ */
+void ***new_matrix_ptr(int i_dim, int j_dim)
+{
+ /* allocation en ligne */
+ void ***matrice ;
+ void **vecteur ATT_ALIGN_SSE ;
+ int i ;
+
+ vecteur = (void**)malloc(sizeof(void*)*i_dim*j_dim) ;
+ matrice = (void***)malloc(sizeof(void**)*i_dim) ;
+ for (i=0;i<i_dim;i++)
+ matrice[i] = &(vecteur[i*j_dim]) ;
+ return matrice;
+}
+
+/**
+ * \brief deallocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in] image tableau int 2D avec allocation en ligne
+ * \param[in] i_dim dimension horizontale du tableau
+ *
+ *
+ */
+void del_matrix_ptr(void ***image, int i_dim)
+{
+ /* allocation en ligne */
+ free(image[0]) ;
+ free(image) ;
+}
+
+
+
+/**
+ * \brief allocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in] i_dim dimension verticale du tableau
+ * \param[in] j_dim dimension horizontale du tableau
+ *
+ * \return pointeur sur le tableau
+ *
+ */
+int **new_matrix_int(int i_dim, int j_dim)
+{
+ /* allocation en ligne */
+ int **matrice ;
+ int *vecteur ATT_ALIGN_SSE ;
+ int i ;
+
+ vecteur = (int*)_mm_malloc(sizeof(int)*i_dim*j_dim, ALIGN_SSE) ;
+ matrice = (int**)malloc(sizeof(int*)*i_dim) ;
+ for (i=0;i<i_dim;i++)
+ matrice[i] = &(vecteur[i*j_dim]) ;
+ return matrice;
+}
+
+
+/**
+ * \fn void del_matrix_int(int **image, int i_dim)
+ * \brief deallocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in] image tableau int 2D avec allocation en ligne
+ * \param[in] i_dim dimension horizontale du tableau
+ *
+ *
+ */
+void del_matrix_int(int **image, int i_dim)
+{
+ /* allocation en ligne */
+ _mm_free(image[0]) ;
+ free(image) ;
+}
+
+
+
+/**
+ * \fn int **new_matrix_pixel_cumul_sse(int i_dim, int j_dim)
+ * \brief allocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])n et aligne(16)
+ *
+ * \param[in] i_dim dimension verticale du tableau
+ * \param[in] j_dim dimension horizontale du tableau
+ *
+ * \return pointeur sur le tableau
+ *
+ */
+union pixel_cumul_sse **new_matrix_pixel_cumul_sse(int i_dim, int j_dim)
+{
+ /* allocation en ligne */
+ union pixel_cumul_sse **matrice ;
+ union pixel_cumul_sse *vecteur ATT_ALIGN_SSE ;
+ int i ;
+
+ vecteur = (union pixel_cumul_sse *)_mm_malloc(sizeof(union pixel_cumul_sse)*i_dim*j_dim, ALIGN_SSE) ;
+ matrice = (union pixel_cumul_sse **)malloc(sizeof(union pixel_cumul_sse*)*i_dim) ;
+ for (i=0;i<i_dim;i++)
+ matrice[i] = &(vecteur[i*j_dim]) ;
+ return matrice;
+}
+
+
+/**
+ * \fn void del_matrix_pixel_cumul_sse(union pixel_cumul_sse **image, int i_dim)
+ * \brief deallocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n])
+ *
+ * \param[in] image tableau int 2D avec allocation en ligne
+ * \param[in] i_dim dimension horizontale du tableau
+ *
+ *
+ */
+void del_matrix_pixel_cumul_sse(union pixel_cumul_sse **image, int i_dim)
+{
+ /* allocation en ligne */
+ _mm_free(image[0]) ;
+ free(image) ;
+}
--- /dev/null
+
+int **new_matrix_int(int i_dim, int j_dim) ;
+unsigned short **new_matrix_ushort(int i_dim, int j_dim) ;
+void del_matrix_int(int **image, int i_dim) ;
+
+
--- /dev/null
+/**
+ * \file lib_images.c
+ * \brief Librairie de lecture/ecriture d'image ppm/pgm 8/16 bits
+ * \author NB - PhyTI
+ * \version x.x
+ * \date 20 decembre 2009
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <malloc.h>
+#include <unistd.h>
+
+#include "defines.h"
+#include "lib_images.h"
+#include "lib_math.h"
+
+
+/**
+ * \fn int type_image_ppm(int *prof, int *i_dim, int *j_dim, int *level, char *file_name)
+ * \brief Fonction qui renvoie le type de l'image ppm et des caracteristiques
+ *
+ * \param[out] prof profondeur de l'image 1 pour pgm 3 pour ppm, 0 sinon
+ * \param[out] i_dim renvoie la dimension verticale de l'image (si NULL, renvoie que prof)
+ * \param[out] j_dim renvoie la dimension horizontale de l'image
+ * \param[out] level renvoie la dynamique de l'image
+ * \param[in] file_name fichier image
+ *
+ * \return 1 si ok O sinon
+ *
+ */
+int type_image_ppm(int *prof, int *i_dim, int *j_dim, int *level, char *file_name)
+{
+ char buffer[SIZE_LINE_TEXT] ;
+ FILE *file ;
+
+ *prof = 0 ;
+
+ file = fopen(file_name, "rb");
+ if (file == NULL)
+ return 0 ;
+
+ // lecture de la premiere ligne
+ fgets(buffer, SIZE_LINE_TEXT, file);
+
+ /* pgm */
+ if ((buffer[0] == 'P') & (buffer[1] == '5'))
+ *prof = 1 ; // GGG
+ /* ppm */
+ if ((buffer[0] == 'P') & (buffer[1] == '6'))
+ *prof = 3 ; // RVBRVBRVB
+
+ /* type non gere */
+ if (*prof == 0) return 0 ;
+
+ /* pour une utilisation du type */
+ /* ret = type_image_ppm(&prof, NULL, NULL, NULL, file_name) */
+ if (i_dim == NULL)
+ return 1 ;
+
+ /* on saute les lignes de commentaires */
+ fgets(buffer, SIZE_LINE_TEXT, file);
+ while ((buffer[0] == '#')|(buffer[0] == '\n'))
+ fgets(buffer, SIZE_LINE_TEXT, file);
+
+ /* on lit les dimensions de l'image */
+ sscanf(buffer, "%d %d", j_dim, i_dim) ;
+ fgets(buffer, SIZE_LINE_TEXT, file);
+ sscanf(buffer, "%d", level) ;
+
+
+ fclose(file);
+ return 1 ;
+}
+
+
+
+
+/**
+ * \fn void load_pgm2int(int **image, int i_dim, int j_dim,
+ * int nb_level, char *fichier_image)
+ * \brief lecture pgm 8 ou 16 bits
+ *
+ * \param[out] image
+ * \param[in] i_dim dimension verticale de l'image
+ * \param[in] j_dim dimension horizontale de l'image
+ * \param[in] nb_level dynamique de l'image
+ * \param[in] fichier_image fichier image
+ *
+ *
+ */
+void load_pgm2int(int **image, int i_dim, int j_dim,
+ int nb_level, char *fichier_image)
+{
+ int i, j ;
+ char buffer[SIZE_LINE_TEXT] ;
+ unsigned char *ligne;
+ unsigned short *ligne2;
+ FILE *file = fopen(fichier_image, "rb");
+
+ fgets(buffer, SIZE_LINE_TEXT, file); /* P5 */
+ /* on saute les lignes de commentaires */
+ fgets(buffer, SIZE_LINE_TEXT, file);
+ while ((buffer[0] == '#')|(buffer[0] == '\n'))
+ fgets(buffer, SIZE_LINE_TEXT, file);
+ /* derniere ligne lue : dimensions */
+ fgets(buffer, SIZE_LINE_TEXT, file); /* dynamique */
+
+ /* data */
+
+ if (nb_level < 256)
+ {
+ // fichier en char, on converti au format int
+ ligne = (unsigned char*)malloc(sizeof(unsigned char)*j_dim) ;
+
+ for (i=0;i<i_dim;i++)
+ {
+ fread(ligne, 1, j_dim, file);
+ for (j=0;j<j_dim;j++)
+ image[i][j] = (int)(ligne[j]) ;
+ }
+ free(ligne) ;
+ }
+ else
+ {
+ // fichier en short, on converti au format int
+ ligne2 = (unsigned short*)malloc(sizeof(unsigned short)*j_dim) ;
+
+ for (i=0;i<i_dim;i++)
+ {
+ fread(ligne2, 2, j_dim, file);
+ for (j=0;j<j_dim;j++)
+ image[i][j] = (int)(ligne2[j]) ;
+ }
+ free(ligne2);
+ }
+ fclose(file);
+}
+
+
+void load_pgm2ushort(unsigned short **image, int i_dim, int j_dim,
+ int nb_level, char *fichier_image)
+{
+ int i, j ;
+ char buffer[SIZE_LINE_TEXT] ;
+ unsigned char *ligne;
+ unsigned short *ligne2;
+ FILE *file = fopen(fichier_image, "rb");
+
+ fgets(buffer, SIZE_LINE_TEXT, file); /* P5 */
+ /* on saute les lignes de commentaires */
+ fgets(buffer, SIZE_LINE_TEXT, file);
+ while ((buffer[0] == '#')|(buffer[0] == '\n'))
+ fgets(buffer, SIZE_LINE_TEXT, file);
+ /* derniere ligne lue : dimensions */
+ fgets(buffer, SIZE_LINE_TEXT, file); /* dynamique */
+
+ /* data */
+
+ if (nb_level < 256)
+ {
+ // fichier en char, on converti au format ushort
+ ligne = (unsigned char*)malloc(sizeof(unsigned char)*j_dim) ;
+
+ for (i=0;i<i_dim;i++)
+ {
+ fread(ligne, 1, j_dim, file);
+ for (j=0;j<j_dim;j++)
+ image[i][j] = (unsigned short)(ligne[j]) ;
+ }
+ free(ligne) ;
+ }
+ else
+ {
+ // fichier en short,
+ ligne2 = (unsigned short*)malloc(sizeof(unsigned short)*j_dim) ;
+
+ for (i=0;i<i_dim;i++)
+ {
+ fread(ligne2, 2, j_dim, file);
+ for (j=0;j<j_dim;j++)
+ image[i][j] = (unsigned short)(ligne2[j]) ;
+ }
+ free(ligne2);
+ }
+ fclose(file);
+}
+
+
+
+/**
+ * \fn void write_int2pgm(int **image, int i_dim, int j_dim, char *fichier_image, int recal)
+ * \brief copie au format pgm 8 bits
+ *
+ * \param[in] image
+ * \param[in] i_dim dimension verticale de l'image
+ * \param[in] j_dim dimension horizontale de l'image
+ * \param[in] fichier_image fichier image
+ * \param[in] recal recalage de l'image en 0-255
+ *
+ */
+void write_int2pgm(int **image, int i_dim, int j_dim, char *fichier_image, int recal)
+{
+ int i, j ;
+ int val_min, val_max ;
+ double coef ;
+ unsigned char *ligne;
+ FILE *file=fopen(fichier_image,"wb");
+
+ // entete pgm
+ // format
+ fprintf(file, "P5\n") ;
+ fprintf(file, "# Physics and Images Processing Group\n") ;
+ fprintf(file, "# Fresnel Institut - Marseille - France\n") ;
+ // taille
+ fprintf(file, "%d %d\n", j_dim, i_dim) ;
+ // dynamique
+ fprintf(file, "%d\n" , 255 );
+
+ min_max_int1d(&val_min, &val_max, image[0], i_dim*j_dim) ;
+ coef = 255.0 / (val_max - val_min) ;
+
+ // on converti l'image en entier 8 bits (char)
+ ligne = (unsigned char*)malloc(sizeof(unsigned char)*j_dim );
+ for (i=0;i<i_dim;i++)
+ {
+ if (recal == 1)
+ for (j=0;j<j_dim;j++) ligne[j] = (unsigned char)(coef * (image[i][j]-val_min)) ;
+ else
+ for (j=0;j<j_dim;j++) ligne[j] = (unsigned char)image[i][j] ;
+ fwrite(ligne, 1, j_dim, file);
+ }
+
+ fclose(file);
+ free(ligne) ;
+}
+
+void write_ushort2pgm(unsigned short **image, int i_dim, int j_dim, char *fichier_image, int recal)
+{
+ int i, j ;
+ int val_min, val_max ;
+ double coef ;
+ unsigned char *ligne;
+ FILE *file=fopen(fichier_image,"wb");
+
+ // entete pgm
+ // format
+ fprintf(file, "P5\n") ;
+ fprintf(file, "# Physics and Images Processing Group\n") ;
+ fprintf(file, "# Fresnel Institute - Marseille - France\n") ;
+ // taille
+ fprintf(file, "%d %d\n", j_dim, i_dim) ;
+ // dynamique
+ fprintf(file, "%d\n" , 255 );
+
+ min_max_ushort1d(&val_min, &val_max, image[0], i_dim*j_dim) ;
+ coef = 255.0 / (val_max - val_min) ;
+
+ // on converti l'image en entier 8 bits (char)
+ ligne = (unsigned char*)malloc(sizeof(unsigned char)*j_dim );
+ for (i=0;i<i_dim;i++)
+ {
+ if (recal == 1)
+ for (j=0;j<j_dim;j++) ligne[j] = (unsigned char)(coef * (image[i][j]-val_min)) ;
+ else
+ for (j=0;j<j_dim;j++) ligne[j] = (unsigned char)image[i][j] ;
+ fwrite(ligne, 1, j_dim, file);
+ }
+
+ fclose(file);
+ free(ligne) ;
+}
+
+
+/**
+ * \fn void imagesc(int **image, int i_dim, int j_dim)
+ * \brief affiche une image via xti
+ *
+ * \param[in] image
+ * \param[in] i_dim dimension verticale de l'image
+ * \param[in] j_dim dimension horizontale de l'image
+ *
+ */
+void imagesc(int **image, int i_dim, int j_dim)
+{
+ char nom[SIZE_NAME_FILE] ;
+ //char cmd[SIZE_LINE_TEXT] ;
+
+ sprintf(nom, "imagesc_%d.pgm", getuid());
+ write_int2pgm(image, i_dim, j_dim, nom, 1) ;
+
+ // affichage avec xti
+ //sprintf(cmd, "xti %s &", nom) ;
+ //system(cmd) ;
+}
+
+void imagesc_ushort(unsigned short **image, int i_dim, int j_dim)
+{
+ char nom[SIZE_NAME_FILE] ;
+ //char cmd[SIZE_LINE_TEXT] ;
+
+ sprintf(nom, "imagesc_%d.pgm", getuid());
+ write_ushort2pgm(image, i_dim, j_dim, nom, 1) ;
+
+}
--- /dev/null
+
+int type_image_ppm(int *prof, int *i_dim, int *j_dim, int *level, char *file_name) ;
+void load_pgm2int(int **image, int i_dim, int j_dim, int nb_level, char *fichier_image) ;
+void load_pgm2ushort(unsigned short **image, int i_dim, int j_dim, int nb_level, char *fichier_image) ;
+
+void write_int2pgm(int **image, int i_dim, int j_dim, char *fichier_image, int recal) ;
+void write_ushort2pgm(unsigned short **image, int i_dim, int j_dim, char *fichier_image, int recal) ;
+void imagesc(int **image, int i_dim, int j_dim) ;
+void imagesc_ushort(unsigned short **image, int i_dim, int j_dim) ;
--- /dev/null
+
+
+double mv_gaussien_lniv(int **image_in, int **image_out, int idim, int jdim,
+ double poids, int nb_iter) ;
--- /dev/null
+/**
+ * \file lib_lniv_common.c
+ * \brief routines pour la gestion des lignes de niveaux
+ * \author NB - PhyTI
+ * \version x.x
+ * \date 8 mai 2011
+ *
+ */
+
+#include <stdlib.h>
+#include <math.h>
+#include "lib_lniv_common.h"
+
+
+/**
+ *
+ * \brief calcul du PSNR entre deux images int
+ * \author NB - PhyTI
+ *
+ * \param[in] im1 image d'entree
+ * \param[in] im2 image d'entree
+ * \param[in] idim
+ * \param[in] jdim
+ * \param[in] range dynamique de l'image
+ *
+ */
+double psnr_image_int(int **im1, int **im2, int idim, int jdim, int range)
+{
+ int n ;
+ double eqm = 0.0 ;
+ double err, eq = 0.0 ;
+ for (n=0; n<idim*jdim; n++)
+ {
+ err = im1[0][n]-im2[0][n] ;
+ eq += err*err ;
+ }
+ eqm = eq / (idim*jdim) ;
+
+ return 10.0*log10((double)range*range/eqm) ;
+}
+
+
+
+
+
+/**
+ *
+ * \brief Filtrage d'une image par moyenne
+ * \author NB - PhyTI
+ *
+ * \param[out] result image de sortie
+ * \param[in] matrix image d'entree
+ * \param[in] i_dim
+ * \param[in] j_dim
+ * \param[in] dim_wind largeur du noyau carre
+ *
+ */
+void mean2ds(int **result, int **matrix, int dim_wind,
+ int i_dim, int j_dim)
+{
+ int dim_wind2 = (int)floor(dim_wind/2);
+ double sqr_dim_wind = dim_wind*dim_wind;
+ double tmp_diff;
+ int i, j, iw, jw, iiw ;
+
+ // On calcul les bords en version lente
+ for (i=0;i<i_dim; i++)
+ for (j=0;j<j_dim; j++)
+ {
+ if ((i>dim_wind2+1)&&(i<(i_dim-dim_wind2-1)))
+ if (j==dim_wind2+1)
+ j=j_dim-dim_wind2-1;
+
+ result[i][j] = 0;
+ for (iw=i-dim_wind2;iw<i+dim_wind2+1;iw++)
+ for (jw=j-dim_wind2;jw<j+dim_wind2+1;jw++)
+ if ((iw>=0)&&(jw>=0)&&(iw<i_dim)&&(jw<j_dim))
+ result[i][j] += matrix[iw][jw] ;
+ }
+
+// On calcul le centre en version rapide
+ for (i=dim_wind2+1;i<i_dim-dim_wind2; i++)
+ {
+ for (j=dim_wind2+1;j<j_dim-dim_wind2; j++)
+ {
+ tmp_diff = 0;
+ for (iiw=i-dim_wind2;iiw<=i+dim_wind2;iiw++)
+ tmp_diff += matrix[iiw][j+dim_wind2] - matrix[iiw][j-dim_wind2-1] ;
+
+ result[i][j] = result[i][j-1] + tmp_diff ;
+ }
+ }
+
+ // On divise pour la moyenne
+ // pb sur les bords !!!
+ for (i=0;i<i_dim;i++)
+ for (j=0;j<j_dim;j++)
+ result[i][j] /= sqr_dim_wind;
+}
--- /dev/null
+double psnr_image_int(int **im1, int **im2, int idim, int jdim, int range) ;
+
+void mean2ds(int **result, int **matrix, int dim_wind,
+ int i_dim, int j_dim) ;
--- /dev/null
+/**
+ * \file lib_math.c
+ * \brief routines
+ * \author NB - PhyTI
+ * \version x.x
+ * \date 20 decembre 2009
+ *
+ */
+
+#include <stdio.h>
+#include "defines.h"
+#include "lib_math.h"
+
+/**
+ * \fn void tic(struct timeval* temps, char* texte)
+ * \brief Initialise le compteur de temps
+ *
+ * \param[out] temps
+ * \param[in] texte texte a afficher
+ *
+ */
+void tic(struct timeval* temps, char* texte)
+{
+ gettimeofday(temps, NULL);
+
+ if (texte != NULL)
+ printf("%s\n", texte) ;
+}
+
+/**
+ * \fn double toc(struct timeval start, char* texte)
+ * \brief Calcule le temps ecoule
+ *
+ * \param[in] start temps de debut du chrono
+ * \param[in] texte texte a afficher
+ *
+ * \return le temps ecoule entre tic et toc
+ */
+double toc(struct timeval start, char* texte)
+{
+ struct timeval end ;
+ double elapsed ;
+
+ gettimeofday(&end, NULL);
+
+ elapsed = (double)(end.tv_sec-start.tv_sec)
+ + 0.000001*(double)(end.tv_usec-start.tv_usec);
+ if (texte != NULL)
+ printf("%s : %f\n", texte, elapsed) ;
+
+ return elapsed ;
+}
+
+
+/**
+ * \fn void min_max_int1d(int *val_min, int *val_max, int *vect, int dim)
+ * \brief determine le min et max d'un vecteur de int
+ *
+ * \param[out] val_min
+ * \param[out] val_max
+ * \param[in] vect
+ * \param[in] dim dimension du vecteur
+ *
+ */
+
+void min_max_int1d(int *val_min, int *val_max, int *vect, int dim)
+{
+ int n, min, max ;
+
+ min = vect[1];
+ max = min;
+
+ for (n=0;n<dim;n++)
+ {
+ if (vect[n] > max) max = vect[n];
+ if (vect[n] < min) min = vect[n];
+ }
+
+ *val_min = min ;
+ *val_max = max ;
+}
+
+void min_max_ushort1d(int *val_min, int *val_max, unsigned short *vect, int dim)
+{
+ int n ;
+ unsigned short min, max ;
+
+ min = vect[1];
+ max = min;
+
+ for (n=0;n<dim;n++)
+ {
+ if (vect[n] > max) max = vect[n];
+ if (vect[n] < min) min = vect[n];
+ }
+
+ *val_min = min ;
+ *val_max = max ;
+}
+
+
+
+
+/**
+ * \fn inline int test_inf(double arg1, double arg2)
+ *
+ * \brief test (arg1 < arg2) inferieur a avec pourcentage minimum
+ *
+ * \param[in] arg1
+ * \param[in] arg2
+ *
+ * return test
+ */
+inline int test_inf(double arg1, double arg2)
+{
+ if (arg2 > 0)
+ return arg1 < (arg2*COEF_DECROI) ;
+ else
+ return arg1 < (arg2*INV_COEF_DECROI) ;
+}
+
+
+
+
+
+/**
+ * \fn inline int sign_diff_ou_egal_zero(int val1, int val2)
+ *
+ * \brief fonction qui test si les arguments sont de signes differents ou nuls
+ * \author NB - PhyTI
+ *
+ * \param[in] val1
+ * \param[in] val2
+ *
+ * \return le test 0/1
+ *
+ */
+inline int sign_diff_ou_egal_zero(int val1, int val2)
+{
+ if (val1 > 0)
+ {
+ if (val2 > 0) return 0 ;
+ else return 1 ;
+ }
+ else
+ if (val1 < 0)
+ {
+ if (val2 < 0) return 0 ;
+ else return 1 ;
+ }
+ else
+ return 1 ;/* val1 == 0 */
+}
+
+/**
+ * \fn inline int sign_diff_strict(int val1, int val2)
+ *
+ * \brief fonction qui test si les arguments sont de signes differents strictement
+ * \author NB - PhyTI
+ *
+ * \param[in] val1
+ * \param[in] val2
+ *
+ * \return le test 0/1
+ *
+ */
+inline int sign_diff_strict(int val1, int val2)
+{
+ if (val1 > 0)
+ {
+ if (val2 >= 0) return 0 ;
+ else return 1 ;
+ }
+ else
+ if (val1 < 0)
+ {
+ if (val2 <= 0) return 0 ;
+ else return 1 ;
+ }
+ else
+ return 0 ;/* val1 == 0 */
+}
+
+
+
+/**
+ * \fn inline int sinus_triangle(int Ai, int Aj, int Bi, int Bj, int Ci, int Cj)
+ *
+ * \brief calcul le "sinus" de l'angle du triangle ABC
+ * \author NB - PhyTI
+ *
+ * \param[in] Ai les coordonnees
+ * \param[in] Aj
+ * \param[in] Bi
+ * \param[in] Bj
+ * \param[in] Ci
+ * \param[in] Cj
+ *
+ * \return le sinus non normalise
+ *
+ * Cette fonction est utile pour determiner si un triangle ABC
+ * est donne dans l'ordre trigo.
+ * Signe > 0: sens trigo,
+ * signe < 0: sens antitrigo
+ * = 0: plat
+ */
+inline int sinus_triangle(int Ai, int Aj, int Bi, int Bj, int Ci, int Cj)
+{
+ return (((Bi-Ai)*(Cj-Aj)) - ((Ci-Ai)*(Bj-Aj))) ;
+}
+
+
+/**
+ * \fn void recopie_vecteur(int *in, int *out, int dim)
+ *
+ * \brief recopie le vecteur out vers in
+ * \author NB - PhyTI
+ *
+ * \param[in] in vecteur d'entree
+ * \param[out] out vecteur recopier
+ * \param[in] dim longueur du vecteur
+ */
+void recopie_vecteur(int *in, int *out, int dim)
+{
+ int n ;
+ for (n=0; n<dim; n++) out[n] = in[n] ;
+}
--- /dev/null
+#ifndef _LIB_MATH_H
+#define _LIB_MATH_H
+
+
+#include <sys/time.h>
+
+void tic(struct timeval* temps, char* texte) ;
+double toc(struct timeval start, char* texte) ;
+
+void min_max_int1d(int *val_min, int *val_max, int *vect, int dim) ;
+void min_max_ushort1d(int *val_min, int *val_max, unsigned short *vect, int dim) ;
+
+#define min(a,b) ((a)<(b)) ? (a) : (b)
+#define max(a,b) ((a)>(b)) ? (a) : (b)
+
+
+inline int test_inf(double arg1, double arg2);
+
+inline int sign_diff_ou_egal_zero(int val1, int val2);
+inline int sign_diff_strict(int val1, int val2);
+
+inline int sinus_triangle(int Ai, int Aj, int Bi, int Bj, int Ci, int Cj);
+
+void recopie_vecteur(int *in, int *out, int dim) ;
+
+
+#endif
--- /dev/null
+/**
+ * \file nliv.c
+ * \brief test de reduction par MV et contriante de ligne de niveaux
+ * \author NB - PhyTI
+ * \version x.x
+ * \date 6 mai 2011
+ *
+ */
+
+// protection
+#ifdef __PROTECT
+#include <sys/ptrace.h>
+#endif
+
+
+#include <stdio.h>
+#ifdef __MULTI_THREAD
+#include <pthread.h>
+#endif
+
+#include "lib_alloc.h"
+#include "lib_images.h"
+#include "lib_math.h"
+#include "lib_pretraitement.h"
+
+#include "lib_lniv.h"
+
+
+int main(int argc, char **argv)
+{
+// protection light
+#ifdef __PROTECT
+ if (ptrace(PTRACE_TRACEME, 0, 1, 0) < 0) return(0) ;
+#endif
+
+ /* declaration des variables */
+ int ret ;
+ int Bin_file = 0 ;
+ int Prof ; /* profondeur en octets */
+ int I_dim ; /* hauteur de l'image */
+ int J_dim ; /* largeur de l'image */
+ int Nb_level ; /* dynamique de l'image */
+ char *File_name ;
+ char *PARAM = NULL ;
+
+ /* images */
+ int **Image_in1, **Image_out1 ;
+ int **Image_in2=NULL, **Image_in3=NULL ;
+ int **Image_out2=NULL, **Image_out3=NULL ;
+
+
+ /* variables de calculs */
+ struct timeval chrono ;
+
+ /* debug : affichage snake */
+ int Verbose = 1 ;
+ int Display = 1 ;
+
+
+ /* lecture argument entree (basique!) */
+ if (argc == 1)
+ {
+ fprintf(stderr, "USAGE : LNIV pgm_file <params>\n") ;
+ fprintf(stderr,
+ "\n"
+ "\n"
+ "\n"
+ "\n") ;
+ return(0) ;
+ }
+ File_name = argv[1] ;
+
+ /* verif type image (pgm 8/16) */
+ ret = type_image_ppm(&Prof, &I_dim, &J_dim, &Nb_level, File_name) ;
+ Nb_level++ ;
+
+ if (ret == 0)
+ {
+ /* tentative image bin */
+ ret = load_dim_image_bin(&I_dim, &J_dim, File_name) ;
+ if (ret != 1)
+ {
+ printf("format non pris en charge ... exit\n") ;
+ return(0) ;
+ }
+ Nb_level = 65536 ; // quantif 16 bits pour les bin
+ Bin_file = 1 ;
+ Prof = 1 ;
+ }
+
+ /* infos */
+ if (Verbose)
+ {
+ printf("Image : %s\n", File_name) ;
+ printf("lecture OK : %d\n", ret) ;
+ printf("Image (%d x %d) pixels\n", I_dim, J_dim) ;
+ printf("Dynamique : %d\n", Nb_level) ;
+ printf("Canaux : %d\n", Prof) ;
+ } ;
+
+ /* Allocation */
+ Image_in1 = new_matrix_int(I_dim, J_dim) ;
+ Image_out1 = new_matrix_int(I_dim, J_dim) ;
+ if (Prof == 3)
+ {
+ Image_in2 = new_matrix_int(I_dim, J_dim) ;
+ Image_out2 = new_matrix_int(I_dim, J_dim) ;
+ Image_in3 = new_matrix_int(I_dim, J_dim) ;
+ Image_out3 = new_matrix_int(I_dim, J_dim) ;
+ }
+
+ /* chargement image d'entree */
+ tic(&chrono, NULL) ;
+ if (Bin_file)
+ load_bin2int(Image_in1, I_dim, J_dim, Nb_level, File_name) ;
+ else
+ if (Prof == 1)
+ load_pgm2int(Image_in1, I_dim, J_dim, Nb_level, File_name) ;
+ else /* (Prof == 3) */
+ {
+ load_ppm2int(Image_in1, Image_in2, Image_in3, I_dim, J_dim, File_name ) ;
+ //RGB2YUV(Image_in1[0], Image_in2[0], Image_in3[0], I_dim*J_dim) ;
+ }
+
+ toc(chrono, "temps chargement image") ;
+ if (Display) image16(Image_in1, I_dim, J_dim) ;
+ if ((Display)&&(Prof == 3)) image16(Image_in2, I_dim, J_dim) ;
+ if ((Display)&&(Prof == 3)) image16(Image_in3, I_dim, J_dim) ;
+
+ /* sequence de parametre */
+ if (argc >= 3) PARAM = argv[2] ;
+
+ double poids = 15.0 ;
+ int nb_iter = 15 ;
+ if (Prof == 1)
+ {
+ if (Verbose)
+ {
+ printf("poids contrainte : %.3f\n", poids) ;
+ printf("nombre d'iteration GN : %d\n", nb_iter) ;
+ }
+ tic(&chrono, NULL) ;
+ mv_gaussien_lniv(Image_in1, Image_out1, I_dim, J_dim, poids, nb_iter) ;
+ toc(chrono, "temps lniv_image") ;
+
+ wimage16(Image_out1, I_dim, J_dim, "output.pgm") ;
+
+ }
+ else
+ {
+ if (Verbose)
+ {
+ printf("image couleur (YUV)\n") ;
+ printf("poids contrainte : %.3f\n", poids) ;
+ printf("nombre d'iteration GN : %d\n", nb_iter) ;
+ }
+ tic(&chrono, NULL) ;
+
+ mv_gaussien_lniv(Image_in1, Image_out1, I_dim, J_dim, poids, nb_iter) ;
+ mv_gaussien_lniv(Image_in2, Image_out2, I_dim, J_dim, poids, nb_iter) ;
+ mv_gaussien_lniv(Image_in3, Image_out3, I_dim, J_dim, poids, nb_iter) ;
+
+ toc(chrono, "temps lniv_image") ;
+
+ image16(Image_out1, I_dim, J_dim) ;
+ image16(Image_out2, I_dim, J_dim) ;
+ image16(Image_out3, I_dim, J_dim) ;
+ write_intRGB2ppm8(Image_out1, Image_out2, Image_out3, I_dim, J_dim, "output.ppm") ;
+
+ }
+
+
+ /* Delete */
+ del_matrix_int(Image_in1, I_dim) ;
+ del_matrix_int(Image_out1, I_dim) ;
+ if (Prof == 3)
+ {
+ del_matrix_int(Image_in2, I_dim) ;
+ del_matrix_int(Image_out2, I_dim) ;
+ del_matrix_int(Image_in3, I_dim) ;
+ del_matrix_int(Image_out3, I_dim) ;
+ }
+
+
+ return 1 ;
+}
--- /dev/null
+// libs C
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "lib_lniv.h"
+
+// libs NV
+#include <cuda_runtime.h>
+#include <cutil_inline.h>
+
+// lib spec
+#include "defines.h"
+#include "levelines_common.h"
+
+#include "levelines_kernels.cu"
+
+
+__global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){
+
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+ unsigned int pos = i*L +j ;
+
+ ptr2[pos] = val - ptr1[pos] ;
+
+}
+
+int main(int argc, char **argv){
+
+
+ //float coef_regul = atof( argv[1] ) ;
+
+ unsigned int timer ;
+ cutilCheckError( cutCreateTimer(&timer) );
+ cutilCheckError( cutResetTimer(timer) );
+ /*****************************
+ * CHARGEMENT IMAGE
+ *****************************/
+ char* image_path = argv[argc-1];
+ char* image_out = "./image_out.pgm" ;
+ unsigned int * h_data = NULL ;
+ unsigned int * h_data_out = NULL ;
+ unsigned int H, L, size;
+
+ cutilCheckError( cutStartTimer(timer) );
+ cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
+ cutilCheckError( cutStopTimer(timer) );
+
+ size = H * L * sizeof( unsigned int );
+ printf("Loaded %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path, cutGetTimerValue(timer));
+
+
+ //essai alloc mapped
+ /*
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
+ unsigned int * h_ptr1, * d_ptr1 ;
+ unsigned int * h_ptr2, * d_ptr2 ;
+ int h = ;
+ int l = h ;
+ int mem = h*l*sizeof(unsigned int) ;
+ cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+
+ cutilCheckError( cutStartTimer(timer) );
+ cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped));
+ cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped));
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+
+ for (int i = 0; i<h*l ; i++) h_ptr1[i] = 200 ;
+
+ cutilCheckError( cutStartTimer(timer) );
+ cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr1, (void *)h_ptr1, 0));
+ cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr2, (void *)h_ptr2, 0));
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Temps cumul get pointer Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+
+ cutilCheckError( cutStartTimer(timer) );
+ dim3 blocks(16,16,1) ;
+ dim3 grid( h / blocks.x, l / blocks.y, 1 ) ;
+
+ kernel_debil<<< grid, blocks >>>(d_ptr1, d_ptr2, l, 255) ;
+
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+
+ char * image_1 = "./image_1.pgm" ;
+ char * image_2 = "./image_2.pgm" ;
+
+ cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ;
+ cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ;
+ */
+ /*****************************
+ * FIN CHARGEMENT IMAGE
+ *****************************/
+
+
+
+ // use device with highest Gflops/s
+ cudaSetDevice( cutGetMaxGflopsDeviceId() );
+
+
+ /*
+ cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
+ cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
+ cutilSafeCall( cudaThreadSynchronize() );
+ cutilCheckError( cutResetTimer(hTimer) );
+ cutilCheckError( cutStartTimer(hTimer) );
+
+ cutilSafeCall( cudaThreadSynchronize() );
+ cutilCheckError( cutStopTimer(hTimer) );
+ gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
+ */
+
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
+ // allocation mem GPU
+ unsigned int * d_directions =NULL ;
+ unsigned int * d_lniv, * d_estim = NULL ;
+
+ cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
+ cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
+ cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
+
+
+ // allocate array and copy image data
+ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
+ cudaArray * array_img_in, *array_img_estim, *array_img_lniv;
+ cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
+ cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
+ cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
+ cutilCheckError( cutStopTimer(timer) );
+
+ cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H ));
+ cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
+
+ cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H ));
+ cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
+
+ printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
+ /*****************************
+ * APPELS KERNELS et chronos
+ *****************************/
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
+
+ unsigned int iter , nb_iter = 15 ;
+ unsigned int poids = 15 ;
+ dim3 dimBlock(8,8,1) ;
+ dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
+ unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
+ // init image estimee avec image_in
+ kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
+
+ printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
+
+ for ( iter =0 ; iter < nb_iter ; iter++ )
+ {
+ cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
+ kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H );
+ cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
+ kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
+ }
+
+ cudaThreadSynchronize();
+
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
+ printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
+
+ /**************************
+ * VERIFS
+ **************************/
+ //trace des lniv sur grille de 'pas x pas'
+ //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
+ //cudaThreadSynchronize();
+
+ // enregistrement image lniv GPU
+ h_data_out = new unsigned int[H*L] ;
+ if ( h_data_out != NULL)
+ cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
+ else
+ printf("Echec allocation mem CPU\n");
+
+ cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
+
+ // calcul lniv CPU
+
+
+ // TODO verifier pourquoi les deux lignes suivantes produisent une erreur
+ //cutilExit(argc, argv);
+ //cudaThreadExit();
+ return EXIT_SUCCESS ;
+}
--- /dev/null
+// libs C
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "lib_lniv.h"
+
+// libs NV
+#include <cuda_runtime.h>
+#include <cutil_inline.h>
+
+// lib spec
+#include "defines.h"
+#include "levelines_common.h"
+
+#include "levelines_kernels.cu"
+
+
+int main(int argc, char **argv){
+
+
+ //float coef_regul = atof( argv[1] ) ;
+
+ unsigned int timer ;
+ float time_cumul = 0.0 ;
+ cutilCheckError( cutCreateTimer(&timer) );
+ cutilCheckError( cutResetTimer(timer) );
+
+ /*****************************
+ * CHARGEMENT IMAGE
+ *****************************/
+ char* image_path = argv[argc-1];
+ char* image_out = "./image_out.pgm" ;
+ unsigned int * h_data = NULL ;
+ unsigned int * h_data_out = NULL ;
+ unsigned int H, L, size;
+
+ cutilCheckError( cutStartTimer(timer) );
+ cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
+ cutilCheckError( cutStopTimer(timer) );
+
+ size = H * L * sizeof( unsigned int );
+ printf("Loaded %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path, cutGetTimerValue(timer));
+ time_cumul += cutGetTimerValue(timer) ;
+ /*****************************
+ * FIN CHARGEMENT IMAGE
+ *****************************/
+
+
+
+ // use device with highest Gflops/s
+ cudaSetDevice( cutGetMaxGflopsDeviceId() );
+
+ /*
+ cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
+ cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
+ cutilSafeCall( cudaThreadSynchronize() );
+ cutilCheckError( cutResetTimer(hTimer) );
+ cutilCheckError( cutStartTimer(hTimer) );
+
+ cutilSafeCall( cudaThreadSynchronize() );
+ cutilCheckError( cutStopTimer(hTimer) );
+ gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
+ */
+
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
+ // allocation mem GPU
+ unsigned int * d_directions =NULL ;
+ unsigned int * d_lniv, * d_estim, * d_data ;
+ cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
+ cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
+ cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
+ cutilSafeCall( cudaMalloc( (void**) &d_data, size ) );
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Temps alloc global mem : %f ms\n", cutGetTimerValue(timer)) ;
+ time_cumul += cutGetTimerValue(timer) ;
+
+ // transfert data -> GPU global mem
+ cutilCheckError( cutStartTimer(timer) );
+ cutilSafeCall( cudaMemcpy( d_data , h_data, size, cudaMemcpyHostToDevice) );
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Temps transferts en global mem : %f ms\n", cutGetTimerValue(timer)) ;
+ time_cumul += cutGetTimerValue(timer) ;
+ /*****************************
+ * APPELS KERNELS et chronos
+ *****************************/
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
+
+ unsigned int iter , nb_iter = 15 ;
+ unsigned int poids = 15 ;
+ dim3 dimBlock(8,8,1) ;
+ dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
+ unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
+ // init image estimee avec image_in
+ kernel_init_estim_from_img_in_global_mem<<< dimGrid, dimBlock, 0 >>>(d_data, d_estim, L, H, 7);
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)) ;
+ time_cumul += cutGetTimerValue(timer) ;
+
+
+ // iterations
+ cutilCheckError( cutStartTimer(timer) );
+ printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
+ for ( iter =0 ; iter < nb_iter ; iter++ )
+ {
+ kernel_levelines_global_mem<<< dimGrid, dimBlock, 0 >>>( d_estim, d_lniv, L, H );
+ kernel_estim_next_step_global_mem<<< dimGrid, dimBlock, 0 >>>(d_estim, d_lniv, d_data, L, H, poids) ;
+ }
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)) ;
+ time_cumul += cutGetTimerValue(timer) ;
+
+ printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
+ printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
+ printf("Total execution : %f ms\n", time_cumul) ;
+
+ /**************************
+ * VERIFS
+ **************************/
+ //trace des lniv sur grille de 'pas x pas'
+ //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
+ //cudaThreadSynchronize();
+
+ // enregistrement image lniv GPU
+ h_data_out = new unsigned int[H*L] ;
+ if ( h_data_out != NULL)
+ cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
+ else
+ printf("Echec allocation mem CPU\n");
+
+ cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
+
+ // calcul lniv CPU
+
+
+ // TODO verifier pourquoi les deux lignes suivantes produisent une erreur
+ //cutilExit(argc, argv);
+ //cudaThreadExit();
+ return EXIT_SUCCESS ;
+}