From: Gilles Perrot Date: Fri, 3 Jun 2011 14:24:59 +0000 (+0200) Subject: initial commit for lniv X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/lniv_gpu.git/commitdiff_plain/1a59c41c272e22861e759e660a64578b47147d85?ds=inline initial commit for lniv --- diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2801dda --- /dev/null +++ b/Makefile @@ -0,0 +1,49 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. +# +################################################################################ +# +# Build script for project +# +################################################################################ + +# Add source files here +EXECUTABLE := levelines +# CUDA source files (compiled with cudacc) +CUFILES := main_gmem.cu +# CUDA dependency files +CU_DEPS := levelines_common.h +# C/C++ source files (compiled with gcc / c++) +CCFILES := + + +################################################################################ +# Rules and targets + +include ../../common/common.mk diff --git a/defines.h b/defines.h new file mode 100644 index 0000000..cddf7c6 --- /dev/null +++ b/defines.h @@ -0,0 +1,29 @@ +#ifndef __DEFINES_H__ +#define __DEFINES_H__ + + + +/** + * \def SIZE_NAME_FILE longueur maxi associee aux noms de fichiers + * \def SIZE_LINE_TEXT longueur maxi associee a une ligne de texte + */ +#define SIZE_NAME_FILE 256 +#define SIZE_LINE_TEXT 256 + +#define COEF_DECROI 0.99999 +#define INV_COEF_DECROI 1.00001 + +#define BSMAX 512 +#define MAX(x,y) ( ( (x)>=(y) )?(x):(y) ) +#define ABS(x) ( ((x)>0)?(x):-(x)) +#define DEC 4 +#define DEC2 8 +#define CONFLICT_FREE_OFFSET(index) ( ((index) >>(DEC)) + ((index) >>(DEC2) ) ) +#define CFO(index) ( ( (index) >>(DEC) ) + ( (index) >>(DEC2) ) ) +#define CFI(index) ( (index) + (CFO(index)) ) + +//dimension de la matrice definissant les chemins des lignes de niveaux +#define PSIZE_I 24 +#define PSIZE_J 4 + +#endif diff --git a/levelines_common.h b/levelines_common.h new file mode 100644 index 0000000..ddb0ea2 --- /dev/null +++ b/levelines_common.h @@ -0,0 +1,24 @@ + + +#ifndef LEVELINES_COMMON_H +#define LEVELINES_COMMON_H + + + +#include + + + + +//////////////////////////////////////////////////////////////////////////////// +// Reference CPU functions +//////////////////////////////////////////////////////////////////////////////// +//extern "C" void fonc(...); + +//////////////////////////////////////////////////////////////////////////////// +// GPU functions (in file.cu) +//////////////////////////////////////////////////////////////////////////////// +//extern "C" void fonc(float *h_Kernel); + + +#endif diff --git a/levelines_kernels.cu b/levelines_kernels.cu new file mode 100644 index 0000000..491c600 --- /dev/null +++ b/levelines_kernels.cu @@ -0,0 +1,315 @@ + +// chemins des lignes de niveaux +// longueur = 4 pixels +// une ligne = un chemin + +__constant__ int pathDi[PSIZE_I][PSIZE_J-1] = + { + // Q1 + { -1, -1, -1}, // + { -1, -1, -1}, // + { -1, -1, -1}, // + { -1, -1, -1}, // + { -1, 0, 1}, // + { 0, -1, 0}, + // Q4 + { 0, 0, 0}, // + { 0, 1, 1}, // + { 1, 0, 1}, // + { 1, 1, 1}, // + { 1, 1, 1}, // + { 1, 1, 1}, + // Q3 + { 1, 1, 1}, // + { 1, 1, 1}, // + { 1, 1, 1}, // + { 1, 1, 1}, // + { 1, 0, -1}, // + { 0, 1, 0}, + // Q2 + { 0, 0, 0}, // + { 0, -1, 0}, // + { -1, 0, -1}, // + { -1, -1, -1}, // + { -1, -1, -1}, // + { -1, -1, -1} + } ; // + +__constant__ int pathDj[PSIZE_I][PSIZE_J-1] = + { + // Q1 + { 0, 0, 0}, // + { 0, 1, 0}, + { 1, 0, 1}, + { 1, 1, 1}, + { 1, 1, 1}, + { 1, 1, 1}, + // Q4 + { 1, 1, 1}, // + { 1, 1, 1}, + { 1, 1, 1}, + { 1, 1, 1}, + { 1, 0, -1}, + { 0, 1, 0}, + // Q3 + { 0, 0, 0}, // + { 0, -1, 0}, + { -1, 0, -1}, + { -1, -1, -1}, + { -1, -1, -1}, + { -1, -1, -1}, + // Q2 + { -1, -1, -1}, // + { -1, -1, -1}, + { -1, -1, -1}, + { -1, -1, -1}, + { -1, 0, 1}, + { 0, -1, 0} + } ; + + +// declare texture reference for 2D int texture +texture tex_img_in ; +texture tex_img_estim ; +texture tex_img_lniv ; + + +__global__ void kernel_init_estim_from_img_in(unsigned int * d_estim, unsigned int L, unsigned int H, unsigned int r){ + // coordonnes du point dans l'image + unsigned int i = blockIdx.x*blockDim.x + threadIdx.x; + unsigned int j = blockIdx.y*blockDim.y + threadIdx.y; + unsigned int pos = i*L +j ; + unsigned int ic , jc, ng ; + + if( (i>r)&&(ir)&&(jr)&&(ir)&&(jlpath)&&(ilpath)&&(jlpath)&&(ilpath)&&(jlpath)&&(ilpath)&&(j 0) { + if ( mse_cur < mse_min ) { + mse_min = mse_cur ; + } + } else { + mse_min = mse_cur ; + } + } + img_out[ i*L + j ] = mse_min / lpath ; + } +} + + +__global__ void kernel_trace_levelines(unsigned int * img_in, unsigned int * dir, unsigned int * img_out, + unsigned int L, unsigned int H, unsigned int pas, unsigned int ng){ + // coordonnes du point dans l'image + unsigned int i = blockIdx.x*blockDim.x + threadIdx.x; + unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;; + + // nb de points par chemin + int lpath = PSIZE_J ; + unsigned int ic, jc, idpix ; + unsigned int idpath ; + + img_out[ i*L + j ] = img_in[ i*L + j ] ; + + if ( !(i%pas+j%pas)&&(i>lpath)&&(ilpath)&&(j +#include + +#include "lib_alloc.h" + +/** + * \brief allocation d'un tableau 2D (tab[i][j]) avec data en ligne (tab[0][n]) + * + * \param[in] i_dim dimension verticale du tableau + * \param[in] j_dim dimension horizontale du tableau + * + * \return pointeur sur le tableau + * + */ +void ***new_matrix_ptr(int i_dim, int j_dim) +{ + /* allocation en ligne */ + void ***matrice ; + void **vecteur ATT_ALIGN_SSE ; + int i ; + + vecteur = (void**)malloc(sizeof(void*)*i_dim*j_dim) ; + matrice = (void***)malloc(sizeof(void**)*i_dim) ; + for (i=0;i +#include +#include +#include + +#include "defines.h" +#include "lib_images.h" +#include "lib_math.h" + + +/** + * \fn int type_image_ppm(int *prof, int *i_dim, int *j_dim, int *level, char *file_name) + * \brief Fonction qui renvoie le type de l'image ppm et des caracteristiques + * + * \param[out] prof profondeur de l'image 1 pour pgm 3 pour ppm, 0 sinon + * \param[out] i_dim renvoie la dimension verticale de l'image (si NULL, renvoie que prof) + * \param[out] j_dim renvoie la dimension horizontale de l'image + * \param[out] level renvoie la dynamique de l'image + * \param[in] file_name fichier image + * + * \return 1 si ok O sinon + * + */ +int type_image_ppm(int *prof, int *i_dim, int *j_dim, int *level, char *file_name) +{ + char buffer[SIZE_LINE_TEXT] ; + FILE *file ; + + *prof = 0 ; + + file = fopen(file_name, "rb"); + if (file == NULL) + return 0 ; + + // lecture de la premiere ligne + fgets(buffer, SIZE_LINE_TEXT, file); + + /* pgm */ + if ((buffer[0] == 'P') & (buffer[1] == '5')) + *prof = 1 ; // GGG + /* ppm */ + if ((buffer[0] == 'P') & (buffer[1] == '6')) + *prof = 3 ; // RVBRVBRVB + + /* type non gere */ + if (*prof == 0) return 0 ; + + /* pour une utilisation du type */ + /* ret = type_image_ppm(&prof, NULL, NULL, NULL, file_name) */ + if (i_dim == NULL) + return 1 ; + + /* on saute les lignes de commentaires */ + fgets(buffer, SIZE_LINE_TEXT, file); + while ((buffer[0] == '#')|(buffer[0] == '\n')) + fgets(buffer, SIZE_LINE_TEXT, file); + + /* on lit les dimensions de l'image */ + sscanf(buffer, "%d %d", j_dim, i_dim) ; + fgets(buffer, SIZE_LINE_TEXT, file); + sscanf(buffer, "%d", level) ; + + + fclose(file); + return 1 ; +} + + + + +/** + * \fn void load_pgm2int(int **image, int i_dim, int j_dim, + * int nb_level, char *fichier_image) + * \brief lecture pgm 8 ou 16 bits + * + * \param[out] image + * \param[in] i_dim dimension verticale de l'image + * \param[in] j_dim dimension horizontale de l'image + * \param[in] nb_level dynamique de l'image + * \param[in] fichier_image fichier image + * + * + */ +void load_pgm2int(int **image, int i_dim, int j_dim, + int nb_level, char *fichier_image) +{ + int i, j ; + char buffer[SIZE_LINE_TEXT] ; + unsigned char *ligne; + unsigned short *ligne2; + FILE *file = fopen(fichier_image, "rb"); + + fgets(buffer, SIZE_LINE_TEXT, file); /* P5 */ + /* on saute les lignes de commentaires */ + fgets(buffer, SIZE_LINE_TEXT, file); + while ((buffer[0] == '#')|(buffer[0] == '\n')) + fgets(buffer, SIZE_LINE_TEXT, file); + /* derniere ligne lue : dimensions */ + fgets(buffer, SIZE_LINE_TEXT, file); /* dynamique */ + + /* data */ + + if (nb_level < 256) + { + // fichier en char, on converti au format int + ligne = (unsigned char*)malloc(sizeof(unsigned char)*j_dim) ; + + for (i=0;i +#include +#include "lib_lniv_common.h" + + +/** + * + * \brief calcul du PSNR entre deux images int + * \author NB - PhyTI + * + * \param[in] im1 image d'entree + * \param[in] im2 image d'entree + * \param[in] idim + * \param[in] jdim + * \param[in] range dynamique de l'image + * + */ +double psnr_image_int(int **im1, int **im2, int idim, int jdim, int range) +{ + int n ; + double eqm = 0.0 ; + double err, eq = 0.0 ; + for (n=0; ndim_wind2+1)&&(i<(i_dim-dim_wind2-1))) + if (j==dim_wind2+1) + j=j_dim-dim_wind2-1; + + result[i][j] = 0; + for (iw=i-dim_wind2;iw=0)&&(jw>=0)&&(iw +#include "defines.h" +#include "lib_math.h" + +/** + * \fn void tic(struct timeval* temps, char* texte) + * \brief Initialise le compteur de temps + * + * \param[out] temps + * \param[in] texte texte a afficher + * + */ +void tic(struct timeval* temps, char* texte) +{ + gettimeofday(temps, NULL); + + if (texte != NULL) + printf("%s\n", texte) ; +} + +/** + * \fn double toc(struct timeval start, char* texte) + * \brief Calcule le temps ecoule + * + * \param[in] start temps de debut du chrono + * \param[in] texte texte a afficher + * + * \return le temps ecoule entre tic et toc + */ +double toc(struct timeval start, char* texte) +{ + struct timeval end ; + double elapsed ; + + gettimeofday(&end, NULL); + + elapsed = (double)(end.tv_sec-start.tv_sec) + + 0.000001*(double)(end.tv_usec-start.tv_usec); + if (texte != NULL) + printf("%s : %f\n", texte, elapsed) ; + + return elapsed ; +} + + +/** + * \fn void min_max_int1d(int *val_min, int *val_max, int *vect, int dim) + * \brief determine le min et max d'un vecteur de int + * + * \param[out] val_min + * \param[out] val_max + * \param[in] vect + * \param[in] dim dimension du vecteur + * + */ + +void min_max_int1d(int *val_min, int *val_max, int *vect, int dim) +{ + int n, min, max ; + + min = vect[1]; + max = min; + + for (n=0;n max) max = vect[n]; + if (vect[n] < min) min = vect[n]; + } + + *val_min = min ; + *val_max = max ; +} + +void min_max_ushort1d(int *val_min, int *val_max, unsigned short *vect, int dim) +{ + int n ; + unsigned short min, max ; + + min = vect[1]; + max = min; + + for (n=0;n max) max = vect[n]; + if (vect[n] < min) min = vect[n]; + } + + *val_min = min ; + *val_max = max ; +} + + + + +/** + * \fn inline int test_inf(double arg1, double arg2) + * + * \brief test (arg1 < arg2) inferieur a avec pourcentage minimum + * + * \param[in] arg1 + * \param[in] arg2 + * + * return test + */ +inline int test_inf(double arg1, double arg2) +{ + if (arg2 > 0) + return arg1 < (arg2*COEF_DECROI) ; + else + return arg1 < (arg2*INV_COEF_DECROI) ; +} + + + + + +/** + * \fn inline int sign_diff_ou_egal_zero(int val1, int val2) + * + * \brief fonction qui test si les arguments sont de signes differents ou nuls + * \author NB - PhyTI + * + * \param[in] val1 + * \param[in] val2 + * + * \return le test 0/1 + * + */ +inline int sign_diff_ou_egal_zero(int val1, int val2) +{ + if (val1 > 0) + { + if (val2 > 0) return 0 ; + else return 1 ; + } + else + if (val1 < 0) + { + if (val2 < 0) return 0 ; + else return 1 ; + } + else + return 1 ;/* val1 == 0 */ +} + +/** + * \fn inline int sign_diff_strict(int val1, int val2) + * + * \brief fonction qui test si les arguments sont de signes differents strictement + * \author NB - PhyTI + * + * \param[in] val1 + * \param[in] val2 + * + * \return le test 0/1 + * + */ +inline int sign_diff_strict(int val1, int val2) +{ + if (val1 > 0) + { + if (val2 >= 0) return 0 ; + else return 1 ; + } + else + if (val1 < 0) + { + if (val2 <= 0) return 0 ; + else return 1 ; + } + else + return 0 ;/* val1 == 0 */ +} + + + +/** + * \fn inline int sinus_triangle(int Ai, int Aj, int Bi, int Bj, int Ci, int Cj) + * + * \brief calcul le "sinus" de l'angle du triangle ABC + * \author NB - PhyTI + * + * \param[in] Ai les coordonnees + * \param[in] Aj + * \param[in] Bi + * \param[in] Bj + * \param[in] Ci + * \param[in] Cj + * + * \return le sinus non normalise + * + * Cette fonction est utile pour determiner si un triangle ABC + * est donne dans l'ordre trigo. + * Signe > 0: sens trigo, + * signe < 0: sens antitrigo + * = 0: plat + */ +inline int sinus_triangle(int Ai, int Aj, int Bi, int Bj, int Ci, int Cj) +{ + return (((Bi-Ai)*(Cj-Aj)) - ((Ci-Ai)*(Bj-Aj))) ; +} + + +/** + * \fn void recopie_vecteur(int *in, int *out, int dim) + * + * \brief recopie le vecteur out vers in + * \author NB - PhyTI + * + * \param[in] in vecteur d'entree + * \param[out] out vecteur recopier + * \param[in] dim longueur du vecteur + */ +void recopie_vecteur(int *in, int *out, int dim) +{ + int n ; + for (n=0; n + +void tic(struct timeval* temps, char* texte) ; +double toc(struct timeval start, char* texte) ; + +void min_max_int1d(int *val_min, int *val_max, int *vect, int dim) ; +void min_max_ushort1d(int *val_min, int *val_max, unsigned short *vect, int dim) ; + +#define min(a,b) ((a)<(b)) ? (a) : (b) +#define max(a,b) ((a)>(b)) ? (a) : (b) + + +inline int test_inf(double arg1, double arg2); + +inline int sign_diff_ou_egal_zero(int val1, int val2); +inline int sign_diff_strict(int val1, int val2); + +inline int sinus_triangle(int Ai, int Aj, int Bi, int Bj, int Ci, int Cj); + +void recopie_vecteur(int *in, int *out, int dim) ; + + +#endif diff --git a/lniv.c b/lniv.c new file mode 100644 index 0000000..6757945 --- /dev/null +++ b/lniv.c @@ -0,0 +1,186 @@ +/** + * \file nliv.c + * \brief test de reduction par MV et contriante de ligne de niveaux + * \author NB - PhyTI + * \version x.x + * \date 6 mai 2011 + * + */ + +// protection +#ifdef __PROTECT +#include +#endif + + +#include +#ifdef __MULTI_THREAD +#include +#endif + +#include "lib_alloc.h" +#include "lib_images.h" +#include "lib_math.h" +#include "lib_pretraitement.h" + +#include "lib_lniv.h" + + +int main(int argc, char **argv) +{ +// protection light +#ifdef __PROTECT + if (ptrace(PTRACE_TRACEME, 0, 1, 0) < 0) return(0) ; +#endif + + /* declaration des variables */ + int ret ; + int Bin_file = 0 ; + int Prof ; /* profondeur en octets */ + int I_dim ; /* hauteur de l'image */ + int J_dim ; /* largeur de l'image */ + int Nb_level ; /* dynamique de l'image */ + char *File_name ; + char *PARAM = NULL ; + + /* images */ + int **Image_in1, **Image_out1 ; + int **Image_in2=NULL, **Image_in3=NULL ; + int **Image_out2=NULL, **Image_out3=NULL ; + + + /* variables de calculs */ + struct timeval chrono ; + + /* debug : affichage snake */ + int Verbose = 1 ; + int Display = 1 ; + + + /* lecture argument entree (basique!) */ + if (argc == 1) + { + fprintf(stderr, "USAGE : LNIV pgm_file \n") ; + fprintf(stderr, + "\n" + "\n" + "\n" + "\n") ; + return(0) ; + } + File_name = argv[1] ; + + /* verif type image (pgm 8/16) */ + ret = type_image_ppm(&Prof, &I_dim, &J_dim, &Nb_level, File_name) ; + Nb_level++ ; + + if (ret == 0) + { + /* tentative image bin */ + ret = load_dim_image_bin(&I_dim, &J_dim, File_name) ; + if (ret != 1) + { + printf("format non pris en charge ... exit\n") ; + return(0) ; + } + Nb_level = 65536 ; // quantif 16 bits pour les bin + Bin_file = 1 ; + Prof = 1 ; + } + + /* infos */ + if (Verbose) + { + printf("Image : %s\n", File_name) ; + printf("lecture OK : %d\n", ret) ; + printf("Image (%d x %d) pixels\n", I_dim, J_dim) ; + printf("Dynamique : %d\n", Nb_level) ; + printf("Canaux : %d\n", Prof) ; + } ; + + /* Allocation */ + Image_in1 = new_matrix_int(I_dim, J_dim) ; + Image_out1 = new_matrix_int(I_dim, J_dim) ; + if (Prof == 3) + { + Image_in2 = new_matrix_int(I_dim, J_dim) ; + Image_out2 = new_matrix_int(I_dim, J_dim) ; + Image_in3 = new_matrix_int(I_dim, J_dim) ; + Image_out3 = new_matrix_int(I_dim, J_dim) ; + } + + /* chargement image d'entree */ + tic(&chrono, NULL) ; + if (Bin_file) + load_bin2int(Image_in1, I_dim, J_dim, Nb_level, File_name) ; + else + if (Prof == 1) + load_pgm2int(Image_in1, I_dim, J_dim, Nb_level, File_name) ; + else /* (Prof == 3) */ + { + load_ppm2int(Image_in1, Image_in2, Image_in3, I_dim, J_dim, File_name ) ; + //RGB2YUV(Image_in1[0], Image_in2[0], Image_in3[0], I_dim*J_dim) ; + } + + toc(chrono, "temps chargement image") ; + if (Display) image16(Image_in1, I_dim, J_dim) ; + if ((Display)&&(Prof == 3)) image16(Image_in2, I_dim, J_dim) ; + if ((Display)&&(Prof == 3)) image16(Image_in3, I_dim, J_dim) ; + + /* sequence de parametre */ + if (argc >= 3) PARAM = argv[2] ; + + double poids = 15.0 ; + int nb_iter = 15 ; + if (Prof == 1) + { + if (Verbose) + { + printf("poids contrainte : %.3f\n", poids) ; + printf("nombre d'iteration GN : %d\n", nb_iter) ; + } + tic(&chrono, NULL) ; + mv_gaussien_lniv(Image_in1, Image_out1, I_dim, J_dim, poids, nb_iter) ; + toc(chrono, "temps lniv_image") ; + + wimage16(Image_out1, I_dim, J_dim, "output.pgm") ; + + } + else + { + if (Verbose) + { + printf("image couleur (YUV)\n") ; + printf("poids contrainte : %.3f\n", poids) ; + printf("nombre d'iteration GN : %d\n", nb_iter) ; + } + tic(&chrono, NULL) ; + + mv_gaussien_lniv(Image_in1, Image_out1, I_dim, J_dim, poids, nb_iter) ; + mv_gaussien_lniv(Image_in2, Image_out2, I_dim, J_dim, poids, nb_iter) ; + mv_gaussien_lniv(Image_in3, Image_out3, I_dim, J_dim, poids, nb_iter) ; + + toc(chrono, "temps lniv_image") ; + + image16(Image_out1, I_dim, J_dim) ; + image16(Image_out2, I_dim, J_dim) ; + image16(Image_out3, I_dim, J_dim) ; + write_intRGB2ppm8(Image_out1, Image_out2, Image_out3, I_dim, J_dim, "output.ppm") ; + + } + + + /* Delete */ + del_matrix_int(Image_in1, I_dim) ; + del_matrix_int(Image_out1, I_dim) ; + if (Prof == 3) + { + del_matrix_int(Image_in2, I_dim) ; + del_matrix_int(Image_out2, I_dim) ; + del_matrix_int(Image_in3, I_dim) ; + del_matrix_int(Image_out3, I_dim) ; + } + + + return 1 ; +} diff --git a/main.cu b/main.cu new file mode 100644 index 0000000..d9db4f8 --- /dev/null +++ b/main.cu @@ -0,0 +1,198 @@ +// libs C +#include +#include +#include +#include + +#include "lib_lniv.h" + +// libs NV +#include +#include + +// lib spec +#include "defines.h" +#include "levelines_common.h" + +#include "levelines_kernels.cu" + + +__global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){ + + unsigned int i = blockIdx.x*blockDim.x + threadIdx.x; + unsigned int j = blockIdx.y*blockDim.y + threadIdx.y; + unsigned int pos = i*L +j ; + + ptr2[pos] = val - ptr1[pos] ; + +} + +int main(int argc, char **argv){ + + + //float coef_regul = atof( argv[1] ) ; + + unsigned int timer ; + cutilCheckError( cutCreateTimer(&timer) ); + cutilCheckError( cutResetTimer(timer) ); + /***************************** + * CHARGEMENT IMAGE + *****************************/ + char* image_path = argv[argc-1]; + char* image_out = "./image_out.pgm" ; + unsigned int * h_data = NULL ; + unsigned int * h_data_out = NULL ; + unsigned int H, L, size; + + cutilCheckError( cutStartTimer(timer) ); + cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H)); + cutilCheckError( cutStopTimer(timer) ); + + size = H * L * sizeof( unsigned int ); + printf("Loaded %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path, cutGetTimerValue(timer)); + + + //essai alloc mapped + /* + cutilCheckError( cutResetTimer(timer) ); + cutilCheckError( cutStartTimer(timer) ); + unsigned int * h_ptr1, * d_ptr1 ; + unsigned int * h_ptr2, * d_ptr2 ; + int h = ; + int l = h ; + int mem = h*l*sizeof(unsigned int) ; + cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost)); + cutilCheckError( cutStopTimer(timer) ); + printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ; + + cutilCheckError( cutStartTimer(timer) ); + cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped)); + cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped)); + cutilCheckError( cutStopTimer(timer) ); + printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ; + + for (int i = 0; i>>(d_ptr1, d_ptr2, l, 255) ; + + cutilCheckError( cutStopTimer(timer) ); + printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ; + + char * image_1 = "./image_1.pgm" ; + char * image_2 = "./image_2.pgm" ; + + cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ; + cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ; + */ + /***************************** + * FIN CHARGEMENT IMAGE + *****************************/ + + + + // use device with highest Gflops/s + cudaSetDevice( cutGetMaxGflopsDeviceId() ); + + + /* + cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) ); + cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) ); + cutilSafeCall( cudaThreadSynchronize() ); + cutilCheckError( cutResetTimer(hTimer) ); + cutilCheckError( cutStartTimer(hTimer) ); + + cutilSafeCall( cudaThreadSynchronize() ); + cutilCheckError( cutStopTimer(hTimer) ); + gpuTime = cutGetTimerValue(hTimer) / (float)iterations; + */ + + cutilCheckError( cutResetTimer(timer) ); + cutilCheckError( cutStartTimer(timer) ); + // allocation mem GPU + unsigned int * d_directions =NULL ; + unsigned int * d_lniv, * d_estim = NULL ; + + cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ; + cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) ); + cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) ); + + + // allocate array and copy image data + cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned); + cudaArray * array_img_in, *array_img_estim, *array_img_lniv; + cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H )); + cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ; + cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc)); + cutilCheckError( cutStopTimer(timer) ); + + cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H )); + cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc)); + + cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H )); + cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc)); + + printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ; + /***************************** + * APPELS KERNELS et chronos + *****************************/ + cutilCheckError( cutResetTimer(timer) ); + cutilCheckError( cutStartTimer(timer) ); + + unsigned int iter , nb_iter = 15 ; + unsigned int poids = 15 ; + dim3 dimBlock(8,8,1) ; + dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ; + unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ; + // init image estimee avec image_in + kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7); + + printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ; + + for ( iter =0 ; iter < nb_iter ; iter++ ) + { + cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ; + kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H ); + cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ; + kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ; + } + + cudaThreadSynchronize(); + + cutilCheckError( cutStopTimer(timer) ); + printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ; + printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ; + + /************************** + * VERIFS + **************************/ + //trace des lniv sur grille de 'pas x pas' + //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ; + //cudaThreadSynchronize(); + + // enregistrement image lniv GPU + h_data_out = new unsigned int[H*L] ; + if ( h_data_out != NULL) + cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) ); + else + printf("Echec allocation mem CPU\n"); + + cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ; + + // calcul lniv CPU + + + // TODO verifier pourquoi les deux lignes suivantes produisent une erreur + //cutilExit(argc, argv); + //cudaThreadExit(); + return EXIT_SUCCESS ; +} diff --git a/main_gmem.cu b/main_gmem.cu new file mode 100644 index 0000000..a33fbff --- /dev/null +++ b/main_gmem.cu @@ -0,0 +1,143 @@ +// libs C +#include +#include +#include +#include + +#include "lib_lniv.h" + +// libs NV +#include +#include + +// lib spec +#include "defines.h" +#include "levelines_common.h" + +#include "levelines_kernels.cu" + + +int main(int argc, char **argv){ + + + //float coef_regul = atof( argv[1] ) ; + + unsigned int timer ; + float time_cumul = 0.0 ; + cutilCheckError( cutCreateTimer(&timer) ); + cutilCheckError( cutResetTimer(timer) ); + + /***************************** + * CHARGEMENT IMAGE + *****************************/ + char* image_path = argv[argc-1]; + char* image_out = "./image_out.pgm" ; + unsigned int * h_data = NULL ; + unsigned int * h_data_out = NULL ; + unsigned int H, L, size; + + cutilCheckError( cutStartTimer(timer) ); + cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H)); + cutilCheckError( cutStopTimer(timer) ); + + size = H * L * sizeof( unsigned int ); + printf("Loaded %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path, cutGetTimerValue(timer)); + time_cumul += cutGetTimerValue(timer) ; + /***************************** + * FIN CHARGEMENT IMAGE + *****************************/ + + + + // use device with highest Gflops/s + cudaSetDevice( cutGetMaxGflopsDeviceId() ); + + /* + cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) ); + cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) ); + cutilSafeCall( cudaThreadSynchronize() ); + cutilCheckError( cutResetTimer(hTimer) ); + cutilCheckError( cutStartTimer(hTimer) ); + + cutilSafeCall( cudaThreadSynchronize() ); + cutilCheckError( cutStopTimer(hTimer) ); + gpuTime = cutGetTimerValue(hTimer) / (float)iterations; + */ + + cutilCheckError( cutResetTimer(timer) ); + cutilCheckError( cutStartTimer(timer) ); + // allocation mem GPU + unsigned int * d_directions =NULL ; + unsigned int * d_lniv, * d_estim, * d_data ; + cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ; + cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) ); + cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) ); + cutilSafeCall( cudaMalloc( (void**) &d_data, size ) ); + cutilCheckError( cutStopTimer(timer) ); + printf("Temps alloc global mem : %f ms\n", cutGetTimerValue(timer)) ; + time_cumul += cutGetTimerValue(timer) ; + + // transfert data -> GPU global mem + cutilCheckError( cutStartTimer(timer) ); + cutilSafeCall( cudaMemcpy( d_data , h_data, size, cudaMemcpyHostToDevice) ); + cutilCheckError( cutStopTimer(timer) ); + printf("Temps transferts en global mem : %f ms\n", cutGetTimerValue(timer)) ; + time_cumul += cutGetTimerValue(timer) ; + /***************************** + * APPELS KERNELS et chronos + *****************************/ + cutilCheckError( cutResetTimer(timer) ); + cutilCheckError( cutStartTimer(timer) ); + + unsigned int iter , nb_iter = 15 ; + unsigned int poids = 15 ; + dim3 dimBlock(8,8,1) ; + dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ; + unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ; + // init image estimee avec image_in + kernel_init_estim_from_img_in_global_mem<<< dimGrid, dimBlock, 0 >>>(d_data, d_estim, L, H, 7); + cutilCheckError( cutStopTimer(timer) ); + printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)) ; + time_cumul += cutGetTimerValue(timer) ; + + + // iterations + cutilCheckError( cutStartTimer(timer) ); + printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ; + for ( iter =0 ; iter < nb_iter ; iter++ ) + { + kernel_levelines_global_mem<<< dimGrid, dimBlock, 0 >>>( d_estim, d_lniv, L, H ); + kernel_estim_next_step_global_mem<<< dimGrid, dimBlock, 0 >>>(d_estim, d_lniv, d_data, L, H, poids) ; + } + cutilCheckError( cutStopTimer(timer) ); + printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)) ; + time_cumul += cutGetTimerValue(timer) ; + + printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ; + printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ; + printf("Total execution : %f ms\n", time_cumul) ; + + /************************** + * VERIFS + **************************/ + //trace des lniv sur grille de 'pas x pas' + //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ; + //cudaThreadSynchronize(); + + // enregistrement image lniv GPU + h_data_out = new unsigned int[H*L] ; + if ( h_data_out != NULL) + cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) ); + else + printf("Echec allocation mem CPU\n"); + + cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ; + + // calcul lniv CPU + + + // TODO verifier pourquoi les deux lignes suivantes produisent une erreur + //cutilExit(argc, argv); + //cudaThreadExit(); + return EXIT_SUCCESS ; +} diff --git a/obj/release/levelines_kernels.cu.o b/obj/release/levelines_kernels.cu.o new file mode 100644 index 0000000..69cdd7a Binary files /dev/null and b/obj/release/levelines_kernels.cu.o differ diff --git a/obj/release/main.cpp.o b/obj/release/main.cpp.o new file mode 100644 index 0000000..da796c7 Binary files /dev/null and b/obj/release/main.cpp.o differ diff --git a/obj/release/main.cu.o b/obj/release/main.cu.o new file mode 100644 index 0000000..20ef00a Binary files /dev/null and b/obj/release/main.cu.o differ