10 #include <cuda_runtime.h>
11 #include <cutil_inline.h>
15 #include "levelines_common.h"
16 #include "levelines_kernels.cu"
19 int main(int argc, char **argv){
21 // use device with highest Gflops/s
25 cutilCheckError( cutCreateTimer(&timer) );
26 cutilCheckError( cutResetTimer(timer) );
27 cutilCheckError( cutStartTimer(timer) );
30 //alloc bidon pour anticiper l'initialisation du GPU
32 cutilSafeCall( cudaMalloc( (void**) &d_bidon, sizeof(short))) ;
34 //float coef_regul = atof( argv[1] ) ;
36 /*****************************
38 *****************************/
39 char* image_path = argv[argc-1];
40 unsigned int r = atoi(argv[1]) ;
41 bool seq_out = atoi(argv[2]) ;
42 unsigned int iter , nb_iter = atoi(argv[3]) ;
43 unsigned int poids = 15 ;
44 char * image_out_base = "./image_out" ;
45 char * pgm_ext = ".pgm" ;
47 unsigned int * h_data = NULL ;
48 unsigned int * h_data_out = NULL ;
49 unsigned int H, L, size, psize ;
52 cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
53 cutilCheckError( cutStopTimer(timer) );
55 size = H * L * sizeof( unsigned int );
56 psize = (r-1)*PSIZE_I*sizeof(ushort) ;
58 printf("Longueur des chemins = %d pixels\n", r);
59 printf("Init GPU + Image %s %d x %d = %d pixels en %f ms,\n", image_path, L, H, size, cutGetTimerValue(timer));
61 /*****************************
62 * FIN CHARGEMENT IMAGE
63 *****************************/
65 cutilCheckError( cutResetTimer(timer) );
66 cutilCheckError( cutStartTimer(timer) );
68 unsigned int * d_directions =NULL ;
69 unsigned int * d_lniv, * d_estim = NULL ;
72 cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
73 cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
74 cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
75 cutilSafeCall( cudaMalloc( (void**) &d_paths, psize ) );
78 // allocate array and copy image data
79 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
80 //cudaChannelFormatDesc channelDescP = cudaCreateChannelDesc(8, 8, 0, 0, cudaChannelFormatKindSigned);
81 cudaChannelFormatDesc channelDescP = cudaCreateChannelDesc<ushort>();
83 cudaArray * array_img_in, *array_img_estim, *array_img_lniv, *array_paths;
84 cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
85 cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
86 cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
88 cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H ));
89 cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
91 cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H ));
92 cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
94 cutilSafeCall( cudaMallocArray( &array_paths, &channelDescP, (r-1), PSIZE_I ));
95 cutilSafeCall( cudaBindTextureToArray( tex_paths, array_paths, channelDescP));
97 cutilCheckError( cutStopTimer(timer) );
98 printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
100 /*****************************
101 * GENERATION des CHEMINS
102 *****************************/
103 cutilCheckError( cutResetTimer(timer) );
104 cutilCheckError( cutStartTimer(timer) );
106 dim3 dimBlock(1,1,1) ;
107 dim3 dimGrid(1,1,1) ;
108 // calcul des chemins
109 kernel_calcul_paths<<< dimGrid, dimBlock, 0 >>>(d_paths, r);
111 // copie du tableau en texture
112 cutilSafeCall( cudaMemcpyToArray( array_paths, 0, 0, d_paths, psize, cudaMemcpyDeviceToDevice)) ;
114 cutilCheckError( cutStopTimer(timer) );
115 printf("Temps generation chemin + transfert en texture : %f ms\n", cutGetTimerValue(timer)) ;
117 /*****************************
118 * APPELS KERNELS et chronos
119 *****************************/
120 dimBlock = dim3(8,8,1) ;
121 dimGrid = dim3( H / dimBlock.x,L / dimBlock.y, 1 ) ;
123 // pour enregistrement image lniv GPU
125 h_data_out = new unsigned int[H*L] ;
127 //init image estimee avec image_in
128 cutilCheckError( cutResetTimer(timer) );
129 cutilCheckError( cutStartTimer(timer) );
130 kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
134 kernel_neutre_img2estim<<< dimGrid, dimBlock, 0>>>(d_estim, L, H);
136 cudaThreadSynchronize() ;
137 cutilCheckError( cutStopTimer(timer) );
138 printf("Temps kernel init : %f ms\n", cutGetTimerValue(timer)) ;
141 cutilCheckError( cutResetTimer(timer) );
142 cutilCheckError( cutStartTimer(timer) );
143 cutilSafeCall( cudaMemcpyFromArray( d_estim, array_img_in, 0, 0, size, cudaMemcpyDeviceToDevice)) ;
144 cutilCheckError( cutStopTimer(timer) );
145 printf("Temps memcpyFromArray : %f ms\n", cutGetTimerValue(timer)) ;
147 printf("Grille : %d x %d de Blocs : %d x %d \n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y) ;
149 cutilCheckError( cutResetTimer(timer) );
150 cutilCheckError( cutStartTimer(timer) );
152 for ( iter =0 ; iter < nb_iter ; iter++ )
154 cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
155 //version avec/sans tentative d'utilisation de smem ( pas probante )
156 kernel_levelines_texture<<< dimGrid, dimBlock, 24*(r-1)*sizeof(short) >>>( d_lniv, L, H, r );
157 cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
159 sprintf(image_out, "%s%d%s", image_out_base, iter+1, pgm_ext) ;
160 printf("chaine : %s\n", image_out);
161 cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
162 cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
164 kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
167 cudaThreadSynchronize();
169 cutilCheckError( cutStopTimer(timer) );
170 printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
171 printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
173 /**************************
175 **************************/
177 /**************************************************
178 * recuperation matrice des chemins pour affichage
179 **************************************************/
181 int2 * h_paths = new int2[(r-1)*PSIZE_I] ;
182 cutilSafeCall( cudaMemcpyFromArray(h_paths , array_paths, 0, 0, psize, cudaMemcpyDeviceToHost) );
185 printf("matrice Di");
186 for(int idpath=0; idpath< PSIZE_I; idpath++){b
189 for(int idpix=0; idpix< r-1; idpix++){
190 printf(" % d ", h_paths[idpath*(r-1) + idpix].x );
192 printf("\t// %d°", idpath*15) ;
195 printf("\nmatrice Dj");
196 for(int idpath=0; idpath< PSIZE_I; idpath++){
198 for(int idpix=0; idpix< r-1; idpix++){
199 printf(" % d ", h_paths[idpath*(r-1) + idpix].y);
201 printf("\t// %d°", idpath*15) ;
205 /***************************************************
206 * fin verif visuelle matrices des chemins
207 ***************************************************/
209 /***************************************************
210 * execution sequentielle pour comparaison
211 * la comparaison n'est pertinente que
212 * si d_lniv contient les lniv de l'image se départ
213 **************************************************/
216 cutilCheckError( cutResetTimer(timer) );
217 cutilCheckError( cutStartTimer(timer) );
219 h_data_out = new unsigned int[H*L] ;
220 int * dout = new int[H*L] ;
221 for ( iter =0 ; iter < nb_iter ; iter++ ){
222 for (int i=r; i<= H-r; i++){
223 for (int j=r; j<= L-r; j++){
224 h_data_out[i*L+j] = lniv4_value(h_data, h_paths, i, j, H, L, &dout[i*L+j], r) ;
228 cutilCheckError( cutStopTimer(timer) );
229 printf("Execution sequentielle CPU : %f ms\n", cutGetTimerValue(timer)) ;
232 unsigned int * h_lniv = new unsigned int[H*L] ;
233 int pos, cpt_err=0, cpt_pix=0 ;
234 cutilSafeCall( cudaMemcpy(h_lniv , d_lniv, size, cudaMemcpyDeviceToHost) );
235 for ( iter =0 ; iter < nb_iter ; iter++ ){
236 for (int i=r; i<= H-r; i++){
237 for (int j=r; j<= L-r; j++){
239 if ( h_data_out[ pos ] != h_lniv[ pos ] ) {
241 printf(" pixel ( %d , %d ) -> GPU= %d CPU= %d \n", i, j, h_lniv[pos], h_data_out[pos]);
247 printf("TAUX ERREUR GPU/CPU : %d / %d \n", cpt_err, cpt_pix );
249 //trace des lniv sur grille de 'pas x pas'
250 //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255, r) ;
253 if ( h_data_out != NULL)
254 cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
256 printf("Echec allocation mem CPU\n");
257 sprintf(image_out, "%s%d%s", image_out_base, iter+1, pgm_ext) ;
258 cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
261 // TODO verifier pourquoi les deux lignes suivantes produisent une erreur
262 //cutilExit(argc, argv);
264 return EXIT_SUCCESS ;