10 #include <cuda_runtime.h>
11 #include <cutil_inline.h>
15 #include "levelines_common.h"
17 #include "levelines_kernels.cu"
20 int main(int argc, char **argv){
23 //float coef_regul = atof( argv[1] ) ;
26 float time_cumul = 0.0 ;
27 cutilCheckError( cutCreateTimer(&timer) );
28 cutilCheckError( cutResetTimer(timer) );
30 /*****************************
32 *****************************/
33 char* image_path = argv[argc-1];
34 char* image_out = "./image_out.pgm" ;
35 unsigned int * h_data = NULL ;
36 unsigned int * h_data_out = NULL ;
37 unsigned int H, L, size;
39 cutilCheckError( cutStartTimer(timer) );
40 cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
41 cutilCheckError( cutStopTimer(timer) );
43 size = H * L * sizeof( unsigned int );
44 printf("Loaded %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path, cutGetTimerValue(timer));
45 time_cumul += cutGetTimerValue(timer) ;
46 /*****************************
47 * END OF IMAGE LOADING
48 *****************************/
52 // use device with highest Gflops/s
53 cudaSetDevice( cutGetMaxGflopsDeviceId() );
56 cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
57 cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
58 cutilSafeCall( cudaThreadSynchronize() );
59 cutilCheckError( cutResetTimer(hTimer) );
60 cutilCheckError( cutStartTimer(hTimer) );
62 cutilSafeCall( cudaThreadSynchronize() );
63 cutilCheckError( cutStopTimer(hTimer) );
64 gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
67 cutilCheckError( cutResetTimer(timer) );
68 cutilCheckError( cutStartTimer(timer) );
70 unsigned int * d_directions =NULL ;
71 unsigned int * d_lniv, * d_estim, * d_data ;
72 cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
73 cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
74 cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
75 cutilSafeCall( cudaMalloc( (void**) &d_data, size ) );
76 cutilCheckError( cutStopTimer(timer) );
77 printf("Temps alloc global mem : %f ms\n", cutGetTimerValue(timer)) ;
78 time_cumul += cutGetTimerValue(timer) ;
80 // copy the input image from host memory to GPU global memory
81 cutilCheckError( cutStartTimer(timer) );
82 cutilSafeCall( cudaMemcpy( d_data , h_data, size, cudaMemcpyHostToDevice) );
83 cutilCheckError( cutStopTimer(timer) );
84 printf("Temps transferts en global mem : %f ms\n", cutGetTimerValue(timer)) ;
85 time_cumul += cutGetTimerValue(timer) ;
86 /*****************************
87 * KERNEL LAUNCHES and timing
88 *****************************/
89 cutilCheckError( cutResetTimer(timer) );
90 cutilCheckError( cutStartTimer(timer) );
92 unsigned int iter , nb_iter = 15 ;
93 unsigned int poids = 15 ;
94 dim3 dimBlock(8,8,1) ;
95 dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
96 unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
97 // initialize the estimated image from the input image
98 kernel_init_estim_from_img_in_global_mem<<< dimGrid, dimBlock, 0 >>>(d_data, d_estim, L, H, 7);
99 cutilCheckError( cutStopTimer(timer) );
100 printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)) ;
101 time_cumul += cutGetTimerValue(timer) ;
105 cutilCheckError( cutStartTimer(timer) );
106 printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
107 for ( iter =0 ; iter < nb_iter ; iter++ )
109 kernel_levelines_global_mem<<< dimGrid, dimBlock, 0 >>>( d_estim, d_lniv, L, H );
110 kernel_estim_next_step_global_mem<<< dimGrid, dimBlock, 0 >>>(d_estim, d_lniv, d_data, L, H, poids) ;
112 cutilCheckError( cutStopTimer(timer) );
113 printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)) ;
114 time_cumul += cutGetTimerValue(timer) ;
116 printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
117 printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
118 printf("Total execution : %f ms\n", time_cumul) ;
120 /**************************
122 **************************/
123 // draw the level lines (lniv) on a 'pas x pas' (step x step) grid
124 //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
125 //cudaThreadSynchronize();
127 // copy the GPU result back to the host and save it as an image
128 h_data_out = new unsigned int[H*L] ;
129 if ( h_data_out != NULL)
130 cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
132 printf("Echec allocation mem CPU\n");
134 cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
139 // TODO: find out why the two following lines produce an error
140 //cutilExit(argc, argv);
142 return EXIT_SUCCESS ;