10 #include <cuda_runtime.h>
11 #include <cutil_inline.h>
15 #include "levelines_common.h"
17 #include "levelines_kernels.cu"
20 __global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){
22 unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
23 unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
24 unsigned int pos = i*L +j ;
26 ptr2[pos] = val - ptr1[pos] ;
30 int main(int argc, char **argv){
33 //float coef_regul = atof( argv[1] ) ;
36 cutilCheckError( cutCreateTimer(&timer) );
37 cutilCheckError( cutResetTimer(timer) );
38 /*****************************
40 *****************************/
41 char* image_path = argv[argc-1];
42 char* image_out = "./image_out.pgm" ;
43 unsigned int * h_data = NULL ;
44 unsigned int * h_data_out = NULL ;
45 unsigned int H, L, size;
47 cutilCheckError( cutStartTimer(timer) );
48 cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
49 cutilCheckError( cutStopTimer(timer) );
51 size = H * L * sizeof( unsigned int );
52 printf("Loaded %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path, cutGetTimerValue(timer));
57 cutilCheckError( cutResetTimer(timer) );
58 cutilCheckError( cutStartTimer(timer) );
59 unsigned int * h_ptr1, * d_ptr1 ;
60 unsigned int * h_ptr2, * d_ptr2 ;
63 int mem = h*l*sizeof(unsigned int) ;
64 cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
65 cutilCheckError( cutStopTimer(timer) );
66 printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ;
68 cutilCheckError( cutStartTimer(timer) );
69 cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped));
70 cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped));
71 cutilCheckError( cutStopTimer(timer) );
72 printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ;
74 for (int i = 0; i<h*l ; i++) h_ptr1[i] = 200 ;
76 cutilCheckError( cutStartTimer(timer) );
77 cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr1, (void *)h_ptr1, 0));
78 cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr2, (void *)h_ptr2, 0));
79 cutilCheckError( cutStopTimer(timer) );
80 printf("Temps cumul get pointer Mapped : %f ms\n", cutGetTimerValue(timer)) ;
82 cutilCheckError( cutStartTimer(timer) );
83 dim3 blocks(16,16,1) ;
84 dim3 grid( h / blocks.x, l / blocks.y, 1 ) ;
86 kernel_debil<<< grid, blocks >>>(d_ptr1, d_ptr2, l, 255) ;
88 cutilCheckError( cutStopTimer(timer) );
89 printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ;
91 char * image_1 = "./image_1.pgm" ;
92 char * image_2 = "./image_2.pgm" ;
94 cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ;
95 cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ;
97 /*****************************
98 * FIN CHARGEMENT IMAGE
99 *****************************/
103 // use device with highest Gflops/s
104 cudaSetDevice( cutGetMaxGflopsDeviceId() );
108 cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
109 cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
110 cutilSafeCall( cudaThreadSynchronize() );
111 cutilCheckError( cutResetTimer(hTimer) );
112 cutilCheckError( cutStartTimer(hTimer) );
114 cutilSafeCall( cudaThreadSynchronize() );
115 cutilCheckError( cutStopTimer(hTimer) );
116 gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
119 cutilCheckError( cutResetTimer(timer) );
120 cutilCheckError( cutStartTimer(timer) );
121 // allocation mem GPU
122 unsigned int * d_directions =NULL ;
123 unsigned int * d_lniv, * d_estim = NULL ;
125 cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
126 cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
127 cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
130 // allocate array and copy image data
131 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
132 cudaArray * array_img_in, *array_img_estim, *array_img_lniv;
133 cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
134 cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
135 cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
136 cutilCheckError( cutStopTimer(timer) );
138 cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H ));
139 cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
141 cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H ));
142 cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
144 printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
145 /*****************************
146 * APPELS KERNELS et chronos
147 *****************************/
148 cutilCheckError( cutResetTimer(timer) );
149 cutilCheckError( cutStartTimer(timer) );
151 unsigned int iter , nb_iter = 15 ;
152 unsigned int poids = 15 ;
153 dim3 dimBlock(8,8,1) ;
154 dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
155 unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
156 // init image estimee avec image_in
157 kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
159 printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
161 for ( iter =0 ; iter < nb_iter ; iter++ )
163 cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
164 kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H );
165 cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
166 kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
169 cudaThreadSynchronize();
171 cutilCheckError( cutStopTimer(timer) );
172 printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
173 printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
175 /**************************
177 **************************/
178 //trace des lniv sur grille de 'pas x pas'
179 //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
180 //cudaThreadSynchronize();
182 // enregistrement image lniv GPU
183 h_data_out = new unsigned int[H*L] ;
184 if ( h_data_out != NULL)
185 cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
187 printf("Echec allocation mem CPU\n");
189 cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
194 // TODO verifier pourquoi les deux lignes suivantes produisent une erreur
195 //cutilExit(argc, argv);
197 return EXIT_SUCCESS ;