#include "levelines_kernels.cu"
+const float tang[] = {0, 0.268, 0.577, 1} ; // tangents of 0, 15, 30 and 45 degrees; used by genPaths as per-step slopes
-__global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){
-
- unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
- unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
- unsigned int pos = i*L +j ;
- ptr2[pos] = val - ptr1[pos] ;
+// Rasterizes one straight path of r points into tile `idpath` of h_paths and
+// records the tile coordinates of its points 1..r-1 in p_i / p_j (point 0 is
+// drawn in h_paths but not recorded).
+//
+//  slope      : minor-axis advance per step (a tangent value from tang[])
+//  majorIsRow : true  -> the row index ic takes one step per point,
+//               false -> the column index jc takes one step per point
+//  flipMajor  : the stepping coordinate runs r-1 down to 0 instead of 0 up to r-1
+//  flipMinor  : the rounded coordinate is mirrored to r-1 - value
+static void genPaths_trace(unsigned int *h_paths, int *p_i, int *p_j, unsigned int r,
+                           unsigned int idpath, float slope,
+                           bool majorIsRow, bool flipMajor, bool flipMinor){
+ const float offset = 0.5f ;      // added before floor() to round to nearest
+ const int last = (int)r - 1 ;    // highest valid coordinate inside the r x r tile
+
+ for (int p=0 ; p< (int)r ; p++){ // the r points of the path
+   int major = flipMajor ? last - p : p ;
+   int minor = (int)floor(slope*p + offset) ;
+   if ( flipMinor ) minor = last - minor ;
+
+   const int ic = majorIsRow ? major : minor ;
+   const int jc = majorIsRow ? minor : major ;
+
+   h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+   if ( p > 0 ){
+     p_i[idpath*(r-1)+p-1] = ic ;
+     p_j[idpath*(r-1)+p-1] = jc ;
+   }
+ }
+}
+
+// Generates 24 straight paths of r points each.
+//
+//  h_paths : 24 stacked r x r tiles (24*r*r entries); the pixels of path k are
+//            set to 255 in tile k. Other entries are not written, so the
+//            caller is expected to clear the buffer beforehand.
+//  p_i,p_j : 24*(r-1) entries each; row / column of points 1..r-1 of every path.
+//  r       : number of points per path and side length of each tile.
+//
+// Paths are emitted in the same order as before: for each of the four
+// quadrants, an "inf" group of 4 paths using tang[0..3] followed by a "sup"
+// group of 2 paths using tang[2] then tang[1]. With 15 degrees between
+// consecutive paths (the caller's printout labels path k as 15*k), the groups
+// correspond to:
+//   Q1 inf 0,15,30,45      Q1 sup 60,75
+//   Q2 inf 90,105,120,135  Q2 sup 150,165
+//   Q3 inf 180,195,210,225 Q3 sup 240,255
+//   Q4 inf 270,285,300,315 Q4 sup 330,345
+void genPaths(unsigned int *h_paths, int *p_i, int *p_j, unsigned int r ){
+ // One row per group, in emission order: { majorIsRow, flipMajor, flipMinor }
+ static const bool orient[8][3] = {
+   { false, false, true  },   // Q1 inf : jc = p       , ic = r-1 - round
+   { true , true , false },   // Q1 sup : ic = r-1 - p , jc = round
+   { true , true , true  },   // Q2 inf : ic = r-1 - p , jc = r-1 - round
+   { false, true , true  },   // Q2 sup : jc = r-1 - p , ic = r-1 - round
+   { false, true , false },   // Q3 inf : jc = r-1 - p , ic = round
+   { true , false, true  },   // Q3 sup : ic = p       , jc = r-1 - round
+   { true , false, false },   // Q4 inf : ic = p       , jc = round
+   { false, false, false }    // Q4 sup : jc = p       , ic = round
+ } ;
+
+ unsigned int idpath = 0 ;
+
+ for (int g=0 ; g< 8 ; g++){
+   const bool majorIsRow = orient[g][0] ;
+   const bool flipMajor  = orient[g][1] ;
+   const bool flipMinor  = orient[g][2] ;
+
+   if ( g % 2 == 0 ){
+     // "inf" group: the 4 slopes tang[0..3], in increasing order
+     for (int a=0 ; a< 4 ; a++){
+       genPaths_trace(h_paths, p_i, p_j, r, idpath, tang[a], majorIsRow, flipMajor, flipMinor) ;
+       idpath++ ;
+     }
+   } else {
+     // "sup" group: the 2 slopes tang[2] then tang[1]
+     for (int a=2 ; a>0 ; a--){
+       genPaths_trace(h_paths, p_i, p_j, r, idpath, tang[a], majorIsRow, flipMajor, flipMinor) ;
+       idpath++ ;
+     }
+   }
+ }
}
-int main(int argc, char **argv){
+int main(int argc, char **argv){
- //float coef_regul = atof( argv[1] ) ;
-
+ // use device with highest Gflops/s
+ cudaSetDevice( cutGetMaxGflopsDeviceId() );
unsigned int timer ;
cutilCheckError( cutCreateTimer(&timer) );
cutilCheckError( cutResetTimer(timer) );
- /*****************************
- * CHARGEMENT IMAGE
- *****************************/
- char* image_path = argv[argc-1];
- char* image_out = "./image_out.pgm" ;
- unsigned int * h_data = NULL ;
- unsigned int * h_data_out = NULL ;
- unsigned int H, L, size;
-
cutilCheckError( cutStartTimer(timer) );
- cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
- cutilCheckError( cutStopTimer(timer) );
+ // une alloc debile pour initialiser la carte GPU
+ int * d_void ;
+ cutilSafeCall( cudaMalloc( (void**) &d_void, 4)) ;
- size = H * L * sizeof( unsigned int );
- printf("Loaded %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path, cutGetTimerValue(timer));
-
+
+
+ /*********************************
+ * DEFINITION PARAMS CHEMINS
+ *********************************/
+ char* image_out = "./image_out.pgm" ;
+ int *p_i, *p_j ;
+ int2 * d_paths ;
+ unsigned int * h_paths ;
+ unsigned int R = atoi(argv[1]);
+
+
+ //unsigned int size = R * R * sizeof( unsigned int );
- //essai alloc mapped
- /*
- cutilCheckError( cutResetTimer(timer) );
- cutilCheckError( cutStartTimer(timer) );
- unsigned int * h_ptr1, * d_ptr1 ;
- unsigned int * h_ptr2, * d_ptr2 ;
- int h = ;
- int l = h ;
- int mem = h*l*sizeof(unsigned int) ;
- cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ;
- cutilCheckError( cutStartTimer(timer) );
- cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped));
- cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped));
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+ // allocation mem
+ int memsize = 24*(R-1)*sizeof(int2) ;
+ cutilSafeCall( cudaMalloc( (void**) &d_paths, memsize ) );
- for (int i = 0; i<h*l ; i++) h_ptr1[i] = 200 ;
-
- cutilCheckError( cutStartTimer(timer) );
- cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr1, (void *)h_ptr1, 0));
- cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr2, (void *)h_ptr2, 0));
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps cumul get pointer Mapped : %f ms\n", cutGetTimerValue(timer)) ;
- cutilCheckError( cutStartTimer(timer) );
- dim3 blocks(16,16,1) ;
- dim3 grid( h / blocks.x, l / blocks.y, 1 ) ;
+ h_paths = new unsigned int [24*R*R] ;
+ p_i = new int [24*(R-1)] ;
+ p_j = new int [24*(R-1)] ;
- kernel_debil<<< grid, blocks >>>(d_ptr1, d_ptr2, l, 255) ;
+ for (int j=0; j<24*R*R ; j++) h_paths[j] = 0 ;
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+ genPaths(h_paths, p_i, p_j, R) ;
+
+ printf("Rayon : %d pixels \n", R) ;
+
+ //matrice p_i
+ printf("P_I\n");
+ for (int idpath=0; idpath < 24; idpath++){
+ printf("\n");
+ for (int idpix=0; idpix < R-1; idpix++){
+ printf(" %d ", p_i[idpath*(R-1)+idpix]);
+ }
+ }
+ //matrice p_j
+ printf("\nP_J\n");
+ for (int idpath=0; idpath < 24; idpath++){
+ printf("\n");
+ for (int idpix=0; idpix < R-1; idpix++){
+ printf(" %d ", p_j[idpath*(R-1)+idpix]);
+ }
+ }
- char * image_1 = "./image_1.pgm" ;
- char * image_2 = "./image_2.pgm" ;
- cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ;
- cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ;
- */
/*****************************
- * FIN CHARGEMENT IMAGE
+ * APPELS KERNELS et chronos
*****************************/
+ dim3 dimBlock( 1, 1, 1 ) ;
+ dim3 dimGrid( 1, 1, 1 ) ;
+
+ kernel_calcul_paths<<< dimGrid, dimBlock, 0 >>>(d_paths, R) ;
-
- // use device with highest Gflops/s
- cudaSetDevice( cutGetMaxGflopsDeviceId() );
-
+ printf("\nGrille : %d x %d de Blocs : %d x %d \n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y) ;
- /*
- cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
- cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
- cutilSafeCall( cudaThreadSynchronize() );
- cutilCheckError( cutResetTimer(hTimer) );
- cutilCheckError( cutStartTimer(hTimer) );
-
- cutilSafeCall( cudaThreadSynchronize() );
- cutilCheckError( cutStopTimer(hTimer) );
- gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
- */
-
- cutilCheckError( cutResetTimer(timer) );
- cutilCheckError( cutStartTimer(timer) );
- // allocation mem GPU
- unsigned int * d_directions =NULL ;
- unsigned int * d_lniv, * d_estim = NULL ;
-
- cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
- cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
- cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
+
+ /**************************
+ * VERIFS
+ **************************/
+ int2 * paths = new int2[24*(R-1)] ;
+ cutilSafeCall( cudaMemcpy(paths , d_paths, memsize, cudaMemcpyDeviceToHost) );
- // allocate array and copy image data
- cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
- cudaArray * array_img_in, *array_img_estim, *array_img_lniv;
- cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
- cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
- cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
- cutilCheckError( cutStopTimer(timer) );
-
- cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H ));
- cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
-
- cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H ));
- cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
+ //matrice p_i
+ printf("P_I\n");
+ for (int idpath=0; idpath < 24; idpath++){
+ printf("\n");
+ for (int idpix=0; idpix < R-1; idpix++){
+ printf(" %d ", paths[idpath*(R-1)+idpix].x);
+ }
+ printf("\t // %d", 15*idpath);
+ }
+ //matrice p_j
+ printf("\nP_J\n");
+ for (int idpath=0; idpath < 24; idpath++){
+ printf("\n");
+ for (int idpix=0; idpix < R-1; idpix++){
+ printf(" %d ", paths[idpath*(R-1)+idpix].y);
+ }
+ printf("\t // %d", 15*idpath);
+ }
- printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
- /*****************************
- * APPELS KERNELS et chronos
- *****************************/
- cutilCheckError( cutResetTimer(timer) );
- cutilCheckError( cutStartTimer(timer) );
+ printf("\n");
- unsigned int iter , nb_iter = 15 ;
- unsigned int poids = 15 ;
- dim3 dimBlock(8,8,1) ;
- dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
- unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
- // init image estimee avec image_in
- kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
-
- printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
-
- for ( iter =0 ; iter < nb_iter ; iter++ )
- {
- cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
- kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H );
- cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
- kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
- }
-
- cudaThreadSynchronize();
-
- cutilCheckError( cutStopTimer(timer) );
- printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
- printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
-
- /**************************
- * VERIFS
- **************************/
- //trace des lniv sur grille de 'pas x pas'
- //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
- //cudaThreadSynchronize();
-
- // enregistrement image lniv GPU
- h_data_out = new unsigned int[H*L] ;
- if ( h_data_out != NULL)
- cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
- else
- printf("Echec allocation mem CPU\n");
-
- cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
-
- // calcul lniv CPU
-
-
+ // enregistrement image des PATHS
+ //cutilSafeCall( cudaMemcpy(h_paths , d_paths, size, cudaMemcpyDeviceToHost) );
+
+ cutilCheckError( cutSavePGMi(image_out, h_paths, R, 24*R) ) ;
+
// TODO verifier pourquoi les deux lignes suivantes produisent une erreur
//cutilExit(argc, argv);
//cudaThreadExit();