generation de chemins de taille parametrable

[lniv_gpu.git] / main.cu
diff --git a/main.cu b/main.cu

index d9db4f87d169ddb4929cba23f8ebf36939db3f0a..b2c1681d77385ccabba954f69024999fe8f48cd4 100644 (file)
--- a/main.cu
+++ b/main.cu
@@ -16,181 +16,232 @@
  
  #include "levelines_kernels.cu"
  
+const float tang[] = {0, 0.268, 0.577, 1} ; // approximate tangents of 0, 15, 30 and 45 degrees; slopes used by genPaths
  
-__global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){
-  
-  unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
-  unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
-  unsigned int pos = i*L +j ;
  
-  ptr2[pos] = val - ptr1[pos] ;
+void genPaths(unsigned int *h_paths, int *p_i, int *p_j, unsigned int r ){ // generates 24 discrete straight paths of r points each, one every 15 degrees
+  unsigned int idpath = 0 ; // index of the path being generated (0..23), incremented after each path
+  int ic, jc ; // row (ic) and column (jc) of the current point inside the r x r tile of the current path
+  float offset = 0.5 ; // added before floor() so that the slope product is rounded to the nearest integer
+    // h_paths must hold 24*r*r values, p_i and p_j 24*(r-1) values each (8 loops below: 4+2 paths, four times)
+  // Q1 inf: start at the bottom-left corner (r-1, 0), step one column per point, row derived from the slope
+  for (int a=0 ; a< 4 ; a++){        // the 4 angles 0, 15, 30 and 45
+       for (int p=0 ; p< r ; p++){      // the r points
+         jc = p ;
+         ic = r-1 - floor(tang[a]*p + offset) ;
+         h_paths[ idpath*r*r + ic*r + jc ] = 255 ; // mark the point in the r x r tile reserved for this path
+         if ( p > 0 ){ // the start point (p == 0) is not stored: each path has r-1 entries in p_i / p_j
+               p_i[idpath*(r-1)+p-1] = ic ;
+               p_j[idpath*(r-1)+p-1] = jc ;
+         }
+       }
+       idpath++ ;
+  }
+  // Q1 sup: same start corner, step one row per point, column derived from the slope
+  for (int a=2 ; a>0 ; a--){         // the 2 angles 60 and 75
+       for (int p=0 ; p< r ; p++){      // the r points
+         ic = r-1 - p ;
+         jc = floor(tang[a]*p + offset) ; 
+         h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+         if ( p > 0 ){
+               p_i[idpath*(r-1)+p-1] = ic ;
+               p_j[idpath*(r-1)+p-1] = jc ;
+         }
+       }
+       idpath++ ;
+  }
+
+  // Q2 inf: start at the bottom-right corner (r-1, r-1), step one row per point
+  for (int a=0 ; a< 4 ; a++){        // the 4 angles 90, 105, 120 and 135
+       for (int p=0 ; p< r ; p++){      // the r points
+         ic = r-1 - p ;
+         jc = r-1 - floor(tang[a]*p + offset) ;
+         h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+         if ( p > 0 ){
+               p_i[idpath*(r-1)+p-1] = ic ;
+               p_j[idpath*(r-1)+p-1] = jc ;
+         }
+       }
+       idpath++ ;
+  }
+  // Q2 sup: same start corner, step one column per point
+  for (int a=2 ; a>0 ; a--){         // the 2 angles 150 and 165
+       for (int p=0 ; p< r ; p++){      // the r points
+         jc = r-1 - p ;
+         ic = r-1 - floor(tang[a]*p + offset) ; 
+         h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+         if ( p > 0 ){
+               p_i[idpath*(r-1)+p-1] = ic ;
+               p_j[idpath*(r-1)+p-1] = jc ;
+         }
+       }
+       idpath++ ;
+  }
+
+
+  // Q3 inf: start at the top-right corner (0, r-1), step one column per point
+  for (int a=0 ; a< 4 ; a++){        // the 4 angles 180, 195, 210 and 225
+       for (int p=0 ; p< r ; p++){      // the r points
+         jc = r-1 - p ;
+         ic = floor(tang[a]*p + offset) ;
+         h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+         if ( p > 0 ){
+               p_i[idpath*(r-1)+p-1] = ic ;
+               p_j[idpath*(r-1)+p-1] = jc ;
+         }
+       }
+       idpath++ ;
+  }
+  // Q3 sup: same start corner, step one row per point
+  for (int a=2 ; a>0 ; a--){         // the 2 angles 240 and 255
+       for (int p=0 ; p< r ; p++){      // the r points
+         ic = p ;
+         jc = r-1 - floor(tang[a]*p + offset) ; 
+         h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+         if ( p > 0 ){
+               p_i[idpath*(r-1)+p-1] = ic ;
+               p_j[idpath*(r-1)+p-1] = jc ;
+         }
+       }
+       idpath++ ;
+  }
+
+
+  // Q4 inf: start at the top-left corner (0, 0), step one row per point
+  for (int a=0 ; a< 4 ; a++){        // the 4 angles 270, 285, 300 and 315
+       for (int p=0 ; p< r ; p++){      // the r points
+         ic = p ;
+         jc = floor(tang[a]*p + offset) ;
+         h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+         if ( p > 0 ){
+               p_i[idpath*(r-1)+p-1] = ic ;
+               p_j[idpath*(r-1)+p-1] = jc ;
+         }
+       }
+       idpath++ ;
+  }
+  // Q4 sup: same start corner, step one column per point
+  for (int a=2 ; a>0 ; a--){         // the 2 angles 330 and 345
+       for (int p=0 ; p< r ; p++){      // the r points
+         jc = p ;
+         ic = floor(tang[a]*p + offset) ; 
+         h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+         if ( p > 0 ){
+               p_i[idpath*(r-1)+p-1] = ic ;
+               p_j[idpath*(r-1)+p-1] = jc ;
+         }
+       }
+       idpath++ ;
+  }
    
  }
  
-int main(int argc, char **argv){
  
+int main(int argc, char **argv){
  
-  //float coef_regul = atof( argv[1] ) ;
-
+  // use device with highest Gflops/s
+  cudaSetDevice( cutGetMaxGflopsDeviceId() );
    unsigned int timer ;
    cutilCheckError( cutCreateTimer(&timer) );
    cutilCheckError( cutResetTimer(timer) );
-  /*****************************
-   *    CHARGEMENT IMAGE
-   *****************************/
-  char* image_path = argv[argc-1];
-  char* image_out = "./image_out.pgm" ;
-  unsigned int * h_data = NULL ;
-  unsigned int * h_data_out = NULL ;
-  unsigned int H, L, size;
-
    cutilCheckError( cutStartTimer(timer) );
-  cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
-  cutilCheckError( cutStopTimer(timer) );
+  // une alloc debile pour initialiser la carte GPU
+  int * d_void ;
+  cutilSafeCall( cudaMalloc( (void**) &d_void, 4)) ;
    
-  size = H * L * sizeof( unsigned int ); 
-  printf("Loaded  %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path,  cutGetTimerValue(timer));
-
+  
+  
+  /*********************************
+   *    DEFINITION PARAMS CHEMINS
+   *********************************/
+  char* image_out = "./image_out.pgm" ;
+  int *p_i, *p_j ;
+  int2 * d_paths ;
+  unsigned int * h_paths ;
+  unsigned int R = atoi(argv[1]);
+ 
+  
+  //unsigned int size = R * R * sizeof( unsigned int ); 
  
-  //essai alloc mapped
-  /*
-  cutilCheckError( cutResetTimer(timer) );
-  cutilCheckError( cutStartTimer(timer) );
-  unsigned int * h_ptr1, * d_ptr1 ;
-  unsigned int * h_ptr2, * d_ptr2 ;
-  int  h =  ;
-  int l = h ;
-  int mem = h*l*sizeof(unsigned int) ;
-  cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
-  cutilCheckError( cutStopTimer(timer) );
-  printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ;
  
-  cutilCheckError( cutStartTimer(timer) );
-  cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped));
-  cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped));
-  cutilCheckError( cutStopTimer(timer) );
-  printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+  // allocation mem
+  int memsize = 24*(R-1)*sizeof(int2) ;
+  cutilSafeCall( cudaMalloc( (void**) &d_paths, memsize ) );
    
-  for (int i = 0; i<h*l ; i++) h_ptr1[i] = 200 ;
-
-  cutilCheckError( cutStartTimer(timer) );
-  cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr1, (void *)h_ptr1, 0));
-  cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr2, (void *)h_ptr2, 0));
-  cutilCheckError( cutStopTimer(timer) );
-  printf("Temps cumul get pointer  Mapped : %f ms\n", cutGetTimerValue(timer)) ;
    
-  cutilCheckError( cutStartTimer(timer) );
-  dim3 blocks(16,16,1) ;
-  dim3 grid( h / blocks.x, l / blocks.y, 1 ) ;
+  h_paths = new unsigned int [24*R*R] ;
+  p_i = new int [24*(R-1)] ;
+  p_j = new int [24*(R-1)] ;
    
-  kernel_debil<<< grid, blocks >>>(d_ptr1, d_ptr2, l, 255) ;
+  for (int j=0; j<24*R*R ; j++) h_paths[j] = 0 ;
  
-  cutilCheckError( cutStopTimer(timer) );
-  printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+  genPaths(h_paths, p_i, p_j, R) ;
+  
+  printf("Rayon : %d pixels \n", R) ;
+
+  //matrice p_i
+  printf("P_I\n");
+  for (int idpath=0; idpath < 24; idpath++){
+       printf("\n");
+       for (int idpix=0; idpix < R-1; idpix++){
+         printf(" %d ", p_i[idpath*(R-1)+idpix]);
+       }
+  }
+  //matrice p_j
+  printf("\nP_J\n");
+  for (int idpath=0; idpath < 24; idpath++){
+       printf("\n");
+       for (int idpix=0; idpix < R-1; idpix++){
+         printf(" %d ", p_j[idpath*(R-1)+idpix]);
+       }
+  }
    
-  char * image_1 = "./image_1.pgm" ;
-  char * image_2 = "./image_2.pgm" ;
    
-  cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ;
-  cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ;
-  */
    /*****************************
-   *     FIN CHARGEMENT IMAGE
+   * APPELS KERNELS et chronos
     *****************************/
    
+  dim3 dimBlock( 1, 1, 1 ) ;
+  dim3 dimGrid( 1, 1, 1 ) ;
+ 
+  kernel_calcul_paths<<< dimGrid, dimBlock, 0 >>>(d_paths, R) ;
    
-    
-  // use device with highest Gflops/s
-  cudaSetDevice( cutGetMaxGflopsDeviceId() );
-  
+  printf("\nGrille : %d x %d de Blocs : %d x %d \n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y) ;
    
-  /*
-       cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
-       cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
-       cutilSafeCall( cudaThreadSynchronize() );
-       cutilCheckError( cutResetTimer(hTimer) );
-       cutilCheckError( cutStartTimer(hTimer) );
-    
-       cutilSafeCall( cudaThreadSynchronize() );
-       cutilCheckError( cutStopTimer(hTimer) );
-       gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
-  */
-
-  cutilCheckError( cutResetTimer(timer) );
-  cutilCheckError( cutStartTimer(timer) );
-  // allocation mem GPU
-  unsigned int * d_directions =NULL ;
-  unsigned int * d_lniv, * d_estim = NULL ;
-
-  cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
-  cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
-  cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
+ 
+  /**************************
+   * VERIFS 
+   **************************/
  
+  int2 * paths = new int2[24*(R-1)] ;
+  cutilSafeCall( cudaMemcpy(paths , d_paths, memsize, cudaMemcpyDeviceToHost) );
    
-  // allocate array and copy image data
-  cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
-  cudaArray * array_img_in, *array_img_estim, *array_img_lniv;
-  cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H )); 
-  cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
-  cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
-  cutilCheckError( cutStopTimer(timer) );
-  
-  cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H )); 
-  cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
-
-  cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H )); 
-  cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
+  //matrice p_i
+  printf("P_I\n");
+  for (int idpath=0; idpath < 24; idpath++){
+       printf("\n");
+       for (int idpix=0; idpix < R-1; idpix++){
+         printf(" %d ", paths[idpath*(R-1)+idpix].x);
+       }
+       printf("\t // %d", 15*idpath);
+  }
+  //matrice p_j
+  printf("\nP_J\n");
+  for (int idpath=0; idpath < 24; idpath++){
+       printf("\n");
+       for (int idpix=0; idpix < R-1; idpix++){
+         printf(" %d ", paths[idpath*(R-1)+idpix].y);
+       }
+       printf("\t // %d", 15*idpath);
+  }
  
-  printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
-  /*****************************
-   * APPELS KERNELS et chronos
-   *****************************/
-  cutilCheckError( cutResetTimer(timer) );
-  cutilCheckError( cutStartTimer(timer) );
+  printf("\n");
    
-       unsigned int iter , nb_iter = 15 ;
-       unsigned int  poids = 15 ;
-       dim3 dimBlock(8,8,1) ;
-       dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
-       unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
-       // init image estimee avec image_in
-       kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
-       
-       printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
-       
-       for ( iter =0 ; iter < nb_iter ; iter++ )
-         {
-               cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
-               kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H );
-               cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
-               kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
-       }
-       
-       cudaThreadSynchronize();
-       
-       cutilCheckError( cutStopTimer(timer) );
-       printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
-       printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
-
-       /**************************
-        * VERIFS 
-        **************************/
-       //trace des lniv sur grille de 'pas x pas'
-       //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
-       //cudaThreadSynchronize();
-       
-       // enregistrement image lniv GPU
-       h_data_out = new unsigned int[H*L] ;
-       if ( h_data_out != NULL)
-         cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
-        else
-         printf("Echec allocation mem CPU\n");                 
-
-       cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
-
-       // calcul lniv CPU
-       
-       
+       // enregistrement image des PATHS
+       //cutilSafeCall( cudaMemcpy(h_paths , d_paths, size, cudaMemcpyDeviceToHost) );
+     
+       cutilCheckError( cutSavePGMi(image_out, h_paths, R, 24*R) ) ;
+
         // TODO verifier pourquoi les deux lignes suivantes produisent une erreur
         //cutilExit(argc, argv);
         //cudaThreadExit();