main.cu

   1 // libs C
   2 #include <stdlib.h>
   3 #include <stdio.h>
   4 #include <string.h>
   5 #include <math.h>
   6
   7 #include "lib_lniv.h"
   8
   9 // libs NV
  10 #include <cuda_runtime.h>
  11 #include <cutil_inline.h>
  12
  13 // lib spec
  14 #include "defines.h"
  15 #include "levelines_common.h"
  16
  17 #include "levelines_kernels.cu"
  18
  19 const float tang[] = {0, 0.268, 0.577, 1} ;
  20
  21
  22 void genPaths(unsigned int *h_paths, int *p_i, int *p_j, unsigned int r ){
  23   unsigned int idpath = 0 ;
  24   int ic, jc ;
  25   float offset = 0.5 ;
  26
  27   // Q1 inf
  28   for (int a=0 ; a< 4 ; a++){        // les 4 angles 0,15,30 et 45
  29         for (int p=0 ; p< r ; p++){      // les r points
  30           jc = p ;
  31           ic = r-1 - floor(tang[a]*p + offset) ;
  32           h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
  33           if ( p > 0 ){
  34                 p_i[idpath*(r-1)+p-1] = ic ;
  35                 p_j[idpath*(r-1)+p-1] = jc ;
  36           }
  37         }
  38         idpath++ ;
  39   }
  40   // Q1 sup
  41   for (int a=2 ; a>0 ; a--){         // les 2 angles 60 et 75
  42         for (int p=0 ; p< r ; p++){      // les r points
  43           ic = r-1 - p ;
  44           jc = floor(tang[a]*p + offset) ;
  45           h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
  46           if ( p > 0 ){
  47                 p_i[idpath*(r-1)+p-1] = ic ;
  48                 p_j[idpath*(r-1)+p-1] = jc ;
  49           }
  50         }
  51         idpath++ ;
  52   }
  53
  54   // Q2 inf
  55   for (int a=0 ; a< 4 ; a++){        // les 4 angles 90,105,130 et 145
  56         for (int p=0 ; p< r ; p++){      // les r points
  57           ic = r-1 - p ;
  58           jc = r-1 - floor(tang[a]*p + offset) ;
  59           h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
  60           if ( p > 0 ){
  61                 p_i[idpath*(r-1)+p-1] = ic ;
  62                 p_j[idpath*(r-1)+p-1] = jc ;
  63           }
  64         }
  65         idpath++ ;
  66   }
  67   // Q2 sup
  68   for (int a=2 ; a>0 ; a--){         // les 2 angles 60 et 75
  69         for (int p=0 ; p< r ; p++){      // les r points
  70           jc = r-1 - p ;
  71           ic = r-1 - floor(tang[a]*p + offset) ;
  72           h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
  73           if ( p > 0 ){
  74                 p_i[idpath*(r-1)+p-1] = ic ;
  75                 p_j[idpath*(r-1)+p-1] = jc ;
  76           }
  77         }
  78         idpath++ ;
  79   }
  80
  81
  82   // Q3 inf
  83   for (int a=0 ; a< 4 ; a++){        // les 4 angles 90,105,130 et 145
  84         for (int p=0 ; p< r ; p++){      // les r points
  85           jc = r-1 - p ;
  86           ic = floor(tang[a]*p + offset) ;
  87           h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
  88           if ( p > 0 ){
  89                 p_i[idpath*(r-1)+p-1] = ic ;
  90                 p_j[idpath*(r-1)+p-1] = jc ;
  91           }
  92         }
  93         idpath++ ;
  94   }
  95   // Q3 sup
  96   for (int a=2 ; a>0 ; a--){         // les 2 angles 60 et 75
  97         for (int p=0 ; p< r ; p++){      // les r points
  98           ic = p ;
  99           jc = r-1 - floor(tang[a]*p + offset) ;
 100           h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
 101           if ( p > 0 ){
 102                 p_i[idpath*(r-1)+p-1] = ic ;
 103                 p_j[idpath*(r-1)+p-1] = jc ;
 104           }
 105         }
 106         idpath++ ;
 107   }
 108
 109
 110   // Q4 inf
 111   for (int a=0 ; a< 4 ; a++){        // les 4 angles 90,105,130 et 145
 112         for (int p=0 ; p< r ; p++){      // les r points
 113           ic = p ;
 114           jc = floor(tang[a]*p + offset) ;
 115           h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
 116           if ( p > 0 ){
 117                 p_i[idpath*(r-1)+p-1] = ic ;
 118                 p_j[idpath*(r-1)+p-1] = jc ;
 119           }
 120         }
 121         idpath++ ;
 122   }
 123   // Q4 sup
 124   for (int a=2 ; a>0 ; a--){         // les 2 angles 60 et 75
 125         for (int p=0 ; p< r ; p++){      // les r points
 126           jc = p ;
 127           ic = floor(tang[a]*p + offset) ;
 128           h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
 129           if ( p > 0 ){
 130                 p_i[idpath*(r-1)+p-1] = ic ;
 131                 p_j[idpath*(r-1)+p-1] = jc ;
 132           }
 133         }
 134         idpath++ ;
 135   }
 136
 137 }
 138
 139
 140 int main(int argc, char **argv){
 141
 142   // use device with highest Gflops/s
 143   cudaSetDevice( cutGetMaxGflopsDeviceId() );
 144   unsigned int timer ;
 145   cutilCheckError( cutCreateTimer(&timer) );
 146   cutilCheckError( cutResetTimer(timer) );
 147   cutilCheckError( cutStartTimer(timer) );
 148   // une alloc debile pour initialiser la carte GPU
 149   int * d_void ;
 150   cutilSafeCall( cudaMalloc( (void**) &d_void, 4)) ;
 151
 152
 153
 154   /*********************************
 155    *    DEFINITION PARAMS CHEMINS
 156    *********************************/
 157   char* image_out = "./image_out.pgm" ;
 158   int *p_i, *p_j ;
 159   int2 * d_paths ;
 160   unsigned int * h_paths ;
 161   unsigned int R = atoi(argv[1]);
 162
 163
 164   //unsigned int size = R * R * sizeof( unsigned int );
 165
 166
 167   // allocation mem
 168   int memsize = 24*(R-1)*sizeof(int2) ;
 169   cutilSafeCall( cudaMalloc( (void**) &d_paths, memsize ) );
 170
 171
 172   h_paths = new unsigned int [24*R*R] ;
 173   p_i = new int [24*(R-1)] ;
 174   p_j = new int [24*(R-1)] ;
 175
 176   for (int j=0; j<24*R*R ; j++) h_paths[j] = 0 ;
 177
 178   genPaths(h_paths, p_i, p_j, R) ;
 179
 180   printf("Rayon : %d pixels \n", R) ;
 181
 182   //matrice p_i
 183   printf("P_I\n");
 184   for (int idpath=0; idpath < 24; idpath++){
 185         printf("\n");
 186         for (int idpix=0; idpix < R-1; idpix++){
 187           printf(" %d ", p_i[idpath*(R-1)+idpix]);
 188         }
 189   }
 190   //matrice p_j
 191   printf("\nP_J\n");
 192   for (int idpath=0; idpath < 24; idpath++){
 193         printf("\n");
 194         for (int idpix=0; idpix < R-1; idpix++){
 195           printf(" %d ", p_j[idpath*(R-1)+idpix]);
 196         }
 197   }
 198
 199
 200   /*****************************
 201    * APPELS KERNELS et chronos
 202    *****************************/
 203
 204   dim3 dimBlock( 1, 1, 1 ) ;
 205   dim3 dimGrid( 1, 1, 1 ) ;
 206
 207   kernel_calcul_paths<<< dimGrid, dimBlock, 0 >>>(d_paths, R) ;
 208
 209   printf("\nGrille : %d x %d de Blocs : %d x %d \n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y) ;
 210
 211
 212   /**************************
 213    * VERIFS
 214    **************************/
 215
 216   int2 * paths = new int2[24*(R-1)] ;
 217   cutilSafeCall( cudaMemcpy(paths , d_paths, memsize, cudaMemcpyDeviceToHost) );
 218
 219   //matrice p_i
 220   printf("P_I\n");
 221   for (int idpath=0; idpath < 24; idpath++){
 222         printf("\n");
 223         for (int idpix=0; idpix < R-1; idpix++){
 224           printf(" %d ", paths[idpath*(R-1)+idpix].x);
 225         }
 226         printf("\t // %d", 15*idpath);
 227   }
 228   //matrice p_j
 229   printf("\nP_J\n");
 230   for (int idpath=0; idpath < 24; idpath++){
 231         printf("\n");
 232         for (int idpix=0; idpix < R-1; idpix++){
 233           printf(" %d ", paths[idpath*(R-1)+idpix].y);
 234         }
 235         printf("\t // %d", 15*idpath);
 236   }
 237
 238   printf("\n");
 239
 240         // enregistrement image des PATHS
 241         //cutilSafeCall( cudaMemcpy(h_paths , d_paths, size, cudaMemcpyDeviceToHost) );
 242
 243         cutilCheckError( cutSavePGMi(image_out, h_paths, R, 24*R) ) ;
 244
 245         // TODO verifier pourquoi les deux lignes suivantes produisent une erreur
 246         //cutilExit(argc, argv);
 247         //cudaThreadExit();
 248         return EXIT_SUCCESS ;
 249 }