main.cu

   1 // libs C
   2 #include <stdlib.h>
   3 #include <stdio.h>
   4 #include <string.h>
   5 #include <math.h>
   6
   7 #include "lib_lniv.h"
   8
   9 // libs NV
  10 #include <cuda_runtime.h>
  11 #include <cutil_inline.h>
  12
  13 // lib spec
  14 #include "defines.h"
  15 #include "levelines_common.h"
  16
  17 #include "levelines_kernels.cu"
  18
  19
  20 __global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){
  21
  22   unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
  23   unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
  24   unsigned int pos = i*L +j ;
  25
  26   ptr2[pos] = val - ptr1[pos] ;
  27
  28 }
  29
  30 int main(int argc, char **argv){
  31
  32
  33   //float coef_regul = atof( argv[1] ) ;
  34
  35   unsigned int timer ;
  36   cutilCheckError( cutCreateTimer(&timer) );
  37   cutilCheckError( cutResetTimer(timer) );
  38   /*****************************
  39    *    CHARGEMENT IMAGE
  40    *****************************/
  41   char* image_path = argv[argc-1];
  42   char* image_out = "./image_out.pgm" ;
  43   unsigned int * h_data = NULL ;
  44   unsigned int * h_data_out = NULL ;
  45   unsigned int H, L, size;
  46
  47   cutilCheckError( cutStartTimer(timer) );
  48   cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
  49   cutilCheckError( cutStopTimer(timer) );
  50
  51   size = H * L * sizeof( unsigned int );
  52   printf("Loaded  %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path,  cutGetTimerValue(timer));
  53
  54
  55   //essai alloc mapped
  56   /*
  57   cutilCheckError( cutResetTimer(timer) );
  58   cutilCheckError( cutStartTimer(timer) );
  59   unsigned int * h_ptr1, * d_ptr1 ;
  60   unsigned int * h_ptr2, * d_ptr2 ;
  61   int  h =  ;
  62   int l = h ;
  63   int mem = h*l*sizeof(unsigned int) ;
  64   cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
  65   cutilCheckError( cutStopTimer(timer) );
  66   printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ;
  67
  68   cutilCheckError( cutStartTimer(timer) );
  69   cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped));
  70   cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped));
  71   cutilCheckError( cutStopTimer(timer) );
  72   printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ;
  73
  74   for (int i = 0; i<h*l ; i++) h_ptr1[i] = 200 ;
  75
  76   cutilCheckError( cutStartTimer(timer) );
  77   cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr1, (void *)h_ptr1, 0));
  78   cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr2, (void *)h_ptr2, 0));
  79   cutilCheckError( cutStopTimer(timer) );
  80   printf("Temps cumul get pointer  Mapped : %f ms\n", cutGetTimerValue(timer)) ;
  81
  82   cutilCheckError( cutStartTimer(timer) );
  83   dim3 blocks(16,16,1) ;
  84   dim3 grid( h / blocks.x, l / blocks.y, 1 ) ;
  85
  86   kernel_debil<<< grid, blocks >>>(d_ptr1, d_ptr2, l, 255) ;
  87
  88   cutilCheckError( cutStopTimer(timer) );
  89   printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ;
  90
  91   char * image_1 = "./image_1.pgm" ;
  92   char * image_2 = "./image_2.pgm" ;
  93
  94   cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ;
  95   cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ;
  96   */
  97   /*****************************
  98    *     FIN CHARGEMENT IMAGE
  99    *****************************/
 100
 101
 102
 103   // use device with highest Gflops/s
 104   cudaSetDevice( cutGetMaxGflopsDeviceId() );
 105
 106
 107   /*
 108         cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
 109         cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
 110         cutilSafeCall( cudaThreadSynchronize() );
 111         cutilCheckError( cutResetTimer(hTimer) );
 112         cutilCheckError( cutStartTimer(hTimer) );
 113
 114         cutilSafeCall( cudaThreadSynchronize() );
 115         cutilCheckError( cutStopTimer(hTimer) );
 116         gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
 117   */
 118
 119   cutilCheckError( cutResetTimer(timer) );
 120   cutilCheckError( cutStartTimer(timer) );
 121   // allocation mem GPU
 122   unsigned int * d_directions =NULL ;
 123   unsigned int * d_lniv, * d_estim = NULL ;
 124
 125   cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
 126   cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
 127   cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
 128
 129
 130   // allocate array and copy image data
 131   cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
 132   cudaArray * array_img_in, *array_img_estim, *array_img_lniv;
 133   cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
 134   cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
 135   cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
 136   cutilCheckError( cutStopTimer(timer) );
 137
 138   cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H ));
 139   cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
 140
 141   cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H ));
 142   cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
 143
 144   printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
 145   /*****************************
 146    * APPELS KERNELS et chronos
 147    *****************************/
 148   cutilCheckError( cutResetTimer(timer) );
 149   cutilCheckError( cutStartTimer(timer) );
 150
 151         unsigned int iter , nb_iter = 15 ;
 152         unsigned int  poids = 15 ;
 153         dim3 dimBlock(8,8,1) ;
 154         dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
 155         unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
 156         // init image estimee avec image_in
 157         kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
 158
 159         printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
 160
 161         for ( iter =0 ; iter < nb_iter ; iter++ )
 162           {
 163                 cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
 164                 kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H );
 165                 cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
 166                 kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
 167         }
 168
 169         cudaThreadSynchronize();
 170
 171         cutilCheckError( cutStopTimer(timer) );
 172         printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
 173         printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
 174
 175         /**************************
 176          * VERIFS
 177          **************************/
 178         //trace des lniv sur grille de 'pas x pas'
 179         //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
 180         //cudaThreadSynchronize();
 181
 182         // enregistrement image lniv GPU
 183         h_data_out = new unsigned int[H*L] ;
 184         if ( h_data_out != NULL)
 185           cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
 186         else
 187           printf("Echec allocation mem CPU\n");
 188
 189         cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
 190
 191         // calcul lniv CPU
 192
 193
 194         // TODO verifier pourquoi les deux lignes suivantes produisent une erreur
 195         //cutilExit(argc, argv);
 196         //cudaThreadExit();
 197         return EXIT_SUCCESS ;
 198 }