BookGPU/Chapters/chapter3/code/memSkel.cu

   1 // C libraries
   2 #include <stdlib.h>
   3 #include <stdio.h>
   4
   5 // NVidia libraries
   6 #include <cuda_runtime.h>
   7 #include <cutil_inline.h>
   8
   9 // our kernels
  10 #include "fast_kernels.cu"
  11
  /*
   * Skeleton host program for the texture-based image pipeline.
   *
   * Steps performed:
   *   1. load "image.pgm" into host memory and convert it to 16-bit samples,
   *   2. allocate the device output buffer and a 2D CUDA array for the input,
   *   3. bind the array to the texture reference tex_img_in and upload the image,
   *   4. launch kernel_ident over the image with 16x16 thread blocks,
   *   5. report the elapsed time and release every resource.
   *
   * tex_img_in and kernel_ident are expected to come from "fast_kernels.cu",
   * which is included by this file but not visible from here.
   *
   * Returns EXIT_SUCCESS when the kernel ran, EXIT_FAILURE when the image is
   * too small to launch a single block. CUDA/cutil failures are handled by the
   * cutilSafeCall / cutilCheckError wrappers.
   */
  int main(int argc, char **argv){
    (void)argc ;
    (void)argv ;

    int status = EXIT_SUCCESS ;

    // raw way of selecting GPU
    cutilSafeCall( cudaSetDevice( 0 ) );

    // a cutil timer must be created before it can be reset or started
    unsigned int timer = 0 ;
    cutilCheckError( cutCreateTimer(&timer) );

    // CPU memory
    unsigned int *h_data = NULL ;   // NULL asks cutLoadPGMi to allocate the buffer
    unsigned int imgW = 0, imgH = 0 ;
    short *h_in = NULL ;
    int H, L ;                      // image height and width, in pixels

    // GPU memory
    short *d_out = NULL ;
    cudaArray *array_img_in = NULL ;
    cudaChannelFormatDesc channelDescS = cudaCreateChannelDesc<short>();

    const int bsx = 16, bsy = 16 ;  // thread-block dimensions
    dim3 dimBlock, dimGrid ;

    // load the image (width first, then height)
    cutilCheckError( cutLoadPGMi("image.pgm", &h_data, &imgW, &imgH) );
    L = (int)imgW ;
    H = (int)imgH ;
    const size_t nbPix = (size_t)H * (size_t)L ;
    const size_t size  = nbPix * sizeof( short ) ;

    // convert the loaded samples to short, the texel type of the texture
    h_in = new short[nbPix] ;
    for (size_t k = 0; k < nbPix; k++)
      h_in[k] = (short)h_data[k] ;

    cutilCheckError( cutResetTimer(timer) );
    cutilCheckError( cutStartTimer(timer) );

    cutilSafeCall( cudaMalloc( (void**) &d_out, size ) );

    // out-of-range texture coordinates are clamped to the image border
    tex_img_in.addressMode[0] = cudaAddressModeClamp ;
    tex_img_in.addressMode[1] = cudaAddressModeClamp ;

    cutilSafeCall( cudaMallocArray( &array_img_in, &channelDescS, L, H ));
    cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDescS));

    // upload the 16-bit image into the array backing the texture
    cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_in, size, cudaMemcpyHostToDevice)) ;

    // One thread per pixel. The grid is floor-divided on purpose: the kernel
    // source is not visible here, so it is not known whether it guards against
    // out-of-range coordinates -- TODO confirm before switching to a ceil-div.
    dimBlock = dim3(bsx, bsy, 1) ;
    dimGrid  = dim3( L/dimBlock.x, H/dimBlock.y, 1 ) ;

    if (dimGrid.x == 0 || dimGrid.y == 0) {
      // a zero-sized grid is an invalid launch configuration
      fprintf(stderr, "image %dx%d is smaller than one %dx%d block: kernel not launched\n",
              L, H, bsx, bsy) ;
      status = EXIT_FAILURE ;
    } else {
      if ((L % bsx) != 0 || (H % bsy) != 0)
        fprintf(stderr, "warning: image %dx%d is not a multiple of %dx%d, border pixels are not processed\n",
                L, H, bsx, bsy) ;

      kernel_ident<<< dimGrid, dimBlock, 0>>>(d_out, H, L) ;
      cutilSafeCall( cudaGetLastError() );       // launch-configuration errors
      cutilSafeCall( cudaDeviceSynchronize() );  // errors raised during execution
    }

    cutilCheckError( cutStopTimer(timer) );
    printf("GPU allocation + transfer + kernel : %f ms\n", cutGetTimerValue(timer)) ;

    // release resources
    cutilSafeCall( cudaUnbindTexture( tex_img_in ) );
    cutilSafeCall( cudaFreeArray( array_img_in ) );
    cutilSafeCall( cudaFree( d_out ) );
    delete[] h_in ;
    cutFree( h_data ) ;   // buffer allocated inside cutil by cutLoadPGMi
    cutilCheckError( cutDeleteTimer(timer) );

    return status ;
  }
  64