src/lib_test_gpu.cu

   1 #include "stdio.h"
   2 #include "structures.h"
   3 #include "lib_test_gpu.h"
   4
   /*
    * Copies `count` uint64 values from the device buffer `src` into the host
    * buffer `dst`. On failure the CUDA error string is written to stderr,
    * tagged with `label`.
    * Returns true when the copy succeeded, false otherwise.
    */
   static bool verif_cumuls_fetch(uint64 * dst, const uint64 * src, size_t count, const char * label){
     const cudaError_t err = cudaMemcpy(dst, src, count*sizeof(uint64), cudaMemcpyDeviceToHost);
     if (err != cudaSuccess){
       fprintf(stderr, "verif_cumuls: cudaMemcpy(%s) failed: %s\n", label, cudaGetErrorString(err));
       return false;
     }
     return true;
   }

   /*
    * Compares the row-wise cumulative images held on the device with a
    * reference computed on the CPU, and prints the number of mismatching points.
    *
    * For every row i and column j the CPU reference is:
    *   cumulated 1   : j+1
    *   cumulated x   : sum of img_in[i][0..j]
    *   cumulated x^2 : sum of img_in[i][0..j]^2
    *
    * img_in   : host image, indexed img_in[row][column], H rows of L columns
    * H, L     : image height and width
    * d_img_1, d_img_x, d_img_x2 :
    *            addresses of the device pointers to the three cumulated images;
    *            each device buffer is read as H*L contiguous uint64 values, row
    *            after row
    * BS       : a blank line is printed before a mismatch whose column index is
    *            a multiple of BS (nothing is printed when BS is 0)
    * DISPLAY  : when true, the CPU and GPU values of each mismatch are printed
    *
    * If a device-to-host copy fails, the error is reported on stderr and no
    * comparison is made.
    */
   void verif_cumuls(int ** img_in, int H, int L, uint64 ** d_img_1, uint64 ** d_img_x, uint64 ** d_img_x2, int BS, bool DISPLAY){
     // Empty image: nothing to compare, keep the usual summary line.
     if (H <= 0 || L <= 0){
       printf("%d erreurs / %d points\n", 0, 0);
       return;
     }

     // Element count in size_t so that H*L cannot overflow an int.
     const size_t taille = (size_t)H * (size_t)L ;

     // Host copies of the GPU results live on the heap: H*L 64-bit values per
     // image do not fit on the stack for realistic image sizes.
     uint64 * gpu_1  = new uint64[taille];
     uint64 * gpu_x  = new uint64[taille];
     uint64 * gpu_x2 = new uint64[taille];

     const bool copied = verif_cumuls_fetch(gpu_1,  *d_img_1,  taille, "img_1")
                      && verif_cumuls_fetch(gpu_x,  *d_img_x,  taille, "img_x")
                      && verif_cumuls_fetch(gpu_x2, *d_img_x2, taille, "img_x2");

     if (copied){
       int cpt = 0;
       int cpt_err = 0;
       for (int i=0; i<H; i++){
         // Running sums of the current row: the CPU reference needs no storage.
         uint64 sum_1 = 0, sum_x = 0, sum_x2 = 0;
         for (int j=0; j<L; j++){
           // Widen before squaring: int*int overflows for values above 46340.
           const long long v = img_in[i][j];
           sum_1  += 1 ;
           sum_x  += (uint64)v ;
           sum_x2 += (uint64)(v*v) ;

           const size_t k = (size_t)i*(size_t)L + (size_t)j ;
           if( (sum_1 != gpu_1[k]) || (sum_x != gpu_x[k]) || (sum_x2 != gpu_x2[k]) )
             {
               // BS == 0 would be a division by zero in the modulo.
               if (BS != 0 && !(j%BS)) printf("\n");
               if (DISPLAY)
                 {
                   // Explicit casts keep the format valid whatever the width of
                   // the uint64 typedef.
                   printf("(%d,%d)CPU:%llu GPU:%llu\n",i, j,
                          (unsigned long long)sum_x2, (unsigned long long)gpu_x2[k]);
                   printf("(%d,%d):CPU=%llu  GPU=%llu\n",i,j,
                          (unsigned long long)sum_x, (unsigned long long)gpu_x[k]);
                 }
               cpt_err++;
             }
           cpt++;
         }
       }
       printf("%d erreurs / %d points\n", cpt_err, cpt );
     }

     delete[] gpu_1;
     delete[] gpu_x;
     delete[] gpu_x2;
   }
  52
  /*
   * Copies `bytes` bytes from the device buffer `src` into the host buffer
   * `dst`. On failure the CUDA error string is written to stderr, tagged with
   * `label`.
   * Returns true when the copy succeeded, false otherwise.
   */
  static bool verif_stats_ref_fetch(void * dst, const void * src, size_t bytes, const char * label){
    const cudaError_t err = cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess){
      fprintf(stderr, "verif_stats_ref: cudaMemcpy(%s) failed: %s\n", label, cudaGetErrorString(err));
      return false;
    }
    return true;
  }

  /*
   * Prints, for every snake node, its three reference statistics together with
   * the values of the three images at the node position.
   *
   * d_stats_ref : address of the device pointer to 3*nnodes int64 values
   *               (three consecutive values per node)
   * d_snake     : address of the device pointer to nnodes snake_node_gpu
   * nnodes      : number of nodes
   * H, L        : image height and width
   * d_img_1, d_img_x, d_img_x2 :
   *               addresses of the device pointers to the three images; each is
   *               read as H*L contiguous uint64 values and indexed as
   *               [posi*L + posj]
   *
   * If a device-to-host copy fails, the error is reported on stderr and nothing
   * is printed. All host buffers are released before returning.
   */
  void verif_stats_ref(int64 ** d_stats_ref, snake_node_gpu ** d_snake, int nnodes, int H, int L,
                       uint64 ** d_img_1, uint64 ** d_img_x, uint64 ** d_img_x2){

    // Nothing to list: keep the header so the output stays recognisable.
    if (nnodes <= 0){
      printf("******* STATS DIMINUEES\n");
      return;
    }

    // Sizes in size_t so that the products cannot overflow an int.
    const size_t nb_nodes = (size_t)nnodes;
    const size_t npix = (H > 0 && L > 0) ? (size_t)H * (size_t)L : 0;

    int64 * h_stats_ref = new int64[3*nb_nodes];
    snake_node_gpu * h_snake = new snake_node_gpu[nb_nodes] ;
    uint64 * img_1 = new uint64[npix];
    uint64 * img_x = new uint64[npix];
    uint64 * img_x2 = new uint64[npix];

    // The host image buffers hold H*L elements, so exactly H*L elements are
    // copied (copying H*L*nnodes elements overruns them).
    const bool copied =
         verif_stats_ref_fetch(img_1,  *d_img_1,  npix*sizeof(uint64), "img_1")
      && verif_stats_ref_fetch(img_x,  *d_img_x,  npix*sizeof(uint64), "img_x")
      && verif_stats_ref_fetch(img_x2, *d_img_x2, npix*sizeof(uint64), "img_x2")
      && verif_stats_ref_fetch(h_stats_ref, *d_stats_ref, 3*nb_nodes*sizeof(int64), "stats_ref")
      && verif_stats_ref_fetch(h_snake, *d_snake, nb_nodes*sizeof(snake_node_gpu), "snake");

    if (copied){
      printf("******* STATS DIMINUEES\n");
      for(int n=0; n<nnodes; n++)
        {
          const int i = h_snake[n].posi, j = h_snake[n].posj ;
          // Explicit casts keep the formats valid whatever the width of the
          // int64 / uint64 typedefs.
          const long long s0 = (long long)h_stats_ref[3*n];
          const long long s1 = (long long)h_stats_ref[3*n +1];
          const long long s2 = (long long)h_stats_ref[3*n +2];

          if (i < 0 || i >= H || j < 0 || j >= L){
            // A node outside the image cannot be looked up in the image buffers.
            printf("node %d (%d,%d) : %lld - %lld - %lld - (position outside the %dx%d image)\n",
                   n, i, j, s0, s1, s2, H, L);
            continue;
          }

          const size_t k = (size_t)i*(size_t)L + (size_t)j ;
          printf("node %d (%d,%d) : %lld - %lld - %lld - img1= %llu - imgx= %llu - imgx2= %llu \n", n, i, j,
                 s0, s1, s2,
                 (unsigned long long)img_1[k], (unsigned long long)img_x[k], (unsigned long long)img_x2[k]);
        }
    }

    delete[] h_stats_ref;
    delete[] h_snake;
    delete[] img_1;
    delete[] img_x;
    delete[] img_x2;
  }