]> AND Private Git Repository - snake_gpu.git/blob - src/lib_test_gpu.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
clean
[snake_gpu.git] / src / lib_test_gpu.cu
1 #include "stdio.h"
2 #include "structures.h"
3 #include "lib_test_gpu.h"
4
/**
 * Checks the GPU row-wise cumulative images against a CPU reference.
 *
 * For every row i of img_in the CPU reference is the running sum along j of
 * 1, img_in[i][j] and img_in[i][j]^2. These are compared element by element
 * with the three device buffers, copied back to the host as H*L uint64 each
 * and read in row-major order (index i*L + j).
 *
 * @param img_in   host image, H row pointers of L ints each
 * @param H        number of rows
 * @param L        number of columns
 * @param d_img_1  address of the device buffer holding the cumulated "1" image
 * @param d_img_x  address of the device buffer holding the cumulated x image
 * @param d_img_x2 address of the device buffer holding the cumulated x^2 image
 * @param BS       a blank line is printed when a mismatch falls on a column
 *                 that is a multiple of BS (presumably the block size used by
 *                 the kernel -- confirm with the caller); 0 disables it
 * @param DISPLAY  when true, prints CPU/GPU values for each mismatch
 *
 * Prints "<errors> erreurs / <points> points" on stdout. If a device-to-host
 * copy fails, the CUDA error is reported on stderr and nothing is compared.
 */
void verif_cumuls(int ** img_in, int H, int L, uint64 ** d_img_1, uint64 ** d_img_x, uint64 ** d_img_x2, int BS, bool DISPLAY){
  // Nothing to compare for an empty image (also avoids zero/negative sized allocations).
  if (H <= 0 || L <= 0)
    {
      printf("%d erreurs / %d points\n", 0, 0 );
      return;
    }

  // Element count computed in size_t so that H*L cannot overflow int.
  const size_t taille = (size_t)H * (size_t)L ;
  const size_t nbytes = taille * sizeof(uint64) ;

  // Host copies of the GPU results. Heap allocated: the images are far too
  // large for the stack, and variable-length arrays are not standard C++.
  uint64 * img_1b  = new uint64[taille];
  uint64 * img_xb  = new uint64[taille];
  uint64 * img_x2b = new uint64[taille];

  cudaError_t err = cudaMemcpy( img_1b, *d_img_1, nbytes, cudaMemcpyDeviceToHost );
  if (err == cudaSuccess) err = cudaMemcpy( img_xb, *d_img_x, nbytes, cudaMemcpyDeviceToHost );
  if (err == cudaSuccess) err = cudaMemcpy( img_x2b, *d_img_x2, nbytes, cudaMemcpyDeviceToHost );
  if (err != cudaSuccess)
    {
      // Comparing against buffers that were never filled would be meaningless.
      fprintf(stderr, "verif_cumuls: cudaMemcpy failed: %s\n", cudaGetErrorString(err));
      delete[] img_1b;
      delete[] img_xb;
      delete[] img_x2b;
      return;
    }

  int cpt = 0;
  int cpt_err = 0;
  for (int i=0; i<H; i++)
    {
      // CPU reference: running sums along the current row.
      uint64 sum_1 = 0, sum_x = 0, sum_x2 = 0;
      for (int j=0; j<L; j++)
        {
          // Widen before squaring so that the product is not evaluated in int.
          const uint64 v = img_in[i][j] ;
          sum_1  += 1 ;
          sum_x  += v ;
          sum_x2 += v*v ;

          const size_t idx = (size_t)i * L + j ;
          if( (sum_1 != img_1b[idx]) || (sum_x != img_xb[idx]) || (sum_x2 != img_x2b[idx]) )
            {
              // Guard against BS == 0, which would be a division by zero.
              if (BS != 0 && !(j%BS)) printf("\n");
              if (DISPLAY)
                {
                  // Cast to unsigned long long so the format matches whatever
                  // 64-bit integer type uint64 is a typedef of.
                  printf("(%d,%d)CPU:%llu GPU:%llu\n",i, j,
                         (unsigned long long)sum_x2, (unsigned long long)img_x2b[idx]);
                  printf("(%d,%d):CPU=%llu  GPU=%llu\n",i,j,
                         (unsigned long long)sum_x, (unsigned long long)img_xb[idx]);
                }
              cpt_err++;
            }
          cpt++;
        }
    }
  printf("%d erreurs / %d points\n", cpt_err, cpt );

  delete[] img_1b;
  delete[] img_xb;
  delete[] img_x2b;
}
52
/**
 * Debug dump of the per-node reference statistics computed on the GPU.
 *
 * Copies back to the host the statistics (3 int64 values per node), the snake
 * nodes and the three cumulated images (H*L uint64 each), then prints, for
 * every node, its position, its three statistics and the values of the three
 * cumulated images at that position (row-major index posi*L + posj).
 *
 * @param d_stats_ref address of the device buffer of 3*nnodes int64
 * @param d_snake     address of the device array of nnodes snake_node_gpu
 * @param nnodes      number of snake nodes
 * @param H           number of image rows
 * @param L           number of image columns
 * @param d_img_1     address of the device cumulated "1" image
 * @param d_img_x     address of the device cumulated x image
 * @param d_img_x2    address of the device cumulated x^2 image
 *
 * If a device-to-host copy fails, the CUDA error is reported on stderr and
 * nothing is printed. Nodes located outside the image are reported on stderr
 * and skipped.
 */
void verif_stats_ref(int64 ** d_stats_ref, snake_node_gpu ** d_snake, int nnodes, int H, int L,
                     uint64 ** d_img_1, uint64 ** d_img_x, uint64 ** d_img_x2){

  if (nnodes <= 0 || H <= 0 || L <= 0)
    {
      fprintf(stderr, "verif_stats_ref: invalid sizes (nnodes=%d, H=%d, L=%d)\n", nnodes, H, L);
      return;
    }

  const size_t n_nodes  = (size_t)nnodes ;
  const size_t n_pixels = (size_t)H * (size_t)L ;
  // One image holds H*L elements; the host buffers below are sized accordingly,
  // so the copy size must NOT be multiplied by the number of nodes.
  const size_t img_bytes = n_pixels * sizeof(uint64) ;

  int64 * h_stats_ref = new int64[3*n_nodes];
  snake_node_gpu * h_snake = new snake_node_gpu[n_nodes] ;
  uint64 * img_1 = new uint64[n_pixels];
  uint64 * img_x = new uint64[n_pixels];
  uint64 * img_x2 = new uint64[n_pixels];

  cudaError_t err = cudaMemcpy( img_1, *d_img_1, img_bytes, cudaMemcpyDeviceToHost) ;
  if (err == cudaSuccess) err = cudaMemcpy( img_x, *d_img_x, img_bytes, cudaMemcpyDeviceToHost) ;
  if (err == cudaSuccess) err = cudaMemcpy( img_x2, *d_img_x2, img_bytes, cudaMemcpyDeviceToHost) ;
  if (err == cudaSuccess) err = cudaMemcpy( h_stats_ref, *d_stats_ref, 3*n_nodes*sizeof(int64), cudaMemcpyDeviceToHost) ;
  if (err == cudaSuccess) err = cudaMemcpy( h_snake, *d_snake, n_nodes*sizeof(snake_node_gpu), cudaMemcpyDeviceToHost) ;

  if (err != cudaSuccess)
    {
      fprintf(stderr, "verif_stats_ref: cudaMemcpy failed: %s\n", cudaGetErrorString(err));
    }
  else
    {
      printf("******* STATS DIMINUEES\n");
      for(int n=0; n<nnodes;n++)
        {
          int i = h_snake[n].posi, j = h_snake[n].posj ;
          // A node outside the image would index past the end of the host buffers.
          if (i < 0 || i >= H || j < 0 || j >= L)
            {
              fprintf(stderr, "verif_stats_ref: node %d (%d,%d) is outside the %dx%d image\n", n, i, j, H, L);
              continue;
            }
          const size_t idx = (size_t)i * L + j ;
          // Casts make the arguments match the %lld / %llu conversions whatever
          // 64-bit integer types int64 and uint64 are typedefs of.
          printf("node %d (%d,%d) : %lld - %lld - %lld - img1= %llu - imgx= %llu - imgx2= %llu \n", n, i, j,
                 (long long)h_stats_ref[3*n], (long long)h_stats_ref[3*n +1], (long long)h_stats_ref[3*n +2],
                 (unsigned long long)img_1[idx], (unsigned long long)img_x[idx], (unsigned long long)img_x2[idx]);
        }
    }

  // Release the host-side scratch buffers on every path.
  delete[] h_stats_ref;
  delete[] h_snake;
  delete[] img_1;
  delete[] img_x;
  delete[] img_x2;
}