2 #include "structures.h"
3 #include "lib_test_gpu.h"
// verif_cumuls: host-side check of the GPU cumulative images against a CPU
// recomputation.
//
// What the visible code does:
//   1. copies *d_img_1, *d_img_x and *d_img_x2 from the device into the host
//      buffers img_1b, img_xb and img_x2b (taille*sizeof(uint64) bytes each);
//   2. rebuilds on the CPU, row by row, running sums along j of 1, img_in and
//      img_in*img_in into img_1, img_x and img_x2;
//   3. compares CPU and GPU values at every (i,j) and finally prints the
//      counters cpt_err and cpt.
//
// Parameters:
//   img_in   - input image, read as img_in[i][j] for i in [0,H), j in [0,L)
//   H, L     - loop bounds for the row index i and the column index j
//   d_img_1, d_img_x, d_img_x2
//            - pointers to the device buffers that are copied back to the host
//   BS       - a newline is printed when j is a multiple of BS
//   DISPLAY  - not referenced in the lines visible here
//
// NOTE(review): the declarations of taille, img_1/img_x/img_x2,
// img_1b/img_xb/img_x2b, cpt and cpt_err are not visible in this excerpt;
// their types and sizes must be confirmed in the full file.
5 void verif_cumuls(int ** img_in, int H, int L, uint64 ** d_img_1, uint64 ** d_img_x, uint64 ** d_img_x2, int BS, bool DISPLAY){
7 //CPU memory allocation
12 /*for the comparison test*/
// Bring the three GPU results back to the host.
// NOTE(review): the cudaMemcpy return codes are ignored, so a failed copy
// would only show up as mismatches in the comparison below.
16 cudaMemcpy( img_1b, *d_img_1, taille*sizeof(uint64), cudaMemcpyDeviceToHost );
17 cudaMemcpy( img_xb, *d_img_x, taille*sizeof(uint64), cudaMemcpyDeviceToHost);
18 cudaMemcpy( img_x2b, *d_img_x2, taille*sizeof(uint64), cudaMemcpyDeviceToHost);
// CPU reference: each row is accumulated independently, starting from column 0.
// NOTE(review): the initialisation of img_1[i][0] is not visible in this excerpt.
// NOTE(review): img_in is int, so img_in[i][j]*img_in[i][j] is evaluated in
// int before being accumulated; with a 32-bit int this overflows for
// magnitudes above 46340 - confirm the input range.
20 for (int i=0; i<H; i++)
23 img_x[i][0] = img_in[i][0] ;
24 img_x2[i][0]= img_in[i][0]*img_in[i][0] ;
25 for (int j=1; j<L; j++)
27 img_1[i][j] = img_1[i][j-1] + 1 ;
28 img_x[i][j] = img_x[i][j-1] + img_in[i][j] ;
29 img_x2[i][j] = img_x2[i][j-1] + img_in[i][j]*img_in[i][j] ;
// Element-wise comparison of the CPU reference with the GPU copies.
// The test below is true as soon as any one of the three images differs.
// NOTE(review): the braces around the following statements are not visible,
// so which of the printf calls belong to the mismatch branch, and where
// cpt / cpt_err are updated, must be checked in the full file.
35 for (int i=0; i<H; i++){
36 for (int j=0; j<L; j++){
37 if( (img_1[i][j] != img_1b[i][j]) || (img_x[i][j] != img_xb[i][j]) || (img_x2[i][j] != img_x2b[i][j]) )
39 if (!(j%BS)) printf("\n");
42 printf("(%d,%d)CPU:%lu GPU:%lu\n",i, j, img_x2[i][j], img_x2b[i][j]);
43 printf("(%d,%d):CPU=%lu GPU=%lu\n",i,j,img_x[i][j], img_xb[i][j]);
// Summary line: prints cpt_err followed by cpt.
50 printf("%d erreurs / %d points\n", cpt_err, cpt );
// verif_stats_ref: debugging dump of the per-node reference statistics.
//
// What the visible code does:
//   1. allocates host arrays with new[] for the statistics (3 values per
//      node), the snake nodes, and three images of H*L elements;
//   2. copies the matching device buffers to the host;
//   3. for each node n prints its position (posi, posj), the three values
//      h_stats_ref[3*n .. 3*n+2] and the three image values at i*L+j.
//
// Parameters:
//   d_stats_ref - pointer to the device array holding 3*nnodes int64 values
//   d_snake     - pointer to the device array of nnodes snake_node_gpu
//   nnodes      - number of nodes to copy and print
//   H, L        - image dimensions; L is the row stride used in i*L+j
//   d_img_1, d_img_x, d_img_x2
//               - pointers to the device images that are copied back
//
// NOTE(review): no delete[] for the five new[] allocations appears in the
// lines visible here; confirm they are released at the end of the function.
53 void verif_stats_ref(int64 ** d_stats_ref, snake_node_gpu ** d_snake, int nnodes, int H, int L,
54 uint64 ** d_img_1, uint64 ** d_img_x, uint64 ** d_img_x2){
56 int64 * h_stats_ref = new int64[3*nnodes];
57 snake_node_gpu * h_snake = new snake_node_gpu[nnodes] ;
58 uint64 * img_1 = new uint64[H*L];
59 uint64 * img_x = new uint64[H*L];
60 uint64 * img_x2 = new uint64[H*L];
// NOTE(review): the three host images above hold H*L elements, but the three
// copies below request H*L*nnodes*sizeof(uint64) bytes. For nnodes > 1 this
// writes past the end of img_1/img_x/img_x2 on the host. The intended size is
// presumably H*L*sizeof(uint64) - confirm against the device allocation.
// NOTE(review): none of the cudaMemcpy return codes are checked.
62 cudaMemcpy( img_1, *d_img_1, H*L*nnodes*sizeof(uint64), cudaMemcpyDeviceToHost) ;
63 cudaMemcpy( img_x, *d_img_x, H*L*nnodes*sizeof(uint64), cudaMemcpyDeviceToHost) ;
64 cudaMemcpy( img_x2, *d_img_x2, H*L*nnodes*sizeof(uint64), cudaMemcpyDeviceToHost) ;
// Statistics and node descriptors: sizes match the host allocations above.
66 cudaMemcpy( h_stats_ref, *d_stats_ref, 3*nnodes*sizeof(int64), cudaMemcpyDeviceToHost) ;
67 cudaMemcpy( h_snake, *d_snake, nnodes*sizeof(snake_node_gpu), cudaMemcpyDeviceToHost) ;
70 printf("******* STATS DIMINUEES\n");
// One output line per node.
// NOTE(review): i and j come straight from the copied node data and are used
// in i*L+j without a bounds check against H and L.
71 for(int n=0; n<nnodes;n++)
73 int i = h_snake[n].posi, j = h_snake[n].posj ;
74 printf("node %d (%d,%d) : %ld - %ld - %ld - img1= %lu - imgx= %lu - imgx2= %lu \n", n, i, j,
75 h_stats_ref[3*n], h_stats_ref[3*n +1], h_stats_ref[3*n +2],
76 img_1[i*L+j], img_x[i*L+j], img_x2[i*L+j]);