- if (nb_nodes == 4) genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ;
- else if (nb_nodes == 40) genere_snake_rectangle_Nnodes_gpu<<< 1, 1>>>(*d_snake, (H+L)/20, H, L) ;
+
+ // Kernel execution
+ dim3 grid = dim3(H/div, L/div, 1); // 2D grid of H/div x L/div blocks; NOTE(review): the quotient truncates if these are integers — confirm H and L are multiples of div
+ calcul_contribs_snake4<<<grid, bs, CFI(bs)*sizeof(tcontribs) >>>(*d_snake, *d_img_x, *d_img_x2, H, L, *d_stats_snake, d_all_crit) ; // third launch argument = dynamic shared memory size in bytes
+ cudaThreadSynchronize(); // block the host until the kernel has finished (deprecated in newer CUDA toolkits in favour of cudaDeviceSynchronize)
+ toc(chrono, "\nCALCULS RECTANGLES"); // presumably reports the time elapsed on chrono — confirm against toc()'s definition
+
+ // Retrieve the rectangle data: copy Nperm t_rectangle_snake results from d_all_crit (device) into h_all_crit (host).
+ int ret;
+ ret = cudaMemcpy( h_all_crit, d_all_crit, Nperm*sizeof(t_rectangle_snake), cudaMemcpyDeviceToHost) ;
+ printf("COPIE DATA = %s\n",(ret==0)?"OK":"ERR"); // 0 is cudaSuccess
+ // NOTE(review): execution continues and reads h_all_crit even when the copy reports an error — confirm this is intended.
+ // Optimum search on the CPU: keep the smallest strictly positive criterion and its index.
+ best_crit = h_all_crit[0].crit ; // entry 0 is only a provisional best; it may be non-positive
+ ind_best_crit = 0 ;
+ for (int k=1; k<100; k++){ // NOTE(review): scans a fixed 100 entries although Nperm were copied — confirm 100 <= Nperm
+ if ((h_all_crit[k].crit > 0) && ((best_crit <= 0) || (h_all_crit[k].crit < best_crit))) { // a non-positive provisional best must not block positive candidates
+ best_crit = h_all_crit[k].crit ;
+ ind_best_crit = k ;
+ }
+ printf("%d -> ( %d, %d )--( %d, %d) CRITERE = %f\n", k, h_all_crit[k].bpi, h_all_crit[k].bpj,
+ h_all_crit[k].opi, h_all_crit[k].opj, h_all_crit[k].crit );
+ }
+
+ printf("BEST RECTANGLE/%d tests : %d -> ( %d, %d )--( %d, %d) CRITERE = %f\n", Nperm, ind_best_crit, h_all_crit[ind_best_crit].bpi, h_all_crit[ind_best_crit].bpj,
+ h_all_crit[ind_best_crit].opi, h_all_crit[ind_best_crit].opj, best_crit );
+
+ exit(0); // terminate the program here: nothing after this point runs
+ /* end of the optimal initial rectangle snake test */
+
+ //genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ;
+