- //calcul listes pix + contrib partielles + freemans + centres
- calcul_contribs_segments_blocs_full<<< grid , threads, taille_smem >>>( d_snake, nnodes, d_positions, h_nb_pix_max,
- d_img_x, d_img_x2, d_codes_segments,
- J_dim, d_listes_pixels, d_contribs_segments_blocs,
- pairs);
- calcul_freemans_centre<<<n_interval, 16>>>( d_listes_pixels, d_freemanDiDj, d_freemans_centres);
- //printf("EXEC pairs : %d max pix - %d intervalles => %d blocs de %d threads - %d octets de smem\n", h_nb_pix_max, n_interval, grid.x, threads.x, taille_smem);
- //sommes des contribs partielles -> contribs segments
- somsom_full<<< 16*n_interval , 1>>>(d_contribs_segments_blocs, nnodes, nblocs_seg, d_contribs_segments) ;
+ // kernel launch parameters for recomputing the snake's contributions and statistics
+ npixmax = h_nb_pix_max ;
+ tpb = nextPow2(npixmax) ; // threads per block; presumably the next power of two >= npixmax (confirm against nextPow2)
+ if (tpb >= BSMAX) tpb = BSMAX ;// /!\ the <<< calcul_contrib...>>> kernel does not support a block size > BSMAX because of the shared memory it requires
+ if (tpb < 32 ) tpb = 32 ; // never launch with fewer than 32 threads per block
+ bps = (npixmax+tpb-1)/tpb ; // ceiling of npixmax/tpb; the launch below uses nnodes*bps blocks
+ // compute the partial sums of the contributions + the segment codes
+ recalcul_contribs_segments_snake<<< nnodes*bps, tpb, CFI(tpb)*sizeof(tcontribs)>>>(d_snake_tmp, nnodes,
+ d_img_x, d_img_x2,
+ J_dim, d_liste_temp, d_sompart );
+ // compute the freemans and the centres from the 5 points stored per segment in 'd_liste_temp'
+ recalcul_freemans_centre<<<nnodes, 1>>>(d_snake_tmp, d_liste_temp, d_freemanDiDj);
+ // sum of the partial sums
+ resomsom_snake<<< nnodes , 1 >>>(d_sompart, nnodes, bps, d_snake_tmp);
+ // compute the stats
+ recalcul_stats_snake<<< 1 , 1 >>>(d_snake_tmp, nnodes, d_stats_snake, d_vrais_snake,
+ d_img_x, d_img_x2,
+ d_codeNoeud, J_dim
+ );
+ copie_snake<<< nnodes, 1 >>>(d_snake_tmp, d_snake) ; // presumably copies d_snake_tmp into d_snake, one block per node (confirm against copie_snake)