X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/snake_gpu.git/blobdiff_plain/086196b2c97166f2420134552efb0a20195fece3..4808809dab187a1799ac382ae24041d6243a97cc:/src/lib_kernels_contribs.cu diff --git a/src/lib_kernels_contribs.cu b/src/lib_kernels_contribs.cu index 2440f0c..43dc923 100644 --- a/src/lib_kernels_contribs.cu +++ b/src/lib_kernels_contribs.cu @@ -103,7 +103,7 @@ __global__ void calcul_contribs_segments_blocs_full(snake_node_gpu * d_snake, in // indices des elements int blockSize = blockDim.x ; // nb threads par bloc int tib = threadIdx.x ; // position du thread dans le bloc - int nblocs_noeud = gridDim.x / (nb_nodes/2 + pairs*(nb_nodes%2)) ; // nb de blocs dédié à chaque noeud + int nblocs_noeud = gridDim.x / nb_nodes ; // nb de blocs dédié à chaque noeud int nblocs_seg = nblocs_noeud / 16 ; // nb de blocs dédiés à un segment de test int idx = blockDim.x*blockIdx.x + threadIdx.x ; // position absolue du thread ds la grille int id_interval = blockIdx.x / nblocs_noeud ; // indice de l'intervalle du noeud dans la grille @@ -125,18 +125,10 @@ __global__ void calcul_contribs_segments_blocs_full(snake_node_gpu * d_snake, in int xprec, xsuiv ; // determine les indices des noeuds prec, courant, suiv - if (pairs) - { - n1 = 2*id_interval -1 ; - n2 = 2*id_interval ; - n3 = 2*id_interval +1 ; - } - else - { - n1 = 2*id_interval ; - n2 = 2*id_interval +1 ; - n3 = 2*id_interval +2 ; - } + n1 = id_interval -1 ; + n2 = id_interval ; + n3 = id_interval +1 ; + //gestion du bouclage du snake if (n1 < 0) n1 = nb_nodes-1 ; if (n3 >= nb_nodes) n3 = 0 ; @@ -293,6 +285,8 @@ __global__ void calcul_contribs_segments_blocs_full(snake_node_gpu * d_snake, in } } + + /* calcul des freeman et du centre de chaque segment de test a executer sur 'n_interval' blocs de 16 threads @@ -528,7 +522,7 @@ __global__ void soustrait_aux_stats_2N_segments_noeud(snake_node_gpu * d_snake, EXEC : sur n_interval blocs de 8 threads */ -__global__ void calcul_stats_full(snake_node_gpu * d_snake, int nnodes, bool pairs, int64 * d_stats_snake, +__global__ void calcul_stats_full(snake_node_gpu * d_snake, snake_node_gpu * d_snake_tmp, int nnodes, bool pairs, int64 * d_stats_snake, int64 * d_stats_ref, int64 * d_stats, uint64 * d_contribs, uint4 * d_liste_points, int * code_segment, uint4 * d_freemans, int * d_table_codes, t_cumul_x * cumul_x, t_cumul_x2 * cumul_x2, @@ -541,7 +535,7 @@ __global__ void calcul_stats_full(snake_node_gpu * d_snake, int nnodes, bool pai int code_noeud; __shared__ int64 s_stats_snake[3*8] ; - id_nx = 2*interval + !pairs ; + id_nx = interval ; if (id_nx == 0) id_nprec = nnodes - 1 ; else id_nprec = id_nx - 1 ; if (id_nprec == 0) id_nprecprec = nnodes -1 ; @@ -672,44 +666,19 @@ __global__ void calcul_stats_full(snake_node_gpu * d_snake, int nnodes, bool pai pos_optim = v; } } - if (pos_optim >-1){ - if ( !croisement(d_snake, id_nx, d_liste_points[8*id_nx + pos_optim].x, d_liste_points[8*id_nx + pos_optim].y, nnodes) ) + if ( (pos_optim >-1) && !croisement(d_snake, id_nx, d_liste_points[8*id_nx + pos_optim].x, d_liste_points[8*id_nx + pos_optim].y, nnodes) ) { /*maj data snake*/ move[id_nx] = true ; //new position - d_snake[id_nx].posi = d_liste_points[8*id_nx + pos_optim].x ; - d_snake[id_nx].posj = d_liste_points[8*id_nx + pos_optim].y ; - //nb pixels segment precedent - d_snake[id_nprec].nb_pixels = d_liste_points[8*id_nx + pos_optim].z ; - //nb pixels segment suivant - d_snake[id_nx].nb_pixels = d_liste_points[8*id_nx + pos_optim].w ; - //contribs segment precedent - d_snake[id_nprec].sum_1 = d_contribs[3*(16*interval + pos_optim)] ; - d_snake[id_nprec].sum_x = d_contribs[3*(16*interval + pos_optim) + 1] ; - d_snake[id_nprec].sum_x2 = d_contribs[3*(16*interval + pos_optim) + 2] ; - //contribs segment suivant - d_snake[id_nx].sum_1 = d_contribs[3*(16*interval + pos_optim + 8)] ; - d_snake[id_nx].sum_x = d_contribs[3*(16*interval + pos_optim + 8) + 1] ; - d_snake[id_nx].sum_x2 = d_contribs[3*(16*interval + pos_optim + 8) + 2] ; - //freemans segment precedent - d_snake[id_nprec].freeman_out = d_freemans[16*interval + pos_optim].z ; - d_snake[id_nprec].freeman_in = d_freemans[16*interval + pos_optim].w ; - //freemans segment suivant - d_snake[id_nx].freeman_out = d_freemans[16*interval + pos_optim + 8].z ; - d_snake[id_nx].freeman_in = d_freemans[16*interval + pos_optim + 8].w ; - //codes segment precedent - d_snake[id_nprec].code_segment = code_segment[16*interval + pos_optim] ; - //code segment suivant - d_snake[id_nx].code_segment = code_segment[16*interval + pos_optim + 8] ; - //centre segment precedent - d_snake[id_nprec].centre_i = d_freemans[16*interval + pos_optim ].x ; - d_snake[id_nprec].centre_j = d_freemans[16*interval + pos_optim ].y ; - //centre segment suivant - d_snake[id_nx].centre_i = d_freemans[16*interval + pos_optim + 8].x ; - d_snake[id_nx].centre_j = d_freemans[16*interval + pos_optim + 8].y ; + d_snake_tmp[id_nx].posi = d_liste_points[8*id_nx + pos_optim].x ; + d_snake_tmp[id_nx].posj = d_liste_points[8*id_nx + pos_optim].y ; + } + else { + //keep old position + d_snake_tmp[id_nx].posi = d_snake[id_nx].posi ; + d_snake_tmp[id_nx].posj = d_snake[id_nx].posj ; - } } } @@ -787,32 +756,39 @@ __global__ void recalcul_stats_snake(snake_node_gpu * d_snake, int nnodes, int6 __global__ void ajoute_noeuds(snake_node_gpu * snake, snake_node_gpu * snake_tmp, int nnodes, int seuil, int * new_nb_nodes){ - volatile snake_node_gpu * st = snake_tmp ; - + int id_cpy = 0; for (int id_nx=0; id_nx < nnodes; id_nx++){ //position du noeud existant - st[id_cpy].posi = snake[id_nx].posi ; - st[id_cpy].posj = snake[id_nx].posj ; + snake_tmp[id_cpy].posi = snake[id_nx].posi ; + snake_tmp[id_cpy].posj = snake[id_nx].posj ; id_cpy++ ; - if ( snake[id_nx].nb_pixels > seuil) + if ( snake_tmp[id_nx].nb_pixels > seuil) { //position du nouveau noeud - st[id_cpy].posi = snake[id_nx].centre_i ; - st[id_cpy].posj = snake[id_nx].centre_j ; + snake_tmp[id_cpy].posi = snake[id_nx].centre_i ; + snake_tmp[id_cpy].posj = snake[id_nx].centre_j ; id_cpy++ ; } } - for( int node=0; node= nb_segments ) nd_suiv = 0 ; + snake[id_segment].nb_pixels = calcul_nb_pixels(snake[nd].posi, snake[nd].posj, snake[nd_suiv].posi, snake[nd_suiv].posj) ; +} /* sommme des contribs par bloc -> contribs segment, pour le snake