X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/snake_gpu.git/blobdiff_plain/af4b787ce73a80f23e9e2b1ef9ac52660e8ab754..48b1bac747f398161b53a67ad80c4596f531c88a:/src/lib_gpu.cu diff --git a/src/lib_gpu.cu b/src/lib_gpu.cu index 1bbd61d..5a90c1c 100644 --- a/src/lib_gpu.cu +++ b/src/lib_gpu.cu @@ -153,9 +153,9 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes, calcul_stats_image<<<1, 1>>>( *d_img_x, *d_img_x2, H, L, (uint64*)*d_stats_snake); - cudaThreadSynchronize() ; - toc(chrono, "\tTemps GPU"); - if(DEBUG_IMG_CUMUL) + cudaThreadSynchronize() ; + toc(chrono, "\tTemps GPU"); + if(DEBUG_IMG_CUMUL) { //allocation memoire CPU @@ -231,33 +231,52 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes, * generation snake en mem GPU */ int dist = 140 ; - - tic(&chrono, NULL); - int MAX_DIAGOS = 1024*65536 ; - int ret, Q = 120 ; - int * d_n_diagos, h_n_diagos; - uint4 * d_diagos_snake ; - uint4 * h_diagos_snake = new uint4[MAX_DIAGOS]; - - - cudaMalloc( (void**) &d_n_diagos, sizeof(int)) ; - cudaMalloc( (void**) &d_diagos_snake, MAX_DIAGOS*sizeof(uint4)) ; - - genere_diagos_rectangle<<<1,1>>>(d_diagos_snake, H,L,Q, d_n_diagos); + /* Test de determination du snake rectangle initial optimal*/ + int div = 100;//nb de divisions de l'image : cela définit le pas. La valeur max découle du nb max de threads possible ds une grille + int Nperm = div*div*bs;//nb total de rectangles a tester. La distribution est ainsi irrégulière, mais plus simple. + double best_crit ; + int ind_best_crit ; + + t_rectangle_snake * d_all_crit, d_best_crit;//tableaux pour les résultats des différents rectangles / le meilleur + t_rectangle_snake * h_all_crit = new t_rectangle_snake[Nperm];//correspondant CPU - cudaMemcpy( &h_n_diagos, d_n_diagos, sizeof(int), cudaMemcpyDeviceToHost) ; - ret = cudaMemcpy( h_diagos_snake, d_diagos_snake, MAX_DIAGOS*sizeof(uint4), cudaMemcpyDeviceToHost) ; + //allocations + cudaMalloc((void**) &d_all_crit, Nperm*sizeof(t_rectangle_snake)); + cudaMalloc((void**) &d_best_crit, sizeof(t_rectangle_snake)); + + tic(&chrono, NULL); - toc(chrono, "\tCalcul diagos"); + //execution kernel + dim3 grid = dim3(H/div, L/div, 1); + calcul_contribs_snake4<<>>(*d_snake, *d_img_x, *d_img_x2, H, L, *d_stats_snake, d_all_crit) ; + cudaThreadSynchronize(); + toc(chrono, "\nCALCULS RECTANGLES"); - printf("COPY : %s, DIAGOS = %d / %d\n", (ret==0)?"OK":"ERREUR", h_n_diagos,MAX_DIAGOS); - for (int d=0; d<200;d++){ - printf("| (%d,%d)-(%d,%d) ", h_diagos_snake[d].x, h_diagos_snake[d].y, h_diagos_snake[d].z, h_diagos_snake[d].w ); + //recup data rectangles + int ret; + ret = cudaMemcpy( h_all_crit, d_all_crit, Nperm*sizeof(t_rectangle_snake), cudaMemcpyDeviceToHost) ; + printf("COPIE DATA = %s\n",(ret==0)?"OK":"ERR"); + + //optimum sur CPU + best_crit = h_all_crit[0].crit ; + ind_best_crit = 0 ; + for (int k=1; k<100; k++){ + if ((h_all_crit[k].crit > 0) && (h_all_crit[k].crit < best_crit)) { + best_crit = h_all_crit[k].crit ; + ind_best_crit = k ; + } + printf("%d -> ( %d, %d )--( %d, %d) CRITERE = %f\n", k, h_all_crit[k].bpi, h_all_crit[k].bpj, + h_all_crit[k].opi, h_all_crit[k].opj, h_all_crit[k].crit ); } + printf("BEST RECTANGLE/%d tests : %d -> ( %d, %d )--( %d, %d) CRITERE = %f\n", Nperm, ind_best_crit, h_all_crit[ind_best_crit].bpi, h_all_crit[ind_best_crit].bpj, + h_all_crit[ind_best_crit].opi, h_all_crit[ind_best_crit].opj, best_crit ); + exit(0); - genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ; + /*fin test snake rectangle initial optimal*/ + + //genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ; int nnodes = nb_nodes ;