X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/snake_gpu.git/blobdiff_plain/c17ca25473465d5550bc7e3e27b87b3d33d28dc6..48b1bac747f398161b53a67ad80c4596f531c88a:/src/lib_gpu.cu diff --git a/src/lib_gpu.cu b/src/lib_gpu.cu index 73571f2..5a90c1c 100644 --- a/src/lib_gpu.cu +++ b/src/lib_gpu.cu @@ -153,9 +153,9 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes, calcul_stats_image<<<1, 1>>>( *d_img_x, *d_img_x2, H, L, (uint64*)*d_stats_snake); - cudaThreadSynchronize() ; - toc(chrono, "\tTemps GPU"); - if(DEBUG_IMG_CUMUL) + cudaThreadSynchronize() ; + toc(chrono, "\tTemps GPU"); + if(DEBUG_IMG_CUMUL) { //allocation memoire CPU @@ -231,29 +231,52 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes, * generation snake en mem GPU */ int dist = 140 ; + + /* Test de determination du snake rectangle initial optimal*/ + int div = 100;//nb de divisions de l'image : cela définit le pas. La valeur max découle du nb max de threads possible ds une grille + int Nperm = div*div*bs;//nb total de rectangles a tester. La distribution est ainsi irrégulière, mais plus simple. + double best_crit ; + int ind_best_crit ; + + t_rectangle_snake * d_all_crit, d_best_crit;//tableaux pour les résultats des différents rectangles / le meilleur + t_rectangle_snake * h_all_crit = new t_rectangle_snake[Nperm];//correspondant CPU + + //allocations + cudaMalloc((void**) &d_all_crit, Nperm*sizeof(t_rectangle_snake)); + cudaMalloc((void**) &d_best_crit, sizeof(t_rectangle_snake)); tic(&chrono, NULL); - int MAX_DIAGOS = 1024*65536 ; - int ret, Q = 100 ; - uint4 * d_diagos_snake ; - uint4 * h_diagos_snake = new uint4[MAX_DIAGOS]; - - - ret = cudaMalloc( (void**) &d_diagos_snake, MAX_DIAGOS*sizeof(uint4)) ; + //execution kernel + dim3 grid = dim3(H/div, L/div, 1); + calcul_contribs_snake4<<>>(*d_snake, *d_img_x, *d_img_x2, H, L, *d_stats_snake, d_all_crit) ; + cudaThreadSynchronize(); + toc(chrono, "\nCALCULS RECTANGLES"); - genere_diagos_rectangle<<<1,1>>>(d_diagos_snake, H,L,Q); - - ret = cudaMemcpy( h_diagos_snake, d_diagos_snake, MAX_DIAGOS*sizeof(uint4), cudaMemcpyDeviceToHost) ; - //cudaSuccess, cudaErrorInvalidValue, cudaErrorInvalidDevicePointer, cudaErrorInvalidMemcpyDirection - - printf("COPY : %d, MAX_DIAGOS = %d\n", ret, MAX_DIAGOS); - for (int d=0; d<200;d++){ - printf("| (%d,%d)-(%d,%d) ", h_diagos_snake[d].x, h_diagos_snake[d].y, h_diagos_snake[d].z, h_diagos_snake[d].w ); + //recup data rectangles + int ret; + ret = cudaMemcpy( h_all_crit, d_all_crit, Nperm*sizeof(t_rectangle_snake), cudaMemcpyDeviceToHost) ; + printf("COPIE DATA = %s\n",(ret==0)?"OK":"ERR"); + + //optimum sur CPU + best_crit = h_all_crit[0].crit ; + ind_best_crit = 0 ; + for (int k=1; k<100; k++){ + if ((h_all_crit[k].crit > 0) && (h_all_crit[k].crit < best_crit)) { + best_crit = h_all_crit[k].crit ; + ind_best_crit = k ; + } + printf("%d -> ( %d, %d )--( %d, %d) CRITERE = %f\n", k, h_all_crit[k].bpi, h_all_crit[k].bpj, + h_all_crit[k].opi, h_all_crit[k].opj, h_all_crit[k].crit ); } + printf("BEST RECTANGLE/%d tests : %d -> ( %d, %d )--( %d, %d) CRITERE = %f\n", Nperm, ind_best_crit, h_all_crit[ind_best_crit].bpi, h_all_crit[ind_best_crit].bpj, + h_all_crit[ind_best_crit].opi, h_all_crit[ind_best_crit].opj, best_crit ); + exit(0); - genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ; + /*fin test snake rectangle initial optimal*/ + + //genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ; int nnodes = nb_nodes ;