From: Gilles Perrot
Date: Fri, 18 Feb 2011 12:35:53 +0000 (+0100)
Subject: Test diagonales naif
X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/snake_gpu.git/commitdiff_plain/af4b787ce73a80f23e9e2b1ef9ac52660e8ab754?hp=c17ca25473465d5550bc7e3e27b87b3d33d28dc6

Test diagonales naif
---

Review note (not part of the original commit):
  * This copy was rebuilt from a whitespace-collapsed export. Indentation, line
    wrapping and blank context lines are reconstructed, so the first three hunks
    may not apply mechanically.
  * The calcul_contribs_snake4 hunk differs from the commit: the row index no
    longer adds BPi twice, the shared-memory slot is cleared before it is
    accumulated into, degenerate or out-of-range rectangles are skipped, and the
    variance test uses && instead of |. The new-side line count of that hunk
    header was updated to match.

diff --git a/exec/SNAKE2D b/exec/SNAKE2D
index b035298..1d1de07 100755
Binary files a/exec/SNAKE2D and b/exec/SNAKE2D differ
diff --git a/lib/lib_gpu.o b/lib/lib_gpu.o
new file mode 100644
index 0000000..1d978d7
Binary files /dev/null and b/lib/lib_gpu.o differ
diff --git a/lib/lib_snake_2_gpu.o b/lib/lib_snake_2_gpu.o
new file mode 100644
index 0000000..4a3a81f
Binary files /dev/null and b/lib/lib_snake_2_gpu.o differ
diff --git a/lib/lib_test_gpu.o b/lib/lib_test_gpu.o
new file mode 100644
index 0000000..5a6e207
Binary files /dev/null and b/lib/lib_test_gpu.o differ
diff --git a/lib/snake2D_gpu.o b/lib/snake2D_gpu.o
new file mode 100644
index 0000000..7be4b88
Binary files /dev/null and b/lib/snake2D_gpu.o differ
diff --git a/src/lib_gpu.cu b/src/lib_gpu.cu
index 73571f2..1bbd61d 100644
--- a/src/lib_gpu.cu
+++ b/src/lib_gpu.cu
@@ -235,19 +235,23 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes,
   tic(&chrono, NULL);
   int MAX_DIAGOS = 1024*65536 ;
-  int ret, Q = 100 ;
+  int ret, Q = 120 ;
+  int * d_n_diagos, h_n_diagos;
   uint4 * d_diagos_snake ;
   uint4 * h_diagos_snake = new uint4[MAX_DIAGOS];
-
-  ret = cudaMalloc( (void**) &d_diagos_snake, MAX_DIAGOS*sizeof(uint4)) ;
-  genere_diagos_rectangle<<<1,1>>>(d_diagos_snake, H,L,Q);
-
+  cudaMalloc( (void**) &d_n_diagos, sizeof(int)) ;
+  cudaMalloc( (void**) &d_diagos_snake, MAX_DIAGOS*sizeof(uint4)) ;
+
+  genere_diagos_rectangle<<<1,1>>>(d_diagos_snake, H,L,Q, d_n_diagos);
+
+  cudaMemcpy( &h_n_diagos, d_n_diagos, sizeof(int), cudaMemcpyDeviceToHost) ;
   ret = cudaMemcpy( h_diagos_snake, d_diagos_snake, MAX_DIAGOS*sizeof(uint4), cudaMemcpyDeviceToHost) ;
-  //cudaSuccess, cudaErrorInvalidValue, cudaErrorInvalidDevicePointer, cudaErrorInvalidMemcpyDirection
-
-  printf("COPY : %d, MAX_DIAGOS = %d\n", ret, MAX_DIAGOS);
+
+  toc(chrono, "\tCalcul diagos");
+
+  printf("COPY : %s, DIAGOS = %d / %d\n", (ret==0)?"OK":"ERREUR", h_n_diagos,MAX_DIAGOS);
   for (int d=0; d<200;d++){
     printf("| (%d,%d)-(%d,%d) ", h_diagos_snake[d].x, h_diagos_snake[d].y, h_diagos_snake[d].z, h_diagos_snake[d].w );
   }
diff --git a/src/lib_kernel_snake_2_gpu.cu b/src/lib_kernel_snake_2_gpu.cu
index b807cb7..5293308 100644
--- a/src/lib_kernel_snake_2_gpu.cu
+++ b/src/lib_kernel_snake_2_gpu.cu
@@ -33,7 +33,7 @@ __global__ void genere_snake_rectangle_4nodes_gpu(snake_node_gpu * d_snake, int
   }
 }
-__global__ void genere_diagos_rectangle(uint4 * d_diagos, int h, int l, int q){
+__global__ void genere_diagos_rectangle(uint4 * d_diagos, int h, int l, int q, int * n_diagos){
   int inci = h/q;
   int incj = l/q;
   int iM,jM, iN, jN ;
@@ -53,6 +53,7 @@ __global__ void genere_diagos_rectangle(uint4 * d_diagos, int h, int l, int q){
       }
     }
   }
+  *n_diagos = --idxDiago ;
 }
 __global__ void genere_snake_rectangle_Nnodes_gpu(snake_node_gpu * d_snake, int dist_bords, int i_dim, int j_dim){
@@ -466,3 +467,98 @@ __global__ void calcul_stats_snake(snake_node_gpu * d_snake, int nnodes, int64
   *vrais_min = codage_gl_gauss(s_stats_snake[0], s_stats_snake[1], s_stats_snake[2], d_stats_snake[3], d_stats_snake[4], d_stats_snake[5]);
 }
+
+
+/**
+ * For each diagonal (base point BP = NW corner, opposite point OP = SE corner),
+ * accumulates the contributions of the rectangle it spans and derives the
+ * variance-based criterion of that candidate snake.
+ *
+ * Launch layout, as read from the indexing below:
+ *   - blockIdx.x, blockIdx.y : row and column of the base point;
+ *   - threadIdx.x            : rank of the opposite point, numbered row by row
+ *                              over the columns BPj .. l-1;
+ *   - dynamic shared memory  : one tcontribs slot per thread, addressed via CFI().
+ *
+ * cumul_x, cumul_x2    : cumulated images, read at [row*l + column]
+ *                        (presumably row-wise running sums of x and x^2 -- confirm).
+ * h, l                 : image height and width; h only bounds the rows read.
+ * gcontribs            : not written yet, reserved for the per-block selection.
+ * SUM_1, SUM_X, SUM_X2 : pixel count, sum and sum of squares used for the exterior
+ *                        region (presumably totals over the whole image -- confirm).
+ */
+__global__ void calcul_contribs_snake4(t_cumul_x * cumul_x, t_cumul_x2 * cumul_x2, int h, int l, tcontribs * gcontribs,
+                                       uint64 SUM_1, uint64 SUM_X, uint64 SUM_X2)
+{
+  // rank of the opposite point (OP, SE corner) handled by this thread
+  int OPib = threadIdx.x ;
+  // coordinates of the base point (BP, NW corner) handled by this block
+  int BPi = blockIdx.x ;
+  int BPj = blockIdx.y ;
+  // number of columns from BPj to the right edge of the image
+  int span = l - BPj ;
+  // a base point outside the image has no candidate; this also keeps the
+  // divisions below away from span <= 0
+  bool valid = (BPi < h) && (span > 0) ;
+  // coordinates of the opposite point
+  int OPi = BPi ;
+  int OPj = BPj ;
+  if (valid)
+    {
+      OPi += OPib / span ;
+      OPj += OPib % span ;
+    }
+  // contrib 1 of the snake: number of pixels inside the rectangle
+  int C1 = (OPi - BPi)*(OPj - BPj) ;
+  // skip rectangles that run past the last row, empty ones (C1 == 0 would
+  // divide by zero) and any that leave no exterior pixel
+  valid = valid && (OPi <= h) && (C1 > 0) && ((uint64)C1 < SUM_1) ;
+
+  // one slot per snake of the block
+  // TODO a reduced structure (without c1) would save space
+  extern __shared__ tcontribs scumuls[];
+  int slot = CFI(OPib) ;
+
+  // shared memory is not initialised: clear the slot before accumulating
+  scumuls[slot].cx = 0 ;
+  scumuls[slot].cx2 = 0 ;
+
+  // criterion of this snake; only meaningful when has_criterion is true
+  bool has_criterion = false ;
+  double criterion = 0.0 ;
+
+  if (valid)
+    {
+      // contributions of the snake, one row at a time
+      for (int k=BPi ; k < OPi ; k++)
+        {
+          // k is already an absolute row index
+          int posG = k*l + BPj ;
+          int posD = posG - BPj + OPj ;
+          scumuls[slot].cx += cumul_x[ posD ] - cumul_x[ posG ] ;
+          scumuls[slot].cx2 += cumul_x2[ posD ] - cumul_x2[ posG ] ;
+        }
+
+      // variance of the grey levels inside the snake
+      double ni = (double)C1 ;
+      double mean_i = (double)scumuls[slot].cx/ni ;
+      double sigi2 = (double)scumuls[slot].cx2/ni - mean_i*mean_i ;
+
+      // variance of the grey levels outside the snake
+      uint32 ne = SUM_1 - C1 ;                          /* pixel count outside */
+      uint64 stat_sum_xe = SUM_X - scumuls[slot].cx ;   /* sum of x outside */
+      double mean_e = (double)stat_sum_xe/(double)ne ;
+      double sige2 = ((double)SUM_X2 - scumuls[slot].cx2)/(double)ne - mean_e*mean_e ;
+
+      // log() needs strictly positive arguments, so both variances must be > 0
+      if ((sigi2 > 0) && (sige2 > 0))
+        {
+          criterion = 0.5*(ni*log(sigi2) + (double)ne*log(sige2)) ;
+          has_criterion = true ;
+        }
+    }
+
+  // TODO pick the best snake of the block (needs SUM_1, SUM_X and SUM_X2)
+  //      among the threads whose has_criterion is true, and store it in gcontribs
+
+}