From: perrot Date: Tue, 15 Feb 2011 15:19:14 +0000 (+0100) Subject: added rectangle generation for initialization X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/snake_gpu.git/commitdiff_plain/c17ca25473465d5550bc7e3e27b87b3d33d28dc6?hp=935bdd1c8b99ce5d70b6e4a53ca29f06353e8baa added rectangle generation for initialization --- diff --git a/exec/SNAKE2D b/exec/SNAKE2D index 67f3437..b035298 100755 Binary files a/exec/SNAKE2D and b/exec/SNAKE2D differ diff --git a/makefile b/makefile index 70d42b5..6e1ada5 100644 --- a/makefile +++ b/makefile @@ -12,7 +12,7 @@ PATH_INCLUDE = $(PATH_GCC)/src/ # compilateur CC = gcc -NVCC = /usr/local/cuda/bin/nvcc +NVCC = nvcc CXX = g++ # options de compilation @@ -31,7 +31,7 @@ OPTION_CC = $(OPTION_CC2) -I$(PATH_INCLUDE) -I$(PATH_SRC) # librairies pour la compilation LIB_CC = -lm -LIBSNV = -L/usr/local/cuda/lib64 -lcuda -lcudart +LIBSNV = -L/cm/shared/apps/cuda31/toolkit/3.1/lib64 -lcuda -lcudart # sources utiles a la compilation des main SRCS = lib_alloc.c lib_images.c lib_math.c lib_snake_common.c lib_contour.c @@ -55,7 +55,7 @@ clean : # --use_fast_math # --ptxas-options=-v $(PATH_LIB)%_gpu.o : $(PATH_SRC)%_gpu.cu - $(NVCC) -arch=sm_20 --use_fast_math -c $< -o $@ + $(NVCC) -arch=sm_13 --use_fast_math -c $< -o $@ $(PATH_LIB)%.o : $(PATH_SRC)%.c $(DEPS) $(CC) $(OPTION_CC) -c $< -o $@ diff --git a/src/lib_gpu.cu b/src/lib_gpu.cu index d6df5ec..73571f2 100644 --- a/src/lib_gpu.cu +++ b/src/lib_gpu.cu @@ -233,8 +233,28 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes, int dist = 140 ; tic(&chrono, NULL); - if (nb_nodes == 4) genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ; - else if (nb_nodes == 40) genere_snake_rectangle_Nnodes_gpu<<< 1, 1>>>(*d_snake, (H+L)/20, H, L) ; + + int MAX_DIAGOS = 1024*65536 ; + int ret, Q = 100 ; + uint4 * d_diagos_snake ; + uint4 * h_diagos_snake = new uint4[MAX_DIAGOS]; + + + ret = cudaMalloc( (void**) &d_diagos_snake, MAX_DIAGOS*sizeof(uint4)) ; + + genere_diagos_rectangle<<<1,1>>>(d_diagos_snake, H,L,Q); + + ret = cudaMemcpy( h_diagos_snake, d_diagos_snake, MAX_DIAGOS*sizeof(uint4), cudaMemcpyDeviceToHost) ; + //cudaSuccess, cudaErrorInvalidValue, cudaErrorInvalidDevicePointer, cudaErrorInvalidMemcpyDirection + + printf("COPY : %d, MAX_DIAGOS = %d\n", ret, MAX_DIAGOS); + for (int d=0; d<200;d++){ + printf("| (%d,%d)-(%d,%d) ", h_diagos_snake[d].x, h_diagos_snake[d].y, h_diagos_snake[d].z, h_diagos_snake[d].w ); + } + + exit(0); + genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ; + int nnodes = nb_nodes ; snake_node_gpu * h_snake = new snake_node_gpu[nnodes]; @@ -261,15 +281,19 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes, cudaMalloc((void**) &d_stats_ref, 3*nnodes*sizeof(int64)); //DEBUG : pour forcer la mise à zero du tableau intermediaire d_stats_ref + /* int64 h_stats_ref[3*nnodes] ; for (int a=0; a<3*nnodes ; a++) h_stats_ref[a] = 0 ; cudaMemcpy( h_stats_ref, d_stats_ref, sizeof(int64), cudaMemcpyHostToDevice) ; + */ //fin forçage a 0 //DEBUG : pour forcer la mise à zero du tableau intermediaire d_sompart + /* t_sum_x2 h_sompart[ 3*nnodes*bps ] ; for (int a=0; a<3*nnodes*bps ; a++) h_sompart[a] = 0 ; cudaMemcpy( h_sompart, d_sompart, sizeof(t_sum_x2), cudaMemcpyHostToDevice) ; + */ //fin forçage a 0 calcul_contribs_segments_snake<<< nnodes*bps, tpb, (CFI(tpb))*(3*sizeof(t_sum_x2))>>> diff --git a/src/lib_kernel_snake_2_gpu.cu b/src/lib_kernel_snake_2_gpu.cu index 8d819a8..b807cb7 100644 --- a/src/lib_kernel_snake_2_gpu.cu +++ b/src/lib_kernel_snake_2_gpu.cu @@ -33,6 +33,27 @@ __global__ void genere_snake_rectangle_4nodes_gpu(snake_node_gpu * d_snake, int } } +__global__ void genere_diagos_rectangle(uint4 * d_diagos, int h, int l, int q){ + int inci = h/q; + int incj = l/q; + int iM,jM, iN, jN ; + int idxDiago = 0 ; + // boucle double pour les positions du point NO de la diagonale + for ( iM = 0; iM < q-1; iM++){ + for (jM = 0 ; jM < q-1 ; jM++){ + //boucle double pour les positions du point SE de la diagonale + for (iN = iM+1; iN < q; iN++){ + for (jN = jM+1; jN < q; jN++){ + d_diagos[idxDiago].x = iM*inci; + d_diagos[idxDiago].y = jM*incj; + d_diagos[idxDiago].z = iN*inci; + d_diagos[idxDiago].w = jN*incj; + idxDiago++; + } + } + } + } +} __global__ void genere_snake_rectangle_Nnodes_gpu(snake_node_gpu * d_snake, int dist_bords, int i_dim, int j_dim){ int nb_node_seg = 9 ;