added rectangle generation for initialization

author perrot <perrot@debzulu.iut-bm.univ-fcomte.fr>

Tue, 15 Feb 2011 15:19:14 +0000 (16:19 +0100)

committer perrot <perrot@debzulu.iut-bm.univ-fcomte.fr>

Tue, 15 Feb 2011 15:19:14 +0000 (16:19 +0100)
author perrot <perrot@debzulu.iut-bm.univ-fcomte.fr>
Tue, 15 Feb 2011 15:19:14 +0000 (16:19 +0100)
committer perrot <perrot@debzulu.iut-bm.univ-fcomte.fr>
Tue, 15 Feb 2011 15:19:14 +0000 (16:19 +0100)
diff --git a/exec/SNAKE2D b/exec/SNAKE2D

index 67f3437fa7b489a3d60f6384c850865128d0b87e..b0352988462706c34c0e34413075c30054e00b60 100755 (executable)

Binary files a/exec/SNAKE2D and b/exec/SNAKE2D differ
diff --git a/makefile b/makefile

index 70d42b529b7738ea8dfce4bf0d95cec404048c64..6e1ada53fdb4b6b5e8c86dadfc585beaddc16578 100644 (file)
--- a/makefile
+++ b/makefile
@@ -12,7 +12,7 @@ PATH_INCLUDE = $(PATH_GCC)/src/
  
  # compilateur
  CC = gcc
  
  # compilateur
  CC = gcc
-NVCC = /usr/local/cuda/bin/nvcc
+NVCC = nvcc
  CXX = g++
  
  # options de compilation
  CXX = g++
  
  # options de compilation
@@ -31,7 +31,7 @@ OPTION_CC = $(OPTION_CC2) -I$(PATH_INCLUDE) -I$(PATH_SRC)
  
  # librairies pour la compilation
  LIB_CC = -lm 
  
  # librairies pour la compilation
  LIB_CC = -lm 
-LIBSNV = -L/usr/local/cuda/lib64 -lcuda -lcudart
+LIBSNV = -L/cm/shared/apps/cuda31/toolkit/3.1/lib64 -lcuda -lcudart
  
  # sources utiles a la compilation des main
  SRCS =         lib_alloc.c lib_images.c lib_math.c lib_snake_common.c lib_contour.c
  
  # sources utiles a la compilation des main
  SRCS =         lib_alloc.c lib_images.c lib_math.c lib_snake_common.c lib_contour.c
@@ -55,7 +55,7 @@ clean :
  # --use_fast_math
  # --ptxas-options=-v
  $(PATH_LIB)%_gpu.o :   $(PATH_SRC)%_gpu.cu
  # --use_fast_math
  # --ptxas-options=-v
  $(PATH_LIB)%_gpu.o :   $(PATH_SRC)%_gpu.cu
-                       $(NVCC) -arch=sm_20 --use_fast_math -c $< -o $@ 
+                       $(NVCC) -arch=sm_13 --use_fast_math -c $< -o $@ 
  
  $(PATH_LIB)%.o :       $(PATH_SRC)%.c $(DEPS)
                         $(CC) $(OPTION_CC) -c $< -o $@
  
  $(PATH_LIB)%.o :       $(PATH_SRC)%.c $(DEPS)
                         $(CC) $(OPTION_CC) -c $< -o $@
diff --git a/src/lib_gpu.cu b/src/lib_gpu.cu

index d6df5ec11acac0a9142c5057996e1bf170bf4b27..73571f2cadfa8d663581093747d0edbc75c96fff 100644 (file)
--- a/src/lib_gpu.cu
+++ b/src/lib_gpu.cu
@@ -233,8 +233,28 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes,
    int dist = 140 ;
    
    tic(&chrono, NULL);
    int dist = 140 ;
    
    tic(&chrono, NULL);
-  if (nb_nodes == 4)  genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ;
-  else if (nb_nodes == 40) genere_snake_rectangle_Nnodes_gpu<<< 1, 1>>>(*d_snake, (H+L)/20, H, L) ;
+
+  int MAX_DIAGOS = 1024*65536 ;
+  int ret, Q = 100 ;
+  uint4 * d_diagos_snake ;
+  uint4 * h_diagos_snake = new uint4[MAX_DIAGOS];
+ 
+  
+  ret = cudaMalloc( (void**) &d_diagos_snake, MAX_DIAGOS*sizeof(uint4)) ;
+
+  genere_diagos_rectangle<<<1,1>>>(d_diagos_snake, H,L,Q);
+  
+  ret = cudaMemcpy( h_diagos_snake, d_diagos_snake, MAX_DIAGOS*sizeof(uint4), cudaMemcpyDeviceToHost) ;
+  //cudaSuccess, cudaErrorInvalidValue, cudaErrorInvalidDevicePointer, cudaErrorInvalidMemcpyDirection
+  
+  printf("COPY : %d, MAX_DIAGOS = %d\n", ret, MAX_DIAGOS);
+  for (int d=0; d<200;d++){
+       printf("| (%d,%d)-(%d,%d) ", h_diagos_snake[d].x, h_diagos_snake[d].y, h_diagos_snake[d].z, h_diagos_snake[d].w );
+  }
+
+  exit(0);
+  genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ;
+ 
  
    int nnodes = nb_nodes ;
    snake_node_gpu * h_snake = new snake_node_gpu[nnodes];
  
    int nnodes = nb_nodes ;
    snake_node_gpu * h_snake = new snake_node_gpu[nnodes];
@@ -261,15 +281,19 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes,
    cudaMalloc((void**) &d_stats_ref, 3*nnodes*sizeof(int64));
  
    //DEBUG : pour forcer la mise à zero du tableau intermediaire d_stats_ref
    cudaMalloc((void**) &d_stats_ref, 3*nnodes*sizeof(int64));
  
    //DEBUG : pour forcer la mise à zero du tableau intermediaire d_stats_ref
+  /*
    int64 h_stats_ref[3*nnodes] ;
    for (int a=0; a<3*nnodes ; a++) h_stats_ref[a] = 0 ;
    cudaMemcpy( h_stats_ref, d_stats_ref, sizeof(int64), cudaMemcpyHostToDevice) ;
    int64 h_stats_ref[3*nnodes] ;
    for (int a=0; a<3*nnodes ; a++) h_stats_ref[a] = 0 ;
    cudaMemcpy( h_stats_ref, d_stats_ref, sizeof(int64), cudaMemcpyHostToDevice) ;
+  */
    //fin forçage a 0
    
    //DEBUG : pour forcer la mise à zero du tableau intermediaire d_sompart
    //fin forçage a 0
    
    //DEBUG : pour forcer la mise à zero du tableau intermediaire d_sompart
+  /*
       t_sum_x2 h_sompart[ 3*nnodes*bps ] ;
       for (int a=0; a<3*nnodes*bps ; a++) h_sompart[a] = 0 ;
       cudaMemcpy( h_sompart, d_sompart, sizeof(t_sum_x2), cudaMemcpyHostToDevice) ;
       t_sum_x2 h_sompart[ 3*nnodes*bps ] ;
       for (int a=0; a<3*nnodes*bps ; a++) h_sompart[a] = 0 ;
       cudaMemcpy( h_sompart, d_sompart, sizeof(t_sum_x2), cudaMemcpyHostToDevice) ;
+  */
    //fin forçage a 0
    
    calcul_contribs_segments_snake<<< nnodes*bps, tpb, (CFI(tpb))*(3*sizeof(t_sum_x2))>>>
    //fin forçage a 0
    
    calcul_contribs_segments_snake<<< nnodes*bps, tpb, (CFI(tpb))*(3*sizeof(t_sum_x2))>>>
diff --git a/src/lib_kernel_snake_2_gpu.cu b/src/lib_kernel_snake_2_gpu.cu

index 8d819a8997914305ecb0d53b2154e8b400e09f80..b807cb77e519efa15cfbdaf9a6c80a2367efb455 100644 (file)
--- a/src/lib_kernel_snake_2_gpu.cu
+++ b/src/lib_kernel_snake_2_gpu.cu
@@ -33,6 +33,27 @@ __global__ void genere_snake_rectangle_4nodes_gpu(snake_node_gpu * d_snake, int
    }
  }
  
    }
  }
  
+__global__ void genere_diagos_rectangle(uint4 * d_diagos, int h, int l, int q){ // Fills d_diagos with every (NW, SE) corner pair taken on a q x q grid of steps h/q and l/q. No thread/block index is used, so each launched thread runs the whole enumeration.
+  int inci = h/q; // step along i; integer division drops any remainder, and q == 0 would divide by zero
+  int incj = l/q; // step along j; same integer-division caveat as inci
+  int iM,jM, iN, jN ; // grid indices of the NW point (iM,jM) and of the SE point (iN,jN)
+  int idxDiago = 0 ; // next free slot in d_diagos; never bounds-checked here, so the buffer must hold (q*(q-1)/2)^2 entries (for q >= 1)
+       // double loop over the positions of the diagonal's NW (north-west) point
+       for ( iM = 0; iM < q-1; iM++){
+         for (jM = 0 ; jM < q-1 ; jM++){
+               // double loop over the positions of the diagonal's SE (south-east) point
+               for (iN = iM+1; iN < q; iN++){ // SE index is always strictly greater than the NW index along i
+                 for (jN = jM+1; jN < q; jN++){ // ... and along j
+                       d_diagos[idxDiago].x = iM*inci; // NW point, i coordinate
+                       d_diagos[idxDiago].y = jM*incj; // NW point, j coordinate
+                       d_diagos[idxDiago].z = iN*inci; // SE point, i coordinate
+                       d_diagos[idxDiago].w = jN*incj; // SE point, j coordinate
+                       idxDiago++;
+                 }
+               }
+         }
+       }
+}
  
  __global__ void genere_snake_rectangle_Nnodes_gpu(snake_node_gpu * d_snake, int dist_bords, int i_dim, int j_dim){
    int nb_node_seg = 9 ;
  
  __global__ void genere_snake_rectangle_Nnodes_gpu(snake_node_gpu * d_snake, int dist_bords, int i_dim, int j_dim){
    int nb_node_seg = 9 ;
author	perrot <perrot@debzulu.iut-bm.univ-fcomte.fr>
	Tue, 15 Feb 2011 15:19:14 +0000 (16:19 +0100)
committer	perrot <perrot@debzulu.iut-bm.univ-fcomte.fr>
	Tue, 15 Feb 2011 15:19:14 +0000 (16:19 +0100)
exec/SNAKE2D		patch \| blob \| history
makefile		patch \| blob \| history
src/lib_gpu.cu		patch \| blob \| history
src/lib_kernel_snake_2_gpu.cu		patch \| blob \| history