test tex

[snake_gpu.git] / src / lib_gpu.cu
diff --git a/src/lib_gpu.cu b/src/lib_gpu.cu

index d6df5ec11acac0a9142c5057996e1bf170bf4b27..d54bade2c0fa5ce7064e32c8f030b34e704a0b57 100644 (file)
--- a/src/lib_gpu.cu
+++ b/src/lib_gpu.cu
@@ -39,9 +39,16 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes,
  {
    unsigned int taille = H*L;
    timeval chrono;
-
+ 
    
    //allocation cumuls en memoire GPU
+
+  // allocate array and copy image data
+  cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
+  cudaArray * array_img_in ;
+  cudaMallocArray( &array_img_in, &channelDesc, L, H ); 
+  cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc);
+  
    tic(&chrono, NULL);
    /*
         MAX_PIX 20000
@@ -106,6 +113,7 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes,
    /*transfert image en global mem GPU*/
    tic(&chrono, NULL);
    cudaMemcpy( *d_img, img_in[0], taille*sizeof(unsigned short), cudaMemcpyHostToDevice);
+  cudaMemcpyToArray( array_img_in, 0, 0, (unsigned int*)*d_img, taille*sizeof(unsigned int) , cudaMemcpyDeviceToDevice) ;
    toc(chrono, "transfert image vers GPU");
  
    //calculs images cumulees sur GPU
@@ -226,7 +234,8 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes,
                 }
           printf("STATS IMAGE  N = %d - sigX = %lu - sigX2 = %lu\n",  H*L, sigX, sigX2 );
         }
-  
+
+        exit(0) ;
    /*
     * generation snake en mem GPU
     */
@@ -234,7 +243,7 @@ void cuda_init_img_cumul(unsigned short ** img_in, int H, int L, int nb_nodes,
    
    tic(&chrono, NULL);
    if (nb_nodes == 4)  genere_snake_rectangle_4nodes_gpu<<< 1, 1>>>(*d_snake, 140, H, L) ;
-  else if (nb_nodes == 40) genere_snake_rectangle_Nnodes_gpu<<< 1, 1>>>(*d_snake, (H+L)/20, H, L) ;
+
  
    int nnodes = nb_nodes ;
    snake_node_gpu * h_snake = new snake_node_gpu[nnodes];