// Print the path length `r`, then the image name, its dimensions L x H, `size`
// and the value returned by cutGetTimerValue(timer), which the message labels as
// the GPU-init + image time in ms.
// NOTE(review): the message labels `size` as a pixel count — confirm what unit
// `size` really holds.
// NOTE(review): the two statements appear twice verbatim in this view; confirm
// whether the repetition exists in the real file or is an extraction artefact.
printf("Longueur des chemins = %d pixels\n", r);
printf("Init GPU + Image %s %d x %d = %d pixels en %f ms,\n", image_path, L, H, size, cutGetTimerValue(timer));
printf("Longueur des chemins = %d pixels\n", r);
printf("Init GPU + Image %s %d x %d = %d pixels en %f ms,\n", image_path, L, H, size, cutGetTimerValue(timer));
// GPU memory allocation
// Device pointers for the direction, lniv and estim buffers.
// NOTE(review): in the second declaration only d_estim receives the NULL
// initializer; d_lniv is left uninitialized until its cudaMalloc below.
unsigned int * d_directions =NULL ;
unsigned int * d_lniv, * d_estim = NULL ;
// GPU memory allocation
unsigned int * d_directions =NULL ;
unsigned int * d_lniv, * d_estim = NULL ;
// Allocate `size` bytes for d_directions and d_lniv (cutilSafeCall wraps each call).
// NOTE(review): d_estim is not allocated anywhere in the visible lines although it
// is used as a copy source later — confirm it is allocated elsewhere.
// NOTE(review): cudaMalloc takes a byte count, while the earlier status message
// prints `size` as "pixels"; verify that `size` already includes the element size.
// NOTE(review): declarations and allocations are repeated verbatim in this view;
// if real, the second cudaMalloc would overwrite (and leak) the first pointer.
cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
// allocate array and copy image data
// channelDesc: one 32-bit unsigned component per element (x=32, y=z=w=0).
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
// allocate array and copy image data
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
// channelDescP: the '-' line described two 32-bit signed components per element;
// the '+' line switches to the templated descriptor for `ushort`.
// NOTE(review): presumably channelDescP describes array_paths (its allocation is
// not visible here). The host buffer later read back from array_paths is typed
// int2, which matched the old two-component format — confirm the element size
// still agrees after the switch to `ushort`.
// NOTE(review): the '-' and '+' lines both declare channelDescP; only one of them
// can exist in a compilable file.
- cudaChannelFormatDesc channelDescP = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindSigned);
+ //cudaChannelFormatDesc channelDescP = cudaCreateChannelDesc(8, 8, 0, 0, cudaChannelFormatKindSigned);
+ cudaChannelFormatDesc channelDescP = cudaCreateChannelDesc<ushort>();
// CUDA arrays for the input image, the estimate, the lniv image and the paths.
// Only array_img_in is allocated in the visible lines, with width L and height H.
cudaArray * array_img_in, *array_img_estim, *array_img_lniv, *array_paths;
cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
cudaArray * array_img_in, *array_img_estim, *array_img_lniv, *array_paths;
cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
/*****************************
* KERNEL CALLS and timing
*****************************/
/*****************************
* KERNEL CALLS and timing
*****************************/
// Grid dimensions: x = H / dimBlock.x, y = L / dimBlock.y, z = 1.
// NOTE(review): these are plain divisions; assuming integer operands, any
// remainder is dropped, so if H or L is not a multiple of the block dimension
// the trailing rows/columns are not covered by the launch — confirm the inputs
// are always multiples, or switch to a ceiling division plus a bounds check in
// the kernel.
// NOTE(review): the arrays are allocated with width L and height H, whereas the
// grid's x extent is derived from H — confirm this matches the kernel's indexing.
dimGrid = dim3( H / dimBlock.x,L / dimBlock.y, 1 ) ;
// for saving the GPU lniv image
dimGrid = dim3( H / dimBlock.x,L / dimBlock.y, 1 ) ;
// for saving the GPU lniv image
// Main iteration loop: nb_iter passes. The closing brace of the loop is outside
// the visible lines, and the loop header/body appear twice in this view.
for ( iter =0 ; iter < nb_iter ; iter++ )
{
// Copy `size` bytes from the linear device buffer d_estim into the CUDA array
// array_img_estim (device-to-device).
// NOTE(review): cudaMemcpyToArray is deprecated in recent CUDA toolkits; keep in
// mind if this code is ever moved off the legacy cutil-era toolchain.
cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
for ( iter =0 ; iter < nb_iter ; iter++ )
{
cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
// Kernel launch writing into d_lniv. The '-' line launched
// kernel_levelines_texture_smem with 0 bytes of dynamic shared memory; the '+'
// line launches kernel_levelines_texture and requests 24*(r-1)*sizeof(short)
// bytes of dynamic shared memory per block.
// NOTE(review): no cudaGetLastError() check follows the launch in the visible
// lines, so a bad launch configuration would only surface at a later API call.
- kernel_levelines_texture_smem<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H, r );
+ //version with/without the attempt at using smem (not conclusive)
+ kernel_levelines_texture<<< dimGrid, dimBlock, 24*(r-1)*sizeof(short) >>>( d_lniv, L, H, r );
// Copy the kernel output d_lniv into the CUDA array array_img_lniv.
cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
// When seq_out is set, build the per-iteration output file name:
// base name + 1-based iteration number + extension.
// NOTE(review): sprintf does not bound the write; the capacity of image_out is
// not visible here — confirm it is large enough, or use snprintf.
if (seq_out){
sprintf(image_out, "%s%d%s", image_out_base, iter+1, pgm_ext) ;
cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
if (seq_out){
sprintf(image_out, "%s%d%s", image_out_base, iter+1, pgm_ext) ;
/**************************************************
* retrieve the path matrix for display
**************************************************/
/**************************************************
* retrieve the path matrix for display
**************************************************/
// Allocate a host buffer of (r-1)*PSIZE_I int2 elements and copy `psize` bytes
// from the CUDA array array_paths into it.
// NOTE(review): the definition of psize is not visible; confirm it does not
// exceed (r-1)*PSIZE_I*sizeof(int2), otherwise the copy overruns h_paths.
// NOTE(review): no matching delete[] for h_paths appears in the visible lines.
// NOTE(review): cudaMemcpyFromArray is deprecated in recent CUDA toolkits.
int2 * h_paths = new int2[(r-1)*PSIZE_I] ;
cutilSafeCall( cudaMemcpyFromArray(h_paths , array_paths, 0, 0, psize, cudaMemcpyDeviceToHost) );
int2 * h_paths = new int2[(r-1)*PSIZE_I] ;
cutilSafeCall( cudaMemcpyFromArray(h_paths , array_paths, 0, 0, psize, cudaMemcpyDeviceToHost) );