__constant__ int pathDi[PSIZE_I][PSIZE_J-1] =
{
// Q1
- { -1, -1, -1}, //
- { -1, -1, -1}, //
- { -1, -1, -1}, //
- { -1, -1, -1}, //
- { -1, 0, 1}, //
- { 0, -1, 0},
+ { -1, -1, -1}, // 90
+ { -1, -1, -1}, // 75
+ { -1, -1, -1}, // 60
+ { -1, -1, -1}, // 45
+ { -1, 0, -1}, // 30
+ { 0, -1, 0}, // 15
// Q4
- { 0, 0, 0}, //
- { 0, 1, 1}, //
- { 1, 0, 1}, //
- { 1, 1, 1}, //
- { 1, 1, 1}, //
- { 1, 1, 1},
+ { 0, 0, 0}, // 0
+ { 0, 1, 0}, // 345
+ { 1, 0, 1}, // 330
+ { 1, 1, 1}, // 315
+ { 1, 1, 1}, // 300
+ { 1, 1, 1}, // 285
// Q3
- { 1, 1, 1}, //
- { 1, 1, 1}, //
- { 1, 1, 1}, //
- { 1, 1, 1}, //
- { 1, 0, -1}, //
- { 0, 1, 0},
+ { 1, 1, 1}, // 270
+ { 1, 1, 1}, // 255
+ { 1, 1, 1}, // 240
+ { 1, 1, 1}, // 225
+ { 1, 0, 1}, // 210
+ { 0, 1, 0}, // 195
// Q2
- { 0, 0, 0}, //
- { 0, -1, 0}, //
- { -1, 0, -1}, //
- { -1, -1, -1}, //
- { -1, -1, -1}, //
- { -1, -1, -1}
+ { 0, 0, 0}, // 180
+ { 0, -1, 0}, // 165
+ { -1, 0, -1}, // 150
+ { -1, -1, -1}, // 135
+ { -1, -1, -1}, // 120
+ { -1, -1, -1} // 105
} ; //
__constant__ int pathDj[PSIZE_I][PSIZE_J-1] =
{
// Q1
- { 0, 0, 0}, //
- { 0, 1, 0},
- { 1, 0, 1},
- { 1, 1, 1},
- { 1, 1, 1},
- { 1, 1, 1},
+ { 0, 0, 0}, // 90
+ { 0, 1, 0}, // 75
+ { 1, 0, 1}, // 60
+ { 1, 1, 1}, // 45
+ { 1, 1, 1}, // 30
+ { 1, 1, 1}, // 15
// Q4
- { 1, 1, 1}, //
- { 1, 1, 1},
- { 1, 1, 1},
- { 1, 1, 1},
- { 1, 0, -1},
- { 0, 1, 0},
- // Q3
- { 0, 0, 0}, //
- { 0, -1, 0},
- { -1, 0, -1},
- { -1, -1, -1},
- { -1, -1, -1},
- { -1, -1, -1},
+ { 1, 1, 1}, // 0
+ { 1, 1, 1}, // 345
+ { 1, 1, 1}, // 330
+ { 1, 1, 1}, // 315
+ { 1, 0, 1}, // 300
+ { 0, 1, 0}, // 285
+ // Q3
+ { 0, 0, 0}, // 270
+ { 0, -1, 0}, // 255
+ { -1, 0, -1}, // 240
+ { -1, -1, -1}, // 225
+ { -1, -1, -1}, // 210
+ { -1, -1, -1}, // 195
// Q2
- { -1, -1, -1}, //
- { -1, -1, -1},
- { -1, -1, -1},
- { -1, -1, -1},
- { -1, 0, 1},
- { 0, -1, 0}
+ { -1, -1, -1}, // 180
+ { -1, -1, -1}, // 165
+ { -1, -1, -1}, // 150
+ { -1, -1, -1}, // 135
+ { -1, 0, -1}, // 120
+ { 0, -1, 0} // 105
} ;
texture<int, 2, cudaReadModeElementType> tex_img_lniv ;
+__constant__ float tangente[] = {0.000, 0.268, 0.577, 1.000} ;
+
+__global__ void kernel_calcul_paths( int2 * d_paths, unsigned int r){
+
+ unsigned int idpath = 0 ;
+ int ic, jc, iprec, jprec ;
+ float offset = 0.5 ;
+ unsigned int basepath = 0 ;
+
+ // Q1 inf
+ for (int a=0 ; a< 4 ; a++){ // les 4 angles 0,15,30 et 45
+ for (int p=0 ; p< r ; p++){ // les r points
+ ic = r-1 - floor(tangente[a]*p + offset) ;
+ if ( p > 0 ){
+ d_paths[idpath*(r-1)+p-1].x = ic - iprec ;
+ d_paths[idpath*(r-1)+p-1].y = 1 ;
+ }
+ iprec = ic ;
+ }
+ idpath++ ;
+ }
+ // Q1 sup
+ for (int a=2 ; a>0 ; a--){ // les 2 angles 60 et 75
+ for (int p=0 ; p< r ; p++){ // les r points
+ jc = floor(tangente[a]*p + offset) ;
+ if ( p > 0 ){
+ d_paths[idpath*(r-1)+p-1].x = -1 ;
+ d_paths[idpath*(r-1)+p-1].y = jc - jprec ;
+ }
+ jprec = jc ;
+ }
+ idpath++ ;
+ }
+
+ // Q2
+ basepath += 6 ;
+ for (int a=0 ; a< 6 ; a++){ // les 6 angles 90,105,120,135,150,165
+ for (int p=0 ; p<r-1 ; p++){ // les r points
+ d_paths[idpath*(r-1)+p].x = -d_paths[(idpath - basepath)*(r-1)+p].y ;
+ d_paths[idpath*(r-1)+p].y = d_paths[(idpath - basepath)*(r-1)+p].x ;
+ }
+ idpath++ ;
+ }
+
+ // Q3
+ basepath += 6 ;
+ for (int a=0 ; a< 6 ; a++){ // les 6 angles 180,195,210,225,240,255
+ for (int p=0 ; p<r-1 ; p++){ // les r points
+ d_paths[idpath*(r-1)+p].x = -d_paths[(idpath - basepath)*(r-1)+p].x ;
+ d_paths[idpath*(r-1)+p].y = -d_paths[(idpath - basepath)*(r-1)+p].y ;
+ }
+ idpath++ ;
+ }
+
+ // Q4
+ basepath += 6 ;
+ for (int a=0 ; a< 6 ; a++){ // les 6 angles 270,285,300,315,330,345
+ for (int p=0 ; p<r-1 ; p++){ // les r points
+ d_paths[idpath*(r-1)+p].x = d_paths[(idpath - basepath)*(r-1)+p].y ;
+ d_paths[idpath*(r-1)+p].y = -d_paths[(idpath - basepath)*(r-1)+p].x ;
+ }
+ idpath++ ;
+ }
+}
+
+
__global__ void kernel_init_estim_from_img_in(unsigned int * d_estim, unsigned int L, unsigned int H, unsigned int r){
// coordonnes du point dans l'image
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
*/
}
+__global__ void kernel_neutre_estim_from_img_in( unsigned int * d_estim, unsigned int L, unsigned int H ){
+ // coordonnes du point dans l'image
+ unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+ unsigned int pos = i*L +j ;
+
+ d_estim[ pos ] = tex2D(tex_img_in, j, i) ;
+
+
+}
+
__global__ void kernel_init_estim_from_img_in_global_mem(unsigned int * d_data, unsigned int * d_estim,
unsigned int L, unsigned int H, unsigned int r){
// coordonnes du point dans l'image
uint2 mse ;
- if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+ if( (i >=lpath )&&( i<= (H-lpath) )&&( j>=lpath )&&( j<=(L-lpath) ) ){
for( idpath=0; idpath < PSIZE_I ; idpath++) {
ic = i ;
jc = j ;
}
// critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
// a ameliorer pour vitesse
- mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
+ mse_cur = ( mse.y - ( mse.x*mse.x / lpath ) ) ;
if (idpath == 0) {
mse_min = mse_cur ;
val = mse.x ;
}
img_out[ i*L + j ] = val / lpath ;
}
+ else img_out[ i*L + j ] = 0 ;
}
-
-
-__global__ void kernel_levelines_only_texture(unsigned int * img_out, unsigned int L, unsigned int H)
+__global__ void kernel_dir_levelines_texture(unsigned int * dir, unsigned int L, unsigned int H)
{
// coordonnes du point dans l'image
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
- unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;;
+ unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
//unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
// nb de points par chemin
int lpath = PSIZE_J ;
- unsigned int ic, jc ;
+ unsigned int ic, jc, zc ;
int idpath, idpix ;
- unsigned int mse_min, mse_cur ;
-
- //extern __shared__ uint2 mse[] ;
+ unsigned int mse_min, mse_cur, val ;
uint2 mse ;
- if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+
+ if( (i >=lpath )&&( i<= (H-lpath) )&&( j>=lpath )&&( j<=(L-lpath) ) ){
for( idpath=0; idpath < PSIZE_I ; idpath++) {
ic = i ;
jc = j ;
- mse.x = tex2D(tex_img_in, i, j) ;
- mse.y = tex2D(tex_img_in, i, j)*tex2D(tex_img_in, i, j) ;
+ zc = tex2D(tex_img_estim, j, i) ;
+ mse.x = zc ;
+ mse.y = zc*zc ;
for( idpix=0; idpix < lpath-1 ; idpix++ ) {
ic += pathDi[idpath][idpix] ;
jc += pathDj[idpath][idpix] ;
- mse.x += tex2D( tex_img_in, ic, jc ) ;
- mse.y += tex2D( tex_img_in, ic, jc ) * tex2D( tex_img_in, ic, jc ) ;
+ zc = tex2D(tex_img_estim, jc, ic) ;
+ mse.x += zc ;
+ mse.y += zc*zc ;
}
// critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
// a ameliorer pour vitesse
- mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
- if (idpath > 0) {
+ mse_cur = ( mse.y - ( mse.x*mse.x / lpath ) ) ;
+ if (idpath == 0) {
+ mse_min = mse_cur ;
+ val = idpath ;
+ } else {
if ( mse_cur < mse_min ) {
mse_min = mse_cur ;
+ val = idpath ;
}
- } else {
- mse_min = mse_cur ;
- }
+ }
}
- img_out[ i*L + j ] = mse_min / lpath ;
+ dir[ i*L + j ] = val ;
}
+
}
-__global__ void kernel_trace_levelines(unsigned int * img_in, unsigned int * dir, unsigned int * img_out,
+
+
+__global__ void kernel_trace_levelines_texture(unsigned int * dir, unsigned int * img_out,
unsigned int L, unsigned int H, unsigned int pas, unsigned int ng){
// coordonnes du point dans l'image
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int ic, jc, idpix ;
unsigned int idpath ;
- img_out[ i*L + j ] = img_in[ i*L + j ] ;
+ img_out[ i*L + j ] = tex2D(tex_img_estim, j, i ) ;
- if ( !(i%pas+j%pas)&&(i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath) ){
+ if ( !(i%pas+j%pas)&&(i>=lpath)&&(i<=H-lpath)&&(j>=lpath)&&(j<=L-lpath) ){
ic = i ;
jc = j ;
idpath = dir[ic*L+jc] ;
#include "levelines_kernels.cu"
+const float tang[] = {0, 0.268, 0.577, 1} ;
-__global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){
-
- unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
- unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
- unsigned int pos = i*L +j ;
- ptr2[pos] = val - ptr1[pos] ;
+void genPaths(unsigned int *h_paths, int *p_i, int *p_j, unsigned int r ){
+ unsigned int idpath = 0 ;
+ int ic, jc ;
+ float offset = 0.5 ;
+
+ // Q1 inf
+ for (int a=0 ; a< 4 ; a++){ // les 4 angles 0,15,30 et 45
+ for (int p=0 ; p< r ; p++){ // les r points
+ jc = p ;
+ ic = r-1 - floor(tang[a]*p + offset) ;
+ h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+ if ( p > 0 ){
+ p_i[idpath*(r-1)+p-1] = ic ;
+ p_j[idpath*(r-1)+p-1] = jc ;
+ }
+ }
+ idpath++ ;
+ }
+ // Q1 sup
+ for (int a=2 ; a>0 ; a--){ // les 2 angles 60 et 75
+ for (int p=0 ; p< r ; p++){ // les r points
+ ic = r-1 - p ;
+ jc = floor(tang[a]*p + offset) ;
+ h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+ if ( p > 0 ){
+ p_i[idpath*(r-1)+p-1] = ic ;
+ p_j[idpath*(r-1)+p-1] = jc ;
+ }
+ }
+ idpath++ ;
+ }
+
+ // Q2 inf
+ for (int a=0 ; a< 4 ; a++){ // les 4 angles 90,105,130 et 145
+ for (int p=0 ; p< r ; p++){ // les r points
+ ic = r-1 - p ;
+ jc = r-1 - floor(tang[a]*p + offset) ;
+ h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+ if ( p > 0 ){
+ p_i[idpath*(r-1)+p-1] = ic ;
+ p_j[idpath*(r-1)+p-1] = jc ;
+ }
+ }
+ idpath++ ;
+ }
+ // Q2 sup
+ for (int a=2 ; a>0 ; a--){ // les 2 angles 60 et 75
+ for (int p=0 ; p< r ; p++){ // les r points
+ jc = r-1 - p ;
+ ic = r-1 - floor(tang[a]*p + offset) ;
+ h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+ if ( p > 0 ){
+ p_i[idpath*(r-1)+p-1] = ic ;
+ p_j[idpath*(r-1)+p-1] = jc ;
+ }
+ }
+ idpath++ ;
+ }
+
+
+ // Q3 inf
+ for (int a=0 ; a< 4 ; a++){ // les 4 angles 90,105,130 et 145
+ for (int p=0 ; p< r ; p++){ // les r points
+ jc = r-1 - p ;
+ ic = floor(tang[a]*p + offset) ;
+ h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+ if ( p > 0 ){
+ p_i[idpath*(r-1)+p-1] = ic ;
+ p_j[idpath*(r-1)+p-1] = jc ;
+ }
+ }
+ idpath++ ;
+ }
+ // Q3 sup
+ for (int a=2 ; a>0 ; a--){ // les 2 angles 60 et 75
+ for (int p=0 ; p< r ; p++){ // les r points
+ ic = p ;
+ jc = r-1 - floor(tang[a]*p + offset) ;
+ h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+ if ( p > 0 ){
+ p_i[idpath*(r-1)+p-1] = ic ;
+ p_j[idpath*(r-1)+p-1] = jc ;
+ }
+ }
+ idpath++ ;
+ }
+
+
+ // Q4 inf
+ for (int a=0 ; a< 4 ; a++){ // les 4 angles 90,105,130 et 145
+ for (int p=0 ; p< r ; p++){ // les r points
+ ic = p ;
+ jc = floor(tang[a]*p + offset) ;
+ h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+ if ( p > 0 ){
+ p_i[idpath*(r-1)+p-1] = ic ;
+ p_j[idpath*(r-1)+p-1] = jc ;
+ }
+ }
+ idpath++ ;
+ }
+ // Q4 sup
+ for (int a=2 ; a>0 ; a--){ // les 2 angles 60 et 75
+ for (int p=0 ; p< r ; p++){ // les r points
+ jc = p ;
+ ic = floor(tang[a]*p + offset) ;
+ h_paths[ idpath*r*r + ic*r + jc ] = 255 ;
+ if ( p > 0 ){
+ p_i[idpath*(r-1)+p-1] = ic ;
+ p_j[idpath*(r-1)+p-1] = jc ;
+ }
+ }
+ idpath++ ;
+ }
}
-int main(int argc, char **argv){
+int main(int argc, char **argv){
- //float coef_regul = atof( argv[1] ) ;
-
+ // use device with highest Gflops/s
+ cudaSetDevice( cutGetMaxGflopsDeviceId() );
unsigned int timer ;
cutilCheckError( cutCreateTimer(&timer) );
cutilCheckError( cutResetTimer(timer) );
- /*****************************
- * CHARGEMENT IMAGE
- *****************************/
- char* image_path = argv[argc-1];
- char* image_out = "./image_out.pgm" ;
- unsigned int * h_data = NULL ;
- unsigned int * h_data_out = NULL ;
- unsigned int H, L, size;
-
cutilCheckError( cutStartTimer(timer) );
- cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
- cutilCheckError( cutStopTimer(timer) );
+ // une alloc debile pour initialiser la carte GPU
+ int * d_void ;
+ cutilSafeCall( cudaMalloc( (void**) &d_void, 4)) ;
- size = H * L * sizeof( unsigned int );
- printf("Loaded %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path, cutGetTimerValue(timer));
-
+
+
+ /*********************************
+ * DEFINITION PARAMS CHEMINS
+ *********************************/
+ char* image_out = "./image_out.pgm" ;
+ int *p_i, *p_j ;
+ int2 * d_paths ;
+ unsigned int * h_paths ;
+ unsigned int R = atoi(argv[1]);
+
+
+ //unsigned int size = R * R * sizeof( unsigned int );
- //essai alloc mapped
- /*
- cutilCheckError( cutResetTimer(timer) );
- cutilCheckError( cutStartTimer(timer) );
- unsigned int * h_ptr1, * d_ptr1 ;
- unsigned int * h_ptr2, * d_ptr2 ;
- int h = ;
- int l = h ;
- int mem = h*l*sizeof(unsigned int) ;
- cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ;
- cutilCheckError( cutStartTimer(timer) );
- cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped));
- cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped));
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+ // allocation mem
+ int memsize = 24*(R-1)*sizeof(int2) ;
+ cutilSafeCall( cudaMalloc( (void**) &d_paths, memsize ) );
- for (int i = 0; i<h*l ; i++) h_ptr1[i] = 200 ;
-
- cutilCheckError( cutStartTimer(timer) );
- cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr1, (void *)h_ptr1, 0));
- cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr2, (void *)h_ptr2, 0));
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps cumul get pointer Mapped : %f ms\n", cutGetTimerValue(timer)) ;
- cutilCheckError( cutStartTimer(timer) );
- dim3 blocks(16,16,1) ;
- dim3 grid( h / blocks.x, l / blocks.y, 1 ) ;
+ h_paths = new unsigned int [24*R*R] ;
+ p_i = new int [24*(R-1)] ;
+ p_j = new int [24*(R-1)] ;
- kernel_debil<<< grid, blocks >>>(d_ptr1, d_ptr2, l, 255) ;
+ for (int j=0; j<24*R*R ; j++) h_paths[j] = 0 ;
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ;
+ genPaths(h_paths, p_i, p_j, R) ;
+
+ printf("Rayon : %d pixels \n", R) ;
+
+ //matrice p_i
+ printf("P_I\n");
+ for (int idpath=0; idpath < 24; idpath++){
+ printf("\n");
+ for (int idpix=0; idpix < R-1; idpix++){
+ printf(" %d ", p_i[idpath*(R-1)+idpix]);
+ }
+ }
+ //matrice p_j
+ printf("\nP_J\n");
+ for (int idpath=0; idpath < 24; idpath++){
+ printf("\n");
+ for (int idpix=0; idpix < R-1; idpix++){
+ printf(" %d ", p_j[idpath*(R-1)+idpix]);
+ }
+ }
- char * image_1 = "./image_1.pgm" ;
- char * image_2 = "./image_2.pgm" ;
- cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ;
- cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ;
- */
/*****************************
- * FIN CHARGEMENT IMAGE
+ * APPELS KERNELS et chronos
*****************************/
+ dim3 dimBlock( 1, 1, 1 ) ;
+ dim3 dimGrid( 1, 1, 1 ) ;
+
+ kernel_calcul_paths<<< dimGrid, dimBlock, 0 >>>(d_paths, R) ;
-
- // use device with highest Gflops/s
- cudaSetDevice( cutGetMaxGflopsDeviceId() );
-
+ printf("\nGrille : %d x %d de Blocs : %d x %d \n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y) ;
- /*
- cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
- cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
- cutilSafeCall( cudaThreadSynchronize() );
- cutilCheckError( cutResetTimer(hTimer) );
- cutilCheckError( cutStartTimer(hTimer) );
-
- cutilSafeCall( cudaThreadSynchronize() );
- cutilCheckError( cutStopTimer(hTimer) );
- gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
- */
-
- cutilCheckError( cutResetTimer(timer) );
- cutilCheckError( cutStartTimer(timer) );
- // allocation mem GPU
- unsigned int * d_directions =NULL ;
- unsigned int * d_lniv, * d_estim = NULL ;
-
- cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
- cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
- cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
+
+ /**************************
+ * VERIFS
+ **************************/
+ int2 * paths = new int2[24*(R-1)] ;
+ cutilSafeCall( cudaMemcpy(paths , d_paths, memsize, cudaMemcpyDeviceToHost) );
- // allocate array and copy image data
- cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
- cudaArray * array_img_in, *array_img_estim, *array_img_lniv;
- cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
- cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
- cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
- cutilCheckError( cutStopTimer(timer) );
-
- cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H ));
- cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
-
- cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H ));
- cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
+ //matrice p_i
+ printf("P_I\n");
+ for (int idpath=0; idpath < 24; idpath++){
+ printf("\n");
+ for (int idpix=0; idpix < R-1; idpix++){
+ printf(" %d ", paths[idpath*(R-1)+idpix].x);
+ }
+ printf("\t // %d", 15*idpath);
+ }
+ //matrice p_j
+ printf("\nP_J\n");
+ for (int idpath=0; idpath < 24; idpath++){
+ printf("\n");
+ for (int idpix=0; idpix < R-1; idpix++){
+ printf(" %d ", paths[idpath*(R-1)+idpix].y);
+ }
+ printf("\t // %d", 15*idpath);
+ }
- printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
- /*****************************
- * APPELS KERNELS et chronos
- *****************************/
- cutilCheckError( cutResetTimer(timer) );
- cutilCheckError( cutStartTimer(timer) );
+ printf("\n");
- unsigned int iter , nb_iter = 15 ;
- unsigned int poids = 15 ;
- dim3 dimBlock(8,8,1) ;
- dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
- unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
- // init image estimee avec image_in
- kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
-
- printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
-
- for ( iter =0 ; iter < nb_iter ; iter++ )
- {
- cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
- kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H );
- cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
- kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
- }
-
- cudaThreadSynchronize();
-
- cutilCheckError( cutStopTimer(timer) );
- printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
- printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
-
- /**************************
- * VERIFS
- **************************/
- //trace des lniv sur grille de 'pas x pas'
- //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
- //cudaThreadSynchronize();
-
- // enregistrement image lniv GPU
- h_data_out = new unsigned int[H*L] ;
- if ( h_data_out != NULL)
- cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
- else
- printf("Echec allocation mem CPU\n");
-
- cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
-
- // calcul lniv CPU
-
-
+ // enregistrement image des PATHS
+ //cutilSafeCall( cudaMemcpy(h_paths , d_paths, size, cudaMemcpyDeviceToHost) );
+
+ cutilCheckError( cutSavePGMi(image_out, h_paths, R, 24*R) ) ;
+
// TODO verifier pourquoi les deux lignes suivantes produisent une erreur
//cutilExit(argc, argv);
//cudaThreadExit();