+# ne pas surveiller les fichiers objet
+*.o
\ No newline at end of file
################################################################################
# Add source files here
-EXECUTABLE := levelines
+EXECUTABLE := lniv
# CUDA source files (compiled with cudacc)
-CUFILES := main_gmem.cu
+CUFILES := main.cu
# CUDA dependency files
CU_DEPS := levelines_common.h
# C/C++ source files (compiled with gcc / c++)
// Reference CPU functions
////////////////////////////////////////////////////////////////////////////////
//extern "C" void fonc(...);
+/**
+ *
+ * \brief  Grey level of the level line starting at pixel (i,j) - CPU reference.
+ * \author NB - PhyTI
+ *
+ * \param[in]  image  input image, read as image[row*jdim + column]
+ * \param[in]  path   relative displacements of the 6 first-quadrant paths;
+ *                    step p of path d is read at path[d*jdim + p]
+ * \param[in]  i      row of the starting pixel
+ * \param[in]  j      column of the starting pixel
+ * \param[in]  idim   not used by this function
+ * \param[in]  jdim   row stride of image (also used as the stride of path, see note)
+ * \param[out] dout   index (0..23) of the selected direction
+ * \param[in]  r      number of pixels per segment, starting pixel included
+ *
+ * \return sum of the grey levels along the selected segment divided by r
+ *         (when r == 0: the grey level of the starting pixel, with *dout = 0)
+ *
+ * The level line only extends on one side of the starting pixel.
+ * 24 candidate segments of r pixels are examined (6 per quadrant); the one
+ * minimising sum(v^2) - sum(v)^2/r (integer arithmetic) is selected, which
+ * amounts to selecting the segment of minimal variance. On a tie the lowest
+ * direction index is kept.
+ * Directions 6..23 are obtained by rotating the 6 first-quadrant paths by one
+ * quarter turn per quadrant. According to the original note, direction 0
+ * points upward and directions turn clockwise in increasing order; this
+ * depends on the content of path.
+ *
+ * No border test: must not be used closer than r pixels to the image border.
+ *
+ * NOTE(review): the path table is indexed with a stride of jdim, whereas
+ * kernel_calcul_paths stores its paths with a stride of r-1. Confirm that the
+ * table passed to this function really uses jdim as its stride.
+ *
+ * UNDER TEST
+ */
+int lniv4_value(unsigned int *image, int2 *path, int i, int j, int idim, int jdim, int *dout, unsigned int r )
+{
+    /* quarter-turn rotations applied to a first-quadrant step (x,y) in quadrant q:
+     *   row    += rot_ix[q]*x + rot_iy[q]*y
+     *   column += rot_jx[q]*x + rot_jy[q]*y                                      */
+    static const int rot_ix[4] = {  1,  0, -1,  0 } ;
+    static const int rot_iy[4] = {  0,  1,  0, -1 } ;
+    static const int rot_jx[4] = {  0, -1,  0,  1 } ;
+    static const int rot_jy[4] = {  1,  0, -1,  0 } ;
+
+    int value_c, value2_c ;
+    int q, d, p, v, nsteps ;
+    int sum, sum2, eq ;
+    int eq_min = 0, sum_eq_min = 0, d_min = 0 ;
+    int it, jt ;
+    const int2 *step ;
+
+    /* grey level of the starting pixel, shared by all 24 segments */
+    value_c = image[i*jdim + j] ;
+
+    /* r == 0 would wrap r-1 to a huge unsigned value and divide by zero below */
+    if (r == 0)
+    {
+        *dout = 0 ;
+        return value_c ;
+    }
+
+    value2_c = value_c*value_c ;
+    nsteps   = (int)(r - 1) ;          /* displacements per segment */
+
+    for (q=0; q<4; q++)                /* quadrant */
+    {
+        for (d=0; d<6; d++)            /* path inside the quadrant */
+        {
+            sum  = value_c ;
+            sum2 = value2_c ;
+            it = i ;
+            jt = j ;
+            for (p=0; p<nsteps; p++)
+            {
+                step = &path[d*jdim + p] ;
+                it += rot_ix[q]*step->x + rot_iy[q]*step->y ;
+                jt += rot_jx[q]*step->x + rot_jy[q]*step->y ;
+                v = image[it*jdim + jt] ;
+                sum  += v ;
+                sum2 += v*v ;
+            }
+            /* selection criterion, same expression and types as before */
+            eq = sum2 - sum*sum/r ;
+            /* the very first segment initialises the running minimum */
+            if ( (q == 0 && d == 0) || (eq < eq_min) )
+            {
+                eq_min     = eq ;
+                sum_eq_min = sum ;
+                d_min      = 6*q + d ;   /* for information */
+            }
+        }
+    }
+
+    *dout = d_min ;
+    return sum_eq_min/r ;
+}
+
////////////////////////////////////////////////////////////////////////////////
// GPU functions (in file.cu)
-
-// chemins des lignes de niveaux
-// longueur = 4 pixels
-// une ligne = un chemin
-
+/************************************************************************
+ * Level-line paths for the constant-path version.
+ * Kept only for GPU/CPU comparison -> to be removed eventually.
+ * path length = 4 pixels
+ * one row = one path
+ ************************************************************************/
__constant__ int pathDi[PSIZE_I][PSIZE_J-1] =
{
// Q1
- { -1, -1, -1}, //
- { -1, -1, -1}, //
- { -1, -1, -1}, //
- { -1, -1, -1}, //
- { -1, 0, 1}, //
- { 0, -1, 0},
+ { -1, -1, -1}, // 90
+ { -1, -1, -1}, // 75
+ { -1, -1, -1}, // 60
+ { -1, -1, -1}, // 45
+ { -1, 0, -1}, // 30
+ { 0, -1, 0}, // 15
// Q4
- { 0, 0, 0}, //
- { 0, 1, 1}, //
- { 1, 0, 1}, //
- { 1, 1, 1}, //
- { 1, 1, 1}, //
- { 1, 1, 1},
+ { 0, 0, 0}, // 0
+ { 0, 1, 0}, // 345
+ { 1, 0, 1}, // 330
+ { 1, 1, 1}, // 315
+ { 1, 1, 1}, // 300
+ { 1, 1, 1}, // 285
// Q3
- { 1, 1, 1}, //
- { 1, 1, 1}, //
- { 1, 1, 1}, //
- { 1, 1, 1}, //
- { 1, 0, -1}, //
- { 0, 1, 0},
+ { 1, 1, 1}, // 270
+ { 1, 1, 1}, // 255
+ { 1, 1, 1}, // 240
+ { 1, 1, 1}, // 225
+ { 1, 0, 1}, // 210
+ { 0, 1, 0}, // 195
// Q2
- { 0, 0, 0}, //
- { 0, -1, 0}, //
- { -1, 0, -1}, //
- { -1, -1, -1}, //
- { -1, -1, -1}, //
- { -1, -1, -1}
+ { 0, 0, 0}, // 180
+ { 0, -1, 0}, // 165
+ { -1, 0, -1}, // 150
+ { -1, -1, -1}, // 135
+ { -1, -1, -1}, // 120
+ { -1, -1, -1} // 105
} ; //
__constant__ int pathDj[PSIZE_I][PSIZE_J-1] =
{
// Q1
- { 0, 0, 0}, //
- { 0, 1, 0},
- { 1, 0, 1},
- { 1, 1, 1},
- { 1, 1, 1},
- { 1, 1, 1},
+ { 0, 0, 0}, // 90
+ { 0, 1, 0}, // 75
+ { 1, 0, 1}, // 60
+ { 1, 1, 1}, // 45
+ { 1, 1, 1}, // 30
+ { 1, 1, 1}, // 15
// Q4
- { 1, 1, 1}, //
- { 1, 1, 1},
- { 1, 1, 1},
- { 1, 1, 1},
- { 1, 0, -1},
- { 0, 1, 0},
- // Q3
- { 0, 0, 0}, //
- { 0, -1, 0},
- { -1, 0, -1},
- { -1, -1, -1},
- { -1, -1, -1},
- { -1, -1, -1},
+ { 1, 1, 1}, // 0
+ { 1, 1, 1}, // 345
+ { 1, 1, 1}, // 330
+ { 1, 1, 1}, // 315
+ { 1, 0, 1}, // 300
+ { 0, 1, 0}, // 285
+ // Q3
+ { 0, 0, 0}, // 270
+ { 0, -1, 0}, // 255
+ { -1, 0, -1}, // 240
+ { -1, -1, -1}, // 225
+ { -1, -1, -1}, // 210
+ { -1, -1, -1}, // 195
// Q2
- { -1, -1, -1}, //
- { -1, -1, -1},
- { -1, -1, -1},
- { -1, -1, -1},
- { -1, 0, 1},
- { 0, -1, 0}
+ { -1, -1, -1}, // 180
+ { -1, -1, -1}, // 165
+ { -1, -1, -1}, // 150
+ { -1, -1, -1}, // 135
+ { -1, 0, -1}, // 120
+ { 0, -1, 0} // 105
} ;
+
+
+// Tangents of the base angles (0, 15, 30 and 45 degrees) used for the initial
+// generation of the paths, in the version with paths of configurable length
+__constant__ float tangente[] = {0.000, 0.268, 0.577, 1.000} ;
-// declare texture reference for 2D int texture
+// declarations des textures
texture<int, 2, cudaReadModeElementType> tex_img_in ;
texture<int, 2, cudaReadModeElementType> tex_img_estim ;
texture<int, 2, cudaReadModeElementType> tex_img_lniv ;
+texture<int2, 2, cudaReadModeElementType> tex_paths ;
+
+
+
+/**
+ *
+ * \brief  Builds the table of relative displacements of the 24 paths.
+ * \author NB - PhyTI, adapted by zulu for paths of configurable length
+ *
+ * \param[in]  r        number of points per path
+ *
+ * \param[out] d_paths  displacement table: step p (0 <= p < r-1) of path k
+ *                      (0 <= k < 24) is written to d_paths[k*(r-1) + p]
+ *
+ * Uses the constant table of tangents of the base angles (float tangente[]).
+ * Paths 0..3 use tangente[0..3] with a y step of 1; paths 4 and 5 use
+ * tangente[2] and tangente[1] with an x step of -1. Paths 6..23 are the
+ * paths 0..17 rotated by a quarter turn, (x,y) -> (-y,x).
+ *
+ * The code does not use the thread or block indices: every thread that runs
+ * the kernel fills the whole table sequentially, so a single-thread launch
+ * is sufficient.
+ * Nothing is written when r < 2 (a path of fewer than 2 points has no step).
+ *
+ */
+__global__ void kernel_calcul_paths( int2 * d_paths, unsigned int r){
+
+    // r == 0 would wrap r-1 to a huge unsigned value and write out of bounds
+    if ( r < 2 ) return ;
+
+    const unsigned int nsteps = r - 1 ;   // displacements per path
+    const float offset = 0.5f ;           // floorf(v + 0.5f) rounds to nearest
+    unsigned int idpath = 0 ;
+    int cur, prev ;
+
+    // Q1, lower part: the 4 angles 0, 15, 30 and 45
+    for (int a=0 ; a<4 ; a++){
+        prev = 0 ;                        // rounded offset of point 0
+        for (unsigned int p=1 ; p<r ; p++){
+            cur = (int)floorf(tangente[a]*p + offset) ;
+            d_paths[idpath*nsteps + p-1].x = prev - cur ;
+            d_paths[idpath*nsteps + p-1].y = 1 ;
+            prev = cur ;
+        }
+        idpath++ ;
+    }
+
+    // Q1, upper part: the 2 angles 60 and 75 (tangente[2] then tangente[1])
+    for (int a=2 ; a>0 ; a--){
+        prev = 0 ;
+        for (unsigned int p=1 ; p<r ; p++){
+            cur = (int)floorf(tangente[a]*p + offset) ;
+            d_paths[idpath*nsteps + p-1].x = -1 ;
+            d_paths[idpath*nsteps + p-1].y = cur - prev ;
+            prev = cur ;
+        }
+        idpath++ ;
+    }
+
+    // Q2, Q3, Q4: each path is the path 6 indices earlier, rotated by a
+    // quarter turn. Applying the rotation once, twice and three times to the
+    // Q1 paths yields (-y,x), (-x,-y) and (y,-x) respectively.
+    for ( ; idpath < 24 ; idpath++){
+        for (unsigned int p=0 ; p<nsteps ; p++){
+            const int2 src = d_paths[(idpath - 6)*nsteps + p] ;
+            d_paths[idpath*nsteps + p].x = -src.y ;
+            d_paths[idpath*nsteps + p].y =  src.x ;
+        }
+    }
+}
+/**
+ *
+ * \brief  Neutral initial estimate: copies the input image into d_estim.
+ * \author zulu - AND
+ *
+ * \param[in]  L  image width
+ * \param[in]  H  image height
+ *
+ * \param[out] d_estim  estimate image 0, written at d_estim[i*L + j]
+ *
+ * Texture version: the original image is read through tex_img_in.
+ * No averaging is performed; each pixel is copied as is.
+ * Runs on 2D thread blocks and a 2D grid sized after the image:
+ * the x index of a thread gives the row i, its y index the column j.
+ * Threads that fall outside the image (i >= H or j >= L) write nothing.
+ *
+ */
+__global__ void kernel_neutre_img2estim(unsigned int *d_estim, unsigned int L, unsigned int H){
+    // coordinates of the pixel in the image
+    const unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
+
+    // guard against grids that overhang the image
+    if ( (i < H) && (j < L) ){
+        d_estim[ i*L + j ] = tex2D(tex_img_in, j, i) ;
+    }
+}
+
+
+/**
+ *
+ * \brief calcule l'estimation initiale
+ * \author zulu - AND
+ *
+ * \param[in] L Largeur de l'image
+ * \param[in] H Hauteur de l'image
+ * \param[in] r coté de la fenêtre de moyenneage
+ *
+ * \param[out] d_estim Image estimee 0
+ *
+ * Version texture : l'img originale est supposée en texture.
+ * L'estimation réalisée correspond a un moyenneur de 'rayon' r
+ * Execution sur des blocs de threads 2D et une grille 2D
+ * selon les dimensions de l'image.
+ *
+ */
__global__ void kernel_init_estim_from_img_in(unsigned int * d_estim, unsigned int L, unsigned int H, unsigned int r){
// coordonnes du point dans l'image
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
d_estim[ pos ] = ng/((2*r+1)*(2*r+1)) ;
}
/*
+ // pour les bords : pas de traitement
else
d_estim[ pos ] = tex2D(tex_img_in, j, i) ;
*/
}
+
+
+/**
+ *
+ * \brief calcule l'estimation initiale
+ * \author zulu - AND
+ *
+ * \param[in] d_data image originale
+ * \param[in] L Largeur de l'image
+ * \param[in] H Hauteur de l'image
+ * \param[in] r coté de la fenêtre de moyenneage
+ *
+ * \param[out] d_estim Image estimee 0
+ *
+ * Version global mem : l'img originale est en mémoire globale, passée en param.
+ * L'estimation réalisée correspond a un moyenneur de 'rayon' r
+ * Execution sur des blocs de threads 2D et une grille 2D
+ * selon les dimensions de l'image.
+ * Moins rapide que les 2 autres solutions.
+ *
+ */
__global__ void kernel_init_estim_from_img_in_global_mem(unsigned int * d_data, unsigned int * d_estim,
unsigned int L, unsigned int H, unsigned int r){
// coordonnes du point dans l'image
}
}
-__global__ void kernel_estim_next_step_global_mem(unsigned int * d_estim, unsigned int * d_lniv, unsigned int L, unsigned int H, unsigned int p){
+
+
+/**
+ *
+ * \brief calcule les niveaux de gris de l'estimation n+1
+ * \author zulu - AND
+ *
+ * \param[in] L Largeur de l'image
+ * \param[in] H Hauteur de l'image
+ * \param[in] p poids du terme lniv
+ *
+ * \param[out] d_estim Image estimee n+1
+ *
+ * Version mixte : l'img originale est supposee en texture,
+ * l'img lniv en mémoire globale, passée en param.
+ * Cela évite la copie en texture de l'img lniv à chaque itération.
+ * Execution sur des blocs de threads 2D et une grille 2D
+ * selon les dimensions de l'image.
+ * Moins rapide que 'texture' mais plus rapide que 'globalmem'
+ *
+ */
+__global__ void kernel_estim_next_step_hybrid(unsigned int * d_estim, unsigned int * d_lniv, unsigned int L, unsigned int H, unsigned int p){
// coordonnes du point dans l'image
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
}
+/**
+ *
+ * \brief calcule les niveaux de gris de l'estimation n+1
+ * \author zulu - AND
+ *
+ * \param[in] L Largeur de l'image
+ * \param[in] H Hauteur de l'image
+ * \param[in] p poids du terme lniv
+ *
+ * \param[out] d_estim Image estimee n+1
+ *
+ * Version texture : Les donnees (img originale, img lniv) sont supposees en textures.
+ * Execution sur des blocs de threads 2D et une grille 2D
+ * selon les dimensions de l'image.
+ * Plus rapide que les 2 autres solutions
+ *
+ */
__global__ void kernel_estim_next_step_texture(unsigned int * d_estim, unsigned int L, unsigned int H, unsigned int p){
// coordonnes du point dans l'image
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
}
+/**
+ *
+ * \brief calcule les niveaux de gris de l'estimation n+1
+ * \author zulu - AND
+ *
+ * \param[in] d_lniv Image des lniv n
+ * \param[in] d_data Image originale
+ * \param[in] L Largeur de l'image
+ * \param[in] H Hauteur de l'image
+ * \param[in] p poids du terme lniv
+ *
+ * \param[out] d_estim Image estimee n+1
+ *
+ * Version mémoire globale : les données sont passées en params.
+ * Execution sur des blocs de threads 2D et une grille 2D
+ * selon les dimensions de l'image.
+ *
+ */
__global__ void kernel_estim_next_step_global_mem(unsigned int * d_estim, unsigned int * d_lniv, unsigned int * d_data,
unsigned int L, unsigned int H, unsigned int p){
// coordonnes du point dans l'image
}
-__global__ void kernel_levelines_global_mem(unsigned int * img_in, unsigned int * img_out, unsigned int L, unsigned int H)
-{
- // coordonnes du point dans l'image
- unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
- unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
- //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
-
- // nb de points par chemin
- int lpath = PSIZE_J ;
- unsigned int ic, jc, zc, pos = i*L+j;
- int idpath, idpix ;
- unsigned int mse_min, mse_cur, val ;
- uint2 mse ;
-
-
- if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
- for( idpath=0; idpath < PSIZE_I ; idpath++) {
- ic = i ;
- jc = j ;
- pos = ic*L + jc ;
- zc = img_in[ pos ] ;
- mse.x = zc ;
- mse.y = zc*zc ;
- for( idpix=0; idpix < lpath-1 ; idpix++ ) {
- ic += pathDi[idpath][idpix] ;
- jc += pathDj[idpath][idpix] ;
- pos = ic*L + jc ;
- zc = img_in[ pos ] ;
- mse.x += zc ;
- mse.y += zc*zc ;
- }
- // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
- // a ameliorer pour vitesse
- mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
- if (idpath == 0) {
- mse_min = mse_cur ;
- val = mse.x ;
- } else {
- if ( mse_cur < mse_min ) {
- mse_min = mse_cur ;
- val = mse.x ;
- }
- }
- }
- img_out[ i*L + j ] = val / lpath ;
- }
-}
-
-__global__ void kernel_levelines_texture(unsigned int * img_out, unsigned int L, unsigned int H)
+/**
+ *
+ * \brief determine les lniv en chaque point de l'image
+ * \author zulu - AND
+ *
+ * \param[in] L Largeur de l'image
+ * \param[in] H Hauteur de l'image
+ * \param[in] r longueur des segments
+ *
+ * \param[out] img_out image des lniv
+ *
+ * Execution sur des blocs de threads 2D et une grille 2D
+ * selon les dimensions de l'image.
+ * L'image d'entrée doit être au préalable en mémoire texture pointée par "tex_img_estim".
+ * Les matrices des chemins sont, elles, pointées par "tex_paths"
+ * Cette version ne fournit pas les indices des chemins pour les tracé éventuel des lniv.
+ */
+__global__ void kernel_levelines_texture(unsigned int * img_out, unsigned int L, unsigned int H, unsigned int r)
{
// coordonnes du point dans l'image
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
- //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
-
+
// nb de points par chemin
- int lpath = PSIZE_J ;
- unsigned int ic, jc, zc ;
+ int lpath = r ;
+ unsigned int ic, jc, zc, z ;
int idpath, idpix ;
unsigned int mse_min, mse_cur, val ;
uint2 mse ;
- if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+ if((i>=lpath)&&(i<=H-lpath)&&(j>=lpath)&&(j<=L-lpath)){
+ z = tex2D(tex_img_estim, j, i) ;
for( idpath=0; idpath < PSIZE_I ; idpath++) {
ic = i ;
jc = j ;
- zc = tex2D(tex_img_estim, j, i) ;
- mse.x = zc ;
- mse.y = zc*zc ;
+ mse.x = z ;
+ mse.y = z*z ;
for( idpix=0; idpix < lpath-1 ; idpix++ ) {
- ic += pathDi[idpath][idpix] ;
- jc += pathDj[idpath][idpix] ;
+ ic += tex2D(tex_paths, idpix, idpath).x ;
+ jc += tex2D(tex_paths, idpix, idpath).y ;
zc = tex2D(tex_img_estim, jc, ic) ;
mse.x += zc ;
mse.y += zc*zc ;
}
// critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
- // a ameliorer pour vitesse
+ // TODO cherchera ameliorer pour vitesse
mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
- if (idpath == 0) {
+ if ( (idpath == 0) || (mse_cur < mse_min) ) {
mse_min = mse_cur ;
- val = mse.x ;
- } else {
- if ( mse_cur < mse_min ) {
- mse_min = mse_cur ;
- val = mse.x ;
- }
+ val = mse.x ;
}
}
img_out[ i*L + j ] = val / lpath ;
}
+/**
+ *
+ * \brief  Computes the level-line value (lniv) at every pixel of the image.
+ * \author zulu - AND
+ *
+ * \param[in]  L  image width
+ * \param[in]  H  image height
+ * \param[in]  r  length of the segments (number of points per path)
+ *
+ * \param[out] img_out  image of the lniv values
+ *
+ * Runs on 2D thread blocks and a 2D grid sized after the image.
+ * The input image must already be bound to the texture "tex_img_estim";
+ * the path displacements are read through the texture "tex_paths".
+ * This version does not output the index of the selected path, so the
+ * level lines cannot be drawn from its result.
+ * This version was meant to use shared memory to make up for the slowdown
+ * caused by configurable, non-constant paths; the shared-memory staging is
+ * currently commented out and all reads go through the textures.
+ *
+ * For each of the PSIZE_I paths the criterion SUM(X2) - (SUM(X)/r)*SUM(X) is
+ * evaluated; the path with the smallest criterion is kept (the first path
+ * initialises the running minimum) and SUM(X)/r of that path is written.
+ * Pixels for which the test on i, j and r fails are left untouched.
+ */
-
-__global__ void kernel_levelines_only_texture(unsigned int * img_out, unsigned int L, unsigned int H)
+__global__ void kernel_levelines_texture_smem(unsigned int * img_out, unsigned int L, unsigned int H, unsigned int r)
{
+ // coordinates of the point inside the block
+ unsigned int iib = threadIdx.x ;
+ unsigned int jib = threadIdx.y ;
 // coordonnes du point dans l'image
- unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
- unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;;
- //unsigned int spos = threadIdx.x * blockDim.y + threadIdx.y ;
-
+ unsigned int i = blockIdx.x*blockDim.x + iib ;
+ unsigned int j = blockIdx.y*blockDim.y + jib ;
+
 // nb de points par chemin
- int lpath = PSIZE_J ;
- unsigned int ic, jc ;
+ int lpath = r ;
+ int ic, jc ;
 int idpath, idpix ;
- unsigned int mse_min, mse_cur ;
-
- //extern __shared__ uint2 mse[] ;
- uint2 mse ;
+ unsigned int val, mse_cur, mse_min, z, zc ;
+ uint2 mse_data ;
+
+ //__shared__ unsigned int val_img[16*16] ;
+
+ //val_img[jib*16+iib] = tex2D(tex_img_estim, j, i) ;
- if((i>lpath)&&(i<H-lpath)&&(j>lpath)&&(j<L-lpath)){
+ if((i>=lpath)&&(i<=H-lpath)&&(j>=lpath)&&(j<=L-lpath)){
+ z = tex2D(tex_img_estim, j, i) ;
 for( idpath=0; idpath < PSIZE_I ; idpath++) {
 ic = i ;
 jc = j ;
- mse.x = tex2D(tex_img_in, i, j) ;
- mse.y = tex2D(tex_img_in, i, j)*tex2D(tex_img_in, i, j) ;
+ mse_data.x = z ;
+ mse_data.y = z*z ;
 for( idpix=0; idpix < lpath-1 ; idpix++ ) {
- ic += pathDi[idpath][idpix] ;
- jc += pathDj[idpath][idpix] ;
- mse.x += tex2D( tex_img_in, ic, jc ) ;
- mse.y += tex2D( tex_img_in, ic, jc ) * tex2D( tex_img_in, ic, jc ) ;
+ ic += tex2D(tex_paths, idpix, idpath).x ;
+ jc += tex2D(tex_paths, idpix, idpath).y ;
+ zc = tex2D(tex_img_estim, jc, ic) ;
+ mse_data.x += zc ;
+ mse_data.y += zc*zc ;
 }
 // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
- // a ameliorer pour vitesse
- mse_cur = ( mse.y - ( mse.x / lpath ) * mse.x ) ;
- if (idpath > 0) {
- if ( mse_cur < mse_min ) {
- mse_min = mse_cur ;
- }
- } else {
+ // TODO look into speeding this up
+ mse_cur = ( mse_data.y - ( mse_data.x / lpath ) * mse_data.x ) ;
+ // the first path initialises the running minimum; mse_min must not be
+ // reset per path, otherwise the best path is never tracked
+ if ( (idpath == 0) || (mse_cur < mse_min) ){
 mse_min = mse_cur ;
- }
+ val = mse_data.x ;
+ }
 }
- img_out[ i*L + j ] = mse_min / lpath ;
+ img_out[ i*L + j ] = val / lpath ;
 }
 }
-
+/**
+ *
+ * \brief trace les segments sur un maillage carré
+ * \author zulu - AND
+ *
+ * \param[in] img_in image d'entree
+ * \param[in] dir tableaux des directions
+ * \param[in] L Largeur de l'image
+ * \param[in] H Hauteur de l'image
+ * \param[in] pas coté du maillage
+ * \param[in] ng niveau de gris des segments
+ * \param[in] r longueur des segments
+ *
+ * \param[out] img_out image + les segments superposés
+ *
+ * Kernel trivial. Ne trace rien sur les bords.
+ * execution sur des blocs de threads 2D et une grille 2D
+ * selon les dimensions de l'image
+ */
__global__ void kernel_trace_levelines(unsigned int * img_in, unsigned int * dir, unsigned int * img_out,
- unsigned int L, unsigned int H, unsigned int pas, unsigned int ng){
+ unsigned int L, unsigned int H, unsigned int pas, unsigned int ng,
+ unsigned int r ){
// coordonnes du point dans l'image
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;;
// nb de points par chemin
- int lpath = PSIZE_J ;
+ int lpath = r ;
unsigned int ic, jc, idpix ;
unsigned int idpath ;
idpath = dir[ic*L+jc] ;
img_out[ ic*L+jc ] = ng ;
for ( idpix=0 ; idpix < lpath-1 ; idpix++ ){
- ic += pathDi[idpath][idpix] ;
- jc += pathDj[idpath][idpix] ;
+ ic += tex2D(tex_paths, idpix, idpath).x ; // pathDi[idpath][idpix] ;
+ jc += tex2D(tex_paths, idpix, idpath).y ; // pathDj[idpath][idpix] ;
img_out[ ic*L + jc ] = ng ;
}
}
--- /dev/null
+<!DOCTYPE cvp>
+<cvp version="4.0" >
+ <session>
+ <name>smem_3staticArrays</name>
+ <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
+ <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
+ <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
+ <datetime>21 Jun 2011 10:47:10</datetime>
+ <normalizedcounter>false</normalizedcounter>
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="1" >
+ <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
+ <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
+ <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
+ <run number="5" >gst_128b</run>
+ <run number="6" >gld_request,gst_request</run>
+ <run number="7" >tex_cache_hit,tex_cache_miss</run>
+ </context>
+ </device>
+ </session>
+ <session>
+ <name>nosmem_tex</name>
+ <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
+ <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
+ <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
+ <datetime>21 Jun 2011 10:50:54</datetime>
+ <normalizedcounter>false</normalizedcounter>
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
+ <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
+ <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
+ <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
+ <run number="5" >gst_128b</run>
+ <run number="6" >gld_request,gst_request</run>
+ <run number="7" >tex_cache_hit,tex_cache_miss</run>
+ </context>
+ </device>
+ </session>
+ <session>
+ <name>smem2DnoCFI</name>
+ <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
+ <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
+ <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
+ <datetime>21 Jun 2011 11:10:33</datetime>
+ <normalizedcounter>false</normalizedcounter>
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
+ <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
+ <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
+ <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
+ <run number="5" >gst_128b</run>
+ <run number="6" >gld_request,gst_request</run>
+ <run number="7" >tex_cache_hit,tex_cache_miss</run>
+ </context>
+ </device>
+ </session>
+ <session>
+ <name>smemCFI_1linearArray</name>
+ <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
+ <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
+ <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
+ <datetime>21 Jun 2011 11:16:24</datetime>
+ <normalizedcounter>false</normalizedcounter>
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
+ <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
+ <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
+ <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
+ <run number="5" >gst_128b</run>
+ <run number="6" >gld_request,gst_request</run>
+ <run number="7" >tex_cache_hit,tex_cache_miss</run>
+ </context>
+ </device>
+ </session>
+ <session>
+ <name>smem_zc_CFI_linearArray</name>
+ <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
+ <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
+ <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
+ <datetime>21 Jun 2011 11:30:11</datetime>
+ <normalizedcounter>false</normalizedcounter>
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
+ <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
+ <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
+ <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
+ <run number="5" >gst_128b</run>
+ <run number="6" >gld_request,gst_request</run>
+ <run number="7" >tex_cache_hit,tex_cache_miss</run>
+ </context>
+ </device>
+ </session>
+ <session>
+ <name>Session19</name>
+ <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
+ <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
+ <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
+ <datetime>21 Jun 2011 14:10:47</datetime>
+ <normalizedcounter>false</normalizedcounter>
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
+ <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
+ <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
+ <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
+ <run number="5" >gst_128b</run>
+ <run number="6" >gld_request,gst_request</run>
+ <run number="7" >tex_cache_hit,tex_cache_miss</run>
+ </context>
+ </device>
+ </session>
+ <session>
+ <name>Session21</name>
+ <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
+ <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
+ <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
+ <datetime>21 Jun 2011 14:27:50</datetime>
+ <normalizedcounter>false</normalizedcounter>
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
+ <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
+ <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
+ <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
+ <run number="5" >gst_128b</run>
+ <run number="6" >gld_request,gst_request</run>
+ <run number="7" >tex_cache_hit,tex_cache_miss</run>
+ </context>
+ </device>
+ </session>
+ <session>
+ <name>Session22</name>
+ <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
+ <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
+ <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
+ <datetime>21 Jun 2011 14:34:00</datetime>
+ <normalizedcounter>false</normalizedcounter>
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
+ <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
+ <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
+ <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
+ <run number="5" >gst_128b</run>
+ <run number="6" >gld_request,gst_request</run>
+ <run number="7" >tex_cache_hit,tex_cache_miss</run>
+ </context>
+ </device>
+ </session>
+</cvp>
+
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef914083a0
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+122940adae6a6780,memcpyHtoA,177.856,331,,,,,,,,,,,0,0,1048576,0
+122940adae70e2c0,_Z19kernel_calcul_pathsP4int2j,43.296,12,0.031,1,1,13908848,1,1,1,0,28,11,0,-1,,0,168,0,1587,0,1,0,0,54,0,0,72,0,0,54,72,0,0
+122940adae719640,memcpyDtoA,4.288,6,,,,,,,,,,,0,0,576,0
+122940adae71e360,_Z23kernel_neutre_img2estimPjjj,450.88,8,0.5,64,64,0,8,8,1,0,32,5,0,-1,,0,0,0,5448,0,409,0,0,0,0,0,13088,0,0,0,272,2454,818
+122940adae7969a0,memcpyAtoD,50.592,6,,,,,,,,,,,0,0,1048576,0
+122940adae7a3980,memcpyDtoA,47.392,4,,,,,,,,,,,0,0,1048576,0
+122940adae7b4960,_Z29kernel_levelines_texture_smemPjjjj,2129.12,9,0.5,64,64,6881395,8,8,1,0,308,18,0,-1,,0,47606,11,731991,67080,410,0,0,0,0,0,12802,0,0,0,272,1173117,4335
+122940adae9bd0a0,memcpyDtoA,47.712,4,,,,,,,,,,,0,0,1048576,0
+122940adae9cc6c0,_Z30kernel_estim_next_step_texturePjjjj,472.384,5,0.5,64,64,7536757,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122940adaea40640,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122940adaea4dca0,_Z29kernel_levelines_texture_smemPjjjj,2109.98,4,0.5,64,64,6357104,8,8,1,0,308,18,0,-1,,0,47606,12,731991,66886,410,0,0,0,0,0,12804,0,0,0,272,1164819,2655
+122940adaec51900,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122940adaec5eee0,_Z30kernel_estim_next_step_texturePjjjj,472.96,4,0.5,64,64,6488175,8,8,1,0,36,9,0,-1,,0,816,0,15513,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122940adaecd30a0,memcpyDtoA,47.744,3,,,,,,,,,,,0,0,1048576,0
+122940adaece0740,_Z29kernel_levelines_texture_smemPjjjj,2126.05,4,0.5,64,64,6619245,8,8,1,0,308,18,0,-1,,0,47606,10,731993,66947,409,0,0,0,0,0,12800,0,0,0,272,1163430,2600
+122940adaeee8260,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122940adaeef58a0,_Z30kernel_estim_next_step_texturePjjjj,457.856,4,0.5,64,64,7602208,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122940adaef65fe0,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122940adaef73620,_Z29kernel_levelines_texture_smemPjjjj,2112.03,3,0.5,64,64,6357108,8,8,1,0,308,18,0,-1,,0,47606,12,731993,66824,409,0,0,0,0,0,12788,0,0,0,272,1163358,2672
+122940adaf177b20,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122940adaf185160,_Z30kernel_estim_next_step_texturePjjjj,474.08,4,0.5,64,64,6750313,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122940adaf1f97a0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122940adaf206da0,_Z29kernel_levelines_texture_smemPjjjj,2102.98,4,0.5,64,64,6357090,8,8,1,0,308,18,0,-1,,0,47606,10,731993,66846,410,0,0,0,0,0,12815,0,0,0,272,1165192,2643
+122940adaf408ec0,memcpyDtoA,47.808,4,,,,,,,,,,,0,0,1048576,0
+122940adaf4165a0,_Z30kernel_estim_next_step_texturePjjjj,479.456,4,0.5,64,64,7602276,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122940adaf48c0e0,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122940adaf499720,_Z29kernel_levelines_texture_smemPjjjj,2106.5,3,0.5,64,64,7798900,8,8,1,0,308,18,0,-1,,0,47434,12,729319,66968,410,0,0,0,0,0,12802,0,0,0,271,1163817,2749
+122940adaf69c5e0,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122940adaf6a9c20,_Z30kernel_estim_next_step_texturePjjjj,484.128,4,0.5,64,64,7274600,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122940adaf720980,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122940adaf72df80,_Z29kernel_levelines_texture_smemPjjjj,2115.81,3,0.5,64,64,6553710,8,8,1,0,308,18,0,-1,,0,47434,10,729328,66761,410,0,0,0,0,0,12804,0,0,0,271,1164886,2588
+122940adaf9332c0,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122940adaf9408c0,_Z30kernel_estim_next_step_texturePjjjj,481.312,4,0.5,64,64,6488169,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122940adaf9b6b40,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122940adaf9c41a0,_Z29kernel_levelines_texture_smemPjjjj,2109.15,4,0.5,64,64,7274605,8,8,1,0,308,18,0,-1,,0,47256,10,726617,66445,409,0,0,0,0,0,12800,0,0,0,270,1163404,2626
+122940adafbc7ac0,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122940adafbd5120,_Z30kernel_estim_next_step_texturePjjjj,477.376,4,0.5,64,64,7733327,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122940adafc4a420,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122940adafc57a20,_Z29kernel_levelines_texture_smemPjjjj,2113.09,4,0.5,64,64,2097253,8,8,1,0,308,18,0,-1,,0,47256,10,726618,66738,409,0,0,0,0,0,12788,0,0,0,270,1163359,2671
+122940adafe5c2e0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122940adafe698c0,_Z30kernel_estim_next_step_texturePjjjj,476.672,3,0.5,64,64,7209065,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122940adafede940,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122940adafeebf60,_Z29kernel_levelines_texture_smemPjjjj,2110.3,4,0.5,64,64,6619245,8,8,1,0,308,18,0,-1,,0,47256,10,726610,66368,410,0,0,0,0,0,12815,0,0,0,270,1165183,2652
+122940adb00efd00,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122940adb00fd300,_Z30kernel_estim_next_step_texturePjjjj,468.864,4,0.5,64,64,7536672,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122940adb01704c0,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122940adb017dae0,_Z29kernel_levelines_texture_smemPjjjj,2115.36,4,0.5,64,64,2097252,8,8,1,0,308,18,0,-1,,0,47256,10,726611,66084,410,0,0,0,0,0,12802,0,0,0,270,1163872,2694
+122940adb0382c40,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122940adb0390220,_Z30kernel_estim_next_step_texturePjjjj,473.44,4,0.5,64,64,7274614,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122940adb04045c0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122940adb0411bc0,_Z29kernel_levelines_texture_smemPjjjj,2116.22,4,0.5,64,64,6357024,8,8,1,0,308,18,0,-1,,0,47256,10,726610,66314,410,0,0,0,0,0,12804,0,0,0,270,1164898,2576
+122940adb0617080,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122940adb0624680,_Z30kernel_estim_next_step_texturePjjjj,466.592,4,0.5,64,64,7143456,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122940adb0696f60,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122940adb06a45a0,_Z29kernel_levelines_texture_smemPjjjj,2121.09,4,0.5,64,64,6553701,8,8,1,0,308,18,0,-1,,0,47256,10,726613,66552,409,0,0,0,0,0,12800,0,0,0,270,1163359,2671
+122940adb08aad80,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122940adb08b83a0,_Z30kernel_estim_next_step_texturePjjjj,465.152,4,0.5,64,64,7733359,8,8,1,0,36,9,0,-1,,0,822,0,15627,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122940adb092a700,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122940adb0937d40,_Z29kernel_levelines_texture_smemPjjjj,2106.34,4,0.5,64,64,2097260,8,8,1,0,308,18,0,-1,,0,47256,10,726614,66553,409,0,0,0,0,0,12788,0,0,0,270,1163415,2615
+122940adb0b3ab60,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122940adb0b48140,_Z30kernel_estim_next_step_texturePjjjj,476.896,4,0.5,64,64,7143525,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122940adb0bbd2a0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122940adb0bca8a0,_Z29kernel_levelines_texture_smemPjjjj,2109.22,4,0.5,64,64,7274598,8,8,1,0,308,18,0,-1,,0,47606,13,731993,67183,410,0,0,0,0,0,12815,0,0,0,272,1165195,2640
+122940adb0dce280,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122940adb0ddb880,_Z30kernel_estim_next_step_texturePjjjj,483.456,3,0.5,64,64,6619235,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122940adb0e8f400,memcpyAtoH,11.52,29,,,,,,,,,,,0,0,576,0
+122940adb0e98720,memcpyDtoH,191.68,736,,,,,,,,,,,0,0,1048576,0
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef91ef4898
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+1229419bcf456a00,memcpyHtoA,177.728,334,,,,,,,,,,,0,0,1048576,0
+1229419bcf4bf660,_Z19kernel_calcul_pathsP4int2j,43.392,12,0.031,1,1,12973520,1,1,1,0,28,11,0,-1,,0,168,0,1587,0,1,0,0,54,0,0,72,0,0,54,72,0,0
+1229419bcf4caa20,memcpyDtoA,4.32,6,,,,,,,,,,,0,0,576,0
+1229419bcf4cf760,_Z23kernel_neutre_img2estimPjjj,440.768,9,0.5,64,64,-2147483648,8,8,1,0,32,5,0,-1,,0,0,0,5448,0,409,0,0,0,0,0,13088,0,0,0,272,2454,818
+1229419bcf544f00,memcpyAtoD,50.4,6,,,,,,,,,,,0,0,1048576,0
+1229419bcf551e20,memcpyDtoA,47.52,3,,,,,,,,,,,0,0,1048576,0
+1229419bcf562e80,_Z29kernel_levelines_texture_smemPjjjj,2074.66,9,0.5,64,64,8916960,8,8,1,0,36,18,0,-1,,0,47606,11,703977,0,410,0,0,0,0,0,12802,0,0,0,272,1173148,4304
+1229419bcf75e0e0,memcpyDtoA,47.744,4,,,,,,,,,,,0,0,1048576,0
+1229419bcf76d700,_Z30kernel_estim_next_step_texturePjjjj,475.36,6,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+1229419bcf7e2220,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+1229419bcf7ef860,_Z29kernel_levelines_texture_smemPjjjj,2052.22,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47606,12,703981,0,410,0,0,0,0,0,12804,0,0,0,272,1164805,2669
+1229419bcf9e5320,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+1229419bcf9f2900,_Z30kernel_estim_next_step_texturePjjjj,457.632,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15513,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+1229419bcfa62f40,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229419bcfa705a0,_Z29kernel_levelines_texture_smemPjjjj,2059.81,4,0.5,64,64,8466560,8,8,1,0,36,18,0,-1,,0,47606,10,703983,0,409,0,0,0,0,0,12800,0,0,0,272,1163425,2605
+1229419bcfc67e00,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229419bcfc753e0,_Z30kernel_estim_next_step_texturePjjjj,484.128,3,0.5,64,64,10922,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+1229419bcfcec140,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229419bcfcf9720,_Z29kernel_levelines_texture_smemPjjjj,2054.18,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47606,12,703989,0,409,0,0,0,0,0,12788,0,0,0,272,1163336,2694
+1229419bcfeef9a0,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+1229419bcfefcfe0,_Z30kernel_estim_next_step_texturePjjjj,469.376,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+1229419bcff703a0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229419bcff7d9a0,_Z29kernel_levelines_texture_smemPjjjj,2055.1,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47606,10,703984,0,410,0,0,0,0,0,12815,0,0,0,272,1165156,2679
+1229419bd0173fa0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229419bd0181600,_Z30kernel_estim_next_step_texturePjjjj,477.216,9,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+1229419bd01f6840,memcpyDtoA,47.712,4,,,,,,,,,,,0,0,1048576,0
+1229419bd0203ec0,_Z29kernel_levelines_texture_smemPjjjj,2056.77,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47434,12,701415,0,410,0,0,0,0,0,12802,0,0,0,271,1163891,2675
+1229419bd03fab40,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+1229419bd04081a0,_Z30kernel_estim_next_step_texturePjjjj,484.992,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15512,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+1229419bd047f280,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+1229419bd048c8e0,_Z29kernel_levelines_texture_smemPjjjj,2049.92,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47434,10,701416,0,410,0,0,0,0,0,12804,0,0,0,271,1164883,2591
+1229419bd0681ac0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229419bd068f0e0,_Z30kernel_estim_next_step_texturePjjjj,486.752,4,0.5,64,64,1072693248,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+1229419bd07068a0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+1229419bd0713e80,_Z29kernel_levelines_texture_smemPjjjj,2068.54,4,0.5,64,64,7143535,8,8,1,0,36,18,0,-1,,0,47256,10,698805,0,409,0,0,0,0,0,12800,0,0,0,270,1163428,2602
+1229419bd090d900,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+1229419bd091af60,_Z30kernel_estim_next_step_texturePjjjj,489.152,3,0.5,64,64,1072693248,8,8,1,0,36,9,0,-1,,0,822,0,15625,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+1229419bd09930c0,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+1229419bd09a0700,_Z29kernel_levelines_texture_smemPjjjj,2058.94,3,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47256,10,698807,0,409,0,0,0,0,0,12788,0,0,0,270,1163396,2634
+1229419bd0b97c40,memcpyDtoA,47.52,3,,,,,,,,,,,0,0,1048576,0
+1229419bd0ba5220,_Z30kernel_estim_next_step_texturePjjjj,479.776,4,0.5,64,64,1072693248,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+1229419bd0c1aea0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229419bd0c284a0,_Z29kernel_levelines_texture_smemPjjjj,2055.78,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47256,10,698809,0,410,0,0,0,0,0,12815,0,0,0,270,1165209,2626
+1229419bd0e1ed60,memcpyDtoA,47.712,3,,,,,,,,,,,0,0,1048576,0
+1229419bd0e2c3e0,_Z30kernel_estim_next_step_texturePjjjj,471.392,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+1229419bd0e9ff80,memcpyDtoA,47.808,3,,,,,,,,,,,0,0,1048576,0
+1229419bd0ead6a0,_Z29kernel_levelines_texture_smemPjjjj,2047.3,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47256,10,698801,0,410,0,0,0,0,0,12802,0,0,0,270,1163935,2631
+1229419bd10a1e20,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+1229419bd10af420,_Z30kernel_estim_next_step_texturePjjjj,476.864,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15627,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+1229419bd1124540,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+1229419bd1131ba0,_Z29kernel_levelines_texture_smemPjjjj,2057.82,4,0.5,64,64,-1631,8,8,1,0,36,18,0,-1,,0,47256,10,698807,0,410,0,0,0,0,0,12804,0,0,0,270,1164939,2535
+1229419bd1328c60,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+1229419bd1336240,_Z30kernel_estim_next_step_texturePjjjj,475.936,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+1229419bd13aafa0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229419bd13b85a0,_Z29kernel_levelines_texture_smemPjjjj,2057.95,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47256,10,698806,0,409,0,0,0,0,0,12800,0,0,0,270,1163422,2608
+1229419bd15af6e0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+1229419bd15bcd60,_Z30kernel_estim_next_step_texturePjjjj,493.888,4,0.5,64,64,856,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+1229419bd16360e0,memcpyDtoA,47.712,3,,,,,,,,,,,0,0,1048576,0
+1229419bd1643760,_Z29kernel_levelines_texture_smemPjjjj,2053.98,3,0.5,64,64,12758580,8,8,1,0,36,18,0,-1,,0,47256,10,698810,0,409,0,0,0,0,0,12788,0,0,0,270,1163489,2541
+1229419bd1839940,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+1229419bd1846f40,_Z30kernel_estim_next_step_texturePjjjj,474.208,3,0.5,64,64,32767,8,8,1,0,36,9,0,-1,,0,822,0,15628,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+1229419bd18bb600,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+1229419bd18c8c40,_Z29kernel_levelines_texture_smemPjjjj,2065.76,4,0.5,64,64,2,8,8,1,0,36,18,0,-1,,0,47606,13,703985,0,410,0,0,0,0,0,12815,0,0,0,272,1165162,2673
+1229419bd1ac1be0,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+1229419bd1acf1e0,_Z30kernel_estim_next_step_texturePjjjj,465.952,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+1229419bd1b7e360,memcpyAtoH,11.52,30,,,,,,,,,,,0,0,576,0
+1229419bd1b87d00,memcpyDtoH,191.232,736,,,,,,,,,,,0,0,1048576,0
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef92065818
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+122941f213dd9800,memcpyHtoA,177.792,336,,,,,,,,,,,0,0,1048576,0
+122941f213e40de0,_Z19kernel_calcul_pathsP4int2j,43.2,12,0.031,1,1,-1386402264,1,1,1,0,28,11,0,-1,,0,168,0,1587,0,1,0,0,54,0,0,72,0,0,54,72,0,0
+122941f213e4c140,memcpyDtoA,4.32,6,,,,,,,,,,,0,0,576,0
+122941f213e50ea0,_Z23kernel_neutre_img2estimPjjj,446.944,9,0.5,64,64,0,8,8,1,0,32,5,0,-1,,0,0,0,5448,0,409,0,0,0,0,0,13088,0,0,0,272,2454,818
+122941f213ec8740,memcpyAtoD,50.496,6,,,,,,,,,,,0,0,1048576,0
+122941f213ed56c0,memcpyDtoA,47.456,4,,,,,,,,,,,0,0,1048576,0
+122941f213ee6700,_Z29kernel_levelines_texture_smemPjjjj,2749.79,9,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47606,11,743152,187680,410,0,0,0,0,0,12802,0,0,0,272,1571115,7958
+122941f2141866a0,memcpyDtoA,47.84,4,,,,,,,,,,,0,0,1048576,0
+122941f214195d60,_Z30kernel_estim_next_step_texturePjjjj,472.736,6,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122941f214209e60,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122941f214217440,_Z29kernel_levelines_texture_smemPjjjj,2289.44,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47606,12,743153,152442,410,0,0,0,0,0,12804,0,0,0,272,1256596,17308
+122941f214446dc0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122941f214454400,_Z30kernel_estim_next_step_texturePjjjj,480.32,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122941f2144ca2a0,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122941f2144d78c0,_Z29kernel_levelines_texture_smemPjjjj,2174.62,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47606,10,743148,144198,409,0,0,0,0,0,12800,0,0,0,272,1174004,1661
+122941f2146eb1a0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122941f2146f8780,_Z30kernel_estim_next_step_texturePjjjj,478.304,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122941f21476de20,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122941f21477b420,_Z29kernel_levelines_texture_smemPjjjj,2169.34,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47606,12,743151,144449,409,0,0,0,0,0,12788,0,0,0,272,1172650,1348
+122941f21498d8c0,memcpyDtoA,47.52,3,,,,,,,,,,,0,0,1048576,0
+122941f21499ae80,_Z30kernel_estim_next_step_texturePjjjj,482.432,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15512,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122941f214a11560,memcpyDtoA,47.488,3,,,,,,,,,,,0,0,1048576,0
+122941f214a1eb00,_Z29kernel_levelines_texture_smemPjjjj,2166.24,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47606,10,743150,144061,410,0,0,0,0,0,12815,0,0,0,272,1171928,1205
+122941f214c30340,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122941f214c3d900,_Z30kernel_estim_next_step_texturePjjjj,484.064,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122941f214cb4640,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122941f214cc1ca0,_Z29kernel_levelines_texture_smemPjjjj,2165.44,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47434,12,740447,143891,410,0,0,0,0,0,12802,0,0,0,271,1169458,1131
+122941f214ed31c0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122941f214ee0840,_Z30kernel_estim_next_step_texturePjjjj,475.104,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122941f214f55260,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122941f214f62840,_Z29kernel_levelines_texture_smemPjjjj,2169.92,3,0.5,64,64,19,8,8,1,0,292,20,0,-1,,0,47434,10,740437,144692,410,0,0,0,0,0,12804,0,0,0,271,1171196,1166
+122941f215174f40,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122941f215182520,_Z30kernel_estim_next_step_texturePjjjj,481.824,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122941f2151f8980,memcpyDtoA,47.744,3,,,,,,,,,,,0,0,1048576,0
+122941f215206020,_Z29kernel_levelines_texture_smemPjjjj,2174.78,3,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47256,10,737697,142626,409,0,0,0,0,0,12800,0,0,0,270,1171005,1147
+122941f2154199c0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122941f215427060,_Z30kernel_estim_next_step_texturePjjjj,473.44,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122941f21549b400,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122941f2154a89e0,_Z29kernel_levelines_texture_smemPjjjj,2172.13,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47256,10,737693,142582,409,0,0,0,0,0,12788,0,0,0,270,1170316,1122
+122941f2156bb900,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122941f2156c8f00,_Z30kernel_estim_next_step_texturePjjjj,478.72,4,0.5,64,64,512,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122941f21573e740,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122941f21574bda0,_Z29kernel_levelines_texture_smemPjjjj,2175.81,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47256,10,737683,142961,410,0,0,0,0,0,12815,0,0,0,270,1172017,1158
+122941f21595fb20,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122941f21596d120,_Z30kernel_estim_next_step_texturePjjjj,484.352,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122941f2159e3f80,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122941f2159f1580,_Z29kernel_levelines_texture_smemPjjjj,2171.97,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47256,10,737684,143028,410,0,0,0,0,0,12802,0,0,0,270,1170115,1118
+122941f215c04400,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122941f215c11a40,_Z30kernel_estim_next_step_texturePjjjj,473.408,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15627,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122941f215c85dc0,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122941f215c93420,_Z29kernel_levelines_texture_smemPjjjj,2178.5,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47256,10,737687,143703,410,0,0,0,0,0,12804,0,0,0,270,1171501,1189
+122941f215ea7c20,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122941f215eb5200,_Z30kernel_estim_next_step_texturePjjjj,475.584,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122941f215f29e00,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122941f215f37460,_Z29kernel_levelines_texture_smemPjjjj,2162.05,3,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47256,10,737680,142311,409,0,0,0,0,0,12800,0,0,0,270,1170773,1141
+122941f216147c20,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122941f216155220,_Z30kernel_estim_next_step_texturePjjjj,473.152,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122941f2161c94a0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122941f2161d6a80,_Z29kernel_levelines_texture_smemPjjjj,2169.7,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47256,10,737687,142117,409,0,0,0,0,0,12788,0,0,0,270,1169968,1132
+122941f2163e9020,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122941f2163f66c0,_Z30kernel_estim_next_step_texturePjjjj,491.52,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122941f21646f140,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122941f21647c740,_Z29kernel_levelines_texture_smemPjjjj,2164.67,4,0.5,64,64,0,8,8,1,0,292,20,0,-1,,0,47606,13,743152,142819,410,0,0,0,0,0,12815,0,0,0,272,1171022,1143
+122941f21668d940,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122941f21669af80,_Z30kernel_estim_next_step_texturePjjjj,486.816,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122941f21674db40,memcpyAtoH,11.552,30,,,,,,,,,,,0,0,576,0
+122941f216756cc0,memcpyDtoH,191.712,745,,,,,,,,,,,0,0,1048576,0
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef88fd8ba8
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+122935c55701cc60,memcpyHtoA,177.568,331,,,,,,,,,,,0,0,1048576,0
+122935c557085020,_Z19kernel_calcul_pathsP4int2j,43.168,13,0.031,1,1,11485616,1,1,1,0,28,11,0,-1,,0,168,0,1587,0,1,0,0,54,0,0,72,0,0,54,72,0,0
+122935c557090340,memcpyDtoA,4.32,5,,,,,,,,,,,0,0,576,0
+122935c5570950c0,_Z23kernel_neutre_img2estimPjjj,441.44,9,0.5,64,64,1,8,8,1,0,32,5,0,-1,,0,0,0,5448,0,409,0,0,0,0,0,13088,0,0,0,272,2454,818
+122935c55710b3a0,memcpyAtoD,50.464,6,,,,,,,,,,,0,0,1048576,0
+122935c557118300,memcpyDtoA,47.392,4,,,,,,,,,,,0,0,1048576,0
+122935c557129320,_Z24kernel_levelines_texturePjjjj,2217.66,10,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47606,11,748312,0,410,0,0,0,0,0,12802,0,0,0,272,1198877,41827
+122935c557347440,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122935c557356a00,_Z30kernel_estim_next_step_texturePjjjj,463.072,6,0.5,64,64,13,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122935c5573c8540,memcpyDtoA,47.52,3,,,,,,,,,,,0,0,1048576,0
+122935c5573d5b20,_Z24kernel_levelines_texturePjjjj,2217.76,4,0.5,64,64,11833200,8,8,1,0,36,18,0,-1,,0,47606,12,748315,0,410,0,0,0,0,0,12804,0,0,0,272,1198392,43464
+122935c5575f3ca0,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122935c557601280,_Z30kernel_estim_next_step_texturePjjjj,471.84,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122935c557674fe0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935c5576825e0,_Z24kernel_levelines_texturePjjjj,2230.08,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47606,10,748312,0,409,0,0,0,0,0,12800,0,0,0,272,1195784,44536
+122935c5578a37e0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935c5578b0de0,_Z30kernel_estim_next_step_texturePjjjj,466.176,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122935c557923540,memcpyDtoA,47.712,4,,,,,,,,,,,0,0,1048576,0
+122935c557930bc0,_Z24kernel_levelines_texturePjjjj,2213.15,4,0.5,64,64,0,8,8,1,0,36,18,0,-1,,0,47606,12,748313,0,409,0,0,0,0,0,12788,0,0,0,272,1198714,41606
+122935c557b4dba0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122935c557b5b1c0,_Z30kernel_estim_next_step_texturePjjjj,471.072,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122935c557bcec20,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935c557bdc260,_Z24kernel_levelines_texturePjjjj,2206.82,4,0.5,64,64,11460208,8,8,1,0,36,18,0,-1,,0,47606,10,748330,0,410,0,0,0,0,0,12815,0,0,0,272,1198656,43584
+122935c557df7980,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122935c557e04fc0,_Z30kernel_estim_next_step_texturePjjjj,478.304,3,0.5,64,64,10922,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122935c557e7a6c0,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122935c557e87d40,_Z24kernel_levelines_texturePjjjj,2228.32,4,0.5,64,64,32,8,8,1,0,36,18,0,-1,,0,47434,12,745579,0,410,0,0,0,0,0,12802,0,0,0,271,1198277,42427
+122935c5580a87e0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122935c5580b5de0,_Z30kernel_estim_next_step_texturePjjjj,467.04,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122935c5581288e0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122935c558135ea0,_Z24kernel_levelines_texturePjjjj,2214.69,4,0.5,64,64,-1053464,8,8,1,0,36,18,0,-1,,0,47434,10,745588,0,410,0,0,0,0,0,12804,0,0,0,271,1199501,42355
+122935c558353400,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122935c558360a40,_Z30kernel_estim_next_step_texturePjjjj,491.712,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935c5583d9560,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122935c5583e6b60,_Z24kernel_levelines_texturePjjjj,2220.61,4,0.5,64,64,11831600,8,8,1,0,36,18,0,-1,,0,47256,10,742810,0,409,0,0,0,0,0,12800,0,0,0,270,1195661,44659
+122935c558605840,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122935c558612e80,_Z30kernel_estim_next_step_texturePjjjj,472.992,3,0.5,64,64,256,8,8,1,0,36,9,0,-1,,0,822,0,15625,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935c558687060,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935c558694680,_Z24kernel_levelines_texturePjjjj,2218.37,4,0.5,64,64,8373664,8,8,1,0,36,18,0,-1,,0,47256,10,742813,0,409,0,0,0,0,0,12788,0,0,0,270,1197654,42666
+122935c5588b2a60,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935c5588c0060,_Z30kernel_estim_next_step_texturePjjjj,478.496,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15625,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935c5589357c0,memcpyDtoA,47.52,4,,,,,,,,,,,0,0,1048576,0
+122935c558942d80,_Z24kernel_levelines_texturePjjjj,2210.37,3,0.5,64,64,144,8,8,1,0,36,18,0,-1,,0,47256,10,742811,0,410,0,0,0,0,0,12815,0,0,0,270,1198542,43698
+122935c558b5f200,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122935c558b6c800,_Z30kernel_estim_next_step_texturePjjjj,467.968,4,0.5,64,64,48,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122935c558bdf660,memcpyDtoA,47.712,3,,,,,,,,,,,0,0,1048576,0
+122935c558beccc0,_Z24kernel_levelines_texturePjjjj,2221.98,4,0.5,64,64,11565802,8,8,1,0,36,18,0,-1,,0,47256,10,742809,0,410,0,0,0,0,0,12802,0,0,0,270,1198356,42348
+122935c558e0bea0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935c558e194a0,_Z30kernel_estim_next_step_texturePjjjj,468.512,4,0.5,64,64,7602289,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122935c558e8c500,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122935c558e99b80,_Z24kernel_levelines_texturePjjjj,2216.1,4,0.5,64,64,6226030,8,8,1,0,36,18,0,-1,,0,47256,10,742809,0,410,0,0,0,0,0,12804,0,0,0,270,1199237,42619
+122935c5590b76e0,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122935c5590c4cc0,_Z30kernel_estim_next_step_texturePjjjj,477.856,4,0.5,64,64,3211314,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935c55913a1c0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122935c5591477a0,_Z24kernel_levelines_texturePjjjj,2213.6,4,0.5,64,64,3670069,8,8,1,0,36,18,0,-1,,0,47256,10,742809,0,409,0,0,0,0,0,12800,0,0,0,270,1196806,43514
+122935c5593648e0,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122935c559371f40,_Z30kernel_estim_next_step_texturePjjjj,484.352,4,0.5,64,64,3145780,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935c5593e8d80,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122935c5593f6400,_Z24kernel_levelines_texturePjjjj,2214.46,4,0.5,64,64,6225971,8,8,1,0,36,18,0,-1,,0,47256,10,742810,0,409,0,0,0,0,0,12788,0,0,0,270,1197263,43057
+122935c5596138a0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122935c559620f20,_Z30kernel_estim_next_step_texturePjjjj,485.952,4,0.5,64,64,3145773,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935c5596983a0,memcpyDtoA,47.712,3,,,,,,,,,,,0,0,1048576,0
+122935c5596a5a40,_Z24kernel_levelines_texturePjjjj,2219.36,4,0.5,64,64,-767361184,8,8,1,0,36,18,0,-1,,0,47606,13,748312,0,410,0,0,0,0,0,12815,0,0,0,272,1199612,42628
+122935c5598c41e0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122935c5598d1860,_Z30kernel_estim_next_step_texturePjjjj,469.504,4,0.5,64,64,48,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122935c559988ea0,memcpyAtoH,11.584,33,,,,,,,,,,,0,0,576,0
+122935c5599936c0,memcpyDtoH,191.04,741,,,,,,,,,,,0,0,1048576,0
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef899f0960
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+122936d7f2be0c20,memcpyHtoA,177.248,331,,,,,,,,,,,0,0,1048576,0
+122936d7f2c48180,_Z19kernel_calcul_pathsP4int2j,43.2,12,0.031,1,1,12468336,1,1,1,0,28,11,0,-1,,0,168,0,1588,0,1,0,0,54,0,0,72,0,0,54,72,0,0
+122936d7f2c53480,memcpyDtoA,4.256,6,,,,,,,,,,,0,0,576,0
+122936d7f2c581a0,_Z23kernel_neutre_img2estimPjjj,432.8,8,0.5,64,64,256,8,8,1,0,32,5,0,-1,,0,0,0,5448,0,409,0,0,0,0,0,13088,0,0,0,272,2454,818
+122936d7f2ccc5a0,memcpyAtoD,50.4,6,,,,,,,,,,,0,0,1048576,0
+122936d7f2cd94c0,memcpyDtoA,47.488,4,,,,,,,,,,,0,0,1048576,0
+122936d7f2cea560,_Z29kernel_levelines_texture_smemPjjjj,3264.77,9,0.5,64,64,11454096,8,8,1,0,552,18,0,-1,,0,47606,11,788295,862664,410,0,0,0,0,0,12802,0,0,0,272,1224934,31164
+122936d7f30080a0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122936d7f30176c0,_Z30kernel_estim_next_step_texturePjjjj,483.904,6,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122936d7f308e340,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122936d7f309b960,_Z29kernel_levelines_texture_smemPjjjj,3259.26,4,0.5,64,64,64,8,8,1,0,552,18,0,-1,,0,47606,12,788295,858288,410,0,0,0,0,0,12804,0,0,0,272,1220536,26811
+122936d7f33b7f20,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122936d7f33c5500,_Z30kernel_estim_next_step_texturePjjjj,485.856,5,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122936d7f343c940,memcpyDtoA,47.776,4,,,,,,,,,,,0,0,1048576,0
+122936d7f344a000,_Z29kernel_levelines_texture_smemPjjjj,3269.73,3,0.5,64,64,0,8,8,1,0,552,18,0,-1,,0,47606,10,788295,857612,409,0,0,0,0,0,12800,0,0,0,272,1214814,27588
+122936d7f3768ea0,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122936d7f37764c0,_Z30kernel_estim_next_step_texturePjjjj,481.024,4,0.5,64,64,31,8,8,1,0,36,9,0,-1,,0,816,0,15513,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122936d7f37ec680,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122936d7f37f9c60,_Z29kernel_levelines_texture_smemPjjjj,3263.46,3,0.5,64,64,147,8,8,1,0,552,18,0,-1,,0,47606,12,788302,855382,409,0,0,0,0,0,12788,0,0,0,272,1215865,24785
+122936d7f3b172a0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122936d7f3b248e0,_Z30kernel_estim_next_step_texturePjjjj,470.016,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15512,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122936d7f3b97f60,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122936d7f3ba55a0,_Z29kernel_levelines_texture_smemPjjjj,3258.18,4,0.5,64,64,12305584,8,8,1,0,552,18,0,-1,,0,47606,10,788297,859076,410,0,0,0,0,0,12815,0,0,0,272,1215402,27600
+122936d7f3ec1740,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122936d7f3eced80,_Z30kernel_estim_next_step_texturePjjjj,465.536,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122936d7f3f41260,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122936d7f3f4e860,_Z29kernel_levelines_texture_smemPjjjj,3266.59,4,0.5,64,64,117,8,8,1,0,552,18,0,-1,,0,47434,12,785416,855772,410,0,0,0,0,0,12802,0,0,0,271,1216005,25733
+122936d7f426cb40,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122936d7f427a120,_Z30kernel_estim_next_step_texturePjjjj,478.304,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122936d7f42ef7c0,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122936d7f42fce60,_Z29kernel_levelines_texture_smemPjjjj,3274.46,3,0.5,64,64,192,8,8,1,0,552,18,0,-1,,0,47434,10,785418,856700,410,0,0,0,0,0,12804,0,0,0,271,1218747,24325
+122936d7f461cfa0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122936d7f462a600,_Z30kernel_estim_next_step_texturePjjjj,476.576,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122936d7f469f600,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122936d7f46acc20,_Z29kernel_levelines_texture_smemPjjjj,3270.85,3,0.5,64,64,-1404411488,8,8,1,0,552,18,0,-1,,0,47256,10,782499,851609,409,0,0,0,0,0,12800,0,0,0,270,1215219,26327
+122936d7f49cbfa0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122936d7f49d95a0,_Z30kernel_estim_next_step_texturePjjjj,478.88,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122936d7f4a4eea0,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122936d7f4a5c500,_Z29kernel_levelines_texture_smemPjjjj,3272.86,3,0.5,64,64,0,8,8,1,0,552,18,0,-1,,0,47256,10,782499,851674,409,0,0,0,0,0,12788,0,0,0,270,1215877,25735
+122936d7f4d7c000,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122936d7f4d89640,_Z30kernel_estim_next_step_texturePjjjj,479.904,4,0.5,64,64,35,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122936d7f4dff320,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122936d7f4e0c920,_Z29kernel_levelines_texture_smemPjjjj,3263.14,4,0.5,64,64,145,8,8,1,0,552,18,0,-1,,0,47256,10,782501,852832,410,0,0,0,0,0,12815,0,0,0,270,1216391,27119
+122936d7f5129e00,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122936d7f5137460,_Z30kernel_estim_next_step_texturePjjjj,474.112,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122936d7f51abaa0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122936d7f51b90c0,_Z29kernel_levelines_texture_smemPjjjj,3255.3,3,0.5,64,64,0,8,8,1,0,552,18,0,-1,,0,47256,10,782499,851161,410,0,0,0,0,0,12802,0,0,0,270,1216487,25419
+122936d7f54d4700,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122936d7f54e1ce0,_Z30kernel_estim_next_step_texturePjjjj,478.432,3,0.5,64,64,37,8,8,1,0,36,9,0,-1,,0,822,0,15625,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122936d7f5557420,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122936d7f5564a60,_Z29kernel_levelines_texture_smemPjjjj,3263.68,4,0.5,64,64,1102,8,8,1,0,552,18,0,-1,,0,47256,10,782499,851691,410,0,0,0,0,0,12804,0,0,0,270,1215992,27116
+122936d7f5882180,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122936d7f588f760,_Z30kernel_estim_next_step_texturePjjjj,470.944,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122936d7f5903140,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122936d7f5910760,_Z29kernel_levelines_texture_smemPjjjj,3257.12,3,0.5,64,64,9661136,8,8,1,0,552,18,0,-1,,0,47256,10,782499,851971,409,0,0,0,0,0,12800,0,0,0,270,1216412,25128
+122936d7f5c2c4c0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122936d7f5c39ac0,_Z30kernel_estim_next_step_texturePjjjj,483.392,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15627,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122936d7f5cb0560,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122936d7f5cbdb60,_Z29kernel_levelines_texture_smemPjjjj,3269.76,4,0.5,64,64,12157856,8,8,1,0,552,18,0,-1,,0,47256,10,782500,855086,409,0,0,0,0,0,12788,0,0,0,270,1217387,24205
+122936d7f5fdca40,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122936d7f5fea060,_Z30kernel_estim_next_step_texturePjjjj,466.784,3,0.5,64,64,32767,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122936d7f605ca20,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122936d7f606a020,_Z29kernel_levelines_texture_smemPjjjj,3273.89,5,0.5,64,64,9749904,8,8,1,0,552,18,0,-1,,0,47606,13,788296,857119,410,0,0,0,0,0,12815,0,0,0,272,1216502,27016
+122936d7f6389f20,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122936d7f6397580,_Z30kernel_estim_next_step_texturePjjjj,478.08,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122936d7f6447fa0,memcpyAtoH,11.616,29,,,,,,,,,,,0,0,576,0
+122936d7f6451160,memcpyDtoH,191.392,744,,,,,,,,,,,0,0,1048576,0
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef89bda598
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+122937299dd13280,memcpyHtoA,177.568,333,,,,,,,,,,,0,0,1048576,0
+122937299dd7b040,_Z19kernel_calcul_pathsP4int2j,42.976,12,0.031,1,1,12201664,1,1,1,0,28,11,0,-1,,0,168,0,1587,0,1,0,0,54,0,0,72,0,0,54,72,0,0
+122937299dd86260,memcpyDtoA,4.288,6,,,,,,,,,,,0,0,576,0
+122937299dd8afa0,_Z23kernel_neutre_img2estimPjjj,444.608,8,0.5,64,64,1074266112,8,8,1,0,32,5,0,-1,,0,0,0,5448,0,409,0,0,0,0,0,13088,0,0,0,272,2454,818
+122937299de016c0,memcpyAtoD,50.592,6,,,,,,,,,,,0,0,1048576,0
+122937299de0e6e0,memcpyDtoA,47.424,4,,,,,,,,,,,0,0,1048576,0
+122937299de1f760,_Z29kernel_levelines_texture_smemPjjjj,2694.56,10,0.5,64,64,0,8,8,1,0,584,18,0,-1,,0,47606,11,789386,391913,410,0,0,0,0,0,12802,0,0,0,272,1223244,32854
+122937299e0b1f60,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122937299e0c1560,_Z30kernel_estim_next_step_texturePjjjj,471.616,6,0.5,64,64,1075052544,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122937299e135200,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122937299e142860,_Z29kernel_levelines_texture_smemPjjjj,2685.31,4,0.5,64,64,1,8,8,1,0,584,18,0,-1,,0,47606,12,789386,390037,410,0,0,0,0,0,12804,0,0,0,272,1219847,27708
+122937299e3d2c20,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122937299e3e0260,_Z30kernel_estim_next_step_texturePjjjj,486.784,4,0.5,64,64,1075052544,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122937299e457a40,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122937299e465040,_Z29kernel_levelines_texture_smemPjjjj,2667.74,4,0.5,64,64,1,8,8,1,0,584,18,0,-1,,0,47606,10,789383,389311,409,0,0,0,0,0,12800,0,0,0,272,1213376,27080
+122937299e6f0fa0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122937299e6fe600,_Z30kernel_estim_next_step_texturePjjjj,470.112,4,0.5,64,64,1074266112,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122937299e771cc0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122937299e77f2a0,_Z29kernel_levelines_texture_smemPjjjj,2670.72,3,0.5,64,64,1,8,8,1,0,584,18,0,-1,,0,47606,12,789386,388524,409,0,0,0,0,0,12788,0,0,0,272,1215272,26206
+122937299ea0bd60,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122937299ea19340,_Z30kernel_estim_next_step_texturePjjjj,480.096,4,0.5,64,64,1074266112,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122937299ea8f0e0,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122937299ea9c740,_Z29kernel_levelines_texture_smemPjjjj,2671.14,4,0.5,64,64,1,8,8,1,0,584,18,0,-1,,0,47606,10,789388,389600,410,0,0,0,0,0,12815,0,0,0,272,1218236,25318
+122937299ed293c0,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122937299ed36a20,_Z30kernel_estim_next_step_texturePjjjj,476.704,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15512,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122937299edabac0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122937299edb90a0,_Z29kernel_levelines_texture_smemPjjjj,2671.3,4,0.5,64,64,0,8,8,1,0,584,18,0,-1,,0,47434,12,786502,388436,410,0,0,0,0,0,12802,0,0,0,271,1213625,28427
+122937299f045da0,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122937299f0533c0,_Z30kernel_estim_next_step_texturePjjjj,472.064,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15512,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122937299f0c7200,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122937299f0d4860,_Z29kernel_levelines_texture_smemPjjjj,2677.92,4,0.5,64,64,0,8,8,1,0,584,18,0,-1,,0,47434,10,786502,388243,410,0,0,0,0,0,12804,0,0,0,271,1216963,26153
+122937299f362f40,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122937299f370540,_Z30kernel_estim_next_step_texturePjjjj,467.456,3,0.5,64,64,1072693248,8,8,1,0,36,9,0,-1,,0,822,0,15625,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937299f3e3180,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122937299f3f07e0,_Z29kernel_levelines_texture_smemPjjjj,2675.04,4,0.5,64,64,11741088,8,8,1,0,584,18,0,-1,,0,47256,10,783580,385314,409,0,0,0,0,0,12800,0,0,0,270,1216083,25553
+122937299f67e3a0,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122937299f68ba00,_Z30kernel_estim_next_step_texturePjjjj,475.392,3,0.5,64,64,1072693248,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937299f700560,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122937299f70dba0,_Z29kernel_levelines_texture_smemPjjjj,2662.05,4,0.5,64,64,0,8,8,1,0,584,18,0,-1,,0,47256,10,783580,384899,409,0,0,0,0,0,12788,0,0,0,270,1214437,27265
+122937299f998480,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122937299f9a5aa0,_Z30kernel_estim_next_step_texturePjjjj,477.12,4,0.5,64,64,1072693248,8,8,1,0,36,9,0,-1,,0,822,0,15625,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937299fa1acc0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122937299fa282c0,_Z29kernel_levelines_texture_smemPjjjj,2673.63,4,0.5,64,64,0,8,8,1,0,584,18,0,-1,,0,47256,10,783582,384740,410,0,0,0,0,0,12815,0,0,0,270,1216791,26747
+122937299fcb58e0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122937299fcc2ee0,_Z30kernel_estim_next_step_texturePjjjj,481.792,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122937299fd39340,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122937299fd46940,_Z29kernel_levelines_texture_smemPjjjj,2675.68,3,0.5,64,64,0,8,8,1,0,584,18,0,-1,,0,47256,10,783579,383647,410,0,0,0,0,0,12802,0,0,0,270,1215897,26147
+122937299ffd47c0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122937299ffe1dc0,_Z30kernel_estim_next_step_texturePjjjj,486.4,3,0.5,64,64,10922,8,8,1,0,36,9,0,-1,,0,822,0,15627,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+12293729a0059400,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+12293729a00669e0,_Z29kernel_levelines_texture_smemPjjjj,2668.42,4,0.5,64,64,0,8,8,1,0,584,18,0,-1,,0,47256,10,783580,384611,410,0,0,0,0,0,12804,0,0,0,270,1216801,26315
+12293729a02f2ba0,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+12293729a0300220,_Z30kernel_estim_next_step_texturePjjjj,465.92,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+12293729a0372860,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+12293729a037fea0,_Z29kernel_levelines_texture_smemPjjjj,2667.1,3,0.5,64,64,0,8,8,1,0,584,18,0,-1,,0,47256,10,783579,385331,409,0,0,0,0,0,12800,0,0,0,270,1215586,26050
+12293729a060bb80,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+12293729a06191e0,_Z30kernel_estim_next_step_texturePjjjj,480.32,4,0.5,64,64,856,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+12293729a068f060,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+12293729a069c6a0,_Z29kernel_levelines_texture_smemPjjjj,2676.93,3,0.5,64,64,2621500,8,8,1,0,584,18,0,-1,,0,47256,10,783580,386407,409,0,0,0,0,0,12788,0,0,0,270,1214898,26804
+12293729a092a9c0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+12293729a0938000,_Z30kernel_estim_next_step_texturePjjjj,475.328,3,0.5,64,64,32767,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+12293729a09acb00,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+12293729a09ba140,_Z29kernel_levelines_texture_smemPjjjj,2666.72,4,0.5,64,64,0,8,8,1,0,584,18,0,-1,,0,47606,13,789385,389081,410,0,0,0,0,0,12815,0,0,0,272,1215491,28047
+12293729a0c45c60,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+12293729a0c53260,_Z30kernel_estim_next_step_texturePjjjj,479.136,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+12293729a0d07640,memcpyAtoH,11.712,30,,,,,,,,,,,0,0,576,0
+12293729a0d10ae0,memcpyDtoH,191.936,733,,,,,,,,,,,0,0,1048576,0
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef8902a258
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+122935912c8a99a0,memcpyHtoA,177.376,332,,,,,,,,,,,0,0,1048576,0
+122935912c911b40,_Z19kernel_calcul_pathsP4int2j,42.752,12,0.031,1,1,11338272,1,1,1,0,28,11,0,-1,,0,168,0,1587,0,1,0,0,54,0,0,72,0,0,54,72,0,0
+122935912c91cca0,memcpyDtoA,4.32,5,,,,,,,,,,,0,0,576,0
+122935912c921a40,_Z23kernel_neutre_img2estimPjjj,440.864,9,0.5,64,64,1074266112,8,8,1,0,32,5,0,-1,,0,0,0,5448,0,409,0,0,0,0,0,13088,0,0,0,272,2454,818
+122935912c9973a0,memcpyAtoD,50.688,6,,,,,,,,,,,0,0,1048576,0
+122935912c9a4400,memcpyDtoA,47.392,3,,,,,,,,,,,0,0,1048576,0
+122935912c9b5440,_Z29kernel_levelines_texture_smemPjjjj,3593.66,9,0.5,64,64,0,8,8,1,0,1064,17,0,-1,,0,72362,4638,841352,995747,410,0,0,0,0,0,12802,0,0,0,272,1199518,41186
+122935912cd23460,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122935912cd32a20,_Z30kernel_estim_next_step_texturePjjjj,478.112,6,0.5,64,64,1075052544,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122935912cda8020,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122935912cdb5600,_Z29kernel_levelines_texture_smemPjjjj,3592.77,4,0.5,64,64,1,8,8,1,0,1064,17,0,-1,,0,72795,5071,843086,996169,410,0,0,0,0,0,12804,0,0,0,272,1199848,42008
+122935912d1232c0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935912d130960,_Z30kernel_estim_next_step_texturePjjjj,463.392,4,0.5,64,64,1075052544,8,8,1,0,36,9,0,-1,,0,816,0,15513,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122935912d1a25e0,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122935912d1afc40,_Z29kernel_levelines_texture_smemPjjjj,3594.72,3,0.5,64,64,1,8,8,1,0,1064,17,0,-1,,0,72875,5149,843404,1000954,409,0,0,0,0,0,12800,0,0,0,272,1195816,44504
+122935912d51e060,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935912d52b660,_Z30kernel_estim_next_step_texturePjjjj,475.52,4,0.5,64,64,1074266112,8,8,1,0,36,9,0,-1,,0,816,0,15512,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122935912d5a0220,memcpyDtoA,47.552,11,,,,,,,,,,,0,0,1048576,0
+122935912d5ad800,_Z29kernel_levelines_texture_smemPjjjj,3590.11,4,0.5,64,64,1,8,8,1,0,1064,17,0,-1,,0,72741,5017,842871,999358,409,0,0,0,0,0,12788,0,0,0,272,1197057,43263
+122935912d91aa40,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935912d928060,_Z30kernel_estim_next_step_texturePjjjj,489.248,4,0.5,64,64,1074266112,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122935912d9a01c0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935912d9ad840,_Z29kernel_levelines_texture_smemPjjjj,3597.86,4,0.5,64,64,1,8,8,1,0,1064,17,0,-1,,0,72787,5061,843057,1001133,410,0,0,0,0,0,12815,0,0,0,272,1198644,43596
+122935912dd1c8a0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122935912dd29f00,_Z30kernel_estim_next_step_texturePjjjj,481.728,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122935912dda0320,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122935912ddad980,_Z29kernel_levelines_texture_smemPjjjj,3595.36,4,0.5,64,64,0,8,8,1,0,1064,17,0,-1,,0,72422,4944,839570,998486,410,0,0,0,0,0,12802,0,0,0,271,1198607,42097
+122935912e11c040,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122935912e129680,_Z30kernel_estim_next_step_texturePjjjj,477.28,3,0.5,64,64,18,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122935912e19e940,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122935912e1abf20,_Z29kernel_levelines_texture_smemPjjjj,3600.32,3,0.5,64,64,10083328,8,8,1,0,1064,17,0,-1,,0,72371,4891,839366,996160,410,0,0,0,0,0,12804,0,0,0,271,1199727,42129
+122935912e51b920,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122935912e528f80,_Z30kernel_estim_next_step_texturePjjjj,474.72,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935912e59d840,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122935912e5aae60,_Z29kernel_levelines_texture_smemPjjjj,3596.1,3,0.5,64,64,1,8,8,1,0,1064,17,0,-1,,0,72068,4840,836118,990542,409,0,0,0,0,0,12800,0,0,0,270,1197654,42666
+122935912e919820,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122935912e926e80,_Z30kernel_estim_next_step_texturePjjjj,476.032,4,0.5,64,64,1072693248,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935912e99bc60,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122935912e9a9280,_Z29kernel_levelines_texture_smemPjjjj,3600.45,4,0.5,64,64,0,8,8,1,0,1064,17,0,-1,,0,72013,4785,835903,990163,409,0,0,0,0,0,12788,0,0,0,270,1199082,41238
+122935912ed18d20,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122935912ed26360,_Z30kernel_estim_next_step_texturePjjjj,479.328,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15625,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935912ed9be20,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122935912eda9440,_Z29kernel_levelines_texture_smemPjjjj,3593.73,4,0.5,64,64,0,8,8,1,0,1064,17,0,-1,,0,71997,4769,835835,989090,410,0,0,0,0,0,12815,0,0,0,270,1199031,43209
+122935912f117480,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122935912f124a60,_Z30kernel_estim_next_step_texturePjjjj,474.816,4,0.5,64,64,1072693248,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122935912f1993a0,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122935912f1a6a00,_Z29kernel_levelines_texture_smemPjjjj,3588.06,4,0.5,64,64,0,8,8,1,0,1064,17,0,-1,,0,71962,4734,835695,988526,410,0,0,0,0,0,12802,0,0,0,270,1197717,42987
+122935912f513440,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122935912f520aa0,_Z30kernel_estim_next_step_texturePjjjj,492.384,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122935912f599860,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122935912f5a6ea0,_Z29kernel_levelines_texture_smemPjjjj,3582.43,4,0.5,64,64,-1386403272,8,8,1,0,1064,17,0,-1,,0,71991,4763,835813,989348,410,0,0,0,0,0,12804,0,0,0,270,1198967,42889
+122935912f912300,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935912f91f940,_Z30kernel_estim_next_step_texturePjjjj,476.928,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935912f994aa0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935912f9a20a0,_Z29kernel_levelines_texture_smemPjjjj,3605.02,4,0.5,64,64,0,8,8,1,0,1064,17,0,-1,,0,71919,4691,835524,991687,409,0,0,0,0,0,12800,0,0,0,270,1195048,45272
+122935912fd12d20,memcpyDtoA,47.744,3,,,,,,,,,,,0,0,1048576,0
+122935912fd203c0,_Z30kernel_estim_next_step_texturePjjjj,475.52,4,0.5,64,64,32,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122935912fd94f80,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122935912fda2580,_Z29kernel_levelines_texture_smemPjjjj,3586.85,4,0.5,64,64,32,8,8,1,0,1064,17,0,-1,,0,71947,4719,835634,989254,409,0,0,0,0,0,12788,0,0,0,270,1197194,43126
+122935913010eb00,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122935913011c100,_Z30kernel_estim_next_step_texturePjjjj,477.696,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+1229359130191560,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122935913019eba0,_Z29kernel_levelines_texture_smemPjjjj,3588.93,4,0.5,64,64,65280,8,8,1,0,1064,17,0,-1,,0,72420,4696,841586,995223,410,0,0,0,0,0,12815,0,0,0,272,1199000,43240
+122935913050b940,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+1229359130518f60,_Z30kernel_estim_next_step_texturePjjjj,482.272,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+12293591305cd240,memcpyAtoH,11.584,32,,,,,,,,,,,0,0,576,0
+12293591305d6be0,memcpyDtoH,191.232,734,,,,,,,,,,,0,0,1048576,0
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef8a3fef18
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+122937ea2858cb60,memcpyHtoA,214.656,809,,,,,,,,,,,0,0,1048576,0
+122937ea28726900,_Z19kernel_calcul_pathsP4int2j,42.144,16,0.031,1,1,11690128,1,1,1,0,28,11,0,-1,,0,168,0,1588,0,1,0,0,54,0,0,72,0,0,54,72,0,0
+122937ea287317e0,memcpyDtoA,4.128,7,,,,,,,,,,,0,0,576,0
+122937ea28736640,_Z23kernel_neutre_img2estimPjjj,453.632,10,0.5,64,64,4,8,8,1,0,32,5,0,-1,,0,0,0,5496,0,410,0,0,0,0,0,13120,0,0,0,274,2460,820
+122937ea288f0a20,memcpyAtoD,51.296,7,,,,,,,,,,,0,0,1048576,0
+122937ea288fdce0,memcpyDtoA,47.456,4,,,,,,,,,,,0,0,1048576,0
+122937ea2890ef80,_Z29kernel_levelines_texture_smemPjjjj,2313.95,11,0.5,64,64,11454132,8,8,1,0,308,18,0,-1,,0,47434,12,760498,69890,410,0,0,0,0,0,12760,0,0,0,271,1211981,29748
+122937ea28b448e0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122937ea28b53ec0,_Z30kernel_estim_next_step_texturePjjjj,497.632,7,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15636,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea28bce100,memcpyDtoA,47.712,4,,,,,,,,,,,0,0,1048576,0
+122937ea28bdb740,_Z29kernel_levelines_texture_smemPjjjj,2293.38,4,0.5,64,64,337,8,8,1,0,308,18,0,-1,,0,47434,12,760497,69704,410,0,0,0,0,0,12760,0,0,0,271,1216149,27759
+122937ea28e0c000,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122937ea28e195c0,_Z30kernel_estim_next_step_texturePjjjj,478.784,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15634,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea28e8ee40,memcpyDtoA,1401.6,4,,,,,,,,,,,0,0,1048576,0
+122937ea28fe6f20,_Z29kernel_levelines_texture_smemPjjjj,2285.98,3,0.5,64,64,40,8,8,1,0,308,18,0,-1,,0,47434,12,760495,69267,410,0,0,0,0,0,12760,0,0,0,271,1211639,27223
+122937ea29215b20,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122937ea292230e0,_Z30kernel_estim_next_step_texturePjjjj,476.352,4,0.5,64,64,127,8,8,1,0,36,9,0,-1,,0,822,0,15634,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea29298000,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122937ea292a5580,_Z29kernel_levelines_texture_smemPjjjj,2276.77,4,0.5,64,64,8415296,8,8,1,0,308,18,0,-1,,0,47434,12,760494,68848,410,0,0,0,0,0,12760,0,0,0,271,1202995,26669
+122937ea294d1d60,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122937ea294df340,_Z30kernel_estim_next_step_texturePjjjj,480.416,4,0.5,64,64,1072693248,8,8,1,0,36,9,0,-1,,0,822,0,15633,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea29555240,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122937ea296a25a0,_Z29kernel_levelines_texture_smemPjjjj,2269.89,4,0.5,64,64,1,8,8,1,0,308,18,0,-1,,0,47434,12,760496,68781,410,0,0,0,0,0,12760,0,0,0,271,1203474,26190
+122937ea298cd2a0,memcpyDtoA,47.744,3,,,,,,,,,,,0,0,1048576,0
+122937ea298da900,_Z30kernel_estim_next_step_texturePjjjj,486.432,3,0.5,64,64,16,8,8,1,0,36,9,0,-1,,0,822,0,15635,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea29951f60,memcpyDtoA,47.712,3,,,,,,,,,,,0,0,1048576,0
+122937ea2995f5a0,_Z29kernel_levelines_texture_smemPjjjj,2277.66,4,0.5,64,64,0,8,8,1,0,308,18,0,-1,,0,47434,12,760500,68764,410,0,0,0,0,0,12760,0,0,0,271,1205094,24836
+122937ea29b8c100,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122937ea29b996e0,_Z30kernel_estim_next_step_texturePjjjj,491.712,4,0.5,64,64,8,8,8,1,0,36,9,0,-1,,0,822,0,15636,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea29c12200,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122937ea29d1e8a0,_Z29kernel_levelines_texture_smemPjjjj,2273.31,4,0.5,64,64,8755104,8,8,1,0,308,18,0,-1,,0,47434,12,760499,68617,410,0,0,0,0,0,12760,0,0,0,271,1202684,27194
+122937ea29f4a300,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122937ea29f578e0,_Z30kernel_estim_next_step_texturePjjjj,473.216,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15634,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea29fcbbc0,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122937ea29fd91a0,_Z29kernel_levelines_texture_smemPjjjj,2265.38,4,0.5,64,64,0,8,8,1,0,308,18,0,-1,,0,47434,12,760499,68678,410,0,0,0,0,0,12760,0,0,0,271,1205362,24302
+122937ea2a202d00,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122937ea2a210320,_Z30kernel_estim_next_step_texturePjjjj,475.552,3,0.5,64,64,1076494336,8,8,1,0,36,9,0,-1,,0,822,0,15636,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea2a284f00,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122937ea2a38c880,_Z29kernel_levelines_texture_smemPjjjj,2268.16,3,0.5,64,64,11096400,8,8,1,0,308,18,0,-1,,0,47434,12,760496,68770,410,0,0,0,0,0,12760,0,0,0,271,1205150,24514
+122937ea2a5b6ec0,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122937ea2a5c44c0,_Z30kernel_estim_next_step_texturePjjjj,474.496,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15634,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea2a638ca0,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122937ea2a6462e0,_Z29kernel_levelines_texture_smemPjjjj,2277.92,4,0.5,64,64,144,8,8,1,0,308,18,0,-1,,0,47434,12,760500,68681,410,0,0,0,0,0,12760,0,0,0,271,1204010,25654
+122937ea2a872f80,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122937ea2a880540,_Z30kernel_estim_next_step_texturePjjjj,472.32,3,0.5,64,64,48,8,8,1,0,36,9,0,-1,,0,822,0,15636,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea2a8f4480,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122937ea2aa00920,_Z29kernel_levelines_texture_smemPjjjj,2275.33,4,0.5,64,64,11937498,8,8,1,0,308,18,0,-1,,0,47606,11,763277,68860,410,0,0,0,0,0,12760,0,0,0,272,1203314,25998
+122937ea2ac2cb80,memcpyDtoA,47.776,3,,,,,,,,,,,0,0,1048576,0
+122937ea2ac3a1e0,_Z30kernel_estim_next_step_texturePjjjj,479.136,4,0.5,64,64,6357093,8,8,1,0,36,9,0,-1,,0,822,0,15634,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea2acafbc0,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122937ea2acbd1a0,_Z29kernel_levelines_texture_smemPjjjj,2268.19,5,0.5,64,64,6488165,8,8,1,0,308,18,0,-1,,0,47434,12,760497,68864,410,0,0,0,0,0,12760,0,0,0,271,1204950,24714
+122937ea2aee7800,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122937ea2aef4da0,_Z30kernel_estim_next_step_texturePjjjj,468.896,3,0.5,64,64,3211309,8,8,1,0,36,9,0,-1,,0,822,0,15634,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea2af67fc0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122937ea2b06fe40,_Z29kernel_levelines_texture_smemPjjjj,2280.93,4,0.5,64,64,3670061,8,8,1,0,308,18,0,-1,,0,47434,12,760497,68707,410,0,0,0,0,0,12760,0,0,0,271,1203092,26572
+122937ea2b29d660,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122937ea2b2aad60,_Z30kernel_estim_next_step_texturePjjjj,486.016,4,0.5,64,64,3407923,8,8,1,0,36,9,0,-1,,0,822,0,15634,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea2b322220,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122937ea2b32f800,_Z29kernel_levelines_texture_smemPjjjj,2272.99,4,0.5,64,64,3211314,8,8,1,0,308,18,0,-1,,0,47434,12,760500,69017,410,0,0,0,0,0,12760,0,0,0,271,1205428,24236
+122937ea2b55b120,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122937ea2b5686c0,_Z30kernel_estim_next_step_texturePjjjj,490.4,4,0.5,64,64,3145773,8,8,1,0,36,9,0,-1,,0,822,0,15634,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea2b5e0ce0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122937ea2b6e5e20,_Z29kernel_levelines_texture_smemPjjjj,2271.9,3,0.5,64,64,11665408,8,8,1,0,308,18,0,-1,,0,47434,12,760503,68833,410,0,0,0,0,0,12760,0,0,0,271,1203294,26370
+122937ea2b911340,memcpyDtoA,47.712,4,,,,,,,,,,,0,0,1048576,0
+122937ea2b91e980,_Z30kernel_estim_next_step_texturePjjjj,479.776,3,0.5,64,64,10922,8,8,1,0,36,9,0,-1,,0,822,0,15634,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122937ea2ba8b1a0,memcpyAtoH,11.936,782,,,,,,,,,,,0,0,576,0
+122937ea2beeee40,memcpyDtoH,192.032,1729,,,,,,,,,,,0,0,1048576,0
// lib spec
#include "defines.h"
#include "levelines_common.h"
-
#include "levelines_kernels.cu"
-__global__ void kernel_debil(unsigned int * ptr1, unsigned int * ptr2, unsigned int L, int val){
-
- unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
- unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
- unsigned int pos = i*L +j ;
+int main(int argc, char **argv){
- ptr2[pos] = val - ptr1[pos] ;
+ // use device 0 (hard-coded; not necessarily the device with the highest Gflops/s)
+ cudaSetDevice( 0 );
-}
-
-int main(int argc, char **argv){
+ unsigned int timer ;
+ cutilCheckError( cutCreateTimer(&timer) );
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
+
+ //alloc bidon pour anticiper l'initialisation du GPU
+ short * d_bidon ;
+ cutilSafeCall( cudaMalloc( (void**) &d_bidon, sizeof(short))) ;
//float coef_regul = atof( argv[1] ) ;
- unsigned int timer ;
- cutilCheckError( cutCreateTimer(&timer) );
- cutilCheckError( cutResetTimer(timer) );
/*****************************
* CHARGEMENT IMAGE
*****************************/
char* image_path = argv[argc-1];
- char* image_out = "./image_out.pgm" ;
+ unsigned int r = atoi(argv[1]) ;
+ bool seq_out = atoi(argv[2]) ;
+ unsigned int iter , nb_iter = atoi(argv[3]) ;
+ unsigned int poids = 15 ;
+ char * image_out_base = "./image_out" ;
+ char * pgm_ext = ".pgm" ;
+ char image_out[80] ;
unsigned int * h_data = NULL ;
unsigned int * h_data_out = NULL ;
- unsigned int H, L, size;
-
- cutilCheckError( cutStartTimer(timer) );
- cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
- cutilCheckError( cutStopTimer(timer) );
+ unsigned int H, L, size, psize ;
- size = H * L * sizeof( unsigned int );
- printf("Loaded %d x %d = %d pixels from '%s' en %f ms,\n", L, H, size, image_path, cutGetTimerValue(timer));
-
-
- //essai alloc mapped
- /*
- cutilCheckError( cutResetTimer(timer) );
- cutilCheckError( cutStartTimer(timer) );
- unsigned int * h_ptr1, * d_ptr1 ;
- unsigned int * h_ptr2, * d_ptr2 ;
- int h = ;
- int l = h ;
- int mem = h*l*sizeof(unsigned int) ;
- cutilSafeCall(cudaSetDeviceFlags(cudaDeviceMapHost));
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps set flag Mapped : %f ms\n", cutGetTimerValue(timer)) ;
-
- cutilCheckError( cutStartTimer(timer) );
- cutilSafeCall(cudaHostAlloc((void **)&h_ptr1, mem, cudaHostAllocMapped));
- cutilSafeCall(cudaHostAlloc((void **)&h_ptr2, mem, cudaHostAllocMapped));
+ // chargt image
+ cutilCheckError( cutLoadPGMi(image_path, &h_data, &L, &H));
cutilCheckError( cutStopTimer(timer) );
- printf("Temps cumul alloc Mapped : %f ms\n", cutGetTimerValue(timer)) ;
- for (int i = 0; i<h*l ; i++) h_ptr1[i] = 200 ;
+ size = H * L * sizeof( unsigned int );
+ psize = (r-1)*PSIZE_I*sizeof(int2) ;
- cutilCheckError( cutStartTimer(timer) );
- cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr1, (void *)h_ptr1, 0));
- cutilSafeCall(cudaHostGetDevicePointer((void **)&d_ptr2, (void *)h_ptr2, 0));
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps cumul get pointer Mapped : %f ms\n", cutGetTimerValue(timer)) ;
-
- cutilCheckError( cutStartTimer(timer) );
- dim3 blocks(16,16,1) ;
- dim3 grid( h / blocks.x, l / blocks.y, 1 ) ;
-
- kernel_debil<<< grid, blocks >>>(d_ptr1, d_ptr2, l, 255) ;
+ printf("Longueur des chemins = %d pixels\n", r);
+ printf("Init GPU + Image %s %d x %d = %d pixels en %f ms,\n", image_path, L, H, size, cutGetTimerValue(timer));
- cutilCheckError( cutStopTimer(timer) );
- printf("Temps total Mapped : %f ms\n", cutGetTimerValue(timer)) ;
-
- char * image_1 = "./image_1.pgm" ;
- char * image_2 = "./image_2.pgm" ;
-
- cutilCheckError( cutSavePGMi(image_1, h_ptr1, l, h) ) ;
- cutilCheckError( cutSavePGMi(image_2, h_ptr2, l, h) ) ;
- */
/*****************************
* FIN CHARGEMENT IMAGE
*****************************/
-
-
-
- // use device with highest Gflops/s
- cudaSetDevice( cutGetMaxGflopsDeviceId() );
-
-
- /*
- cutilSafeCall( cudaMallocArray(&a_Src, &floatTex, imageW, imageH) );
- cutilSafeCall( cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)) );
- cutilSafeCall( cudaThreadSynchronize() );
- cutilCheckError( cutResetTimer(hTimer) );
- cutilCheckError( cutStartTimer(hTimer) );
-
- cutilSafeCall( cudaThreadSynchronize() );
- cutilCheckError( cutStopTimer(hTimer) );
- gpuTime = cutGetTimerValue(hTimer) / (float)iterations;
- */
cutilCheckError( cutResetTimer(timer) );
cutilCheckError( cutStartTimer(timer) );
// allocation mem GPU
unsigned int * d_directions =NULL ;
unsigned int * d_lniv, * d_estim = NULL ;
+ int2 * d_paths ;
cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
cutilSafeCall( cudaMalloc( (void**) &d_estim, size ) );
+ cutilSafeCall( cudaMalloc( (void**) &d_paths, psize ) );
// allocate array and copy image data
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
- cudaArray * array_img_in, *array_img_estim, *array_img_lniv;
+ cudaChannelFormatDesc channelDescP = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindSigned);
+
+ cudaArray * array_img_in, *array_img_estim, *array_img_lniv, *array_paths;
cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
cutilSafeCall( cudaMemcpyToArray( array_img_in, 0, 0, h_data, size, cudaMemcpyHostToDevice)) ;
cutilSafeCall( cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc));
- cutilCheckError( cutStopTimer(timer) );
cutilSafeCall( cudaMallocArray( &array_img_estim, &channelDesc, L, H ));
cutilSafeCall( cudaBindTextureToArray( tex_img_estim, array_img_estim, channelDesc));
cutilSafeCall( cudaMallocArray( &array_img_lniv, &channelDesc, L, H ));
cutilSafeCall( cudaBindTextureToArray( tex_img_lniv, array_img_lniv, channelDesc));
+ cutilSafeCall( cudaMallocArray( &array_paths, &channelDescP, (r-1), PSIZE_I ));
+ cutilSafeCall( cudaBindTextureToArray( tex_paths, array_paths, channelDescP));
+
+ cutilCheckError( cutStopTimer(timer) );
printf("Temps alloc + transferts en Textures : %f ms\n", cutGetTimerValue(timer)) ;
+
/*****************************
- * APPELS KERNELS et chronos
+ * GENERATION des CHEMINS
*****************************/
cutilCheckError( cutResetTimer(timer) );
cutilCheckError( cutStartTimer(timer) );
- unsigned int iter , nb_iter = 15 ;
- unsigned int poids = 15 ;
- dim3 dimBlock(8,8,1) ;
- dim3 dimGrid( H / dimBlock.x, L / dimBlock.y, 1 ) ;
- unsigned int smem_size = dimBlock.x * dimBlock.y * sizeof(unsigned int) ;
- // init image estimee avec image_in
+ dim3 dimBlock(1,1,1) ;
+ dim3 dimGrid(1,1,1) ;
+ // calcul des chemins
+ kernel_calcul_paths<<< dimGrid, dimBlock, 0 >>>(d_paths, r);
+
+ // copie du tableau en texture
+ cutilSafeCall( cudaMemcpyToArray( array_paths, 0, 0, d_paths, psize, cudaMemcpyDeviceToDevice)) ;
+
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Temps generation chemin + transfert en texture : %f ms\n", cutGetTimerValue(timer)) ;
+
+ /*****************************
+ * APPELS KERNELS et chronos
+ *****************************/
+ dimBlock = dim3(16,16,1) ;
+ dimGrid = dim3( H / dimBlock.x,L / dimBlock.y, 1 ) ;
+
+ // pour enregistrement image lniv GPU
+ free(h_data_out) ;
+ h_data_out = new unsigned int[H*L] ;
+
+ //init image estimee avec image_in
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
kernel_init_estim_from_img_in<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, 7);
- printf("Grille : %d x %d de Blocs : %d x %d - Shared mem : %d octets\n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y, smem_size) ;
+ // kernel de copie
+ /*
+ kernel_neutre_img2estim<<< dimGrid, dimBlock, 0>>>(d_estim, L, H);
+ */
+ cudaThreadSynchronize() ;
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Temps kernel init : %f ms\n", cutGetTimerValue(timer)) ;
+ // a remplacer par
+ /*
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
+ cutilSafeCall( cudaMemcpyFromArray( d_estim, array_img_in, 0, 0, size, cudaMemcpyDeviceToDevice)) ;
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Temps memcpyFromArray : %f ms\n", cutGetTimerValue(timer)) ;
+ */
+ printf("Grille : %d x %d de Blocs : %d x %d \n", dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y) ;
+
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
for ( iter =0 ; iter < nb_iter ; iter++ )
{
cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
- kernel_levelines_texture<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H );
+ kernel_levelines_texture_smem<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H, r );
cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
+ if (seq_out){
+ sprintf(image_out, "%s%d%s", image_out_base, iter+1, pgm_ext) ;
+ printf("chaine : %s\n", image_out);
+ cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
+ cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
+ }
kernel_estim_next_step_texture<<< dimGrid, dimBlock, 0 >>>(d_estim, L, H, poids) ;
}
cutilCheckError( cutStopTimer(timer) );
printf("Execution moy par kernel : %f ms\n", cutGetTimerValue(timer)/(float)nb_iter) ;
printf("Total pour %d kernels : %f ms\n", nb_iter, cutGetTimerValue(timer)) ;
-
+
/**************************
* VERIFS
**************************/
- //trace des lniv sur grille de 'pas x pas'
- //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255) ;
- //cudaThreadSynchronize();
-
- // enregistrement image lniv GPU
- h_data_out = new unsigned int[H*L] ;
- if ( h_data_out != NULL)
- cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
- else
- printf("Echec allocation mem CPU\n");
- cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
+ /**************************************************
+ * recuperation matrice des chemins pour affichage
+ **************************************************/
+
+ int2 * h_paths = new int2[(r-1)*PSIZE_I] ;
+ cutilSafeCall( cudaMemcpyFromArray(h_paths , array_paths, 0, 0, psize, cudaMemcpyDeviceToHost) );
+ /*
+ //verif Di
+ printf("matrice Di");
+ for(int idpath=0; idpath< PSIZE_I; idpath++){
+
+ printf("\n");
+ for(int idpix=0; idpix< r-1; idpix++){
+ printf(" % d ", h_paths[idpath*(r-1) + idpix].x );
+ }
+ printf("\t// %d°", idpath*15) ;
+ }
+ //verif Dj
+ printf("\nmatrice Dj");
+ for(int idpath=0; idpath< PSIZE_I; idpath++){
+ printf("\n");
+ for(int idpix=0; idpix< r-1; idpix++){
+ printf(" % d ", h_paths[idpath*(r-1) + idpix].y);
+ }
+ printf("\t// %d°", idpath*15) ;
+ }
+ printf("\n");
+ */
+ /***************************************************
+ * fin verif visuelle matrices des chemins
+ ***************************************************/
- // calcul lniv CPU
+ /***************************************************
+ * execution sequentielle pour comparaison
+ * la comparaison n'est pertinente que
+ * si d_lniv contient les lniv de l'image de départ
+ **************************************************/
+ /*
+ // calcul sequentiel
+ cutilCheckError( cutResetTimer(timer) );
+ cutilCheckError( cutStartTimer(timer) );
+ h_data_out = new unsigned int[H*L] ;
+ int * dout = new int[H*L] ;
+ for ( iter =0 ; iter < nb_iter ; iter++ ){
+ for (int i=r; i<= H-r; i++){
+ for (int j=r; j<= L-r; j++){
+ h_data_out[i*L+j] = lniv4_value(h_data, h_paths, i, j, H, L, &dout[i*L+j], r) ;
+ }
+ }
+ }
+ cutilCheckError( cutStopTimer(timer) );
+ printf("Execution sequentielle CPU : %f ms\n", cutGetTimerValue(timer)) ;
+ // comparaison
+ unsigned int * h_lniv = new unsigned int[H*L] ;
+ int pos, cpt_err=0, cpt_pix=0 ;
+ cutilSafeCall( cudaMemcpy(h_lniv , d_lniv, size, cudaMemcpyDeviceToHost) );
+ for ( iter =0 ; iter < nb_iter ; iter++ ){
+ for (int i=r; i<= H-r; i++){
+ for (int j=r; j<= L-r; j++){
+ pos = i*L + j ;
+ if ( h_data_out[ pos ] != h_lniv[ pos ] ) {
+ cpt_err++ ;
+ printf(" pixel ( %d , %d ) -> GPU= %d CPU= %d \n", i, j, h_lniv[pos], h_data_out[pos]);
+ }
+ cpt_pix++ ;
+ }
+ }
+ }
+ printf("TAUX ERREUR GPU/CPU : %d / %d \n", cpt_err, cpt_pix );
+ */
+ //trace des lniv sur grille de 'pas x pas'
+ //kernel_trace_levelines<<< dimGrid, dimBlock, 0 >>>(d_data, d_directions, d_data2, L, H, 16, 255, r) ;
+
+ if (!seq_out){
+ if ( h_data_out != NULL)
+ cutilSafeCall( cudaMemcpy(h_data_out , d_estim, size, cudaMemcpyDeviceToHost) );
+ else
+ printf("Echec allocation mem CPU\n");
+ sprintf(image_out, "%s%d%s", image_out_base, iter+1, pgm_ext) ;
+ cutilCheckError( cutSavePGMi(image_out, h_data_out, L, H) ) ;
+ }
+
// TODO verifier pourquoi les deux lignes suivantes produisent une erreur
//cutilExit(argc, argv);
//cudaThreadExit();
--- /dev/null
+<!DOCTYPE cvp>
+<cvp version="4.0" >
+ <session>
+ <name>Session1</name>
+ <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
+ <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
+ <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
+ <datetime>21 Jun 2011 10:18:11</datetime>
+ <normalizedcounter>false</normalizedcounter>
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="1" >
+ <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
+ <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
+ <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
+ <run number="5" >gst_128b</run>
+ <run number="6" >gld_request,gst_request</run>
+ <run number="7" >tex_cache_hit,tex_cache_miss</run>
+ </context>
+ </device>
+ </session>
+</cvp>
+
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef886fccd8
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+122933fc4123b400,memcpyHtoA,177.376,329,,,,,,,,,,,0,0,1048576,0
+122933fc412a2780,_Z19kernel_calcul_pathsP4int2j,43.296,12,0.031,1,1,-1386402296,1,1,1,0,28,11,0,-1,,0,168,0,1587,0,1,0,0,54,0,0,72,0,0,54,72,0,0
+122933fc412adae0,memcpyDtoA,4.256,5,,,,,,,,,,,0,0,576,0
+122933fc412b2800,_Z23kernel_neutre_img2estimPjjj,446.912,9,0.5,64,64,1074266112,8,8,1,0,32,5,0,-1,,0,0,0,5448,0,409,0,0,0,0,0,13088,0,0,0,272,2454,818
+122933fc4132a1e0,memcpyAtoD,50.528,6,,,,,,,,,,,0,0,1048576,0
+122933fc41337180,memcpyDtoA,47.424,4,,,,,,,,,,,0,0,1048576,0
+122933fc41348200,_Z29kernel_levelines_texture_smemPjjjj,3595.01,10,0.5,64,64,0,8,8,1,0,1064,17,0,-1,,0,72362,4638,841352,997341,410,0,0,0,0,0,12802,0,0,0,272,1196850,43854
+122933fc416b6760,memcpyDtoA,47.744,3,,,,,,,,,,,0,0,1048576,0
+122933fc416c5d80,_Z30kernel_estim_next_step_texturePjjjj,477.824,6,0.5,64,64,1075052544,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122933fc4173b240,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122933fc41748840,_Z29kernel_levelines_texture_smemPjjjj,3596.54,4,0.5,64,64,1,8,8,1,0,1064,17,0,-1,,0,72795,5071,843084,998235,410,0,0,0,0,0,12804,0,0,0,272,1199869,41987
+122933fc41ab7380,memcpyDtoA,47.712,3,,,,,,,,,,,0,0,1048576,0
+122933fc41ac4a00,_Z30kernel_estim_next_step_texturePjjjj,480.928,4,0.5,64,64,1075052544,8,8,1,0,36,9,0,-1,,0,816,0,15513,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122933fc41b3aae0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122933fc41b480e0,_Z29kernel_levelines_texture_smemPjjjj,3589.54,4,0.5,64,64,1,8,8,1,0,1064,17,0,-1,,0,72875,5149,843406,999080,409,0,0,0,0,0,12800,0,0,0,272,1195950,44370
+122933fc41eb50e0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122933fc41ec26e0,_Z30kernel_estim_next_step_texturePjjjj,477.792,4,0.5,64,64,1074266112,8,8,1,0,36,9,0,-1,,0,816,0,15513,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122933fc41f37bc0,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122933fc41f45200,_Z29kernel_levelines_texture_smemPjjjj,3592.61,3,0.5,64,64,1,8,8,1,0,1064,17,0,-1,,0,72741,5017,842869,995949,409,0,0,0,0,0,12788,0,0,0,272,1196521,43799
+122933fc422b2e00,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122933fc422c03e0,_Z30kernel_estim_next_step_texturePjjjj,461.92,4,0.5,64,64,1074266112,8,8,1,0,36,9,0,-1,,0,816,0,15512,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122933fc42331a80,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122933fc4233f100,_Z29kernel_levelines_texture_smemPjjjj,3582.94,3,0.5,64,64,1,8,8,1,0,1064,17,0,-1,,0,72787,5061,843054,1000048,410,0,0,0,0,0,12815,0,0,0,272,1197435,44805
+122933fc426aa720,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122933fc426b7da0,_Z30kernel_estim_next_step_texturePjjjj,479.776,4,0.5,64,64,3080302,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122933fc4272da40,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122933fc4273b040,_Z29kernel_levelines_texture_smemPjjjj,3586.02,4,0.5,64,64,3080312,8,8,1,0,1064,17,0,-1,,0,72422,4944,839570,994815,410,0,0,0,0,0,12802,0,0,0,271,1198152,42552
+122933fc42aa7280,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122933fc42ab4880,_Z30kernel_estim_next_step_texturePjjjj,480.544,3,0.5,64,64,7536737,8,8,1,0,36,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122933fc42b2a7e0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122933fc42b37de0,_Z29kernel_levelines_texture_smemPjjjj,3599.3,4,0.5,64,64,7733353,8,8,1,0,1064,17,0,-1,,0,72371,4891,839365,996460,410,0,0,0,0,0,12804,0,0,0,271,1199816,42040
+122933fc42ea73e0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122933fc42eb49e0,_Z30kernel_estim_next_step_texturePjjjj,474.464,4,0.5,64,64,3211296,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122933fc42f291a0,memcpyDtoA,47.488,3,,,,,,,,,,,0,0,1048576,0
+122933fc42f36760,_Z29kernel_levelines_texture_smemPjjjj,3595.94,4,0.5,64,64,7143535,8,8,1,0,1064,17,0,-1,,0,72068,4840,836122,989513,409,0,0,0,0,0,12800,0,0,0,270,1197365,42955
+122933fc432a5060,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122933fc432b2700,_Z30kernel_estim_next_step_texturePjjjj,472.832,4,0.5,64,64,7471218,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122933fc43326880,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122933fc43333e80,_Z29kernel_levelines_texture_smemPjjjj,3590.69,4,0.5,64,64,6357101,8,8,1,0,1064,17,0,-1,,0,72013,4785,835899,989204,409,0,0,0,0,0,12788,0,0,0,270,1198226,42094
+122933fc436a1300,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122933fc436ae940,_Z30kernel_estim_next_step_texturePjjjj,496.352,4,0.5,64,64,7274595,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122933fc43728680,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122933fc43735c80,_Z29kernel_levelines_texture_smemPjjjj,3592.03,4,0.5,64,64,3080307,8,8,1,0,1064,17,0,-1,,0,71997,4769,835835,989426,410,0,0,0,0,0,12815,0,0,0,270,1196748,45492
+122933fc43aa3640,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122933fc43ab0cc0,_Z30kernel_estim_next_step_texturePjjjj,484.352,4,0.5,64,64,7209071,8,8,1,0,36,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122933fc43b27b20,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122933fc43b35180,_Z29kernel_levelines_texture_smemPjjjj,3600.42,4,0.5,64,64,3014754,8,8,1,0,1064,17,0,-1,,0,71962,4734,835697,990688,410,0,0,0,0,0,12802,0,0,0,270,1198105,42599
+122933fc43ea4be0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122933fc43eb21e0,_Z30kernel_estim_next_step_texturePjjjj,477.504,4,0.5,64,64,7471136,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122933fc43f27580,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122933fc43f34ba0,_Z29kernel_levelines_texture_smemPjjjj,3592.61,4,0.5,64,64,2097207,8,8,1,0,1064,17,0,-1,,0,71991,4763,835810,989866,410,0,0,0,0,0,12804,0,0,0,270,1198680,43176
+122933fc442a27a0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122933fc442afe20,_Z30kernel_estim_next_step_texturePjjjj,469.472,4,0.5,64,64,51,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122933fc44323240,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122933fc44330880,_Z29kernel_levelines_texture_smemPjjjj,3599.94,3,0.5,64,64,-767360128,8,8,1,0,1064,17,0,-1,,0,71919,4691,835524,989067,409,0,0,0,0,0,12800,0,0,0,270,1193858,46462
+122933fc446a0120,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122933fc446ad740,_Z30kernel_estim_next_step_texturePjjjj,473.952,3,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122933fc44721d00,memcpyDtoA,47.648,4,,,,,,,,,,,0,0,1048576,0
+122933fc4472f340,_Z29kernel_levelines_texture_smemPjjjj,3589.06,3,0.5,64,64,-1386403096,8,8,1,0,1064,17,0,-1,,0,71947,4719,835636,987891,409,0,0,0,0,0,12788,0,0,0,270,1198457,41863
+122933fc44a9c160,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122933fc44aa9760,_Z30kernel_estim_next_step_texturePjjjj,476.576,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122933fc44b1e760,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122933fc44b2bdc0,_Z29kernel_levelines_texture_smemPjjjj,3590.59,4,0.5,64,64,0,8,8,1,0,1064,17,0,-1,,0,72420,4696,841587,993210,410,0,0,0,0,0,12815,0,0,0,272,1200675,41565
+122933fc44e991e0,memcpyDtoA,47.776,3,,,,,,,,,,,0,0,1048576,0
+122933fc44ea68a0,_Z30kernel_estim_next_step_texturePjjjj,477.408,4,0.5,64,64,0,8,8,1,0,36,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122933fc44f57380,memcpyAtoH,11.552,30,,,,,,,,,,,0,0,576,0
+122933fc44f605e0,memcpyDtoH,191.136,740,,,,,,,,,,,0,0,1048576,0