# Rules and targets
include ../../common/common.mk
+
+zul:
+ echo $(NVCCFLAGS)
\ No newline at end of file
#define BSMAX 512
#define MAX(x,y) ( ( (x)>=(y) )?(x):(y) )
#define ABS(x) ( ((x)>0)?(x):-(x))
-#define DEC 4
-#define DEC2 8
+#define DEC 3
+#define DEC2 6
#define CONFLICT_FREE_OFFSET(index) ( ((index) >>(DEC)) + ((index) >>(DEC2) ) )
#define CFO(index) ( ( (index) >>(DEC) ) + ( (index) >>(DEC2) ) )
#define CFI(index) ( (index) + (CFO(index)) )
texture<int, 2, cudaReadModeElementType> tex_img_in ;
texture<int, 2, cudaReadModeElementType> tex_img_estim ;
texture<int, 2, cudaReadModeElementType> tex_img_lniv ;
-texture<int2, 2, cudaReadModeElementType> tex_paths ;
+texture<ushort, 2, cudaReadModeElementType> tex_paths ;
* considérés pour le calcul de chemins (float tangente[]).
*
*/
-__global__ void kernel_calcul_paths( int2 * d_paths, unsigned int r){
+__global__ void kernel_calcul_paths( ushort * d_paths, unsigned int r){
unsigned int idpath = 0 ;
int ic, jc, iprec, jprec ;
float offset = 0.5 ;
unsigned int basepath = 0 ;
+ char MSQ, LSQ ;
// Q1 inf
for (int a=0 ; a< 4 ; a++){ // les 4 angles 0,15,30 et 45
for (int p=0 ; p< r ; p++){ // les r points
ic = r-1 - floor(tangente[a]*p + offset) ;
if ( p > 0 ){
- d_paths[idpath*(r-1)+p-1].x = ic - iprec ;
- d_paths[idpath*(r-1)+p-1].y = 1 ;
+ MSQ = ic - iprec ;
+ LSQ = 1 ;
+ //d_paths[idpath*(r-1)+p-1].x = ic - iprec ;
+ //d_paths[idpath*(r-1)+p-1].y = 1 ;
+ d_paths[idpath*(r-1)+p-1] = ((short)MSQ << 8) | LSQ ;
}
iprec = ic ;
}
for (int p=0 ; p< r ; p++){ // les r points
jc = floor(tangente[a]*p + offset) ;
if ( p > 0 ){
- d_paths[idpath*(r-1)+p-1].x = -1 ;
- d_paths[idpath*(r-1)+p-1].y = jc - jprec ;
+ MSQ = -1 ;
+ LSQ = jc - jprec ;
+ d_paths[idpath*(r-1)+p-1] = ((short)MSQ << 8) | LSQ ;
+ //d_paths[idpath*(r-1)+p-1].x = -1 ;
+ //d_paths[idpath*(r-1)+p-1].y = jc - jprec ;
}
jprec = jc ;
}
basepath += 6 ;
for (int a=0 ; a< 6 ; a++){ // les 6 angles 90,105,120,135,150,165
for (int p=0 ; p<r-1 ; p++){ // les r points
- d_paths[idpath*(r-1)+p].x = -d_paths[(idpath - basepath)*(r-1)+p].y ;
- d_paths[idpath*(r-1)+p].y = d_paths[(idpath - basepath)*(r-1)+p].x ;
+ // Same mapping as the former int2 version, (x, y) -> (-y, x), with x held in the
+ // high byte and y in the low byte of the packed ushort. Each byte is read back as
+ // a signed 8-bit step before being negated or copied.
+ MSQ = - (signed char)( d_paths[(idpath - basepath)*(r-1)+p] & 0x00FF ) ;
+ LSQ = (signed char)( d_paths[(idpath - basepath)*(r-1)+p] >> 8 ) ;
+ // Store at element p, the same index as the read (p starts at 0 here, so p-1 would
+ // land in the previous path). Both bytes are masked before packing so that a
+ // negative LSQ cannot sign-extend over the high byte.
+ d_paths[idpath*(r-1)+p] = (ushort)( ( ((int)MSQ & 0xFF) << 8 ) | ( (int)LSQ & 0xFF ) ) ;
+ //d_paths[idpath*(r-1)+p].x = -d_paths[(idpath - basepath)*(r-1)+p].y ;
+ //d_paths[idpath*(r-1)+p].y = d_paths[(idpath - basepath)*(r-1)+p].x ;
}
idpath++ ;
}
basepath += 6 ;
for (int a=0 ; a< 6 ; a++){ // les 6 angles 180,195,210,225,240,255
for (int p=0 ; p<r-1 ; p++){ // les r points
- d_paths[idpath*(r-1)+p].x = -d_paths[(idpath - basepath)*(r-1)+p].x ;
- d_paths[idpath*(r-1)+p].y = -d_paths[(idpath - basepath)*(r-1)+p].y ;
+ // Same mapping as the former int2 version, (x, y) -> (-x, -y), with x held in the
+ // high byte and y in the low byte of the packed ushort. Each byte is read back as
+ // a signed 8-bit step before being negated.
+ MSQ = - (signed char)( d_paths[(idpath - basepath)*(r-1)+p] >> 8 ) ;
+ LSQ = - (signed char)( d_paths[(idpath - basepath)*(r-1)+p] & 0x00FF ) ;
+ // Store at element p, the same index as the read (p starts at 0 here, so p-1 would
+ // land in the previous path). Both bytes are masked before packing so that a
+ // negative LSQ cannot sign-extend over the high byte.
+ d_paths[idpath*(r-1)+p] = (ushort)( ( ((int)MSQ & 0xFF) << 8 ) | ( (int)LSQ & 0xFF ) ) ;
+ //d_paths[idpath*(r-1)+p].x = -d_paths[(idpath - basepath)*(r-1)+p].x ;
+ //d_paths[idpath*(r-1)+p].y = -d_paths[(idpath - basepath)*(r-1)+p].y ;
}
idpath++ ;
}
basepath += 6 ;
for (int a=0 ; a< 6 ; a++){ // les 6 angles 270,285,300,315,330,345
for (int p=0 ; p<r-1 ; p++){ // les r points
- d_paths[idpath*(r-1)+p].x = d_paths[(idpath - basepath)*(r-1)+p].y ;
- d_paths[idpath*(r-1)+p].y = -d_paths[(idpath - basepath)*(r-1)+p].x ;
+ // Same mapping as the former int2 version, (x, y) -> (y, -x), with x held in the
+ // high byte and y in the low byte of the packed ushort. Each byte is read back as
+ // a signed 8-bit step before being copied or negated.
+ MSQ = (signed char)( d_paths[(idpath - basepath)*(r-1)+p] & 0x00FF ) ;
+ LSQ = - (signed char)( d_paths[(idpath - basepath)*(r-1)+p] >> 8 ) ;
+ // Store at element p, the same index as the read (p starts at 0 here, so p-1 would
+ // land in the previous path). Both bytes are masked before packing so that a
+ // negative LSQ cannot sign-extend over the high byte.
+ d_paths[idpath*(r-1)+p] = (ushort)( ( ((int)MSQ & 0xFF) << 8 ) | ( (int)LSQ & 0xFF ) ) ;
+ //d_paths[idpath*(r-1)+p].x = d_paths[(idpath - basepath)*(r-1)+p].y ;
+ //d_paths[idpath*(r-1)+p].y = -d_paths[(idpath - basepath)*(r-1)+p].x ;
}
idpath++ ;
}
+
}
/**
* Execution sur des blocs de threads 2D et une grille 2D
* selon les dimensions de l'image.
* L'image d'entrée doit être au préalable en mémoire texture pointée par "tex_img_estim".
- * Les matrices des chemins sont, elles, pointées par "tex_paths"
+ * Les matrices des chemins sont, elles, préalablement chargées en SHMEM depuis la texture "tex_paths".
* Cette version ne fournit pas les indices des chemins pour les tracé éventuel des lniv.
*/
__global__ void kernel_levelines_texture(unsigned int * img_out, unsigned int L, unsigned int H, unsigned int r)
{
- // coordonnes du point dans l'image
+ // coordonnees du point dans le bloc
+ unsigned int iib = threadIdx.x ;
+ unsigned int jib = threadIdx.y ;
+ // coordonnees du point dans l'image
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
int idpath, idpix ;
unsigned int mse_min, mse_cur, val ;
uint2 mse ;
+ short texVal ;
+
+ extern __shared__ short shPath[] ;
+
+ // Linear index of this thread inside its 2D block (row-major over threadIdx.y, threadIdx.x).
+ unsigned int absPos = jib*blockDim.x + iib ;
+ // Threads with absPos < PSIZE_I each copy one path (lpath-1 packed steps) from the
+ // tex_paths texture into shPath. The row stride 24 matches the stride used when
+ // shPath is read later in this kernel.
+ // NOTE(review): assumes PSIZE_I <= 24 and a block of at least PSIZE_I threads - confirm.
+ if ( absPos < PSIZE_I ){
+ for ( idpix = 0; idpix < lpath-1; idpix++){
+ shPath[ idpix*24 + absPos ] = tex2D(tex_paths, idpix, absPos) ;
+ }
+ }
+ // The barrier is outside the conditional so that every thread of the block reaches it:
+ // threads that did not take part in the copy must still wait until shPath is filled
+ // before reading it, and a barrier inside a divergent branch is undefined behaviour.
+ __syncthreads() ;
if((i>=lpath)&&(i<=H-lpath)&&(j>=lpath)&&(j<=L-lpath)){
z = tex2D(tex_img_estim, j, i) ;
mse.x = z ;
mse.y = z*z ;
for( idpix=0; idpix < lpath-1 ; idpix++ ) {
- ic += tex2D(tex_paths, idpix, idpath).x ;
- jc += tex2D(tex_paths, idpix, idpath).y ;
+ texVal = shPath[ idpix*24 + idpath ] ;
+ ic += (char)(texVal>>8) ;
+ jc += (char)(texVal) ;
zc = tex2D(tex_img_estim, jc, ic) ;
mse.x += zc ;
mse.y += zc*zc ;
}
-/**
- *
- * \brief determine les lniv en chaque point de l'image
- * \author zulu - AND
- *
- * \param[in] L Largeur de l'image
- * \param[in] H Hauteur de l'image
- * \param[in] r longueur des segments
- *
- * \param[out] img_out image des lniv
- *
- * Execution sur des blocs de threads 2D et une grille 2D
- * selon les dimensions de l'image.
- * L'image d'entrée doit être au préalable en mémoire texture pointée par "tex_img_estim".
- * Les matrices des chemins sont, elles, pointées par "tex_paths"
- * Cette version ne fournit pas les indices des chemins pour les tracé éventuel des lniv.
- * Cette version tente d'utiliser la shared memory pour compenser la baisse de perf due aux chemins
- * paramétrables non constants.
- */
-
-__global__ void kernel_levelines_texture_smem(unsigned int * img_out, unsigned int L, unsigned int H, unsigned int r)
-{
- // coordonnées du point dans le bloc
- unsigned int iib = threadIdx.x ;
- unsigned int jib = threadIdx.y ;
- // coordonnes du point dans l'image
- unsigned int i = blockIdx.x*blockDim.x + iib ;
- unsigned int j = blockIdx.y*blockDim.y + jib ;
-
- // nb de points par chemin
- int lpath = r ;
- int ic, jc ;
- int idpath, idpix ;
- unsigned int val, mse_cur, mse_min, z, zc ;
- uint2 mse_data ;
-
- //__shared__ unsigned int val_img[16*16] ;
-
- //val_img[jib*16+iib] = tex2D(tex_img_estim, j, i) ;
-
- if((i>=lpath)&&(i<=H-lpath)&&(j>=lpath)&&(j<=L-lpath)){
- z = tex2D(tex_img_estim, j, i) ;
- for( idpath=0; idpath < PSIZE_I ; idpath++) {
- ic = i ;
- jc = j ;
- mse_data.x = z ;
- mse_data.y = z*z ;
- mse_min = mse_data.y - mse_data.x/lpath*mse_data.y ;
- for( idpix=0; idpix < lpath-1 ; idpix++ ) {
- ic += tex2D(tex_paths, idpix, idpath).x ;
- jc += tex2D(tex_paths, idpix, idpath).y ;
- zc = tex2D(tex_img_estim, jc, ic) ;
- mse_data.x += zc ;
- mse_data.y += zc*zc ;
- }
- // critere de selection du chemin ( SUM_(X2) - SUM_(X)2 / lpath )
- // TODO cherchera ameliorer pour vitesse
- mse_cur = ( mse_data.y - ( mse_data.x / lpath ) * mse_data.x ) ;
- if ( mse_cur < mse_min ){
- mse_min = mse_cur ;
- val = mse_data.x ;
- }
- }
- img_out[ i*L + j ] = val / lpath ;
- }
-}
-
/**
*
* \brief trace les segments sur un maillage carré
* execution sur des blocs de threads 2D et une grille 2D
* selon les dimensions de l'image
*/
+/*
__global__ void kernel_trace_levelines(unsigned int * img_in, unsigned int * dir, unsigned int * img_out,
unsigned int L, unsigned int H, unsigned int pas, unsigned int ng,
unsigned int r ){
}
}
+*/
<workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
<datetime>21 Jun 2011 10:47:10</datetime>
<normalizedcounter>false</normalizedcounter>
- <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
<context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="1" >
<run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
<run number="3" >cta_launched,local_load,local_store,gld_32b</run>
<workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
<datetime>21 Jun 2011 10:50:54</datetime>
<normalizedcounter>false</normalizedcounter>
- <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
<context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
<run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
<run number="3" >cta_launched,local_load,local_store,gld_32b</run>
<workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
<datetime>21 Jun 2011 11:10:33</datetime>
<normalizedcounter>false</normalizedcounter>
- <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
<context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
<run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
<run number="3" >cta_launched,local_load,local_store,gld_32b</run>
<workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
<datetime>21 Jun 2011 11:16:24</datetime>
<normalizedcounter>false</normalizedcounter>
- <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
<context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
<run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
<run number="3" >cta_launched,local_load,local_store,gld_32b</run>
<workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
<datetime>21 Jun 2011 11:30:11</datetime>
<normalizedcounter>false</normalizedcounter>
- <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
- <context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
- <run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
- <run number="3" >cta_launched,local_load,local_store,gld_32b</run>
- <run number="4" >gld_64b,gld_128b,gst_32b,gst_64b</run>
- <run number="5" >gst_128b</run>
- <run number="6" >gld_request,gst_request</run>
- <run number="7" >tex_cache_hit,tex_cache_miss</run>
- </context>
- </device>
- </session>
- <session>
- <name>Session19</name>
- <progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
- <cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
- <workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
- <datetime>21 Jun 2011 14:10:47</datetime>
- <normalizedcounter>false</normalizedcounter>
- <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
+ <device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
<context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
<run number="2" >branch,divergent_branch,instructions,warp_serialize</run>
<run number="3" >cta_launched,local_load,local_store,gld_32b</run>
</device>
</session>
<session>
- <name>Session21</name>
+ <name>smemPaths</name>
<progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
<cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
<workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
- <datetime>21 Jun 2011 14:27:50</datetime>
+ <datetime>22 Jun 2011 11:18:26</datetime>
<normalizedcounter>false</normalizedcounter>
<device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
<context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
</device>
</session>
<session>
- <name>Session22</name>
+ <name>Session17</name>
<progpath>"/home/perrot/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/lniv"</progpath>
<cmdlineargs>4 0 15 /home/perrot/Images/cochons/cochon_512b.pgm</cmdlineargs>
<workdirpath>/home/perrot/NVIDIA_GPU_Computing_SDK/C/src/lniv_gpu</workdirpath>
- <datetime>21 Jun 2011 14:34:00</datetime>
+ <datetime>22 Jun 2011 11:27:01</datetime>
<normalizedcounter>false</normalizedcounter>
<device multProcessorCount="30" warpSize="32" textureAlignment="256" maxRegPerBlock="16384" computeCapability="1.3" maxThreadPerBlock="512" totalConstMem="65536" maxGridDim="65535 65535" maxShareMemPerBlock="16384" id="0" chip="160" maxBlockDim="512 512 64" GpuOverlap="1" name="Tesla C1060" label="Device_0" clockFreq="1296000" maxPitchMem="2147483647" >
<context totalcountercount="0" invalidcountercount="0" id="0" label="Context_0" truncatedcountercount="0" trace="0" >
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef6d4ddfa0
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+1229865247212140,memcpyHtoA,177.856,336,,,,,,,,,,,0,0,1048576,0
+1229865247279f80,_Z19kernel_calcul_pathsPtj,47.808,13,0.031,1,1,13486368,1,1,1,0,32,10,0,-1,,0,168,0,1834,0,1,0,0,108,0,0,72,0,0,108,72,0,0
+1229865247286460,memcpyDtoA,4.352,6,,,,,,,,,,,0,0,144,0
+122986524728b1e0,_Z29kernel_init_estim_from_img_inPjjjj,1600.64,8,0.5,64,64,13,8,8,1,0,48,12,0,-1,,0,78756,10,478914,0,409,0,0,0,0,0,12416,0,0,0,266,625958,89992
+122986524741da80,memcpyDtoA,47.872,6,,,,,,,,,,,0,0,1048576,0
+122986524742eea0,_Z24kernel_levelines_texturePjjjj,1925.92,9,0.5,64,64,12923796,8,8,1,144,48,16,0,-1,,0,49113,148,716401,1106,410,0,0,0,0,0,12802,0,0,0,272,184897,54656
+1229865247605c40,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+1229865247615220,_Z30kernel_estim_next_step_texturePjjjj,479.392,6,0.5,64,64,1,8,8,1,0,48,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122986524768ad20,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229865247698320,_Z24kernel_levelines_texturePjjjj,1940.35,4,0.5,64,64,14781808,8,8,1,144,48,16,0,-1,,0,49113,149,716405,1062,410,0,0,0,0,0,12804,0,0,0,272,185864,53908
+12298652478728e0,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122986524787ff60,_Z30kernel_estim_next_step_texturePjjjj,472.448,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+12298652478f3f40,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+1229865247901580,_Z24kernel_levelines_texturePjjjj,1938.46,4,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,49113,147,716400,1080,409,0,0,0,0,0,12800,0,0,0,272,184958,54513
+1229865247adb480,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+1229865247ae8ae0,_Z30kernel_estim_next_step_texturePjjjj,472.8,3,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+1229865247b5cc20,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+1229865247b6a240,_Z24kernel_levelines_texturePjjjj,1946.3,3,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,49113,149,716406,1089,409,0,0,0,0,0,12788,0,0,0,272,184803,54668
+1229865247d45f40,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+1229865247d53520,_Z30kernel_estim_next_step_texturePjjjj,468.192,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15512,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+1229865247dc6460,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+1229865247dd3aa0,_Z24kernel_levelines_texturePjjjj,1931.3,4,0.5,64,64,14780256,8,8,1,144,48,16,0,-1,,0,49113,147,716403,1082,410,0,0,0,0,0,12815,0,0,0,272,185389,54456
+1229865247fabd20,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+1229865247fb9300,_Z30kernel_estim_next_step_texturePjjjj,468.128,3,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122986524802c220,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+12298652480397e0,_Z24kernel_levelines_texturePjjjj,1923.81,3,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,48941,149,713807,1061,410,0,0,0,0,0,12802,0,0,0,271,184460,55093
+122986524820fd20,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122986524821d340,_Z30kernel_estim_next_step_texturePjjjj,482.88,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+1229865248293bc0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+12298652482a11a0,_Z24kernel_levelines_texturePjjjj,1931.9,3,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,48941,147,713807,1039,410,0,0,0,0,0,12804,0,0,0,271,186162,53610
+12298652484796a0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229865248486c80,_Z30kernel_estim_next_step_texturePjjjj,478.944,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,822,0,15625,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+12298652484fc5a0,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+1229865248509c20,_Z24kernel_levelines_texturePjjjj,1942.02,4,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,48752,146,711135,1071,409,0,0,0,0,0,12800,0,0,0,270,184874,54597
+12298652486e48a0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+12298652486f1ee0,_Z30kernel_estim_next_step_texturePjjjj,479.84,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+1229865248767b80,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+1229865248775160,_Z24kernel_levelines_texturePjjjj,1925.86,4,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,48752,146,711137,1052,409,0,0,0,0,0,12788,0,0,0,270,184892,54579
+122986524894be80,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+12298652489594c0,_Z30kernel_estim_next_step_texturePjjjj,473.632,3,0.5,64,64,32767,8,8,1,0,48,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+12298652489cd920,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+12298652489dafc0,_Z24kernel_levelines_texturePjjjj,1946.14,4,0.5,64,64,144,8,8,1,144,48,16,0,-1,,0,48752,146,711139,1029,410,0,0,0,0,0,12815,0,0,0,270,185136,54709
+1229865248bb6c40,memcpyDtoA,47.712,4,,,,,,,,,,,0,0,1048576,0
+1229865248bc4320,_Z30kernel_estim_next_step_texturePjjjj,470.08,4,0.5,64,64,48,8,8,1,0,48,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+1229865248c379c0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+1229865248c44fe0,_Z24kernel_levelines_texturePjjjj,1932.83,4,0.5,64,64,13964298,8,8,1,144,48,16,0,-1,,0,48752,146,711138,1048,410,0,0,0,0,0,12802,0,0,0,270,184798,54755
+1229865248e1d840,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+1229865248e2aea0,_Z30kernel_estim_next_step_texturePjjjj,467.36,4,0.5,64,64,7602289,8,8,1,0,48,9,0,-1,,0,822,0,15626,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+1229865248e9daa0,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+1229865248eab120,_Z24kernel_levelines_texturePjjjj,1921.12,4,0.5,64,64,6226030,8,8,1,144,48,16,0,-1,,0,48752,146,711133,1089,410,0,0,0,0,0,12804,0,0,0,270,185686,54086
+1229865249080bc0,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122986524908e240,_Z30kernel_estim_next_step_texturePjjjj,467.456,4,0.5,64,64,3145780,8,8,1,0,48,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+1229865249100e80,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122986524910e480,_Z24kernel_levelines_texturePjjjj,1931.62,3,0.5,64,64,3670071,8,8,1,144,48,16,0,-1,,0,48752,146,711135,1087,409,0,0,0,0,0,12800,0,0,0,270,185448,54023
+12298652492e6840,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+12298652492f3ea0,_Z30kernel_estim_next_step_texturePjjjj,485.568,4,0.5,64,64,3145776,8,8,1,0,48,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122986524936b1c0,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+12298652493787c0,_Z24kernel_levelines_texturePjjjj,1927.55,4,0.5,64,64,6225971,8,8,1,144,48,16,0,-1,,0,48752,146,711136,1043,409,0,0,0,0,0,12788,0,0,0,270,185851,53620
+122986524954fb80,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122986524955d1e0,_Z30kernel_estim_next_step_texturePjjjj,475.648,3,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+12298652495d1e40,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+12298652495df480,_Z24kernel_levelines_texturePjjjj,1971.1,4,0.5,64,64,64,8,8,1,144,48,16,0,-1,,0,49113,150,716402,1074,410,0,0,0,0,0,12815,0,0,0,272,185344,54501
+12298652497c1280,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+12298652497ce8a0,_Z30kernel_estim_next_step_texturePjjjj,476.032,3,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122986524988d200,memcpyDtoH,191.296,740,,,,,,,,,,,0,0,1048576,0
--- /dev/null
+# CUDA_PROFILE_LOG_VERSION 2.0
+# CUDA_PROFILE_CSV 1
+# TIMESTAMPFACTOR fffff6ef6cfa6038
+# CUDA_DEVICE 0 Tesla C1060
+gpustarttimestamp,method,gputime,cputime,occupancy,gridSizeX,gridSizeY,gridSizeZ,blockSizeX,blockSizeY,blockSizeZ,dynSmemPerBlock,staSmemPerBlock,registerPerThread,streamID,localblocksize,memTransferSize,memtransferhostmemtype,branch,divergent_branch,instructions,warp_serialize,cta_launched,local_load,local_store,gld_32b,gld_64b,gld_128b,gst_32b,gst_64b,gst_128b,gld_request,gst_request,tex_cache_hit,tex_cache_miss
+122985da5f305f60,memcpyHtoA,177.92,331,,,,,,,,,,,0,0,1048576,0
+122985da5f36ee60,_Z19kernel_calcul_pathsPtj,47.808,14,0.031,1,1,13732448,1,1,1,0,32,10,0,-1,,0,168,0,1834,0,1,0,0,108,0,0,72,0,0,108,72,0,0
+122985da5f37b380,memcpyDtoA,4.32,5,,,,,,,,,,,0,0,144,0
+122985da5f3800e0,_Z29kernel_init_estim_from_img_inPjjjj,1586.94,8,0.5,64,64,8,8,8,1,0,48,12,0,-1,,0,78756,10,478915,0,409,0,0,0,0,0,12416,0,0,0,266,620103,95847
+122985da5f50d800,memcpyDtoA,48,6,,,,,,,,,,,0,0,1048576,0
+122985da5f51ebc0,_Z24kernel_levelines_texturePjjjj,1930.4,9,0.5,64,64,12923760,8,8,1,144,48,16,0,-1,,0,49113,148,690702,1881,410,0,0,0,0,0,12802,0,0,0,272,236380,3173
+122985da5f6f6ae0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122985da5f706100,_Z30kernel_estim_next_step_texturePjjjj,466.528,6,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122985da5f7789e0,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122985da5f786060,_Z24kernel_levelines_texturePjjjj,1930.21,4,0.5,64,64,-64,8,8,1,144,48,16,0,-1,,0,49113,149,690700,1877,410,0,0,0,0,0,12804,0,0,0,272,236557,3215
+122985da5f95dec0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122985da5f96b520,_Z30kernel_estim_next_step_texturePjjjj,472.32,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15511,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122985da5f9df480,memcpyDtoA,47.68,4,,,,,,,,,,,0,0,1048576,0
+122985da5f9ecac0,_Z24kernel_levelines_texturePjjjj,1921.98,4,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,49113,147,690706,1882,409,0,0,0,0,0,12800,0,0,0,272,236235,3236
+122985da5fbc28c0,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122985da5fbcfea0,_Z30kernel_estim_next_step_texturePjjjj,465.536,4,0.5,64,64,-64,8,8,1,0,48,9,0,-1,,0,816,0,15513,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122985da5fc42380,memcpyDtoA,47.584,3,,,,,,,,,,,0,0,1048576,0
+122985da5fc4f980,_Z24kernel_levelines_texturePjjjj,1916.54,4,0.5,64,64,8057472,8,8,1,144,48,16,0,-1,,0,49113,149,690708,1895,409,0,0,0,0,0,12788,0,0,0,272,236073,3398
+122985da5fe24300,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122985da5fe318e0,_Z30kernel_estim_next_step_texturePjjjj,470.784,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15510,0,410,0,0,0,0,0,13120,0,0,0,272,4920,1640
+122985da5fea5280,memcpyDtoA,47.712,3,,,,,,,,,,,0,0,1048576,0
+122985da5feb2900,_Z24kernel_levelines_texturePjjjj,1916.61,4,0.5,64,64,144,8,8,1,144,48,16,0,-1,,0,49113,147,690703,1870,410,0,0,0,0,0,12815,0,0,0,272,236510,3335
+122985da60087220,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122985da60094840,_Z30kernel_estim_next_step_texturePjjjj,465.344,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15510,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122985da60106c60,memcpyDtoA,47.776,3,,,,,,,,,,,0,0,1048576,0
+122985da60114320,_Z24kernel_levelines_texturePjjjj,1914.43,4,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,48941,149,688205,1889,410,0,0,0,0,0,12802,0,0,0,271,236103,3450
+122985da602e83c0,memcpyDtoA,47.616,3,,,,,,,,,,,0,0,1048576,0
+122985da602f5a00,_Z30kernel_estim_next_step_texturePjjjj,477.952,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15511,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122985da6036af60,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122985da603785a0,_Z24kernel_levelines_texturePjjjj,1909.12,3,0.5,64,64,6881375,8,8,1,144,48,16,0,-1,,0,48941,147,688211,1849,410,0,0,0,0,0,12804,0,0,0,271,236529,3243
+122985da6054b180,memcpyDtoA,47.616,7,,,,,,,,,,,0,0,1048576,0
+122985da605587e0,_Z30kernel_estim_next_step_texturePjjjj,474.72,4,0.5,64,64,3276855,8,8,1,0,48,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122985da605cd080,memcpyDtoA,47.552,3,,,,,,,,,,,0,0,1048576,0
+122985da605da680,_Z24kernel_levelines_texturePjjjj,1920.1,4,0.5,64,64,3145783,8,8,1,144,48,16,0,-1,,0,48752,146,685626,1884,409,0,0,0,0,0,12800,0,0,0,270,235824,3647
+122985da607afd40,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122985da607bd3a0,_Z30kernel_estim_next_step_texturePjjjj,490.048,4,0.5,64,64,3604536,8,8,1,0,48,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122985da60835860,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122985da60842ea0,_Z24kernel_levelines_texturePjjjj,1915.78,4,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,48752,146,685627,1911,409,0,0,0,0,0,12788,0,0,0,270,236322,3149
+122985da60a174c0,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122985da60a24aa0,_Z30kernel_estim_next_step_texturePjjjj,480.128,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,822,0,15624,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122985da60a9a8a0,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122985da60aa7ec0,_Z24kernel_levelines_texturePjjjj,1911.39,3,0.5,64,64,48,8,8,1,144,48,16,0,-1,,0,48752,146,685632,1882,410,0,0,0,0,0,12815,0,0,0,270,236367,3478
+122985da60c7b3a0,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122985da60c889e0,_Z30kernel_estim_next_step_texturePjjjj,476.992,3,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,822,0,15624,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122985da60cfdb80,memcpyDtoA,47.552,4,,,,,,,,,,,0,0,1048576,0
+122985da60d0b160,_Z24kernel_levelines_texturePjjjj,1912.67,4,0.5,64,64,9532554,8,8,1,144,48,16,0,-1,,0,48752,146,685631,1906,410,0,0,0,0,0,12802,0,0,0,270,236347,3206
+122985da60edeb00,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122985da60eec160,_Z30kernel_estim_next_step_texturePjjjj,486.112,3,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,822,0,15627,0,409,0,0,0,0,0,13088,0,0,0,274,4908,1636
+122985da60f63660,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122985da60f70c40,_Z24kernel_levelines_texturePjjjj,1913.22,3,0.5,64,64,97,8,8,1,144,48,16,0,-1,,0,48752,146,685626,1867,410,0,0,0,0,0,12804,0,0,0,270,236389,3383
+122985da61144800,memcpyDtoA,47.648,3,,,,,,,,,,,0,0,1048576,0
+122985da61151e40,_Z30kernel_estim_next_step_texturePjjjj,487.296,3,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122985da611c9820,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122985da611d6e40,_Z24kernel_levelines_texturePjjjj,1912.86,4,0.5,64,64,0,8,8,1,144,48,16,0,-1,,0,48752,146,685627,1873,409,0,0,0,0,0,12800,0,0,0,270,236113,3358
+122985da613aa8e0,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122985da613b7f40,_Z30kernel_estim_next_step_texturePjjjj,480.544,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,822,0,15626,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122985da6142dea0,memcpyDtoA,47.68,3,,,,,,,,,,,0,0,1048576,0
+122985da6143b500,_Z24kernel_levelines_texturePjjjj,1914.14,4,0.5,64,64,12032,8,8,1,144,48,16,0,-1,,0,48752,146,685630,1884,409,0,0,0,0,0,12788,0,0,0,270,235919,3552
+122985da6160f480,memcpyDtoA,47.744,3,,,,,,,,,,,0,0,1048576,0
+122985da6161cb60,_Z30kernel_estim_next_step_texturePjjjj,488.672,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,822,0,15627,0,410,0,0,0,0,0,13120,0,0,0,274,4920,1640
+122985da61694a80,memcpyDtoA,47.616,4,,,,,,,,,,,0,0,1048576,0
+122985da616a20a0,_Z24kernel_levelines_texturePjjjj,1930.24,3,0.5,64,64,12032,8,8,1,144,48,16,0,-1,,0,49113,150,690707,1868,410,0,0,0,0,0,12815,0,0,0,272,236723,3122
+122985da61879f20,memcpyDtoA,47.584,4,,,,,,,,,,,0,0,1048576,0
+122985da61887520,_Z30kernel_estim_next_step_texturePjjjj,476,4,0.5,64,64,0,8,8,1,0,48,9,0,-1,,0,816,0,15512,0,409,0,0,0,0,0,13088,0,0,0,272,4908,1636
+122985da619422a0,memcpyDtoH,191.328,755,,,,,,,,,,,0,0,1048576,0
cutilCheckError( cutStopTimer(timer) );
size = H * L * sizeof( unsigned int );
- psize = (r-1)*PSIZE_I*sizeof(int2) ;
+ psize = (r-1)*PSIZE_I*sizeof(ushort) ;
printf("Longueur des chemins = %d pixels\n", r);
printf("Init GPU + Image %s %d x %d = %d pixels en %f ms,\n", image_path, L, H, size, cutGetTimerValue(timer));
// allocation mem GPU
unsigned int * d_directions =NULL ;
unsigned int * d_lniv, * d_estim = NULL ;
- int2 * d_paths ;
+ ushort * d_paths ;
cutilSafeCall( cudaMalloc( (void**) &d_directions, size)) ;
cutilSafeCall( cudaMalloc( (void**) &d_lniv, size ) );
// allocate array and copy image data
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
- cudaChannelFormatDesc channelDescP = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindSigned);
+ //cudaChannelFormatDesc channelDescP = cudaCreateChannelDesc(8, 8, 0, 0, cudaChannelFormatKindSigned);
+ cudaChannelFormatDesc channelDescP = cudaCreateChannelDesc<ushort>();
cudaArray * array_img_in, *array_img_estim, *array_img_lniv, *array_paths;
cutilSafeCall( cudaMallocArray( &array_img_in, &channelDesc, L, H ));
/*****************************
* APPELS KERNELS et chronos
*****************************/
- dimBlock = dim3(16,16,1) ;
+ dimBlock = dim3(8,8,1) ;
dimGrid = dim3( H / dimBlock.x,L / dimBlock.y, 1 ) ;
// pour enregistrement image lniv GPU
for ( iter =0 ; iter < nb_iter ; iter++ )
{
cutilSafeCall( cudaMemcpyToArray( array_img_estim, 0, 0, d_estim, size, cudaMemcpyDeviceToDevice)) ;
- kernel_levelines_texture_smem<<< dimGrid, dimBlock, 0 >>>( d_lniv, L, H, r );
+ //version avec/sans tentative d'utilisation de smem ( pas probante )
+ kernel_levelines_texture<<< dimGrid, dimBlock, 24*(r-1)*sizeof(short) >>>( d_lniv, L, H, r );
cutilSafeCall( cudaMemcpyToArray( array_img_lniv, 0, 0, d_lniv, size, cudaMemcpyDeviceToDevice)) ;
if (seq_out){
sprintf(image_out, "%s%d%s", image_out_base, iter+1, pgm_ext) ;
/**************************************************
* recuperation matrice des chemins pour affichage
**************************************************/
-
+ /*
int2 * h_paths = new int2[(r-1)*PSIZE_I] ;
cutilSafeCall( cudaMemcpyFromArray(h_paths , array_paths, 0, 0, psize, cudaMemcpyDeviceToHost) );
- /*
+
//verif Di
printf("matrice Di");
for(int idpath=0; idpath< PSIZE_I; idpath++){b