CIT2011/expose_cit2011_short.tex

   1 % /solutions/conference-talks/conference-ornate-20min.fr.tex, 22/02/2006 De Sousa
   2 \documentclass{beamer}
   3 %\documentclass[red]{beamer}
   4 \usepackage{pgf,pgfarrows,pgfnodes,pgfautomata,pgfheaps,pgfshade}
   5 \usepackage{amsfonts,amsmath,amssymb,stmaryrd}
   6 \usepackage[latin1]{inputenc}
   7 \usepackage[T1]{fontenc}
   8 %\usepackage[frenchb]{babel}
   9 \usepackage{aeguill}
  10 \usepackage{graphicx}
  11 \usepackage{amsmath,amssymb}
  12 \usepackage{multirow}
  13 \usepackage{multicol}
  14 \usepackage{colortbl}
  15 \usepackage{graphicx}
  16 %\usepackage{figlatex}
  17 \usepackage{multirow}
  18 \usepackage{times}
  19 \usepackage{marvosym}
  20 \usepackage{pifont}
  21 %\usepackage[vlined,longend,algoruled,titlenumbered,french]{algorithm2e}
  22 \usepackage[vlined,french,algoruled]{algorithm2e}
  23 \usepackage[gen]{eurosym}
  24
  25 %      _         _             _                     _
  26 %  ___| |_ _   _| | ___    ___| |_    ___ ___  _   _| | ___ _   _ _ __ ___
  27 % / __| __| | | | |/ _ \  / _ \ __|  / __/ _ \| | | | |/ _ \ | | | '__/ __|
  28 % \__ \ |_| |_| | |  __/ |  __/ |_  | (_| (_) | |_| | |  __/ |_| | |  \__ \
  29 % |___/\__|\__, |_|\___|  \___|\__|  \___\___/ \__,_|_|\___|\__,_|_|  |___/
  30 %          |___/
  31 %
  32
  33 \setbeamercovered{transparent}
  34 \usetheme{Warsaw}
  35 % enlever [red] dans le \documentclass si utilisation de la suite :
  36 \usecolortheme[RGB={202,31,39}]{structure} % rouge
  37
  38 %\setbeamertemplate{background}{\includegraphics[height=96mm,width=128mm]{fond.png}}
  39
  40 % definition de la couleur du fond des blocks
  41 \definecolor{ArrierePlanBodyW}{rgb}{0.5,0.5,0.5} % si ecriture blanche
  42 \definecolor{ArrierePlanBodyB}{rgb}{0.85,0.85,0.85} % si ecriture noire
  43 % definition de la couleur des slides
  44 \definecolor{ArrierePlan}{rgb}{0.9,0.9,0.9}
  45
  46 % selection de la couleur du fond des slides
  47 \setbeamercolor{background canvas}{bg=ArrierePlan}
  48 % selection de la couleur du fond des block avec ecriture noire
  49 %\setbeamercolor{block body}{use=structure,fg=black,bg=ArrierePlanBodyB}
  50 % selection de la couleur du fond des block avec ecriture blanche
  51 \setbeamercolor{block body}{use=structure,fg=white,bg=ArrierePlanBodyW}
  52
  53
  54 \DeclareGraphicsExtensions{pdf,eps,jpg,png}
  55 \graphicspath{{.}{./fig/}{img/}}
  56
  57 \defbeamertemplate*{footline}{section in head/foot}
  58 {
  59   \leavevmode%
  60   \hbox{\begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fill,rightskip=.3cm]{author in head/foot}%
  61     \usebeamerfont{author in head/foot}\insertshortauthor
  62   \end{beamercolorbox}%
  63   \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}%
  64     \usebeamerfont{title in head/foot}\insertshorttitle \hfill \insertframenumber{} / \inserttotalframenumber\hspace*{2ex}%
  65   \end{beamercolorbox}}%
  66   \vskip0pt%
  67 }
  68
  69
  70 \setbeamercovered{dynamic}
  71 %\logo{\includegraphics[height=1cm]{./img/logo_petit-tr.png}}
  72 % \defbeamertemplate*{headline}{split theme}
  73 % {%
  74 %   \begin{beamercolorbox}{section in head/foot}
  75 %     \insertsectionnavigationhorizontal{2cm}{\includegraphics[height=2cm]{./img/logo_petit-tr.png}}{}
  76 %   \end{beamercolorbox}%
  77 %  }
  78
  79 \defbeamertemplate*{headline}{page title}{
  80   \leavevmode%
  81   \begin{beamercolorbox}[wd=.08\paperwidth,ht=1.45cm]{section in head/foot}%
  82     \vbox to 1.45cm{\vfill\hspace*{-0.1mm}\includegraphics[height=1.45cm]{./img/logo_fresnel.pdf}\vfill}%
  83   \end{beamercolorbox}%
  84   \begin{beamercolorbox}[wd=.655\paperwidth,ht=1.45cm]{section in head/foot}%
  85     \vbox to 1.45cm{\vfill\hspace*{-0.1mm}\includegraphics[height=1.45cm]{./img/UPC.jpg}\vfill}%
  86   \end{beamercolorbox}%
  87   \begin{beamercolorbox}[wd=.1\paperwidth,ht=1.45cm]{section in head/foot}%
  88     \vbox to 1.45cm{\vfill\hspace*{1mm}\includegraphics[height=1.25cm]{./img/lifc.pdf}\vfill}%
  89   \end{beamercolorbox}%
  90    \begin{beamercolorbox}[wd=.17\paperwidth,ht=1.45cm]{section in head/foot}%
  91       \vbox to 1.45cm{\vfill\hspace*{-0.1mm}\includegraphics[height=1.45cm]{./img/ufc2.pdf}\vfill}%
  92   \end{beamercolorbox}%
  93 }
  94
  95 \defbeamertemplate*{headline}{section in head/foot}
  96 {
  97   \leavevmode%
  98   \begin{beamercolorbox}[wd=.1\paperwidth,ht=1.45cm]{section in head/foot}%
  99     \vbox to 1.45cm{\vfill\hspace*{-0.1mm}\includegraphics[height=1.25cm]{./img/lifc.pdf}\vfill}%
 100   \end{beamercolorbox}%
 101   \begin{beamercolorbox}[wd=.39\paperwidth,ht=1.45cm]{section in head/foot}%
 102     \vbox to 1.45cm{\vfil\insertsectionnavigation{.4\paperwidth}\vfil}%
 103   \end{beamercolorbox}%
 104   \begin{beamercolorbox}[wd=.43\paperwidth,ht=1.45cm]{subsection in head/foot}%
 105    \vbox to 1.45cm{\vfil\insertsubsectionnavigation{.4\paperwidth}\vfil}%
 106   \end{beamercolorbox}%
 107   %\begin{beamercolorbox}[wd=.17\paperwidth,ht=1.45cm]{section in head/foot}%
 108   %    \vbox to 1.45cm{\vfill\hspace*{-0.1mm}\includegraphics[height=1.45cm]{./img/ufc2.pdf}\vfill}%
 109   %\end{beamercolorbox}%
 110   %\begin{beamercolorbox}[wd=.235\paperwidth,ht=1.45cm]{section in head/foot}%
 111   %    \vbox to 1.45cm{\vfill\hspace*{-0.1mm}\includegraphics[height=1.45cm]{./img/UPC.jpg}\vfill}%
 112   %\end{beamercolorbox}%
 113   \begin{beamercolorbox}[wd=.1\paperwidth,ht=1.45cm]{section in head/foot}%
 114     \vbox to 1.45cm{\vfill\includegraphics[height=1.45cm]{./img/logo_fresnel.pdf}\vfill}%
 115   \end{beamercolorbox}%
 116
 117 }
 118
 119
 120
 121
 122 % CUSTOM JMN
 123 %%%%%%%%%%%%%
 124 % EXERCICES %
 125 %%%%%%%%%%%%%
 126 \newcounter{exo}
 127 \setcounter{exo}{0}
 128 \newcounter{labelExo}
 129
 130 \newcommand{\Exos}[1]{\stepcounter{exo}{\vspace{0.5cm}
 131      {\textbf{Exercice \arabic{exo}~:\ #1}}}}
 132
 133 \newcommand{\Solution}{\vspace{0.5cm}
 134      {\textbf{\emph{Solution de l'exercice \arabic{exo}}}}}
 135
 136 %%%%%%%%%%%%%%%
 137 % DEFINITIONS %
 138 %%%%%%%%%%%%%%%
 139 \newcounter{def}
 140 \setcounter{def}{0}
 141 \newcounter{labelDef}
 142
 143 \renewcommand{\Definition}[1]{\stepcounter{def}{%\vspace{0.5cm}
 144    {\textbf{Définition \arabic{def}~:\ #1 }}}}
 145
 146
 147 \def\N{{ I\!\!N}}
 148 \def\R{{ I\!\!R}}
 149 \def\Z{{ Z\!\!\! Z}}
 150
 151 \def\Q{I\kern-0.60em Q}
 152 \def\C{I\kern-0.60em C}
 153 \def\F{I\kern-0.20em F}
 154 \def\K{I\kern-0.20em K}
 155
 156 % fleche d'affectation pour les algo
 157 \def\plv{$ \leftarrow$\hspace{1ex}}
 158
 159 \titlegraphic{%
 160   \centerline{%
 161     %% \parbox{.165\textwidth}{%
 162     %%  \includegraphics[width=0.165\textwidth]{logo-2lignes-moyen-tr.png}}
 163     %%\hfill
 164     %%\parbox{.22\textwidth}{%
 165     %%  \includegraphics[width=0.11\textwidth]{logo_tutelle4.jpg}}
 166     %\hfill
 167     %\parbox{.22\textwidth}{
 168     %  \includegraphics[width=0.11\textwidth]{logo_petit-tr.png}}
 169     \hfill
 170     \parbox{.5\textwidth}{%
 171       \includegraphics[width=0.5\textwidth]{CITLogo2.jpg}}
 172     \hfill
 173   }
 174 }
 175
 176
 177 \title[Snake GPU] % (facultatif, à utiliser uniquement si le titre de l'article est trop long)
 178 {\scshape GPU-accelerated snake}
 179
 180 \subtitle {GPU implementation of a region-based segmentation algorithm (snake) for large images}
 181
 182 \author[G. Perrot] % (facultatif, à utiliser seulement avec plusieurs auteurs)
 183 {Gilles Perrot$¹$, Stéphane Domas$¹$, Raphaël Couturier$¹$, Nicolas Bertaux$²$.}
 184 % - Composez les noms dans l'ordre dans lequel ils apparaîtrons dans l'article
 185 % - Utilisez la commande \inst{?} uniquement si les auteurs ont des affiliations
 186 %   différentes.
 187
 188 \institute[] % (facultatif mais généralement nécessaire)
 189 {
 190  $¹$University of Franche-Comté - Distributed Numerical Algorithmics group (AND),\\
 191  $²$Fresnel Institute - Physics and Image Computing group (PhyTI)
 192 }
 193 % - Utilisez la commande \inst uniquement s'il y a plusieurs affectations
 194 % - Faîtes quelque chose de simple, personne ne s'intéresse à votre adresse.
 195
 196 \date[] % (facultatif)
 197 {\small 31 august - 1 september}
 198
 199 %\subject{}
 200 % Inséré uniquement dans la page d'information du fichier PDF. Peut être
 201 % supprimé.
 202
 203
 204
 205 % Si vous avez un fichier nommé "université-logo-nomfichier.xxx", où xxx
 206 % est un format graphique accepté par latex ou pdflatex (comme par exemple .png),
 207 % alors vous pouvez insérer votre logo ainsi :
 208
 209 %\pgfdeclareimage[height=1cm]{logo-lifc}{img/logo-lifc.jpg}
 210 %\logo{\pgfuseimage{logo-lifc}}
 211
 212
 213
 214 % À supprimer si vous ne voulez pas que la table des matières apparaisse
 215 % au début de chaque sous-section :
 216 %
 217 % \AtBeginSubsection[] {
 218 %   \begin{frame}<beamer>{Lignes directrices}
 219 %     \tableofcontents[currentsection,currentsubsection]
 220 %   \end{frame}
 221 % }
 222
 223
 224 % Si vous souhaitez recouvrir vos transparents un à un,
 225 % utilisez la commande suivante (pour plus d'info, voir la page 74 du manuel
 226 % d'utilisation de Beamer (version 3.06) par Till Tantau) :
 227
 228 %\beamerdefaultoverlayspecification{<+->}
 229 \colorlet{textvert}{green}
 230
 231 \begin{document}
 232
 233 \setbeamertemplate{headline}[page title]
 234 \setbeamertemplate{footline}[default]
 235
 236 \begin{frame}[default]
 237   \titlepage
 238 \end{frame}
 239
 240
 241 \begin{frame}{Table of contents}
 242   \tableofcontents
 243   % Vous pouvez, si vous le souhaitez ajouter l'option [pausesections]
 244 \end{frame}
 245
 246 \setbeamertemplate{headline}[section in head/foot]
 247 \setbeamertemplate{footline}[section in head/foot]
 248
 249 \section{Context}
 250 \begin{frame}{Image segmentation}
 251 \begin{block}{Definition, goal}
 252 \begin{itemize}
 253   \item Dividing an image into two homogeneous regions.
 254     \item Reducing the amount of data needed to code information.
 255       \item Helping the human perception in certain cases.
 256 \end{itemize}
 257 \end{block}
 258  \begin{block}{Image characteristics}
 259  \begin{itemize}
 260    \item 16 bit-coded gray levels,
 261    \item From 10~Mpixels to more than 100~Mpixels,
 262    \item Very noisy.
 263  \end{itemize}
 264  \end{block}
 265 \end{frame}
 266
 267 % \begin{frame}{Images of our interest}
 268 % \begin{block}{Origins}
 269 % \begin{itemize}
 270 %   \item Synthetic Aperture RADAR (S.A.R.),
 271 %     \item Ultrasonic (medical imaging),
 272 %       \item Photographic (IR, nightshots).
 273 % \end{itemize}
 274 % \end{block}
 275 % \begin{block}{Characteristics}
 276 % \begin{itemize}
 277 %   \item 16 bit-coded gray levels,
 278 %   \item From 10~Mpixels to more than 100~Mpixels,
 279 %   \item Very noisy.
 280 % \end{itemize}
 281 % \end{block}
 282 % \end{frame}
 283
 284 \section{Snake algorithm}
 285
 286 % \begin{frame}{Active contour hypothesis}
 287 % \begin{itemize}
 288 %   \item A background region $B$ and a target region $T$, containing  $n_T$ and $n_B$ pixels, compose a $L x H$ pixel image.
 289 %   \item Probability density functions (PDF) $p^B$ and $p^T$ are of the same family (Gaussian in this example).
 290 %   % inutile ds les hypotheses, on ne dit pas qu'on ls connait
 291 %   %\item Vectors of parameters $\Theta^B$ and $\Theta^T$ of the PDFs $p^B$ and $p^T$ are unknown.
 292 %   \item The contour $\varGamma$ which surrounds $T$ is chosen polygonal.
 293 %   \item [\ding{253}] Implementation of a region-based algorithm.
 294 % \end{itemize}
 295 % \end{frame}
 296
 297 \begin{frame}{Algorithm  basics : criterion}
 298 \begin{columns}
 299 \begin{column}[l]{5cm}
 300   \resizebox{5cm}{5cm}{\input{img/fig_target.pdf_t}}
 301 \end{column}
 302 \begin{column}[l]{7.5cm}
 303   \begin{itemize}
 304   \item The goal is to find the most likely contour $\varGamma$ (number and positions of nodes).
 305   \item The criterion used is a \emph{Generalized Likelihood}.\\In the Gaussian case, it is given by
 306     $$GL = \dfrac{1}{2}\left[ n_B.log\left({\widehat{\sigma_B}}^2\right) + n_T.log\left({\widehat{\sigma_T}}^2\right)\right]$$
 307     where $\widehat{\sigma_\Omega}$ is the estimation of the deviation $\sigma$ for the region $\Omega$.
 308   \end{itemize}
 309 \end{column}
 310 \end{columns}
 311 \end{frame}
 312
 313 \begin{frame}{Algorithm  basics : criterion}
 314 \begin{columns}
 315 \begin{column}[l]{5cm}
 316   \resizebox{5cm}{5cm}{\input{img/fig_target.pdf_t}}
 317 \end{column}
 318 \begin{column}[l]{7.5cm}
 319   \begin{itemize}
 320   \item Parameters estimation is done by 1-D sums on along the contour.
 321   \item Every pixel coordinates are needed.
 322   \end{itemize}
 323 \end{column}
 324 \end{columns}
 325 \end{frame}
 326
 327 % \begin{frame}{Algorithm basics : parameters estimation}
 328 %   \begin{itemize}
 329 %   \item In the Gaussian case,  $p_{\Omega}$ has two parameters, average and deviation, which are estimated by maximum likelihood.\\
 330 %     If $z$ is the gray level of the pixel of coordinates $(i,j)$:
 331 %     $$
 332 %     \left(
 333 %       \begin{array}{l}
 334 %         \widehat{\mu_{\Omega}} = \frac{1}{n_{\Omega}} \displaystyle\sum_{(i,j)\in \Omega} z(i,j) \\
 335 %         \widehat{\sigma^2_{\Omega}} = \frac{1}{n_{\Omega}} \displaystyle\sum_{(i,j)\in \Omega} \left(z(i,j) - \widehat{\mu_{\Omega}}\right)^2 \\
 336 %       \end{array}
 337 %     \right.
 338 %     $$
 339 %   \item These estimations have to be computed for each test state of the contour $\varGamma$: time-consuming.
 340 %   \end{itemize}
 341 % \end{frame}
 342
 343 % \begin{frame}{Algorithm basics : parameters estimation}
 344 %   \begin{itemize}
 345 % %peut-être illustrer ici
 346 %   \item Based on the Green-Ostogradsky theorem, Chesnaud has shown how to replace those 2-dimensions sums inside the contour
 347 %     by 1-dimension sums along the contour.
 348 %     \item This optimization implies:
 349 %       \begin{itemize}
 350 %       \item the precomputation of a few matrices (called cumulated images) containing the potential \emph{contributions} of each pixel of the image,
 351 %         \item the use of constant lookup tables of weighting coefficients to determine the \emph{contributions} of each segment of pixels.
 352 %       \end{itemize}
 353 %   \end{itemize}
 354 % \end{frame}
 355
 356 % \begin{frame}{CPU sequential implementation : outlines}
 357 %   \begin{enumerate}
 358 %   \item Precomputation of the cumulated images.
 359 %     \item Choice of the initial contour $\varGamma_0$: rectangle near the borders.
 360 %       \item Changes made to the contour in order to find the most likely one:
 361 %         \begin{enumerate}[i]
 362 %         \item Move every node as long as it gives a better criterion.
 363 %           \item Add nodes in the middle of each big enough segment.
 364 %         \end{enumerate}
 365 %   \end{enumerate}
 366 % \end{frame}
 367
 368  \begin{frame}{Snake algorithm in action}
 369    \begin{columns}
 370      \begin{column}[T]{7cm}
 371        \begin{overprint}
 372          \onslide<1>  \includegraphics[width=7cm]{./img/cochon_positions.png}
 373          \onslide<2>  \includegraphics[width=7cm]{./img/sequences/cochon_it1.png}
 374          \onslide<3>  \includegraphics[width=7cm]{./img/sequences/cochon_it21.png}
 375          \onslide<4>  \includegraphics[width=7cm]{./img/sequences/cochon_it22.png}
 376          \onslide<5>  \includegraphics[width=7cm]{./img/sequences/cochon_it4.png}
 377          \onslide<6>  \includegraphics[width=7cm]{./img/sequences/cochon_it7.png}
 378        \end{overprint}
 379      \end{column}
 380      \begin{column}[T]{5cm}
 381        \only<1> {
 382          \begin{itemize}
 383          \item 150~Mpixels image.
 384          \item Initial contour: 4 nodes.
 385          \end{itemize}
 386        }
 387        \only<2> {
 388          \begin{itemize}
 389          \item End of first iteration: no more move can be of interest.
 390          \end{itemize}
 391        }
 392        \only<3> {
 393          \begin{itemize}
 394          \item Nodes added in the middle of segments.
 395          \end{itemize}
 396        }
 397        \only<4> {
 398          \begin{itemize}
 399          \item End of second iteration.
 400          \end{itemize}
 401        }
 402        \only<5> {
 403          \begin{itemize}
 404          \item End of fourth iteration
 405          \end{itemize}
 406        }
 407        \only<6> {
 408          \begin{itemize}
 409          \item End of seventh iteration.
 410           \item Fast segmentation.
 411           \item Efficient with noise.
 412          \end{itemize}
 413        }
 414      \end{column}
 415    \end{columns}
 416  \end{frame}
 417
 418
 419 \section{GPU Implementation}
 420 \begin{frame}{GPU : why}
 421   \begin{itemize}
 422   \item This SNAKE algorithm has proved to be fast and robust.
 423     \item Images to be processed are becoming larger and larger.
 424     \item To be user-friendly, process must be done in less than 1~second.
 425       \item GPU are cheap and can bring impressive speedups.
 426         \item GPU are easy to embed in a simple PC (in a aeroplane,...)
 427   \end{itemize}
 428 \end{frame}
 429
 430
 431 \begin{frame}{GPU design : key points}
 432   \begin{itemize}
 433   \item The parallelism of a modern GPU lays on a SIMT paradigm (Single Instruction Multiple Threads): the same instruction
 434     is processed by a great number of threads at a time (up to $2^{16}$).
 435     \item Threads are compounded in independants blocks with no possible synchronization between blocks.
 436       \item Threads in a block share a small amount of shared memory (16-48~KBytes).
 437         \item There are restrictive conditions to be fullfilled in order to make efficent accesses to global and shared memory.
 438           \item Data transfers between CPU and GPU are slow.
 439   \end{itemize}
 440 \end{frame}
 441
 442 % \begin{frame}{GPU implementation: precomputations}
 443 %   \begin{itemize}
 444 %   \item One of the cumulated images is not to be computed anymore: values are evaluated on the fly.
 445 %   \item An inclusive parallel prefixsum is performed on each row of the image for each matrix to be processed ($z$,$z^2$).
 446 %   \item[\ding{253}] Speedup is around x7 for images larger than 100~MPixels. Comparison is done with the SSE/CPU implementation of the PhyTI group.
 447 %     \item[\ding{253}] Higher speedups (x15) are obtained with specific versions for constant image sizes.
 448 %   \end{itemize}
 449 % \end{frame}
 450
 451
 452 % \begin{frame}{GPU implementation}
 453 % When modifying the shape of the contour, we have to compute:
 454 % \begin{itemize}
 455 % \item Parameters of the corresponding regions.
 456 % \item The value of the criterion.
 457 % \item The parallelization needs reside essentially in the parameters estimation.\\Two possible parallelism levels:
 458 %   \begin{itemize}
 459 %   \item One contour per thread.
 460 %   \item One pixel per thread.
 461 %   \item[\ding{253}] The \emph{one pixel per thread} rule is far more efficient, due to memory access constraints.
 462 %   \end{itemize}
 463 % \end{itemize}
 464 % \end{frame}
 465
 466
 467 \begin{frame}{GPU implementation: parallelization}
 468   \begin{columns}
 469     \begin{column}[T]{6cm}
 470       \resizebox{6cm}{6cm}{\input{./img/topologie.pdf_t}}
 471     \end{column}
 472     \begin{column}[T]{6.5cm}
 473       \begin{itemize}
 474       \item Every 16 segments for every node are processed in parallel.
 475       \item Fits GPU specific parallelism: each segment pixel is processed by a thread.
 476       \item Criterion values are obtained after several reduction stages.
 477       \item All nodes are possibly moved in one step.
 478       \end{itemize}
 479     \end{column}
 480   \end{columns}
 481 \end{frame}
 482
 483 % \begin{frame}{GPU implementation: data structure}
 484 % The main idea is to organize, in a single array, every pixels of every segments to be processed.\\
 485 % Thus, for a given state of the contour ($N$ nodes), we:
 486 %   \begin{enumerate}
 487 %   \item Find the largest segment to be processed. It gives:
 488 %     \begin{itemize}
 489 %     \item the block size $bs$ of the computing grid,
 490 %     \item the number of blocks needed for each segment ($N_{TB}$).
 491 %     \end{itemize}
 492 %   \item Compute in parallel,  the coordinates of every pixels of the $16.N$ segments to be considered,
 493 %   \item Make some parallel reductions to finally obtain parameters estimation.
 494 %   \end{enumerate}
 495 % \end{frame}
 496
 497 % \begin{frame}{GPU implementation: data structure}
 498 % \begin{center}
 499 %   \begin{overprint}
 500 %     \onslide<1> \resizebox{8cm}{1.43cm}{\input{img/contribs_segments_seg.pdf_t}}
 501 %     \onslide<2> \resizebox{8cm}{3.78cm}{\input{img/contribs_segments_16seg.pdf_t}}
 502 %     \onslide<3> \resizebox{8cm}{6cm}{\input{img/contribs_segments_even.pdf_t}}
 503 %     \onslide<4> \resizebox{8cm}{6cm}{\input{img/contribs_segments.pdf_t}}
 504 %   \end{overprint}
 505 % \end{center}
 506 % \end{frame}
 507
 508 \begin{frame}{GPU implementation: first results}
 509   \begin{itemize}
 510   \item Global speedup around x7-x8 (Nvidia C2050) for image sizes from 15 to 150~Mpixels.
 511   \item First iterations have higher speedups:
 512     \begin{itemize}
 513     \item several large segments,
 514     \item few inactive threads in the grid.
 515     \end{itemize}
 516 %   \item Last iterations are sometimes slower than on CPU:
 517 %     \begin{itemize}
 518 %     \item a lot of small segments,
 519 %     \item more inactive threads in the grid.
 520 %     \end{itemize}
 521   \end{itemize}
 522 \end{frame}
 523
 524 \begin{frame}{GPU implementation: smart init (reasons)}
 525   \begin{itemize}
 526   \item The target shape is often far from initial contour,
 527   \item It causes the very first iteration to be much more time-consuming than the other ones.
 528   %\item Horizontal segments contributions are null.
 529   %\item Vertical segments contributions computations can be fast, through a specific process.
 530   \item[\ding{253}] It's fast on GPU to find a rectangle near the target. But it needs to overrule the 'one thread/one pixel' principle.
 531   \end{itemize}
 532 \end{frame}
 533
 534 \begin{frame}{GPU implementation: smart init (process)}
 535 \begin{columns}
 536 \begin{column}[l]{7cm}
 537   \resizebox{7cm}{6.75cm}{\input{img/smart_init1.pdf_t}}
 538 \end{column}
 539 \begin{column}[l]{5.5cm}
 540   \begin{itemize}
 541   \item Realize a periodic sampling of a few hundreds of J-coordinates.
 542   \item Evaluate in parallel every possible rectangle of diagonal $(0 , j_L)-(H , j_H)$.
 543   \item Select the one with the best GL criterion.
 544   \item $j_L$ and $j_H$ are now considered as constants.
 545   \end{itemize}
 546 \end{column}
 547 \end{columns}
 548 \end{frame}
 549
 550 \begin{frame}{GPU implementation: smart init (process)}
 551 \begin{columns}
 552 \begin{column}[l]{7cm}
 553   \resizebox{7cm}{6.75cm}{\input{img/smart_init2.pdf_t}}
 554 \end{column}
 555 \begin{column}[l]{5.5cm}
 556   \begin{itemize}
 557   \item Given $j_L$ and $j_H$.
 558   \item Realize a periodic sampling of a few hundreds of I-coordinates.
 559   \item Evaluate in parallel every possible rectangle of diagonal $(i_L , j_L)-(i_H , j_H)$.
 560   \item Select the one with the best GL criterion.
 561   \end{itemize}
 562 \end{column}
 563 \end{columns}
 564 \end{frame}
 565
 566 \begin{frame}{GPU implementation: improvement}
 567   \begin{itemize}
 568   \item Global speedup around x10 (Nvidia C2050) for image sizes from 15 to 150~Mpixels and a 'small enough' target.
 569   \item Less than 0.6~s for the 150~Mpixels image of the example.
 570     \item A real-life 10~Mpix S.A.R. picture after 3 iterations in less than 50~ms .\\
 571       \centering \includegraphics[width=5cm]{./img/Montserrat_3it.png}
 572   \end{itemize}
 573 \end{frame}
 574
 575 \section{Conclusion}
 576 \begin{frame}{Conclusion, future works}
 577   \begin{itemize}
 578   \item Interesting speedups
 579   \item Original algorithm is not GPU-friendly
 580   \item Future works:
 581     \begin{itemize}
 582     \item Finding a more suited structure to describe the contour.
 583     \item Switching to a statistical model independant from a PDF: the potts model.
 584     \item Benefit from recent features of CUDA v4 (overlapping, multiple kernels).
 585     \item Extend to a multiple targets algorithm, based on this single target elementary piece of code.
 586     \end{itemize}
 587   \end{itemize}
 588 \end{frame}
 589
 590
 591 %\begin{frame}[allowframebreaks]{References}
 592 %\bibliographystyle{plain}
 593 %\bibliography{biblio}
 594 %\nocite{*}
 595 %\end{frame}
 596
 597
 598 \end{document}