+++ /dev/null
-\relax
-\@writefile{toc}{\author{Gilles Perrot}{}}
-\@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {3}Setting up the environnement.}{25}}
-\@writefile{lof}{\addvspace {10\p@ }}
-\@writefile{lot}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {section}{\numberline {3.1}Data transfers, memory management.}{25}}
-\newlabel{algo:memcopy:H2D}{{7}{26}}
-\newlabel{algo:memcopy:kernel}{{8}{26}}
-\newlabel{algo:memcopy:D2H}{{9}{26}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces global memory management on CPU and GPU sides\relax }}{26}}
-\newlabel{algo:memcopy}{{1}{26}}
-\newlabel{lst:main1}{{3.1}{27}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {3.1}generic main.cu file used to launch CUDA kernels}{27}}
-\newlabel{lst:fkern1}{{3.2}{27}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {3.2}fast\_kernels.cu file featuring one kernel skeleton}{27}}
-\newlabel{lst:mkfile}{{3.3}{28}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {3.3}generic makefile based on those provided by NVIDIA SDK}{28}}
-\@writefile{toc}{\contentsline {section}{\numberline {3.2}Performance measurements}{28}}
-\newlabel{lst:chronos}{{3.4}{28}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {3.4}Time measurement technique using cutil functions}{28}}
-\@writefile{toc}{\author{Gilles Perrot}{}}
-\@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {chapter}{\numberline {4}Implementing a fast median filter}{31}}
-\@writefile{lof}{\addvspace {10\p@ }}
-\@writefile{lot}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {section}{\numberline {4.1}Introduction}{31}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.2}Median filtering}{32}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.2.1}Basic principles}{32}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Example of 5x5 median filtering\relax }}{32}}
-\newlabel{fig:median_1}{{4.1}{32}}
-\newlabel{algoMedianGeneric}{{2}{33}}
-\newlabel{algoMedianGeneric:memcpyH2D}{{1}{33}}
-\newlabel{algoMedianGeneric:cptstart}{{3}{33}}
-\newlabel{algoMedianGeneric:cptend}{{5}{33}}
-\newlabel{algoMedianGeneric:memcpyD2H}{{7}{33}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {2}{\ignorespaces generic n$\times $n median filter\relax }}{33}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.2.2}A naive implementation}{33}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces Illustration of window overlapping in 5x5 median filtering\relax }}{34}}
-\newlabel{fig:median_overlap}{{4.2}{34}}
-\newlabel{lst:medianGeneric}{{4.1}{34}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.1}generic CUDA kernel achieving median filtering}{34}}
-\@writefile{lot}{\contentsline {table}{\numberline {4.1}{\ignorespaces Performance results of \texttt {kernel medianR}. \relax }}{35}}
-\newlabel{tab:medianHisto1}{{4.1}{35}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.3}NVIDIA GPU tuning recipes}{35}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces Example of median filtering, applied to salt and pepper noise reduction.\relax }}{36}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Airplane image, corrupted by salt and pepper noise of density 0.25}}}{36}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Image denoised by a $3\times 3$ median filter}}}{36}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Image denoised by a $5\times 5$ median filter}}}{36}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Image denoised by 2 iterations of a $3\times 3$ median filter}}}{36}}
-\newlabel{fig:sap_examples}{{4.3}{36}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.4}A 3$\times $3 median filter: using registers}{37}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.4.1}The simplest way}{37}}
-\newlabel{lst:kernelMedian3RegTri9}{{4.2}{38}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.2}$3\times 3$ median filter kernel using one register per neighborhood pixel and bubble sort}{38}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.4.2}Further optimization}{38}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces Comparison of pixel throughputs for CPU generic median, CPU 3$\times $3 median register-only with bubble sort, GPU generic median, GPU 3$\times $3 median register-only with bubble sort, and GPU libJacket.}}{39}}
-\newlabel{fig:compMedians1}{{4.4}{39}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2.1}Reducing register count }{39}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces Forgetful selection with the minimal element register count. Illustration for $3\times 3$ pixel window represented in a row and supposed sorted.\relax }}{40}}
-\newlabel{fig:forgetful_selection}{{4.5}{40}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces Determination of the median value by the \textit {forgetful selection} process, applied to a $3\times 3$ neighborhood window.\relax }}{41}}
-\newlabel{fig:forgetful3}{{4.6}{41}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces First iteration of the $5\times 5$ selection process, with $k_{25}=14$, which shows how Instruction Level Parallelism is maximized by the use of an incomplete sorting network.}}{41}}
-\newlabel{fig:bitonic}{{4.7}{41}}
-\newlabel{lst:medianForget1pix3}{{4.3}{42}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.3}3$\times $3 median filter kernel using the minimum register count of 6 to find the median value by forgetful selection method. The optimal thread block size is 128 on GTX280 and 256 on C2070}{42}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2.2}More data output per thread}{42}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces Illustration of how window overlapping is used to combine 2 pixel selections in a $3\times 3$ median kernel.\relax }}{43}}
-\newlabel{fig:median3_overlap}{{4.8}{43}}
-\newlabel{lst:medianForget2pix3}{{4.4}{43}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.4}$3\times 3$ median filter kernel processing 2 output pixel values per thread using combined forgetful selection}{43}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.5}A 5$\times $5 and more median filter }{44}}
-\newlabel{sec:median5}{{4.5.1}{44}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.5.1}A register-only 5$\times $5 median filter }{44}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces Comparison of pixel throughput on GPU C2070 for the different 3$\times $3 median kernels.\relax }}{45}}
-\newlabel{fig:compMedians2}{{4.9}{45}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.10}{\ignorespaces Reducing register count in a 5$\times $5 register-only median kernel outputting 2 pixels simultaneously.}}{45}}
-\newlabel{fig:median5overlap}{{4.10}{45}}
-\newlabel{lst:medianForget2pix5}{{4.5}{46}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.5}kernel 5$\times $5 median filter processing 2 output pixel values per thread by a combined forgetfull selection}{46}}
-\@writefile{lot}{\contentsline {table}{\numberline {4.2}{\ignorespaces Performance of various 5$\times $5 median kernel implementations, applied on 4096$\times $4096 pixel image with C2070 GPU card.\relax }}{47}}
-\newlabel{tab:median5comp}{{4.2}{47}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.5.2}Fast approximated $n\times n$ median filter }{47}}
-\@writefile{lot}{\contentsline {table}{\numberline {4.3}{\ignorespaces Measured performance of one generic pseudo-separable median kernel applied to 4096$\times $4096 pixel image with various window sizes.\relax }}{48}}
-\newlabel{tab:medianSeparable}{{4.3}{48}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.11}{\ignorespaces Example of separable median filtering (smoother), applied to salt and pepper noise reduction.\relax }}{49}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Airplane image, corrupted with by salt and pepper noise of density 0.25}}}{49}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Image denoised by a $3\times 3$ separable smoother}}}{49}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Image denoised by a $5\times 5$ separable smoother}}}{49}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Image background estimation by a $55\times 55$ separable smoother}}}{49}}
-\newlabel{fig:sap_examples2}{{4.11}{49}}
-\newlabel{lst:medianSeparable}{{4.6}{50}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.6}generic pseudo median kernel.}{50}}
-\@setckpt{Chapters/chapter3/ch3}{
-\setcounter{page}{53}
-\setcounter{equation}{0}
-\setcounter{enumi}{3}
-\setcounter{enumii}{0}
-\setcounter{enumiii}{0}
-\setcounter{enumiv}{12}
-\setcounter{footnote}{0}
-\setcounter{mpfootnote}{0}
-\setcounter{part}{2}
-\setcounter{chapter}{4}
-\setcounter{section}{5}
-\setcounter{subsection}{2}
-\setcounter{subsubsection}{0}
-\setcounter{paragraph}{0}
-\setcounter{subparagraph}{0}
-\setcounter{figure}{11}
-\setcounter{table}{3}
-\setcounter{numauthors}{0}
-\setcounter{parentequation}{0}
-\setcounter{subfigure}{0}
-\setcounter{lofdepth}{1}
-\setcounter{subtable}{0}
-\setcounter{lotdepth}{1}
-\setcounter{lstnumber}{70}
-\setcounter{ContinuedFloat}{0}
-\setcounter{AlgoLine}{7}
-\setcounter{algocfline}{2}
-\setcounter{algocfproc}{2}
-\setcounter{algocf}{2}
-\setcounter{nprt@mantissa@digitsbefore}{0}
-\setcounter{nprt@mantissa@digitsafter}{0}
-\setcounter{nprt@exponent@digitsbefore}{0}
-\setcounter{nprt@exponent@digitsafter}{0}
-\setcounter{nprt@digitsfirstblock}{0}
-\setcounter{nprt@blockcnt}{0}
-\setcounter{nprt@cntprint}{0}
-\setcounter{proposition}{0}
-\setcounter{theorem}{0}
-\setcounter{exercise}{0}
-\setcounter{example}{0}
-\setcounter{definition}{0}
-\setcounter{proof}{0}
-\setcounter{lstlisting}{6}
-}