-\@writefile{toc}{\contentsline {section}{\numberline {4.1}Introduction}{29}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.2}Median filtering}{30}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.2.1}Basic principles}{30}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.2.2}A naive implementation}{30}}
-\newlabel{img:sap_example_ref}{{4.1(a)}{31}}
-\newlabel{sub@img:sap_example_ref}{{(a)}{31}}
-\newlabel{img:sap_example_med3}{{4.1(b)}{31}}
-\newlabel{sub@img:sap_example_med3}{{(b)}{31}}
-\newlabel{img:sap_example_med5}{{4.1(c)}{31}}
-\newlabel{sub@img:sap_example_med5}{{(c)}{31}}
-\newlabel{img:sap_example_med3_it2}{{4.1(d)}{31}}
-\newlabel{sub@img:sap_example_med3_it2}{{(d)}{31}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Exemple of median filtering, applied to salt \& pepper noise reduction.\relax }}{31}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Airplane image, corrupted by salt and pepper noise of density 0.25}}}{31}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Image denoised by a $3\times 3$ median filter}}}{31}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Image denoised by a $5\times 5$ median filter}}}{31}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Image denoised by 2 iterations of a $3\times 3$ median filter}}}{31}}
-\newlabel{fig:sap_examples}{{4.1}{31}}
-\newlabel{lst:medianGeneric}{{4.1}{32}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.1}Generic CUDA kernel achieving median filtering}{32}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces Exemple of 5x5 median filtering\relax }}{33}}
-\newlabel{fig:median_1}{{4.2}{33}}
-\newlabel{algoMedianGeneric}{{2}{33}}
-\newlabel{algoMedianGeneric:memcpyH2D}{{\caption@xref {algoMedianGeneric:memcpyH2D}{ on input line 241}}{33}}
-\newlabel{algoMedianGeneric:cptstart}{{\caption@xref {algoMedianGeneric:cptstart}{ on input line 246}}{33}}
-\newlabel{algoMedianGeneric:cptend}{{\caption@xref {algoMedianGeneric:cptend}{ on input line 246}}{33}}
-\newlabel{algoMedianGeneric:memcpyD2H}{{\caption@xref {algoMedianGeneric:memcpyD2H}{ on input line 247}}{33}}
-\@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces generic n$\times $n median filter\relax }}{33}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.3}NVidia GPU tuning recipes}{33}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces Illustration of window overlapping in 5x5 median filtering\relax }}{34}}
-\newlabel{fig:median_overlap}{{4.3}{34}}
-\@writefile{lot}{\contentsline {table}{\numberline {4.1}{\ignorespaces Performance results of \texttt {kernel medianR}. \relax }}{34}}
-\newlabel{tab:medianHisto1}{{4.1}{34}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.4}A 3$\times $3 median filter: using registers }{35}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.4.1}The simplest way}{35}}
-\newlabel{lst:kernelMedian3RegTri9}{{4.2}{36}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.2}3$\times $3 median filter kernel using one register per neighborhood pixel and bubble sort}{36}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.4.2}Further optimization}{36}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces Comparison of pixel throughputs on GPU C2070 and CPU for generic median, in 3$\times $3 median register-only and \textit {libJacket}.\relax }}{37}}
-\newlabel{fig:compMedians1}{{4.4}{37}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces Forgetful selection with the minimal element register count. Illustration for 3$\times $3 pixel window represented in a row and supposed sorted.\relax }}{37}}
-\newlabel{fig:forgetful_selection}{{4.5}{37}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2.1}Reducing register count}{37}}
-\newlabel{lst:medianForget1pix3}{{4.3}{38}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.3}3$\times $3 median filter kernel using the minimum register count of 6 to find the median value by forgetful selection method}{38}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2.2}More data output per thread}{39}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces Illustration of how window overlapping is used to combine 2 pixel selections in a 3$\times $3 median kernel.\relax }}{40}}
-\newlabel{fig:median3_overlap}{{4.6}{40}}
-\newlabel{lst:medianForget2pix3}{{4.4}{40}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.4}3$\times $3 median filter kernel processing 2 output pixel values per thread using combined forgetful selection.}{40}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces Comparison of pixel throughput on GPU C2070 for the different 3$\times $3 median kernels.\relax }}{41}}
-\newlabel{fig:compMedians2}{{4.7}{41}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.5}A 5$\times $5 and more median filter }{41}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces Reducing register count in a 5$\times $5 register-only median kernel outputting 2 pixels simultaneously. The first 7 forgetful selection stages are common to both processed center pixels. Only the last 5 selections have to be done separately.\relax }}{42}}
-\newlabel{fig:median5overlap}{{4.8}{42}}
-\newlabel{sec:median5}{{4.5.1}{42}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.5.1}A register-only 5$\times $5 median filter }{42}}
-\newlabel{lst:medianForget2pix5}{{4.5}{42}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.5}kernel 5$\times $5 median filter processing 2 output pixel values per thread by a combined forgetfull selection.}{42}}
-\@writefile{lot}{\contentsline {table}{\numberline {4.2}{\ignorespaces Performance of various 5$\times $5 median kernel implementations, applied on 4096$\times $4096 pixel image with C2070 GPU card.\relax }}{44}}
-\newlabel{tab:median5comp}{{4.2}{44}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.5.2}Fast approximated n$\times $n median filter }{44}}
-\@writefile{lot}{\contentsline {table}{\numberline {4.3}{\ignorespaces Measured performance of one generic pseudo-separable median kernel applied to 4096$\times $4096 pixel image with various window sizes.\relax }}{45}}
-\newlabel{tab:medianSeparable}{{4.3}{45}}
-\newlabel{lst:medianSeparable}{{4.6}{45}}
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.6}generic pseudo median kernel.}{45}}
-\newlabel{img:sap_example_ref}{{4.9(a)}{46}}
-\newlabel{sub@img:sap_example_ref}{{(a)}{46}}
-\newlabel{img:sap_example_sep_med3}{{4.9(b)}{46}}
-\newlabel{sub@img:sap_example_sep_med3}{{(b)}{46}}
-\newlabel{img:sap_example_sep_med5}{{4.9(c)}{46}}
-\newlabel{sub@img:sap_example_sep_med5}{{(c)}{46}}
-\newlabel{img:sap_example_sep_med3_it2}{{4.9(d)}{46}}
-\newlabel{sub@img:sap_example_sep_med3_it2}{{(d)}{46}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces Exemple of separable median filtering (smoother), applied to salt \& pepper noise reduction.\relax }}{46}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Airplane image, corrupted with by salt and pepper noise of density 0.25}}}{46}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Image denoised by a $3\times 3$ separable smoother}}}{46}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Image denoised by a $5\times 5$ separable smoother}}}{46}}
-\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Image background estimation by a $55\times 55$ separable smoother}}}{46}}
-\newlabel{fig:sap_examples2}{{4.9}{46}}
-\@writefile{toc}{\contentsline {section}{Bibliography}{47}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.1}Introduction}{31}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.2}Median filtering}{32}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.2.1}Basic principles}{32}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.2.2}A naive implementation}{32}}
+\newlabel{img:sap_example_ref}{{4.1(a)}{33}}
+\newlabel{sub@img:sap_example_ref}{{(a)}{33}}
+\newlabel{img:sap_example_med3}{{4.1(b)}{33}}
+\newlabel{sub@img:sap_example_med3}{{(b)}{33}}
+\newlabel{img:sap_example_med5}{{4.1(c)}{33}}
+\newlabel{sub@img:sap_example_med5}{{(c)}{33}}
+\newlabel{img:sap_example_med3_it2}{{4.1(d)}{33}}
+\newlabel{sub@img:sap_example_med3_it2}{{(d)}{33}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Example of median filtering, applied to salt \& pepper noise reduction.\relax }}{33}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Airplane image, corrupted by salt and pepper noise of density 0.25}}}{33}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Image denoised by a $3\times 3$ median filter}}}{33}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Image denoised by a $5\times 5$ median filter}}}{33}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Image denoised by 2 iterations of a $3\times 3$ median filter}}}{33}}
+\newlabel{fig:sap_examples}{{4.1}{33}}
+\newlabel{lst:medianGeneric}{{4.1}{34}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.1}Generic CUDA kernel achieving median filtering}{34}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces Example of 5x5 median filtering\relax }}{35}}
+\newlabel{fig:median_1}{{4.2}{35}}
+\newlabel{algoMedianGeneric}{{2}{35}}
+\newlabel{algoMedianGeneric:memcpyH2D}{{1}{35}}
+\newlabel{algoMedianGeneric:cptstart}{{3}{35}}
+\newlabel{algoMedianGeneric:cptend}{{5}{35}}
+\newlabel{algoMedianGeneric:memcpyD2H}{{7}{35}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {2}{\ignorespaces generic n$\times $n median filter\relax }}{35}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.3}NVidia GPU tuning recipes}{35}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces Illustration of window overlapping in 5x5 median filtering\relax }}{36}}
+\newlabel{fig:median_overlap}{{4.3}{36}}
+\@writefile{lot}{\contentsline {table}{\numberline {4.1}{\ignorespaces Performance results of \texttt {kernel medianR}. \relax }}{36}}
+\newlabel{tab:medianHisto1}{{4.1}{36}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.4}A 3$\times $3 median filter: using registers }{37}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.4.1}The simplest way}{37}}
+\newlabel{lst:kernelMedian3RegTri9}{{4.2}{38}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.2}3$\times $3 median filter kernel using one register per neighborhood pixel and bubble sort}{38}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.4.2}Further optimization}{38}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces Comparison of pixel throughputs on GPU C2070 and CPU for generic median, 3$\times $3 median register-only and \textit {libJacket}.\relax }}{39}}
+\newlabel{fig:compMedians1}{{4.4}{39}}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2.1}Reducing register count }{39}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces Forgetful selection with the minimal element register count. Illustration for 3$\times $3 pixel window represented in a row and supposed sorted.\relax }}{40}}
+\newlabel{fig:forgetful_selection}{{4.5}{40}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces Determination of the Median value by the forgetful selection process, applied to a $3\times 3$ neighborhood window.\relax }}{41}}
+\newlabel{fig:forgetful3}{{4.6}{41}}
+\newlabel{lst:medianForget1pix3}{{4.3}{41}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.3}3$\times $3 median filter kernel using the minimum register count of 6 to find the median value by forgetful selection method. The optimal thread block size is 128 on GTX280 and 256 on C2070.}{41}}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2.2}More data output per thread}{42}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces Illustration of how window overlapping is used to combine 2 pixel selections in a 3$\times $3 median kernel.\relax }}{42}}
+\newlabel{fig:median3_overlap}{{4.7}{42}}
+\newlabel{lst:medianForget2pix3}{{4.4}{43}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.4}3$\times $3 median filter kernel processing 2 output pixel values per thread using combined forgetful selection.}{43}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.5}A 5$\times $5 and more median filter }{43}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces Comparison of pixel throughput on GPU C2070 for the different 3$\times $3 median kernels.\relax }}{44}}
+\newlabel{fig:compMedians2}{{4.8}{44}}
+\newlabel{sec:median5}{{4.5.1}{44}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.5.1}A register-only 5$\times $5 median filter }{44}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces Reducing register count in a 5$\times $5 register-only median kernel outputting 2 pixels simultaneously.}}{45}}
+\newlabel{fig:median5overlap}{{4.9}{45}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.10}{\ignorespaces First iteration of the $5\times 5$ selection process, with $k_{25}=14$, which shows how Instruction Level Parallelism is maximized by the use of an incomplete sorting network.}}{45}}
+\newlabel{fig:bitonic}{{4.10}{45}}
+\newlabel{lst:medianForget2pix5}{{4.5}{46}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.5}kernel 5$\times $5 median filter processing 2 output pixel values per thread by a combined forgetful selection.}{46}}
+\@writefile{lot}{\contentsline {table}{\numberline {4.2}{\ignorespaces Performance of various 5$\times $5 median kernel implementations, applied on 4096$\times $4096 pixel image with C2070 GPU card.\relax }}{47}}
+\newlabel{tab:median5comp}{{4.2}{47}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.5.2}Fast approximated n$\times $n median filter }{47}}
+\@writefile{lot}{\contentsline {table}{\numberline {4.3}{\ignorespaces Measured performance of one generic pseudo-separable median kernel applied to 4096$\times $4096 pixel image with various window sizes.\relax }}{48}}
+\newlabel{tab:medianSeparable}{{4.3}{48}}
+\newlabel{img:sap_example_ref}{{4.11(a)}{49}}
+\newlabel{sub@img:sap_example_ref}{{(a)}{49}}
+\newlabel{img:sap_example_sep_med3}{{4.11(b)}{49}}
+\newlabel{sub@img:sap_example_sep_med3}{{(b)}{49}}
+\newlabel{img:sap_example_sep_med5}{{4.11(c)}{49}}
+\newlabel{sub@img:sap_example_sep_med5}{{(c)}{49}}
+\newlabel{img:sap_example_sep_med3_it2}{{4.11(d)}{49}}
+\newlabel{sub@img:sap_example_sep_med3_it2}{{(d)}{49}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.11}{\ignorespaces Example of separable median filtering (smoother), applied to salt \& pepper noise reduction.\relax }}{49}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Airplane image, corrupted by salt and pepper noise of density 0.25}}}{49}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Image denoised by a $3\times 3$ separable smoother}}}{49}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Image denoised by a $5\times 5$ separable smoother}}}{49}}
+\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Image background estimation by a $55\times 55$ separable smoother}}}{49}}
+\newlabel{fig:sap_examples2}{{4.11}{49}}
+\newlabel{lst:medianSeparable}{{4.6}{50}}
+\@writefile{lol}{\contentsline {lstlisting}{\numberline {4.6}generic pseudo median kernel.}{50}}