]> AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter3/ch3.aux
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
6cc370710ee767d746dcdc2192ebb9e7a4a8d701
[book_gpu.git] / BookGPU / Chapters / chapter3 / ch3.aux
1 \relax 
2 \@writefile{toc}{\author{Gilles Perrot}{}}
3 \@writefile{loa}{\addvspace {10\p@ }}
4 \@writefile{toc}{\contentsline {chapter}{\numberline {3}Setting up the environment.}{23}}
5 \@writefile{lof}{\addvspace {10\p@ }}
6 \@writefile{lot}{\addvspace {10\p@ }}
7 \newlabel{algo:memcopy:H2D}{{\caption@xref {algo:memcopy:H2D}{ on input line 124}}{23}}
8 \newlabel{algo:memcopy:kernel}{{\caption@xref {algo:memcopy:kernel}{ on input line 125}}{23}}
9 \newlabel{algo:memcopy:D2H}{{\caption@xref {algo:memcopy:D2H}{ on input line 126}}{23}}
10 \@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces Global memory management on CPU and GPU sides.\relax }}{23}}
11 \newlabel{algo:memcopy}{{1}{23}}
12 \@writefile{toc}{\contentsline {section}{\numberline {3.1}Data transfers, memory management.}{24}}
13 \newlabel{lst:main1}{{3.1}{25}}
14 \@writefile{lol}{\contentsline {lstlisting}{\numberline {3.1}Generic main.cu file used to launch CUDA kernels}{25}}
15 \newlabel{lst:fkern1}{{3.2}{25}}
16 \@writefile{lol}{\contentsline {lstlisting}{\numberline {3.2}fast\_kernels.cu file featuring one kernel skeleton}{25}}
17 \newlabel{lst:mkfile}{{3.3}{26}}
18 \@writefile{lol}{\contentsline {lstlisting}{\numberline {3.3}Generic Makefile based on those provided by NV SDK}{26}}
19 \@writefile{toc}{\contentsline {section}{\numberline {3.2}Performance measurements}{26}}
20 \newlabel{lst:chronos}{{3.4}{26}}
21 \@writefile{lol}{\contentsline {lstlisting}{\numberline {3.4}Time measurement technique using cutil functions}{26}}
22 \@writefile{loa}{\addvspace {10\p@ }}
23 \@writefile{toc}{\contentsline {chapter}{\numberline {4}Implementing a fast median filter}{29}}
24 \@writefile{lof}{\addvspace {10\p@ }}
25 \@writefile{lot}{\addvspace {10\p@ }}
26 \@writefile{toc}{\contentsline {section}{\numberline {4.1}Introduction}{29}}
27 \@writefile{toc}{\contentsline {section}{\numberline {4.2}Median filtering}{30}}
28 \@writefile{toc}{\contentsline {subsection}{\numberline {4.2.1}Basic principles}{30}}
29 \@writefile{toc}{\contentsline {subsection}{\numberline {4.2.2}A naive implementation}{30}}
30 \newlabel{img:sap_example_ref}{{4.1(a)}{31}}
31 \newlabel{sub@img:sap_example_ref}{{(a)}{31}}
32 \newlabel{img:sap_example_med3}{{4.1(b)}{31}}
33 \newlabel{sub@img:sap_example_med3}{{(b)}{31}}
34 \newlabel{img:sap_example_med5}{{4.1(c)}{31}}
35 \newlabel{sub@img:sap_example_med5}{{(c)}{31}}
36 \newlabel{img:sap_example_med3_it2}{{4.1(d)}{31}}
37 \newlabel{sub@img:sap_example_med3_it2}{{(d)}{31}}
38 \@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Example of median filtering, applied to salt \& pepper noise reduction.\relax }}{31}}
39 \@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Airplane image, corrupted by salt and pepper noise of density 0.25}}}{31}}
40 \@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Image denoised by a $3\times 3$ median filter}}}{31}}
41 \@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Image denoised by a $5\times 5$ median filter}}}{31}}
42 \@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Image denoised by 2 iterations of a $3\times 3$ median filter}}}{31}}
43 \newlabel{fig:sap_examples}{{4.1}{31}}
44 \newlabel{lst:medianGeneric}{{4.1}{32}}
45 \@writefile{lol}{\contentsline {lstlisting}{\numberline {4.1}Generic CUDA kernel achieving median filtering}{32}}
46 \@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces Example of 5x5 median filtering\relax }}{33}}
47 \newlabel{fig:median_1}{{4.2}{33}}
48 \newlabel{algoMedianGeneric}{{2}{33}}
49 \newlabel{algoMedianGeneric:memcpyH2D}{{\caption@xref {algoMedianGeneric:memcpyH2D}{ on input line 241}}{33}}
50 \newlabel{algoMedianGeneric:cptstart}{{\caption@xref {algoMedianGeneric:cptstart}{ on input line 246}}{33}}
51 \newlabel{algoMedianGeneric:cptend}{{\caption@xref {algoMedianGeneric:cptend}{ on input line 246}}{33}}
52 \newlabel{algoMedianGeneric:memcpyD2H}{{\caption@xref {algoMedianGeneric:memcpyD2H}{ on input line 247}}{33}}
53 \@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces generic n$\times $n median filter\relax }}{33}}
54 \@writefile{toc}{\contentsline {section}{\numberline {4.3}NVidia GPU tuning recipes}{33}}
55 \@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces Illustration of window overlapping in 5x5 median filtering\relax }}{34}}
56 \newlabel{fig:median_overlap}{{4.3}{34}}
57 \@writefile{lot}{\contentsline {table}{\numberline {4.1}{\ignorespaces Performance results of \texttt  {kernel medianR}. \relax }}{34}}
58 \newlabel{tab:medianHisto1}{{4.1}{34}}
59 \@writefile{toc}{\contentsline {section}{\numberline {4.4}A 3$\times $3 median filter: using registers }{35}}
60 \@writefile{toc}{\contentsline {subsection}{\numberline {4.4.1}The simplest way}{35}}
61 \newlabel{lst:kernelMedian3RegTri9}{{4.2}{36}}
62 \@writefile{lol}{\contentsline {lstlisting}{\numberline {4.2}3$\times $3 median filter kernel using one register per neighborhood pixel and bubble sort}{36}}
63 \@writefile{toc}{\contentsline {subsection}{\numberline {4.4.2}Further optimization}{36}}
64 \@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces Comparison of pixel throughputs on GPU C2070 and CPU for generic median, 3$\times $3 register-only median and \textit  {libJacket}.\relax }}{37}}
65 \newlabel{fig:compMedians1}{{4.4}{37}}
66 \@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces Forgetful selection with the minimal element register count. Illustration for 3$\times $3 pixel window represented in a row and supposed sorted.\relax }}{37}}
67 \newlabel{fig:forgetful_selection}{{4.5}{37}}
68 \@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2.1}Reducing register count}{37}}
69 \newlabel{lst:medianForget1pix3}{{4.3}{38}}
70 \@writefile{lol}{\contentsline {lstlisting}{\numberline {4.3}3$\times $3 median filter kernel using the minimum register count of 6 to find the median value by forgetful selection method}{38}}
71 \@writefile{toc}{\contentsline {subsubsection}{\numberline {4.4.2.2}More data output per thread}{39}}
72 \@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces Illustration of how window overlapping is used to combine 2 pixel selections in a 3$\times $3 median kernel.\relax }}{40}}
73 \newlabel{fig:median3_overlap}{{4.6}{40}}
74 \newlabel{lst:medianForget2pix3}{{4.4}{40}}
75 \@writefile{lol}{\contentsline {lstlisting}{\numberline {4.4}3$\times $3 median filter kernel processing 2 output pixel values per thread using combined forgetful selection.}{40}}
76 \@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces Comparison of pixel throughput on GPU C2070 for the different 3$\times $3 median kernels.\relax }}{41}}
77 \newlabel{fig:compMedians2}{{4.7}{41}}
78 \@writefile{toc}{\contentsline {section}{\numberline {4.5}A 5$\times $5 and more median filter }{41}}
79 \@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces Reducing register count in a 5$\times $5 register-only median kernel outputting 2 pixels simultaneously. The first 7 forgetful selection stages are common to both processed center pixels. Only the last 5 selections have to be done separately.\relax }}{42}}
80 \newlabel{fig:median5overlap}{{4.8}{42}}
81 \newlabel{sec:median5}{{4.5.1}{42}}
82 \@writefile{toc}{\contentsline {subsection}{\numberline {4.5.1}A register-only 5$\times $5 median filter }{42}}
83 \newlabel{lst:medianForget2pix5}{{4.5}{42}}
84 \@writefile{lol}{\contentsline {lstlisting}{\numberline {4.5}kernel 5$\times $5 median filter processing 2 output pixel values per thread by a combined forgetful selection.}{42}}
85 \@writefile{lot}{\contentsline {table}{\numberline {4.2}{\ignorespaces Performance of various 5$\times $5 median kernel implementations, applied on 4096$\times $4096 pixel image with C2070 GPU card.\relax }}{44}}
86 \newlabel{tab:median5comp}{{4.2}{44}}
87 \@writefile{toc}{\contentsline {subsection}{\numberline {4.5.2}Fast approximated n$\times $n median filter }{44}}
88 \@writefile{lot}{\contentsline {table}{\numberline {4.3}{\ignorespaces Measured performance of one generic pseudo-separable median kernel applied to 4096$\times $4096 pixel image with various window sizes.\relax }}{45}}
89 \newlabel{tab:medianSeparable}{{4.3}{45}}
90 \newlabel{lst:medianSeparable}{{4.6}{45}}
91 \@writefile{lol}{\contentsline {lstlisting}{\numberline {4.6}generic pseudo median kernel.}{45}}
92 \newlabel{img:sap_example_ref}{{4.9(a)}{46}}
93 \newlabel{sub@img:sap_example_ref}{{(a)}{46}}
94 \newlabel{img:sap_example_sep_med3}{{4.9(b)}{46}}
95 \newlabel{sub@img:sap_example_sep_med3}{{(b)}{46}}
96 \newlabel{img:sap_example_sep_med5}{{4.9(c)}{46}}
97 \newlabel{sub@img:sap_example_sep_med5}{{(c)}{46}}
98 \newlabel{img:sap_example_sep_med3_it2}{{4.9(d)}{46}}
99 \newlabel{sub@img:sap_example_sep_med3_it2}{{(d)}{46}}
100 \@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces Example of separable median filtering (smoother), applied to salt \& pepper noise reduction.\relax }}{46}}
101 \@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Airplane image, corrupted by salt and pepper noise of density 0.25}}}{46}}
102 \@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Image denoised by a $3\times 3$ separable smoother}}}{46}}
103 \@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Image denoised by a $5\times 5$ separable smoother}}}{46}}
104 \@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Image background estimation by a $55\times 55$ separable smoother}}}{46}}
105 \newlabel{fig:sap_examples2}{{4.9}{46}}
106 \@writefile{toc}{\contentsline {section}{Bibliography}{47}}
107 \@setckpt{Chapters/chapter3/ch3}{
108 \setcounter{page}{49}
109 \setcounter{equation}{0}
110 \setcounter{enumi}{3}
111 \setcounter{enumii}{0}
112 \setcounter{enumiii}{0}
113 \setcounter{enumiv}{9}
114 \setcounter{footnote}{0}
115 \setcounter{mpfootnote}{0}
116 \setcounter{part}{1}
117 \setcounter{chapter}{4}
118 \setcounter{section}{5}
119 \setcounter{subsection}{2}
120 \setcounter{subsubsection}{0}
121 \setcounter{paragraph}{0}
122 \setcounter{subparagraph}{0}
123 \setcounter{figure}{9}
124 \setcounter{table}{3}
125 \setcounter{numauthors}{0}
126 \setcounter{parentequation}{0}
127 \setcounter{subfigure}{0}
128 \setcounter{lofdepth}{1}
129 \setcounter{subtable}{0}
130 \setcounter{lotdepth}{1}
131 \setcounter{lstnumber}{70}
132 \setcounter{ContinuedFloat}{0}
133 \setcounter{float@type}{16}
134 \setcounter{algorithm}{2}
135 \setcounter{ALC@unique}{0}
136 \setcounter{ALC@line}{0}
137 \setcounter{ALC@rem}{0}
138 \setcounter{ALC@depth}{0}
139 \setcounter{AlgoLine}{0}
140 \setcounter{algocfline}{0}
141 \setcounter{algocfproc}{0}
142 \setcounter{algocf}{0}
143 \setcounter{proposition}{0}
144 \setcounter{theorem}{0}
145 \setcounter{exercise}{0}
146 \setcounter{example}{0}
147 \setcounter{definition}{0}
148 \setcounter{proof}{0}
149 \setcounter{lstlisting}{6}
150 }