3 \citation{Huang:1981:TDS:539567}
4 \citation{Weiss:2006:FMB:1179352.1141918}
8 \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}}
9 \@writefile{toc}{\contentsline {section}{\numberline {2}General structure}{2}}
10 \providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
11 \newlabel{algo:memcopy:H2D}{{7}{2}}
12 \newlabel{algo:memcopy:kernel}{{8}{2}}
13 \newlabel{algo:memcopy:D2H}{{9}{2}}
14 \@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Global memory management on CPU and GPU sides.\relax }}{2}}
15 \newlabel{algo:memcopy}{{1}{2}}
16 \@writefile{toc}{\contentsline {section}{\numberline {3}Implementing a fast median filter}{2}}
17 \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Basic principles}{2}}
18 \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Illustration of $5\times 5$ median filtering\relax }}{2}}
19 \newlabel{median1}{{1}{2}}
20 \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Using registers}{2}}
21 \citation{Batcher:1968:SNA:1468075.1468121}
22 \citation{cormen2001introduction}
23 \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Determination of the Median value by the forgetful selection process, applied to a $3\times 3$ neighborhood window.\relax }}{3}}
24 \newlabel{forget}{{2}{3}}
25 \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Hiding Latencies}{3}}
26 \@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Compute complexity}{3}}
27 \citation{Sanchez-2-2012}
28 \@writefile{toc}{\contentsline {section}{\numberline {4}Experiments}{4}}
29 \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Reducing register count in a 5$\times $5 register-only median kernel processing 2 input pixels. The first 7 forgetful selection stages are common to both processed center pixels: the first one needs 14 pixels, leaving 6 more pixels to be processed one after another.\relax }}{4}}
30 \newlabel{median5overlap}{{3}{4}}
31 \@writefile{toc}{\contentsline {section}{\numberline {5}Results}{4}}
35 \@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Time cost of data transfer for each image size and gray-level format on C2070 GPU.\relax }}{5}}
36 \newlabel{tabmemcpy}{{1}{5}}
37 \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Pixel throughput value comparison, in million pixels per second, of several implementations against our PRMF. From left to right: PCMF, BVM, PRMF, ArrayFire (impossible with 4096$\times $4096)\relax }}{5}}
38 \newlabel{figcomp}{{4}{5}}
39 \@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Maximum effective pixel throughput values for $T_8$ and $T_{16}$ (in MPixel per second) on C2070, achieved when processing 8 and 16 bit-coded gray-level images.\relax }}{5}}
40 \newlabel{tabmaxtp}{{2}{5}}
41 \bibcite{Huang:1981:TDS:539567}{2}
44 \bibcite{Sanchez-2-2012}{5}
46 \bibcite{Weiss:2006:FMB:1179352.1141918}{7}
47 \@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Runtime and pixel throughput of fast median kernels processing 8 and 16 bit-coded gray-level images and run by C2070 GPU.\relax }}{6}}
48 \newlabel{tabresults}{{3}{6}}