dmems12.tex

   1
   2 \documentclass[10pt, conference, compsocconf]{IEEEtran}
   3 %\usepackage{latex8}
   4 %\usepackage{times}
   5 \usepackage[utf8]{inputenc}
   6 %\usepackage[cyr]{aeguill}
   7 %\usepackage{pstricks,pst-node,pst-text,pst-3d}
   8 %\usepackage{babel}
   9 \usepackage{amsmath}
  10 \usepackage{url}
  11 \usepackage{graphicx}
  12 \usepackage{thumbpdf}
  13 \usepackage{color}
  14 \usepackage{moreverb}
  15 \usepackage{commath}
  16 \usepackage{subfigure}
  17 %\input{psfig.sty}
  18 \usepackage{fullpage}
  19 \usepackage{fancybox}
  20
  21 \usepackage[ruled,lined,linesnumbered]{algorithm2e}
  22
  23 %%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands.
  24 \newcommand{\noun}[1]{\textsc{#1}}
  25
  26 \newcommand{\tab}{\ \ \ }
  27
  28
  29
  30 \begin{document}
  31
  32
  33 %% \author{\IEEEauthorblockN{Authors Name/s per 1st Affiliation (Author)}
  34 %% \IEEEauthorblockA{line 1 (of Affiliation): dept. name of organization\\
  35 %% line 2: name of organization, acronyms acceptable\\
  36 %% line 3: City, Country\\
  37 %% line 4: Email: name@xyz.com}
  38 %% \and
  39 %% \IEEEauthorblockN{Authors Name/s per 2nd Affiliation (Author)}
  40 %% \IEEEauthorblockA{line 1 (of Affiliation): dept. name of organization\\
  41 %% line 2: name of organization, acronyms acceptable\\
  42 %% line 3: City, Country\\
  43 %% line 4: Email: name@xyz.com}
  44 %% }
  45
  46
  47
  48 \title{Using FPGAs for high speed and real time cantilever deflection estimation}
  49 \author{\IEEEauthorblockN{Raphaël Couturier\IEEEauthorrefmark{1}, Stéphane Domas\IEEEauthorrefmark{1}, Gwenhaël Goavec-Merou\IEEEauthorrefmark{2} and Michel Lenczner\IEEEauthorrefmark{2}}
  50 \IEEEauthorblockA{\IEEEauthorrefmark{1}FEMTO-ST, DISC, University of Franche-Comte, Belfort, France\\
  51 \{raphael.couturier,stephane.domas\}@univ-fcomte.fr}
  52 \IEEEauthorblockA{\IEEEauthorrefmark{2}FEMTO-ST, Time-Frequency, University of Franche-Comte, Besançon, France\\
  53 \{michel.lenczner@utbm.fr,gwenhael.goavec@trabucayre.com}
  54 }
  55
  56
  57
  58
  59
  60
  61 \maketitle
  62
  63 \thispagestyle{empty}
  64
  65 \begin{abstract}
  66
  67
  68
  69 {\it keywords}: FPGA, cantilever, interferometry.
  70 \end{abstract}
  71
  72 \section{Introduction}
  73
  74 %% blabla +
  75 %% quelques ref commentées sur les calculs basés sur l'interférométrie
  76
  77 \section{Measurement principles}
  78 \label{sec:measure}
  79
  80 \subsection{Architecture}
  81 \label{sec:archi}
  82 %% description de l'architecture générale de l'acquisition d'images
  83 %% avec au milieu une unité de traitement dont on ne précise pas ce
  84 %% qu'elle est.
  85
  86 %% image tirée des expériences.
  87
  88 \subsection{Cantilever deflection estimation}
  89 \label{sec:deflest}
  90
  91 As shown on image \ref{img:img-xp}, each cantilever is covered by
  92 interferometric fringes. The fringes will distort when cantilevers are
  93 deflected. Estimating the deflection is done by computing this
  94 distortion. For that, (ref A. Meister + M Favre) proposed a method
  95 based on computing the phase of the fringes, at the base of each
  96 cantilever, near the tip, and on the base of the array. They assume
  97 that a linear relation binds these phases, which can be use to
  98 "unwrap" the phase at the tip and to determine the deflection.\\
  99
 100 More precisely, segment of pixels are extracted from images taken by a
 101 high-speed camera. These segments are large enough to cover several
 102 interferometric fringes and are placed at the base and near the tip of
 103 the cantilevers. They are called base profile and tip profile in the
 104 following. Furthermore, a reference profile is taken on the base of
 105 the cantilever array.
 106
 107 The pixels intensity $I$ (in gray level) of each profile is modelized by :
 108
 109 \begin{equation}
 110 \label{equ:profile}
 111 I(x) = ax+b+A.cos(2\pi f.x + \theta)
 112 \end{equation}
 113
 114 where $x$ is the position of a pixel in its associated segment.
 115
 116 The global method consists in two main sequences. The first one aims
 117 to determin the frequency $f$ of each profile with an algorithm based
 118 on spline interpolation (see section \ref{algo-spline}). It also
 119 computes the coefficient used for unwrapping the phase. The second one
 120 is the acquisition loop, while which images are taken at regular time
 121 steps. For each image, the phase $\theta$ of all profiles is computed
 122 to obtain, after unwrapping, the deflection of cantilevers.
 123
 124 \subsection{Design goals}
 125 \label{sec:goals}
 126
 127 If we put aside some hardware issues like the speed of the link
 128 between the camera and the computation unit, the time to deserialize
 129 pixels and to store them in memory, ... the phase computation is
 130 obviously the bottle-neck of the whole process. For example, if we
 131 consider the camera actually in use, an exposition time of 2.5ms for
 132 $1024\times 1204$ pixels seems the minimum that can be reached. For a
 133 $10\times 10$ cantilever array, if we neglect the time to extract
 134 pixels, it implies that computing the deflection of a single
 135 cantilever should take less than 25$\mu$s, thus 12.5$\mu$s by phase.\\
 136
 137 In fact, this timing is a very hard constraint. Let consider a very
 138 small programm that initializes twenty million of doubles in memory
 139 and then does 1000000 cumulated sums on 20 contiguous values
 140 (experimental profiles have about this size). On an intel Core 2 Duo
 141 E6650 at 2.33GHz, this program reaches an average of 155Mflops. It
 142 implies that the phase computation algorithm should not take more than
 143 $240\times 12.5 = 1937$ floating operations. For integers, it gives
 144 $3000$ operations.
 145
 146 %% to be continued ...
 147
 148 %% � faire : timing de l'algo spline en C avec atan et tout le bordel.
 149
 150
 151
 152
 153 \section{Proposed solution}
 154 \label{sec:solus}
 155
 156
 157 \subsection{FPGA constraints}
 158
 159 %% contraintes imposées par le FPGA : algo pipeline/parallele, pas d'op math complexe, ...
 160
 161
 162 \subsection{Considered algorithms}
 163
 164 Two solutions have been studied to achieve phase computation. The
 165 original one, proposed by A. Meister and M. Favre, is based on
 166 interpolation by splines. It allows to compute frequency and
 167 phase. The second one, detailed in this article, is based on a
 168 classical least square method but suppose that frequency is already
 169 known.
 170
 171 \subsubsection{Spline algorithm}
 172 \label{sec:algo-spline}
 173 Let consider a profile $P$, that is a segment of $M$ pixels with an
 174 intensity in gray levels. Let call $I(x)$ the intensity of profile in $x
 175 \in [0,M[$.
 176
 177 At first, only $M$ values of $I$ are known, for $x = 0, 1,
 178 \ldots,M-1$. A normalisation allows to scale known intensities into
 179 $[-1,1]$. We compute splines that fit at best these normalised
 180 intensities. Splines are used to interpolate $N = k\times M$ points
 181 (typically $k=3$ is sufficient), within $[0,M[$. Let call $x^s$ the
 182 coordinates of these $N$ points and $I^s$ their intensities.
 183
 184 In order to have the frequency, the mean line $a.x+b$ (see equation \ref{equ:profile}) of $I^s$ is
 185 computed. Finding intersections of $I^s$ and this line allow to obtain
 186 the period thus the frequency.
 187
 188 The phase is computed via the equation :
 189 \begin{equation}
 190 \theta = atan \left[ \frac{\sum_{i=0}^{N-1} sin(2\pi f x^s_i) \times I^s(x^s_i)}{\sum_{i=0}^{N-1} cos(2\pi f x^s_i) \times I^s(x^s_i)} \right]
 191 \end{equation}
 192
 193 Two things can be noticed. Firstly, the frequency could also be
 194 obtained using the derivates of spline equations, which only implies
 195 to solve quadratic equations. Secondly, frequency of each profile is
 196 computed a single time, before the acquisition loop. Thus, $sin(2\pi f
 197 x^s_i)$ and $cos(2\pi f x^s_i)$ could also be computed before the loop, which leads to a
 198 much faster computation of $\theta$.
 199
 200 \subsubsection{Least square algorithm}
 201
 202 Assuming that we compute the phase during the acquisition loop,
 203 equation \ref{equ:profile} has only 4 parameters :$a, b, A$, and
 204 $\theta$, $f$ and $x$ being already known. Since $I$ is non-linear, a
 205 least square method based an Gauss-newton algorithm must be used to
 206 determine these four parameters. Since it is an iterative process
 207 ending with a convergence criterion, it is obvious that it is not
 208 particularly adapted to our design goals.
 209
 210 Fortunatly, it is quite simple to reduce the number of parameters to
 211 only $\theta$. Let $x^p$ be the coordinates of pixels in a segment of
 212 size $M$. Thus, $x^p = 0, 1, \ldots, M-1$. Let $I(x^p)$ be their
 213 intensity. Firstly, we "remove" the slope by computing :
 214
 215 \[I^{corr}(x^p) = I(x^p) - a.x^p - b\]
 216
 217 Since linear equation coefficients are searched, a classical least
 218 square method can be used to determine $a$ and $b$ :
 219
 220 \[a = \frac{covar(x^p,I(x^p))}{var(x^p)} \]
 221
 222 Assuming an overlined symbol means an average, then :
 223
 224 \[b = \overline{I(x^p)} - a.\overline{{x^p}}\]
 225
 226 Let $A$ be the amplitude of $I^{corr}$, i.e.
 227
 228 \[A = \frac{max(I^{corr}) - min(I^{corr})}{2}\]
 229
 230 Then, the least square method to find $\theta$ is reduced to search the minimum of :
 231
 232 \[\sum_{i=0}^{M-1} \left[ cos(2\pi f.i + \theta) - \frac{I^{corr}(i)}{A} \right]^2\]
 233
 234 It is equivalent to derivate this expression and to solve the following equation :
 235
 236 \begin{eqnarray*}
 237 2\left[ cos\theta \sum_{i=0}^{M-1} I^{corr}(i).sin(2\pi f.i) + sin\theta \sum_{i=0}^{M-1} I^{corr}(i).cos(2\pi f.i)\right] \\
 238 - A\left[ cos2\theta \sum_{i=0}^{M-1} sin(4\pi f.i) + sin2\theta \sum_{i=0}^{M-1} cos(4\pi f.i)\right]   = 0
 239 \end{eqnarray*}
 240
 241 Several points can be noticed :
 242 \begin{itemize}
 243 \item As in the spline method, some parts of this equation can be
 244   computed before the acquisition loop. It is the case of sums that do
 245   not depend on $\theta$ :
 246
 247 \[ \sum_{i=0}^{M-1} sin(4\pi f.i), \sum_{i=0}^{M-1} cos(4\pi f.i) \]
 248
 249 \item Lookup tables for $sin(2\pi f.i)$ and $cos(2\pi f.i)$ can also be
 250 computed.
 251
 252 \item The simplest method to find the good $\theta$ is to discretize
 253   $[-\pi,\pi]$ in $N$ steps, and to search which step leads to the
 254   result closest to zero. By the way, three other lookup tables can
 255   also be computed before the loop :
 256
 257 \[ sin \theta, cos \theta, \left[ cos 2\theta \sum_{i=0}^{M-1} sin(4\pi f.i) + sin 2\theta \sum_{i=0}^{M-1} cos(4\pi f.i)\right] \]
 258
 259 \item This search can be very fast using a dichotomous process in $log_2(N)$
 260
 261 \end{itemize}
 262
 263 Finally, the whole summarizes in an algorithm (called LSQ in the following) in two parts, one before and one during the acquisition loop :
 264 \begin{algorithm}[h]
 265 \caption{LSQ algorithm - before acquisition loop.}
 266 \label{alg:lsq-before}
 267
 268    $M \leftarrow $ number of pixels of the profile\\
 269    I[] $\leftarrow $ intensities of pixels\\
 270    $f \leftarrow $ frequency of the profile\\
 271    $s4i \leftarrow \sum_{i=0}^{M-1} sin(4\pi f.i)$\\
 272    $c4i \leftarrow \sum_{i=0}^{M-1} cos(4\pi f.i)$\\
 273    $nb_s \leftarrow $ number of discretization steps of $[-\pi,\pi]$\\
 274
 275    \For{$i=0$ to $nb_s $}{
 276      $\theta  \leftarrow -\pi + 2\pi\times \frac{i}{nb_s}$\\
 277      lut\_sin[$i$] $\leftarrow sin \theta$\\
 278      lut\_cos[$i$] $\leftarrow cos \theta$\\
 279      lut\_A[$i$] $\leftarrow cos 2 \theta \times s4i + sin 2 \theta \times c4i$\\
 280      lut\_sinfi[$i$] $\leftarrow sin (2\pi f.i)$\\
 281      lut\_cosfi[$i$] $\leftarrow cos (2\pi f.i)$\\
 282    }
 283 \end{algorithm}
 284
 285 \begin{algorithm}[h]
 286 \caption{LSQ algorithm - during acquisition loop.}
 287 \label{alg:lsq-during}
 288
 289    $\bar{x} \leftarrow \frac{M-1}{2}$\\
 290    $\bar{y} \leftarrow 0$, $x_{var} \leftarrow 0$, $xy_{covar} \leftarrow 0$\\
 291    \For{$i=0$ to $M-1$}{
 292      $\bar{y} \leftarrow \bar{y} + $ I[$i$]\\
 293      $x_{var} \leftarrow x_{var} + (i-\bar{x})^2$\\
 294    }
 295    $\bar{y} \leftarrow \frac{\bar{y}}{M}$\\
 296    \For{$i=0$ to $M-1$}{
 297      $xy_{covar} \leftarrow xy_{covar} + (i-\bar{x}) \times (I[i]-\bar{y})$\\
 298    }
 299    $slope \leftarrow \frac{xy_{covar}}{x_{var}}$\\
 300    $start \leftarrow y_{moy} - slope\times \bar{x}$\\
 301    \For{$i=0$ to $M-1$}{
 302      $I[i] \leftarrow I[i] - start - slope\times i$\tcc*[f]{slope removal}\\
 303    }
 304
 305    $I_{max} \leftarrow max_i(I[i])$, $I_{min} \leftarrow min_i(I[i])$\\
 306    $amp \leftarrow \frac{I_{max}-I_{min}}{2}$\\
 307
 308    $Is \leftarrow 0$, $Ic \leftarrow 0$\\
 309    \For{$i=0$ to $M-1$}{
 310      $Is \leftarrow Is + I[i]\times $ lut\_sinfi[$i$]\\
 311      $Ic \leftarrow Ic + I[i]\times $ lut\_cosfi[$i$]\\
 312    }
 313
 314    $\theta \leftarrow -\pi$\\
 315    $val_1 \leftarrow 2\times \left[ Is.\cos(\theta) + Ic.\sin(\theta) \right] - amp\times \left[ c4i.\sin(2\theta) + s4i.\cos(2\theta) \right]$\\
 316    \For{$i=1-n_s$ to $n_s$}{
 317      $\theta \leftarrow \frac{i.\pi}{n_s}$\\
 318      $val_2 \leftarrow 2\times \left[ Is.\cos(\theta) + Ic.\sin(\theta) \right] - amp\times \left[ c4i.\sin(2\theta) + s4i.\cos(2\theta) \right]$\\
 319
 320      \lIf{$val_1 < 0$ et $val_2 >= 0$}{
 321        $\theta_s \leftarrow \theta - \left[ \frac{val_2}{val_2-val_1}\times \frac{\pi}{n_s} \right]$\\
 322      }
 323      $val_1 \leftarrow val_2$\\
 324    }
 325
 326 \end{algorithm}
 327
 328
 329 \subsubsection{Comparison}
 330
 331 \subsection{VDHL design paradigms}
 332
 333 \subsection{VDHL implementation}
 334
 335 \section{Experimental results}
 336 \label{sec:results}
 337
 338
 339
 340
 341 \section{Conclusion and perspectives}
 342
 343
 344 \bibliographystyle{plain}
 345 \bibliography{biblio}
 346
 347 \end{document}