X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/dmems12.git/blobdiff_plain/378804e7fc26018b70fc8beafbb03b16a0e536f2..0d52df2ec65d3122bfbecc24b94647275b7ae89f:/dmems12.tex?ds=sidebyside diff --git a/dmems12.tex b/dmems12.tex index 806e509..5920a5b 100644 --- a/dmems12.tex +++ b/dmems12.tex @@ -1,7 +1,9 @@ -\documentclass[12pt]{article} + +\documentclass[10pt, conference, compsocconf]{IEEEtran} %\usepackage{latex8} %\usepackage{times} -\usepackage[latin1]{inputenc} +\usepackage[utf8]{inputenc} +%\usepackage[cyr]{aeguill} %\usepackage{pstricks,pst-node,pst-text,pst-3d} %\usepackage{babel} \usepackage{amsmath} @@ -16,34 +18,45 @@ \usepackage{fullpage} \usepackage{fancybox} +\usepackage[ruled,lined,linesnumbered]{algorithm2e} + %%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands. \newcommand{\noun}[1]{\textsc{#1}} \newcommand{\tab}{\ \ \ } -%%%%%%%%%%%%%%%%%%%%%%%%%%%% my bib path. + + +\begin{document} + + +%% \author{\IEEEauthorblockN{Authors Name/s per 1st Affiliation (Author)} +%% \IEEEauthorblockA{line 1 (of Affiliation): dept. name of organization\\ +%% line 2: name of organization, acronyms acceptable\\ +%% line 3: City, Country\\ +%% line 4: Email: name@xyz.com} +%% \and +%% \IEEEauthorblockN{Authors Name/s per 2nd Affiliation (Author)} +%% \IEEEauthorblockA{line 1 (of Affiliation): dept. name of organization\\ +%% line 2: name of organization, acronyms acceptable\\ +%% line 3: City, Country\\ +%% line 4: Email: name@xyz.com} +%% } + \title{Using FPGAs for high speed and real time cantilever deflection estimation} +\author{\IEEEauthorblockN{Raphaël Couturier\IEEEauthorrefmark{1}, Stéphane Domas\IEEEauthorrefmark{1}, Gwenhaël Goavec-Merou\IEEEauthorrefmark{2} and Michel Lenczner\IEEEauthorrefmark{2}} +\IEEEauthorblockA{\IEEEauthorrefmark{1}FEMTO-ST, DISC, University of Franche-Comte, Belfort, France\\ +\{raphael.couturier,stephane.domas\}@univ-fcomte.fr} +\IEEEauthorblockA{\IEEEauthorrefmark{2}FEMTO-ST, Time-Frequency, University of Franche-Comte, Besançon, France\\ +\{michel.lenczner@utbm.fr,gwenhael.goavec@trabucayre.com} +} + + -\author{ Raphaël COUTURIER\\ -Laboratoire d'Informatique -de l'Universit\'e de Franche-Comt\'e, \\ -BP 527, \\ -90016~Belfort CEDEX, France\\ - \and Stéphane Domas\\ -Laboratoire d'Informatique -de l'Universit\'e de Franche-Comt\'e, \\ -BP 527, \\ -90016~Belfort CEDEX, France\\ - \and Gwenhaël Goavec\\ -?? -?? \\ -??, \\ -??\\} -\begin{document} \maketitle @@ -58,34 +71,62 @@ BP 527, \\ \section{Introduction} -%% blabla + +Cantilevers are used inside atomic force microscope which provides high +resolution images of surfaces. Several technics have been used to measure the +displacement of cantilevers in litterature. For example, it is possible to +determine accurately the deflection with different mechanisms. +In~\cite{CantiPiezzo01}, authors used piezoresistor integrated into the +cantilever. Nevertheless this approach suffers from the complexity of the +microfabrication process needed to implement the sensor in the cantilever. +In~\cite{CantiCapacitive03}, authors have presented an cantilever mechanism +based on capacitive sensing. This kind of technic also involves to instrument +the cantiliver which result in a complex fabrication process. + +In this paper our attention is focused on a method based on interferometry to +measure cantilevers' displacements. In this method cantilevers are illuminated +by an optic source. The interferometry produces fringes on each cantilevers +which enables to compute the cantilever displacement. In order to analyze the +fringes a high speed camera is used. Images need to be processed quickly and +then a estimation method is required to determine the displacement of each +cantilever. In~\cite{AFMCSEM11}, the authors have used an algorithm based on +spline to estimate the cantilevers' positions. +%%RAPH : ce qui est génant c'est qu'ils ne parlent pas de spline dans ce papier... + The overall process gives +accurate results but all the computation are performed on a standard computer +using labview. Consequently, the main drawback of this implementation is that +the computer is a bootleneck in the overall process. In this paper we propose to +use a method based on least square and to implement all the computation on a +FGPA. + +The remainder of the paper is organized as follows. Section~\ref{sec:measure} +describes more precisely the measurement process. Our solution based on the +least square method and the implementation on FPGA is presented in +Section~\ref{sec:solus}. Experimentations are described in +Section~\ref{sec:results}. Finally a conclusion and some perspectives are +presented. + + + %% quelques ref commentées sur les calculs basés sur l'interférométrie -\section{Measurement architecture} -\label{sec:measure-archi} +\section{Measurement principles} +\label{sec:measure} + +In order to develop simple, cost effective and user-friendly probe arrays, +authors of ~\cite{AFMCSEM11} have developped a system based of interferometry. + +\subsection{Architecture} +\label{sec:archi} %% description de l'architecture générale de l'acquisition d'images %% avec au milieu une unité de traitement dont on ne précise pas ce %% qu'elle est. %% image tirée des expériences. -\section{Design goals} -\label{sec:goals} - -%% objectifs en terme de rapidité et de précision, avec en vue l'ajout -%% du contrôle => l'unité de traitement qui s'impose est un FPGA => -%% algo adapté au FPGA. - -%% peut etre que cette section peut être déplacée en intro ... à voir. - -\section{Proposed solution} -\label{sec:solus} - \subsection{Cantilever deflection estimation} +\label{sec:deflest} -%% => faire de l'interpolation de signal sinusoidal -%% descriptif rapide des deux méthodes : splines et moindres carrés As shown on image \ref{img:img-xp}, each cantilever is covered by interferometric fringes. The fringes will distort when cantilevers are deflected. Estimating the deflection is done by computing this @@ -113,19 +154,49 @@ where $x$ is the position of a pixel in its associated segment. The global method consists in two main sequences. The first one aims to determin the frequency $f$ of each profile with an algorithm based -on spline interpolation (see below). It also computes the coefficient -used for unwrapping the phase. The second one is the acquisition loop, -while which images are taken at regular time steps. For each image, -the phase $\theta$ of all profiles is computed to obtain, after -unwrapping, the deflection of cantilevers. - -This phase computation is obviously the bottle-neck of the whole -process. For example, if we consider the camera actually in use, an -exposition time of 2.5ms for $1024\times 1204$ pixels seems the -minimum that can be reached. For a $10\times 10$ cantilever array, if -we neglect the time to extract pixels, it implies that computing the -deflection of a single cantilever should take less than 25$µ$s, which is -quite small. +on spline interpolation (see section \ref{algo-spline}). It also +computes the coefficient used for unwrapping the phase. The second one +is the acquisition loop, while which images are taken at regular time +steps. For each image, the phase $\theta$ of all profiles is computed +to obtain, after unwrapping, the deflection of cantilevers. + +\subsection{Design goals} +\label{sec:goals} + +If we put aside some hardware issues like the speed of the link +between the camera and the computation unit, the time to deserialize +pixels and to store them in memory, ... the phase computation is +obviously the bottle-neck of the whole process. For example, if we +consider the camera actually in use, an exposition time of 2.5ms for +$1024\times 1204$ pixels seems the minimum that can be reached. For a +$10\times 10$ cantilever array, if we neglect the time to extract +pixels, it implies that computing the deflection of a single +cantilever should take less than 25$\mu$s, thus 12.5$\mu$s by phase.\\ + +In fact, this timing is a very hard constraint. Let consider a very +small programm that initializes twenty million of doubles in memory +and then does 1000000 cumulated sums on 20 contiguous values +(experimental profiles have about this size). On an intel Core 2 Duo +E6650 at 2.33GHz, this program reaches an average of 155Mflops. It +implies that the phase computation algorithm should not take more than +$240\times 12.5 = 1937$ floating operations. For integers, it gives +$3000$ operations. + +%% to be continued ... + +%% � faire : timing de l'algo spline en C avec atan et tout le bordel. + + + + +\section{Proposed solution} +\label{sec:solus} + + +\subsection{FPGA constraints} + +%% contraintes imposées par le FPGA : algo pipeline/parallele, pas d'op math complexe, ... + \subsection{Considered algorithms} @@ -137,7 +208,7 @@ classical least square method but suppose that frequency is already known. \subsubsection{Spline algorithm} - +\label{sec:algo-spline} Let consider a profile $P$, that is a segment of $M$ pixels with an intensity in gray levels. Let call $I(x)$ the intensity of profile in $x \in [0,M[$. @@ -228,20 +299,77 @@ computed. \end{itemize} -\subsubsection{Comparison} - -\subsection{FPGA constraints} +Finally, the whole summarizes in an algorithm (called LSQ in the following) in two parts, one before and one during the acquisition loop : +\begin{algorithm}[h] +\caption{LSQ algorithm - before acquisition loop.} +\label{alg:lsq-before} + + $M \leftarrow $ number of pixels of the profile\\ + I[] $\leftarrow $ intensities of pixels\\ + $f \leftarrow $ frequency of the profile\\ + $s4i \leftarrow \sum_{i=0}^{M-1} sin(4\pi f.i)$\\ + $c4i \leftarrow \sum_{i=0}^{M-1} cos(4\pi f.i)$\\ + $nb_s \leftarrow $ number of discretization steps of $[-\pi,\pi]$\\ + + \For{$i=0$ to $nb_s $}{ + $\theta \leftarrow -\pi + 2\pi\times \frac{i}{nb_s}$\\ + lut\_sin[$i$] $\leftarrow sin \theta$\\ + lut\_cos[$i$] $\leftarrow cos \theta$\\ + lut\_A[$i$] $\leftarrow cos 2 \theta \times s4i + sin 2 \theta \times c4i$\\ + lut\_sinfi[$i$] $\leftarrow sin (2\pi f.i)$\\ + lut\_cosfi[$i$] $\leftarrow cos (2\pi f.i)$\\ + } +\end{algorithm} + +\begin{algorithm}[h] +\caption{LSQ algorithm - during acquisition loop.} +\label{alg:lsq-during} + + $\bar{x} \leftarrow \frac{M-1}{2}$\\ + $\bar{y} \leftarrow 0$, $x_{var} \leftarrow 0$, $xy_{covar} \leftarrow 0$\\ + \For{$i=0$ to $M-1$}{ + $\bar{y} \leftarrow \bar{y} + $ I[$i$]\\ + $x_{var} \leftarrow x_{var} + (i-\bar{x})^2$\\ + } + $\bar{y} \leftarrow \frac{\bar{y}}{M}$\\ + \For{$i=0$ to $M-1$}{ + $xy_{covar} \leftarrow xy_{covar} + (i-\bar{x}) \times (I[i]-\bar{y})$\\ + } + $slope \leftarrow \frac{xy_{covar}}{x_{var}}$\\ + $start \leftarrow y_{moy} - slope\times \bar{x}$\\ + \For{$i=0$ to $M-1$}{ + $I[i] \leftarrow I[i] - start - slope\times i$\tcc*[f]{slope removal}\\ + } + + $I_{max} \leftarrow max_i(I[i])$, $I_{min} \leftarrow min_i(I[i])$\\ + $amp \leftarrow \frac{I_{max}-I_{min}}{2}$\\ + + $Is \leftarrow 0$, $Ic \leftarrow 0$\\ + \For{$i=0$ to $M-1$}{ + $Is \leftarrow Is + I[i]\times $ lut\_sinfi[$i$]\\ + $Ic \leftarrow Ic + I[i]\times $ lut\_cosfi[$i$]\\ + } + + $\theta \leftarrow -\pi$\\ + $val_1 \leftarrow 2\times \left[ Is.\cos(\theta) + Ic.\sin(\theta) \right] - amp\times \left[ c4i.\sin(2\theta) + s4i.\cos(2\theta) \right]$\\ + \For{$i=1-n_s$ to $n_s$}{ + $\theta \leftarrow \frac{i.\pi}{n_s}$\\ + $val_2 \leftarrow 2\times \left[ Is.\cos(\theta) + Ic.\sin(\theta) \right] - amp\times \left[ c4i.\sin(2\theta) + s4i.\cos(2\theta) \right]$\\ + + \lIf{$val_1 < 0$ et $val_2 >= 0$}{ + $\theta_s \leftarrow \theta - \left[ \frac{val_2}{val_2-val_1}\times \frac{\pi}{n_s} \right]$\\ + } + $val_1 \leftarrow val_2$\\ + } + +\end{algorithm} -%% contraintes imposées par le FPGA : algo pipeline/parallele, pas d'op math complexe, ... - -\subsection{Least square algorithm} -%% description précise -%% avantage sur FPGA +\subsubsection{Comparison} -\subsection{VDHL design paradigms} +\subsection{VHDL design paradigms} -\subsection{VDHL implementation} +\subsection{VHDL implementation} \section{Experimental results} \label{sec:results}