review.txt

[16dcc.git] / stopping.tex
diff --git a/stopping.tex b/stopping.tex

index 3a07e06a67fc073968d7355d2bff8196d485c29f..284fc49e57b604847b7ebed6b8be603ece7f5b87 100644 (file)
--- a/stopping.tex
+++ b/stopping.tex
@@ -1,11 +1,6 @@
-
-
-
-Let thus be given such kind of map.
-This article focuses on studying its iterations according to
-the equation~(\ref{eq:asyn}) with a given strategy.
-First of all, this can be interpreted as walking into its iteration graph 
-where the choice of the edge to follow is decided by the strategy.
+This section considers functions $f: \Bool^{\mathsf{N}} \rightarrow \Bool^{\mathsf{N}} $ 
+issued from an hypercube where an Hamiltonian path has been removed
+as described in previous section.
  Notice that the iteration graph is always a subgraph of 
  ${\mathsf{N}}$-cube augmented with all the self-loop, \textit{i.e.}, all the 
  edges $(v,v)$ for any $v \in \Bool^{\mathsf{N}}$. 
  Notice that the iteration graph is always a subgraph of 
  ${\mathsf{N}}$-cube augmented with all the self-loop, \textit{i.e.}, all the 
  edges $(v,v)$ for any $v \in \Bool^{\mathsf{N}}$. 
@@ -38,55 +33,24 @@ P=\dfrac{1}{6} \left(
  0&0&0&0&1&0&4&1 \\
  0&0&0&1&0&1&0&4 
  \end{array}
  0&0&0&0&1&0&4&1 \\
  0&0&0&1&0&1&0&4 
  \end{array}
-\right)
+\right).
  \]
  \end{xpl}
  
  
  \]
  \end{xpl}
  
  
-% % Let us first recall the  \emph{Total Variation} distance $\tv{\pi-\mu}$,
-% % which is defined for two distributions $\pi$ and $\mu$ on the same set 
-% % $\Bool^n$  by:
-% % $$\tv{\pi-\mu}=\max_{A\subset \Bool^n} |\pi(A)-\mu(A)|.$$ 
-% % It is known that
-% % $$\tv{\pi-\mu}=\frac{1}{2}\sum_{x\in\Bool^n}|\pi(x)-\mu(x)|.$$
-
-% % Let then $M(x,\cdot)$ be the
-% % distribution induced by the $x$-th row of $M$. If the Markov chain
-% % induced by
-% % $M$ has a stationary distribution $\pi$, then we define
-% % $$d(t)=\max_{x\in\Bool^n}\tv{M^t(x,\cdot)-\pi}.$$
-% Intuitively $d(t)$ is the largest deviation between
-% the distribution $\pi$ and $M^t(x,\cdot)$, which 
-% is the result of iterating $t$ times the function.
-% Finally, let $\varepsilon$ be a positive number, the \emph{mixing time} 
-% with respect to $\varepsilon$ is given by
-% $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
-% It defines the smallest iteration number 
-% that is sufficient to obtain a deviation lesser than $\varepsilon$.
-% Notice that the upper and lower bounds of mixing times cannot    
-% directly be computed with eigenvalues formulae as expressed 
-% in~\cite[Chap. 12]{LevinPeresWilmer2006}. The authors of this latter work  
-% only consider reversible Markov matrices whereas we do no restrict our 
-% matrices to such a form.
-
-
-
-
-
-
-
-This section considers functions $f: \Bool^{\mathsf{N}} \rightarrow \Bool^{\mathsf{N}} $ 
-issued from an hypercube where an Hamiltonian path has been removed.
  A specific random walk in this modified hypercube is first 
  A specific random walk in this modified hypercube is first 
-introduced. We further detail
-a theoretical study on the length of the path 
-which is sufficient to follow to get a uniform distribution.
+introduced (See section~\ref{sub:stop:formal}). We further 
+ study this random walk in a theoretical way to 
+provide an upper bound of fair sequences 
+(See section~\ref{sub:stop:bound}).
+We finally complete these study with experimental
+results that reduce this bound (Sec.~\ref{sub:stop:exp}).
  Notice that for a general references on Markov chains
  Notice that for a general references on Markov chains
-see~\cite{LevinPeresWilmer2006}
-, and particularly Chapter~5 on stopping times.  
-
+see~\cite{LevinPeresWilmer2006}, 
+and particularly Chapter~5 on stopping times.  
  
  
  
  
+\subsection{Formalizing the Random Walk}\label{sub:stop:formal}
  
  First of all, let $\pi$, $\mu$ be two distributions on $\Bool^{\mathsf{N}}$. The total
  variation distance between $\pi$ and $\mu$ is denoted $\tv{\pi-\mu}$ and is
  
  First of all, let $\pi$, $\mu$ be two distributions on $\Bool^{\mathsf{N}}$. The total
  variation distance between $\pi$ and $\mu$ is denoted $\tv{\pi-\mu}$ and is
@@ -101,9 +65,21 @@ distribution induced by the $X$-th row of $P$. If the Markov chain induced by
  $P$ has a stationary distribution $\pi$, then we define
  $$d(t)=\max_{X\in\Bool^{\mathsf{N}}}\tv{P^t(X,\cdot)-\pi}.$$
  
  $P$ has a stationary distribution $\pi$, then we define
  $$d(t)=\max_{X\in\Bool^{\mathsf{N}}}\tv{P^t(X,\cdot)-\pi}.$$
  
+\ANNOT{incohérence de notation $X$ : entier ou dans $B^N$ ?}
  and
  
  $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
  and
  
  $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
+
+%% Intuitively speaking, $t_{\rm mix}$ is a mixing time 
+%% \textit{i.e.}, is the time until the matrix $X$ of a Markov chain  
+%% is $\epsilon$-close to a stationary distribution.
+
+Intutively speaking,  $t_{\rm mix}(\varepsilon)$ is the time/steps required
+to be sure to be $\varepsilon$-close to the stationary distribution, wherever
+the chain starts. 
+
+
+
  One can prove that
  
  $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(\frac{1}{4})$$
  One can prove that
  
  $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(\frac{1}{4})$$
@@ -112,13 +88,13 @@ $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(
  
  
  % It is known that $d(t+1)\leq d(t)$. \JFC{references ? Cela a-t-il 
  
  
  % It is known that $d(t+1)\leq d(t)$. \JFC{references ? Cela a-t-il 
-% un intérêt dans la preuve ensuite.}
+% un intérêt dans la preuve ensuite.}
  
  
  
  %and
  % $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
  
  
  
  %and
  % $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
-% One can prove that \JFc{Ou cela a-t-il été fait?}
+% One can prove that \JFc{Ou cela a-t-il été fait?}
  % $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(\frac{1}{4})$$
  
  
  % $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(\frac{1}{4})$$
  
  
@@ -140,11 +116,13 @@ randomized stopping time (possibly depending on the starting position $X$),
  such that  the distribution of $X_\tau$ is $\pi$:
  $$\P_X(X_\tau=Y)=\pi(Y).$$
  
  such that  the distribution of $X_\tau$ is $\pi$:
  $$\P_X(X_\tau=Y)=\pi(Y).$$
  
+\subsection{Upper bound of Stopping Time}\label{sub:stop:bound}
+
  A stopping time $\tau$ is a {\emph strong stationary time} if $X_{\tau}$ is
  A stopping time $\tau$ is a {\emph strong stationary time} if $X_{\tau}$ is
-independent of $\tau$. 
+independent of $\tau$. The following result will be useful~\cite[Proposition~6.10]{LevinPeresWilmer2006},
  
  
  
  
-\begin{thrm}
+\begin{thrm}\label{thm-sst}
  If $\tau$ is a strong stationary time, then $d(t)\leq \max_{X\in\Bool^{\mathsf{N}}}
  \P_X(\tau > t)$.
  \end{thrm}
  If $\tau$ is a strong stationary time, then $d(t)\leq \max_{X\in\Bool^{\mathsf{N}}}
  \P_X(\tau > t)$.
  \end{thrm}
@@ -222,7 +200,7 @@ $$
  
  
  
  
  
  
-%%%%%%%%%%%%%%%%%%%%%%%%%%%ù
+%%%%%%%%%%%%%%%%%%%%%%%%%%%ù
  %\section{Stopping time}
  
  An integer $\ell\in \llbracket 1,{\mathsf{N}} \rrbracket$ is said {\it fair} 
  %\section{Stopping time}
  
  An integer $\ell\in \llbracket 1,{\mathsf{N}} \rrbracket$ is said {\it fair} 
@@ -250,11 +228,15 @@ $b=1$ with probability $\frac{1}{2}$ and $b=0$ with probability
  $\frac{1}{2}$. Since $h(X_{\tau_\ell-1})\neq\ell$ the value of the $\ell$-th
  bit of $X_{\tau_\ell}$ 
  is $0$ or $1$ with the same probability ($\frac{1}{2}$).
  $\frac{1}{2}$. Since $h(X_{\tau_\ell-1})\neq\ell$ the value of the $\ell$-th
  bit of $X_{\tau_\ell}$ 
  is $0$ or $1$ with the same probability ($\frac{1}{2}$).
+This probability is independent of the value of the other bits.
+
+
  
  
- Moving next in the chain, at each step,
+Moving next in the chain, at each step,
  the $l$-th bit  is switched from $0$ to $1$ or from $1$ to $0$ each time with
  the same probability. Therefore,  for $t\geq \tau_\ell$, the
  the $l$-th bit  is switched from $0$ to $1$ or from $1$ to $0$ each time with
  the same probability. Therefore,  for $t\geq \tau_\ell$, the
-$\ell$-th bit of $X_t$ is $0$ or $1$ with the same probability, proving the
+$\ell$-th bit of $X_t$ is $0$ or $1$ with the same probability,  and
+independently of the value of the other bits, proving the
  lemma.\end{proof}
  
  \begin{thrm} \label{prop:stop}
  lemma.\end{proof}
  
  \begin{thrm} \label{prop:stop}
@@ -267,7 +249,12 @@ let $S_{X,\ell}$ be the
  random variable that counts the number of steps 
  from $X$ until we reach a configuration where
  $\ell$ is fair. More formally
  random variable that counts the number of steps 
  from $X$ until we reach a configuration where
  $\ell$ is fair. More formally
-$$S_{X,\ell}=\min \{t \geq 1\mid h(X_{t-1})\neq \ell\text{ and }Z_t=(\ell,.)\text{ and } X_0=X\}.$$
+\[
+\begin{array}{rcl}
+S_{X,\ell}&=&\min \{t \geq 1\mid h(X_{t-1})\neq \ell\text{ and }Z_t=(\ell,.) \\
+&& \qquad \text{ and } X_0=X\}.
+\end{array}
+\]
  
  %  We denote by
  % $$\lambda_h=\max_{X,\ell} S_{X,\ell}.$$
  
  %  We denote by
  % $$\lambda_h=\max_{X,\ell} S_{X,\ell}.$$
@@ -314,8 +301,14 @@ has, for every $i$, $\P(S_{X,\ell}\geq 2i)\leq
  since $S_{X,\ell}$ is positive, it is known~\cite[lemma 2.9]{proba}, that
  $$E[S_{X,\ell}]=\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i).$$
  Since $\P(S_{X,\ell}\geq i)\geq \P(S_{X,\ell}\geq i+1)$, one has
  since $S_{X,\ell}$ is positive, it is known~\cite[lemma 2.9]{proba}, that
  $$E[S_{X,\ell}]=\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i).$$
  Since $\P(S_{X,\ell}\geq i)\geq \P(S_{X,\ell}\geq i+1)$, one has
-$$E[S_{X,\ell}]=\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i)\leq
-\P(S_{X,\ell}\geq 1)+\P(S_{X,\ell}\geq 2)+2 \sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq 2i).$$
+\[
+\begin{array}{rcl}
+  E[S_{X,\ell}]&=&\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i)\\
+&\leq& 
+\P(S_{X,\ell}\geq 1) +\P(S_{X,\ell}\geq 2)\\
+&& \qquad +2 \sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq 2i).
+\end{array}
+\]
  Consequently,
  $$E[S_{X,\ell}]\leq 1+1+2
  \sum_{i=1}^{+\infty}\left(1-\frac{1}{4{\mathsf{N}}^2}\right)^i=2+2(4{\mathsf{N}}^2-1)=8{\mathsf{N}}^2,$$
  Consequently,
  $$E[S_{X,\ell}]\leq 1+1+2
  \sum_{i=1}^{+\infty}\left(1-\frac{1}{4{\mathsf{N}}^2}\right)^i=2+2(4{\mathsf{N}}^2-1)=8{\mathsf{N}}^2,$$
@@ -362,16 +355,107 @@ One can now prove Theorem~\ref{prop:stop}.
  \begin{proof}
  Since $\ts^\prime$ is the time used to obtain $\mathsf{N}-1$ fair bits.
  Assume that the last unfair bit is $\ell$. One has
  \begin{proof}
  Since $\ts^\prime$ is the time used to obtain $\mathsf{N}-1$ fair bits.
  Assume that the last unfair bit is $\ell$. One has
-$\ts=\ts^\prime+S_{X_\tau,\ell}$, and therefore
-$E[\ts] = E[\ts^\prime]+E[S_{X_\tau,\ell}]$. Therefore,
-Theorem~\ref{prop:stop} is a direct application of
-lemma~\ref{prop:lambda} and~\ref{lm:stopprime}.
+$\ts=\ts^\prime+S_{X_\tau,\ell}$, and therefore $E[\ts] =
+E[\ts^\prime]+E[S_{X_\tau,\ell}]$. Therefore, Theorem~\ref{prop:stop} is a
+direct application of lemma~\ref{prop:lambda} and~\ref{lm:stopprime}.
  \end{proof}
  
  \end{proof}
  
+Now using Markov Inequality, one has $\P_X(\tau > t)\leq \frac{E[\tau]}{t}$.
+With $t_n=32N^2+16N\ln (N+1)$, one obtains:  $\P_X(\tau > t_n)\leq \frac{1}{4}$. 
+Therefore, using the defintion of $t_{\rm mix)}$ and
+Theorem~\ref{thm-sst}, it follows that
+$t_{\rm mix}\leq 32N^2+16N\ln (N+1)=O(N^2)$.
+
+
  Notice that the calculus of the stationary time upper bound is obtained
  under the following constraint: for each vertex in the $\mathsf{N}$-cube 
  there are one ongoing arc and one outgoing arc that are removed. 
  Notice that the calculus of the stationary time upper bound is obtained
  under the following constraint: for each vertex in the $\mathsf{N}$-cube 
  there are one ongoing arc and one outgoing arc that are removed. 
-The calculus does not consider (balanced) Hamiltonian cycles, which 
+The calculus doesn't consider (balanced) Hamiltonian cycles, which 
  are more regular and more binding than this constraint.
  are more regular and more binding than this constraint.
+Moreover, the bound
+is obtained using the coarse Markov Inequality. For the
+classical (lazzy) random walk the  $\mathsf{N}$-cube, without removing any
+Hamiltonian cylce, the mixing time is in $\Theta(N\ln N)$. 
+We conjecture that in our context, the mixing time is also in $\Theta(N\ln
+N)$.
+
+
  In this later context, we claim that the upper bound for the stopping time 
  In this later context, we claim that the upper bound for the stopping time 
-should be reduced.
+should be reduced. This fact is studied in the next section.
+
+\subsection{Practical Evaluation of Stopping Times}\label{sub:stop:exp}
+ 
+Let be given a function $f: \Bool^{\mathsf{N}} \rightarrow \Bool^{\mathsf{N}}$
+and an initial seed $x^0$.
+The pseudo code given in algorithm~\ref{algo:stop} returns the smallest 
+number of iterations such that all elements $\ell\in \llbracket 1,{\mathsf{N}} \rrbracket$ are fair. It allows to deduce an approximation of $E[\ts]$
+by calling this code many times with many instances of function and many 
+seeds.
+
+\begin{algorithm}[ht]
+%\begin{scriptsize}
+\KwIn{a function $f$, an initial configuration $x^0$ ($\mathsf{N}$ bits)}
+\KwOut{a number of iterations $\textit{nbit}$}
+
+$\textit{nbit} \leftarrow 0$\;
+$x\leftarrow x^0$\;
+$\textit{fair}\leftarrow\emptyset$\;
+\While{$\left\vert{\textit{fair}}\right\vert < \mathsf{N} $}
+{
+        $ s \leftarrow \textit{Random}(\mathsf{N})$ \;
+        $\textit{image} \leftarrow f(x) $\;
+        \If{$\textit{Random}(1) \neq 0$ and $x[s] \neq \textit{image}[s]$}{
+            $\textit{fair} \leftarrow \textit{fair} \cup \{s\}$\;
+            $x[s] \leftarrow \textit{image}[s]$\;
+          }
+        $\textit{nbit} \leftarrow \textit{nbit}+1$\;
+}
+\Return{$\textit{nbit}$}\;
+%\end{scriptsize}
+\caption{Pseudo Code of stoping time calculus }
+\label{algo:stop}
+\end{algorithm}
+
+Practically speaking, for each number $\mathsf{N}$, $ 3 \le \mathsf{N} \le 16$, 
+10 functions have been generated according to method presented in section~\ref{sec:hamilton}. For each of them, the calculus of the approximation of $E[\ts]$
+is executed 10000 times with a random seed. The Figure~\ref{fig:stopping:moy}
+summarizes these results. In this one, a circle represents the 
+approximation of $E[\ts]$ for a given $\mathsf{N}$.
+The line is the graph of the function $x \mapsto 2x\ln(2x+8)$. 
+It can firstly 
+be observed that the approximation is largely
+smaller than the upper bound given in theorem~\ref{prop:stop}.
+It can be further deduced  that the conjecture of the previous section 
+is realistic according the graph of $x \mapsto 2x\ln(2x+8)$.
+
+
+
+
+
+% \begin{table}
+% $$
+% \begin{array}{|*{14}{l|}}
+% \hline
+% \mathsf{N}  & 4 & 5 & 6 & 7& 8 & 9 & 10& 11 & 12 & 13 & 14 & 15 & 16 \\
+% \hline
+% \mathsf{N}  & 21.8 & 28.4 & 35.4 & 42.5 & 50 & 57.7 & 65.6& 73.5 & 81.6 & 90 & 98.3 & 107.1 & 115.7 \\
+% \hline
+% \end{array}
+% $$
+% \caption{Average Stopping Time}\label{table:stopping:moy}
+% \end{table}
+
+\begin{figure}
+\centering
+\includegraphics[width=0.49\textwidth]{complexity}
+\caption{Average Stopping Time Approximation}\label{fig:stopping:moy}
+\end{figure}
+
+
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "main"
+%%% ispell-dictionary: "american"
+%%% mode: flyspell
+%%% End: