ajout espace bib

[rairo15.git] / stopping.tex
diff --git a/stopping.tex b/stopping.tex

index 0ad3e8a7bffba16803c5a9c8d0700e8cb6fbd0cc..3a07e06a67fc073968d7355d2bff8196d485c29f 100644 (file)
--- a/stopping.tex
+++ b/stopping.tex
@@ -75,29 +75,31 @@ P=\dfrac{1}{6} \left(
  
  
  
-This section considers functions $f: \Bool^n \rightarrow \Bool^n $ 
+This section considers functions $f: \Bool^{\mathsf{N}} \rightarrow \Bool^{\mathsf{N}} $ 
  issued from an hypercube where an Hamiltonian path has been removed.
  A specific random walk in this modified hypercube is first 
  introduced. We further detail
  a theoretical study on the length of the path 
  which is sufficient to follow to get a uniform distribution.
- 
+Notice that for a general references on Markov chains
+see~\cite{LevinPeresWilmer2006}
+, and particularly Chapter~5 on stopping times.  
  
  
  
  
-First of all, let $\pi$, $\mu$ be two distributions on $\Bool^n$. The total
+First of all, let $\pi$, $\mu$ be two distributions on $\Bool^{\mathsf{N}}$. The total
  variation distance between $\pi$ and $\mu$ is denoted $\tv{\pi-\mu}$ and is
  defined by
-$$\tv{\pi-\mu}=\max_{A\subset \Bool^n} |\pi(A)-\mu(A)|.$$ It is known that
-$$\tv{\pi-\mu}=\frac{1}{2}\sum_{X\in\Bool^n}|\pi(X)-\mu(X)|.$$ Moreover, if
-$\nu$ is a distribution on $\Bool^n$, one has
+$$\tv{\pi-\mu}=\max_{A\subset \Bool^{\mathsf{N}}} |\pi(A)-\mu(A)|.$$ It is known that
+$$\tv{\pi-\mu}=\frac{1}{2}\sum_{X\in\Bool^{\mathsf{N}}}|\pi(X)-\mu(X)|.$$ Moreover, if
+$\nu$ is a distribution on $\Bool^{\mathsf{N}}$, one has
  $$\tv{\pi-\mu}\leq \tv{\pi-\nu}+\tv{\nu-\mu}$$
  
-Let $P$ be the matrix of a Markov chain on $\Bool^n$. $P(X,\cdot)$ is the
+Let $P$ be the matrix of a Markov chain on $\Bool^{\mathsf{N}}$. $P(X,\cdot)$ is the
  distribution induced by the $X$-th row of $P$. If the Markov chain induced by
  $P$ has a stationary distribution $\pi$, then we define
-$$d(t)=\max_{X\in\Bool^n}\tv{P^t(X,\cdot)-\pi}.$$
+$$d(t)=\max_{X\in\Bool^{\mathsf{N}}}\tv{P^t(X,\cdot)-\pi}.$$
  
  and
  
@@ -121,10 +123,10 @@ $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(
  
  
  
-Let $(X_t)_{t\in \mathbb{N}}$ be a sequence of $\Bool^n$ valued random
+Let $(X_t)_{t\in \mathbb{N}}$ be a sequence of $\Bool^{\mathsf{N}}$ valued random
  variables. A $\mathbb{N}$-valued random variable $\tau$ is a {\it stopping
    time} for the sequence $(X_i)$ if for each $t$ there exists $B_t\subseteq
-(\Bool^n)^{t+1}$ such that $\{\tau=t\}=\{(X_0,X_1,\ldots,X_t)\in B_t\}$. 
+(\Bool^{\mathsf{N}})^{t+1}$ such that $\{\tau=t\}=\{(X_0,X_1,\ldots,X_t)\in B_t\}$. 
  In other words, the event $\{\tau = t \}$ only depends on the values of 
  $(X_0,X_1,\ldots,X_t)$, not on $X_k$ with $k > t$. 
   
@@ -138,72 +140,77 @@ randomized stopping time (possibly depending on the starting position $X$),
  such that  the distribution of $X_\tau$ is $\pi$:
  $$\P_X(X_\tau=Y)=\pi(Y).$$
  
+A stopping time $\tau$ is a {\emph strong stationary time} if $X_{\tau}$ is
+independent of $\tau$. 
+
  
-\begin{Theo}
-If $\tau$ is a strong stationary time, then $d(t)\leq \max_{X\in\Bool^n}
+\begin{thrm}
+If $\tau$ is a strong stationary time, then $d(t)\leq \max_{X\in\Bool^{\mathsf{N}}}
  \P_X(\tau > t)$.
-\end{Theo}
+\end{thrm}
  
  
  %Let $\Bool^n$ be the set of words of length $n$. 
  Let $E=\{(X,Y)\mid
-X\in \Bool^n, Y\in \Bool^n,\ X=Y \text{ or } X\oplus Y \in 0^*10^*\}$.
+X\in \Bool^{\mathsf{N}}, Y\in \Bool^{\mathsf{N}},\ X=Y \text{ or } X\oplus Y \in 0^*10^*\}$.
  In other words, $E$ is the set of all the edges in the classical 
-$n$-cube. 
-Let $h$ be a function from $\Bool^n$ into $\llbracket 1, n \rrbracket$.
+${\mathsf{N}}$-cube. 
+Let $h$ be a function from $\Bool^{\mathsf{N}}$ into $\llbracket 1, {\mathsf{N}} \rrbracket$.
  Intuitively speaking $h$ aims at memorizing for each node 
-$X \in \Bool^n$ which edge is removed in the Hamiltonian cycle,
-\textit{i.e.} which bit in $\llbracket 1, n \rrbracket$ 
+$X \in \Bool^{\mathsf{N}}$ which edge is removed in the Hamiltonian cycle,
+\textit{i.e.} which bit in $\llbracket 1, {\mathsf{N}} \rrbracket$ 
  cannot be switched.
  
  
  
  We denote by $E_h$ the set $E\setminus\{(X,Y)\mid X\oplus Y =
-0^{n-h(X)}10^{h(X)-1}\}$. This is the set of the modified hypercube, 
-\textit{i.e.}, the $n$-cube where the Hamiltonian cycle $h$ 
+0^{{\mathsf{N}}-h(X)}10^{h(X)-1}\}$. This is the set of the modified hypercube, 
+\textit{i.e.}, the ${\mathsf{N}}$-cube where the Hamiltonian cycle $h$ 
  has been removed.
  
  We define the Markov matrix $P_h$ for each line $X$ and 
  each column $Y$  as follows:
-$$\left\{
+\begin{equation}
+\left\{
  \begin{array}{ll}
-P_h(X,X)=\frac{1}{2}+\frac{1}{2n} & \\
+P_h(X,X)=\frac{1}{2}+\frac{1}{2{\mathsf{N}}} & \\
  P_h(X,Y)=0 & \textrm{if  $(X,Y)\notin E_h$}\\
-P_h(X,Y)=\frac{1}{2n} & \textrm{if $X\neq Y$ and $(X,Y) \in E_h$}
+P_h(X,Y)=\frac{1}{2{\mathsf{N}}} & \textrm{if $X\neq Y$ and $(X,Y) \in E_h$}
  \end{array}
  \right.
-$$ 
+\label{eq:Markov:rairo}
+\end{equation} 
  
-We denote by $\ov{h} : \Bool^n \rightarrow \Bool^n$ the function 
-such that for any $X \in \Bool^n $, 
-$(X,\ov{h}(X)) \in E$ and $X\oplus\ov{h}(X)=0^{n-h(X)}10^{h(X)-1}$. 
-The function $\ov{h}$ is said {\it square-free} if for every $X\in \Bool^n$,
+We denote by $\ov{h} : \Bool^{\mathsf{N}} \rightarrow \Bool^{\mathsf{N}}$ the function 
+such that for any $X \in \Bool^{\mathsf{N}} $, 
+$(X,\ov{h}(X)) \in E$ and $X\oplus\ov{h}(X)=0^{{\mathsf{N}}-h(X)}10^{h(X)-1}$. 
+The function $\ov{h}$ is said {\it square-free} if for every $X\in \Bool^{\mathsf{N}}$,
  $\ov{h}(\ov{h}(X))\neq X$. 
  
-\begin{Lemma}\label{lm:h}
+\begin{lmm}\label{lm:h}
  If $\ov{h}$ is bijective and square-free, then $h(\ov{h}^{-1}(X))\neq h(X)$.
-\end{Lemma}
+\end{lmm}
  
  \begin{proof}
  Let $\ov{h}$ be bijective.
-Let $k\in \llbracket 1, n \rrbracket$ s.t. $h(\ov{h}^{-1}(X))=k$.
+Let $k\in \llbracket 1, {\mathsf{N}} \rrbracket$ s.t. $h(\ov{h}^{-1}(X))=k$.
  Then $(\ov{h}^{-1}(X),X)$ belongs to $E$ and 
-$\ov{h}^{-1}(X)\oplus X = 0^{n-k}10^{k-1}$.
+$\ov{h}^{-1}(X)\oplus X = 0^{{\mathsf{N}}-k}10^{k-1}$.
  Let us suppose $h(X) = h(\ov{h}^{-1}(X))$. In such a case, $h(X) =k$.
  By definition of $\ov{h}$, $(X, \ov{h}(X)) \in E $ and 
-$X\oplus\ov{h}(X)=0^{n-h(X)}10^{h(X)-1} = 0^{n-k}10^{k-1}$.
+$X\oplus\ov{h}(X)=0^{{\mathsf{N}}-h(X)}10^{h(X)-1} = 0^{{\mathsf{N}}-k}10^{k-1}$.
  Thus $\ov{h}(X)= \ov{h}^{-1}(X)$, which leads to $\ov{h}(\ov{h}(X))= X$.
  This contradicts the square-freeness of $\ov{h}$.
  \end{proof}
  
  Let $Z$ be a random variable that is uniformly distributed over
-$\llbracket 1, n \rrbracket \times \Bool$.
-For $X\in \Bool^n$, we
+$\llbracket 1, {\mathsf{N}} \rrbracket \times \Bool$.
+For $X\in \Bool^{\mathsf{N}}$, we
  define, with $Z=(i,b)$,  
  $$
  \left\{
  \begin{array}{ll}
-f(X,Z)=X\oplus (0^{n-i}10^{i-1}) & \text{if } b=1 \text{ and } i\neq h(X),\\
+f(X,Z)=X\oplus (0^{{\mathsf{N}}-i}10^{i-1}) & \text{if } b=1 \text{ and } i\neq h(X),\\
  f(X,Z)=X& \text{otherwise.} 
  \end{array}\right.
  $$
@@ -218,7 +225,7 @@ $$
  %%%%%%%%%%%%%%%%%%%%%%%%%%%ù
  %\section{Stopping time}
  
-An integer $\ell\in \llbracket 1,n \rrbracket$ is said {\it fair} 
+An integer $\ell\in \llbracket 1,{\mathsf{N}} \rrbracket$ is said {\it fair} 
  at time $t$ if there
  exists $0\leq j <t$ such that $Z_{j+1}=(\ell,\cdot)$ and $h(X_j)\neq \ell$.
  In other words, there exist a date $j$ before $t$ where 
@@ -226,14 +233,14 @@ the first element of the random variable $Z$ is exactly $l$
  (\textit{i.e.}, $l$ is the strategy at date $j$) 
  and where the configuration $X_j$ allows to traverse the edge $l$.  
   
-Let $\ts$ be the first time all the elements of $\llbracket 1, n \rrbracket$
+Let $\ts$ be the first time all the elements of $\llbracket 1, {\mathsf{N}} \rrbracket$
  are fair. The integer $\ts$ is a randomized stopping time for
  the Markov chain $(X_t)$.
  
  
-\begin{Lemma}
+\begin{lmm}
  The integer $\ts$ is a strong stationary time.
-\end{Lemma}
+\end{lmm}
  
  \begin{proof}
  Let $\tau_\ell$ be the first time that $\ell$ is fair. The random variable
@@ -250,57 +257,59 @@ the same probability. Therefore,  for $t\geq \tau_\ell$, the
  $\ell$-th bit of $X_t$ is $0$ or $1$ with the same probability, proving the
  lemma.\end{proof}
  
-\begin{Theo} \label{prop:stop}
+\begin{thrm} \label{prop:stop}
  If $\ov{h}$ is bijective and square-free, then
-$E[\ts]\leq 8n^2+ n\ln (n+1)$. 
-\end{Theo}
+$E[\ts]\leq 8{\mathsf{N}}^2+ 4{\mathsf{N}}\ln ({\mathsf{N}}+1)$. 
+\end{thrm}
  
-For each $X\in \Bool^n$ and $\ell\in\llbracket 1,n\rrbracket$, 
+For each $X\in \Bool^{\mathsf{N}}$ and $\ell\in\llbracket 1,{\mathsf{N}}\rrbracket$, 
  let $S_{X,\ell}$ be the
  random variable that counts the number of steps 
  from $X$ until we reach a configuration where
  $\ell$ is fair. More formally
  $$S_{X,\ell}=\min \{t \geq 1\mid h(X_{t-1})\neq \ell\text{ and }Z_t=(\ell,.)\text{ and } X_0=X\}.$$
  
- We denote by
-$$\lambda_h=\max_{X,\ell} S_{X,\ell}.$$
+%  We denote by
+% $$\lambda_h=\max_{X,\ell} S_{X,\ell}.$$
  
  
-\begin{Lemma}\label{prop:lambda}
-If $\ov{h}$ is a square-free bijective function, then the inequality 
-$E[\lambda_h]\leq 8n^2$ is established.
-
-\end{Lemma}
+\begin{lmm}\label{prop:lambda}
+Let $\ov{h}$ is a square-free bijective function. Then
+for all $X$ and 
+all $\ell$, 
+the inequality 
+$E[S_{X,\ell}]\leq 8{\mathsf{N}}^2$ is established.
+\end{lmm}
  
  \begin{proof}
  For every $X$, every $\ell$, one has $\P(S_{X,\ell})\leq 2)\geq
-\frac{1}{4n^2}$. 
+\frac{1}{4{\mathsf{N}}^2}$. 
  Let $X_0= X$.
  Indeed, 
  \begin{itemize}
  \item if $h(X)\neq \ell$, then
-$\P(S_{X,\ell}=1)=\frac{1}{2n}\geq \frac{1}{4n^2}$. 
+$\P(S_{X,\ell}=1)=\frac{1}{2{\mathsf{N}}}\geq \frac{1}{4{\mathsf{N}}^2}$. 
  \item otherwise, $h(X)=\ell$, then
  $\P(S_{X,\ell}=1)=0$.
-But in this case, intutively, it is possible to move
+But in this case, intuitively, it is possible to move
  from $X$ to $\ov{h}^{-1}(X)$ (with probability $\frac{1}{2N}$). And in
  $\ov{h}^{-1}(X)$ the $l$-th bit can be switched. 
  More formally,
  since $\ov{h}$ is square-free,
  $\ov{h}(X)=\ov{h}(\ov{h}(\ov{h}^{-1}(X)))\neq \ov{h}^{-1}(X)$. It follows
  that $(X,\ov{h}^{-1}(X))\in E_h$. We thus have
-$P(X_1=\ov{h}^{-1}(X))=\frac{1}{2N}$. Now, by Lemma~\ref{lm:h},
+$P(X_1=\ov{h}^{-1}(X))=\frac{1}{2{\mathsf{N}}}$. Now, by Lemma~\ref{lm:h},
  $h(\ov{h}^{-1}(X))\neq h(X)$. Therefore $\P(S_{x,\ell}=2\mid
-X_1=\ov{h}^{-1}(X))=\frac{1}{2N}$, proving that $\P(S_{x,\ell}\leq 2)\geq
-\frac{1}{4N^2}$.
+X_1=\ov{h}^{-1}(X))=\frac{1}{2{\mathsf{N}}}$, proving that $\P(S_{x,\ell}\leq 2)\geq
+\frac{1}{4{\mathsf{N}}^2}$.
  \end{itemize}
  
  
  
  
-Therefore, $\P(S_{X,\ell}\geq 3)\leq 1-\frac{1}{4n^2}$. By induction, one
+Therefore, $\P(S_{X,\ell}\geq 3)\leq 1-\frac{1}{4{\mathsf{N}}^2}$. By induction, one
  has, for every $i$, $\P(S_{X,\ell}\geq 2i)\leq
-\left(1-\frac{1}{4n^2}\right)^i$.
+\left(1-\frac{1}{4{\mathsf{N}}^2}\right)^i$.
   Moreover,
  since $S_{X,\ell}$ is positive, it is known~\cite[lemma 2.9]{proba}, that
  $$E[S_{X,\ell}]=\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i).$$
@@ -309,38 +318,52 @@ $$E[S_{X,\ell}]=\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i)\leq
  \P(S_{X,\ell}\geq 1)+\P(S_{X,\ell}\geq 2)+2 \sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq 2i).$$
  Consequently,
  $$E[S_{X,\ell}]\leq 1+1+2
-\sum_{i=1}^{+\infty}\left(1-\frac{1}{4n^2}\right)^i=2+2(4n^2-1)=8n^2,$$
+\sum_{i=1}^{+\infty}\left(1-\frac{1}{4{\mathsf{N}}^2}\right)^i=2+2(4{\mathsf{N}}^2-1)=8{\mathsf{N}}^2,$$
  which concludes the proof.
  \end{proof}
  
-Let $\ts^\prime$ be the first time that there are exactly $n-1$ fair
-elements. 
+Let $\ts^\prime$ be the time used to get all the bits but one fair.
  
-\begin{Lemma}\label{lm:stopprime}
-One has $E[\ts^\prime]\leq n \ln (n+1).$
-\end{Lemma}
+\begin{lmm}\label{lm:stopprime}
+One has $E[\ts^\prime]\leq 4{\mathsf{N}} \ln ({\mathsf{N}}+1).$
+\end{lmm}
  
  \begin{proof}
  This is a classical  Coupon Collector's like problem. Let $W_i$ be the
  random variable counting the number of moves done in the Markov chain while
-we had exactly $i-1$ fair bits. One has $\ts^\prime=\sum_{i=1}^{n-1}W_i$.
+we had exactly $i-1$ fair bits. One has $\ts^\prime=\sum_{i=1}^{{\mathsf{N}}-1}W_i$.
   But when we are at position $X$ with $i-1$ fair bits, the probability of
- obtaining a new fair bit is either $1-\frac{i-1}{n}$ if $h(X)$ is fair,
- or  $1-\frac{i-2}{n}$ if $h(X)$ is not fair. It follows that 
-$E[W_i]\leq \frac{n}{n-i+2}$. Therefore
-$$E[\ts^\prime]=\sum_{i=1}^{n-1}E[W_i]\leq n\sum_{i=1}^{n-1}
- \frac{1}{n-i+2}=n\sum_{i=3}^{n+1}\frac{1}{i}.$$
-
-But $\sum_{i=1}^{n+1}\frac{1}{i}\leq 1+\ln(n+1)$. It follows that
-$1+\frac{1}{2}+\sum_{i=3}^{n+1}\frac{1}{i}\leq 1+\ln(n+1).$
+ obtaining a new fair bit is either $1-\frac{i-1}{{\mathsf{N}}}$ if $h(X)$ is fair,
+ or  $1-\frac{i-2}{{\mathsf{N}}}$ if $h(X)$ is not fair. 
+
+Therefore,
+$\P (W_i=k)\leq \left(\frac{i-1}{{\mathsf{N}}}\right)^{k-1} \frac{{\mathsf{N}}-i+2}{{\mathsf{N}}}.$
+Consequently, we have $\P(W_i\geq k)\leq \left(\frac{i-1}{{\mathsf{N}}}\right)^{k-1} \frac{{\mathsf{N}}-i+2}{{\mathsf{N}}-i+1}.$
+It follows that $E[W_i]=\sum_{k=1}^{+\infty} \P (W_i\geq k)\leq {\mathsf{N}} \frac{{\mathsf{N}}-i+2}{({\mathsf{N}}-i+1)^2}\leq \frac{4{\mathsf{N}}}{{\mathsf{N}}-i+2}$.
+
+
+
+It follows that 
+$E[W_i]\leq \frac{4{\mathsf{N}}}{{\mathsf{N}}-i+2}$. Therefore
+$$E[\ts^\prime]=\sum_{i=1}^{{\mathsf{N}}-1}E[W_i]\leq 
+4{\mathsf{N}}\sum_{i=1}^{{\mathsf{N}}-1} \frac{1}{{\mathsf{N}}-i+2}=
+4{\mathsf{N}}\sum_{i=3}^{{\mathsf{N}}+1}\frac{1}{i}.$$
+
+But $\sum_{i=1}^{{\mathsf{N}}+1}\frac{1}{i}\leq 1+\ln({\mathsf{N}}+1)$. It follows that
+$1+\frac{1}{2}+\sum_{i=3}^{{\mathsf{N}}+1}\frac{1}{i}\leq 1+\ln({\mathsf{N}}+1).$
  Consequently,
-$E[\ts^\prime]\leq n (-\frac{1}{2}+\ln(n+1))\leq n\ln(n+1)$.
+$E[\ts^\prime]\leq 
+4{\mathsf{N}} (-\frac{1}{2}+\ln({\mathsf{N}}+1))\leq 
+4{\mathsf{N}}\ln({\mathsf{N}}+1)$.
  \end{proof}
  
  One can now prove Theorem~\ref{prop:stop}.
  
  \begin{proof}
-One has $\ts\leq \ts^\prime+\lambda_h$. Therefore,
+Since $\ts^\prime$ is the time used to obtain $\mathsf{N}-1$ fair bits.
+Assume that the last unfair bit is $\ell$. One has
+$\ts=\ts^\prime+S_{X_\tau,\ell}$, and therefore
+$E[\ts] = E[\ts^\prime]+E[S_{X_\tau,\ell}]$. Therefore,
  Theorem~\ref{prop:stop} is a direct application of
  lemma~\ref{prop:lambda} and~\ref{lm:stopprime}.
  \end{proof}
@@ -348,7 +371,7 @@ lemma~\ref{prop:lambda} and~\ref{lm:stopprime}.
  Notice that the calculus of the stationary time upper bound is obtained
  under the following constraint: for each vertex in the $\mathsf{N}$-cube 
  there are one ongoing arc and one outgoing arc that are removed. 
-The calculus does not consider (balanced) hamiltonian cycles, which 
+The calculus does not consider (balanced) Hamiltonian cycles, which 
  are more regular and more binding than this constraint.
  In this later context, we claim that the upper bound for the stopping time 
  should be reduced.