annexePreuveStopping.tex

   1 \section{Quantifier l'écart par rapport à la distribution uniforme}
   2 %15 Rairo
   3
   4
   5
   6
   7
   8 Let thus be given such kind of map.
   9 This article focuses on studying its iterations according to
  10 the equation~(\ref{eq:asyn}) with a given strategy.
  11 First of all, this can be interpreted as walking into its iteration graph
  12 where the choice of the edge to follow is decided by the strategy.
  13 Notice that the iteration graph is always a subgraph of
  14 ${\mathsf{N}}$-cube augmented with all the self-loop, \textit{i.e.}, all the
  15 edges $(v,v)$ for any $v \in \Bool^{\mathsf{N}}$.
  16 Next, if we add probabilities on the transition graph, iterations can be
  17 interpreted as Markov chains.
  18
  19 \begin{xpl}
  20 Let us consider for instance
  21 the graph $\Gamma(f)$ defined
  22 in \textsc{Figure~\ref{fig:iteration:f*}.} and
  23 the probability function $p$ defined on the set of edges as follows:
  24 $$
  25 p(e) \left\{
  26 \begin{array}{ll}
  27 = \frac{2}{3} \textrm{ if $e=(v,v)$ with $v \in \Bool^3$,}\\
  28 = \frac{1}{6} \textrm{ otherwise.}
  29 \end{array}
  30 \right.
  31 $$
  32 The matrix $P$ of the Markov chain associated to the function $f^*$ and to its probability function $p$ is
  33 \[
  34 P=\dfrac{1}{6} \left(
  35 \begin{array}{llllllll}
  36 4&1&1&0&0&0&0&0 \\
  37 1&4&0&0&0&1&0&0 \\
  38 0&0&4&1&0&0&1&0 \\
  39 0&1&1&4&0&0&0&0 \\
  40 1&0&0&0&4&0&1&0 \\
  41 0&0&0&0&1&4&0&1 \\
  42 0&0&0&0&1&0&4&1 \\
  43 0&0&0&1&0&1&0&4
  44 \end{array}
  45 \right)
  46 \]
  47 \end{xpl}
  48
  49
  50 % % Let us first recall the  \emph{Total Variation} distance $\tv{\pi-\mu}$,
  51 % % which is defined for two distributions $\pi$ and $\mu$ on the same set
  52 % % $\Bool^n$  by:
  53 % % $$\tv{\pi-\mu}=\max_{A\subset \Bool^n} |\pi(A)-\mu(A)|.$$
  54 % % It is known that
  55 % % $$\tv{\pi-\mu}=\frac{1}{2}\sum_{x\in\Bool^n}|\pi(x)-\mu(x)|.$$
  56
  57 % % Let then $M(x,\cdot)$ be the
  58 % % distribution induced by the $x$-th row of $M$. If the Markov chain
  59 % % induced by
  60 % % $M$ has a stationary distribution $\pi$, then we define
  61 % % $$d(t)=\max_{x\in\Bool^n}\tv{M^t(x,\cdot)-\pi}.$$
  62 % Intuitively $d(t)$ is the largest deviation between
  63 % the distribution $\pi$ and $M^t(x,\cdot)$, which
  64 % is the result of iterating $t$ times the function.
  65 % Finally, let $\varepsilon$ be a positive number, the \emph{mixing time}
  66 % with respect to $\varepsilon$ is given by
  67 % $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
  68 % It defines the smallest iteration number
  69 % that is sufficient to obtain a deviation lesser than $\varepsilon$.
  70 % Notice that the upper and lower bounds of mixing times cannot
  71 % directly be computed with eigenvalues formulae as expressed
  72 % in~\cite[Chap. 12]{LevinPeresWilmer2006}. The authors of this latter work
  73 % only consider reversible Markov matrices whereas we do no restrict our
  74 % matrices to such a form.
  75
  76
  77
  78
  79
  80
  81
  82 This section considers functions $f: \Bool^{\mathsf{N}} \rightarrow \Bool^{\mathsf{N}} $
  83 issued from an hypercube where an Hamiltonian path has been removed.
  84 A specific random walk in this modified hypercube is first
  85 introduced. We further detail
  86 a theoretical study on the length of the path
  87 which is sufficient to follow to get a uniform distribution.
  88 Notice that for a general references on Markov chains
  89 see~\cite{LevinPeresWilmer2006}
  90 , and particularly Chapter~5 on stopping times.
  91
  92
  93
  94
  95 First of all, let $\pi$, $\mu$ be two distributions on $\Bool^{\mathsf{N}}$. The total
  96 variation distance between $\pi$ and $\mu$ is denoted $\tv{\pi-\mu}$ and is
  97 defined by
  98 $$\tv{\pi-\mu}=\max_{A\subset \Bool^{\mathsf{N}}} |\pi(A)-\mu(A)|.$$ It is known that
  99 $$\tv{\pi-\mu}=\frac{1}{2}\sum_{X\in\Bool^{\mathsf{N}}}|\pi(X)-\mu(X)|.$$ Moreover, if
 100 $\nu$ is a distribution on $\Bool^{\mathsf{N}}$, one has
 101 $$\tv{\pi-\mu}\leq \tv{\pi-\nu}+\tv{\nu-\mu}$$
 102
 103 Let $P$ be the matrix of a Markov chain on $\Bool^{\mathsf{N}}$. $P(X,\cdot)$ is the
 104 distribution induced by the $X$-th row of $P$. If the Markov chain induced by
 105 $P$ has a stationary distribution $\pi$, then we define
 106 $$d(t)=\max_{X\in\Bool^{\mathsf{N}}}\tv{P^t(X,\cdot)-\pi}.$$
 107
 108 and
 109
 110 $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
 111 One can prove that
 112
 113 $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(\frac{1}{4})$$
 114
 115
 116
 117
 118 % It is known that $d(t+1)\leq d(t)$. \JFC{references ? Cela a-t-il
 119 % un intérêt dans la preuve ensuite.}
 120
 121
 122
 123 %and
 124 % $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
 125 % One can prove that \JFc{Ou cela a-t-il été fait?}
 126 % $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(\frac{1}{4})$$
 127
 128
 129
 130 Let $(X_t)_{t\in \mathbb{N}}$ be a sequence of $\Bool^{\mathsf{N}}$ valued random
 131 variables. A $\mathbb{N}$-valued random variable $\tau$ is a {\it stopping
 132   time} for the sequence $(X_i)$ if for each $t$ there exists $B_t\subseteq
 133 (\Bool^{\mathsf{N}})^{t+1}$ such that $\{\tau=t\}=\{(X_0,X_1,\ldots,X_t)\in B_t\}$.
 134 In other words, the event $\{\tau = t \}$ only depends on the values of
 135 $(X_0,X_1,\ldots,X_t)$, not on $X_k$ with $k > t$.
 136
 137
 138 Let $(X_t)_{t\in \mathbb{N}}$ be a Markov chain and $f(X_{t-1},Z_t)$ a
 139 random mapping representation of the Markov chain. A {\it randomized
 140   stopping time} for the Markov chain is a stopping time for
 141 $(Z_t)_{t\in\mathbb{N}}$. If the Markov chain is irreducible and has $\pi$
 142 as stationary distribution, then a {\it stationary time} $\tau$ is a
 143 randomized stopping time (possibly depending on the starting position $X$),
 144 such that  the distribution of $X_\tau$ is $\pi$:
 145 $$\P_X(X_\tau=Y)=\pi(Y).$$
 146
 147 A stopping time $\tau$ is a {\emph strong stationary time} if $X_{\tau}$ is
 148 independent of $\tau$.
 149
 150
 151 \begin{theorem}
 152 If $\tau$ is a strong stationary time, then $d(t)\leq \max_{X\in\Bool^{\mathsf{N}}}
 153 \P_X(\tau > t)$.
 154 \end{theorem}
 155
 156
 157 %Let $\Bool^n$ be the set of words of length $n$.
 158 Let $E=\{(X,Y)\mid
 159 X\in \Bool^{\mathsf{N}}, Y\in \Bool^{\mathsf{N}},\ X=Y \text{ or } X\oplus Y \in 0^*10^*\}$.
 160 In other words, $E$ is the set of all the edges in the classical
 161 ${\mathsf{N}}$-cube.
 162 Let $h$ be a function from $\Bool^{\mathsf{N}}$ into $\llbracket 1, {\mathsf{N}} \rrbracket$.
 163 Intuitively speaking $h$ aims at memorizing for each node
 164 $X \in \Bool^{\mathsf{N}}$ which edge is removed in the Hamiltonian cycle,
 165 \textit{i.e.} which bit in $\llbracket 1, {\mathsf{N}} \rrbracket$
 166 cannot be switched.
 167
 168
 169
 170 We denote by $E_h$ the set $E\setminus\{(X,Y)\mid X\oplus Y =
 171 0^{{\mathsf{N}}-h(X)}10^{h(X)-1}\}$. This is the set of the modified hypercube,
 172 \textit{i.e.}, the ${\mathsf{N}}$-cube where the Hamiltonian cycle $h$
 173 has been removed.
 174
 175 We define the Markov matrix $P_h$ for each line $X$ and
 176 each column $Y$  as follows:
 177 \begin{equation}
 178 \left\{
 179 \begin{array}{ll}
 180 P_h(X,X)=\frac{1}{2}+\frac{1}{2{\mathsf{N}}} & \\
 181 P_h(X,Y)=0 & \textrm{if  $(X,Y)\notin E_h$}\\
 182 P_h(X,Y)=\frac{1}{2{\mathsf{N}}} & \textrm{if $X\neq Y$ and $(X,Y) \in E_h$}
 183 \end{array}
 184 \right.
 185 \label{eq:Markov:rairo}
 186 \end{equation}
 187
 188 We denote by $\ov{h} : \Bool^{\mathsf{N}} \rightarrow \Bool^{\mathsf{N}}$ the function
 189 such that for any $X \in \Bool^{\mathsf{N}} $,
 190 $(X,\ov{h}(X)) \in E$ and $X\oplus\ov{h}(X)=0^{{\mathsf{N}}-h(X)}10^{h(X)-1}$.
 191 The function $\ov{h}$ is said {\it square-free} if for every $X\in \Bool^{\mathsf{N}}$,
 192 $\ov{h}(\ov{h}(X))\neq X$.
 193
 194 \begin{lemma}\label{lm:h}
 195 If $\ov{h}$ is bijective and square-free, then $h(\ov{h}^{-1}(X))\neq h(X)$.
 196 \end{lemma}
 197
 198 \begin{proof}
 199 Let $\ov{h}$ be bijective.
 200 Let $k\in \llbracket 1, {\mathsf{N}} \rrbracket$ s.t. $h(\ov{h}^{-1}(X))=k$.
 201 Then $(\ov{h}^{-1}(X),X)$ belongs to $E$ and
 202 $\ov{h}^{-1}(X)\oplus X = 0^{{\mathsf{N}}-k}10^{k-1}$.
 203 Let us suppose $h(X) = h(\ov{h}^{-1}(X))$. In such a case, $h(X) =k$.
 204 By definition of $\ov{h}$, $(X, \ov{h}(X)) \in E $ and
 205 $X\oplus\ov{h}(X)=0^{{\mathsf{N}}-h(X)}10^{h(X)-1} = 0^{{\mathsf{N}}-k}10^{k-1}$.
 206 Thus $\ov{h}(X)= \ov{h}^{-1}(X)$, which leads to $\ov{h}(\ov{h}(X))= X$.
 207 This contradicts the square-freeness of $\ov{h}$.
 208 \end{proof}
 209
 210 Let $Z$ be a random variable that is uniformly distributed over
 211 $\llbracket 1, {\mathsf{N}} \rrbracket \times \Bool$.
 212 For $X\in \Bool^{\mathsf{N}}$, we
 213 define, with $Z=(i,b)$,
 214 $$
 215 \left\{
 216 \begin{array}{ll}
 217 f(X,Z)=X\oplus (0^{{\mathsf{N}}-i}10^{i-1}) & \text{if } b=1 \text{ and } i\neq h(X),\\
 218 f(X,Z)=X& \text{otherwise.}
 219 \end{array}\right.
 220 $$
 221
 222 The Markov chain is thus defined as
 223 $$
 224 X_t= f(X_{t-1},Z_t)
 225 $$
 226
 227
 228
 229 %%%%%%%%%%%%%%%%%%%%%%%%%%%ù
 230 %\section{Stopping time}
 231
 232 An integer $\ell\in \llbracket 1,{\mathsf{N}} \rrbracket$ is said {\it fair}
 233 at time $t$ if there
 234 exists $0\leq j <t$ such that $Z_{j+1}=(\ell,\cdot)$ and $h(X_j)\neq \ell$.
 235 In other words, there exist a date $j$ before $t$ where
 236 the first element of the random variable $Z$ is exactly $l$
 237 (\textit{i.e.}, $l$ is the strategy at date $j$)
 238 and where the configuration $X_j$ allows to traverse the edge $l$.
 239
 240 Let $\ts$ be the first time all the elements of $\llbracket 1, {\mathsf{N}} \rrbracket$
 241 are fair. The integer $\ts$ is a randomized stopping time for
 242 the Markov chain $(X_t)$.
 243
 244
 245 \begin{lemma}
 246 The integer $\ts$ is a strong stationary time.
 247 \end{lemma}
 248
 249 \begin{proof}
 250 Let $\tau_\ell$ be the first time that $\ell$ is fair. The random variable
 251 $Z_{\tau_\ell}$ is of the form $(\ell,b)$ %with $\delta\in\{0,1\}$ and
 252 such that
 253 $b=1$ with probability $\frac{1}{2}$ and $b=0$ with probability
 254 $\frac{1}{2}$. Since $h(X_{\tau_\ell-1})\neq\ell$ the value of the $\ell$-th
 255 bit of $X_{\tau_\ell}$
 256 is $0$ or $1$ with the same probability ($\frac{1}{2}$).
 257
 258  Moving next in the chain, at each step,
 259 the $l$-th bit  is switched from $0$ to $1$ or from $1$ to $0$ each time with
 260 the same probability. Therefore,  for $t\geq \tau_\ell$, the
 261 $\ell$-th bit of $X_t$ is $0$ or $1$ with the same probability, proving the
 262 lemma.\end{proof}
 263
 264 \begin{theorem} \label{prop:stop}
 265 If $\ov{h}$ is bijective and square-free, then
 266 $E[\ts]\leq 8{\mathsf{N}}^2+ 4{\mathsf{N}}\ln ({\mathsf{N}}+1)$.
 267 \end{theorem}
 268
 269 For each $X\in \Bool^{\mathsf{N}}$ and $\ell\in\llbracket 1,{\mathsf{N}}\rrbracket$,
 270 let $S_{X,\ell}$ be the
 271 random variable that counts the number of steps
 272 from $X$ until we reach a configuration where
 273 $\ell$ is fair. More formally
 274 $$S_{X,\ell}=\min \{t \geq 1\mid h(X_{t-1})\neq \ell\text{ and }Z_t=(\ell,.)\text{ and } X_0=X\}.$$
 275
 276 %  We denote by
 277 % $$\lambda_h=\max_{X,\ell} S_{X,\ell}.$$
 278
 279
 280 \begin{lemma}\label{prop:lambda}
 281 Let $\ov{h}$ is a square-free bijective function. Then
 282 for all $X$ and
 283 all $\ell$,
 284 the inequality
 285 $E[S_{X,\ell}]\leq 8{\mathsf{N}}^2$ is established.
 286 \end{lemma}
 287
 288 \begin{proof}
 289 For every $X$, every $\ell$, one has $\P(S_{X,\ell})\leq 2)\geq
 290 \frac{1}{4{\mathsf{N}}^2}$.
 291 Let $X_0= X$.
 292 Indeed,
 293 \begin{itemize}
 294 \item if $h(X)\neq \ell$, then
 295 $\P(S_{X,\ell}=1)=\frac{1}{2{\mathsf{N}}}\geq \frac{1}{4{\mathsf{N}}^2}$.
 296 \item otherwise, $h(X)=\ell$, then
 297 $\P(S_{X,\ell}=1)=0$.
 298 But in this case, intuitively, it is possible to move
 299 from $X$ to $\ov{h}^{-1}(X)$ (with probability $\frac{1}{2N}$). And in
 300 $\ov{h}^{-1}(X)$ the $l$-th bit can be switched.
 301 More formally,
 302 since $\ov{h}$ is square-free,
 303 $\ov{h}(X)=\ov{h}(\ov{h}(\ov{h}^{-1}(X)))\neq \ov{h}^{-1}(X)$. It follows
 304 that $(X,\ov{h}^{-1}(X))\in E_h$. We thus have
 305 $P(X_1=\ov{h}^{-1}(X))=\frac{1}{2{\mathsf{N}}}$. Now, by Lemma~\ref{lm:h},
 306 $h(\ov{h}^{-1}(X))\neq h(X)$. Therefore $\P(S_{x,\ell}=2\mid
 307 X_1=\ov{h}^{-1}(X))=\frac{1}{2{\mathsf{N}}}$, proving that $\P(S_{x,\ell}\leq 2)\geq
 308 \frac{1}{4{\mathsf{N}}^2}$.
 309 \end{itemize}
 310
 311
 312
 313
 314 Therefore, $\P(S_{X,\ell}\geq 3)\leq 1-\frac{1}{4{\mathsf{N}}^2}$. By induction, one
 315 has, for every $i$, $\P(S_{X,\ell}\geq 2i)\leq
 316 \left(1-\frac{1}{4{\mathsf{N}}^2}\right)^i$.
 317  Moreover,
 318 since $S_{X,\ell}$ is positive, it is known~\cite[lemma 2.9]{proba}, that
 319 $$E[S_{X,\ell}]=\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i).$$
 320 Since $\P(S_{X,\ell}\geq i)\geq \P(S_{X,\ell}\geq i+1)$, one has
 321 $$E[S_{X,\ell}]=\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i)\leq
 322 \P(S_{X,\ell}\geq 1)+\P(S_{X,\ell}\geq 2)+2 \sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq 2i).$$
 323 Consequently,
 324 $$E[S_{X,\ell}]\leq 1+1+2
 325 \sum_{i=1}^{+\infty}\left(1-\frac{1}{4{\mathsf{N}}^2}\right)^i=2+2(4{\mathsf{N}}^2-1)=8{\mathsf{N}}^2,$$
 326 which concludes the proof.
 327 \end{proof}
 328
 329 Let $\ts^\prime$ be the time used to get all the bits but one fair.
 330
 331 \begin{lemma}\label{lm:stopprime}
 332 One has $E[\ts^\prime]\leq 4{\mathsf{N}} \ln ({\mathsf{N}}+1).$
 333 \end{lemma}
 334
 335 \begin{proof}
 336 This is a classical  Coupon Collector's like problem. Let $W_i$ be the
 337 random variable counting the number of moves done in the Markov chain while
 338 we had exactly $i-1$ fair bits. One has $\ts^\prime=\sum_{i=1}^{{\mathsf{N}}-1}W_i$.
 339  But when we are at position $X$ with $i-1$ fair bits, the probability of
 340  obtaining a new fair bit is either $1-\frac{i-1}{{\mathsf{N}}}$ if $h(X)$ is fair,
 341  or  $1-\frac{i-2}{{\mathsf{N}}}$ if $h(X)$ is not fair.
 342
 343 Therefore,
 344 $\P (W_i=k)\leq \left(\frac{i-1}{{\mathsf{N}}}\right)^{k-1} \frac{{\mathsf{N}}-i+2}{{\mathsf{N}}}.$
 345 Consequently, we have $\P(W_i\geq k)\leq \left(\frac{i-1}{{\mathsf{N}}}\right)^{k-1} \frac{{\mathsf{N}}-i+2}{{\mathsf{N}}-i+1}.$
 346 It follows that $E[W_i]=\sum_{k=1}^{+\infty} \P (W_i\geq k)\leq {\mathsf{N}} \frac{{\mathsf{N}}-i+2}{({\mathsf{N}}-i+1)^2}\leq \frac{4{\mathsf{N}}}{{\mathsf{N}}-i+2}$.
 347
 348
 349
 350 It follows that
 351 $E[W_i]\leq \frac{4{\mathsf{N}}}{{\mathsf{N}}-i+2}$. Therefore
 352 $$E[\ts^\prime]=\sum_{i=1}^{{\mathsf{N}}-1}E[W_i]\leq
 353 4{\mathsf{N}}\sum_{i=1}^{{\mathsf{N}}-1} \frac{1}{{\mathsf{N}}-i+2}=
 354 4{\mathsf{N}}\sum_{i=3}^{{\mathsf{N}}+1}\frac{1}{i}.$$
 355
 356 But $\sum_{i=1}^{{\mathsf{N}}+1}\frac{1}{i}\leq 1+\ln({\mathsf{N}}+1)$. It follows that
 357 $1+\frac{1}{2}+\sum_{i=3}^{{\mathsf{N}}+1}\frac{1}{i}\leq 1+\ln({\mathsf{N}}+1).$
 358 Consequently,
 359 $E[\ts^\prime]\leq
 360 4{\mathsf{N}} (-\frac{1}{2}+\ln({\mathsf{N}}+1))\leq
 361 4{\mathsf{N}}\ln({\mathsf{N}}+1)$.
 362 \end{proof}
 363
 364 One can now prove Theorem~\ref{prop:stop}.
 365
 366 \begin{proof}
 367 Since $\ts^\prime$ is the time used to obtain $\mathsf{N}-1$ fair bits.
 368 Assume that the last unfair bit is $\ell$. One has
 369 $\ts=\ts^\prime+S_{X_\tau,\ell}$, and therefore
 370 $E[\ts] = E[\ts^\prime]+E[S_{X_\tau,\ell}]$. Therefore,
 371 Theorem~\ref{prop:stop} is a direct application of
 372 lemma~\ref{prop:lambda} and~\ref{lm:stopprime}.
 373 \end{proof}
 374
 375 Notice that the calculus of the stationary time upper bound is obtained
 376 under the following constraint: for each vertex in the $\mathsf{N}$-cube
 377 there are one ongoing arc and one outgoing arc that are removed.
 378 The calculus does not consider (balanced) Hamiltonian cycles, which
 379 are more regular and more binding than this constraint.
 380 In this later context, we claim that the upper bound for the stopping time
 381 should be reduced.