annexePreuveStopping.tex

   1 \section{Quantifier l'écart par rapport à la distribution uniforme}
   2 %15 Rairo
   3
   4
   5
   6
   7
   8 Let thus be given such kind of map.
   9 This article focuses on studying its iterations according to
  10 the equation~(\ref{eq:asyn}) with a given strategy.
  11 First of all, this can be interpreted as walking into its iteration graph
  12 where the choice of the edge to follow is decided by the strategy.
  13 Notice that the iteration graph is always a subgraph of
  14 ${\mathsf{N}}$-cube augmented with all the self-loop, \textit{i.e.}, all the
  15 edges $(v,v)$ for any $v \in \Bool^{\mathsf{N}}$.
  16 Next, if we add probabilities on the transition graph, iterations can be
  17 interpreted as Markov chains.
  18
  19 \begin{xpl}
  20 Let us consider for instance
  21 the graph $\Gamma(f)$ defined
  22 in \textsc{Figure~\ref{fig:iteration:f*}.} and
  23 the probability function $p$ defined on the set of edges as follows:
  24 $$
  25 p(e) \left\{
  26 \begin{array}{ll}
  27 = \frac{2}{3} \textrm{ if $e=(v,v)$ with $v \in \Bool^3$,}\\
  28 = \frac{1}{6} \textrm{ otherwise.}
  29 \end{array}
  30 \right.
  31 $$
  32 The matrix $P$ of the Markov chain associated to the function $f^*$ and to its probability function $p$ is
  33 \[
  34 P=\dfrac{1}{6} \left(
  35 \begin{array}{llllllll}
  36 4&1&1&0&0&0&0&0 \\
  37 1&4&0&0&0&1&0&0 \\
  38 0&0&4&1&0&0&1&0 \\
  39 0&1&1&4&0&0&0&0 \\
  40 1&0&0&0&4&0&1&0 \\
  41 0&0&0&0&1&4&0&1 \\
  42 0&0&0&0&1&0&4&1 \\
  43 0&0&0&1&0&1&0&4
  44 \end{array}
  45 \right)
  46 \]
  47 \end{xpl}
  48
  49
  50 % % Let us first recall the  \emph{Total Variation} distance $\tv{\pi-\mu}$,
  51 % % which is defined for two distributions $\pi$ and $\mu$ on the same set
  52 % % $\Bool^n$  by:
  53 % % $$\tv{\pi-\mu}=\max_{A\subset \Bool^n} |\pi(A)-\mu(A)|.$$
  54 % % It is known that
  55 % % $$\tv{\pi-\mu}=\frac{1}{2}\sum_{x\in\Bool^n}|\pi(x)-\mu(x)|.$$
  56
  57 % % Let then $M(x,\cdot)$ be the
  58 % % distribution induced by the $x$-th row of $M$. If the Markov chain
  59 % % induced by
  60 % % $M$ has a stationary distribution $\pi$, then we define
  61 % % $$d(t)=\max_{x\in\Bool^n}\tv{M^t(x,\cdot)-\pi}.$$
  62 % Intuitively $d(t)$ is the largest deviation between
  63 % the distribution $\pi$ and $M^t(x,\cdot)$, which
  64 % is the result of iterating $t$ times the function.
  65 % Finally, let $\varepsilon$ be a positive number, the \emph{mixing time}
  66 % with respect to $\varepsilon$ is given by
  67 % $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
  68 % It defines the smallest iteration number
  69 % that is sufficient to obtain a deviation lesser than $\varepsilon$.
  70 % Notice that the upper and lower bounds of mixing times cannot
  71 % directly be computed with eigenvalues formulae as expressed
  72 % in~\cite[Chap. 12]{LevinPeresWilmer2006}. The authors of this latter work
  73 % only consider reversible Markov matrices whereas we do no restrict our
  74 % matrices to such a form.
  75
  76
  77
  78
  79
  80
  81
  82 This section considers functions $f: \Bool^{\mathsf{N}} \rightarrow \Bool^{\mathsf{N}} $
  83 issued from an hypercube where an Hamiltonian path has been removed.
  84 A specific random walk in this modified hypercube is first
  85 introduced. We further detail
  86 a theoretical study on the length of the path
  87 which is sufficient to follow to get a uniform distribution.
  88 Notice that for a general references on Markov chains
  89 see~\cite{LevinPeresWilmer2006}
  90 , and particularly Chapter~5 on stopping times.
  91
  92
  93
  94
  95 First of all, let $\pi$, $\mu$ be two distributions on $\Bool^{\mathsf{N}}$. The total
  96 variation distance between $\pi$ and $\mu$ is denoted $\tv{\pi-\mu}$ and is
  97 defined by
  98 $$\tv{\pi-\mu}=\max_{A\subset \Bool^{\mathsf{N}}} |\pi(A)-\mu(A)|.$$ It is known that
  99 $$\tv{\pi-\mu}=\frac{1}{2}\sum_{X\in\Bool^{\mathsf{N}}}|\pi(X)-\mu(X)|.$$ Moreover, if
 100 $\nu$ is a distribution on $\Bool^{\mathsf{N}}$, one has
 101 $$\tv{\pi-\mu}\leq \tv{\pi-\nu}+\tv{\nu-\mu}$$
 102
 103 Let $P$ be the matrix of a Markov chain on $\Bool^{\mathsf{N}}$. $P(X,\cdot)$ is the
 104 distribution induced by the $X$-th row of $P$. If the Markov chain induced by
 105 $P$ has a stationary distribution $\pi$, then we define
 106 $$d(t)=\max_{X\in\Bool^{\mathsf{N}}}\tv{P^t(X,\cdot)-\pi}.$$
 107
 108 and
 109
 110 $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
 111 One can prove that
 112
 113 $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(\frac{1}{4})$$
 114
 115
 116
 117
 118 % It is known that $d(t+1)\leq d(t)$. \JFC{references ? Cela a-t-il
 119 % un intérêt dans la preuve ensuite.}
 120
 121
 122
 123 %and
 124 % $$t_{\rm mix}(\varepsilon)=\min\{t \mid d(t)\leq \varepsilon\}.$$
 125 % One can prove that \JFc{Ou cela a-t-il été fait?}
 126 % $$t_{\rm mix}(\varepsilon)\leq \lceil\log_2(\varepsilon^{-1})\rceil t_{\rm mix}(\frac{1}{4})$$
 127
 128
 129
 130 Let $(X_t)_{t\in \mathbb{N}}$ be a sequence of $\Bool^{\mathsf{N}}$ valued random
 131 variables. A $\mathbb{N}$-valued random variable $\tau$ is a {\it stopping
 132   time} for the sequence $(X_i)$ if for each $t$ there exists $B_t\subseteq
 133 (\Bool^{\mathsf{N}})^{t+1}$ such that $\{\tau=t\}=\{(X_0,X_1,\ldots,X_t)\in B_t\}$.
 134 In other words, the event $\{\tau = t \}$ only depends on the values of
 135 $(X_0,X_1,\ldots,X_t)$, not on $X_k$ with $k > t$.
 136
 137
 138 Let $(X_t)_{t\in \mathbb{N}}$ be a Markov chain and $f(X_{t-1},Z_t)$ a
 139 random mapping representation of the Markov chain. A {\it randomized
 140   stopping time} for the Markov chain is a stopping time for
 141 $(Z_t)_{t\in\mathbb{N}}$. If the Markov chain is irreducible and has $\pi$
 142 as stationary distribution, then a {\it stationary time} $\tau$ is a
 143 randomized stopping time (possibly depending on the starting position $X$),
 144 such that  the distribution of $X_\tau$ is $\pi$:
 145 $$\P_X(X_\tau=Y)=\pi(Y).$$
 146
 147 A stopping time $\tau$ is a {\emph strong stationary time} if $X_{\tau}$ is
 148 independent of $\tau$.
 149
 150
 151 \begin{theorem}
 152 If $\tau$ is a strong stationary time, then $d(t)\leq \max_{X\in\Bool^{\mathsf{N}}}
 153 \P_X(\tau > t)$.
 154 \end{theorem}
 155
 156
 157 %Let $\Bool^n$ be the set of words of length $n$.
 158 Let $E=\{(X,Y)\mid
 159 X\in \Bool^{\mathsf{N}}, Y\in \Bool^{\mathsf{N}},\ X=Y \text{ or } X\oplus Y \in 0^*10^*\}$.
 160 In other words, $E$ is the set of all the edges in the classical
 161 ${\mathsf{N}}$-cube.
 162 Let $h$ be a function from $\Bool^{\mathsf{N}}$ into $\llbracket 1, {\mathsf{N}} \rrbracket$.
 163 Intuitively speaking $h$ aims at memorizing for each node
 164 $X \in \Bool^{\mathsf{N}}$ which edge is removed in the Hamiltonian cycle,
 165 \textit{i.e.} which bit in $\llbracket 1, {\mathsf{N}} \rrbracket$
 166 cannot be switched.
 167
 168
 169
 170 We denote by $E_h$ the set $E\setminus\{(X,Y)\mid X\oplus Y =
 171 0^{{\mathsf{N}}-h(X)}10^{h(X)-1}\}$. This is the set of the modified hypercube,
 172 \textit{i.e.}, the ${\mathsf{N}}$-cube where the Hamiltonian cycle $h$
 173 has been removed.
 174
 175 We define the Markov matrix $P_h$ for each line $X$ and
 176 each column $Y$  as follows:
 177 \begin{equation}
 178 \left\{
 179 \begin{array}{ll}
 180 P_h(X,X)=\frac{1}{2}+\frac{1}{2{\mathsf{N}}} & \\
 181 P_h(X,Y)=0 & \textrm{if  $(X,Y)\notin E_h$}\\
 182 P_h(X,Y)=\frac{1}{2{\mathsf{N}}} & \textrm{if $X\neq Y$ and $(X,Y) \in E_h$}
 183 \end{array}
 184 \right.
 185 \label{eq:Markov:rairo}
 186 \end{equation}
 187
 188 We denote by $\ov{h} : \Bool^{\mathsf{N}} \rightarrow \Bool^{\mathsf{N}}$ the function
 189 such that for any $X \in \Bool^{\mathsf{N}} $,
 190 $(X,\ov{h}(X)) \in E$ and $X\oplus\ov{h}(X)=0^{{\mathsf{N}}-h(X)}10^{h(X)-1}$.
 191 The function $\ov{h}$ is said {\it square-free} if for every $X\in \Bool^{\mathsf{N}}$,
 192 $\ov{h}(\ov{h}(X))\neq X$.
 193
 194 \begin{lemma}\label{lm:h}
 195 If $\ov{h}$ is bijective and square-free, then $h(\ov{h}^{-1}(X))\neq h(X)$.
 196 \end{lemma}
 197
 198 \begin{proof}
 199 Let $\ov{h}$ be bijective.
 200 Let $k\in \llbracket 1, {\mathsf{N}} \rrbracket$ s.t. $h(\ov{h}^{-1}(X))=k$.
 201 Then $(\ov{h}^{-1}(X),X)$ belongs to $E$ and
 202 $\ov{h}^{-1}(X)\oplus X = 0^{{\mathsf{N}}-k}10^{k-1}$.
 203 Let us suppose $h(X) = h(\ov{h}^{-1}(X))$. In such a case, $h(X) =k$.
 204 By definition of $\ov{h}$, $(X, \ov{h}(X)) \in E $ and
 205 $X\oplus\ov{h}(X)=0^{{\mathsf{N}}-h(X)}10^{h(X)-1} = 0^{{\mathsf{N}}-k}10^{k-1}$.
 206 Thus $\ov{h}(X)= \ov{h}^{-1}(X)$, which leads to $\ov{h}(\ov{h}(X))= X$.
 207 This contradicts the square-freeness of $\ov{h}$.
 208 \end{proof}
 209
 210 Let $Z$ be a random variable that is uniformly distributed over
 211 $\llbracket 1, {\mathsf{N}} \rrbracket \times \Bool$.
 212 For $X\in \Bool^{\mathsf{N}}$, we
 213 define, with $Z=(i,b)$,
 214 $$
 215 \left\{
 216 \begin{array}{ll}
 217 f(X,Z)=X\oplus (0^{{\mathsf{N}}-i}10^{i-1}) & \text{if } b=1 \text{ and } i\neq h(X),\\
 218 f(X,Z)=X& \text{otherwise.}
 219 \end{array}\right.
 220 $$
 221
 222 The Markov chain is thus defined as
 223 $$
 224 X_t= f(X_{t-1},Z_t)
 225 $$
 226
 227
 228
 229 %%%%%%%%%%%%%%%%%%%%%%%%%%%ù
 230 %\section{Stopping time}
 231
 232 An integer $\ell\in \llbracket 1,{\mathsf{N}} \rrbracket$ is said {\it fair}
 233 at time $t$ if there
 234 exists $0\leq j <t$ such that $Z_{j+1}=(\ell,\cdot)$ and $h(X_j)\neq \ell$.
 235 In other words, there exist a date $j$ before $t$ where
 236 the first element of the random variable $Z$ is exactly $l$
 237 (\textit{i.e.}, $l$ is the strategy at date $j$)
 238 and where the configuration $X_j$ allows to traverse the edge $l$.
 239
 240 Let $\ts$ be the first time all the elements of $\llbracket 1, {\mathsf{N}} \rrbracket$
 241 are fair. The integer $\ts$ is a randomized stopping time for
 242 the Markov chain $(X_t)$.
 243
 244
 245 \begin{lemma}
 246 The integer $\ts$ is a strong stationary time.
 247 \end{lemma}
 248
 249 \begin{proof}
 250 Let $\tau_\ell$ be the first time that $\ell$ is fair. The random variable
 251 $Z_{\tau_\ell}$ is of the form $(\ell,b)$ %with $\delta\in\{0,1\}$ and
 252 such that
 253 $b=1$ with probability $\frac{1}{2}$ and $b=0$ with probability
 254 $\frac{1}{2}$. Since $h(X_{\tau_\ell-1})\neq\ell$ the value of the $\ell$-th
 255 bit of $X_{\tau_\ell}$
 256 is $0$ or $1$ with the same probability ($\frac{1}{2}$).
 257
 258  Moving next in the chain, at each step,
 259 the $l$-th bit  is switched from $0$ to $1$ or from $1$ to $0$ each time with
 260 the same probability. Therefore,  for $t\geq \tau_\ell$, the
 261 $\ell$-th bit of $X_t$ is $0$ or $1$ with the same probability, proving the
 262 lemma.\end{proof}
 263
 264 \theostopmajorant*
 265
 266 For each $X\in \Bool^{\mathsf{N}}$ and $\ell\in\llbracket 1,{\mathsf{N}}\rrbracket$,
 267 let $S_{X,\ell}$ be the
 268 random variable that counts the number of steps
 269 from $X$ until we reach a configuration where
 270 $\ell$ is fair. More formally
 271 $$S_{X,\ell}=\min \{t \geq 1\mid h(X_{t-1})\neq \ell\text{ and }Z_t=(\ell,.)\text{ and } X_0=X\}.$$
 272
 273 %  We denote by
 274 % $$\lambda_h=\max_{X,\ell} S_{X,\ell}.$$
 275
 276
 277 \begin{lemma}\label{prop:lambda}
 278 Let $\ov{h}$ is a square-free bijective function. Then
 279 for all $X$ and
 280 all $\ell$,
 281 the inequality
 282 $E[S_{X,\ell}]\leq 8{\mathsf{N}}^2$ is established.
 283 \end{lemma}
 284
 285 \begin{proof}
 286 For every $X$, every $\ell$, one has $\P(S_{X,\ell})\leq 2)\geq
 287 \frac{1}{4{\mathsf{N}}^2}$.
 288 Let $X_0= X$.
 289 Indeed,
 290 \begin{itemize}
 291 \item if $h(X)\neq \ell$, then
 292 $\P(S_{X,\ell}=1)=\frac{1}{2{\mathsf{N}}}\geq \frac{1}{4{\mathsf{N}}^2}$.
 293 \item otherwise, $h(X)=\ell$, then
 294 $\P(S_{X,\ell}=1)=0$.
 295 But in this case, intuitively, it is possible to move
 296 from $X$ to $\ov{h}^{-1}(X)$ (with probability $\frac{1}{2N}$). And in
 297 $\ov{h}^{-1}(X)$ the $l$-th bit can be switched.
 298 More formally,
 299 since $\ov{h}$ is square-free,
 300 $\ov{h}(X)=\ov{h}(\ov{h}(\ov{h}^{-1}(X)))\neq \ov{h}^{-1}(X)$. It follows
 301 that $(X,\ov{h}^{-1}(X))\in E_h$. We thus have
 302 $P(X_1=\ov{h}^{-1}(X))=\frac{1}{2{\mathsf{N}}}$. Now, by Lemma~\ref{lm:h},
 303 $h(\ov{h}^{-1}(X))\neq h(X)$. Therefore $\P(S_{x,\ell}=2\mid
 304 X_1=\ov{h}^{-1}(X))=\frac{1}{2{\mathsf{N}}}$, proving that $\P(S_{x,\ell}\leq 2)\geq
 305 \frac{1}{4{\mathsf{N}}^2}$.
 306 \end{itemize}
 307
 308
 309
 310
 311 Therefore, $\P(S_{X,\ell}\geq 3)\leq 1-\frac{1}{4{\mathsf{N}}^2}$. By induction, one
 312 has, for every $i$, $\P(S_{X,\ell}\geq 2i)\leq
 313 \left(1-\frac{1}{4{\mathsf{N}}^2}\right)^i$.
 314  Moreover,
 315 since $S_{X,\ell}$ is positive, it is known~\cite[lemma 2.9]{proba}, that
 316 $$E[S_{X,\ell}]=\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i).$$
 317 Since $\P(S_{X,\ell}\geq i)\geq \P(S_{X,\ell}\geq i+1)$, one has
 318 $$E[S_{X,\ell}]=\sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq i)\leq
 319 \P(S_{X,\ell}\geq 1)+\P(S_{X,\ell}\geq 2)+2 \sum_{i=1}^{+\infty}\P(S_{X,\ell}\geq 2i).$$
 320 Consequently,
 321 $$E[S_{X,\ell}]\leq 1+1+2
 322 \sum_{i=1}^{+\infty}\left(1-\frac{1}{4{\mathsf{N}}^2}\right)^i=2+2(4{\mathsf{N}}^2-1)=8{\mathsf{N}}^2,$$
 323 which concludes the proof.
 324 \end{proof}
 325
 326 Let $\ts^\prime$ be the time used to get all the bits but one fair.
 327
 328 \begin{lemma}\label{lm:stopprime}
 329 One has $E[\ts^\prime]\leq 4{\mathsf{N}} \ln ({\mathsf{N}}+1).$
 330 \end{lemma}
 331
 332 \begin{proof}
 333 This is a classical  Coupon Collector's like problem. Let $W_i$ be the
 334 random variable counting the number of moves done in the Markov chain while
 335 we had exactly $i-1$ fair bits. One has $\ts^\prime=\sum_{i=1}^{{\mathsf{N}}-1}W_i$.
 336  But when we are at position $X$ with $i-1$ fair bits, the probability of
 337  obtaining a new fair bit is either $1-\frac{i-1}{{\mathsf{N}}}$ if $h(X)$ is fair,
 338  or  $1-\frac{i-2}{{\mathsf{N}}}$ if $h(X)$ is not fair.
 339
 340 Therefore,
 341 $\P (W_i=k)\leq \left(\frac{i-1}{{\mathsf{N}}}\right)^{k-1} \frac{{\mathsf{N}}-i+2}{{\mathsf{N}}}.$
 342 Consequently, we have $\P(W_i\geq k)\leq \left(\frac{i-1}{{\mathsf{N}}}\right)^{k-1} \frac{{\mathsf{N}}-i+2}{{\mathsf{N}}-i+1}.$
 343 It follows that $E[W_i]=\sum_{k=1}^{+\infty} \P (W_i\geq k)\leq {\mathsf{N}} \frac{{\mathsf{N}}-i+2}{({\mathsf{N}}-i+1)^2}\leq \frac{4{\mathsf{N}}}{{\mathsf{N}}-i+2}$.
 344
 345
 346
 347 It follows that
 348 $E[W_i]\leq \frac{4{\mathsf{N}}}{{\mathsf{N}}-i+2}$. Therefore
 349 $$E[\ts^\prime]=\sum_{i=1}^{{\mathsf{N}}-1}E[W_i]\leq
 350 4{\mathsf{N}}\sum_{i=1}^{{\mathsf{N}}-1} \frac{1}{{\mathsf{N}}-i+2}=
 351 4{\mathsf{N}}\sum_{i=3}^{{\mathsf{N}}+1}\frac{1}{i}.$$
 352
 353 But $\sum_{i=1}^{{\mathsf{N}}+1}\frac{1}{i}\leq 1+\ln({\mathsf{N}}+1)$. It follows that
 354 $1+\frac{1}{2}+\sum_{i=3}^{{\mathsf{N}}+1}\frac{1}{i}\leq 1+\ln({\mathsf{N}}+1).$
 355 Consequently,
 356 $E[\ts^\prime]\leq
 357 4{\mathsf{N}} (-\frac{1}{2}+\ln({\mathsf{N}}+1))\leq
 358 4{\mathsf{N}}\ln({\mathsf{N}}+1)$.
 359 \end{proof}
 360
 361 One can now prove Theorem~\ref{prop:stop}.
 362
 363 \begin{proof}
 364 Since $\ts^\prime$ is the time used to obtain $\mathsf{N}-1$ fair bits.
 365 Assume that the last unfair bit is $\ell$. One has
 366 $\ts=\ts^\prime+S_{X_\tau,\ell}$, and therefore
 367 $E[\ts] = E[\ts^\prime]+E[S_{X_\tau,\ell}]$. Therefore,
 368 Theorem~\ref{prop:stop} is a direct application of
 369 lemma~\ref{prop:lambda} and~\ref{lm:stopprime}.
 370 \end{proof}
 371
 372 Notice that the calculus of the stationary time upper bound is obtained
 373 under the following constraint: for each vertex in the $\mathsf{N}$-cube
 374 there are one ongoing arc and one outgoing arc that are removed.
 375 The calculus does not consider (balanced) Hamiltonian cycles, which
 376 are more regular and more binding than this constraint.
 377 In this later context, we claim that the upper bound for the stopping time
 378 should be reduced.