\begin{document}
\author{Jacques M. Bahi, Rapha\"{e}l Couturier, and Christophe
-Guyeux\thanks{Authors in alphabetic order}}
+Guyeux, Pierre-Cyrille Heam\thanks{Authors in alphabetic order}}
\maketitle
\KwOut{NewNb: array containing random numbers in global memory}
\If{threadId is concerned} {
- retrieve data from InternalVarXorLikeArray[threadId] in local variables including shared memory\;
+ retrieve data from InternalVarXorLikeArray[threadId] in local variables including shared memory and x\;
offset = threadIdx\%permutation\_size\;
o1 = threadIdx-offset+tab1[offset]\;
o2 = threadIdx-offset+tab2[offset]\;
chaotic iterations presented previously, and for this reason, it satisfies the
Devaney's formulation of a chaotic behavior.
+\section{A cryptographically secure prng for GPU}
+
+It is possible to build a cryptographically secure prng based on the previous
+algorithm (algorithm~\ref{algo:gpu_kernel2}). It simply consists in replacing
+the {\it xor-like} algorithm by another cryptographically secure prng. In
+practice, we suggest to use the BBS algorithm~\cite{BBS} which takes the form:
+$$x_{n+1}=x_n^2~ mod~ M$$ where $M$ is the product of two prime numbers. Those
+prime numbers need to be congruent to 3 modulus 4. In practice, this PRNG is
+known to be slow and not efficient for the generation of random numbers. For
+current GPU cards, the modulus operation is the most time consuming
+operation. So in order to obtain quite reasonable performances, it is required
+to use only modulus on 32 bits integer numbers. Consequently $x_n^2$ need to be
+less than $2^{32}$ and the number $M$ need to be less than $2^{16}$. So in
+pratice we can choose prime numbers around 256 that are congruent to 3 modulus
+4. With 32 bits numbers, only the 4 least significant bits of $x_n$ can be
+chosen (the maximum number of undistinguishing is less or equals to
+$log_2(log_2(x_n))$). So to generate a 32 bits number, we need to use 8 times
+the BBS algorithm, with different combinations of $M$ is required.
+
\section{Experiments}
\label{sec:experiments}
In Figure~\ref{fig:time_gpu} we compare the number of random numbers generated
per second. The xor-like prng is a xor64 described in~\cite{Marsaglia2003}. In
order to obtain the optimal performance we remove the storage of random numbers
-in the GPU memory. This step is time consumming and slows down the random number
-generation. Moreover, if you are interested by applications that consome random
+in the GPU memory. This step is time consuming and slows down the random number
+generation. Moreover, if you are interested by applications that consume random
numbers directly when they are generated, their storage is completely
useless. In this figure we can see that when the number of threads is greater
than approximately 30,000 upto 5 millions the number of random numbers generated
+\section{Security Analysis}
+
+
+
+
+In this section the concatenation of two strings $u$ and $v$ is classically
+denoted by $uv$.
+In a cryptographic context, a pseudo-random generator is a deterministic
+algorithm $G$ transforming strings into strings and such that, for any
+seed $w$ of length $N$, $G(w)$ (the output of $G$ on the input $w$) has size
+$\ell_G(N)$ with $\ell_G(N)>N$.
+The notion of {\it secure} PRNGs can now be defined as follows.
+
+\begin{definition}
+A cryptographic PRNG $G$ is secure if for any probabilistic polynomial time
+algorithm $D$, for any positive polynomial $p$, and for all sufficiently
+large $k$'s,
+$$| \mathrm{Pr}[D(G(U_k))=1]-Pr[D(U_{\ell_G(k)}=1]|< \frac{1}{p(N)},$$
+where $U_r$ is the uniform distribution over $\{0,1\}^r$ and the
+probabilities are taken over $U_N$, $U_{\ell_G(N)}$ as well as over the
+internal coin tosses of $D$.
+\end{definition}
+
+Intuitively, it means that there is no polynomial time algorithm that can
+distinguish a perfect uniform random generator from $G$ with a non
+negligible probability. The interested reader is referred
+to~\cite[chapter~3]{Goldreich} for more information. Note that it is
+quite easily possible to change the function $\ell$ into any polynomial
+function $\ell^\prime$ satisfying $\ell^\prime(N)>N)$~\cite[Chapter 3.3]{Goldreich}.
+
+The generation schema developed in (\ref{equation Oplus}) is based on a
+pseudo-random generator. Let $H$ be a cryptographic PRNG. We may assume,
+without loss of generality, that for any string $S_0$ of size $N$, the size
+of $H(S_0)$ is $kN$, with $k>2$. It means that $\ell_H(N)=kN$.
+Let $S_1,\ldots,S_k$ be the
+strings of length $N$ such that $H(S_0)=S_1 \ldots S_k$ ($H(S_0)$ is the concatenation of
+the $S_i$'s). The cryptographic PRNG $X$ defined in (\ref{equation Oplus})
+is the algorithm mapping any string of length $2N$ $x_0S_0$ into the string
+$(x_0\oplus S_0 \oplus S_1)(x_0\oplus S_0 \oplus S_1\oplus S_2)\ldots
+(x_o\bigoplus_{i=0}^{i=k}S_i)$. Particularly one has $\ell_{X}(2N)=kN=\ell_H(N)$.
+We claim now that if this PRNG is secure,
+then the new one is secure too.
+
+\begin{proposition}
+If $H$ is a secure cryptographic PRNG, then $X$ is a secure cryptographic
+PRNG too.
+\end{proposition}
+
+\begin{proof}
+The proposition is proved by contraposition. Assume that $X$ is not
+secure. By Definition, there exists a polynomial time probabilistic
+algorithm $D$, a positive polynomial $p$, such that for all $k_0$ there exists
+$N\geq \frac{k_0}{2}$ satisfying
+$$| \mathrm{Pr}[D(X(U_{2N}))=1]-\mathrm{Pr}[D(U_{kN}=1]|\geq \frac{1}{p(2N)}.$$
+We describe a new probabilistic algorithm $D^\prime$ on an input $w$ of size
+$kN$:
+\begin{enumerate}
+\item Decompose $w$ into $w=w_1\ldots w_{k}$, where each $w_i$ has size $N$.
+\item Pick a string $y$ of size $N$ uniformly at random.
+\item Compute $z=(y\oplus w_1)(y\oplus w_1\oplus w_2)\ldots (y
+ \bigoplus_{i=1}^{i=k} w_i).$
+\item Return $D(z)$.
+\end{enumerate}
+
+
+Consider for each $y\in \mathbb{B}^{kN}$ the function $\varphi_{y}$
+from $\mathbb{B}^{kN}$ into $\mathbb{B}^{kN}$ mapping $w=w_1\ldots w_k$
+(each $w_i$ has length $N$) to
+$(y\oplus w_1)(y\oplus w_1\oplus w_2)\ldots (y
+ \bigoplus_{i=1}^{i=k_1} w_i).$ By construction, one has for every $w$,
+\begin{equation}\label{PCH-1}
+D^\prime(w)=D(\varphi_y(w)),
+\end{equation}
+where $y$ is randomly generated.
+Moreover, for each $y$, $\varphi_{y}$ is injective: if
+$(y\oplus w_1)(y\oplus w_1\oplus w_2)\ldots (y\bigoplus_{i=1}^{i=k_1}
+w_i)=(y\oplus w_1^\prime)(y\oplus w_1^\prime\oplus w_2^\prime)\ldots
+(y\bigoplus_{i=1}^{i=k} w_i^\prime)$, then for every $1\leq j\leq k$,
+$y\bigoplus_{i=1}^{i=j} w_i^\prime=y\bigoplus_{i=1}^{i=j} w_i$. It follows,
+by a direct induction, that $w_i=w_i^\prime$. Furthermore, since $\mathbb{B}^{kN}$
+is finite, each $\varphi_y$ is bijective. Therefore, and using (\ref{PCH-1}),
+one has
+\begin{equation}\label{PCH-2}
+\mathrm{Pr}[D^\prime(U_{kN})=1]=\mathrm{Pr}[D(\varphi_y(U_{kN}))=1]=\mathrm{Pr}[D(U_{kN})=1].
+\end{equation}
+
+Now, using (\ref{PCH-1}) again, one has for every $x$,
+\begin{equation}\label{PCH-3}
+D^\prime(H(x))=D(\varphi_y(H(x))),
+\end{equation}
+where $y$ is randomly generated. By construction, $\varphi_y(H(x))=X(yx)$,
+thus
+\begin{equation}\label{PCH-3}
+D^\prime(H(x))=D(yx),
+\end{equation}
+where $y$ is randomly generated.
+It follows that
+
+\begin{equation}\label{PCH-4}
+\mathrm{Pr}[D^\prime(H(U_{N}))=1]=\mathrm{Pr}[D(U_{2N})=1].
+\end{equation}
+ From (\ref{PCH-2}) and (\ref{PCH-4}), one can deduce that
+there exist a polynomial time probabilistic
+algorithm $D^\prime$, a positive polynomial $p$, such that for all $k_0$ there exists
+$N\geq \frac{k_0}{2}$ satisfying
+$$| \mathrm{Pr}[D(H(U_{N}))=1]-\mathrm{Pr}[D(U_{kN}=1]|\geq \frac{1}{p(2N)},$$
+proving that $H$ is not secure, a contradiction.
+\end{proof}
+
+
+
+
\section{Conclusion}