From 857d339487ad6e04c35fd479cff0d162bbf958ca Mon Sep 17 00:00:00 2001 From: couturie Date: Sun, 11 Dec 2011 11:14:30 +0100 Subject: [PATCH] petites modifs --- prng_gpu.tex | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/prng_gpu.tex b/prng_gpu.tex index f21f19e..c646316 100644 --- a/prng_gpu.tex +++ b/prng_gpu.tex @@ -877,18 +877,18 @@ unsigned int CIPRNG() { -In Listing~\ref{algo:seqCIPRNG} a sequential version of the proposed PRNG based on chaotic iterations - is presented. The xor operator is represented by \textasciicircum. -This function uses three classical 64-bits PRNGs, namely the \texttt{xorshift}, the -\texttt{xor128}, and the \texttt{xorwow}~\cite{Marsaglia2003}. In the following, we call them -``xor-like PRNGs''. -As -each xor-like PRNG uses 64-bits whereas our proposed generator works with 32-bits, -we use the command \texttt{(unsigned int)}, that selects the 32 least significant bits of a given integer, and the code -\texttt{(unsigned int)(t3$>>$32)} in order to obtain the 32 most significant bits of \texttt{t}. - -So producing a pseudorandom number needs 6 xor operations -with 6 32-bits numbers that are provided by 3 64-bits PRNGs. This version successfully passes the +In Listing~\ref{algo:seqCIPRNG} a sequential version of the proposed PRNG based +on chaotic iterations is presented. The xor operator is represented by +\textasciicircum. This function uses three classical 64-bit PRNGs, namely the +\texttt{xorshift}, the \texttt{xor128}, and the +\texttt{xorwow}~\cite{Marsaglia2003}. In the following, we call them ``xor-like +PRNGs''. As each xor-like PRNG uses 64 bits whereas our proposed generator +works with 32 bits, we use the command \texttt{(unsigned int)}, that selects the +32 least significant bits of a given integer, and the code \texttt{(unsigned + int)(t$>>$32)} in order to obtain the 32 most significant bits of \texttt{t}. 
+ +So producing a pseudorandom number needs 6 xor operations with six 32-bit numbers +that are provided by three 64-bit PRNGs. This version successfully passes the stringent BigCrush battery of tests~\cite{LEcuyerS07}. \section{Efficient PRNGs based on Chaotic Iterations on GPU} @@ -982,12 +982,14 @@ thread uses the result of which other one, we can use a combination array that contains the indexes of all threads and for which a combination has been performed. -In Algorithm~\ref{algo:gpu_kernel2}, two combination arrays are used. -The variable \texttt{offset} is computed using the value of +In Algorithm~\ref{algo:gpu_kernel2}, two combination arrays are used. The +variable \texttt{offset} is computed using the value of \texttt{combination\_size}. Then we can compute \texttt{o1} and \texttt{o2} -representing the indexes of the other threads whose results are used -by the current one. In this algorithm, we consider that a 64-bits xor-like -PRNG has been chosen, and so its two 32-bits parts are used. +representing the indexes of the other threads whose results are used by the +current one. In this algorithm, we consider that a 32-bit xor-like PRNG has +been chosen. In practice, we use the xor128 proposed in~\cite{Marsaglia2003} in +which unsigned longs (64 bits) have been replaced by unsigned integers (32 +bits). This version also can pass the whole {\it BigCrush} battery of tests. @@ -996,14 +998,14 @@ This version also can pass the whole {\it BigCrush} battery of tests. 
\KwIn{InternalVarXorLikeArray: array with internal variables of 1 xor-like PRNGs in global memory\; NumThreads: Number of threads\; -tab1, tab2: Arrays containing combinations of size combination\_size\;} +array\_comb1, array\_comb2: Arrays containing combinations of size combination\_size\;} \KwOut{NewNb: array containing random numbers in global memory} \If{threadId is concerned} { retrieve data from InternalVarXorLikeArray[threadId] in local variables including shared memory and x\; offset = threadIdx\%combination\_size\; - o1 = threadIdx-offset+tab1[offset]\; - o2 = threadIdx-offset+tab2[offset]\; + o1 = threadIdx-offset+array\_comb1[offset]\; + o2 = threadIdx-offset+array\_comb2[offset]\; \For{i=1 to n} { t=xor-like()\; t=t $\wedge$ shmem[o1] $\wedge$ shmem[o2]\; @@ -1293,15 +1295,17 @@ variable for BBS number 8 is stored in place 1. \KwIn{InternalVarBBSArray: array with internal variables of the 8 BBS in global memory\; NumThreads: Number of threads\; -tab: 2D Arrays containing 16 combinations (in first dimension) of size combination\_size (in second dimension)\;} +array\_comb: 2D array containing 16 combinations (in first dimension) of size combination\_size (in second dimension)\; + +} \KwOut{NewNb: array containing random numbers in global memory} \If{threadId is concerned} { retrieve data from InternalVarBBSArray[threadId] in local variables including shared memory and x\; we consider that bbs1 ... bbs8 represent the internal states of the 8 BBS numbers\; offset = threadIdx\%combination\_size\; - o1 = threadIdx-offset+tab[bbs1\&7][offset]\; - o2 = threadIdx-offset+tab[8+bbs2\&7][offset]\; + o1 = threadIdx-offset+array\_comb[bbs1\&7][offset]\; + o2 = threadIdx-offset+array\_comb[8+(bbs2\&7)][offset]\; \For{i=1 to n} { t<<=4\; t|=BBS1(bbs1)\&15\; -- 2.39.5