-%\renewcommand{\baselinestretch}{1}
-\begin{table}[!h]
-\begin{center}
-\caption{The average CPU time (sec) of the serial PAVA, MLS and parallel MLS algorithms. } \label{ch11:table1}
-\begin{tabular}{|r|r|r|r|}
-
-Data & PAVA & MLS & GPU MLS \\ \hline
-
-monotone increasing $f$ & & & \\
-$n=5\times 10^4$ &0.01&5& 0.092\\
-$n=10^5$ &0.03&40& 0.35\\
-$n=5\times 10^5$ &0.4&1001&8.6 \\
-$n=10^6$ &0.8& 5000& 38 \\
-$n=2 \times 10^6$ & 1.6 &-- &152 \\
-$n=10 \times 10^6$ & 2 &-- & 3500 \\
-$n=20 \times 10^6$ & 4.5&-- & --\\
-$n=50 \times 10^6$ & 12 &-- & --\\
-\hline
-
-constant or decreasing $f$ & & & \\
-$n=10^6$ &0.2&0.1& 38\\
-$n=10 \times 10^6$ &1.9& 1.9& 3500 \\
-$n=20 \times 10^6$ &3.5& 4.0&-- \\
-$n=50 \times 10^6$ &11& 11& -- \\
-
-\end{tabular}
-\end{center}
-\end{table}
-%\renewcommand{\baselinestretch}{2}
-
-
-\begin{figure}[!hp]
- \begin{alltt}
-\begin{center}
-\begin{minipage}{13cm}\small
-template<typename Tx>
-__device__ Tx Aver(Tx z,int i,int j, Tx *z) \{return (z-z[j+1])/(j-i+1);\}
-
-template<typename Tx>
-__global__ void monotonizekernel(Tx *y, Tx *z, Tx *u, int *key, int n)
-\{ int i = threadIdx.x + blockIdx.x * blockDim.x;
- if(i<n) \{
- int smallestJ = i;
- Tx curP, smallestP, curz=z[i];
- smallestP=Aver(curz,i,i,z);
- for(int j = i+1; j < n; j++) \{
- curP=Aver(curz,i,j,z);
- if(smallestP>curP) \{
- smallestJ = j;
- smallestP = curP;
- \}
- \}
- curP=y[i];
- if(curP > smallestP) t=smallestP;
- else smallestJ=i;
- key[i]=smallestJ;
- u[i]=t;
- \}
-\}
-
-template< typename Tx >
-void MonotonizeData(Tx *y, int n, Tx *u) \{
- thrust::less_equal<int> binary_pred;
- thrust::maximum<Tx> binary_op2;
- thrust::device_vector<Tx> z_d(n+1);
- thrust::device_vector<int> keys_d(n);
- thrust::device_ptr<Tx> y_d(y), u_d(u);
- thrust::fill(u_d, u_d+n, -1e100);
- thrust::fill(keys_d.begin(), keys_d.end(), 0);
-
- thrust::reverse_iterator< typename thrust::device_vector<Tx>::iterator >
- y_reverse_b(y_d+n), y_reverse_end(y_d), z_reverse_b(z_d.end());
+
+
+%% %\renewcommand{\baselinestretch}{1}
+%% \begin{table}[!h]
+%% \begin{center}
+%% \caption{The average CPU time (sec) of the serial PAVA, MLS and parallel MLS algorithms. } \label{ch11:table1}
+%% \begin{tabular}{|r|r|r|r|}
+
+%% Data & PAVA & MLS & GPU MLS \\ \hline
+
+%% monotone increasing $f$ & & & \\
+%% $n=5\times 10^4$ &0.01&5& 0.092\\
+%% $n=10^5$ &0.03&40& 0.35\\
+%% $n=5\times 10^5$ &0.4&1001&8.6 \\
+%% $n=10^6$ &0.8& 5000& 38 \\
+%% $n=2 \times 10^6$ & 1.6 &-- &152 \\
+%% $n=10 \times 10^6$ & 2 &-- & 3500 \\
+%% $n=20 \times 10^6$ & 4.5&-- & --\\
+%% $n=50 \times 10^6$ & 12 &-- & --\\
+%% \hline
+
+%% constant or decreasing $f$ & & & \\
+%% $n=10^6$ &0.2&0.1& 38\\
+%% $n=10 \times 10^6$ &1.9& 1.9& 3500 \\
+%% $n=20 \times 10^6$ &3.5& 4.0&-- \\
+%% $n=50 \times 10^6$ &11& 11& -- \\
+
+%% \end{tabular}
+%% \end{center}
+%% \end{table}
+%% %\renewcommand{\baselinestretch}{2}
+
+
+%% \begin{figure}[!hp]
+%% \begin{alltt}
+%% \begin{center}
+%% \begin{minipage}{13cm}\small
+%% template<typename Tx>
+%% __device__ Tx Aver(Tx z,int i,int j, Tx *z) \{return (z-z[j+1])/(j-i+1);\}
+
+%% template<typename Tx>
+%% __global__ void monotonizekernel(Tx *y, Tx *z, Tx *u, int *key, int n)
+%% \{ int i = threadIdx.x + blockIdx.x * blockDim.x;
+%% if(i<n) \{
+%% int smallestJ = i;
+%% Tx curP, smallestP, curz=z[i];
+%% smallestP=Aver(curz,i,i,z);
+%% for(int j = i+1; j < n; j++) \{
+%% curP=Aver(curz,i,j,z);
+%% if(smallestP>curP) \{
+%% smallestJ = j;
+%% smallestP = curP;
+%% \}
+%% \}
+%% curP=y[i];
+%% if(curP > smallestP) t=smallestP;
+%% else smallestJ=i;
+%% key[i]=smallestJ;
+%% u[i]=t;
+%% \}
+%% \}
+
+%% template< typename Tx >
+%% void MonotonizeData(Tx *y, int n, Tx *u) \{
+%% thrust::less_equal<int> binary_pred;
+%% thrust::maximum<Tx> binary_op2;
+%% thrust::device_vector<Tx> z_d(n+1);
+%% thrust::device_vector<int> keys_d(n);
+%% thrust::device_ptr<Tx> y_d(y), u_d(u);
+%% thrust::fill(u_d, u_d+n, -1e100);
+%% thrust::fill(keys_d.begin(), keys_d.end(), 0);
+
+%% thrust::reverse_iterator< typename thrust::device_vector<Tx>::iterator >
+%% y_reverse_b(y_d+n), y_reverse_end(y_d), z_reverse_b(z_d.end());