X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/kahina_paper1.git/blobdiff_plain/1fc948028049feeeacad3808eedfb3b2acb32775..2b5ea4e0a2094ab312c8046da823ffe332c9577b:/paper.tex?ds=inline

diff --git a/paper.tex b/paper.tex
index 20686d2..43bbf1e 100644
--- a/paper.tex
+++ b/paper.tex
@@ -312,8 +312,7 @@ The convergence condition determines the termination of the algorithm. It consis
 
 \begin{equation}
 \label{eq:Aberth-Conv-Cond}
-\forall i \in
-[1,n];\frac{z_{i}^{k}-z_{i}^{k-1}}{z_{i}^{k}}<\xi
+\forall i \in [1,n];\vert\frac{z_{i}^{k}-z_{i}^{k-1}}{z_{i}^{k}}\vert<\xi
 \end{equation}
 
 
@@ -424,13 +423,15 @@ In~\cite{Kahinall14} we already proposed the first implementation
 of a root finding method on GPUs, that of the Durand-Kerner method. The main result showed
 that a parallel CUDA implementation is 10 times as fast as the
 sequential implementation on a single CPU for high degree
-polynomials of 48000. In this paper we present a parallel implementation of Ehlisch-Aberth method on
-GPUs, which details are discussed in the sequel.
+polynomials of 48000.
+%In this paper we present a parallel implementation of Ehrlich-Aberth
+%method on GPUs for sparse and full polynomials with high degree (up
+%to $1,000,000$).
 
 
 \section {A CUDA parallel Ehrlich-Aberth method}
 In the following, we describe the parallel implementation of Ehrlich-Aberth method on GPU 
-for solving high degree polynomials. First, the hardware and software of the GPUs are presented. Then, a CUDA parallel Ehrlich-Aberth method are presented.
+for solving high degree polynomials (up to $1,000,000$). First, the hardware and software of the GPUs are presented. Then, the CUDA parallel Ehrlich-Aberth method is presented.
 
 \subsection{Background on the GPU architecture}
 A GPU is viewed as an accelerator for the data-parallel and
@@ -497,32 +498,37 @@ read-only caches.
 
 \subsection{A sequential Ehrlich-Aberth algorithm}
 The main steps of Ehrlich-Aberth method are shown in Algorithm.~\ref{alg1-seq} :
-  
+%\LinesNumbered  
 \begin{algorithm}[H]
 \label{alg1-seq}
-%\LinesNumbered
+
 \caption{A sequential algorithm to find roots with the Ehrlich-Aberth method}
 
-\KwIn{$Z^{0}$(Initial root's vector),$\varepsilon$ (error tolerance threshold),P(Polynomial to solve)}
-\KwOut {Z(The solution root's vector)}
+\KwIn{$Z^{0}$(Initial root's vector),$\varepsilon$ (error tolerance threshold), P(Polynomial to solve),$\Delta z_{max}$ (maximum value of stop condition),k (number of iteration),n(Polynomial's degrees)}
+\KwOut {Z (The solution root's vector),ZPrec (the previous solution root's vector)}
 
 \BlankLine
 
 Initialization of the coefficients of the polynomial to solve\;
 Initialization of the solution vector $Z^{0}$\;
+$\Delta z_{max}=0$\;
+ k=0\;
 
-\While {$\Delta z_{max}\succ \epsilon$}{
+\While {$\Delta z_{max} > \varepsilon$}{
  Let $\Delta z_{max}=0$\;
 \For{$j \gets 0 $ \KwTo $n$}{
-$ZPrec\left[j\right]=Z\left[j\right]$\;
-$Z\left[j\right]=H\left(j,Z\right)$\;
+$ZPrec\left[j\right]=Z\left[j\right]$;// save Z at the iteration k.\
+
+$Z\left[j\right]=H\left(j,Z\right)$;//update Z with the iterative function.\
 }
+k=k+1\;
 
 \For{$i \gets 0 $ \KwTo $n-1$}{
-$c=\frac{\left|Z\left[i\right]-ZPrec\left[i\right]\right|}{Z\left[i\right]}$\;
+$c= testConverge(\Delta z_{max},ZPrec\left[j\right],Z\left[j\right])$\;
 \If{$c > \Delta z_{max}$ }{
 $\Delta z_{max}$=c\;}
 }
+
 }
 \end{algorithm}
 
@@ -559,8 +565,7 @@ Algorithm~\ref{alg2-cuda} shows a sketch of the Ehrlich-Aberth algorithm using C
 %\LinesNumbered
 \caption{CUDA Algorithm to find roots with the Ehrlich-Aberth method}
 
-\KwIn{$Z^{0}$(Initial root's vector),$\varepsilon$ (error
-tolerance threshold),P(Polynomial to solve)}
+\KwIn{$Z^{0}$(Initial root's vector),$\varepsilon$ (error tolerance threshold), P(Polynomial to solve), $\Delta z_{max}$ (maximum value of stop condition)}
 
 \KwOut {Z(The solution root's vector)}
 
@@ -569,12 +574,14 @@ tolerance threshold),P(Polynomial to solve)}
 Initialization of the coeffcients of the polynomial to solve\;
 Initialization of the solution vector $Z^{0}$\;
 Allocate and copy initial data to the GPU global memory\;
-
+k=0\;
 \While {$\Delta z_{max}\succ \epsilon$}{
  Let $\Delta z_{max}=0$\;
-$ kernel\_save(d\_z^{k-1})$\;
-$ kernel\_update(d\_z^{k})$\;
-$kernel\_testConverge(\Delta z_{max},d_z^{k},d_z^{k-1})$\;
+$ kernel\_save(d\_Z^{k-1})$\;
+k=k+1\;
+$ kernel\_update(d\_Z^{k})$\;
+$kernel\_testConverge(\Delta z_{max},d\_Z^{k},d\_Z^{k-1})$\;
+
 }
 \end{algorithm}
 ~\\