MAJ figure 4

[kahina_paper1.git] / paper.tex
diff --git a/paper.tex b/paper.tex

index c8a669875ef7f21354062b4e62411a0c98328977..02f98de83b7777e248bb3e8e5abe2441e3e5cbd9 100644 (file)
--- a/paper.tex
+++ b/paper.tex
@@ -4,6 +4,7 @@
  %%\usepackage[utf8]{inputenc}
  %%\usepackage[T1]{fontenc}
  %%\usepackage[french]{babel}
+\usepackage{float} 
  \usepackage{amsmath,amsfonts,amssymb}
  \usepackage[ruled,vlined]{algorithm2e}
  %\usepackage[french,boxed,linesnumbered]{algorithm2e}
@@ -96,7 +97,7 @@ root finding of polynomials, high degree, iterative methods, Durant-Kerner, GPU,
  Polynomials are algebraic structures used in mathematics that capture physical phenomenons and that express the outcome in the form of a function of some unknown variable. Formally speaking,  a polynomial $p(x)$ of degree \textit{n} having $n$ coefficients in the complex plane \textit{C} and zeros $\alpha_{i},\textit{i=1,...,n}$ 
  %%\begin{center}
  \begin{equation}
-     {\Large p(x)=\sum{a_{i}x^{i}}=a_{n}\prod(x-\alpha_{i}),a_{0} a_{n}\neq 0}.
+     {\Large p(x)=\sum_{i=0}^{n}{a_{i}x^{i}}=a_{n}\prod_{i=1}^{n}(x-\alpha_{i}), a_{0} a_{n}\neq 0}.
  \end{equation}
  %%\end{center}
  
@@ -329,7 +330,9 @@ Q(z_{k})=\exp\left( \ln (p(z_{k}))-\ln(p(z_{k}^{'}))+\ln \left(
  \sum_{k\neq j}^{n}\frac{1}{z_{k}-z_{j}}\right)\right).
  \end{equation}
  
-This solution is applied when it is necessary ??? When ??? (SIDER)
+This solution is applied when the root except the circle unit, represented by the radius $R$ evaluated as:
+
+$$R = \exp( \log(DBL\_MAX) / (2*n) )$$ where $DBL\_MAX$ stands for the maximum representable double value.
  
  \section{The implementation of simultaneous methods in a parallel computer}
  \label{secStateofArt}   
@@ -355,7 +358,7 @@ parallelism that can be suitably exploited by SIMD machines.
  Moreover, they have fast rate of convergence (quadratic for the
  Durand-Kerner and cubic for the Ehrlisch-Aberth). Various parallel
  algorithms reported for these methods can be found
-in~\cite{Cosnard90, Freeman89,Freemanall90,,Jana99,Janall99}.
+in~\cite{Cosnard90, Freeman89,Freemanall90,Jana99,Janall99}.
  Freeman and Bane~\cite{Freemanall90} presented two parallel
  algorithms on a local memory MIMD computer with the compute-to
  communication time ratio O(n). However, their algorithms require
@@ -381,6 +384,8 @@ GPUs, which details are discussed in the sequel.
  
  
  \section {A CUDA parallel Ehrlisch-Aberth method}
+In the following, we describe the parallel implementation of Ehrlisch-Aberth method on GPU 
+for solving high degree polynomials. First, the hardware and software of the GPUs are presented. Then, a CUDA parallel Ehrlisch-Aberth method are presented.
  
  \subsection{Background on the GPU architecture}
  A GPU is viewed as an accelerator for the data-parallel and
@@ -583,33 +588,26 @@ or from GPU memory to CPU memory \verb=(cudaMemcpyDeviceToHost))=.
  \section{Experimental study}
  
  \subsection{Definition of the polynomial used}
-We use two forms of  polynomials:
-\paragraph{sparse polynomial}:
-in this following form, the roots are distributed on 2 distinct circles:
+We study two forms of  polynomials the sparse polynomials and the full polynomials:
+\paragraph{Sparse polynomial}: in this following form, the roots are distributed on 2 distinct circles:
  \begin{equation}
-       \forall \alpha_{1} \alpha_{2} \in C,\forall n_{1},n_{2} \in N^{*}; P(z)= (z^{n^{1}}-\alpha_{1})(z^{n^{2}}-\alpha_{2})
+       \forall \alpha_{1} \alpha_{2} \in C,\forall n_{1},n_{2} \in N^{*}; P(z)= (z^{n_{1}}-\alpha_{1})(z^{n_{2}}-\alpha_{2})
  \end{equation}
-
  This form makes it possible to associate roots having two
  different modules and thus to work on a polynomial constitute
  of four non zero terms.
  
-\paragraph{Full polynomial}:
- the second form used to obtain a full polynomial is:
+\paragraph{Full polynomial}: the second form used to obtain a full polynomial is:
  %%\begin{equation}
         %%\forall \alpha_{i} \in C,\forall n_{i}\in N^{*}; P(z)= \sum^{n}_{i=1}(z^{n^{i}}.a_{i})
  %%\end{equation}
  
  \begin{equation}
-     {\Large \forall a_{i} \in C, i\in N; p(x)=\sum^{n-1}_{i=1} a_{i}.x^{i}} 
+     {\Large \forall a_{i} \in C, i\in N;  p(x)=\sum^{n}_{i=0} a_{i}.x^{i}} 
  \end{equation}
  with this form, we can have until \textit{n} non zero terms.
  
  \subsection{The study condition} 
-In order to have representative average values, for each
-point of our curves we measured the roots finding of 10
-different polynomials.
-
  The our experiences results concern two parameters which are
  the polynomial degree and the execution time of our program
  to converge on the solution. The polynomial degree allows us
@@ -617,7 +615,7 @@ to validate that our algorithm is powerful with high degree
  polynomials. The execution time remains the
  element-key which justifies our work of parallelization.
         For our tests we used a CPU Intel(R) Xeon(R) CPU
-E5620@2.40GHz and a GPU K40 (with 6 Go of ram)
+E5620@2.40GHz and a GPU K40 (with 6 Go of ram).
  
  
  \subsection{Comparative study}
@@ -642,7 +640,7 @@ We initially carried out the convergence of Aberth algorithm with various sizes
  %      \label{tab:theConvergenceOfAberthAlgorithm}
  %\end{table}
   
-\begin{figure}[htbp]
+\begin{figure}[H]
  \centering
    \includegraphics[width=0.8\textwidth]{figures/Compar_EA_algorithm_CPU_GPU}
  \caption{Aberth algorithm on CPU and GPU}
@@ -671,28 +669,30 @@ We initially carried out the convergence of Aberth algorithm with various sizes
  %\end{table}
  
  
-\begin{figure}[htbp]
+\begin{figure}[H]
  \centering
    \includegraphics[width=0.8\textwidth]{figures/influence_nb_threads}
  \caption{Influence of the number of threads on the execution times of different polynomials (sparse and full)}
  \label{fig:01}
  \end{figure}
  
-
+\subsubsection{The impact of exp-log solution to compute very high degrees of  polynomial}
+\begin{figure}[H]
+\centering
+  \includegraphics[width=0.8\textwidth]{figures/log_exp}
+\caption{The impact of exp-log solution to compute very high degrees of  polynomial.}
+\label{fig:01}
+\end{figure}
  
  \subsubsection{A comparative study between Aberth and Durand-kerner algorithm}
-\begin{table}[htbp]
-       \centering
-               \begin{tabular} {|R{2cm}|L{2.5cm}|L{2.5cm}|L{1.5cm}|L{1.5cm}|}
-                       \hline Polynomial's degrees & Aberth $T_{exe}$ & D-Kerner $T_{exe}$ & Aberth iteration & D-Kerner iteration\\
-                       \hline 5000 &  0.40 & 3.42 & 17 & 138 \\
-                       \hline 50000 & 3.92 & 385.266 & 17 & 823\\
-                       \hline 500000 & 497.109 & 4677.36 & 24 & 214\\
-                       \hline                                  
-                                       \end{tabular}
-       \caption{Aberth algorithm compare to Durand-Kerner algorithm}
-       \label{tab:AberthAlgorithCompareToDurandKernerAlgorithm}
-\end{table}
+
+
+\begin{figure}[H]
+\centering
+  \includegraphics[width=0.8\textwidth]{figures/EA_DK}
+\caption{Ehrlisch-Aberth and Durand-Kerner algorithm on GPU}
+\label{fig:01}
+\end{figure}
  
  
  \bibliography{mybibfile}