la section 3

[kahina_paper1.git] / paper.tex
diff --git a/paper.tex b/paper.tex

index bcac7f4c9511b8c207889a0d437d3f4912157172..bcf399a3b5c7b8109b9046412ec75e0961cf30c0 100644 (file)
--- a/paper.tex
+++ b/paper.tex
@@ -349,12 +349,16 @@ Q(z^{k}_{i})=\exp\left( \ln (p(z^{k}_{i}))-\ln(p'(z^{k}_{i}))+\ln \left(
  \sum_{k\neq j}^{n}\frac{1}{z^{k}_{i}-z^{k}_{j}}\right)\right).
  \end{equation}
  
  \sum_{k\neq j}^{n}\frac{1}{z^{k}_{i}-z^{k}_{j}}\right)\right).
  \end{equation}
  
-This solution is applied when the root except the circle unit, represented by the radius $R$ evaluated as:
-\begin{equation}
-\label{R}
-R = \exp( \log(DBL\_MAX) / (2*n) )
-\end{equation}
- where $DBL\_MAX$ stands for the maximum representable double value.
+This solution is applied when a root lies outside the circle of radius $R$, where $R$ is evaluated in the C language as:
+\begin{verbatim}
+R = exp(log(DBL_MAX)/(2*n) );
+\end{verbatim} 
+
+%\begin{equation}
+
+%R = \exp( \log(DBL\_MAX) / (2*n) )
+%\end{equation}
+ where \verb=DBL_MAX= stands for the maximum representable \verb=double= value.
  
  \section{The implementation of simultaneous methods in a parallel computer}
  \label{secStateofArt}   
  
  \section{The implementation of simultaneous methods in a parallel computer}
  \label{secStateofArt}   
@@ -539,7 +543,7 @@ Let $K$ be the number of iterations necessary to compute all the roots, so the t
  \label{eq:T-global}
  T=\left[n\left(T_{i}(n)+T_{j}\right)+O(n)\right]\cdot K
  \end{equation}
  \label{eq:T-global}
  T=\left[n\left(T_{i}(n)+T_{j}\right)+O(n)\right]\cdot K
  \end{equation}
-The execution time increases with the increasing of the polynomial degree, which justifies to parallelise these steps  in order to reduce the global execution time. In the following,  we explain how we did parrallelize these steps on a GPU architecture using the CUDA platform.
+The execution time increases with the polynomial degree, which justifies parallelizing these steps in order to reduce the global execution time. In the following, we explain how we parallelized these steps on a GPU architecture using the CUDA platform.
  
  \subsubsection{A Parallel implementation with CUDA }
  On the CPU, both steps 3 and 4 contain a \verb=for= loop, and a single thread executes all the instructions in the loop $n$ times. In this subsection, we explain how the GPU architecture can compute this loop and reduce the execution time.
  
  \subsubsection{A Parallel implementation with CUDA }
  On the CPU, both steps 3 and 4 contain a \verb=for= loop, and a single thread executes all the instructions in the loop $n$ times. In this subsection, we explain how the GPU architecture can compute this loop and reduce the execution time.