From b34893a990fb0c4094c477543a475e91d15cde25 Mon Sep 17 00:00:00 2001
From: couturie <couturie@extinction>
Date: Fri, 15 Jan 2016 15:24:14 +0100
Subject: [PATCH 1/1] new

---
 paper.tex | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/paper.tex b/paper.tex
index 5384818..0b2d113 100644
--- a/paper.tex
+++ b/paper.tex
@@ -990,7 +990,8 @@ In this experiments we report the execution time of the EA algorithm, on single
 \begin{figure}[htbp]
 \centering
   \includegraphics[angle=-90,width=0.5\textwidth]{Sparse_omp}
-\caption{Execution time in seconds of the Ehrlich-Aberth method to solve sparse polynomials on multiple GPUs.}
+\caption{Execution time in seconds of the Ehrlich-Aberth method to
+  solve sparse polynomials on multiple GPUs with CUDA-OpenMP.}
 \label{fig:01}
 \end{figure}
 
@@ -1005,7 +1006,8 @@ These experiments show the execution times of the EA algorithm, on a single GPU
 \begin{figure}[htbp]
 \centering
   \includegraphics[angle=-90,width=0.5\textwidth]{Full_omp}
-\caption{Execution time in seconds of the Ehrlich-Aberth method to solve full polynomials on multiple GPUs}
+\caption{Execution time in seconds of the Ehrlich-Aberth method to
+  solve full polynomials on multiple GPUs with CUDA-OpenMP.}
 \label{fig:02}
 \end{figure}
 
@@ -1023,7 +1025,8 @@ approach to solve full and sparse polynomials of degrees ranging from
 \begin{figure}[htbp]
 \centering
   \includegraphics[angle=-90,width=0.5\textwidth]{Sparse_mpi}
-\caption{Execution time in seconds of the Ehrlich-Aberth method to solve sparse polynomials on multiple GPUs.}
+\caption{Execution time in seconds of the Ehrlich-Aberth method to
+  solve sparse polynomials on multiple GPUs with CUDA-MPI.}
 \label{fig:03}
 \end{figure}
 Figure~\ref{fig:03} shows the execution times of te EA algorithm,
@@ -1034,25 +1037,36 @@ for a single GPU, and multiple GPUs (2, 3, 4) with the CUDA-MPI approach.
 \begin{figure}[htbp]
 \centering
  \includegraphics[angle=-90,width=0.5\textwidth]{Full_mpi}
-\caption{Execution times in seconds of the Ehrlich-Aberth method for full polynomials on GPUs using the Multi-GPU}
+\caption{Execution times in seconds of the Ehrlich-Aberth method for
+  full polynomials on  multiple GPUs with CUDA-MPI.}
 \label{fig:04}
 \end{figure}
 
 In Figure~\ref{fig:04}, we can also observe that the CUDA-MPI approach
 is also efficient to solve full polynimails on multiple GPUs.
-\subsection{Comparing  the CUDA-OpenMP approach and the CUDA-MPI approach}
 
-In the previuos section we saw that both approches are very effective in reducing execution time for sparse as well as full polynomials. At this stage, the interesting question is which approach is better. In the fellowing, we present appropriate experiments comparing the two Multi-GPU approaches to answer the question.
+\subsection{Comparison of  the CUDA-OpenMP and the CUDA-MPI approaches}
+
+In the previous section we saw that both approaches are very efficient
+at reducing the execution times for sparse and full polynomials. In
+this section we compare these two approaches.
 
 \subsubsection{Solving sparse polynomials}
 In this experiment three sparse polynomials of size 200K, 800K and 1,4M are investigated.
 \begin{figure}[htbp]
 \centering
  \includegraphics[angle=-90,width=0.5\textwidth]{Sparse}
-\caption{Execution time  for solving sparse polynomials of three distinct sizes on multiple GPUs using MPI and OpenMP approaches using Ehrlich-Aberth}
+\caption{Execution times to solve sparse polynomials of three
+  distinct sizes on multiple GPUs using MPI and OpenMP with the
+  Ehrlich-Aberth method.}
 \label{fig:05}
 \end{figure}
-In Figure~\ref{fig:05} there two curves for each polynomial size : one for the MPI-CUDA and another for the OpenMP. We can see that the results are similar between OpenMP and MPI for the polynomials size of 200K. For the size of 800K, the MPI version is a little slower than the OpenMP approach but for the 1,4 millions size, there is a slight advantage for the MPI version.
+In Figure~\ref{fig:05} there is one curve for CUDA-MPI and another one
+for CUDA-OpenMP. We can see that the results are quite similar between
+OpenMP and MPI for the polynomial size of 200K. For the size of 800K,
+the MPI version is a little bit slower than the OpenMP approach, but for
+the 1,4 million size, there is a slight advantage for the MPI
+version.
 
 \subsubsection{Solving full polynomials}
 \begin{figure}[htbp]
-- 
2.39.5