01-05-2014b

[Krylov_multi.git] / krylov_multi.tex
diff --git a/krylov_multi.tex b/krylov_multi.tex

index 87ee2279999d06ae61bfa857b6c2ae4aab60c1bd..9e423d1751084b923d9169bb1c3a2d6e3573462b 100644 (file)
--- a/krylov_multi.tex
+++ b/krylov_multi.tex
@@ -17,10 +17,8 @@
  \newcommand{\Prec}{\mathit{prec}}
  \newcommand{\Ratio}{\mathit{Ratio}}
  
  \newcommand{\Prec}{\mathit{prec}}
  \newcommand{\Ratio}{\mathit{Ratio}}
  
-%\usepackage{xspace}
-%\usepackage[textsize=footnotesize]{todonotes}
-%\newcommand{\LZK}[2][inline]{%
-%\todo[color=green!40,#1]{\sffamily\textbf{LZK:} #2}\xspace}
+\def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
+\let\endchangemargin=\endlist
  
  \title{A scalable multisplitting algorithm for solving large sparse linear systems} 
  \date{}
  
  \title{A scalable multisplitting algorithm for solving large sparse linear systems} 
  \date{}
@@ -75,9 +73,26 @@ thousands of cores are used.
  %proposed  in~\cite{huang1993krylov},  the  use  of a  minimization  process  can
  %drastically improve the convergence.
  
  %proposed  in~\cite{huang1993krylov},  the  use  of a  minimization  process  can
  %drastically improve the convergence.
  
-Traditional parallel iterative solvers are based on fine-grain computations that frequently require data exchanges between computing nodes and have global synchronizations that penalize the scalability. Particularly, they are more penalized on large scale architectures or on distributed platforms composed of distant clusters interconnected by a high-latency network. It is therefore imperative to develop coarse-grain based algorithms to reduce the communications in the parallel iterative solvers. Two  possible solutions consists either in using asynchronous iterative methods~\cite{ref18} or to use multisplitting algorithms. In this paper, we will reconsider the use of a multisplitting method. In opposition to traditional multisplitting method that suffer from slow convergence, as proposed in~\cite{huang1993krylov}, the use of a minimization process can drastically improve the convergence.
-
-The present paper is organized as follows. First in Section~\ref{sec:02} is given some related works and the main principle of multisplitting methods. Then, in Section~\ref{sec:03} is presented the algorithm of our Krylov multisplitting method based on inner-outer iterations. Finally, in Section~\ref{sec:04}, the parallel experiments on Hector architecture show the performances of the Krylov multisplitting algorithm compared to the classical GMRES algorithm to solve a 3D Poisson problem.
+Traditional parallel iterative solvers are based on fine-grain computations that
+frequently  require  data exchanges  between  computing  nodes  and have  global
+synchronizations  that penalize  the  scalability. Particularly,  they are  more
+penalized on large  scale architectures or on distributed  platforms composed of
+distant  clusters interconnected  by  a high-latency  network.  It is  therefore
+imperative to develop coarse-grain based algorithms to reduce the communications
+in the  parallel iterative  solvers. Two possible  solutions consist  in using
+either asynchronous  iterative  methods~\cite{ref18}  or  multisplitting
+algorithms.  In this  paper,  we will  reconsider  the use  of a  multisplitting
+method. In contrast to traditional multisplitting methods that suffer from slow
+convergence, as  proposed in~\cite{huang1993krylov},  the use of  a minimization
+process can drastically improve the convergence.
+
+The present paper is  organized as follows. First, Section~\ref{sec:02} presents
+some related work and the principle of multisplitting methods. Then,
+Section~\ref{sec:03} presents the algorithm of our Krylov multisplitting
+method based on inner-outer iterations. Finally, in Section~\ref{sec:04},
+parallel experiments on the Hector architecture show the performance of the
+Krylov multisplitting algorithm compared to the classical GMRES algorithm for
+solving a 3D Poisson problem.
  
  
  %%%%%%%%%%%%%%%%%%%%%%%%
  
  
  %%%%%%%%%%%%%%%%%%%%%%%%
@@ -309,6 +324,38 @@ is reached. The precision and the maximum number of iterations of CGNR method ar
  
  \begin{table}[htbp]
  \begin{center}
  
  \begin{table}[htbp]
  \begin{center}
+\begin{changemargin}{-1.4cm}{0cm}
+\begin{footnotesize}
+\begin{tabular}{|c|c||c|c|c||c|c|c||c|} 
+\hline
+\multirow{2}{*}{Pb size}&\multirow{2}{*}{Nb. cores} &  \multicolumn{3}{c||}{GMRES} &  \multicolumn{3}{c||}{Krylov Multisplitting} & \multirow{2}{*}{Ratio}\\
+ \cline{3-8}
+           &                   &  Time (s) & nb Iter. & $\Delta$  &   Time (s)& nb Iter. & $\Delta$ & \\
+\hline
+$468^3$ & 2,048 (2x1,024)        &  299.7    & 41,028    & 5.02e-08 &  48.4    & 691(6,146) & 8.24e-08  & 6.19   \\
+\hline
+$590^3$ & 4,096 (2x2,048)        &  433.1    & 55,494    & 4.92e-07 &  74.1    & 1,101(8,211) & 6.62e-08  & 5.84   \\
+\hline
+$743^3$ & 8,192 (2x4,096)        & 704.4     & 87,822    & 4.80e-07 &  151.2   & 3,061(14,914) & 5.87e-08 & 4.65    \\
+\hline
+$743^3$ & 8,192 (4x2,048)        & 704.4     & 87,822    & 4.80e-07 &  110.3   & 1,531(12,721) & 1.47e-07& 6.39  \\
+\hline
+
+\end{tabular}
+\caption{Results}
+\label{tab1-footnotesize}
+\end{footnotesize}
+\end{changemargin}
+\end{center}
+\end{table}
+
+
+
+
+\begin{table}[htbp]
+\begin{center}
+\begin{changemargin}{-1.8cm}{0cm}
+\begin{small}
  \begin{tabular}{|c|c||c|c|c||c|c|c||c|} 
  \hline
  \multirow{2}{*}{Pb size}&\multirow{2}{*}{Nb. cores} &  \multicolumn{3}{c||}{GMRES} &  \multicolumn{3}{c||}{Krylov Multisplitting} & \multirow{2}{*}{Ratio}\\
  \begin{tabular}{|c|c||c|c|c||c|c|c||c|} 
  \hline
  \multirow{2}{*}{Pb size}&\multirow{2}{*}{Nb. cores} &  \multicolumn{3}{c||}{GMRES} &  \multicolumn{3}{c||}{Krylov Multisplitting} & \multirow{2}{*}{Ratio}\\
@@ -327,6 +374,8 @@ $743^3$ & 8,192 (4x2,048)        & 704.4     & 87,822    & 4.80e-07 &  110.3   &
  \end{tabular}
  \caption{Results}
  \label{tab1}
  \end{tabular}
  \caption{Results}
  \label{tab1}
+\end{small}
+\end{changemargin}
  \end{center}
  \end{table}
  
  \end{center}
  \end{table}