X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/GMRES2stage.git/blobdiff_plain/58544a2932f7124415b708bec79740f1269f8b52..8fb4ff584e40d579fbf8019ec98eda27c2f877f0:/paper.tex

diff --git a/paper.tex b/paper.tex
index f2e0621..8f469bd 100644
--- a/paper.tex
+++ b/paper.tex
@@ -439,7 +439,7 @@ can be around 7 times faster.
 \end{abstract}
 
 \begin{IEEEkeywords}
-Iterative Krylov methods; sparse linear systems; error minimization; PETSc; %Ã  voir... 
+Iterative Krylov methods; sparse linear systems; residual minimization; PETSc; %à voir... 
 \end{IEEEkeywords}
 
 
@@ -649,7 +649,7 @@ appropriate than a direct method in a parallel context.
     \State retrieve error
     \State $S_{k~mod~s}=x^k$ \label{algo:store}
     \If {$k$ mod $s=0$ {\bf and} error$>\epsilon_{kryl}$}
-      \State $R=AS$ \Comment{compute dense matrix}
+      \State $R=AS$ \Comment{compute dense matrix} \label{algo:matrix_mul}
       \State Solve least-squares problem $\underset{\alpha\in\mathbb{R}^{s}}{min}\|b-R\alpha\|_2$ \label{algo:}
       \State $x^k=S\alpha$  \Comment{compute new solution}
     \EndIf
@@ -671,7 +671,7 @@ solve the minimization problem, an  iterative method is used. Two parameters are
 required for that: the maximum number of iteration and the threshold to stop the
 method.
 
-To summarize, the important parameters of are:
+To summarize, the important parameters of TSARM are:
 \begin{itemize}
 \item $\epsilon_{kryl}$ the threshold to stop the method of the krylov method
 \item $max\_iter_{kryl}$ the maximum number of iterations for the krylov method
@@ -693,6 +693,46 @@ To summarize, the important parameters of are:
 \section{Parallelization}
 \label{sec:05}
 
+The parallelization of TSARM relies on the parallelization of all its
+parts. More precisely, except for the least-squares step, all the other parts
+are straightforward to carry out in parallel. In order to develop a parallel
+version of our code, we have chosen to use PETSc~\cite{petsc-web-page}. For
+line~\ref{algo:matrix_mul}, the matrix-matrix multiplication is implemented and
+efficient, since the matrix $A$ is sparse and the matrix $S$ contains few
+columns in practice. As explained previously, at least two methods seem to be
+interesting to solve the least-squares minimization: CGLS and LSQR.
+
+In the following we recall the CGLS algorithm. The LSQR method follows more or
+less the same principle but it takes more space, so we only briefly explain the parallelization of CGLS, which is similar to that of LSQR.
+
+\begin{algorithm}[t]
+\caption{CGLS}
+\begin{algorithmic}[1]
+  \Input $A$ (matrix), $b$ (right-hand side)
+  \Output $x$ (solution vector)\vspace{0.2cm}
+  \State $r=b-Ax$
+  \State $p=A'r$
+  \State $s=p$
+  \State $g=||s||^2_2$
+  \For {$k=1,2,3,\ldots$ until convergence ($g<\epsilon_{ls}$)} \label{algo2:conv}
+    \State $q=Ap$
+    \State $\alpha=g/||q||^2_2$
+    \State $x=x+\alpha p$
+    \State $r=r-\alpha q$
+    \State $s=A'r$
+    \State $g_{old}=g$
+    \State $g=||s||^2_2$
+    \State $\beta=g/g_{old}$ \State $p=s+\beta p$
+  \EndFor
+\end{algorithmic}
+\label{algo:02}
+\end{algorithm}
+
+
+In each iteration of CGLS, there are two matrix-vector multiplications and some
+classical operations: dot products, norms, multiplications and additions on vectors. All
+these operations are easy to implement in PETSc or a similar environment.
+
 %%%*********************************************************
 %%%*********************************************************
 \section{Experiments using petsc}
@@ -753,7 +793,7 @@ minimization.
 \begin{tabular}{|c|c|r|r|r|r|} 
 \hline
 
- \multirow{2}{*}{Matrix name}  & Solver /   & \multicolumn{2}{c|}{gmres variant} & \multicolumn{2}{c|}{2 stage CGLS} \\ 
+ \multirow{2}{*}{Matrix name}  & Solver /   & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSARM CGLS} \\ 
 \cline{3-6}
        &  precond             & Time  & \# Iter.  & Time  & \# Iter.  \\\hline \hline
 
@@ -774,14 +814,16 @@ torso3             & fgmres / sor  & 37.70 & 565 & 34.97 & 510 \\
 
 
 
-Larger experiments ....
+Larger experiments ....\\
+
+Describe the problems ex15 and ex54
 
 \begin{table*}
 \begin{center}
 \begin{tabular}{|r|r|r|r|r|r|r|r|r|} 
 \hline
 
-  nb. cores & precond   & \multicolumn{2}{c|}{gmres variant} & \multicolumn{2}{c|}{2 stage CGLS} &  \multicolumn{2}{c|}{2 stage LSQR} & best gain \\ 
+  nb. cores & precond   & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSARM CGLS} &  \multicolumn{2}{c|}{TSARM LSQR} & best gain \\ 
 \cline{3-8}
              &                       & Time  & \# Iter.  & Time  & \# Iter. & Time  & \# Iter. & \\\hline \hline
   2,048      & mg                    & 403.49   & 18,210    & 73.89  & 3,060   & 77.84  & 3,270  & 5.46 \\
@@ -806,7 +848,7 @@ Larger experiments ....
 \begin{tabular}{|r|r|r|r|r|r|r|r|r|} 
 \hline
 
-  nb. cores & threshold   & \multicolumn{2}{c|}{gmres variant} & \multicolumn{2}{c|}{2 stage CGLS} &  \multicolumn{2}{c|}{2 stage LSQR} & best gain \\ 
+  nb. cores & threshold   & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSARM CGLS} &  \multicolumn{2}{c|}{TSARM LSQR} & best gain \\ 
 \cline{3-8}
              &                       & Time  & \# Iter.  & Time  & \# Iter. & Time  & \# Iter. & \\\hline \hline
   2,048      & 8e-5                  & 108.88 & 16,560  & 23.06  &  3,630  & 22.79  & 3,630   & 4.77 \\
@@ -814,7 +856,7 @@ Larger experiments ....
   4,096      & 7e-5                  & 160.59 & 22,530  & 35.15  &  5,130  & 29.21  & 4,350   & 5.49 \\
   4,096      & 6e-5                  & 249.27 & 35,520  & 52.13  &  7,950  & 39.24  & 5,790   & 6.35 \\
   8,192      & 6e-5                  & 149.54 & 17,280  & 28.68  &  3,810  & 29.05  & 3,990  & 5.21 \\
-  8,192      & 5e-5                  & 792.11 & 109,590 & 76.83  &  10,470  & 65.20  & 9,030  & 12.14 \\
+  8,192      & 5e-5                  & 785.04 & 109,590 & 76.07  &  10,470  & 69.42 & 9,030  & 11.30 \\
   16,384     & 4e-5                  & 718.61 & 86,400 & 98.98  &  10,830  & 131.86  & 14,790  & 7.26 \\
 \hline
 
@@ -823,6 +865,33 @@ Larger experiments ....
 \label{tab:04}
 \end{center}
 \end{table*}
+
+
+
+
+
+\begin{table*}
+\begin{center}
+\begin{tabular}{|r|r|r|r|r|r|r|r|r|r|r|} 
+\hline
+
+  nb. cores   & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSARM CGLS} &  \multicolumn{2}{c|}{TSARM LSQR} & best gain & \multicolumn{3}{c|}{efficiency} \\ 
+\cline{2-7} \cline{9-11}
+                    & Time  & \# Iter.  & Time  & \# Iter. & Time  & \# Iter. &   & GMRES & TS CGLS & TS LSQR\\\hline \hline
+   512              & 3,969.69 & 33,120 & 709.57 & 5,790  & 622.76 & 5,070  & 6.37  &   1    &    1    &     1     \\
+   1,024            & 1,530.06  & 25,860 & 290.95 & 4,830  & 307.71 & 5,070 & 5.25  &  1.30  &    1.21  &   1.01     \\
+   2,048            & 919.62    & 31,470 & 237.52 & 8,040  & 194.22 & 6,510 & 4.73  & 1.08   &    .75   &   .80\\
+   4,096            & 405.60    & 28,380 & 111.67 & 7,590  & 91.72  & 6,510 & 4.42  & 1.22   &  .79     &   .84 \\
+   8,192            & 785.04   & 109,590 & 76.07  & 10,470 & 69.42 & 9,030  & 11.30 &   .32  &   .58    &  .56 \\
+
+\hline
+
+\end{tabular}
+\caption{Comparison of FGMRES and 2 stage FGMRES algorithms for ex54 of PETSc (both with the MG preconditioner) with 204,919,225 components on Curie with different numbers of cores (restart=30, s=12, threshold 5e-5); time is expressed in seconds.}
+\label{tab:05}
+\end{center}
+\end{table*}
+
 %%%*********************************************************
 %%%*********************************************************
 
@@ -839,6 +908,7 @@ Larger experiments ....
 
 future plan : \\
 - study other kinds of matrices, problems, inner solvers\\
+- test the influence of all the parameters\\
 - adaptative number of outer iterations to minimize\\
 - other methods to minimize the residuals?\\
 - implement our solver inside PETSc