X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/GMRES2stage.git/blobdiff_plain/257eb3828b7e512fff530ca5609ec15a27e3fa56..317020501fc8da3f9e443457700821b26f66da55:/paper.tex

diff --git a/paper.tex b/paper.tex
index 517cb8e..436909a 100644
--- a/paper.tex
+++ b/paper.tex
@@ -380,7 +380,7 @@
 % use a multiple column layout for up to two different
 % affiliations
 
-\author{\IEEEauthorblockN{Rapha\"el Couturier\IEEEauthorrefmark{1}, Lilia Ziane Khodja \IEEEauthorrefmark{2}, and Christophe Guyeux\IEEEauthorrefmark{1}}
+\author{\IEEEauthorblockN{Rapha\"el Couturier\IEEEauthorrefmark{1}, Lilia Ziane Khodja\IEEEauthorrefmark{2}, and Christophe Guyeux\IEEEauthorrefmark{1}}
 \IEEEauthorblockA{\IEEEauthorrefmark{1} Femto-ST Institute, University of Franche Comte, France\\
 Email: \{raphael.couturier,christophe.guyeux\}@univ-fcomte.fr}
 \IEEEauthorblockA{\IEEEauthorrefmark{2} INRIA Bordeaux Sud-Ouest, France\\
@@ -737,6 +737,7 @@ these operations are easy to implement in PETSc or similar environment.
 \label{sec:04}
 Let us recall the following result, see~\cite{Saad86}.
 \begin{proposition}
+\label{prop:saad}
 Suppose that $A$ is a positive real matrix with symmetric part $M$. Then the residual norm provided at the $m$-th step of GMRES satisfies:
 \begin{equation}
 ||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0|| ,
@@ -748,7 +749,11 @@ the convergence of GMRES($m$) for all $m$ under that assumption regarding $A$.
 
 We can now claim that,
 \begin{proposition}
-If $A$ is a positive real matrix and GMRES($m$) is used as solver, then the TSIRM algorithm is convergent.
+If $A$ is a positive real matrix and GMRES($m$) is used as solver, then the TSIRM algorithm is convergent. Furthermore, we still have 
+\begin{equation}
+||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0|| ,
+\end{equation}
+where $\alpha$ and $\beta$ are defined as in Proposition~\ref{prop:saad}.
 \end{proposition}
 
 \begin{proof}
@@ -757,16 +762,21 @@ $k$-th iterate of TSIRM.
 We will prove that $r_k \rightarrow 0$ when $k \rightarrow +\infty$.
 
 Each step of the TSIRM algorithm \\
+
+Let $\operatorname{span}(S) = \left \{ {\sum_{i=1}^k \lambda_i v_i \Big| k \in \mathbb{N}, v_i \in S, \lambda _i \in \mathbb{R}} \right \}$ be the linear span of a set of vectors $S$. So,\\
 $\min_{\alpha \in \mathbb{R}^s} ||b-R\alpha ||_2 = \min_{\alpha \in \mathbb{R}^s} ||b-AS\alpha ||_2$
 
 $\begin{array}{ll}
-& = \min_{x \in Vect\left(S_{k-s}, S_{k-s+1}, \hdots, S_{k-1} \right)} ||b-AS\alpha ||_2\\
-& = \min_{x \in Vect\left(x_{k-s}, x_{k-s}+1, \hdots, x_{k-1} \right)} ||b-AS\alpha ||_2\\
-& \leqslant \min_{x \in Vect\left( x_{k-1} \right)} ||b-Ax ||_2\\
-& \leqslant ||b-Ax_{k-1}||
+& = \min_{x \in span\left(S_{k-s}, S_{k-s+1}, \hdots, S_{k-1} \right)} ||b-AS\alpha ||_2\\
+& = \min_{x \in span\left(x_{k-s}, x_{k-s}+1, \hdots, x_{k-1} \right)} ||b-AS\alpha ||_2\\
+& \leqslant \min_{x \in span\left( x_{k-1} \right)} ||b-Ax ||_2\\
+& \leqslant \min_{\lambda \in \mathbb{R}} ||b-\lambda Ax_{k-1} ||_2\\
+& \leqslant ||b-Ax_{k-1}||_2 .
 \end{array}$
 \end{proof}
 
+We can remark that, at each iterate, the residue of the TSIRM algorithm is lower 
+than the one of the GMRES method.
 
 %%%*********************************************************
 %%%*********************************************************
@@ -838,7 +848,7 @@ torso3             & fgmres / sor  & 37.70 & 565 & 34.97 & 510 \\
 \hline
 
 \end{tabular}
-\caption{Comparison of (F)GMRES and 2 stage (F)GMRES algorithms in sequential with some matrices, time is expressed in seconds.}
+\caption{Comparison of (F)GMRES and TSIRM with (F)GMRES in sequential with some matrices, time is expressed in seconds.}
 \label{tab:02}
 \end{center}
 \end{table}
@@ -897,7 +907,7 @@ Table~\ref{tab:03} shows  the execution  times and the  number of  iterations of
 example ex15  of PETSc on the  Juqueen architecture. Different  numbers of cores
 are  studied ranging  from  2,048  up-to 16,383.   Two  preconditioners have  been
 tested: {\it mg} and {\it sor}.   For those experiments,  the number  of components  (or unknowns  of the
-problems)  per processor  is fixed  to 25,000,  also called  weak  scaling. This
+problems)  per core  is fixed  to 25,000,  also called  weak  scaling. This
 number can seem relatively small. In fact, for some applications that need a lot
 of  memory, the  number of  components per  processor requires  sometimes  to be
 small.
@@ -944,7 +954,7 @@ the number of iterations. So, the overall benefit of using TSIRM is interesting.
 \begin{tabular}{|r|r|r|r|r|r|r|r|r|} 
 \hline
 
-  nb. cores & threshold   & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSIRM CGLS} &  \multicolumn{2}{c|}{TSIRM LSQR} & best gain \\ 
+  nb. cores & threshold   & \multicolumn{2}{c|}{FGMRES} & \multicolumn{2}{c|}{TSIRM CGLS} &  \multicolumn{2}{c|}{TSIRM LSQR} & best gain \\ 
 \cline{3-8}
              &                       & Time  & \# Iter.  & Time  & \# Iter. & Time  & \# Iter. & \\\hline \hline
   2,048      & 8e-5                  & 108.88 & 16,560  & 23.06  &  3,630  & 22.79  & 3,630   & 4.77 \\
@@ -957,7 +967,7 @@ the number of iterations. So, the overall benefit of using TSIRM is interesting.
 \hline
 
 \end{tabular}
-\caption{Comparison of FGMRES  and 2 stage FGMRES algorithms for ex54 of Petsc (both with the MG preconditioner) with 25000 components per core on Curie (restart=30, s=12),  time is expressed in seconds.}
+\caption{Comparison of FGMRES  and TSIRM with FGMRES algorithms for ex54 of Petsc (both with the MG preconditioner) with 25,000 components per core on Curie (restart=30, s=12),  time is expressed in seconds.}
 \label{tab:04}
 \end{center}
 \end{table*}
@@ -971,9 +981,9 @@ In Table~\ref{tab:04}, some experiments with example ex54 on the Curie architect
 \begin{tabular}{|r|r|r|r|r|r|r|r|r|r|r|} 
 \hline
 
-  nb. cores   & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSIRM CGLS} &  \multicolumn{2}{c|}{TSIRM LSQR} & best gain & \multicolumn{3}{c|}{efficiency} \\ 
+  nb. cores   & \multicolumn{2}{c|}{FGMRES} & \multicolumn{2}{c|}{TSIRM CGLS} &  \multicolumn{2}{c|}{TSIRM LSQR} & best gain & \multicolumn{3}{c|}{efficiency} \\ 
 \cline{2-7} \cline{9-11}
-                    & Time  & \# Iter.  & Time  & \# Iter. & Time  & \# Iter. &   & GMRES & TS CGLS & TS LSQR\\\hline \hline
+                    & Time  & \# Iter.  & Time  & \# Iter. & Time  & \# Iter. &   & FGMRES & TS CGLS & TS LSQR\\\hline \hline
    512              & 3,969.69 & 33,120 & 709.57 & 5,790  & 622.76 & 5,070  & 6.37  &   1    &    1    &     1     \\
    1024             & 1,530.06  & 25,860 & 290.95 & 4,830  & 307.71 & 5,070 & 5.25  &  1.30  &    1.21  &   1.01     \\
    2048             & 919.62    & 31,470 & 237.52 & 8,040  & 194.22 & 6,510 & 4.73  & 1.08   &    .75   &   .80\\
@@ -983,7 +993,7 @@ In Table~\ref{tab:04}, some experiments with example ex54 on the Curie architect
 \hline
 
 \end{tabular}
-\caption{Comparison of FGMRES  and 2 stage FGMRES algorithms for ex54 of Petsc (both with the MG preconditioner) with 204,919,225 components on Curie with different number of cores (restart=30, s=12, threshol 5e-5),  time is expressed in seconds.}
+\caption{Comparison of FGMRES  and TSIRM with FGMRES for ex54 of Petsc (both with the MG preconditioner) with 204,919,225 components on Curie with different number of cores (restart=30, s=12, threshold 5e-5),  time is expressed in seconds.}
 \label{tab:05}
 \end{center}
 \end{table*}
@@ -1011,7 +1021,7 @@ In Table~\ref{tab:04}, some experiments with example ex54 on the Curie architect
 
 future plan : \\
 - study other kinds of matrices, problems, inner solvers\\
-- test the influence of all the parameters\\
+- test the influence of all parameters\\
 - adaptative number of outer iterations to minimize\\
 - other methods to minimize the residuals?\\
 - implement our solver inside PETSc
@@ -1026,7 +1036,7 @@ future plan : \\
 %%%*********************************************************
 \section*{Acknowledgment}
 This  paper  is   partially  funded  by  the  Labex   ACTION  program  (contract
-ANR-11-LABX-01-01).   We acknowledge PRACE  for awarding  us access  to resource
+ANR-11-LABX-01-01).   We acknowledge PRACE  for awarding  us access  to resources
 Curie and Juqueen respectively based in France and Germany.
 
 
@@ -1069,4 +1079,3 @@ Curie and Juqueen respectively based in France and Germany.
 
 % that's all folks
 \end{document}
-