X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/GMRES2stage.git/blobdiff_plain/16f0ca498711e0af6a2de8200c2fbee0e5f8810e..70ad0cb4dcbf00dcc4bade65051bf0273854d231:/paper.tex diff --git a/paper.tex b/paper.tex index 35d9ed7..51eab5c 100644 --- a/paper.tex +++ b/paper.tex @@ -367,7 +367,7 @@ % % paper title % can use linebreaks \\ within to get better formatting as desired -\title{A Krylov two-stage algorithm to solve large sparse linear systems} +\title{TSARM: A Two-Stage Algorithm with least-square Residual Minimization to solve large sparse linear systems} %où %\title{A two-stage algorithm with error minimization to solve large sparse linear systems} %où @@ -378,20 +378,15 @@ % use a multiple column layout for up to two different % affiliations -\author{\IEEEauthorblockN{Rapha\"el Couturier} -\IEEEauthorblockA{Femto-ST Institute - DISC Department\\ -Universit\'e de Franche-Comt\'e, IUT de Belfort-Montb\'eliard\\ -19 avenue de Mar\'echal Juin, BP 527 \\ -90016 Belfort Cedex, France\\ -Email: raphael.couturier@univ-fcomte.fr} -\and -\IEEEauthorblockN{Lilia Ziane Khodja} -\IEEEauthorblockA{Centre de Recherche INRIA Bordeaux Sud-Ouest\\ -200 avenue de la Vieille Tour\\ -33405 Talence Cedex, France\\ +\author{\IEEEauthorblockN{Rapha\"el Couturier\IEEEauthorrefmark{1}, Lilia Ziane Khodja \IEEEauthorrefmark{2} and Christophe Guyeux\IEEEauthorrefmark{1}} +\IEEEauthorblockA{\IEEEauthorrefmark{1} Femto-ST Institute, University of Franche Comte, France\\ +Email: \{raphael.couturier,christophe.guyeux\}@univ-fcomte.fr} +\IEEEauthorblockA{\IEEEauthorrefmark{2} INRIA Bordeaux Sud-Ouest, France\\ Email: lilia.ziane@inria.fr} } + + % conference papers do not typically use \thanks and this command % is locked out in conference mode. If really needed, such as for % the acknowledgment of grants, issue a \IEEEoverridecommandlockouts @@ -428,7 +423,16 @@ Email: lilia.ziane@inria.fr} \begin{abstract} -%The abstract goes here. DO NOT USE SPECIAL CHARACTERS, SYMBOLS, OR MATH IN YOUR TITLE OR ABSTRACT. +In this paper we propose a two stage iterative method which increases the +convergence of Krylov iterative methods, typically those of GMRES variants. The +principle of our approach is to build an external iteration over the Krylov +method and to save the current residual frequently (for example, for each +restart of GMRES). Then after a given number of outer iterations, a minimization +step is applied on the matrix composed of the save residuals in order to compute +a better solution and make a new iteration if necessary. We prove that our +method has the same convergence property than the inner method used. Some +experiments using up to 16,394 cores show that compared to GMRES our algorithm +can be around 7 times faster. \end{abstract} \begin{IEEEkeywords} @@ -719,7 +723,7 @@ minimization. \begin{tabular}{|c|c|r|r|r|r|} \hline - \multirow{2}{*}{Matrix name} & Solver / & \multicolumn{2}{c|}{gmres variant} & \multicolumn{2}{c|}{2 stage} \\ + \multirow{2}{*}{Matrix name} & Solver / & \multicolumn{2}{c|}{gmres variant} & \multicolumn{2}{c|}{2 stage CGLS} \\ \cline{3-6} & precond & Time & \# Iter. & Time & \# Iter. \\\hline \hline @@ -728,17 +732,46 @@ parabolic\_fem & gmres / ilu & 1009.94 & 7573 & 401.52 & 2970 \\ epb3 & fgmres / sor & 8.67 & 600 & 8.21 & 540 \\ atmosmodj & fgmres / sor & 104.23 & 451 & 88.97 & 366 \\ bfwa398 & gmres / none & 1.42 & 9612 & 0.28 & 1650 \\ -torso3 & fgmres/sor & 37.70 & 565 & 34.97 & 510 \\ +torso3 & fgmres / sor & 37.70 & 565 & 34.97 & 510 \\ \hline \end{tabular} -\caption{Comparison of GMRES and 2 stage GMRES algorithms in sequential with some matrices, time is expressed in seconds.} +\caption{Comparison of (F)GMRES and 2 stage (F)GMRES algorithms in sequential with some matrices, time is expressed in seconds.} \label{tab:02} \end{center} \end{table} + +Larger experiments .... + +\begin{table*} +\begin{center} +\begin{tabular}{|r|r|r|r|r|r|r|r|r|} +\hline + + nb. cores & precond & \multicolumn{2}{c|}{gmres variant} & \multicolumn{2}{c|}{2 stage CGLS} & \multicolumn{2}{c|}{2 stage LSQR} & best gain \\ +\cline{3-8} + & & Time & \# Iter. & Time & \# Iter. & Time & \# Iter. & \\\hline \hline + 2,048 & mg & 403.49 & 18,210 & 73.89 & 3,060 & 77.84 & 3,270 & 5.46 \\ + 2,048 & sor & 745.37 & 57,060 & 87.31 & 6,150 & 104.21 & 7,230 & 8.53 \\ + 4,096 & mg & 562.25 & 25,170 & 97.23 & 3,990 & 89.71 & 3,630 & 6.27 \\ + 4,096 & sor & 912.12 & 70,194 & 145.57 & 9,750 & 168.97 & 10,980 & 6.26 \\ + 8,192 & mg & 917.02 & 40,290 & 148.81 & 5,730 & 143.03 & 5,280 & 6.41 \\ + 8,192 & sor & 1,404.53 & 106,530 & 212.55 & 12,990 & 180.97 & 10,470 & 7.76 \\ + 16,384 & mg & 1,430.56 & 63,930 & 237.17 & 8,310 & 244.26 & 7,950 & 6.03 \\ + 16,384 & sor & 2,852.14 & 216,240 & 418.46 & 21,690 & 505.26 & 23,970 & 6.82 \\ +\hline + +\end{tabular} +\caption{Comparison of FGMRES and 2 stage FGMRES algorithms for ex15 of Petsc with 25000 components per core on Juqueen (threshold 1e-3, restart=30, s=12), time is expressed in seconds.} +\label{tab:03} +\end{center} +\end{table*} + + + %%%********************************************************* %%%********************************************************* @@ -753,6 +786,11 @@ torso3 & fgmres/sor & 37.70 & 565 & 34.97 & 510 \\ %%%********************************************************* +future plan : \\ +- study other kinds of matrices, problems, inner solvers\\ +- adaptative number of outer iterations to minimize\\ +- implement our solver inside PETSc + % conference papers do not normally have an appendix @@ -762,10 +800,10 @@ torso3 & fgmres/sor & 37.70 & 565 & 34.97 & 510 \\ %%%********************************************************* %%%********************************************************* \section*{Acknowledgment} -%The authors would like to thank... -%more thanks here -%%%********************************************************* -%%%********************************************************* +This paper is partially funded by the Labex ACTION program (contract +ANR-11-LABX-01-01). We acknowledge PRACE for awarding us access to resource +Curie and Juqueen respectively based in France and Germany. + % trigger a \newpage just before the given reference