From 7a7e3e4142880bede1e4c831277adaf5e3773160 Mon Sep 17 00:00:00 2001 From: lilia Date: Fri, 10 Oct 2014 12:58:23 +0200 Subject: [PATCH 01/16] 10-10-2014 09 --- paper.tex | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/paper.tex b/paper.tex index 14ad53c..d660010 100644 --- a/paper.tex +++ b/paper.tex @@ -832,17 +832,16 @@ scalable linear equations solvers: \begin{itemize} \item ex15 is an example which solves in parallel an operator using a finite difference scheme. The diagonal is equal to 4 and 4 extra-diagonals - representing the neighbors in each directions is equal to -1. This example is + representing the neighbors in each directions are equal to -1. This example is used in many physical phenomena, for example, heat and fluid flow, wave - propagation... + propagation, etc. \item ex54 is another example based on 2D problem discretized with quadrilateral finite elements. For this example, the user can define the scaling of material - coefficient in embedded circle, it is called $\alpha$. + coefficient in embedded circle called $\alpha$. \end{itemize} -For more technical details on these applications, interested reader are invited -to read the codes available in the PETSc sources. Those problem have been -chosen because they are scalable with many cores. We have tested other problem -but they are not scalable with many cores. +For more technical details on these applications, interested readers are invited +to read the codes available in the PETSc sources. Those problems have been +chosen because they are scalable with many cores which is not the case of other problems that we have tested. In the following larger experiments are described on two large scale architectures: Curie and Juqeen... {\bf description...}\\ -- 2.39.5 From 6ee06aaa241c211c1fbd5d1efd3697c9e1e20b41 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 13:23:10 +0200 Subject: [PATCH 02/16] Reprise du boulot --- paper.tex | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/paper.tex b/paper.tex index fe7fa39..a2a3e9d 100644 --- a/paper.tex +++ b/paper.tex @@ -364,6 +364,7 @@ \algnewcommand\Output{\item[\algorithmicoutput]} \newtheorem{proposition}{Proposition} +\newtheorem{proof}{Proof} \begin{document} % @@ -380,7 +381,7 @@ % use a multiple column layout for up to two different % affiliations -\author{\IEEEauthorblockN{Rapha\"el Couturier\IEEEauthorrefmark{1}, Lilia Ziane Khodja \IEEEauthorrefmark{2}, and Christophe Guyeux\IEEEauthorrefmark{1}} +\author{\IEEEauthorblockN{Rapha\"el Couturier\IEEEauthorrefmark{1}, Lilia Ziane Khodja\IEEEauthorrefmark{2}, and Christophe Guyeux\IEEEauthorrefmark{1}} \IEEEauthorblockA{\IEEEauthorrefmark{1} Femto-ST Institute, University of Franche Comte, France\\ Email: \{raphael.couturier,christophe.guyeux\}@univ-fcomte.fr} \IEEEauthorblockA{\IEEEauthorrefmark{2} INRIA Bordeaux Sud-Ouest, France\\ @@ -739,11 +740,17 @@ Suppose that $A$ is a positive real matrix with symmetric part $M$. Then the res \begin{equation} ||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0|| , \end{equation} -where $\alpha = \lambda_min(M)^2$ and $\beta = \lambda_max(A^T A)$, which proves +where $\alpha = \lambda_{min}(M)^2$ and $\beta = \lambda_{max}(A^T A)$, which proves the convergence of GMRES($m$) for all $m$ under that assumption regarding $A$. \end{proposition} +We can now claim that, +\begin{proposition} +If $A$ is a positive real matrix, then the TSIRM algorithm is convergent. +\end{proposition} +\begin{proof} +\end{proof} %%%********************************************************* %%%********************************************************* @@ -1048,4 +1055,3 @@ Curie and Juqueen respectively based in France and Germany. % that's all folks \end{document} - -- 2.39.5 From 95fea275e81cda394a2278d66259fce8f4df5e8c Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 13:31:18 +0200 Subject: [PATCH 03/16] =?utf8?q?D=C3=A9but=20de=20la=20preuve=20de=20conve?= =?utf8?q?rgence.?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- paper.tex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paper.tex b/paper.tex index e512018..e927821 100644 --- a/paper.tex +++ b/paper.tex @@ -752,6 +752,11 @@ If $A$ is a positive real matrix, then the TSIRM algorithm is convergent. \end{proposition} \begin{proof} +Let $r_k = b-Ax_k$, where $x_k$ is the approximation of the solution after the +$k$-th iterate of TSIRM. +We will prove that $r_k \rightarrow 0$ when $k \rightarrow +\infty$. + + \end{proof} %%%********************************************************* @@ -1055,4 +1060,3 @@ Curie and Juqueen respectively based in France and Germany. % that's all folks \end{document} - -- 2.39.5 From c58c00531fe9eae1fa885f5ab10b3308d38feb6d Mon Sep 17 00:00:00 2001 From: lilia Date: Fri, 10 Oct 2014 13:33:52 +0200 Subject: [PATCH 04/16] 10-10-2014 10 --- paper.tex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/paper.tex b/paper.tex index d660010..cba14da 100644 --- a/paper.tex +++ b/paper.tex @@ -846,7 +846,7 @@ chosen because they are scalable with many cores which is not the case of other In the following larger experiments are described on two large scale architectures: Curie and Juqeen... {\bf description...}\\ -{\bf Description of preconditioners} +{\bf Description of preconditioners}\\ \begin{table*}[htbp] \begin{center} @@ -867,15 +867,15 @@ In the following larger experiments are described on two large scale architectur \hline \end{tabular} -\caption{Comparison of FGMRES and TSIRM with FGMRES for example ex15 of PETSc with two preconditioner (mg and sor) with 25,000 components per core on Juqueen (threshold 1e-3, restart=30, s=12), time is expressed in seconds.} +\caption{Comparison of FGMRES and TSIRM with FGMRES for example ex15 of PETSc with two preconditioners (mg and sor) with 25,000 components per core on Juqueen (threshold 1e-3, restart=30, s=12), time is expressed in seconds.} \label{tab:03} \end{center} \end{table*} Table~\ref{tab:03} shows the execution times and the number of iterations of -example ex15 of PETSc on the Juqueen architecture. Differents number of cores -are studied rangin from 2,048 upto 16,383. Two preconditioners have been -tested. For those experiments, the number of components (or unknown of the +example ex15 of PETSc on the Juqueen architecture. Different numbers of cores +are studied ranging from 2,048 up-to 16,383. Two preconditioners have been +tested: {\it mg} and {\it sor}. For those experiments, the number of components (or unknowns of the problems) per processor is fixed to 25,000, also called weak scaling. This number can seem relatively small. In fact, for some applications that need a lot of memory, the number of components per processor requires sometimes to be @@ -883,11 +883,11 @@ small. -In this Table, we can notice that TSIRM is always faster than FGMRES. The last +In Table~\ref{tab:03}, we can notice that TSIRM is always faster than FGMRES. The last column shows the ratio between FGMRES and the best version of TSIRM according to the minimization procedure: CGLS or LSQR. Even if we have computed the worst -case between CGLS and LSQR, it is clear that TSIRM is alsways faster than -FGMRES. For this example, the multigrid preconditionner is faster than SOR. The +case between CGLS and LSQR, it is clear that TSIRM is always faster than +FGMRES. For this example, the multigrid preconditioner is faster than SOR. The gain between TSIRM and FGMRES is more or less similar for the two preconditioners. Looking at the number of iterations to reach the convergence, it is obvious that TSIRM allows the reduction of the number of iterations. It -- 2.39.5 From e9ef8e49b713cba35e5f44d77ad7c9cb1385c804 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 13:52:39 +0200 Subject: [PATCH 05/16] Petites modifs --- paper.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paper.tex b/paper.tex index e927821..0290628 100644 --- a/paper.tex +++ b/paper.tex @@ -748,7 +748,7 @@ the convergence of GMRES($m$) for all $m$ under that assumption regarding $A$. We can now claim that, \begin{proposition} -If $A$ is a positive real matrix, then the TSIRM algorithm is convergent. +If $A$ is a positive real matrix and GMRES($m$) is used as solver, then the TSIRM algorithm is convergent. \end{proposition} \begin{proof} @@ -756,7 +756,7 @@ Let $r_k = b-Ax_k$, where $x_k$ is the approximation of the solution after the $k$-th iterate of TSIRM. We will prove that $r_k \rightarrow 0$ when $k \rightarrow +\infty$. - +Each step of the TSIRM algorithm \end{proof} %%%********************************************************* -- 2.39.5 From acead508c1f820b919aa203c78668e5e645cc9b8 Mon Sep 17 00:00:00 2001 From: lilia Date: Fri, 10 Oct 2014 13:58:26 +0200 Subject: [PATCH 06/16] 10-10-2014 11 --- paper.tex | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/paper.tex b/paper.tex index 1d4cac0..bf9e767 100644 --- a/paper.tex +++ b/paper.tex @@ -364,7 +364,6 @@ \algnewcommand\Output{\item[\algorithmicoutput]} \newtheorem{proposition}{Proposition} -\newtheorem{proof}{Proof} \begin{document} % @@ -381,7 +380,7 @@ % use a multiple column layout for up to two different % affiliations -\author{\IEEEauthorblockN{Rapha\"el Couturier\IEEEauthorrefmark{1}, Lilia Ziane Khodja\IEEEauthorrefmark{2}, and Christophe Guyeux\IEEEauthorrefmark{1}} +\author{\IEEEauthorblockN{Rapha\"el Couturier\IEEEauthorrefmark{1}, Lilia Ziane Khodja \IEEEauthorrefmark{2}, and Christophe Guyeux\IEEEauthorrefmark{1}} \IEEEauthorblockA{\IEEEauthorrefmark{1} Femto-ST Institute, University of Franche Comte, France\\ Email: \{raphael.couturier,christophe.guyeux\}@univ-fcomte.fr} \IEEEauthorblockA{\IEEEauthorrefmark{2} INRIA Bordeaux Sud-Ouest, France\\ @@ -742,23 +741,12 @@ Suppose that $A$ is a positive real matrix with symmetric part $M$. Then the res \begin{equation} ||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0|| , \end{equation} -where $\alpha = \lambda_{min}(M)^2$ and $\beta = \lambda_{max}(A^T A)$, which proves +where $\alpha = \lambda_min(M)^2$ and $\beta = \lambda_max(A^T A)$, which proves the convergence of GMRES($m$) for all $m$ under that assumption regarding $A$. \end{proposition} -We can now claim that, -\begin{proposition} -If $A$ is a positive real matrix, then the TSIRM algorithm is convergent. -\end{proposition} - -\begin{proof} -Let $r_k = b-Ax_k$, where $x_k$ is the approximation of the solution after the -$k$-th iterate of TSIRM. -We will prove that $r_k \rightarrow 0$ when $k \rightarrow +\infty$. -\end{proof} - %%%********************************************************* %%%********************************************************* \section{Experiments using PETSc} @@ -918,9 +906,9 @@ corresponds to 30*12, there are $max\_iter_{ls}$ which corresponds to 15. In Figure~\ref{fig:01}, the number of iterations per second corresponding to Table~\ref{tab:01} is displayed. It can be noticed that the number of -iterations per second of FMGRES is constant whereas it decrease with TSIRM with -both preconditioner. This can be explained by the fact that when the number of -core increases the time for the minimization step also increases but, generally, +iterations per second of FMGRES is constant whereas it decreases with TSIRM with +both preconditioners. This can be explained by the fact that when the number of +cores increases the time for the least-squares minimization step also increases but, generally, when the number of cores increases, the number of iterations to reach the threshold also increases, and, in that case, TSIRM is more efficient to reduce the number of iterations. So, the overall benefit of using TSIRM is interesting. @@ -1060,3 +1048,5 @@ Curie and Juqueen respectively based in France and Germany. % that's all folks \end{document} + + -- 2.39.5 From 0c77825b587a6051bceb28a3a8762f660f3c6c52 Mon Sep 17 00:00:00 2001 From: lilia Date: Fri, 10 Oct 2014 14:05:43 +0200 Subject: [PATCH 07/16] 10-10-2014 12 --- paper.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper.tex b/paper.tex index f1becbd..3b19b2d 100644 --- a/paper.tex +++ b/paper.tex @@ -920,7 +920,7 @@ corresponds to 30*12, there are $max\_iter_{ls}$ which corresponds to 15. In Figure~\ref{fig:01}, the number of iterations per second corresponding to -Table~\ref{tab:01} is displayed. It can be noticed that the number of +Table~\ref{tab:03} is displayed. It can be noticed that the number of iterations per second of FMGRES is constant whereas it decreases with TSIRM with both preconditioners. This can be explained by the fact that when the number of cores increases the time for the least-squares minimization step also increases but, generally, -- 2.39.5 From 80c1614695ad0d65eb7a93f7483aa9e133406670 Mon Sep 17 00:00:00 2001 From: lilia Date: Fri, 10 Oct 2014 14:16:24 +0200 Subject: [PATCH 08/16] 10-10-2014 13 --- nb_iter_sec_ex15_juqueen.plot | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nb_iter_sec_ex15_juqueen.plot b/nb_iter_sec_ex15_juqueen.plot index b80c7da..96a4191 100755 --- a/nb_iter_sec_ex15_juqueen.plot +++ b/nb_iter_sec_ex15_juqueen.plot @@ -1,4 +1,4 @@ -# Analysis description +# Analysis description set encoding iso_8859_1 set terminal x11 set size 1,0.5 @@ -7,10 +7,10 @@ set term postscript enhanced portrait "Helvetica" 12 set ylabel "nb. iter. per second" set xlabel "nb. of cores" set key on outside left bmargin -plot 'nb_iter_sec_ex15_juqueen.txt' using 1:2 t "GMRES/mg" with linespoints lt 1 lw 2 ps 0 pt 5,\ +plot 'nb_iter_sec_ex15_juqueen.txt' using 1:2 t "FGMRES/mg" with linespoints lt 1 lw 2 ps 0 pt 5,\ 'nb_iter_sec_ex15_juqueen.txt' using 1:3 t "TSIRM CGLS/mg" with linespoints lt 2 lw 2 ps 0 pt 1,\ 'nb_iter_sec_ex15_juqueen.txt' using 1:4 t "TSIRM LSQR/mg" with linespoints lt 3 lw 2 ps 0 pt 1,\ -'nb_iter_sec_ex15_juqueen.txt' using 1:5 t "GMRES/sor" with linespoints lt 4 lw 2 ps 0 pt 1,\ +'nb_iter_sec_ex15_juqueen.txt' using 1:5 t "FGMRES/sor" with linespoints lt 4 lw 2 ps 0 pt 1,\ 'nb_iter_sec_ex15_juqueen.txt' using 1:6 t "TSIRM CGLS/sor" with linespoints lt 5 lw 2 ps 0 pt 1,\ 'nb_iter_sec_ex15_juqueen.txt' using 1:7 t "TSIRM LSQR/sor" with linespoints lt 6 lw 2 ps 0 pt 1 -- 2.39.5 From 7d6625e2b26c8e17c8cf8fafc8fbed964dda513a Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 14:15:30 +0200 Subject: [PATCH 09/16] =?utf8?q?Avanc=C3=A9es=20dans=20la=20preuve?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- paper.tex | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/paper.tex b/paper.tex index 3b19b2d..ceffa3d 100644 --- a/paper.tex +++ b/paper.tex @@ -745,9 +745,7 @@ where $\alpha = \lambda_min(M)^2$ and $\beta = \lambda_max(A^T A)$, which proves the convergence of GMRES($m$) for all $m$ under that assumption regarding $A$. \end{proposition} -<<<<<<< HEAD -======= We can now claim that, \begin{proposition} If $A$ is a positive real matrix and GMRES($m$) is used as solver, then the TSIRM algorithm is convergent. @@ -758,9 +756,16 @@ Let $r_k = b-Ax_k$, where $x_k$ is the approximation of the solution after the $k$-th iterate of TSIRM. We will prove that $r_k \rightarrow 0$ when $k \rightarrow +\infty$. -Each step of the TSIRM algorithm +Each step of the TSIRM algorithm \\ +$\min_{\alpha \in \mathbb{R}^s} ||b-R\alpha ||_2 = \min_{\alpha \in \mathbb{R}^s} ||b-AS\alpha ||_2$ + +$\begin{array}{ll} +& = \min_{x \in Vect\left(x_0, x_1, \hdots, x_{k-1} \right)} ||b-AS\alpha ||_2\\ +& \leqslant \min_{x \in Vect\left( S_{k-1} \right)} ||b-Ax ||_2\\ +& \leqslant ||b-Ax_{k-1}|| +\end{array}$ \end{proof} ->>>>>>> 84e15020344b77e5497c4a516cc20b472b2914cd + %%%********************************************************* %%%********************************************************* @@ -1064,4 +1069,3 @@ Curie and Juqueen respectively based in France and Germany. % that's all folks \end{document} - -- 2.39.5 From a0cf63f4131f16e0075ab94bbf53937fb998cca0 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 14:38:59 +0200 Subject: [PATCH 10/16] Typos --- paper.tex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paper.tex b/paper.tex index ceffa3d..64a88a8 100644 --- a/paper.tex +++ b/paper.tex @@ -669,8 +669,8 @@ called for a maximum of $max\_iter_{kryl}$ iterations. In practice, we sugges equals to the restart number of the GMRES-like method. Moreover, a tolerance threshold must be specified for the solver. In practice, this threshold must be much smaller than the convergence threshold of the TSIRM algorithm (\emph{i.e.} -$\epsilon_{tsirm}$). Line~\ref{algo:store}, $S_{k~ mod~ s}=x^k$ consists in copying the -solution $x_k$ into the column $k~ mod~ s$ of the matrix $S$. After the +$\epsilon_{tsirm}$). Line~\ref{algo:store}, $S_{k \mod s}=x^k$ consists in copying the +solution $x_k$ into the column $k \mod s$ of the matrix $S$, where $S$ is a matrix of size $n\times s$ whose column vector $i$ is denoted by $S_i$. After the minimization, the matrix $S$ is reused with the new values of the residuals. To solve the minimization problem, an iterative method is used. Two parameters are required for that: the maximum number of iterations and the threshold to stop the @@ -686,13 +686,13 @@ Let us summarize the most important parameters of TSIRM: \end{itemize} -The parallelisation of TSIRM relies on the parallelization of all its +The parallelization of TSIRM relies on the parallelization of all its parts. More precisely, except the least-squares step, all the other parts are obvious to achieve out in parallel. In order to develop a parallel version of our code, we have chosen to use PETSc~\cite{petsc-web-page}. For line~\ref{algo:matrix_mul} the matrix-matrix multiplication is implemented and efficient since the matrix $A$ is sparse and since the matrix $S$ contains few -colums in practice. As explained previously, at least two methods seem to be +columns in practice. As explained previously, at least two methods seem to be interesting to solve the least-squares minimization, CGLS and LSQR. In the following we remind the CGLS algorithm. The LSQR method follows more or -- 2.39.5 From 257eb3828b7e512fff530ca5609ec15a27e3fa56 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 14:43:50 +0200 Subject: [PATCH 11/16] Typos dans la preuve --- paper.tex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paper.tex b/paper.tex index 64a88a8..517cb8e 100644 --- a/paper.tex +++ b/paper.tex @@ -760,8 +760,9 @@ Each step of the TSIRM algorithm \\ $\min_{\alpha \in \mathbb{R}^s} ||b-R\alpha ||_2 = \min_{\alpha \in \mathbb{R}^s} ||b-AS\alpha ||_2$ $\begin{array}{ll} -& = \min_{x \in Vect\left(x_0, x_1, \hdots, x_{k-1} \right)} ||b-AS\alpha ||_2\\ -& \leqslant \min_{x \in Vect\left( S_{k-1} \right)} ||b-Ax ||_2\\ +& = \min_{x \in Vect\left(S_{k-s}, S_{k-s+1}, \hdots, S_{k-1} \right)} ||b-AS\alpha ||_2\\ +& = \min_{x \in Vect\left(x_{k-s}, x_{k-s}+1, \hdots, x_{k-1} \right)} ||b-AS\alpha ||_2\\ +& \leqslant \min_{x \in Vect\left( x_{k-1} \right)} ||b-Ax ||_2\\ & \leqslant ||b-Ax_{k-1}|| \end{array}$ \end{proof} -- 2.39.5 From 2013b77d04aa67e242c885349c663c432142782a Mon Sep 17 00:00:00 2001 From: lilia Date: Fri, 10 Oct 2014 14:45:24 +0200 Subject: [PATCH 12/16] 10-10-2014 14 --- nb_iter_sec_ex54_curie.plot | 4 ++-- paper.tex | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/nb_iter_sec_ex54_curie.plot b/nb_iter_sec_ex54_curie.plot index ee5fab6..ed76c99 100644 --- a/nb_iter_sec_ex54_curie.plot +++ b/nb_iter_sec_ex54_curie.plot @@ -1,4 +1,4 @@ -# Analysis description +# Analysis description set encoding iso_8859_1 set terminal x11 set size 1,0.5 @@ -9,7 +9,7 @@ set term postscript enhanced portrait "Helvetica" 12 set ylabel "nb. iter. per second" set xlabel "nb. of cores" set key on outside left bmargin -plot 'nb_iter_sec_ex54_curie.txt' using 1:2 t "GMRES/mg" with linespoints lt 1 lw 2 ps 0 pt 5,\ +plot 'nb_iter_sec_ex54_curie.txt' using 1:2 t "FGMRES/mg" with linespoints lt 1 lw 2 ps 0 pt 5,\ 'nb_iter_sec_ex54_curie.txt' using 1:3 t "TSIRM CGLS/mg" with linespoints lt 2 lw 2 ps 0 pt 1,\ 'nb_iter_sec_ex54_curie.txt' using 1:4 t "TSIRM LSQR/mg" with linespoints lt 3 lw 2 ps 0 pt 1 diff --git a/paper.tex b/paper.tex index ceffa3d..c0e8b16 100644 --- a/paper.tex +++ b/paper.tex @@ -837,7 +837,7 @@ torso3 & fgmres / sor & 37.70 & 565 & 34.97 & 510 \\ \hline \end{tabular} -\caption{Comparison of (F)GMRES and 2 stage (F)GMRES algorithms in sequential with some matrices, time is expressed in seconds.} +\caption{Comparison of (F)GMRES and TSIRM with (F)GMRES in sequential with some matrices, time is expressed in seconds.} \label{tab:02} \end{center} \end{table} @@ -896,7 +896,7 @@ Table~\ref{tab:03} shows the execution times and the number of iterations of example ex15 of PETSc on the Juqueen architecture. Different numbers of cores are studied ranging from 2,048 up-to 16,383. Two preconditioners have been tested: {\it mg} and {\it sor}. For those experiments, the number of components (or unknowns of the -problems) per processor is fixed to 25,000, also called weak scaling. This +problems) per core is fixed to 25,000, also called weak scaling. This number can seem relatively small. In fact, for some applications that need a lot of memory, the number of components per processor requires sometimes to be small. @@ -943,7 +943,7 @@ the number of iterations. So, the overall benefit of using TSIRM is interesting. \begin{tabular}{|r|r|r|r|r|r|r|r|r|} \hline - nb. cores & threshold & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSIRM CGLS} & \multicolumn{2}{c|}{TSIRM LSQR} & best gain \\ + nb. cores & threshold & \multicolumn{2}{c|}{FGMRES} & \multicolumn{2}{c|}{TSIRM CGLS} & \multicolumn{2}{c|}{TSIRM LSQR} & best gain \\ \cline{3-8} & & Time & \# Iter. & Time & \# Iter. & Time & \# Iter. & \\\hline \hline 2,048 & 8e-5 & 108.88 & 16,560 & 23.06 & 3,630 & 22.79 & 3,630 & 4.77 \\ @@ -956,7 +956,7 @@ the number of iterations. So, the overall benefit of using TSIRM is interesting. \hline \end{tabular} -\caption{Comparison of FGMRES and 2 stage FGMRES algorithms for ex54 of Petsc (both with the MG preconditioner) with 25000 components per core on Curie (restart=30, s=12), time is expressed in seconds.} +\caption{Comparison of FGMRES and TSIRM with FGMRES algorithms for ex54 of Petsc (both with the MG preconditioner) with 25,000 components per core on Curie (restart=30, s=12), time is expressed in seconds.} \label{tab:04} \end{center} \end{table*} @@ -970,9 +970,9 @@ In Table~\ref{tab:04}, some experiments with example ex54 on the Curie architect \begin{tabular}{|r|r|r|r|r|r|r|r|r|r|r|} \hline - nb. cores & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSIRM CGLS} & \multicolumn{2}{c|}{TSIRM LSQR} & best gain & \multicolumn{3}{c|}{efficiency} \\ + nb. cores & \multicolumn{2}{c|}{FGMRES} & \multicolumn{2}{c|}{TSIRM CGLS} & \multicolumn{2}{c|}{TSIRM LSQR} & best gain & \multicolumn{3}{c|}{efficiency} \\ \cline{2-7} \cline{9-11} - & Time & \# Iter. & Time & \# Iter. & Time & \# Iter. & & GMRES & TS CGLS & TS LSQR\\\hline \hline + & Time & \# Iter. & Time & \# Iter. & Time & \# Iter. & & FGMRES & TS CGLS & TS LSQR\\\hline \hline 512 & 3,969.69 & 33,120 & 709.57 & 5,790 & 622.76 & 5,070 & 6.37 & 1 & 1 & 1 \\ 1024 & 1,530.06 & 25,860 & 290.95 & 4,830 & 307.71 & 5,070 & 5.25 & 1.30 & 1.21 & 1.01 \\ 2048 & 919.62 & 31,470 & 237.52 & 8,040 & 194.22 & 6,510 & 4.73 & 1.08 & .75 & .80\\ @@ -982,7 +982,7 @@ In Table~\ref{tab:04}, some experiments with example ex54 on the Curie architect \hline \end{tabular} -\caption{Comparison of FGMRES and 2 stage FGMRES algorithms for ex54 of Petsc (both with the MG preconditioner) with 204,919,225 components on Curie with different number of cores (restart=30, s=12, threshol 5e-5), time is expressed in seconds.} +\caption{Comparison of FGMRES and TSIRM with FGMRES for ex54 of Petsc (both with the MG preconditioner) with 204,919,225 components on Curie with different number of cores (restart=30, s=12, threshold 5e-5), time is expressed in seconds.} \label{tab:05} \end{center} \end{table*} @@ -1010,7 +1010,7 @@ In Table~\ref{tab:04}, some experiments with example ex54 on the Curie architect future plan : \\ - study other kinds of matrices, problems, inner solvers\\ -- test the influence of all the parameters\\ +- test the influence of all parameters\\ - adaptative number of outer iterations to minimize\\ - other methods to minimize the residuals?\\ - implement our solver inside PETSc @@ -1025,7 +1025,7 @@ future plan : \\ %%%********************************************************* \section*{Acknowledgment} This paper is partially funded by the Labex ACTION program (contract -ANR-11-LABX-01-01). We acknowledge PRACE for awarding us access to resource +ANR-11-LABX-01-01). We acknowledge PRACE for awarding us access to resources Curie and Juqueen respectively based in France and Germany. -- 2.39.5 From 0501cb56e1c4b2968b9f435cc1f53e348a8bd434 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 14:51:15 +0200 Subject: [PATCH 13/16] i8n de la preuve --- paper.tex | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/paper.tex b/paper.tex index 4e40730..4a8bc4d 100644 --- a/paper.tex +++ b/paper.tex @@ -757,13 +757,16 @@ $k$-th iterate of TSIRM. We will prove that $r_k \rightarrow 0$ when $k \rightarrow +\infty$. Each step of the TSIRM algorithm \\ + +Let $\operatorname{span}(S) = \left \{ {\sum_{i=1}^k \lambda_i v_i \Big| k \in \mathbb{N}, v_i \in S, \lambda _i \in \mathbb{R}} \right \}$ be the linear span of a set of vectors $S$. So,\\ $\min_{\alpha \in \mathbb{R}^s} ||b-R\alpha ||_2 = \min_{\alpha \in \mathbb{R}^s} ||b-AS\alpha ||_2$ $\begin{array}{ll} -& = \min_{x \in Vect\left(S_{k-s}, S_{k-s+1}, \hdots, S_{k-1} \right)} ||b-AS\alpha ||_2\\ -& = \min_{x \in Vect\left(x_{k-s}, x_{k-s}+1, \hdots, x_{k-1} \right)} ||b-AS\alpha ||_2\\ -& \leqslant \min_{x \in Vect\left( x_{k-1} \right)} ||b-Ax ||_2\\ -& \leqslant ||b-Ax_{k-1}|| +& = \min_{x \in span\left(S_{k-s}, S_{k-s+1}, \hdots, S_{k-1} \right)} ||b-AS\alpha ||_2\\ +& = \min_{x \in span\left(x_{k-s}, x_{k-s}+1, \hdots, x_{k-1} \right)} ||b-AS\alpha ||_2\\ +& \leqslant \min_{x \in span\left( x_{k-1} \right)} ||b-Ax ||_2\\ +& \leqslant \min_{\lambda \in \mathbb{R}} ||b-\lambda Ax_{k-1} ||_2\\ +& \leqslant ||b-Ax_{k-1}||_2 . \end{array}$ \end{proof} @@ -1069,4 +1072,3 @@ Curie and Juqueen respectively based in France and Germany. % that's all folks \end{document} - -- 2.39.5 From 317020501fc8da3f9e443457700821b26f66da55 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 14:58:22 +0200 Subject: [PATCH 14/16] maj de la prop --- paper.tex | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/paper.tex b/paper.tex index 4a8bc4d..436909a 100644 --- a/paper.tex +++ b/paper.tex @@ -380,7 +380,7 @@ % use a multiple column layout for up to two different % affiliations -\author{\IEEEauthorblockN{Rapha\"el Couturier\IEEEauthorrefmark{1}, Lilia Ziane Khodja \IEEEauthorrefmark{2}, and Christophe Guyeux\IEEEauthorrefmark{1}} +\author{\IEEEauthorblockN{Rapha\"el Couturier\IEEEauthorrefmark{1}, Lilia Ziane Khodja\IEEEauthorrefmark{2}, and Christophe Guyeux\IEEEauthorrefmark{1}} \IEEEauthorblockA{\IEEEauthorrefmark{1} Femto-ST Institute, University of Franche Comte, France\\ Email: \{raphael.couturier,christophe.guyeux\}@univ-fcomte.fr} \IEEEauthorblockA{\IEEEauthorrefmark{2} INRIA Bordeaux Sud-Ouest, France\\ @@ -737,6 +737,7 @@ these operations are easy to implement in PETSc or similar environment. \label{sec:04} Let us recall the following result, see~\cite{Saad86}. \begin{proposition} +\label{prop:saad} Suppose that $A$ is a positive real matrix with symmetric part $M$. Then the residual norm provided at the $m$-th step of GMRES satisfies: \begin{equation} ||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0|| , @@ -748,7 +749,11 @@ the convergence of GMRES($m$) for all $m$ under that assumption regarding $A$. We can now claim that, \begin{proposition} -If $A$ is a positive real matrix and GMRES($m$) is used as solver, then the TSIRM algorithm is convergent. +If $A$ is a positive real matrix and GMRES($m$) is used as solver, then the TSIRM algorithm is convergent. Furthermore, we still have +\begin{equation} +||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0|| , +\end{equation} +where $\alpha$ and $\beta$ are defined as in Proposition~\ref{prop:saad}. \end{proposition} \begin{proof} @@ -770,6 +775,8 @@ $\begin{array}{ll} \end{array}$ \end{proof} +We can remark that, at each iterate, the residue of the TSIRM algorithm is lower +than the one of the GMRES method. %%%********************************************************* %%%********************************************************* -- 2.39.5 From c21f0db90e14920174026f446c338c0a88e35262 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 15:20:12 +0200 Subject: [PATCH 15/16] =?utf8?q?Avanc=C3=A9es=20dans=20la=20preuve=20par?= =?utf8?q?=20induction?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- paper.tex | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/paper.tex b/paper.tex index 436909a..d9880d1 100644 --- a/paper.tex +++ b/paper.tex @@ -759,11 +759,17 @@ where $\alpha$ and $\beta$ are defined as in Proposition~\ref{prop:saad}. \begin{proof} Let $r_k = b-Ax_k$, where $x_k$ is the approximation of the solution after the $k$-th iterate of TSIRM. -We will prove that $r_k \rightarrow 0$ when $k \rightarrow +\infty$. +We will prove by a mathematical induction that, for each $k \in \mathbb{N}^\ast$, +$||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0||.$ -Each step of the TSIRM algorithm \\ +The base case is obvious, as for $k=1$, the TSIRM algorithm simply consists in applying GMRES($m$) once, leading to a new residual $r_1$ which follows the inductive hypothesis due to Proposition~\ref{prop:saad}. -Let $\operatorname{span}(S) = \left \{ {\sum_{i=1}^k \lambda_i v_i \Big| k \in \mathbb{N}, v_i \in S, \lambda _i \in \mathbb{R}} \right \}$ be the linear span of a set of vectors $S$. So,\\ +Suppose now that the claim holds for all $m=1, 2, \hdots, k-1$, that is, $\forall m \in \{1,2,\hdots, k-1\}$, $||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0||$. +We will show that the statement holds too for $r_k$. Two situations can occur: +\begin{itemize} +\item If $k \mod m \neq 0$, then + +\item Else, let $\operatorname{span}(S) = \left \{ {\sum_{i=1}^k \lambda_i v_i \Big| k \in \mathbb{N}, v_i \in S, \lambda _i \in \mathbb{R}} \right \}$ be the linear span of a set of real vectors $S$. So,\\ $\min_{\alpha \in \mathbb{R}^s} ||b-R\alpha ||_2 = \min_{\alpha \in \mathbb{R}^s} ||b-AS\alpha ||_2$ $\begin{array}{ll} @@ -773,6 +779,7 @@ $\begin{array}{ll} & \leqslant \min_{\lambda \in \mathbb{R}} ||b-\lambda Ax_{k-1} ||_2\\ & \leqslant ||b-Ax_{k-1}||_2 . \end{array}$ +\end{itemize} \end{proof} We can remark that, at each iterate, the residue of the TSIRM algorithm is lower -- 2.39.5 From 59bde6ffdfbc419978262601f80b4a376f26d4d0 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Fri, 10 Oct 2014 15:27:04 +0200 Subject: [PATCH 16/16] Typos --- paper.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paper.tex b/paper.tex index d9880d1..6ae2692 100644 --- a/paper.tex +++ b/paper.tex @@ -742,7 +742,7 @@ Suppose that $A$ is a positive real matrix with symmetric part $M$. Then the res \begin{equation} ||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0|| , \end{equation} -where $\alpha = \lambda_min(M)^2$ and $\beta = \lambda_max(A^T A)$, which proves +where $\alpha = \lambda_{min}(M)^2$ and $\beta = \lambda_{max}(A^T A)$, which proves the convergence of GMRES($m$) for all $m$ under that assumption regarding $A$. \end{proposition} @@ -767,7 +767,7 @@ The base case is obvious, as for $k=1$, the TSIRM algorithm simply consists in a Suppose now that the claim holds for all $m=1, 2, \hdots, k-1$, that is, $\forall m \in \{1,2,\hdots, k-1\}$, $||r_m|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0||$. We will show that the statement holds too for $r_k$. Two situations can occur: \begin{itemize} -\item If $k \mod m \neq 0$, then +\item If $k \mod m \neq 0$, then the TSIRM algorithm consists in executing GMRES once. In that case, we obtain $||r_k|| \leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_{k-1}||\leqslant \left(1-\dfrac{\alpha}{\beta}\right)^{\frac{m}{2}} ||r_0||$. \item Else, let $\operatorname{span}(S) = \left \{ {\sum_{i=1}^k \lambda_i v_i \Big| k \in \mathbb{N}, v_i \in S, \lambda _i \in \mathbb{R}} \right \}$ be the linear span of a set of real vectors $S$. So,\\ $\min_{\alpha \in \mathbb{R}^s} ||b-R\alpha ||_2 = \min_{\alpha \in \mathbb{R}^s} ||b-AS\alpha ||_2$ -- 2.39.5