From 07677631c1d2e2d7aa7b88b82f62e12c72b11327 Mon Sep 17 00:00:00 2001 From: lilia Date: Sun, 12 Jan 2014 17:53:14 +0100 Subject: [PATCH] 12-01-2014 --- biblio.bib | 14 +++++- krylov_multi.tex | 108 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 98 insertions(+), 24 deletions(-) diff --git a/biblio.bib b/biblio.bib index 7d16800..a5311d6 100644 --- a/biblio.bib +++ b/biblio.bib @@ -99,14 +99,24 @@ pages = {}, year = {2008}, } -@article{refCGNR +@article{refCGNR, year={2001}, journal={Annals of Operations Research}, volume={103}, number={1-4}, -title={An Adaptive CGNR Algorithm for Solving Large Linear Systems}, +title={An {A}daptive {CGNR} {A}lgorithm for {S}olving {L}arge {L}inear {S}ystems}, publisher={Kluwer Academic Publishers}, author={Li, Chunguang}, pages={329-338}, } +@article{ref34, +title = {{GMRES}: {A} {G}eneralized {M}inimal {R}esidual {A}lgorithm for {S}olving {N}onsymmetric {L}inear {S}ystems}, +author = {Saad, Y. and Schultz, M. H.}, +journal = {SIAM Journal on Scientific and Statistical Computing}, +volume = {7}, +number = {3}, +pages = {856--869}, +year = {1986}, +} + diff --git a/krylov_multi.tex b/krylov_multi.tex index 1b8b43e..036c190 100644 --- a/krylov_multi.tex +++ b/krylov_multi.tex @@ -3,6 +3,12 @@ \usepackage{amsfonts,amssymb} \usepackage{amsmath} \usepackage{graphicx} +%\usepackage{algorithmic} +%\usepackage[ruled,english,boxed,linesnumbered]{algorithm2e} +%\usepackage[english]{algorithme} +\usepackage{algorithm} +\usepackage{algpseudocode} + \title{A scalable multisplitting algorithm for solving large sparse linear systems} @@ -191,10 +197,11 @@ Y_l = B_l - \displaystyle\sum_{i=1,i\neq l}^{L}A_{li}X_i, is solved independently by a cluster of processors and communication are required to update the right-hand side vectors $Y_l$, such that the vectors $X_i$ represent the data dependencies between the -clusters. In this work, we use the GMRES method as an inner iteration -method for solving the sub-systems~(\ref{sec03:eq03}). It is a -well-known iterative method which gives good performances for solving -sparse linear systems in parallel on a cluster of processors. +clusters. In this work, we use the parallel GMRES method~\cite{ref34} +as an inner iteration method for solving the +sub-systems~(\ref{sec03:eq03}). It is a well-known iterative method +which gives good performances for solving sparse linear systems in +parallel on a cluster of processors. It should be noted that the convergence of the inner iterative solver for the different linear sub-systems~(\ref{sec03:eq03}) does not @@ -218,39 +225,96 @@ splittings~(\ref{sec03:eq03}) S=\{x^1,x^2,\ldots,x^s\},~s\leq n, \label{sec03:eq04} \end{equation} -where for $k\in\{1,\ldots,s\}$, $x^k=[X_1^k,\ldots,X_L^k]$ is a +where for $j\in\{1,\ldots,s\}$, $x^j=[X_1^j,\ldots,X_L^j]$ is a solution of the global linear system.%The advantage such a method is that the Krylov subspace does not need to be spanned by an orthogonal basis. The advantage of such a Krylov subspace is that we need neither an orthogonal basis nor synchronizations between the different clusters to generate this basis. -The multisplitting method is periodically restarted every $s$ iterations -with a new initial guess $\tilde{x}=S\alpha$ which minimizes the error -function $\|b-Ax\|_2$ over the Krylov subspace spanned by the vectors of $S$. -So, $\alpha$ is defined as the solution of the large overdetermined linear system +The multisplitting method is periodically restarted every $s$ +iterations with a new initial guess $\tilde{x}=S\alpha$ which +minimizes the error function $\|b-Ax\|_2$ over the Krylov subspace +spanned by the vectors of $S$. So, $\alpha$ is defined as the +solution of the large overdetermined linear system \begin{equation} -B\alpha=b, +R\alpha=b, \label{sec03:eq05} \end{equation} -where $B=AS$ is a dense rectangular matrix of size $n\times s$ and $s\ll n$. This leads -us to solve the system of normal equations +where $R=AS$ is a dense rectangular matrix of size $n\times s$ and +$s\ll n$. This leads us to solve the system of normal equations \begin{equation} -B^TB\alpha=B^Tb, +R^TR\alpha=R^Tb, \label{sec03:eq06} \end{equation} which is associated with the least squares problem \begin{equation} -\text{minimize}~\|b-B\alpha\|_2, +\text{minimize}~\|b-R\alpha\|_2, \label{sec03:eq07} \end{equation} -where $B^T$ denotes the transpose of the matrix $B$. Since $B$ (i.e. $AS$) and -$b$ are split among $L$ clusters, the symmetric positive definite system~(\ref{sec03:eq06}) -is solved in parallel. Thus, an iterative method would be more appropriate than -a direct one for solving this system. We use the parallel conjugate gradient -method for the normal equations CGNR~\cite{S96,refCGNR}. - -%%% Ecrire l'algorithme(s) -%%% Parler des synchronisations entre proc et clusters +where $R^T$ denotes the transpose of the matrix $R$. Since $R$ +(i.e. $AS$) and $b$ are split among $L$ clusters, the symmetric +positive definite system~(\ref{sec03:eq06}) is solved in +parallel. Thus, an iterative method would be more appropriate than a +direct one for solving this system. We use the parallel conjugate +gradient method for the normal equations CGNR~\cite{S96,refCGNR}. + +\begin{algorithm}[!t] +\caption{A two-stage linear solver with inner iteration GMRES method} +\begin{algorithmic}[1] +\State Load $A_l$, $B_l$, initial guess $x^0$ +\State Initialize the minimizer $\tilde{x}^0=x^0$ +\For {$k=1,2,3,\ldots$ until the global convergence} +\State Restart with $x^0=\tilde{x}^{k-1}$: \textbf{for} $j=1,2,\ldots,s$ \textbf{do} +\State\hspace{0.5cm} Inner iteration solver: \Call{InnerSolver}{$x^0$, $j$} +\State\hspace{0.5cm} Construct the basis $S$: add the column vector $X_l^j$ to the matrix $S_l$ +\State\hspace{0.5cm} Exchange the local solution vector $X_l^j$ with the neighboring clusters +\State\hspace{0.5cm} Compute the dense matrix $R$: $R_l^j=\sum^L_{i=1}A_{li}X_i^j$ +\State\textbf{end for} +\State Minimization $\|b-R\alpha\|_2$: \Call{UpdateMinimizer}{$S_l$, $R$, $b$} +\State Local solution of the linear system $Ax=b$: $X_l^k=\tilde{X}_l^k$ +\State Exchange the local minimizer $\tilde{X}_l^k$ with the neighboring clusters +\EndFor + +\Statex + +\Function {InnerSolver}{$x^0$, $j$} +\State Compute the local right-hand side: $Y_l = B_l - \sum^L_{i=1,i\neq l}A_{li}X_i^0$ +\State Solving the local splitting $A_{ll}X_l^j=Y_l$ with the parallel GMRES method, such that $X_l^0$ is the initial guess. +\State \Return $X_l^j$ +\EndFunction + +\Statex + +\Function {UpdateMinimizer}{$S_l$, $R$, $b$, $k$} +\State Solving the normal equations $R^TR\alpha=R^Tb$ in parallel by $L$ clusters using the parallel CGNR method +\State Compute the local minimizer: $\tilde{X}_l^k=S_l\alpha$ +\State \Return $\tilde{X}_l^k$ +\EndFunction +\end{algorithmic} +\label{algo:01} +\end{algorithm} + +The main key points of the multisplitting method for solving large +sparse linear systems are given in Algorithm~\ref{algo:01}. This +algorithm is based on a two-stage method with a minimization using the +GMRES iterative method as an inner solver. It is executed in parallel +by each cluster of processors. The matrices and vectors with the +subscript $l$ represent the local data for the cluster $l$, where +$l\in\{1,\ldots,L\}$. The two-stage solver uses two different parallel +iterative algorithms: the GMRES method for solving each splitting on a +cluster of processors, and the CGNR method executed in parallel by all +clusters for minimizing the function error over the Krylov subspace +spanned by $S$. The algorithm requires two global synchronizations +between the $L$ clusters. The first one is performed at line~$12$ in +Algorithm~\ref{algo:01} to exchange the local values of the vector +solution $x$ (i.e. the minimizer $\tilde{x}$) required to restart the +multisplitting solver. The second one is needed to construct the +matrix $R$ of the Krylov subspace. We choose to perform this latter +synchronization $s$ times in every outer iteration $k$ (line~$7$ in +Algorithm~\ref{algo:01}). This is a straightforward way to compute the +matrix-matrix multiplication $R=AS$. We implement all +synchronizations by using the MPI collective communication +subroutines. -- 2.39.5