From: lilia <lilia@mondomaine.fr>
Date: Sun, 12 Jan 2014 16:53:14 +0000 (+0100)
Subject: 12-01-2014
X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/Krylov_multi.git/commitdiff_plain/07677631c1d2e2d7aa7b88b82f62e12c72b11327

12-01-2014
---

diff --git a/biblio.bib b/biblio.bib
index 7d16800..a5311d6 100644
--- a/biblio.bib
+++ b/biblio.bib
@@ -99,14 +99,24 @@ pages = {},
 year = {2008},
 }
 
-@article{refCGNR
+@article{refCGNR,
 year={2001},
 journal={Annals of Operations Research},
 volume={103},
 number={1-4},
-title={An Adaptive CGNR Algorithm for Solving Large Linear Systems},
+title={An {A}daptive {CGNR} {A}lgorithm for {S}olving {L}arge {L}inear {S}ystems},
 publisher={Kluwer Academic Publishers},
 author={Li, Chunguang},
 pages={329-338},
 }
 
+@article{ref34,
+title = {{GMRES}: {A} {G}eneralized {M}inimal {R}esidual {A}lgorithm for {S}olving {N}onsymmetric {L}inear {S}ystems},
+author = {Saad, Y. and Schultz, M. H.},
+journal = {SIAM Journal on Scientific and Statistical Computing},
+volume = {7},
+number = {3},
+pages = {856--869},
+year = {1986},
+}
+
diff --git a/krylov_multi.tex b/krylov_multi.tex
index 1b8b43e..036c190 100644
--- a/krylov_multi.tex
+++ b/krylov_multi.tex
@@ -3,6 +3,12 @@
 \usepackage{amsfonts,amssymb}
 \usepackage{amsmath}
 \usepackage{graphicx}
+%\usepackage{algorithmic}
+%\usepackage[ruled,english,boxed,linesnumbered]{algorithm2e}
+%\usepackage[english]{algorithme}
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+
 
 \title{A scalable multisplitting algorithm for solving large sparse linear systems} 
 
@@ -191,10 +197,11 @@ Y_l = B_l - \displaystyle\sum_{i=1,i\neq l}^{L}A_{li}X_i,
 is solved  independently by a cluster of  processors and communication
 are required  to update the  right-hand side vectors $Y_l$,  such that
 the  vectors  $X_i$  represent   the  data  dependencies  between  the
-clusters. In this work, we use  the GMRES method as an inner iteration
-method  for  solving   the  sub-systems~(\ref{sec03:eq03}).  It  is  a
-well-known iterative method which  gives good performances for solving
-sparse linear systems in parallel on a cluster of processors.
+clusters. In this work,  we use the parallel GMRES method~\cite{ref34}
+as     an     inner    iteration     method     for    solving     the
+sub-systems~(\ref{sec03:eq03}).  It  is a well-known  iterative method
+which  gives good performances  for solving  sparse linear  systems in
+parallel on a cluster of processors.
 
 It should be noted that  the convergence of the inner iterative solver
 for  the  different  linear  sub-systems~(\ref{sec03:eq03})  does  not
@@ -218,39 +225,96 @@ splittings~(\ref{sec03:eq03})
 S=\{x^1,x^2,\ldots,x^s\},~s\leq n,
 \label{sec03:eq04}
 \end{equation}
-where   for  $k\in\{1,\ldots,s\}$,  $x^k=[X_1^k,\ldots,X_L^k]$   is  a
+where   for  $j\in\{1,\ldots,s\}$,  $x^j=[X_1^j,\ldots,X_L^j]$   is  a
 solution of the  global linear system.%The advantage such a method is that the Krylov subspace does not need to be spanned by an orthogonal basis.
 The advantage  of such a  Krylov subspace is  that we need  neither an
 orthogonal basis  nor synchronizations between  the different clusters
 to generate this basis.
 
-The multisplitting method is periodically restarted every $s$ iterations
-with a new initial guess $\tilde{x}=S\alpha$ which minimizes the error
-function $\|b-Ax\|_2$ over the Krylov subspace spanned by the vectors of $S$.
-So, $\alpha$ is defined as the solution of the large overdetermined linear system
+The  multisplitting   method  is  periodically   restarted  every  $s$
+iterations  with   a  new  initial   guess  $\tilde{x}=S\alpha$  which
+minimizes  the error  function $\|b-Ax\|_2$  over the  Krylov subspace
+spanned  by  the vectors  of  $S$.  So,  $\alpha$  is  defined as  the
+solution of the large overdetermined linear system
 \begin{equation}
-B\alpha=b,
+R\alpha=b,
 \label{sec03:eq05}
 \end{equation}
-where $B=AS$ is a dense rectangular matrix of size $n\times s$ and $s\ll n$. This leads
-us to solve the system of normal equations 
+where $R=AS$  is a  dense rectangular matrix  of size $n\times  s$ and
+$s\ll n$. This leads us to solve the system of normal equations
 \begin{equation}
-B^TB\alpha=B^Tb,
+R^TR\alpha=R^Tb,
 \label{sec03:eq06}
 \end{equation}
 which is associated with the least squares problem
 \begin{equation}
-\text{minimize}~\|b-B\alpha\|_2,
+\text{minimize}~\|b-R\alpha\|_2,
 \label{sec03:eq07}
 \end{equation}  
-where $B^T$ denotes the transpose of the matrix $B$. Since $B$ (i.e. $AS$) and
-$b$ are split among $L$ clusters, the symmetric positive definite system~(\ref{sec03:eq06})
-is solved in parallel. Thus, an iterative method would be more appropriate than
-a direct one for solving this system. We use the parallel conjugate gradient 
-method for the normal equations CGNR~\cite{S96,refCGNR}.
-
-%%% Ecrire l'algorithme(s)
-%%% Parler des synchronisations entre proc et clusters 
+where  $R^T$  denotes the  transpose  of  the  matrix $R$.  Since  $R$
+(i.e.  $AS$) and  $b$  are  split among  $L$  clusters, the  symmetric
+positive    definite    system~(\ref{sec03:eq06})    is   solved    in
+parallel. Thus, an  iterative method would be more  appropriate than a
+direct  one for  solving this  system. We  use the  parallel conjugate
+gradient method for the normal equations CGNR~\cite{S96,refCGNR}.
+
+\begin{algorithm}[!t]
+\caption{A two-stage linear solver with inner iteration GMRES method}
+\begin{algorithmic}[1]
+\State Load $A_l$, $B_l$, initial guess $x^0$
+\State Initialize the minimizer $\tilde{x}^0=x^0$
+\For {$k=1,2,3,\ldots$ until the global convergence}
+\State Restart with $x^0=\tilde{x}^{k-1}$: \textbf{for} $j=1,2,\ldots,s$ \textbf{do}
+\State\hspace{0.5cm} Inner iteration solver: \Call{InnerSolver}{$x^0$, $j$}
+\State\hspace{0.5cm} Construct the basis $S$: add the column vector $X_l^j$ to the matrix $S_l$
+\State\hspace{0.5cm} Exchange the local solution vector $X_l^j$ with the neighboring clusters
+\State\hspace{0.5cm} Compute the dense matrix $R$: $R_l^j=\sum^L_{i=1}A_{li}X_i^j$ 
+\State\textbf{end for} 
+\State Minimization $\|b-R\alpha\|_2$: \Call{UpdateMinimizer}{$S_l$, $R$, $b$}
+\State Local solution of the linear system $Ax=b$: $X_l^k=\tilde{X}_l^k$
+\State Exchange the local minimizer $\tilde{X}_l^k$ with the neighboring clusters
+\EndFor
+
+\Statex
+
+\Function {InnerSolver}{$x^0$, $j$}
+\State Compute the local right-hand side: $Y_l = B_l - \sum^L_{i=1,i\neq l}A_{li}X_i^0$
+\State Solving the local splitting $A_{ll}X_l^j=Y_l$ with the parallel GMRES method, such that $X_l^0$ is the initial guess.
+\State \Return $X_l^j$
+\EndFunction
+
+\Statex
+
+\Function {UpdateMinimizer}{$S_l$, $R$, $b$, $k$}
+\State Solving the normal equations $R^TR\alpha=R^Tb$ in parallel by $L$ clusters using the parallel CGNR method
+\State Compute the local minimizer: $\tilde{X}_l^k=S_l\alpha$
+\State \Return $\tilde{X}_l^k$
+\EndFunction
+\end{algorithmic}
+\label{algo:01}
+\end{algorithm}
+
+The main  key points  of the multisplitting  method for  solving large
+sparse  linear  systems are  given  in Algorithm~\ref{algo:01}.   This
+algorithm is based on a two-stage method with a minimization using the
+GMRES iterative method as an  inner solver. It is executed in parallel
+by  each cluster  of processors.   The matrices  and vectors  with the
+subscript  $l$ represent  the local  data for  the cluster  $l$, where
+$l\in\{1,\ldots,L\}$. The two-stage solver uses two different parallel
+iterative algorithms: the GMRES method for solving each splitting on a
+cluster of processors, and the CGNR method executed in parallel by all
+clusters for  minimizing the function  error over the  Krylov subspace
+spanned by  $S$.  The  algorithm requires two  global synchronizations
+between the $L$  clusters. The first one is  performed at line~$12$ in
+Algorithm~\ref{algo:01}  to exchange  the local  values of  the vector
+solution $x$ (i.e. the  minimizer $\tilde{x}$) required to restart the
+multisplitting  solver. The  second  one is  needed  to construct  the
+matrix $R$ of  the Krylov subspace.  We choose  to perform this latter
+synchronization $s$  times in every  outer iteration $k$  (line~$7$ in
+Algorithm~\ref{algo:01}). This is a straightforward way to compute the
+matrix-matrix    multiplication     $R=AS$.     We    implement    all
+synchronizations   by   using   the   MPI   collective   communication
+subroutines.