X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/hpcc2014.git/blobdiff_plain/61b2ca8a9fc3fc1c52c4b66234796415d8b47e3f..9a16c3f8b303f6260ecf3bf14459ee0bd43e6ef1:/hpcc.tex

diff --git a/hpcc.tex b/hpcc.tex
index 6cc2be1..e780bbd 100644
--- a/hpcc.tex
+++ b/hpcc.tex
@@ -296,51 +296,75 @@ Let $Ax=b$ be a large sparse system of $n$ linear equations in $\mathbb{R}$, whe
       B_L
     \end{array} \right)
 \end{equation*}
-in such a way that successive rows of matrix $A$ and both vectors $x$ and $b$ are assigned to one cluster, where for all $l,m\in\{1,\ldots,L\}$ $A_{lm}$ is a rectangular block of $A$ of size $n_l\times n_m$, $X_l$ and $B_l$ are sub-vectors of $x$ and $b$, respectively, of size $n_l$ each and $\sum_{l} n_l=\sum_{m} n_m=n$.
+in such a way that successive rows of matrix $A$ and both vectors $x$ and $b$
+are assigned to one cluster, where for all $\ell,m\in\{1,\ldots,L\}$ $A_{\ell
+  m}$ is a rectangular block of $A$ of size $n_\ell\times n_m$, $X_\ell$ and
+$B_\ell$ are sub-vectors of $x$ and $b$, respectively, of size $n_\ell$ each and
+$\sum_{\ell} n_\ell=\sum_{m} n_m=n$.
 
 The multisplitting method proceeds by iteration to solve in parallel the linear system on $L$ clusters of processors, in such a way each sub-system
 \begin{equation}
   \label{eq:4.1}
   \left\{
     \begin{array}{l}
-      A_{ll}X_l = Y_l \text{, such that}\\
-      Y_l = B_l - \displaystyle\sum_{\substack{m=1\\ m\neq l}}^{L}A_{lm}X_m
+      A_{\ell\ell}X_\ell = Y_\ell \text{, such that}\\
+      Y_\ell = B_\ell - \displaystyle\sum_{\substack{m=1\\ m\neq \ell}}^{L}A_{\ell m}X_m
     \end{array}
   \right.
 \end{equation}
-is solved independently by a cluster and communications are required to update the right-hand side sub-vector $Y_l$, such that the sub-vectors $X_m$ represent the data dependencies between the clusters. As each sub-system (\ref{eq:4.1}) is solved in parallel by a cluster of processors, our multisplitting method uses an iterative method as an inner solver which is easier to parallelize and more scalable than a direct method. In this work, we use the parallel algorithm of GMRES method~\cite{ref1} which is one of the most used iterative method by many researchers. 
+is solved independently by a cluster and communications are required to update
+the right-hand side sub-vector $Y_\ell$, such that the sub-vectors $X_m$
+represent the data dependencies between the clusters. As each sub-system
+(\ref{eq:4.1}) is solved in parallel by a cluster of processors, our
+multisplitting method uses an iterative method as an inner solver which is
+easier to parallelize and more scalable than a direct method. In this work, we
+use the parallel algorithm of GMRES method~\cite{ref1} which is one of the most
+used iterative method by many researchers.
 
 \begin{figure}[!t]
   %%% IEEE instructions forbid to use an algorithm environment here, use figure
   %%% instead
 \begin{algorithmic}[1]
-\Input $A_l$ (sparse sub-matrix), $B_l$ (right-hand side sub-vector)
-\Output $X_l$ (solution sub-vector)\vspace{0.2cm}
-\State Load $A_l$, $B_l$
+\Input $A_\ell$ (sparse sub-matrix), $B_\ell$ (right-hand side sub-vector)
+\Output $X_\ell$ (solution sub-vector)\medskip
+
+\State Load $A_\ell$, $B_\ell$
 \State Set the initial guess $x^0$
 \For {$k=0,1,2,\ldots$ until the global convergence}
 \State Restart outer iteration with $x^0=x^k$
 \State Inner iteration: \Call{InnerSolver}{$x^0$, $k+1$}
-\State\label{algo:01:send} Send shared elements of $X_l^{k+1}$ to neighboring clusters
-\State\label{algo:01:recv} Receive shared elements in $\{X_m^{k+1}\}_{m\neq l}$
+\State\label{algo:01:send} Send shared elements of $X_\ell^{k+1}$ to neighboring clusters
+\State\label{algo:01:recv} Receive shared elements in $\{X_m^{k+1}\}_{m\neq \ell}$
 \EndFor
 
 \Statex
 
 \Function {InnerSolver}{$x^0$, $k$}
-\State Compute local right-hand side $Y_l$:
+\State Compute local right-hand side $Y_\ell$:
        \begin{equation*}
-         Y_l = B_l - \sum\nolimits^L_{\substack{m=1\\ m\neq l}}A_{lm}X_m^0
+         Y_\ell = B_\ell - \sum\nolimits^L_{\substack{m=1\\ m\neq \ell}}A_{\ell m}X_m^0
        \end{equation*}
-\State Solving sub-system $A_{ll}X_l^k=Y_l$ with the parallel GMRES method
-\State \Return $X_l^k$
+\State Solving sub-system $A_{\ell\ell}X_\ell^k=Y_\ell$ with the parallel GMRES method
+\State \Return $X_\ell^k$
 \EndFunction
 \end{algorithmic}
 \caption{A multisplitting solver with GMRES method}
 \label{algo:01}
 \end{figure}
 
-Algorithm on Figure~\ref{algo:01} shows the main key points of the multisplitting method to solve a large sparse linear system. This algorithm is based on an outer-inner iteration method where the parallel synchronous GMRES method is used to solve the inner iteration. It is executed in parallel by each cluster of processors. For all $l,m\in\{1,\ldots,L\}$, the matrices and vectors with the subscript $l$ represent the local data for cluster $l$, while $\{A_{lm}\}_{m\neq l}$ are off-diagonal matrices of sparse matrix $A$ and $\{X_m\}_{m\neq l}$ contain vector elements of solution $x$ shared with neighboring clusters. At every outer iteration $k$, asynchronous communications are performed between processors of the local cluster and those of distant clusters (lines~\ref{algo:01:send} and~\ref{algo:01:recv} in Figure~\ref{algo:01}). The shared vector elements of the solution $x$ are exchanged by message passing using MPI non-blocking communication routines.
+Algorithm on Figure~\ref{algo:01} shows the main key points of the
+multisplitting method to solve a large sparse linear system. This algorithm is
+based on an outer-inner iteration method where the parallel synchronous GMRES
+method is used to solve the inner iteration. It is executed in parallel by each
+cluster of processors. For all $\ell,m\in\{1,\ldots,L\}$, the matrices and
+vectors with the subscript $\ell$ represent the local data for cluster $\ell$,
+while $\{A_{\ell m}\}_{m\neq \ell}$ are off-diagonal matrices of sparse matrix
+$A$ and $\{X_m\}_{m\neq \ell}$ contain vector elements of solution $x$ shared
+with neighboring clusters. At every outer iteration $k$, asynchronous
+communications are performed between processors of the local cluster and those
+of distant clusters (lines~\ref{algo:01:send} and~\ref{algo:01:recv} in
+Figure~\ref{algo:01}). The shared vector elements of the solution $x$ are
+exchanged by message passing using MPI non-blocking communication routines.
 
 \begin{figure}[!t]
 \centering
@@ -362,14 +386,14 @@ sets the token to \textit{True} if the local convergence is achieved or to
 global convergence is detected when the master of cluster 1 receives from the
 master of cluster $L$ a token set to \textit{True}. In this case, the master of
 cluster 1 broadcasts a stop message to masters of other clusters. In this work,
-the local convergence on each cluster $l$ is detected when the following
+the local convergence on each cluster $\ell$ is detected when the following
 condition is satisfied
 \begin{equation*}
-  (k\leq \MI) \text{ or } (\|X_l^k - X_l^{k+1}\|_{\infty}\leq\epsilon)
+  (k\leq \MI) \text{ or } (\|X_\ell^k - X_\ell^{k+1}\|_{\infty}\leq\epsilon)
 \end{equation*}
 where $\MI$ is the maximum number of outer iterations and $\epsilon$ is the
 tolerance threshold of the error computed between two successive local solution
-$X_l^k$ and $X_l^{k+1}$.
+$X_\ell^k$ and $X_\ell^{k+1}$.
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 We did not encounter major blocking problems when adapting the multisplitting algorithm previously described to a simulation environment like SimGrid unless some code 
@@ -636,6 +660,7 @@ obtained with a bandwidth of \np[Mbit/s]{1} as shown in
 Table~\ref{tab.cluster.3x67}.
 
 \RC{Est ce qu'on sait expliquer pourquoi il y a une telle diffÃ©rence entre les rÃ©sultats avec 2 et 3 clusters... Avec 3 clusters, ils sont pas trÃ¨s bons... Je me demande s'il ne faut pas les enlever...}
+\RC{En fait je pense avoir la rÃ©ponse Ã  ma remarque... On voit avec les 2 clusters que le gain est d'autant plus grand qu'on choisit une bonne prÃ©cision. Donc, plusieurs solutions, lancer rapidement un long test pour confirmer ca, ou enlever des tests... ou on ne change rien :-)}
 \LZK{Ma question est: le bw et lat sont ceux inter-clusters ou pour les deux inter et intra cluster??}
 
 \section{Conclusion}