Merge branch 'master' of ssh://bilbo.iut-bm.univ-fcomte.fr/GMRES2stage

author lilia <lilia@agora>

Fri, 10 Oct 2014 12:45:41 +0000 (14:45 +0200)

committer lilia <lilia@agora>

Fri, 10 Oct 2014 12:45:41 +0000 (14:45 +0200)
author lilia <lilia@agora>
Fri, 10 Oct 2014 12:45:41 +0000 (14:45 +0200)
committer lilia <lilia@agora>
Fri, 10 Oct 2014 12:45:41 +0000 (14:45 +0200)
diff --combined paper.tex

index c0e8b160c9e49691cd802e77fbeb37f4f5bdf246,64a88a8a1d339bc6ab79a382281a7c8d415edb3b..063cbf9acbeb195e2500a1bbacaa905fa9327143
--- 1/paper.tex
--- 2/paper.tex
+++ b/paper.tex
@@@ -669,8 -669,8 +669,8 @@@ called for a  maximum of $max\_iter_{kr
   equals to  the restart  number of the  GMRES-like method. Moreover,  a tolerance
   threshold must be specified for the  solver. In practice, this threshold must be
   much  smaller  than the  convergence  threshold  of  the TSIRM  algorithm  (\emph{i.e.}
- $\epsilon_{tsirm}$).  Line~\ref{algo:store}, $S_{k~ mod~ s}=x^k$ consists in copying the
- solution  $x_k$  into the  column  $k~ mod~ s$ of  the  matrix  $S$. After  the
+ $\epsilon_{tsirm}$).  Line~\ref{algo:store}, $S_{k \bmod s}=x_k$ consists in copying the
+ solution  $x_k$  into the  column  $k \bmod s$ of  the  matrix  $S$, where $S$ is a matrix of size $n\times s$ whose column vector $i$ is denoted by $S_i$. After  the
   minimization, the matrix $S$ is reused with the new values of the residuals.  To
   solve the minimization problem, an  iterative method is used. Two parameters are
   required for that: the maximum number of iterations and the threshold to stop the
@@@ -686,13 -686,13 +686,13 @@@ Let us summarize the most important par
   \end{itemize}
   
   
- The  parallelisation  of  TSIRM  relies   on  the  parallelization  of  all  its
+ The  parallelization  of  TSIRM  relies   on  the  parallelization  of  all  its
   parts. More  precisely, except  the least-squares step,  all the other  parts are
  straightforward to carry out in parallel. In  order to develop a  parallel version of
   our   code,   we   have   chosen  to   use   PETSc~\cite{petsc-web-page}.    For
   line~\ref{algo:matrix_mul} the  matrix-matrix multiplication is  implemented and
   efficient since the  matrix $A$ is sparse and since the  matrix $S$ contains few
- colums in  practice. As explained  previously, at least  two methods seem  to be
+ columns in  practice. As explained  previously, at least  two methods seem  to be
   interesting to solve the least-squares minimization, CGLS and LSQR.
   
  In the following  we recall the CGLS algorithm. The LSQR  method follows more or
@@@ -837,7 -837,7 +837,7 @@@ torso3             & fgmres / sor  & 37
   \hline
   
   \end{tabular}
- -\caption{Comparison of (F)GMRES and 2 stage (F)GMRES algorithms in sequential with some matrices, time is expressed in seconds.}
+ +\caption{Comparison of (F)GMRES and TSIRM with (F)GMRES in sequential with some matrices, time is expressed in seconds.}
   \label{tab:02}
   \end{center}
   \end{table}
@@@ -896,7 -896,7 +896,7 @@@ Table~\ref{tab:03} shows  the executio
   example ex15  of PETSc on the  Juqueen architecture. Different  numbers of cores
  are  studied ranging  from  2,048  up to 16,384.   Two  preconditioners have  been
  tested: \emph{mg} and \emph{sor}.   For those experiments,  the number  of components  (or unknowns  of the
- -problems)  per processor  is fixed  to 25,000,  also called  weak  scaling. This
+ +problems)  per core  is fixed  to 25,000,  also called  weak  scaling. This
   number can seem relatively small. In fact, for some applications that need a lot
  of  memory, the  number of  components per  core sometimes  needs  to be
   small.
@@@ -943,7 -943,7 +943,7 @@@ the number of iterations. So, the overa
   \begin{tabular}{|r|r|r|r|r|r|r|r|r|} 
   \hline
   
- -  nb. cores & threshold   & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSIRM CGLS} &  \multicolumn{2}{c|}{TSIRM LSQR} & best gain \\ 
+ +  nb. cores & threshold   & \multicolumn{2}{c|}{FGMRES} & \multicolumn{2}{c|}{TSIRM CGLS} &  \multicolumn{2}{c|}{TSIRM LSQR} & best gain \\ 
   \cline{3-8}
                &                       & Time  & \# Iter.  & Time  & \# Iter. & Time  & \# Iter. & \\\hline \hline
     2,048      & 8e-5                  & 108.88 & 16,560  & 23.06  &  3,630  & 22.79  & 3,630   & 4.77 \\
@@@ -956,7 -956,7 +956,7 @@@
   \hline
   
   \end{tabular}
- -\caption{Comparison of FGMRES  and 2 stage FGMRES algorithms for ex54 of Petsc (both with the MG preconditioner) with 25000 components per core on Curie (restart=30, s=12),  time is expressed in seconds.}
+ +\caption{Comparison of FGMRES  and TSIRM with FGMRES algorithms for ex54 of PETSc (both with the MG preconditioner) with 25,000 components per core on Curie (restart=30, s=12),  time is expressed in seconds.}
   \label{tab:04}
   \end{center}
   \end{table*}
@@@ -970,9 -970,9 +970,9 @@@ In Table~\ref{tab:04}, some experiment
   \begin{tabular}{|r|r|r|r|r|r|r|r|r|r|r|} 
   \hline
   
- -  nb. cores   & \multicolumn{2}{c|}{GMRES} & \multicolumn{2}{c|}{TSIRM CGLS} &  \multicolumn{2}{c|}{TSIRM LSQR} & best gain & \multicolumn{3}{c|}{efficiency} \\ 
+ +  nb. cores   & \multicolumn{2}{c|}{FGMRES} & \multicolumn{2}{c|}{TSIRM CGLS} &  \multicolumn{2}{c|}{TSIRM LSQR} & best gain & \multicolumn{3}{c|}{efficiency} \\ 
   \cline{2-7} \cline{9-11}
- -                    & Time  & \# Iter.  & Time  & \# Iter. & Time  & \# Iter. &   & GMRES & TS CGLS & TS LSQR\\\hline \hline
+ +                    & Time  & \# Iter.  & Time  & \# Iter. & Time  & \# Iter. &   & FGMRES & TS CGLS & TS LSQR\\\hline \hline
      512              & 3,969.69 & 33,120 & 709.57 & 5,790  & 622.76 & 5,070  & 6.37  &   1    &    1    &     1     \\
      1024             & 1,530.06  & 25,860 & 290.95 & 4,830  & 307.71 & 5,070 & 5.25  &  1.30  &    1.21  &   1.01     \\
      2048             & 919.62    & 31,470 & 237.52 & 8,040  & 194.22 & 6,510 & 4.73  & 1.08   &    .75   &   .80\\
@@@ -982,7 -982,7 +982,7 @@@
   \hline
   
   \end{tabular}
- -\caption{Comparison of FGMRES  and 2 stage FGMRES algorithms for ex54 of Petsc (both with the MG preconditioner) with 204,919,225 components on Curie with different number of cores (restart=30, s=12, threshol 5e-5),  time is expressed in seconds.}
+ +\caption{Comparison of FGMRES  and TSIRM with FGMRES for ex54 of PETSc (both with the MG preconditioner) with 204,919,225 components on Curie with different number of cores (restart=30, s=12, threshold 5e-5),  time is expressed in seconds.}
   \label{tab:05}
   \end{center}
   \end{table*}
@@@ -1010,7 -1010,7 +1010,7 @@@
   
   future plan : \\
   - study other kinds of matrices, problems, inner solvers\\
- -- test the influence of all the parameters\\
+ +- test the influence of all parameters\\
  - adaptive number of outer iterations to minimize\\
   - other methods to minimize the residuals?\\
   - implement our solver inside PETSc
@@@ -1025,7 -1025,7 +1025,7 @@@
   %%%*********************************************************
   \section*{Acknowledgment}
   This  paper  is   partially  funded  by  the  Labex   ACTION  program  (contract
- -ANR-11-LABX-01-01).   We acknowledge PRACE  for awarding  us access  to resource
+ +ANR-11-LABX-01-01).   We acknowledge PRACE  for awarding  us access  to resources
  Curie and Juqueen, based in France and Germany respectively.
author	lilia <lilia@agora>
	Fri, 10 Oct 2014 12:45:41 +0000 (14:45 +0200)
committer	lilia <lilia@agora>
	Fri, 10 Oct 2014 12:45:41 +0000 (14:45 +0200)