X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/GMRES2stage.git/blobdiff_plain/df562f71fc6dcfdbb4e0b77f138977ca7219df6f..49d3ad986610c9438c522b61b9c1f57ebe4b824e:/IJHPCN/paper.tex diff --git a/IJHPCN/paper.tex b/IJHPCN/paper.tex index 0c88f29..abe4b2d 100644 --- a/IJHPCN/paper.tex +++ b/IJHPCN/paper.tex @@ -511,7 +511,7 @@ Table~\ref{tab:01}. These latter, which are real-world applications matrices, have been extracted from the Davis collection, University of Florida~\cite{Dav97}. -\begin{table}[htbp] +\begin{table*}[htbp] \begin{center} \begin{tabular}{|c|c|r|r|r|} \hline @@ -528,7 +528,7 @@ torso3 & 2D/3D problem & 259,156 & 4,429,042 \\ \caption{Main characteristics of the sparse matrices chosen from the Davis collection} \label{tab:01} \end{center} -\end{table} +\end{table*} Chosen parameters are detailed below. We have stopped the GMRES every 30 iterations (\emph{i.e.}, $max\_iter_{kryl}=30$), which is the default @@ -550,7 +550,7 @@ fact this also depends on two parameters: the number of iterations before stoppi and the number of iterations to perform the minimization. -\begin{table}[htbp] +\begin{table*}[htbp] \begin{center} \begin{tabular}{|c|c|r|r|r|r|} \hline @@ -571,7 +571,7 @@ torso3 & fgmres / sor & 37.70 & 565 & 34.97 & 510 \\ \caption{Comparison between sequential standalone (F)GMRES and TSIRM with (F)GMRES (time in seconds).} \label{tab:02} \end{center} -\end{table} +\end{table*} @@ -638,7 +638,7 @@ preconditioners in PETSc, readers are referred to~\cite{petsc-web-page}. 
\hline \end{tabular} -\caption{Comparison of FGMRES and TSIRM with FGMRES for example ex15 of PETSc with two preconditioners (mg and sor) having 25,000 components per core on Juqueen ($\epsilon_{tsirm}=1e-3$, $max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} +\caption{Comparison of FGMRES and TSIRM with FGMRES for example ex15 of PETSc/KSP with two preconditioners (mg and sor) having 25,000 components per core on Juqueen ($\epsilon_{tsirm}=1e-3$, $max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} \label{tab:03} \end{center} \end{table*} @@ -710,7 +710,7 @@ interesting. \hline \end{tabular} -\caption{Comparison of FGMRES and TSIRM with FGMRES algorithms for ex54 of Petsc (both with the MG preconditioner) with 25,000 components per core on Curie ($max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} +\caption{Comparison of FGMRES and TSIRM with FGMRES algorithms for ex54 of PETSc/KSP (both with the MG preconditioner) with 25,000 components per core on Curie ($max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} \label{tab:04} \end{center} \end{table*} @@ -769,7 +769,7 @@ taken into account with TSIRM. 
\hline \end{tabular} -\caption{Comparison of FGMRES and TSIRM for ex54 of PETSc (both with the MG preconditioner) with 204,919,225 components on Curie with different number of cores ($\epsilon_{tsirm}=5e-5$, $max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} +\caption{Comparison of FGMRES and TSIRM for ex54 of PETSc/KSP (both with the MG preconditioner) with 204,919,225 components on Curie with different number of cores ($\epsilon_{tsirm}=5e-5$, $max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} \label{tab:05} \end{center} \end{table*} @@ -784,7 +784,7 @@ taken into account with TSIRM. Concerning the experiments some other remarks are interesting. \begin{itemize} -\item We have tested other examples of PETSc (ex29, ex45, ex49). For all these +\item We have tested other examples of PETSc/KSP (ex29, ex45, ex49). For all these examples, we have also obtained similar gains between GMRES and TSIRM but those examples are not scalable with many cores. In general, we had some problems with more than $4,096$ cores. @@ -805,6 +805,82 @@ Concerning the experiments some other remarks are interesting. %%%********************************************************* +%%NEW +\begin{table*}[htbp] +\begin{center} +\begin{tabular}{|r|r|r|r|r|r|r|r|} +\hline + + nb. cores & \multicolumn{2}{c|}{FGMRES/ASM} & \multicolumn{2}{c|}{TSIRM CGLS/ASM} & gain& \multicolumn{2}{c|}{FGMRES/HYPRE} \\ +\cline{2-5} \cline{7-8} + & Time & \# Iter. & Time & \# Iter. & & Time & \# Iter. 
\\\hline \hline + 512 & 5.54 & 685 & 2.5 & 570 & 2.21 & 128.9 & 9 \\ + 2048 & 14.95 & 1,560 & 4.32 & 746 & 3.48 & 335.7 & 9 \\ + 4096 & 25.13 & 2,369 & 5.61 & 859 & 4.48 & $>$1,000 & -- \\ + 8192 & 44.35 & 3,197 & 7.6 & 1,083 & 5.84 & $>$1,000 & -- \\ + +\hline + +\end{tabular} +\caption{Comparison of FGMRES and TSIRM for ex45 of PETSc/KSP with two preconditioners (ASM and HYPRE) having 25,000 components per core on Curie ($\epsilon_{tsirm}=1e-10$, $max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} +\label{tab:06} +\end{center} +\end{table*} + + +\begin{figure}[htbp] +\centering + \includegraphics[width=0.5\textwidth]{nb_iter_sec_ex45_curie} +\caption{Number of iterations per second with ex45 and the same parameters as in Table~\ref{tab:06} (weak scaling).} +\label{fig:03} +\end{figure} + + + +\begin{table*}[htbp] +\begin{center} +\begin{tabular}{|r|r|r|r|r|r|} +\hline + + nb. cores & \multicolumn{2}{c|}{FGMRES/BJAC} & \multicolumn{2}{c|}{TSIRM CGLS/BJAC} & gain \\ +\cline{2-5} + & Time & \# Iter. & Time & \# Iter. & \\\hline \hline + 1024 & 667.92 & 48,732 & 81.65 & 5,087 & 8.18 \\ + 2048 & 966.87 & 77,177 & 90.34 & 5,716 & 10.70\\ + 4096 & 1,742.31 & 124,411 & 119.21 & 6,905 & 14.61\\ + 8192 & 2,739.21 & 187,626 & 168.9 & 9,000 & 16.22\\ + +\hline + +\end{tabular} +\caption{Comparison of FGMRES and TSIRM for ex20 of PETSc/SNES with a Block Jacobi preconditioner having 100,000 components per core on Curie ($\epsilon_{tsirm}=1e-10$, $max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} +\label{tab:07} +\end{center} +\end{table*} + +\begin{table*}[htbp] +\begin{center} +\begin{tabular}{|r|r|r|r|r|r|} +\hline + + nb. cores & \multicolumn{2}{c|}{FGMRES/BJAC} & \multicolumn{2}{c|}{TSIRM CGLS/BJAC} & gain \\ +\cline{2-5} + & Time & \# Iter. & Time & \# Iter. 
& \\\hline \hline + 1024 & 159.52 & 11,584 & 26.34 & 1,563 & 6.06 \\ + 2048 & 226.24 & 16,459 & 37.23 & 2,248 & 6.08\\ + 4096 & 391.21 & 27,794 & 50.93 & 2,911 & 7.69\\ + 8192 & 543.23 & 37,770 & 79.21 & 4,324 & 6.86 \\ + +\hline + +\end{tabular} +\caption{Comparison of FGMRES and TSIRM for ex14 of PETSc/SNES with a Block Jacobi preconditioner having 100,000 components per core on Curie ($\epsilon_{tsirm}=1e-10$, $max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} +\label{tab:08} +\end{center} +\end{table*} + + +%%ENDNEW %%%********************************************************* %%%*********************************************************