From 38a7609896d59a80d0149b4698596417805bbe3f Mon Sep 17 00:00:00 2001 From: couturie Date: Sat, 19 Sep 2015 16:34:55 +0200 Subject: [PATCH] new --- IJHPCN/paper.tex | 64 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/IJHPCN/paper.tex b/IJHPCN/paper.tex index 999ce37..063abb3 100644 --- a/IJHPCN/paper.tex +++ b/IJHPCN/paper.tex @@ -786,6 +786,30 @@ taken into account with TSIRM. %%NEW +{\bf example ex45/ksp à décrire et commenter en montrant que hypre est pourri avec cet exemple} + +\begin{table*}[htbp] +\begin{center} +\begin{tabular}{|r|r|r|r|r|r|r|r|} +\hline + + nb. cores & \multicolumn{2}{c|}{FGMRES/ASM} & \multicolumn{2}{c|}{TSIRM CGLS/ASM} & gain& \multicolumn{2}{c|}{FGMRES/HYPRE} \\ +\cline{2-5} \cline{7-8} + & Time & \# Iter. & Time & \# Iter. & & Time & \# Iter. \\\hline \hline + 512 & 5.54 & 685 & 2.5 & 570 & 2.21 & 128.9 & 9 \\ + 2,048 & 14.95 & 1,560 & 4.32 & 746 & 3.48 & 335.7 & 9 \\ + 4,096 & 25.13 & 2,369 & 5.61 & 859 & 4.48 & $>$1,000 & -- \\ + 8,192 & 44.35 & 3,197 & 7.6 & 1,083 & 5.84 & $>$1,000 & -- \\ + +\hline + +\end{tabular} +\caption{Comparison of FGMRES and TSIRM for ex45 of PETSc/KSP with two preconditioners (ASM and HYPRE) having 5,000 components per core on Curie ($\epsilon_{tsirm}=1e-10$, $max\_iter_{kryl}=30$, $s=12$, $max\_iter_{ls}=15$, $\epsilon_{ls}=1e-40$), time is expressed in seconds.} +\label{tab:06} +\end{center} +\end{table*} + + \subsection{Parallel nonlinear problems} With PETSc, linear solvers are used inside nonlinear solvers. The SNES @@ -799,10 +823,17 @@ classical solvers. Consequently, we have chosen two of these examples: ex14 and ex20. In ex14, the code solves the Bratu (SFI - solid fuel ignition) nonlinear partial difference equations in 3 dimension. In ex20, the code solves a 3 dimension radiative transport test problem. For more details on these examples, -interested readers are invited to see the code in the PETSc examples. 
- -In Table~\ref{tab:07} we report the result of our experiments for the example -ex14. +interested readers are invited to see the code in the PETSc examples. For both +these examples, a weak scaling case is chosen where processors have +approximately a number of components equal to 100,000. + +In Table~\ref{tab:07} we report the result of our experiments for the example +ex14 with the block Jacobi preconditioner. For TSIRM the CGLS algorithm is used +to solve the minimization step. In this table, we can see that the number of +iterations used by the linear solver is smaller with TSIRM compared with FGMRES. +Consequently the execution times are smaller with TSIRM. The gain between TSIRM +and FGMRES is between 6 and 8. The parameters of TSIRM are expressed in the +caption of the table. \begin{table*}[htbp] \begin{center} @@ -812,10 +843,10 @@ ex14. nb. cores & \multicolumn{2}{c|}{FGMRES/BJAC} & \multicolumn{2}{c|}{TSIRM CGLS/BJAC} & gain \\ \cline{2-5} & Time & \# Iter. & Time & \# Iter. & \\\hline \hline - 1024 & 159.52 & 11,584 & 26.34 & 1,563 & 6.06 \\ - 2048 & 226.24 & 16,459 & 37.23 & 2,248 & 6.08\\ - 4096 & 391.21 & 27,794 & 50.93 & 2,911 & 7.69\\ - 8192 & 543.23 & 37,770 & 79.21 & 4,324 & 6.86 \\ + 1,024 & 159.52 & 11,584 & 26.34 & 1,563 & 6.06 \\ + 2,048 & 226.24 & 16,459 & 37.23 & 2,248 & 6.08\\ + 4,096 & 391.21 & 27,794 & 50.93 & 2,911 & 7.69\\ + 8,192 & 543.23 & 37,770 & 79.21 & 4,324 & 6.86 \\ \hline @@ -825,6 +856,15 @@ ex14. \end{center} \end{table*} +In Table~\ref{tab:08}, the results of the experiments with the example ex20 are +reported. The block Jacobi preconditioner has also been used and CGLS to solve +the minimization step for TSIRM. For this example, we can observe that the number +of iterations for FGMRES increases drastically when the number of cores +increases. 
With TSIRM, we can see that the number of iterations is initially +very small compared to the FGMRES ones and when the number of cores increases, +the number of iterations increases more slowly with TSIRM than with FGMRES. For +this example, the gain between TSIRM and FGMRES ranges from 8 with 1,024 +cores to more than 16 with 8,192 cores. \begin{table*}[htbp] \begin{center} @@ -834,10 +874,10 @@ ex14. nb. cores & \multicolumn{2}{c|}{FGMRES/BJAC} & \multicolumn{2}{c|}{TSIRM CGLS/BJAC} & gain \\ \cline{2-5} & Time & \# Iter. & Time & \# Iter. & \\\hline \hline - 1024 & 667.92 & 48,732 & 81.65 & 5,087 & 8.18 \\ - 2048 & 966.87 & 77,177 & 90.34 & 5,716 & 10.70\\ - 4096 & 1,742.31 & 124,411 & 119.21 & 6,905 & 14.61\\ - 8192 & 2,739.21 & 187,626 & 168.9 & 9,000 & 16.22\\ + 1,024 & 667.92 & 48,732 & 81.65 & 5,087 & 8.18 \\ + 2,048 & 966.87 & 77,177 & 90.34 & 5,716 & 10.70\\ + 4,096 & 1,742.31 & 124,411 & 119.21 & 6,905 & 14.61\\ + 8,192 & 2,739.21 & 187,626 & 168.9 & 9,000 & 16.22\\ \hline -- 2.39.5