\todo[color=orange!10,#1]{\sffamily\textbf{AS:} #2}\xspace}
-
-
\begin{document}
\title{Two parallel implementations of the Ehrlich-Aberth algorithm for root-finding of polynomials on multiple GPUs with OpenMP and MPI}
$n_{loc}$ = $n/ngpu$ (local size)\;
%$idx$ = $id_{gpu}\times n_{loc}$ (local offset)\;
Copy $P$, $P'$ from CPU to GPU\;
-\While{\emph{not convergence}}{
+\While{$max > \epsilon$}{
Copy $Z$ from CPU to GPU\;
$Z^{prev}$ = KernelSave($Z,n$)\;
- $Z_{loc}$ = KernelUpdate($P,P',Z^{prev},n_{loc}$)\;
+ $Z_{loc}$ = KernelUpdate($P,P',Z,n_{loc}$)\;
$\Delta Z_{loc}$ = KernelComputeError($Z_{loc},Z^{prev}_{loc},n_{loc}$)\;
$\Delta Z_{max}[id_{gpu}]$ = CudaMaxFunction($\Delta Z_{loc},n_{loc}$)\;
Copy $Z_{loc}$ from GPU to $Z$ in CPU\;
- $max$ = MaxFunction($\Delta Z_{max},ngpu$)\;
- TestConvergence($max,\epsilon$)\;
+ $max$ = MaxFunction($\Delta Z_{max},ngpu$)\;
}
\label{alg2-cuda-openmp}
\LZK{J'ai modifié l'algo. Le $P$ est mis shared. Qu'en est-il pour