-%% \begin{enumerate}
-%% \begin{algorithm}[htpb]
-%% \label{alg2-cuda-mpi}
-%% %\LinesNumbered
-%% \caption{CUDA-MPI Algorithm to find roots with the Ehrlich-Aberth method}
-
-%% \KwIn{$Z^{0}$ (Initial root's vector), $\varepsilon$ (Error tolerance
-%% threshold), P (Polynomial to solve), Pu (Derivative of P), $n$ (Polynomial degrees), $\Delta z$ ( error of stop condition), $num_gpus$ (number of MPI processes/ number of GPUs), Size (number of roots)}
-
-%% \KwOut {$Z$ (Solution root's vector), $ZPrec$ (Previous solution root's vector)}
-
-%% \BlankLine
-%% \item Initialization of P\;
-%% \item Initialization of Pu\;
-%% \item Initialization of the solution vector $Z^{0}$\;
-%% \item Allocate and copy initial data from CPU memories to GPU global memories\;
-%% \item $index= Size/num_gpus$\;
-%% \item k=0\;
-%% \While {$error > \epsilon$}{
-%% \item Let $\Delta z=0$\;
-%% \item $kernel\_save(ZPrec,Z)$\;
-%% \item k=k+1\;
-%% \item $kernel\_update(Z,P,Pu,index)$\;
-%% \item $kernel\_testConverge(\Delta z,Z,ZPrec)$\;
-%% \item ComputeMaxError($\Delta z$,error)\;
-%% \item Copy results from GPU memories to CPU memories\;
-%% \item Send $Z[id]$ to all processes\;
-%% \item Receive $Z[j]$ from every other process j\;
-%% }
-%% \end{algorithm}
-%% \end{enumerate}
-%% ~\\
-
-%% \RC{ENCORE ENCORE PIRE}
+\begin{algorithm}[htpb]
+%\LinesNumbered
+\caption{CUDA-MPI Algorithm to find roots with the Ehrlich-Aberth method}
+\label{alg2-cuda-mpi}
+
+\KwIn{$Z^{0}$ (Initial root's vector), $\varepsilon$ (Error tolerance
+ threshold), P (Polynomial to solve), Pu (Derivative of P), $n$ (Polynomial degree), $\Delta z$ (error for the stopping condition), $num\_gpus$ (number of MPI processes/number of GPUs), Size (number of roots)}
+
+\KwOut {$Z$ (Solution root's vector), $ZPrec$ (Previous solution root's vector)}
+
+\BlankLine
+Initialization of P\;
+Initialization of Pu\;
+Initialization of the solution vector $Z^{0}$\;
+Distribution of Z\;
+Allocate memory to GPU\;
+\While {$error > \varepsilon$}{
+Copy $Z$ from CPU to GPU\;
+$ZPrec_{loc}=kernel\_save(Z_{loc})$\;
+$Z_{loc}=kernel\_update(Z,P,Pu)$\;
+$\Delta z=kernel\_testConv(Z_{loc},ZPrec_{loc})$\;
+$error=MPI\_Reduce(\Delta z)$\;
+Copy $Z_{loc}$ from GPU to CPU\;
+$Z=MPI\_AlltoAll(Z_{loc})$\;
+}
+\end{algorithm}
+