+%% \RC{Surement à virer ou réécrire pour etre compris sans algo}
+%% $num\_gpus$ OpenMP threads are created using the
+%% \verb=omp_set_num_threads();= function (line $3$,
+%% Algorithm~\ref{alg2-cuda-openmp}), and the shared memory is created using
+%% the \verb=#pragma omp parallel shared()= OpenMP directive (line $5$,
+%% Algorithm~\ref{alg2-cuda-openmp}). Then each OpenMP thread allocates
+%% memory, copies the initial data from CPU memory to GPU global memory,
+%% and executes the kernels on GPU, but computes only its portion of the roots,
+%% indicated by the variable \textit{index}, which is initialized at line $5$ of
+%% Algorithm~\ref{alg2-cuda-openmp} and used as input data in
+%% $kernel\_update$ (line $10$, Algorithm~\ref{alg2-cuda-openmp}). After
+%% each iteration, all OpenMP threads synchronize using
+%% \verb=#pragma omp barrier;= to gather all the correct values of
+%% $\Delta z$, thus allowing the computation of the maximum stop condition
+%% on vector $\Delta z$ (line $12$,
+%% Algorithm~\ref{alg2-cuda-openmp}). Finally, threads copy the results from GPU
+%% memories to CPU memory. The OpenMP threads execute kernels until the
+%% roots sufficiently converge.
+
+
+%% \begin{algorithm}[h]
+%% \LinesNumbered
+%% \SetAlgoNoLine
+%% \caption{CUDA-OpenMP Algorithm to find roots with the Ehrlich-Aberth method}
+%% \label{alg2-cuda-openmp}
+
+%% \KwIn{$Z^{0}$ (initial vector of roots), $\varepsilon$ (error tolerance
+%% threshold), P (polynomial to solve), Pu (derivative of P), $n$ (polynomial degree), $\Delta z$ (vector of errors for the stop condition), $num\_gpus$ (number of OpenMP threads/number of GPUs), $Size$ (number of roots)}
+
+%% \KwOut{$Z$ (vector of roots), $ZPrec$ (previous vector of roots)}
+
+%% \BlankLine
+
+%% Initialization of P\;
+%% Initialization of Pu\;
+%% Initialization of the solution vector $Z^{0}$\;
+%% Start of a parallel part with OpenMP (Z, $\Delta z$, P are shared variables)\;
+%% gpu\_id=cudaGetDevice()\;
+%% Allocate memory on GPU\;
+%% Compute local size and offset according to gpu\_id\;
+%% \While {$error > \varepsilon$}{
+%% copy Z from CPU to GPU\;
+%% $ ZPrec_{loc}=kernel\_save(Z_{loc})$\;
+%% $ Z_{loc}=kernel\_update(Z,P,Pu)$\;
+%% $\Delta z[gpu\_id] = kernel\_testConv(Z_{loc},ZPrec_{loc})$\;
+%% $ error= Max(\Delta z)$\;
+%% copy $Z_{loc}$ from GPU to Z in CPU\;
+%% }
+%%\end{algorithm}
+