From 57e564506a8117605eca5b5d80c0f492e6121c12 Mon Sep 17 00:00:00 2001 From: couturie Date: Sat, 21 Sep 2013 21:03:30 +0200 Subject: [PATCH 1/1] fin correct ch14 --- BookGPU/Chapters/chapter13/biblio13.bib | 2 +- BookGPU/Chapters/chapter13/ch13.tex | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/BookGPU/Chapters/chapter13/biblio13.bib b/BookGPU/Chapters/chapter13/biblio13.bib index 2a82936..0a07906 100644 --- a/BookGPU/Chapters/chapter13/biblio13.bib +++ b/BookGPU/Chapters/chapter13/biblio13.bib @@ -35,7 +35,7 @@ year = {2012} } @book{ch13:ref5, - title = {{Parallel Iterative Algorithms: from Sequential to Grid Computing}}, + title = {{Parallel Iterative Algorithms: From Sequential to Grid Computing}}, author = {Bahi, J.M. and Contassot-Vivier, S. and Couturier, R.}, publisher = {Chapman \& Hall/CRC}, pages = {240}, diff --git a/BookGPU/Chapters/chapter13/ch13.tex b/BookGPU/Chapters/chapter13/ch13.tex index b60105d..ce2c7c2 100755 --- a/BookGPU/Chapters/chapter13/ch13.tex +++ b/BookGPU/Chapters/chapter13/ch13.tex @@ -698,6 +698,7 @@ $800^{3}$ & $222,108.09$ & $1,769,232$ & $188,790 \begin{table} \centering +\begin{scriptsize} \begin{tabular}{|c|c|c|c|c|c|c|c|} \hline \multirow{2}{*}{\bf Pb. size} & \multicolumn{3}{c|}{\bf Synchronous} & \multicolumn{3}{c|}{\bf Asynchronous} & \multirow{2}{*}{\bf Gain\%} \\ \cline{2-7} @@ -712,6 +713,7 @@ $768^{3}$ & $4,112.68$ & $831,144$ & $50.13$ $800^{3}$ & $3,950.87$ & $899,088$ & $56.22$ & $3,636.57$ & $834,900$ & $51.91$ & $7.95$ \\ \hline \end{tabular} +\end{scriptsize} \vspace{0.5cm} \caption{Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 12 GPUs.} \label{ch13:tab:02} @@ -745,7 +747,7 @@ consequently it also depends on the number of computing nodes. 
%%--------------------------%% \section{Red-black ordering technique} \label{ch13:sec:06} -As is wellknown, the Jacobi method\index{iterative method!Jacobi} is characterized +As is well known, the Jacobi method\index{iterative method!Jacobi} is characterized by a slow convergence\index{convergence} rate compared to some iterative methods\index{iterative method} (for example, Gauss-Seidel method\index{iterative method!Gauss-Seidel}). So, in this section, we present some solutions to reduce the execution time and the number of @@ -776,7 +778,7 @@ vector elements leads to using twice the initial number of memory transactions. we apply the point red-black ordering\index{iterative method!red-black ordering} accordingly to the $y$-coordinate, as is shown in Figure~\ref{ch13:fig:06.02}. In this case, the vector elements having even $y$-coordinate are computed in parallel -using the values of those having odd $y$-coordinate and then viceversa. Moreover, +using the values of those having odd $y$-coordinate and then vice versa. Moreover, in the GPU implementation of the parallel projected Richardson method (Section~\ref{ch13:sec:04}), we have shown that a subproblem of size $(NX\times ny\times nz)$ is decomposed into $nz$ grids of size $(NX\times ny)$. Then, each kernel is executed in parallel by -- 2.39.5