last

[book_gpu.git] / BookGPU / Chapters / chapter13 / ch13.tex
diff --git a/BookGPU/Chapters/chapter13/ch13.tex b/BookGPU/Chapters/chapter13/ch13.tex

index b60105d8580976c9f64d28c020b804c23aa2dbb8..ce2c7c281c5162b463f25a3dea8c0d23760a2937 100755 (executable)
--- a/BookGPU/Chapters/chapter13/ch13.tex
+++ b/BookGPU/Chapters/chapter13/ch13.tex
@@ -698,6 +698,7 @@ $800^{3}$                     & $222,108.09$       & $1,769,232$      & $188,790
  
  \begin{table}
  \centering
+\begin{scriptsize}
  \begin{tabular}{|c|c|c|c|c|c|c|c|}
  \hline
  \multirow{2}{*}{\bf Pb. size} & \multicolumn{3}{c|}{\bf Synchronous}                 & \multicolumn{3}{c|}{\bf Asynchronous}                & \multirow{2}{*}{\bf Gain\%}  \\ \cline{2-7}
@@ -712,6 +713,7 @@ $768^{3}$                    & $4,112.68$         & $831,144$        & $50.13$
  
  $800^{3}$                    & $3,950.87$         & $899,088$        & $56.22$         & $3,636.57$        & $834,900$        & $51.91$     & $7.95$ \\ \hline 
  \end{tabular}
+\end{scriptsize}
  \vspace{0.5cm}
  \caption{Execution times in seconds of the parallel projected Richardson method implemented on a cluster of 12 GPUs.}
  \label{ch13:tab:02}
@@ -745,7 +747,7 @@ consequently it also depends on the number of computing nodes.
  %%--------------------------%%
  \section{Red-black ordering technique}
  \label{ch13:sec:06}
-As is wellknown, the Jacobi method\index{iterative method!Jacobi} is characterized
+As is well known, the Jacobi method\index{iterative method!Jacobi} is characterized
  by a slow convergence\index{convergence} rate compared to some iterative methods\index{iterative method}
  (for example, the Gauss-Seidel method\index{iterative method!Gauss-Seidel}). So, in this
  section, we present some solutions to reduce the execution time and the number of
@@ -776,7 +778,7 @@ vector elements leads to using twice the initial number of memory transactions.
  we apply the point red-black ordering\index{iterative method!red-black ordering}
  according to the $y$-coordinate, as is shown in Figure~\ref{ch13:fig:06.02}. In
  this case, the vector elements having even $y$-coordinate are computed in parallel
-using the values of those having odd $y$-coordinate and then viceversa. Moreover,
+using the values of those having odd $y$-coordinate and then vice versa. Moreover,
  in the GPU implementation of the parallel projected Richardson method (Section~\ref{ch13:sec:04}),
  we have shown that a subproblem of size $(NX\times ny\times nz)$ is decomposed into
  $nz$ grids of size $(NX\times ny)$. Then, each kernel is executed in parallel by