From: couturie Date: Sun, 6 Oct 2013 16:17:00 +0000 (+0200) Subject: new X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/commitdiff_plain/b1fd489e34a8d46d286a0d271c38cbfb442f511f?ds=sidebyside new --- diff --git a/BookGPU/Chapters/chapter10/ch10.tex b/BookGPU/Chapters/chapter10/ch10.tex index c481bc4..0c6a8bb 100644 --- a/BookGPU/Chapters/chapter10/ch10.tex +++ b/BookGPU/Chapters/chapter10/ch10.tex @@ -449,7 +449,7 @@ An optimized way of doing the reduction can be found in the examples\footnote{Av \begin{figure}[!h] \centering \includegraphics[width=10cm]{Chapters/chapter10/figures/Reduc3.pdf} -\caption{Example of a parallel reduction at block level (courtesy NVIDIA).} +\caption{Example of a parallel reduction at block level. (Courtesy NVIDIA).} \label{chXXX:fig:reduc} \end{figure} diff --git a/BookGPU/Chapters/chapter19/ch19.tex b/BookGPU/Chapters/chapter19/ch19.tex index 307b851..d0d59cd 100755 --- a/BookGPU/Chapters/chapter19/ch19.tex +++ b/BookGPU/Chapters/chapter19/ch19.tex @@ -130,7 +130,8 @@ For each nonzero, both its column and row indices are explicitly stored. The Cus *, *, *, 10, *, *} \end{lstlisting} - \subsubsection*{Hybrid (HYB)\index{compressed storage format!HYB}} The HYB format heuristically computes a value $K$ and stores $K$ nonzeros per rows in the ELL format. When a row has more than $K$ non-zeros, the trailing nonzeros are stored in COO. This design decreases the storage overhead due to ELL padding elements and thus improves the overall performance. + \subsubsection*{Hybrid (HYB)\index{compressed storage format!HYB}} The HYB format heuristically computes a value $K$ and stores $K$ nonzeros per rows in the ELL format. When a row has more than $K$ non zeros, the trailing nonzeros are stored in COO. This design decreases the storage overhead due to ELL padding elements and thus improves the overall performance. +\pagebreak \begin{lstlisting}[caption={}] hyb.nnz_per_row = 2 hyb.ell.col_index = {2, 1, 1, 0, 2, 0, *, 4, 3, 2, *, 5} @@ -203,7 +204,7 @@ As a preprocessing step, we reorder the rows of the matrix by their \emph{row we \begin{figure}[t] \centering \includegraphics[height=5cm]{Chapters/chapter19/fig/scoo.pdf} - \caption{Example of the memory access pattern for a $6 \times 6$ matrix stored in Sliced COO format (Slice Size = 3 rows).} + \caption{Example of the memory access pattern for a $6 \times 6$ matrix stored in sliced COO format (slice size = 3 rows).} \label{fig:scoo-1} \end{figure} @@ -302,7 +303,7 @@ Max Memory bandwidth (GB/s)& 144 & 192 & 21 \\ \hline \end{tabular} \end{center} -\caption{Overview of hardware used in the experiments} +\caption{Overview of hardware used in the experiments.} \label{table:hardware} \end{table} @@ -329,7 +330,7 @@ The speedups compared to the multithreaded CADO-NFS bucket implementation on an \hline \end{tabular} - \caption{Performance of SpMV on RSA-170 matrix} + \caption{Performance of SpMV on RSA-170 matrix.} \label{table:rsa170} \end{table}