X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/11bf000acddf9ee6b14cf8c3ca3ab2674f686b47..55ce7168c6e69a2462d76c95dc9a5298ceedb04f:/BookGPU/Chapters/chapter19/ch19.tex?ds=sidebyside diff --git a/BookGPU/Chapters/chapter19/ch19.tex b/BookGPU/Chapters/chapter19/ch19.tex index 307b851..d0d59cd 100755 --- a/BookGPU/Chapters/chapter19/ch19.tex +++ b/BookGPU/Chapters/chapter19/ch19.tex @@ -130,7 +130,8 @@ For each nonzero, both its column and row indices are explicitly stored. The Cus *, *, *, 10, *, *} \end{lstlisting} - \subsubsection*{Hybrid (HYB)\index{compressed storage format!HYB}} The HYB format heuristically computes a value $K$ and stores $K$ nonzeros per rows in the ELL format. When a row has more than $K$ non-zeros, the trailing nonzeros are stored in COO. This design decreases the storage overhead due to ELL padding elements and thus improves the overall performance. + \subsubsection*{Hybrid (HYB)\index{compressed storage format!HYB}} The HYB format heuristically computes a value $K$ and stores $K$ nonzeros per row in the ELL format. When a row has more than $K$ nonzeros, the trailing nonzeros are stored in COO. This design decreases the storage overhead due to ELL padding elements and thus improves the overall performance. 
+\pagebreak \begin{lstlisting}[caption={}] hyb.nnz_per_row = 2 hyb.ell.col_index = {2, 1, 1, 0, 2, 0, *, 4, 3, 2, *, 5} @@ -203,7 +204,7 @@ As a preprocessing step, we reorder the rows of the matrix by their \emph{row we \begin{figure}[t] \centering \includegraphics[height=5cm]{Chapters/chapter19/fig/scoo.pdf} - \caption{Example of the memory access pattern for a $6 \times 6$ matrix stored in Sliced COO format (Slice Size = 3 rows).} + \caption{Example of the memory access pattern for a $6 \times 6$ matrix stored in sliced COO format (slice size = 3 rows).} \label{fig:scoo-1} \end{figure} @@ -302,7 +303,7 @@ Max Memory bandwidth (GB/s)& 144 & 192 & 21 \\ \hline \end{tabular} \end{center} -\caption{Overview of hardware used in the experiments} +\caption{Overview of hardware used in the experiments.} \label{table:hardware} \end{table} @@ -329,7 +330,7 @@ The speedups compared to the multithreaded CADO-NFS bucket implementation on an \hline \end{tabular} - \caption{Performance of SpMV on RSA-170 matrix} + \caption{Performance of SpMV on RSA-170 matrix.} \label{table:rsa170} \end{table}