X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/rce2015.git/blobdiff_plain/ba83d1e1dca7d4eaed2a24aa284fc7735dd5fc07..bc14ae668e7b920eaf8ddf83fd7b89eeff55b290:/paper.tex?ds=inline diff --git a/paper.tex b/paper.tex index cda1fdd..c4c3939 100644 --- a/paper.tex +++ b/paper.tex @@ -563,8 +563,8 @@ architectures and scaling up the input matrix size} \hline Grid Architecture & 2x16, 4x8, 4x16 and 8x8\\ %\hline Network & N2 : bw=1Gbits/s - lat=5.10$^{-5}$ \\ %\hline - Input matrix size & N$_{x}$ x N$_{y}$ x N$_{z}$ =150 x 150 x 150\\ %\hline - - & N$_{x}$ x N$_{y}$ x N$_{z}$ =170 x 170 x 170 \\ \hline + Input matrix size & N$_{x}$ $\times$ N$_{y}$ $\times$ N$_{z}$ =150 $\times$ 150 $\times$ 150\\ %\hline + - & N$_{x}$ $\times$ N$_{y}$ $\times$ N$_{z}$ =170 $\times$ 170 $\times$ 170 \\ \hline \end{tabular} \caption{Test conditions: various grid configurations with the input matix size N$_{x}$=150 or N$_{x}$=170 \RC{N2 n'est pas défini..}\RC{Nx est défini, Ny? Nz?} \AG{La lettre 'x' n'est pas le symbole de la multiplication. Utiliser \texttt{\textbackslash times}. Idem dans le texte, les figures, etc.}} @@ -590,7 +590,7 @@ multisplitting method. \begin{center} \includegraphics[width=100mm]{cluster_x_nodes_nx_150_and_nx_170.pdf} \end{center} - \caption{Various grid configurations with the input matrix size N$_{x}$=150 and N$_{x}$=170\RC{idem} + \caption{Various grid configurations with the input matrix size $N_{x}=150$ and $N_{x}=170$\RC{idem} \AG{Utiliser le point comme séparateur décimal et non la virgule. Idem dans les autres figures.}} \label{fig:01} \end{figure} @@ -612,7 +612,7 @@ $40\%$ better (resp. $48\%$) when running from 2x16=32 to 8x8=64 processors. 
\RC Grid Architecture & 2x16, 4x8\\ %\hline Network & N1 : bw=10Gbs-lat=8.10$^{-6}$ \\ %\hline - & N2 : bw=1Gbs-lat=5.10$^{-5}$ \\ - Input matrix size & N$_{x}$ x N$_{y}$ x N$_{z}$ =150 x 150 x 150\\ \hline + Input matrix size & $N_{x} \times N_{y} \times N_{z} =150 \times 150 \times 150$\\ \hline \end{tabular} \caption{Test conditions: grid 2x16 and 4x8 with networks N1 vs N2} \label{tab:02} @@ -646,7 +646,7 @@ the network speed drops down (variation of 12.5\%), the difference between t \hline Grid Architecture & 2x16\\ %\hline Network & N1 : bw=1Gbs \\ %\hline - Input matrix size & N$_{x}$ x N$_{y}$ x N$_{z}$ =150 x 150 x 150\\ \hline + Input matrix size & $N_{x} \times N_{y} \times N_{z} = 150 \times 150 \times 150$\\ \hline \end{tabular} \caption{Test conditions: network latency impacts} \label{tab:03} @@ -682,7 +682,7 @@ magnitude with a latency of $8.10^{-6}$. \hline Grid Architecture & 2x16\\ %\hline Network & N1 : bw=1Gbs - lat=5.10$^{-5}$ \\ %\hline - Input matrix size & N$_{x}$ x N$_{y}$ x N$_{z}$ =150 x 150 x 150\\ \hline \\ + Input matrix size & $N_{x} \times N_{y} \times N_{z} =150 \times 150 \times 150$\\ \hline \\ \end{tabular} \caption{Test conditions: Network bandwidth impacts\RC{Qu'est ce qui varie ici? Il n'y a pas de variation dans le tableau}} \label{tab:04} @@ -711,7 +711,7 @@ of $40\%$ which is only around $24\%$ for the classical GMRES. \hline Grid Architecture & 4x8\\ %\hline Network & N2 : bw=1Gbs - lat=5.10$^{-5}$ \\ - Input matrix size & N$_{x}$ = From 40 to 200\\ \hline + Input matrix size & $N_{x}$ = From 40 to 200\\ \hline \end{tabular} \caption{Test conditions: Input matrix size impacts} \label{tab:05} @@ -751,7 +751,7 @@ grid 2x16 leading to the same conclusion. 
\hline Grid architecture & 2x16\\ %\hline Network & N2 : bw=1Gbs - lat=5.10$^{-5}$ \\ %\hline - Input matrix size & N$_{x}$ = 150 x 150 x 150\\ \hline + Input matrix size & $N_{x} \times N_{y} \times N_{z} = 150 \times 150 \times 150$\\ \hline \end{tabular} \caption{Test conditions: CPU Power impacts} \label{tab:06} @@ -814,7 +814,7 @@ The test conditions are summarized in the table~\ref{tab:07}: \\ Processors Power & 1 GFlops to 1.5 GFlops\\ Intra-Network & bw=1.25 Gbits - lat=5.10$^{-5}$ \\ %\hline Inter-Network & bw=5 Mbits - lat=2.10$^{-2}$\\ - Input matrix size & N$_{x}$ = From 62 to 150\\ %\hline + Input matrix size & $N_{x}$ = From 62 to 150\\ %\hline Residual error precision & 10$^{-5}$ to 10$^{-9}$\\ \hline \\ \end{tabular} \caption{Test conditions: GMRES in synchronous mode vs Krylov Multisplitting in asynchronous mode} @@ -894,7 +894,11 @@ application data can lead to very different numbers of iterations to reach the converge and so to very different execution times. -Our future works... +In future work, we plan to investigate how to simulate the behavior of really +large-scale applications. For example, if we want to simulate the +execution of the solvers of this paper with thousands or even tens of thousands +of cores, it is not possible to do that with SimGrid. Indeed, this tool +actually executes the real computation. So we plan to focus our research on that problem.