corrections

[hpcc2014.git] / hpcc.tex
diff --git a/hpcc.tex b/hpcc.tex

index c48f1f67fbd162e5d46bcf110ee93fb6d31fec7c..04c540346aa2c1d772aac9c74d7569de61725a16 100644 (file)
--- a/hpcc.tex
+++ b/hpcc.tex
@@ -493,7 +493,7 @@ simulates the case of distant clusters linked with long distance network as in g
  
  
  Both codes were simulated on a network of two clusters with 50 hosts each, totaling 100 hosts. Various combinations of the above
-factors have provided the results shown in Table~\ref{tab.cluster.2x50}. The problem size of the 3D Poisson problem  ranges from $N_x = N_y = N_z = \text{62}$ to 150 elements (that is from
+factors have provided the results shown in Table~\ref{tab.cluster.2x50}. The problem size of the 3D Poisson problem  ranges from $N=N_x = N_y = N_z = \text{62}$ to 150 elements (that is from
  $\text{62}^\text{3} = \text{\np{238328}}$ to $\text{150}^\text{3} =
  \text{\np{3375000}}$ entries). With the asynchronous multisplitting algorithm the simulated execution time is on average 2.5 times shorter than with the synchronous GMRES one.
  %\AG{Expliquer comment lire les tableaux.}
@@ -523,7 +523,7 @@ $\text{62}^\text{3} = \text{\np{238328}}$ to $\text{150}^\text{3} =
      power (GFlops)
      & 1         & 1         & 1         & 1.5       & 1.5       \\
      \hline
-    size $(n^3)$
+    size $(N)$
      & 62        & 62        & 62        & 100       & 100       \\
      \hline
      Precision
@@ -548,7 +548,7 @@ $\text{62}^\text{3} = \text{\np{238328}}$ to $\text{150}^\text{3} =
      Power (GFlops)
      & 1.5       & 1.5       & 1.5       & 1.5       & 1.5 \\ %      & 1         & 1.5 \\
      \hline
-    size $(n^3)$
+    size $(N)$
      & 110       & 120       & 130       & 140       & 150  \\ %     & 171       & 171 \\
      \hline
      Precision
@@ -561,7 +561,7 @@ $\text{62}^\text{3} = \text{\np{238328}}$ to $\text{150}^\text{3} =
    \end{mytable}
  \end{table}
    
-\RC{Du coup la latence est toujours la même, pourquoi la mettre dans la table?}
+%\RC{Du coup la latence est toujours la même, pourquoi la mettre dans la table?}
  
  %Then we have changed the network configuration using three clusters containing
  %respectively 33, 33 and 34 hosts, or again by on hundred hosts for all the
@@ -647,11 +647,11 @@ Note that the program was run with the following parameters:
  \begin{itemize}
  \item Description of the cluster architecture matching the format \textless{}Number of
    clusters\textgreater{} \textless{}Number of hosts in cluster1\textgreater{} \textless{}Number of hosts in cluster2\textgreater{};
-\item Maximum number of iterations;
-\item Precisions on the residual error;
+\item Maximum numbers of outer and inner iterations;
+\item Outer and inner precisions on the residual error;
  \item Matrix size $N_x$, $N_y$ and $N_z$;
-\item Matrix diagonal value: $6$ (See Equation~(\ref{eq:03}));
-\item Matrix off-diagonal value: $-1$;
+\item Matrix diagonal value: $6$ (see Equation~(\ref{eq:03}));
+\item Matrix off-diagonal values: $-1$;
  \item Communication mode: asynchronous.
  \end{itemize}
  
@@ -664,12 +664,12 @@ asynchronous multisplitting  compared to GMRES with two distant clusters.
  With these settings, Table~\ref{tab.cluster.2x50} shows
  that with an inter-cluster network bandwidth of \np[Mbit/s]{5}, a latency on the order of one hundredth of a millisecond and a processor power
  of one GFlops, an efficiency of about \np[\%]{40} is
-obtained in asynchronous mode for a matrix size of 62 elements. It is noticed that the result remains
+obtained in asynchronous mode for a matrix size of $62^3$ elements. It is noticed that the result remains
  stable even when we vary the residual error precision from \np{E-5} to \np{E-9}. By
-increasing the matrix size up to 100 elements, it was necessary to increase the
+increasing the matrix size up to $100^3$ elements, it was necessary to increase the
  CPU power by \np[\%]{50} to \np[GFlops]{1.5} to obtain convergence of the algorithm and the same order of asynchronous mode efficiency.  When maintaining this processor power but increasing the inter-cluster network throughput up to
  \np[Mbit/s]{50}, an efficiency with a relative gain of 2.5 is obtained with
-high external precision of \np{E-11} for a matrix size from 110 to 150 side
+high external precision of \np{E-11} for a matrix size from $110^3$ to $150^3$
  elements.
  
  %For the 3 clusters architecture including a total of 100 hosts,
@@ -679,8 +679,8 @@ elements.
  %(synchronous and asynchronous) is achieved with an inter cluster of
  %\np[Mbit/s]{10} and a latency of \np[ms]{E-1}. To challenge an efficiency greater than 1.2 with a matrix %size of 100 points, it was necessary to degrade the
  %inter cluster network bandwidth from 5 to \np[Mbit/s]{2}.
-\AG{Conclusion, on prend une plateforme pourrie pour avoir un bon ratio sync/async ???
-  Quelle est la perte de perfs en faisant ça ?}
+%\AG{Conclusion, on prend une plateforme pourrie pour avoir un bon ratio sync/async ???
+  %Quelle est la perte de perfs en faisant ça ?}
  
  %A last attempt was made for a configuration of three clusters but more powerful
  %with 200 nodes in total. The convergence with a relative gain around 1.1 was
@@ -704,7 +704,7 @@ reach the following two objectives:
  
  \item To test which combinations of cluster and network specifications allow an asynchronous algorithm to execute faster than a synchronous one.
  \end{enumerate}
-Our results have shown that with two distant clusters, the asynchronous multisplitting is faster to \np[\%]{40} compared to the synchronous GMRES method
+Our results have shown that with two distant clusters, the asynchronous multisplitting method is up to \np[\%]{40} faster than the synchronous GMRES method,
  which is not negligible for solving complex practical problems of
  ever-increasing size.