correcting equation format

[mpi-energy2.git] / mpi-energy2-extension / Heter_paper.tex
diff --git a/mpi-energy2-extension/Heter_paper.tex b/mpi-energy2-extension/Heter_paper.tex

index 431c0c76a7df3497ec1e041acf5b2a23a60b15ed..88731ece7ef478992c449cec9a04c6c7b8eb9bc3 100644 (file)
--- a/mpi-energy2-extension/Heter_paper.tex
+++ b/mpi-energy2-extension/Heter_paper.tex
@@ -733,7 +733,7 @@ frequency scaling factors are computed as a ratio between the computation time
  of the slowest node and the computation time of the node $i$ as follows:
  \begin{equation}
    \label{eq:Scp}
  of the slowest node and the computation time of the node $i$ as follows:
  \begin{equation}
    \label{eq:Scp}
-  \Scp[ij] =  \frac{ \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M}(\Tcp[ij])} {\Tcp[ij]}
+  \Scp[ij] =  \frac{ \mathop{\max\limits_{i=1,\dots,N}}\limits_{j=1,\dots,M}(\Tcp[ij])} {\Tcp[ij]}
  \end{equation}
  Using the initial frequency scaling factors computed in (\ref{eq:Scp}), the
  algorithm computes the initial frequencies for all nodes as a ratio between the
  \end{equation}
  Using the initial frequency scaling factors computed in (\ref{eq:Scp}), the
  algorithm computes the initial frequencies for all nodes as a ratio between the
@@ -855,25 +855,25 @@ The benchmarks have seven different classes, S, W, A, B, C, D and E, that repres
    \centering
    \begin{tabular}{|*{7}{c|}}
      \hline
    \centering
    \begin{tabular}{|*{7}{c|}}
      \hline
-    Cluster     & CPU         & Max   & Min   & Diff. & no. of cores    & dynamic power   \\
-    Name        & model       & Freq. & Freq. & Freq. & per CPU         & of one core     \\
-                &             & GHz   & GHz   & GHz   &                 &           \\
+                &             & Max   & Min   & Diff. &                 &               \\
+    Cluster     & CPU         & Freq. & Freq. & Freq. & No. of cores    & Dynamic power \\
+    Name        & model       & GHz   & GHz   & GHz   & per CPU         & of one core   \\
      \hline
      \hline
-                & Intel       & 2.3  & 1.2  & 0.1     & 6               & \np[W]{35} \\
-    Taurus      & Xeon        &       &       &       &                 &            \\
-                & E5-2630     &       &       &       &                 &            \\         
+                & Intel       &       &       &         &           &              \\
+    Taurus      & Xeon        & 2.3   & 1.2   & 0.1     & 6         & \np[W]{35}    \\
+                & E5-2630     &       &       &         &           &            \\         
      \hline
      \hline
-                & Intel       & 2.53  & 1.2   & 0.133 & 4               & \np[W]{23} \\
-    Graphene    & Xeon        &       &       &       &                 &            \\
-                & X3440       &       &       &       &                 &            \\    
+                & Intel       &       &       &         &           &             \\
+    Graphene    & Xeon        & 2.53  & 1.2   & 0.133   & 4         & \np[W]{23}  \\
+                & X3440       &       &       &         &           &             \\    
      \hline
      \hline
-                & Intel       & 2.5   & 2     & 0.5   & 4               & \np[W]{46} \\
-    Griffon     & Xeon        &       &       &       &                 &            \\
-                & L5420       &       &       &       &                 &            \\  
+                & Intel       &       &       &         &           &            \\
+    Griffon     & Xeon        & 2.5   & 2     & 0.5     & 4         & \np[W]{46}  \\
+                & L5420       &       &       &         &           &            \\  
      \hline
      \hline
-                & Intel       & 2     & 1.2   & 0.1   & 8               & \np[W]{35} \\
-     Graphite   & Xeon        &       &       &       &                 &            \\
-                & E5-2650     &       &       &       &                 &            \\  
+                & Intel       &       &       &         &           &            \\
+     Graphite   & Xeon        & 2     & 1.2   & 0.1     & 8         & \np[W]{35} \\
+                & E5-2650     &       &       &         &           &            \\  
      \hline
    \end{tabular}
    \label{table:grid5000}
      \hline
    \end{tabular}
    \label{table:grid5000}
@@ -942,8 +942,8 @@ The NAS parallel benchmarks are executed over these two platforms
  The overall energy consumption of all the benchmarks solving the class D instance and
  using the proposed frequency selection algorithm is measured 
  using the equation of the reduced energy consumption, equation 
  The overall energy consumption of all the benchmarks solving the class D instance and
  using the proposed frequency selection algorithm is measured 
  using the equation of the reduced energy consumption, equation 
-(\ref{eq:energy}). This model uses the measured dynamic and static 
-power values  showed in Table \ref{table:grid5000}. The execution
+(\ref{eq:energy}). This model uses the measured dynamic power shown in Table \ref{table:grid5000} and the static 
+power is assumed to be equal to 20\% of the dynamic power. The execution
  time is measured for all the benchmarks over these different scenarios.  
  
  The energy consumptions  and the execution times for all the benchmarks are 
  time is measured for all the benchmarks over these different scenarios.  
  
  The energy consumptions  and the execution times for all the benchmarks are 
@@ -1001,7 +1001,7 @@ algorithm  select smaller frequencies for the powerful nodes which
  produces less energy consumption and thus more energy saving.
  The best energy saving percentage was obtained in the one site scenario with 16 nodes, where the energy consumption was reduced on average by up to 30\%.
  
  produces less energy consumption and thus more energy saving.
  The best energy saving percentage was obtained in the one site scenario with 16 nodes, where the energy consumption was reduced on average by up to 30\%.
  
-\begin{figure}
+\begin{figure*}[t]
    \centering
    \subfloat[The energy reduction while executing the NAS benchmarks over different scenarios ]{%
      \includegraphics[width=.48\textwidth]{fig/eng_s.eps}\label{fig:eng_s}} \hspace{0.4cm}%
    \centering
    \subfloat[The energy reduction while executing the NAS benchmarks over different scenarios ]{%
      \includegraphics[width=.48\textwidth]{fig/eng_s.eps}\label{fig:eng_s}} \hspace{0.4cm}%
@@ -1012,7 +1012,7 @@ The best energy saving percentage was obtained in the one site scenario with 16
      \includegraphics[width=.48\textwidth]{fig/dist.eps}\label{fig:dist}}
    \label{fig:exp-res}
    \caption{The experimental results of different scenarios}
      \includegraphics[width=.48\textwidth]{fig/dist.eps}\label{fig:dist}}
    \label{fig:exp-res}
    \caption{The experimental results of different scenarios}
-\end{figure}
+\end{figure*}
  Figure \ref{fig:per_d} presents the performance degradation percentages for all benchmarks over the two scenarios.
  The performance degradation percentage for the benchmarks running on two sites  with
  16 or 32  nodes is on average equal to 8.3\% or 4.7\% respectively. 
  Figure \ref{fig:per_d} presents the performance degradation percentages for all benchmarks over the two scenarios.
  The performance degradation percentage for the benchmarks running on two sites  with
  16 or 32  nodes is on average equal to 8.3\% or 4.7\% respectively. 
@@ -1050,8 +1050,8 @@ the participating number of cores from a certain cluster is equal to 14,
  in the multi-core scenario the number of selected nodes is equal to 4 while using 
  3 or 4 cores from each node. The platforms with one  
  core per node and  multi-cores nodes are  shown in Table \ref{table:sen-mc}. 
  in the multi-core scenario the number of selected nodes is equal to 4 while using 
  3 or 4 cores from each node. The platforms with one  
  core per node and  multi-cores nodes are  shown in Table \ref{table:sen-mc}. 
-The energy consumptions and execution times of running the class D of the NAS parallel 
-benchmarks over these four different scenarios are presented 
+The energy consumptions and execution times of running  class D of the NAS parallel 
+benchmarks over these two different scenarios are presented 
  in  figures \ref{fig:eng-cons-mc} and \ref{fig:time-mc} respectively.
  
  \begin{table}[]
  in  figures \ref{fig:eng-cons-mc} and \ref{fig:time-mc} respectively.
  
  \begin{table}[]
@@ -1095,7 +1095,18 @@ the one site one core scenario  when compared to the ratio of the multi-cores sc
  More energy reduction can be gained when this ratio is big because it pushes the proposed scaling algorithm to select smaller frequencies that decrease the dynamic power consumption. These experiments also showed that the energy 
  consumption and the execution times of the EP and MG benchmarks do not change significantly over these two
  scenarios  because these benchmarks involve little or no communication. Contrary to EP and MG, the  energy consumptions and the execution times of the rest of the  benchmarks  vary according to the  communication times that are different from one scenario to the other.
  More energy reduction can be gained when this ratio is big because it pushes the proposed scaling algorithm to select smaller frequencies that decrease the dynamic power consumption. These experiments also showed that the energy 
  consumption and the execution times of the EP and MG benchmarks do not change significantly over these two
  scenarios  because these benchmarks involve little or no communication. Contrary to EP and MG, the  energy consumptions and the execution times of the rest of the  benchmarks  vary according to the  communication times that are different from one scenario to the other.
-  
+\begin{figure*}[t]
+  \centering
+    \subfloat[The energy saving of running NAS benchmarks over one core and multicores scenarios]{%
+    \includegraphics[width=.48\textwidth]{fig/eng_s_mc.eps}\label{fig:eng-s-mc}} \hspace{0.4cm}%
+    \subfloat[The performance degradation of running NAS benchmarks over one core and multicores scenarios
+      ]{%
+    \includegraphics[width=.48\textwidth]{fig/per_d_mc.eps}\label{fig:per-d-mc}}\hspace{0.4cm}%
+    \subfloat[The tradeoff distance of running NAS benchmarks over one core and multicores scenarios]{%
+    \includegraphics[width=.48\textwidth]{fig/dist_mc.eps}\label{fig:dist-mc}}
+  \caption{The experimental results of one core and multi-cores scenarios}
+  \label{fig:exp-res-mc}
+\end{figure*}  
    
  The energy saving percentages of all NAS benchmarks running over these two scenarios are presented in figure \ref{fig:eng-s-mc}. 
  The figure shows that  the energy saving percentages in the one 
    
  The energy saving percentages of all NAS benchmarks running over these two scenarios are presented in figure \ref{fig:eng-s-mc}. 
  The figure shows that  the energy saving percentages in the one 
@@ -1112,22 +1123,11 @@ in figure \ref{fig:dist-mc}. These  tradeoff distance between energy consumption
  
  
  
  
  
  
-\begin{figure}
-  \centering
-    \subfloat[The energy saving of running NAS benchmarks over one core and multicores scenarios]{%
-    \includegraphics[width=.48\textwidth]{fig/eng_s_mc.eps}\label{fig:eng-s-mc}} \hspace{0.4cm}%
-    \subfloat[The performance degradation of running NAS benchmarks over one core and multicores scenarios
-      ]{%
-    \includegraphics[width=.48\textwidth]{fig/per_d_mc.eps}\label{fig:per-d-mc}}\hspace{0.4cm}%
-    \subfloat[The tradeoff distance of running NAS benchmarks over one core and multicores scenarios]{%
-    \includegraphics[width=.48\textwidth]{fig/dist_mc.eps}\label{fig:dist-mc}}
-  \label{fig:exp-res}
-  \caption{The experimental results of one core and multi-cores scenarios}
-\end{figure}
  
  
  
  
  
  
-\subsection{Experiments with different static and dynamic powers consumption scenarios}
+
+\subsection{Experiments with different static power scenarios}
  \label{sec.pow_sen}
  
  In section \ref{sec.grid5000}, since it was not possible to measure the static power consumed by a CPU,   the static power was assumed to be equal to 20\% of the measured dynamic power. This power is consumed during the whole execution time, during computation and communication times. Therefore, when the DVFS operations are applied by the scaling algorithm and the CPUs' frequencies lowered, the execution time might increase and consequently the consumed static energy will be increased too. 
  \label{sec.pow_sen}
  
  In section \ref{sec.grid5000}, since it was not possible to measure the static power consumed by a CPU,   the static power was assumed to be equal to 20\% of the measured dynamic power. This power is consumed during the whole execution time, during computation and communication times. Therefore, when the DVFS operations are applied by the scaling algorithm and the CPUs' frequencies lowered, the execution time might increase and consequently the consumed static energy will be increased too. 
@@ -1138,7 +1138,7 @@ The experiments have been executed with these two new static power scenarios  ov
  In these experiments, class D of the NAS parallel benchmarks are executed over the Nancy site. 16 computing nodes from the three clusters, Graphite, Graphene and Griffon, were used in this experiment. 
  
  
  In these experiments, class D of the NAS parallel benchmarks are executed over the Nancy site. 16 computing nodes from the three clusters, Graphite, Graphene and Griffon, were used in this experiment. 
  
  
-\begin{figure}
+\begin{figure*}[t]
    \centering
    \subfloat[The energy saving percentages for the nodes executing the NAS benchmarks over the three power scenarios]{%
      \includegraphics[width=.48\textwidth]{fig/eng_pow.eps}\label{fig:eng-pow}} \hspace{0.4cm}%
    \centering
    \subfloat[The energy saving percentages for the nodes executing the NAS benchmarks over the three power scenarios]{%
      \includegraphics[width=.48\textwidth]{fig/eng_pow.eps}\label{fig:eng-pow}} \hspace{0.4cm}%
@@ -1149,7 +1149,7 @@ In these experiments, class D of the NAS parallel benchmarks are executed over t
      \includegraphics[width=.48\textwidth]{fig/dist_pow.eps}\label{fig:dist-pow}}
    \label{fig:exp-pow}
    \caption{The experimental results of different static power scenarios}
      \includegraphics[width=.48\textwidth]{fig/dist_pow.eps}\label{fig:dist-pow}}
    \label{fig:exp-pow}
    \caption{The experimental results of different static power scenarios}
-\end{figure}
+\end{figure*}
  
  
  
  
  
  
@@ -1203,7 +1203,7 @@ The experimental results, the energy saving, performance degradation and tradeof
  presented in the figures \ref{fig:edp-eng}, \ref{fig:edp-perf} and \ref{fig:edp-dist} respectively.
  
  
  presented in the figures \ref{fig:edp-eng}, \ref{fig:edp-perf} and \ref{fig:edp-dist} respectively.
  
  
-\begin{figure}
+\begin{figure*}[t]
    \centering
    \subfloat[The energy reduction induced by the Maxdist method and the EDP method]{%
      \includegraphics[width=.48\textwidth]{fig/edp_eng}\label{fig:edp-eng}} \hspace{0.4cm}%
    \centering
    \subfloat[The energy reduction induced by the Maxdist method and the EDP method]{%
      \includegraphics[width=.48\textwidth]{fig/edp_eng}\label{fig:edp-eng}} \hspace{0.4cm}%
@@ -1213,7 +1213,7 @@ presented in the figures \ref{fig:edp-eng}, \ref{fig:edp-perf} and \ref{fig:edp-
      \includegraphics[width=.48\textwidth]{fig/edp_dist}\label{fig:edp-dist}}
    \label{fig:edp-comparison}
    \caption{The comparison results}
      \includegraphics[width=.48\textwidth]{fig/edp_dist}\label{fig:edp-dist}}
    \label{fig:edp-comparison}
    \caption{The comparison results}
-\end{figure}
+\end{figure*}
  
  As shown in these figures, the proposed frequencies selection algorithm, Maxdist, outperforms the EDP algorithm in terms of energy consumption reduction and performance for all of the benchmarks executed over the two scenarios. 
  The proposed algorithm gives better results than EDP  because it 
  
  As shown in these figures, the proposed frequencies selection algorithm, Maxdist, outperforms the EDP algorithm in terms of energy consumption reduction and performance for all of the benchmarks executed over the two scenarios. 
  The proposed algorithm gives better results than EDP  because it 
@@ -1248,8 +1248,7 @@ that the proposed algorithm outperforms the latter by selecting a vector of freq
  
  In the near future, we would like to develop a similar method that is adapted to
  asynchronous iterative applications where iterations are not synchronized and communications are overlapped with computations. 
  
  In the near future, we would like to develop a similar method that is adapted to
  asynchronous iterative applications where iterations are not synchronized and communications are overlapped with computations. 
- The development of
-such a method might require a new energy model because the
+The development of such a method might require a new energy model because the
  number of iterations is not known in advance and depends on
  the global convergence of the iterative system.
  
  number of iterations is not known in advance and depends on
  the global convergence of the iterative system.