X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/mpi-energy.git/blobdiff_plain/449ff37f88ef932d63e318c50e4130759c59be99..0758dda69cea0d33bd22557cfe1935c2c52d19f7:/paper.tex?ds=sidebyside

diff --git a/paper.tex b/paper.tex
index a7860cd..832d62f 100644
--- a/paper.tex
+++ b/paper.tex
@@ -23,6 +23,33 @@
 \newcommand{\JC}[2][inline]{%
   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
 
+\newcommand{\Xsub}[2]{\ensuremath{#1_\textit{#2}}}
+
+\newcommand{\Dist}{\textit{Dist}}
+\newcommand{\Eind}{\Xsub{E}{ind}}
+\newcommand{\Enorm}{\Xsub{E}{Norm}}
+\newcommand{\Eoriginal}{\Xsub{E}{Original}}
+\newcommand{\Ereduced}{\Xsub{E}{Reduced}}
+\newcommand{\Fdiff}{\Xsub{F}{diff}}
+\newcommand{\Fmax}{\Xsub{F}{max}}
+\newcommand{\Fnew}{\Xsub{F}{new}}
+\newcommand{\Ileak}{\Xsub{I}{leak}}
+\newcommand{\Kdesign}{\Xsub{K}{design}}
+\newcommand{\MaxDist}{\textit{Max Dist}}
+\newcommand{\Ntrans}{\Xsub{N}{trans}}
+\newcommand{\Pdyn}{\Xsub{P}{dyn}}
+\newcommand{\PnormInv}{\Xsub{P}{NormInv}}
+\newcommand{\Pnorm}{\Xsub{P}{Norm}}
+\newcommand{\Pstates}{\Xsub{P}{states}}
+\newcommand{\Pstatic}{\Xsub{P}{static}}
+\newcommand{\Sopt}{\Xsub{S}{opt}}
+\newcommand{\Tcomp}{\Xsub{T}{comp}}
+\newcommand{\TmaxCommOld}{\Xsub{T}{Max Comm Old}}
+\newcommand{\TmaxCompOld}{\Xsub{T}{Max Comp Old}}
+\newcommand{\Tmax}{\Xsub{T}{max}}
+\newcommand{\Tnew}{\Xsub{T}{New}}
+\newcommand{\Told}{\Xsub{T}{Old}}
+
 \begin{document}
 
 \title{Dynamic Frequency Scaling for Energy Consumption
@@ -206,28 +233,28 @@ our paper is to present a new online scaling factor selection method which has t
 Many researchers~\cite{9,3,15,26} divide the power consumed by a processor into
 two power metrics: the static and the dynamic power.  While the first one is
 consumed as long as the computing unit is on, the latter is only consumed during
-computation times.  The dynamic power $P_{dyn}$ is related to the switching
+computation times.  The dynamic power $\Pdyn$ is related to the switching
 activity $\alpha$, load capacitance $C_L$, the supply voltage $V$ and
 operational frequency $f$, as shown in EQ~\eqref{eq:pd}.
 \begin{equation}
   \label{eq:pd}
-  P_\textit{dyn} = \alpha \cdot C_L \cdot V^2 \cdot f
+  \Pdyn = \alpha \cdot C_L \cdot V^2 \cdot f
 \end{equation}
-The static power $P_{static}$ captures the leakage power as follows:
+The static power $\Pstatic$ captures the leakage power as follows:
 \begin{equation}
   \label{eq:ps}
-   P_\textit{static}  = V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}
+   \Pstatic  = V \cdot \Ntrans \cdot \Kdesign \cdot \Ileak
 \end{equation}
-where V is the supply voltage, $N_{trans}$ is the number of transistors,
-$K_{design}$ is a design dependent parameter and $I_{leak}$ is a
+where $V$ is the supply voltage, $\Ntrans$ is the number of transistors,
+$\Kdesign$ is a design dependent parameter and $\Ileak$ is a
 technology-dependent parameter.  The energy consumed by an individual processor
 to execute a given program can be computed as:
 \begin{equation}
   \label{eq:eind}
-   E_\textit{ind} =  P_\textit{dyn} \cdot T_{Comp} + P_\textit{static} \cdot T
+   \Eind =  \Pdyn \cdot \Tcomp + \Pstatic \cdot T
 \end{equation}
-where $T$ is the execution time of the program, $T_{Comp}$ is the computation
-time and $T_{Comp} \leq T$.  $T_{Comp}$ may be equal to $T$ if there is no
+where $T$ is the execution time of the program, $\Tcomp$ is the computation
+time and $\Tcomp \leq T$. $\Tcomp$ may be equal to $T$ if there is no
 communication, no slack time and no synchronization.
 
 DVFS is a process that is allowed in modern processors to reduce the dynamic
@@ -240,7 +267,7 @@ process of the frequency can be expressed by the scaling factor $S$ which is the
 ratio between the maximum and the new frequency as in EQ~\eqref{eq:s}.
 \begin{equation}
   \label{eq:s}
- S = \frac{F_\textit{max}}{F_\textit{new}}
+  S = \frac{\Fmax}{\Fnew}
 \end{equation}
 The value of the scaling factor $S$ is greater than 1 when changing the
 frequency of the CPU to any new frequency value~(\emph{P-state}) in the
@@ -254,20 +281,19 @@ function of the scaling factor $S$, as in EQ~\eqref{eq:energy}.
 
 \begin{equation}
   \label{eq:energy}
-  E = P_\textit{dyn} \cdot S_1^{-2} \cdot
+  E = \Pdyn \cdot S_1^{-2} \cdot
     \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
-    P_\textit{static} \cdot T_1 \cdot S_1 \cdot N
- \hfill
+      \Pstatic \cdot T_1 \cdot S_1 \cdot N
 \end{equation}
 where $N$ is the number of parallel nodes, $T_i$ and $S_i$ for $i=1,\dots,N$ are
-the execution times and scaling factors of the sorted tasks.  Therefore, $T1$ is
+the execution times and scaling factors of the sorted tasks.  Therefore, $T_1$ is
 the time of the slowest task, and $S_1$ its scaling factor which should be the
 highest because they are proportional to the time values $T_i$.  The scaling
 factors are computed as in EQ~\eqref{eq:si}.
 \begin{equation}
   \label{eq:si}
   S_i = S \cdot \frac{T_1}{T_i}
-      = \frac{F_\textit{max}}{F_\textit{new}} \cdot \frac{T_1}{T_i}
+      = \frac{\Fmax}{\Fnew} \cdot \frac{T_1}{T_i}
 \end{equation}
 In this paper we use Rauber and Rünger's energy model, EQ~\eqref{eq:energy}, because it can be applied to homogeneous clusters if the communication time is taken into consideration. Moreover, we compare our algorithm with Rauber and Rünger's scaling factor selection
 method which uses the same energy model.  In their method, the optimal scaling factor is
@@ -276,7 +302,7 @@ EQ~\eqref{eq:sopt}.
 
 \begin{equation}
   \label{eq:sopt}
-  S_\textit{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_\textit{dyn}}{P_\textit{static}} \cdot
+  \Sopt = \sqrt[3]{\frac{2}{N} \cdot \frac{\Pdyn}{\Pstatic} \cdot
     \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3} \right) }
 \end{equation}
 
@@ -302,7 +328,7 @@ times are used to predict the execution time for any MPI program as a function
 of the new scaling factor as in EQ~\eqref{eq:tnew}.
 \begin{equation}
   \label{eq:tnew}
- \textit  T_\textit{new} = T_\textit{Max Comp Old} \cdot S + T_{\textit{Max Comm Old}}
+  \Tnew = \TmaxCompOld \cdot S + \TmaxCommOld
 \end{equation}
 In this paper, this prediction method is used to select the best scaling factor
 for each processor as presented in the next section.
@@ -319,20 +345,19 @@ the consumed energy with scaled frequency and the consumed energy without scaled
 frequency:
 \begin{multline}
   \label{eq:enorm}
-  E_\textit{Norm} = \frac{ E_\textit{Reduced}}{E_\textit{Original}} \\
-        {} = \frac{P_\textit{dyn} \cdot S_1^{-2} \cdot
-               \left( T_1 + \sum_{i=2}^{N}\frac{T_i^3}{T_1^2}\right) +
-               P_\textit{static} \cdot T_1 \cdot S_1 \cdot N  }{
-              P_\textit{dyn} \cdot \left(T_1+\sum_{i=2}^{N}\frac{T_i^3}{T_1^2}\right) +
-              P_\textit{static} \cdot T_1 \cdot N }
+  \Enorm = \frac{ \Ereduced}{\Eoriginal} \\
+      {} = \frac{\Pdyn \cdot S_1^{-2} \cdot
+             \left( T_1 + \sum_{i=2}^{N}\frac{T_i^3}{T_1^2}\right) +
+               \Pstatic \cdot T_1 \cdot S_1 \cdot N}{
+             \Pdyn \cdot \left(T_1+\sum_{i=2}^{N}\frac{T_i^3}{T_1^2}\right) +
+               \Pstatic \cdot T_1 \cdot N }
 \end{multline}
 In the same way we can normalize the performance as follows:
 \begin{equation}
   \label{eq:pnorm}
-  P_\textit{Norm} = \frac{T_\textit{New}}{T_\textit{Old}}
-          = \frac{T_\textit{Max Comp Old} \cdot S +
-           T_\textit{Max Comm Old}}{T_\textit{Max Comp Old} +
-           T_\textit{Max Comm Old}}
+  \Pnorm = \frac{\Tnew}{\Told}
+         = \frac{\TmaxCompOld \cdot S + \TmaxCommOld}{
+             \TmaxCompOld + \TmaxCommOld}
 \end{equation}
 The second problem is that the optimization operation for both energy and
 performance is not in the same direction.  In other words, the normalized energy
@@ -352,10 +377,10 @@ direction.  Therefore, we inverse the equation of the normalized performance as
 follows:
 \begin{equation}
   \label{eq:pnorm_en}
-  P^{-1}_\textit{Norm} = \frac{ T_\textit{Old}}{ T_\textit{New}}
-               = \frac{T_\textit{Max Comp Old} +
-                 T_\textit{Max Comm Old}}{T_\textit{Max Comp Old} \cdot S +
-                 T_\textit{Max Comm Old}}
+  \Pnorm^{-1} = \frac{ \Told}{ \Tnew}
+               = \frac{\TmaxCompOld +
+                 \TmaxCommOld}{\TmaxCompOld \cdot S +
+                 \TmaxCommOld}
 \end{equation}
 \begin{figure}
   \centering
@@ -374,9 +399,9 @@ performance) at the same time, see Figure~\ref{fig:rel}\subref{fig:r1}.  Then
 our objective function has the following form:
 \begin{equation}
   \label{eq:max}
-  \textit{Max Dist} = \max_{j=1,2,\dots,F}
-      (\overbrace{P^{-1}_\textit{Norm}(S_j)}^{\text{Maximize}} -
-       \overbrace{E_\textit{Norm}(S_j)}^{\text{Minimize}} )
+  \MaxDist = \max_{j=1,2,\dots,F}
+      (\overbrace{\Pnorm^{-1}(S_j)}^{\text{Maximize}} -
+       \overbrace{\Enorm(S_j)}^{\text{Minimize}} )
 \end{equation}
 where $F$ is the number of available frequencies. Then we can select the optimal
 scaling factor that satisfies EQ~\eqref{eq:max}.  Our objective function can
@@ -392,32 +417,32 @@ the objective function described above.
 \begin{figure}[tp]
   \begin{algorithmic}[1]
     % \footnotesize
-    \State  Initialize the variable $Dist=0$
+    \State  Initialize the variable $\Dist=0$
     \State Set dynamic and static power values.
-    \State Set $P_{states}$ to the number of available frequencies.
-    \State Set the variable $F_{new}$ to max. frequency,  $F_{new} = F_{max} $
-    \State Set the variable $F_{diff}$ to the difference between two successive
+    \State Set $\Pstates$ to the number of available frequencies.
+    \State Set the variable $\Fnew$ to max. frequency,  $\Fnew = \Fmax $
+    \State Set the variable $\Fdiff$ to the difference between two successive
            frequencies.
-    \For {$j:=1$   to   $P_{states} $}
-      \State $F_{new}=F_{new} - F_{diff} $
-      \State $S = \frac{F_\textit{max}}{F_\textit{new}}$
+    \For {$j := 1$ to $\Pstates $}
+      \State $\Fnew = \Fnew - \Fdiff $
+      \State $S = \frac{\Fmax}{\Fnew}$
       \State $S_i = S \cdot \frac{T_1}{T_i}
-                  = \frac{F_\textit{max}}{F_\textit{new}} \cdot \frac{T_1}{T_i}$
+                  = \frac{\Fmax}{\Fnew} \cdot \frac{T_1}{T_i}$
              for $i=1,\dots,N$
-      \State $E_\textit{Norm} =
-          \frac{P_\textit{dyn} \cdot S_1^{-2} \cdot
+      \State $\Enorm =
+          \frac{\Pdyn \cdot S_1^{-2} \cdot
                   \left( T_1 + \sum_{i=2}^{N}\frac{T_i^3}{T_1^2}\right) +
-                  P_\textit{static} \cdot T_1 \cdot S_1 \cdot N }{
-                P_\textit{dyn} \cdot
+                  \Pstatic \cdot T_1 \cdot S_1 \cdot N }{
+                \Pdyn \cdot
                   \left(T_1+\sum_{i=2}^{N}\frac{T_i^3}{T_1^2}\right) +
-                  P_\textit{static} \cdot T_1 \cdot N }$
-      \State $P_{NormInv}=T_{old}/T_{new}$
-      \If{$(P_{NormInv}-E_{Norm} > Dist)$}
-        \State $S_{opt} = S$
-        \State $Dist = P_{NormInv} - E_{Norm}$
+                  \Pstatic \cdot T_1 \cdot N }$
+      \State $\PnormInv = \Told / \Tnew$
+      \If{$(\PnormInv - \Enorm > \Dist)$}
+        \State $\Sopt = S$
+        \State $\Dist = \PnormInv - \Enorm$
       \EndIf
     \EndFor
-    \State  Return $S_{opt}$
+    \State  Return $\Sopt$
   \end{algorithmic}
   \caption{Scaling factor selection algorithm}
   \label{EPSA}
@@ -476,7 +501,7 @@ substitution of EQ~\eqref{eq:s} in EQ~\eqref{eq:si}, we can calculate the new
 frequency $F_i$ as follows:
 \begin{equation}
   \label{eq:fi}
-  F_i = \frac{F_\textit{max} \cdot T_i}{S_\textit{optimal} \cdot T_\textit{max}}
+  F_i = \frac{\Fmax \cdot T_i}{\Sopt \cdot \Tmax}
 \end{equation}
 According to this equation all the nodes may have the same frequency value if
 they have balanced workloads, otherwise, they take different frequencies when