2 \usepackage{beamerthemefemto}
3 \usepackage[latin1]{inputenc}
4 \usepackage[T1]{fontenc}
5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
6 \usepackage{algorithm,algorithmicx,algpseudocode}
7 \usepackage{graphicx,graphics}
14 \usepackage[textsize=footnotesize]{todonotes}
15 \title{Optimal Dynamic Frequency Scaling for Energy - Performance of Parallel MPI Programs}
17 \author{Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh
19 \institute[DISC Department - AND Team]{FEMTO-ST - DISC Department - AND Team}
20 \date{August 29th,~2014}
21 % ____ _____ ____ _ _ _____
22 % | _ \| ____| __ )| | | |_ _|
23 % | | | | _| | _ \| | | | | |
24 % | |_| | |___| |_) | |_| | | |
25 % |____/|_____|____/ \___/ |_|
28 \setbeamertemplate{background}{\titrefemto}
33 \setbeamertemplate{background}{\pagefemto}
34 \begin{frame}{Outline}
35 \setbeamertemplate{section in toc}[sections numbered]
38 \section{Definitions and objectives }
39 \begin{frame}{Definitions}
42 % Et ici le texte dans le femtoBlock
44 \small \item Modern processors provide the \textbf{Dynamic Voltage and Frequency Scaling (DVFS)} technique. \medskip
45 \item DVFS is used to reduce the frequency and thus to \textbf{reduce the energy consumption} by a CPU while computing.\medskip
46 \item Energy consumption by an \textbf{individual processor} of a synchronous parallel program:
48 $E_{ind} = P_{dyn} \cdot T_{Comp} + P_{static} \cdot (T_{Comp}+T_{Comm})$.\medskip
49 \item The frequency scaling factor is the ratio between the maximum and the new frequency, $S = \frac{F_{max}}{F_{new}}$. \medskip
56 \begin{frame}{Objectives}
57 \begin{femtoBlock}{} \vspace{-12 mm}
58 \begin{itemize} \small
59 \item Study the effect of the scaling factor $S$ on \textbf{energy consumption} of parallel iterative applications such as NAS
60 Benchmarks. \includegraphics[width=.06\textwidth]{fig/nasa.pdf} \medskip
61 \item Study the effect of the scaling factor $S$ on \textbf{performance} of these benchmarks.\medskip
62 \item Discovering the \textbf{energy-performance trade-off relation} when changing the frequency.\medskip
63 \item We propose an algorithm for selecting the scaling factor $S$ producing the \textbf{optimal trade-off} between the energy and performance. \medskip
64 \item Improving Rauber and Rünger's\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
65 energy consumption \\ \quad ~ ~\quad of independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method, on which our method is based.
67 \let\thefootnote\relax\footnote{}
71 \section{Energy and performance models}
72 \begin{frame}{Energy model for homogeneous platform}
73 \begin{femtoBlock}{}\small
74 The dynamic power is \textbf{exponentially} related to the scaling factor $S$ and the static consumed energy is \textbf{linearly}
75 related to this factor.
76 \begin{block}{\small Rauber and Rünger's energy model}
77 $ E = P_{dyn} \cdot S_1^{-2} \cdot
78 \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
79 P_{static} \cdot S_1 \cdot T_1 \cdot N$
81 $S_1$: is the max. scaling factor, $T_1$: is the time of the slowest task, $T_i$: is the time of the other tasks and
82 $N$: is the number of nodes.
83 \begin{block}{\small Rauber and Rünger's optimal scaling factor}
84 $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
85 \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
87 They reduce degradation of the performance by \textbf{setting the highest frequency to the slowest task}.
92 \begin{frame}{Performance evaluation of MPI programs}
95 \begin{block}{\small Execution time prediction model}
96 \centering{ $ T_{new} = T_{Max Comp Old} \cdot S + T_{{Max Comm Old}}$}
99 \centering{\includegraphics[width=.35\textwidth]{fig/cg_per}
101 \includegraphics[width=.35\textwidth]{fig/lu_pre}}
104 \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
108 \section{Performance and energy reduction trade-off}
110 \begin{frame}{Performance and energy reduction trade-off}
111 \begin{femtoBlock}{} \vspace{-15 mm}
114 \subfloat[\small Real relation.]{%
115 \includegraphics[width=.4\textwidth]{fig/file3}\label{fig:r2}}
117 \subfloat[\small Converted relation.]{%
118 \includegraphics[width=.4\textwidth]{fig/file}\label{fig:r1}}%
120 % \caption{The energy and performance relation}
122 $Performance=\frac{1}{execution~time}$
124 \begin{block}{\small Our objective function}
125 \centering{$\textbf{\emph {MaxDist}} = \max_{j=1,2,\dots ,F}
126 (\overbrace{P_{Norm}(S_j)}^{{Maximize}} -
127 \overbrace{E_{Norm}(S_j)}^{{Minimize}} )$}
137 \section{Experimental results and comparison}
139 \begin{frame}{Experimental results }
143 \item Our experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip
144 \item Our algorithm is applied to NAS parallel benchmarks.\medskip
145 \item Each node in the cluster has 18 frequency values from \textbf{2.5$GHz$} to \textbf{800$MHz$}.\medskip
146 \item We run the classes A, B and C on 4, 8 or 9 and 16 nodes respectively.\medskip
147 \item The dynamic power with the highest frequency is equal to \textbf{20 $W$} and the static power is equal to \textbf{4 $W$}.
152 \begin{frame}{Experimental results}
155 \includegraphics[width=.35\textwidth]{fig/ep}
156 \includegraphics[width=.35\textwidth]{fig/cg}
157 \includegraphics[width=.35\textwidth]{fig/bt}}
159 \centering {\includegraphics[width=.55\textwidth]{fig/results.pdf}}
163 \begin{frame}{Results comparison}
166 \includegraphics[width=.33\textwidth]{fig/c1.pdf}
168 \includegraphics[width=.33\textwidth]{fig/c2.pdf}}
171 \includegraphics[width=.45\textwidth]{fig/compare_c.pdf}
179 \section{Conclusions}
180 \begin{frame}{Conclusions}
184 \item We have presented a new online scaling factor selection method that \textbf{optimizes simultaneously the energy and performance}.\medskip
185 \item It predicts \textbf{ the energy consumption and the performance} of the parallel applications. \medskip
186 \item Our algorithm \textbf{saves more energy} when the communication and other slack times are large. \medskip
187 \item It gives the \textbf{best trade-off between energy reduction and
188 performance}. \medskip
189 \item Our method \textbf{outperforms Rauber and Rünger's method} in terms of energy-performance ratio.
198 \begin{frame}{Thanks for Listening} \vspace{-10 mm}
200 \begin{block}{\small Appeared}
201 This work has appeared in the ISPA conference proceedings, 26--28 August 2014
204 \medskip \medskip \medskip
207 \centering {\Large Questions?}