thesis-presentation/ModelePresentationFemto.tex~

   1 \documentclass{beamer}
   2 \usepackage{beamerthemefemto}
   3 \usepackage[latin1]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
   6 \usepackage{algorithm,algorithmicx,algpseudocode}
   7 \usepackage{graphicx,graphics}
   8 \usepackage{subfig}
   9 \usepackage{listings}
  10 \usepackage{colortbl}
  11 \usepackage{amsmath}
  12 \usepackage{xspace}
  13
  14 \usepackage[textsize=footnotesize]{todonotes}
  15 \title{Optimal Dynamic Frequency Scaling for Energy - Performance of Parallel MPI Programs}
  16
  17 \author{Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh
  18     and Arnaud Giersch}
  19 \institute[DISC Department - AND Team]{FEMTO-ST - DISC Department - AND Team}
  20 \date{August 29th,~2014}
  21 %  ____  _____ ____  _   _ _____
  22 % |  _ \| ____| __ )| | | |_   _|
  23 % | | | |  _| |  _ \| | | | | |
  24 % | |_| | |___| |_) | |_| | | |
  25 % |____/|_____|____/ \___/  |_|
  26 %
  27 \begin{document}
  28 \setbeamertemplate{background}{\titrefemto}
  29 \begin{frame}[plain]
  30 \titlepage
  31 \end{frame}
  32
  33 \setbeamertemplate{background}{\pagefemto}
  34 \begin{frame}{Outline}
  35 \setbeamertemplate{section in toc}[sections numbered]
  36 \tableofcontents
  37 \end{frame}
  38 \section{Definitions and objectives }
  39 \begin{frame}{Definitions}
  40         \begin{femtoBlock}
  41                 {}
  42 %               Et ici le texte dans le femtoBlock
  43                 \begin{itemize}
  44                        \small \item Modern processors provide the \textbf{Dynamic Voltage and Frequency Scaling (DVFS)} technique. \medskip
  45                        \item  DVFS is used to reduce the frequency and thus to \textbf{reduce the energy consumption} by a CPU while computing.\medskip
  46                        \item  Energy consumption of an \textbf{individual processor} in a synchronous parallel program:
  47
  48                     $E_{ind} =  P_{dyn} \cdot T_{Comp} + P_{static} \cdot (T_{Comp}+T_{Comm})$.\medskip
  49                     \item  The frequency scaling factor is the ratio between the maximum and the new frequency, $S = \frac{F_{max}}{F_{new}}$.  \medskip
  50
  51                 \end{itemize}
  52         \end{femtoBlock}
  53 \end{frame}
  54
  55
  56 \begin{frame}{Objectives}
  57         \begin{femtoBlock}{} \vspace{-12 mm}
  58                 \begin{itemize} \small
  59                    \item  Study the effect of the scaling factor $S$ on \textbf{energy consumption} of parallel iterative applications such as NAS
  60                           Benchmarks. \includegraphics[width=.06\textwidth]{fig/nasa.pdf} \medskip
  61                    \item  Study the effect of the scaling factor $S$ on \textbf{performance} of these benchmarks.\medskip
  62                    \item  Discovering the \textbf{energy-performance trade-off relation} when changing the frequency.\medskip
  63                    \item  We propose an algorithm for selecting the scaling factor $S$ that produces the \textbf{optimal trade-off} between energy and performance. \medskip
  64                    \item  Improving Rauber and Rünger's\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
  65                           energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method, on which our method is based.
  66                 \end{itemize}
  67                  \let\thefootnote\relax\footnote{}
  68           \vspace{-10 mm}
  69         \end{femtoBlock}
  70 \end{frame}
  71 \section{Energy and performance models}
  72 \begin{frame}{Energy model for homogeneous platform}
  73         \begin{femtoBlock}{}\small
  74               The dynamic power is \textbf{exponentially} related to the scaling factor $S$ and the static consumed energy is \textbf{linearly}
  75                related to this factor.
  76         \begin{block}{\small Rauber and Rünger's energy model}
  77          $ E = P_{dyn} \cdot S_1^{-2} \cdot
  78          \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
  79             P_{static} \cdot S_1  \cdot T_1 \cdot N$
  80         \end{block}
  81            $S_1$ is the max.\ scaling factor, $T_1$ is the time of the slowest task, $T_i$ is the time of the other tasks and
  82            $N$ is the number of nodes.
  83        \begin{block}{\small Rauber and Rünger's optimal scaling factor}
  84            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
  85             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
  86         \end{block}
  87           They reduce degradation of the performance by \textbf{setting the highest frequency to the slowest task}.
  88         \end{femtoBlock}
  89 \end{frame}
  90
  91
  92 \begin{frame}{Performance evaluation of MPI programs}
  93         \begin{femtoBlock}{}
  94               \vspace{-5 mm}
  95               \begin{block}{\small Execution time prediction model}
  96                      \centering{ $ T_{new} = T_{Max Comp Old} \cdot S + T_{{Max Comm Old}}$}
  97           \end{block}
  98           \vspace{10 mm}
  99            \centering{\includegraphics[width=.35\textwidth]{fig/cg_per}
 100            \quad%
 101            \includegraphics[width=.35\textwidth]{fig/lu_pre}}
 102             \vspace{5 mm}
 103
 104            \small The maximum normalized error is 0.0073 for CG \textbf{(the smallest)} and 0.031 for LU \textbf{(the worst)}.
 105            \end{femtoBlock}
 106 \end{frame}
 107
 108 \section{Performance and energy reduction trade-off}
 109
 110 \begin{frame}{Performance and energy reduction trade-off}
 111         \begin{femtoBlock}{} \vspace{-15 mm}
 112                \begin{figure}
 113      \centering
 114      \subfloat[\small  Real relation.]{%
 115      \includegraphics[width=.4\textwidth]{fig/file3}\label{fig:r2}}
 116      \quad%
 117     \subfloat[\small Converted relation.]{%
 118     \includegraphics[width=.4\textwidth]{fig/file}\label{fig:r1}}%
 119   \label{fig:rel}
 120  % \caption{The energy and performance relation}
 121 \end{figure}
 122 $Performance=\frac{1}{execution~time}$
 123       \small
 124          \begin{block}{\small Our objective function}
 125          \centering{$\textbf{\emph {MaxDist}} = \max_{j=1,2,\dots ,F}
 126                     (\overbrace{P_{Norm}(S_j)}^{{Maximize}} -
 127                      \overbrace{E_{Norm}(S_j)}^{{Minimize}} )$}
 128
 129         \end{block}
 130         \end{femtoBlock}
 131
 132 \end{frame}
 133
 134
 135
 136
 137 \section{Experimental results and comparison}
 138
 139 \begin{frame}{Experimental results }
 140       \begin{femtoBlock}{}
 141         \begin{itemize}
 142          \small
 143            \item Our experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip
 144            \item Our algorithm is applied to  NAS parallel benchmarks.\medskip
 145            \item Each node in the cluster has 18 frequency values from \textbf{2.5$GHz$} to \textbf{800$MHz$}.\medskip
 146            \item We run the classes A, B and C on 4, 8 or 9 and 16 nodes respectively.\medskip
 147            \item The dynamic power at the highest frequency is equal to \textbf{20 $W$} and the static power is equal to \textbf{4 $W$}.
 148                 \end{itemize}
 149         \end{femtoBlock}
 150 \end{frame}
 151
 152 \begin{frame}{Experimental results}
 153   \begin{femtoBlock}{}
 154       \centering {
 155      \includegraphics[width=.35\textwidth]{fig/ep}
 156      \includegraphics[width=.35\textwidth]{fig/cg}
 157      \includegraphics[width=.35\textwidth]{fig/bt}}
 158
 159      \centering {\includegraphics[width=.55\textwidth]{fig/results.pdf}}
 160  \end{femtoBlock}
 161 \end{frame}
 162
 163 \begin{frame}{Results comparison}
 164       \begin{femtoBlock}{}
 165     \centering {
 166          \includegraphics[width=.33\textwidth]{fig/c1.pdf}
 167          \qquad
 168          \includegraphics[width=.33\textwidth]{fig/c2.pdf}}
 169
 170
 171             \includegraphics[width=.45\textwidth]{fig/compare_c.pdf}
 172         \end{femtoBlock}
 173 \end{frame}
 174
 175
 176
 177
 178
 179 \section{Conclusions}
 180 \begin{frame}{Conclusions}
 181       \begin{femtoBlock}{}
 182       \begin{itemize}
 183       \small
 184        \item  We have presented a new online scaling factor selection method that \textbf{simultaneously optimizes energy and performance}.\medskip
 185         \item It predicts \textbf{ the energy consumption and the performance} of the parallel applications. \medskip
 186          \item Our algorithm \textbf{saves more energy} when the communication and other slack times are large.     \medskip
 187          \item It gives the \textbf{best trade-off between energy reduction and
 188                 performance}. \medskip
 189          \item  Our method \textbf{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
 190          \end{itemize}
 191
 192         \end{femtoBlock}
 193 \end{frame}
 194
 195
 196
 197
 198 \begin{frame}{Thanks for Listening} \vspace{-10 mm}
 199       \begin{femtoBlock}{}
 200          \begin{block}{\small Appeared}
 201            This work has appeared in the ISPA conference proceedings, 26--28 August 2014.
 202          \end{block}
 203 \medskip
 204 \medskip \medskip \medskip
 205
 206
 207     \centering {\Large Questions?}
 208
 209         \end{femtoBlock}
 210 \end{frame}
 211
 212 \end{document}
 213 %  _____ ___ _   _
 214 % |  ___|_ _| \ | |
 215 % | |_   | ||  \| |
 216 % |  _|  | || |\  |
 217 % |_|   |___|_| \_|
 218 %