]> AND Private Git Repository - ThesisAhmed.git/blob - thesis-presentation/ModelePresentationFemto.tex~
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
correcting reference
[ThesisAhmed.git] / thesis-presentation / ModelePresentationFemto.tex~
1 \documentclass{beamer}
2 \usepackage{beamerthemefemto}
3 \usepackage[latin1]{inputenc}
4 \usepackage[T1]{fontenc}
5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
6 \usepackage{algorithm,algorithmicx,algpseudocode}
7 \usepackage{graphicx,graphics}
8 \usepackage{subfig}
9 \usepackage{listings}
10 \usepackage{colortbl}
11 \usepackage{amsmath}
12 \usepackage{xspace}
13          
14 \usepackage[textsize=footnotesize]{todonotes}
15 \title{Optimal Dynamic Frequency Scaling for Energy - Performance of Parallel MPI Programs}
16
17 \author{Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh 
18     and Arnaud Giersch}
19 \institute[DISC Department - AND Team]{FEMTO-ST - DISC Department - AND Team}
20 \date{August 29th,~2014}
21 %  ____  _____ ____  _   _ _____ 
22 % |  _ \| ____| __ )| | | |_   _|
23 % | | | |  _| |  _ \| | | | | |  
24 % | |_| | |___| |_) | |_| | | |  
25 % |____/|_____|____/ \___/  |_|  
26
27 \begin{document}
28 \setbeamertemplate{background}{\titrefemto}
29 \begin{frame}[plain]
30 \titlepage
31 \end{frame}
32  
33 \setbeamertemplate{background}{\pagefemto}
34 \begin{frame}{Outline}
35 \setbeamertemplate{section in toc}[sections numbered] 
36 \tableofcontents
37 \end{frame}
38 \section{Definitions and objectives }
39 \begin{frame}{Definitions}
40         \begin{femtoBlock}
41                 {}
42 %               Et ici le texte dans le femtoBlock
43                 \begin{itemize}
44                        \small \item Modern processors provide the \textbf{Dynamic Voltage and Frequency Scaling (DVFS)} technique. \medskip  
45                        \item  DVFS is used to reduce the frequency and thus to \textbf{reduce the energy consumption} by a CPU while computing.\medskip
46                        \item  Energy consumption by an \textbf{individual processor} of a synchronous parallel program: 
47                     
48                     $E_{ind} =  P_{dyn} \cdot T_{Comp} + P_{static} \cdot (T_{Comp}+T_{Comm})$.\medskip 
49                     \item  The frequency scaling factor is the ratio between the maximum and the new frequency, $S = \frac{F_{max}}{F_{new}}$.  \medskip  
50                        
51                 \end{itemize}
52         \end{femtoBlock}
53 \end{frame}
54
55
56 \begin{frame}{Objectives}
57         \begin{femtoBlock}{} \vspace{-12 mm}
58                 \begin{itemize} \small
59                    \item  Study the effect of the scaling factor $S$ on \textbf{energy consumption} of parallel iterative applications such as NAS 
60                           Benchmarks. \includegraphics[width=.06\textwidth]{fig/nasa.pdf} \medskip
61                    \item  Study the effect of the scaling factor $S$ on \textbf{performance} of these benchmarks.\medskip
62                    \item  Discovering the \textbf{energy-performance trade-off relation} when changing the frequency.\medskip
63                    \item  We propose an algorithm for selecting the scaling factor $S$ producing the \textbf {optimal trade-off} between energy and performance. \medskip
64                    \item  Improving Rauber and Rünger's\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the  
65                           energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method is based on. 
66                 \end{itemize}
67                  \let\thefootnote\relax\footnote{}
68           \vspace{-10 mm}
69         \end{femtoBlock}      
70 \end{frame}
71 \section{Energy and performance models}
72 \begin{frame}{Energy model for homogeneous platform}
73         \begin{femtoBlock}{}\small
74               The dynamic power is \textbf{exponentially} related to the scaling factor $S$ and the static consumed energy is \textbf{linearly} 
75                related to this factor. 
76         \begin{block}{\small Rauber and Rünger's energy model}
77          $ E = P_{dyn} \cdot S_1^{-2} \cdot
78          \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
79             P_{static} \cdot S_1  \cdot T_1 \cdot N$
80         \end{block}     
81            $S_1$: is the max. scaling factor,  $T_1$: is the time of the slowest task, $T_i$: is the time of the other tasks and 
82            $N$: is the number of  nodes.
83        \begin{block}{\small Rauber and Rünger's optimal scaling factor} 
84            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
85             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
86         \end{block}      
87           They reduce degradation of the performance by \textbf{setting the highest frequency to the slowest task}.
88         \end{femtoBlock}
89 \end{frame}
90   
91
92 \begin{frame}{Performance evaluation of MPI programs}      
93         \begin{femtoBlock}{}
94               \vspace{-5 mm}
95               \begin{block}{\small Execution time prediction model}
96                      \centering{ $ T_{new} = T_{Max Comp Old} \cdot S + T_{{Max Comm Old}}$}
97           \end{block}   
98           \vspace{10 mm}
99            \centering{\includegraphics[width=.35\textwidth]{fig/cg_per}
100            \quad%
101            \includegraphics[width=.35\textwidth]{fig/lu_pre}}
102             \vspace{5 mm}
103             
104            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
105            \end{femtoBlock}
106 \end{frame}
107
108 \section{Performance and energy reduction trade-off}
109   
110 \begin{frame}{Performance and energy reduction trade-off}      
111         \begin{femtoBlock}{} \vspace{-15 mm}
112                \begin{figure}
113      \centering
114      \subfloat[\small  Real relation.]{%
115      \includegraphics[width=.4\textwidth]{fig/file3}\label{fig:r2}}
116      \quad%
117     \subfloat[\small Converted relation.]{%
118     \includegraphics[width=.4\textwidth]{fig/file}\label{fig:r1}}%
119   \label{fig:rel}
120  % \caption{The energy and performance relation}
121 \end{figure}
122 $Performance=\frac{1}{execution~time}$
123       \small 
124          \begin{block}{\small Our objective function}
125          \centering{$\textbf{\emph {MaxDist}} = \max_{j=1,2,\dots ,F}             
126                     (\overbrace{P_{Norm}(S_j)}^{{Maximize}} - 
127                      \overbrace{E_{Norm}(S_j)}^{{Minimize}} )$}
128                                          
129         \end{block}                
130         \end{femtoBlock}
131         
132 \end{frame}
133
134   
135
136
137 \section{Experimental results and comparison}
138   
139 \begin{frame}{Experimental results }
140       \begin{femtoBlock}{}      
141         \begin{itemize}
142          \small
143            \item Our experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip
144            \item Our algorithm is applied to  NAS parallel benchmarks.\medskip
145            \item Each node in the cluster has 18 frequency values from \textbf{2.5$GHz$} to \textbf{800$MHz$}.\medskip
146            \item We run the classes A, B and C on 4, 8 or 9 and 16 nodes respectively.\medskip
147            \item The dynamic power with the highest frequency is equal to \textbf{20 $W$} and the static power is equal to \textbf{4 $W$}.
148                 \end{itemize}
149         \end{femtoBlock}
150 \end{frame}
151   
152 \begin{frame}{Experimental results}
153   \begin{femtoBlock}{}  
154       \centering {
155      \includegraphics[width=.35\textwidth]{fig/ep}
156      \includegraphics[width=.35\textwidth]{fig/cg}
157      \includegraphics[width=.35\textwidth]{fig/bt}}
158      
159      \centering {\includegraphics[width=.55\textwidth]{fig/results.pdf}}
160  \end{femtoBlock}
161 \end{frame}
162   
163 \begin{frame}{Results comparison}
164       \begin{femtoBlock}{}      
165     \centering {
166          \includegraphics[width=.33\textwidth]{fig/c1.pdf}
167          \qquad
168          \includegraphics[width=.33\textwidth]{fig/c2.pdf}}
169            
170          
171             \includegraphics[width=.45\textwidth]{fig/compare_c.pdf}
172         \end{femtoBlock}
173 \end{frame}
174
175   
176
177
178
179 \section{Conclusions}
180 \begin{frame}{Conclusions}
181       \begin{femtoBlock}{}    
182       \begin{itemize}
183       \small
184        \item  We have presented a new online scaling factor selection method that \textbf{optimizes simultaneously the energy and performance}.\medskip
185         \item It predicts \textbf{ the energy consumption and the performance} of the parallel applications. \medskip
186          \item Our algorithm \textbf{saves more energy} when the communication and the other slack times are large.     \medskip    
187          \item It gives the \textbf{best trade-off between energy reduction and
188                 performance}. \medskip
189          \item  Our method \textbf{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
190          \end{itemize}      
191          
192         \end{femtoBlock}
193 \end{frame}
194   
195   
196
197     
198 \begin{frame}{Thanks for Listening} \vspace{-10 mm}
199       \begin{femtoBlock}{} 
200          \begin{block}{\small Appeared} 
201            This work has appeared in the ISPA conference proceedings, 26--28 August 2014
202          \end{block}       
203 \medskip
204 \medskip \medskip \medskip
205
206
207     \centering {\Large Questions?}
208          
209         \end{femtoBlock}
210 \end{frame}
211   
212 \end{document}
213 %  _____ ___ _   _ 
214 % |  ___|_ _| \ | |
215 % | |_   | ||  \| |
216 % |  _|  | || |\  |
217 % |_|   |___|_| \_|
218 %