]> AND Private Git Repository - ThesisAhmed.git/blob - thesis-presentation/AhmedSlides.tex
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
correcting reference
[ThesisAhmed.git] / thesis-presentation / AhmedSlides.tex
1  \documentclass{beamer}
2 \usepackage{beamerthemefemto}
3 \usepackage[latin1]{inputenc}
4 \usepackage[T1]{fontenc}
5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
6 \usepackage{algorithm,algorithmicx,algpseudocode}
7 \usepackage{graphicx,graphics}
8 \usepackage{subfig}
9 \usepackage{listings}
10 \usepackage{colortbl}
11 \usepackage{amsmath}
12 \usepackage{xspace}
13  \usepackage{movie15}
14  \usepackage{animate}
15 \usepackage{xmpmulti} 
16  \newcommand{\AG}[2][inline]{%
17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}
18 \newcommand{\JC}[2][inline]{%
19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
20 \definecolor{myblue}{RGB}{0,29,119}
21 \newcommand{\Xsub}[2]{{\ensuremath{#1_\mathit{#2}}}}
22 \usepackage{fixltx2e}
23 %% used to put some subscripts lower, and make them more legible
24 \newcommand{\fxheight}[1]{\ifx#1\relax\relax\else\rule{0pt}{1.52ex}#1\fi}
25 \usepackage{ragged2e}
26 \newcommand{\CL}{\Xsub{C}{L}}
27 \newcommand{\Dist}{\mathit{Dist}}
28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
29 \newcommand{\Eind}{\Xsub{E}{ind}}
30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
33 \newcommand{\Es}{\Xsub{E}{S}}
34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}}
35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}}
36 \newcommand{\Fnew}{\Xsub{F}{new}}
37 \newcommand{\Vnew}{\Xsub{V}{new}}
38 \newcommand{\Vmax}{\Xsub{V}{max}}
39 \newcommand{\Ileak}{\Xsub{I}{leak}}
40 \newcommand{\Kdesign}{\Xsub{K}{design}}
41 \newcommand{\MaxDist}{\mathit{Max}\Dist}
42 \newcommand{\MinTcm}{\mathit{Min}\Tcm}
43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
46
47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}}
52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
56 \newcommand{\Tnew}{\Xsub{T}{New}}
57 \newcommand{\Told}{\Xsub{T}{Old}}
58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
63  \usepackage{pifont}
64 \usepackage{xcolor}
65 \definecolor{myblue}{RGB}{0,29,119}
66 \usepackage[textsize=footnotesize]{todonotes}
67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]} 
68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
71
72
73
74 %\title{Energy Consumption Optimization of Parallel Applications with
75 %Iterations using CPU Frequency Scaling} 
76 \vspace{2cm}
77
78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm}
79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}} 
80
81 \date{}
82 \vspace{-3cm}
83 %  ____  _____ ____  _   _ _____ 
84 % |  _ \| ____| __ )| | | |_   _|
85 % | | | |  _| |  _ \| | | | | |  
86 % | |_| | |___| |_) | |_| | | |  
87 % |____/|_____|____/ \___/  |_|  
88
89 \begin{document}
90 \setbeamertemplate{background}{\titrefemto}
91
92 %%%%%%%%%%%%%%%%%%%%
93 %%    SLIDE 01    %%
94 %%%%%%%%%%%%%%%%%%%% 
95 \begin{frame}[plain]
96 \vspace{1cm}
97 \centering
98    \titlepage
99 \end{frame}
100
101
102 %%%%%%%%%%%%%%%%%%%%
103 %%    SLIDE 02    %%
104 %%%%%%%%%%%%%%%%%%%% 
105 \setbeamertemplate{background}{\pagefemto}
106 \begin{frame}{Outline}
107
108 \setbeamertemplate{section in toc}[sections numbered] 
109 \tableofcontents
110 \end{frame}
111
112 %%%%%%%%%%%%%%%%%%%%
113 %%    SLIDE 03    %%
114 %%%%%%%%%%%%%%%%%%%% 
115 \begin{frame}{Definition of parallel computing}
116 \section{\small {Introduction and Problem definition}}
117  \centering
118  \includegraphics[width=0.99\textwidth]{para.pdf} 
119 \end{frame}
120
121  
122
123 \begin{frame}{Execution of synchronous parallel tasks}
124 \vspace{-0.5 cm}
125 \begin{figure}
126   \centering
127   \subfloat[Synchronous imbalanced communications]{%
128     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
129   \subfloat[Synchronous imbalanced computations]{%
130     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
131  % \caption{Parallel tasks on homogeneous platform}
132   \label{fig:homo}
133 \end{figure}
134
135  \end{frame}
136  
137  
138 %%%%%%%%%%%%%%%%%%%%
139 %%    SLIDE 07   %%
140 %%%%%%%%%%%%%%%%%%%% 
141
142
143 \begin{frame}{\large Synchronous and asynchronous iterative methods }
144 \vspace{-0.5 cm}
145 \begin{figure}
146
147 \includegraphics[scale=0.42]{syn_tasks.pdf}
148 \vspace{0.6 cm}
149 \includegraphics[scale=0.42]{Asyn_tasks.pdf}
150 \end{figure}
151
152  
153  \end{frame}
154  
155  %%%%%%%%%%%%%%%%%%%%
156 %%    SLIDE 03    %%
157 %%%%%%%%%%%%%%%%%%%% 
158 \begin{frame}{Approaches to get more computing power}
159  
160    %\bf \textcolor{blue}{}
161      \begin{minipage}{0.5\textwidth} 
162       \textcolor{blue}{1)} \small  \bf \textcolor{black}{Increase the frequency of a  processor.\\ (limited due to overheating)}
163     \end{minipage}%
164     \begin{minipage}{0.6\textwidth} 
165     
166 \begin{figure}[h!]
167         
168     \includegraphics[width=0.7\textwidth]{fig/freq-years} 
169     \end{figure}
170     \end{minipage}%
171     \vspace{0.2cm}
172     \begin{minipage}{0.5\textwidth} 
173      \textcolor{blue}{2)} \small \bf \textcolor{black}{Increase the number of computing   
174      units.}
175      
176  \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}  
177           
178     \end{minipage}%
179     \begin{minipage}{0.6\textwidth} 
180     \begin{figure}[h!]
181      \includegraphics[width=0.7\textwidth]{fig/clusters} 
182     \end{figure}
183     \end{minipage}%
184  \end{frame}
185  
186  
187  
188  %%%%%%%%%%%%%%%%%%%
189 %%    SLIDE 04   %%
190 %%%%%%%%%%%%%%%%%%%% 
191 \begin{frame}{Techniques for energy consumption reduction}
192
193      \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}  
194     \vspace{-0.9cm}
195     \begin{figure}
196      \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{200}{on-off/a-}{0}{111}
197      %\includegraphics[width=0.6\textwidth]{on-off/a-69}
198     \end{figure}
199  \end{frame}
200
201 %%%%%%%%%%%%%%%%%%%%
202 %%    SLIDE 05    %%
203 %%%%%%%%%%%%%%%%%%%% 
204 \begin{frame}{Techniques for energy consumption reduction}
205  
206   \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic Voltage and Frequency Scaling (DVFS)}
207      \vspace{-0.9cm}
208     \begin{figure}
209     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{175}
210      %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
211     \end{figure}
212     \end{frame}
213  
214 %%%%%%%%%%%%%%%%%%%%
215 %%    SLIDE 06   %%
216 %%%%%%%%%%%%%%%%%%%% 
217 %%%%%%%%%%%%%%%%%%%%
218 %%    SLIDE 07    %%
219 %%%%%%%%%%%%%%%%%%%% 
220 \begin{frame}{Motivations}
221 \vspace{0.05cm}
222 \section{\small {Motivations}}
223 \textcolor{blue}{Why we used the DVFS method:}
224 \vspace{-0.49cm}
225 \begin{minipage}{0.5\textwidth} 
226     \vspace{-0.49cm} 
227       \begin{itemize} 
228        \item  \small \textcolor{black}{ The CPU is the component that consumes the  highest amount of energy in a node \textsuperscript{1}. }
229                 
230          \end{itemize}
231
232     \end{minipage}%
233     \begin{minipage}{0.5\textwidth}
234      \vspace{-0.49cm} 
235     \begin{figure}[h!]
236      \includegraphics[width=0.85\textwidth]{fig/node-power} 
237      
238     \end{figure}
239     \end{minipage}%
240     
241   \begin{itemize} \item \small  \textcolor{black}{DVFS reduces the energy consumption while 
242    keeping all the nodes working.}
243                 \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes.}  \end{itemize} 
244     
245 \vspace{-0.12cm}
246
247  \begin{block}{\textcolor{white}{Challenge and Objective}}
248
249         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it also degrades the performance of the CPU.}
250                 
251                 \vspace{0.1cm}
252  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
253 \end{block}
254  
255  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
256 for a warehouse-sized computer.
257
258     \end{frame}
259
260
261
262 %%%%%%%%%%%%%%%%%%%%
263 %%    SLIDE 08    %%
264 %%%%%%%%%%%%%%%%%%%% 
265
266
267 \begin{frame}{The first contribution}
268
269 \section{\small {Energy optimization of a homogeneous platform}}
270 %\vspace{-3cm}
271  % \includegraphics[width=0.6\textwidth]{white.pdf} 
272
273 \begin{center}
274 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
275 \end{center}
276  \end{frame}
277
278
279
280 %%%%%%%%%%%%%%%%%%%%
281 %%    SLIDE 09    %%
282 %%%%%%%%%%%%%%%%%%%% 
283  
284 \begin{frame}{Objectives}
285         
286                 \begin{itemize}   \small \justifying
287                  
288                    \item   Studying the effect of the frequency scaling  on the \textbf{energy consumption and performance } of parallel  applications with iterations. \medskip
289                    
290                    \item   Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
291                    \item   Proposing an algorithm for selecting the scaling factor that produces  \textbf {a good trade-off} between the energy consumption and the performance. \medskip
292                    \item   Comparing the proposed algorithm to existing methods.
293                    
294                    
295                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the  
296                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on. 
297                 \end{itemize}
298                  %\let\thefootnote\relax\footnote{}
299         
300         
301 \end{frame}
302
303
304
305
306 %%%%%%%%%%%%%%%%%%%%
307 %%    SLIDE 13   %%
308 %%%%%%%%%%%%%%%%%%%% 
309 \begin{frame}{Performance evaluation of MPI programs}  
310
311 \small The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.  
312     \vspace{5 mm}
313     
314         \begin{femtoBlock}{}
315               \vspace{-5 mm}
316               \begin{block}{\small Execution time prediction model}
317                      \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
318           \end{block}   
319           \vspace{5 mm}
320            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
321            \quad%
322            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
323             \vspace{1 mm}
324             
325            \small The maximum normalized error is 0.0073 for CG \textbf{(the smallest)} and 0.031 for LU \textbf{(the largest)}.
326            \end{femtoBlock}
327 \end{frame}
328
329
330
331
332
333
334
335   
336
337
338  
339  
340  
341
342 %%%%%%%%%%%%%%%%%%%%
343 %%    SLIDE 11   %%
344 %%%%%%%%%%%%%%%%%%%% 
345 \begin{frame}{Energy model for a homogeneous platform}    
346       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and  the static   
347        (\textcolor{red}{$P_s$}) powers. 
348     \begin{equation}
349      \label{eq:pd}
350      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot C_{L} \cdot V^2 \cdot F}
351    \end{equation}
352     \scriptsize \underline{Where}: \\ 
353     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity. \hspace{15 mm}  \textcolor{blue}{$C_{L}$}: load capacitance [F].\\     
354     \textcolor{blue}{$V$}: the supply voltage [V]. \hspace{8 mm} \textcolor{blue}{$F$}: operational frequency [Hz].}
355    \begin{equation}
356      \label{eq:ps}
357      \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
358    \end{equation}
359     \underline{Where}:\\ 
360         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage [V].  \hspace{19 mm}   \textcolor{blue}{$N_{trans}$}: number of transistors. \\   
361         \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{3 mm} \textcolor{blue}{$I_{leak}$}: technology dependent  
362              parameter [A].} 
363              
364              
365 \end{frame}
366
367
368   
369 %%%%%%%%%%%%%%%%%%%%
370 %%    SLIDE 12   %%
371 %%%%%%%%%%%%%%%%%%%% 
372
373 \begin{frame}{Energy model for a homogeneous platform}
374        \vspace{-0.77cm} 
375             \begin{figure}
376   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{homo-model/a-}{0}{441}
377   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
378   \end{figure}  
379               
380       %  \begin{block}{\small Rauber and Rünger's energy model}
381          %$ E = P_{d} \cdot S_1^{-2} \cdot
382          %\left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
383           %  P_{s} \cdot S_1  \cdot T_1 \cdot N$
384         %\end{block}     
385           % \textcolor{blue}{$S_1$}: the maximum scaling factor.\\ 
386           % \textcolor{blue}{$P_{d}$}: the dynamic power.\\
387           % \textcolor{blue}{$P_{s}$}: the static power.\\
388           % \textcolor{blue}{$T_I$}: the execution time of the slower task.\\ 
389           % \textcolor{blue}{$T_i$}: the execution time of task i.\\ 
390           % \textcolor{blue}{$N$}:  the number of  nodes.
391           
392           
393        
394 \end{frame}
395
396
397
398
399  %%%%%%%%%%%%%%%%%%%%
400 %%    SLIDE 14   %%
401 %%%%%%%%%%%%%%%%%%%%  
402 \begin{frame}{Performance and energy reduction trade-off}      
403         \begin{femtoBlock}{} \vspace{-15 mm}
404                \begin{figure}
405      \centering
406      \subfloat[\small  Real relation.]{%
407      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
408      \quad%
409      \subfloat[\small Converted relation.]{%
410      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
411   \label{fig:rel}
412  % \caption{The energy and performance relation}
413 \end{figure}
414
415  Where:~~~ $\textcolor{blue}{Performance} = execution~time^{-1}$
416
417 %\vspace{-0.3cm}
418       \small 
419          \begin{block}{\small Our objective function}
420          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}             
421                     (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} - 
422                      \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$}
423                                          
424         \end{block}                
425         \end{femtoBlock}
426        
427 \end{frame}
428
429 %%%%%%%%%%%%%%%%%%%%
430 %%    SLIDE 15   %%
431 %%%%%%%%%%%%%%%%%%%% 
432  %\begin{frame}{Scaling factor selection algorithm}
433 %\vspace{-0.75cm}
434     % \begin{center}
435       %\includegraphics[width=.56 \textwidth]{c1/algo-homo}
436      %\end{center}
437      
438 %\end{frame}
439
440
441 %%%%%%%%%%%%%%%%%%%%
442 %%    SLIDE 16   %%
443 %%%%%%%%%%%%%%%%%%%% 
444 \begin{frame}{Scaling factor selection algorithm}
445 \vspace{-0.75cm}
446      
447      \begin{figure}
448   \animategraphics[autopause,controls,scale=0.29,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{335}
449   %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
450   \end{figure}
451 \end{frame}
452
453 %%%%%%%%%%%%%%%%%%%%
454 %%    SLIDE 17   %%
455 %%%%%%%%%%%%%%%%%%%% 
456 \begin{frame}{Experiment over SimGrid }
457       \begin{femtoBlock}{}      
458         \begin{itemize}
459          \small
460            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
461            \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
462            \item Each node in the cluster has 18 frequency values from \textbf{2.5~GHz} to \textbf{800~MHz}.\medskip
463            \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
464            \item $P_d=20\,\mathrm{W}$,  $P_s=4\,\mathrm{W}$.
465                 \end{itemize}
466         \end{femtoBlock}
467 \end{frame}
468
469
470 %%%%%%%%%%%%%%%%%%%%
471 %%    SLIDE 18   %%
472 %%%%%%%%%%%%%%%%%%%% 
473 \begin{frame}{Experimental results}
474   \begin{femtoBlock}{}  
475       \centering { 
476      \includegraphics[width=.35\textwidth]{c1/ep}
477      \includegraphics[width=.35\textwidth]{c1/cg}
478      \includegraphics[width=.35\textwidth]{c1/bt}}
479      
480 \hspace{0.5cm}     
481      
482      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
483  \end{femtoBlock}
484 \end{frame}
485
486
487   %%%%%%%%%%%%%%%%%%%%
488 %%    SLIDE 19   %%
489 %%%%%%%%%%%%%%%%%%%% 
490 \begin{frame}{Results comparison}
491          \small \textcolor{blue}{Rauber and Rünger's  scaling factor  \textcolor{black}{ \tiny \textsuperscript{2}}}
492          
493          \vspace{2 mm}
494          
495            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
496             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $ 
497      
498         
499    \begin{center}
500             \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}
501    \end{center}
502             
503                      
504 \vspace{-2 mm}
505          \tiny \textsuperscript{2}  Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the energy consumption of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.
506 \end{frame}
507
508
509 %%%%%%%%%%%%%%%%%%%%
510 %%    SLIDE 20   %%
511 %%%%%%%%%%%%%%%%%%%% 
512 %\begin{frame}{The proposed new energy model}
513    % \vspace{-0.75cm}     
514   %\begin{figure}
515  % \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
516   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
517  % \end{figure}
518 %\end{frame}
519
520
521 %%%%%%%%%%%%%%%%%%%%
522 %%    SLIDE 21   %%
523 %%%%%%%%%%%%%%%%%%%% 
524 %\begin{frame}{\large Comparing the new model with Rauber's model }
525 % \vspace{0.1cm}    
526 % \centering
527     %\includegraphics[width=.45\textwidth]{c1/energy_con}
528     
529    %\includegraphics[width=.5\textwidth]{c1/compare-scales}
530 %\end{frame}
531
532
533
534
535    % \begin{frame}{Summary}
536      % \begin{femtoBlock}{}    
537      % \begin{itemize}
538       %\small
539        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
540        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
541          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip    
542          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
543                % performance}. \medskip
544          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
545          %\item The proposed new energy model is  \textcolor{blue}{more accurate} than Rauber's energy model.
546          %\end{itemize}      
547          
548         %\end{femtoBlock}
549 %\end{frame}
550
551
552 %%%%%%%%%%%%%%%%%%%%
553 %%    SLIDE 22    %%
554 %%%%%%%%%%%%%%%%%%%% 
555
556
557 \begin{frame}{The second contribution}
558
559 \section{\small {Energy optimization of a heterogeneous platform}}
560 \begin{center}
561
562
563 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
564 \end{center}
565  \end{frame}
566  
567
568
569 %%%%%%%%%%%%%%%%%%%%
570 %%    SLIDE 23    %%
571 %%%%%%%%%%%%%%%%%%%% 
572  
573 \begin{frame}{Objectives}
574         \begin{femtoBlock}{} \vspace{-12 mm}
575                 \begin{itemize} \small
576                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running  
577                           over a heterogeneous platform (cluster or Grid). \medskip
578                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
579                           message passing iterative applications.    \medskip                      
580                    
581                    \item  Computing  the vector of scaling factors ($S_1, S_2, \dots, S_n$)  producing \textcolor{blue} {a good trade-off} between
582                           the energy consumption and the performance. 
583                 \end{itemize}
584                  
585           \vspace{-10 mm}
586         \end{femtoBlock}      
587 \end{frame}
588
589
590 %%%%%%%%%%%%%%%%%%%%
591 %%    SLIDE 24    %%
592 %%%%%%%%%%%%%%%%%%%%
593 \begin{frame}{The execution time model}    
594       \vspace{-8 mm}
595      \begin{figure}[!t]
596        \centering
597        \includegraphics[scale=0.5]{c2/commtasks}
598        \label{fig:heter}
599      \end{figure}     
600        \vspace{-12 mm}
601        \medskip
602        
603     \begin{block}{\small The execution time prediction model}
604     \begin{equation}
605      \label{eq:perf}
606      \small\textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
607     \end{equation}
608     \end{block}   
609  \small  Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{communication~times + slack~times}$
610   
611 \end{frame}
612  
613  %%%%%%%%%%%%%%%%%%%%
614 %%    SLIDE 25    %%
615 %%%%%%%%%%%%%%%%%%%%
616  %\begin{frame}{The energy consumption model} 
617    % The overall energy consumption of a message passing synchronous  application executed over
618    %  a heterogeneous platform can be computed as  follows:
619    % \begin{multline}
620     % \label{eq:energy}
621    %  \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
622   %   \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i))}}   
623    %   \hspace{10 mm}
624    % \end{multline}
625    % \underline{where}:\\
626    % \textcolor{blue}{N} : is the number of nodes.
627 %\end{frame}
628  
629  
630 %%%%%%%%%%%%%%%%%%%%
631 %%    SLIDE 26    %%
632 %%%%%%%%%%%%%%%%%%%%
633   \begin{frame}{The energy  model  for heterogeneous cluster}
634   \vspace{-0.77cm}
635  \begin{figure}
636   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{heter-model/a-}{0}{350}
637   %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
638   \end{figure}
639  \end{frame}
640  
641  
642  
643  
644 %%%%%%%%%%%%%%%%%%%%
645 %%    SLIDE 27    %%
646 %%%%%%%%%%%%%%%%%%%%
647 %\begin{frame}{The trade-off between energy  and performance}
648    % \vspace{-7 mm}
649     %\begin{figure}
650    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
651    % \end{figure}
652    % \vspace{-7 mm}
653    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}} 
654     %{E_{Max}}$}. \\
655     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
656    
657    %  \begin{block}{\small The tradeoff model}
658     % \begin{equation}
659     %  \label{eq:max}
660     %  \textcolor{red}{MaxDist} =
661      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
662       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
663       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
664       %\end{equation}
665     % \end{block}  
666 %\end{frame}
667    
668  
669 %%%%%%%%%%%%%%%%%%%%
670 %%    SLIDE 28    %%
671 %%%%%%%%%%%%%%%%%%%%
672  %\begin{frame}{The scaling algorithm for heter. cluster}
673
674  %\centering
675    %\includegraphics[width=.52\textwidth]{algo-heter}
676  %\end{frame}
677  
678  
679  %%%%%%%%%%%%%%%%%%%%
680 %%    SLIDE 29    %%
681 %%%%%%%%%%%%%%%%%%%%
682  \begin{frame}{The scaling algorithm for heter. cluster}
683  \vspace{-0.77cm}
684  \centering
685  
686   \begin{figure}
687   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{836}
688  % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
689   \end{figure}
690 \end{frame}
691
692
693
694
695 %%%%%%%%%%%%%%%%%%%%
696 %%    SLIDE 30    %%
697 %%%%%%%%%%%%%%%%%%%%
698 %\begin{frame}{Experiments over a heterogeneous cluster  }   
699       %  \begin{itemize}
700         % \small
701           % \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
702           % \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
703           % \item Four types of processors with different computing powers were used.\medskip
704           % \item The benchmarks were executed with different number of nodes ranging from 4 to 144 nodes.\medskip
705           % \item It was assumed that the total power consumption of the CPU consist of 80\% dynamic power and 20\% static power.
706                  % \medskip
707          
708         %\end{itemize}
709
710 %\end{frame}  
711
712
713 %%%%%%%%%%%%%%%%%%%%
714 %%    SLIDE 31    %%
715 %%%%%%%%%%%%%%%%%%%%
716 %\begin{frame}{The simulation results}
717   % \vspace{-5 mm}
718   % \begin{figure}[!t]
719    %\centering
720     %\includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
721     
722    % \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%} 
723      %for the class C of the NAS Benchmarks executed over 8 nodes}
724     
725   % \end{figure}
726 %\end{frame} 
727  
728  
729  
730 %%%%%%%%%%%%%%%%%%%%
731 %%    SLIDE 32    %%
732 %%%%%%%%%%%%%%%%%%%%
733 %\begin{frame}{The simulation results}
734  %  \vspace{-5 mm}
735   % \begin{figure}[!t]
736   % \centering
737     
738    % \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
739    
740   % \textcolor{blue}{On average, it degrades  by \textcolor{red}{3.8\%} the performance
741     % of NAS Benchmarks class C executed over 8 nodes}
742   %   \end{figure}
743 %\end{frame} 
744  
745  
746  
747
748
749
750
751
752 %%%%%%%%%%%%%%%%%%%%
753 %%    SLIDE 35    %%
754 %%%%%%%%%%%%%%%%%%%%
755 %\begin{frame}{Energy optimization of grid platform} 
756   % \begin{figure}[!t]
757    % \centering
758          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
759              
760         %   \small  10 sites distributed over France and Luxembourg
761         %\end{figure}
762 %\end{frame} 
763
764
765 %%%%%%%%%%%%%%%%%%%%
766 %%    SLIDE 36    %%
767 %%%%%%%%%%%%%%%%%%%%
768 %\begin{frame}{The grid architecture}
769 %\begin{center}
770 %\includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
771 %\end{center}
772
773  %\begin{frame}{Performance, Energy and trade-off models} \small
774   %\begin{block}{\small The performance model of grid}
775    % \begin{equation}
776   %\label{eq:perf}
777   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij}) 
778  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
779 %\end{equation}
780     %\end{block}   
781  
782  
783  %\begin{block}{\small The energy model of grid}\small
784   %  \begin{equation}
785   %\label{eq:energy}
786  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  
787 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
788 %\end{equation}
789    % \end{block}  
790
791 %\begin{block}{\small The trade-off model of grid}
792 %\small
793     %\begin{equation}
794    %\label{eq:max}
795   %\MaxDist =
796   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
797    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
798     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
799 %\end{equation}
800    % \end{block}  
801      
802      
803  %\end{frame}
804   
805   
806   
807 %%%%%%%%%%%%%%%%%%%%
808 %%    SLIDE 37    %%
809 %%%%%%%%%%%%%%%%%%%%
810  \begin{frame}{Experiments over Grid'5000}
811  
812    \textcolor{blue}{The experiments were conducted using three 
813           clusters distributed over one or two sites.}
814            \vspace{-7 mm}
815           \begin{center}
816           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
817           \end{center}          
818       \vspace{-10 mm}
819   \textcolor{blue}{Grid'5000 power measurement tools were used.} 
820         \vspace{-9 mm}
821   \begin{center}
822           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
823           \end{center}
824           
825       
826 \end{frame}   
827
828
829
830
831 %%%%%%%%%%%%%%%%%%%%
832 %%    SLIDE 38    %%
833 %%%%%%%%%%%%%%%%%%%%
834 \begin{frame}{Experiments over Grid'5000}
835
836    \begin{minipage}{0.4\textwidth}
837        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by  
838         %\textcolor{red}{30\%}}
839      \small \textcolor{blue}{The average energy saving =  \textcolor{red}{30\%}}
840    \end{minipage}  
841      \begin{minipage}{0.55\textwidth}
842         \begin{figure}[h!]
843           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
844      \end{figure}
845 \end{minipage}
846
847          \begin{minipage}{0.4\textwidth}
848            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the 
849                 %performance by \textcolor{red}{3.2\%}}
850       \small  \textcolor{blue}{The average performance degradation  =  \textcolor{red}{3.2\%}}  
851         \end{minipage}
852        \begin{minipage}{0.55\textwidth}
853          \begin{figure}[h!] 
854            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
855          \end{figure}  
856           \end{minipage}
857  \end{frame}
858
859
860
861 %%%%%%%%%%%%%%%%%%%%
862 %%    SLIDE 33    %%
863 %%%%%%%%%%%%%%%%%%%%
864 \begin{frame}{The results of the three power scenarios}
865    \vspace{-5 mm}
866    \begin{figure}[!t]
867    \centering
868    \includegraphics[width=.45\textwidth]{c2/eng_pow.eps}
869    \hspace{0.3cm}
870    \includegraphics[width=.45\textwidth]{c2/per_pow.eps}
871    \vspace{4 mm}
872    \includegraphics[width=.7\textwidth]{c2/three_scenarios.pdf}
873    \end{figure}
874 \end{frame}  
875
876
877
878
879
880
881
882 %%%%%%%%%%%%%%%%%%%%
883 %%    SLIDE 39    %%
884 %%%%%%%%%%%%%%%%%%%%
885 \begin{frame}{One core and Multi-cores per node results} % two plots side by side with a one-line takeaway underneath
886    %\textcolor{blue}{One core  and Multi-cores per node results:}
887    
888   \begin{figure}[h!] 
889   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
890   \hspace{0.3cm} % NOTE(review): 2 x .48\textwidth + 0.3cm + inter-word spaces is close to the full line width -- check the log for an overfull or wrapped row
891   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
892   \end{figure} 
893   
894   \centering \small \textcolor{blue}{Using multi-cores per node scenario decreases the computations to communications ratio}.
895 \end{frame}
896
897
898 %%%%%%%%%%%%%%%%%%%%
899 %%    SLIDE 34    %%
900 %%%%%%%%%%%%%%%%%%%%
901 \begin{frame}{Comparing the objective function to EDP} % one plot plus a hand-made footnote for the EDP reference
902      
903      EDP is the product of the energy consumption and the delay{\tiny\textsuperscript{3}}. % braces keep \tiny local to the reference mark instead of leaking into the rest of the frame
904     \vspace{-5 mm}
905     \begin{figure}[!t]
906     \centering
907     \includegraphics[width=.6\textwidth]{c2/edp_dist.eps}
908     
909   
910     \end{figure}
911     
912   \tiny  \textsuperscript{3} Spiliopoulos et al., Green governors: A framework for continuously adaptive DVFS, in International Green Computing Conference and Workshops (IGCC), 2011.
913 \end{frame} 
914 %\begin{frame}{Summary}
915 %\begin{itemize}
916      % \small
917         % \item  Two scaling algorithms were applied to \textcolor{blue}{heterogeneous clusters} and \textcolor{blue}{grids}.
918         % \item  New \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
919       %   \item  The experiments were conducted over the \textcolor{blue}{SimGrid}  simulator and the real 
920           %test-bed \textcolor{blue}{Grid'5000}.
921          
922          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
923         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
924       %    clusters.
925          
926          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
927         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
928          
929        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
930      %    \end{itemize}   
931 %\end{frame}
932
933
934 %%%%%%%%%%%%%%%%%%%%
935 %%    SLIDE 40    %%
936 %%%%%%%%%%%%%%%%%%%%
937 \section{\small {Energy optimization of asynchronous applications}} % issued before the frame: beamer sectioning commands belong outside frame environments
938 \begin{frame}{The third contribution} % divider slide announcing the third contribution
939 \begin{center}
940 \Large\bfseries \textcolor{blue}{Energy optimization of asynchronous iterative message passing  applications}
941 \end{center}
942  \end{frame}
943
944
945
946 %%%%%%%%%%%%%%%%%%%%
947 %%    SLIDE 41   %%
948 %%%%%%%%%%%%%%%%%%%%
949 \begin{frame}{Problem definition}\vspace{0.8 mm} % animated illustration of the synchronous execution
950 \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
951 \vspace{-8 mm}
952 \begin{figure}
953  \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{syn/a-}{0}{647} % arguments: frame rate 10, file basename syn/a-, first frame 0, last frame 647
954  %\includegraphics[width=0.6\textwidth]{syn/a-503}
955   \end{figure}
956 \end{frame}
957
958
959
960 %%%%%%%%%%%%%%%%%%%%
961 %%    SLIDE 42   %%
962 %%%%%%%%%%%%%%%%%%%%
963 \begin{frame}{Problem definition}\vspace{0.8 mm} % animated illustration of the asynchronous execution
964 \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
965 \vspace{-8 mm}
966 \begin{figure}
967  \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn/a-}{0}{556} % arguments: frame rate 10, file basename asyn/a-, first frame 0, last frame 556
968  %\includegraphics[width=0.6\textwidth]{asyn/a-440}
969   \end{figure}
970 \end{frame}
971
972
973
974 %%%%%%%%%%%%%%%%%%%%
975 %%    SLIDE 43   %%
976 %%%%%%%%%%%%%%%%%%%%
977 \begin{frame}{Solution}\vspace{0.8mm} % animated illustration of asynchronous communications combined with DVFS
978 \textcolor{blue}{Using asynchronous communications with DVFS }
979 \vspace{-8 mm}
980 \begin{figure}
981   \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{344} % arguments: frame rate 10, file basename asyn+dvfs/a-, first frame 0, last frame 344
982   %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
983   \end{figure}
984 \end{frame}
985
986
987
988
989 %%%%%%%%%%%%%%%%%%%%
990 %%    SLIDE 44   %%
991 %%%%%%%%%%%%%%%%%%%%
992 %\begin{frame}{The performance models}
993
994 %\begin{block}{\small The performance model of Asynch. Applications}\small
995 %\begin{equation}
996   %\label{eq:asyn_time}
997  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
998 %\end{equation}
999 %\end{block}
1000
1001
1002 %\begin{block}{\small The performance model of Hybrid Applications}\small
1003 %\begin{equation}
1004   %\label{eq:asyn_perf}
1005   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +  
1006    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
1007 %\end{equation}
1008 %\end{block}
1009
1010
1011 %\end{frame}
1012
1013
1014
1015 %%%%%%%%%%%%%%%%%%%%
1016 %%    SLIDE 45   %%
1017 %%%%%%%%%%%%%%%%%%%%
1018 %\begin{frame}{The energy consumption models}
1019
1020 %\begin{block}{\small The energy model of Asynch. Applications}\small
1021 %\begin{equation}
1022   %\label{eq:asyn_energy1}
1023 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )} 
1024 %\end{equation} 
1025 %\end{block}
1026
1027
1028 %\begin{block}{\small The energy model of Hybrid Applications}\small
1029 %\begin{multline}
1030   %\label{eq:asyn_energy}
1031  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
1032 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]}))) 
1033 %\end{multline}
1034 %\end{block}
1035 %\end{frame}
1036
1037
1038
1039 %%%%%%%%%%%%%%%%%%%%
1040 %%    SLIDE 44   %%
1041 %%%%%%%%%%%%%%%%%%%%
1042 \begin{frame}{The performance and the energy models } % single centred image; it stands in for the two commented-out model frames above
1043
1044 \centering
1045 \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf} % unlike the other figures, this path has no c2/ or c3/ directory prefix
1046 \end{frame}
1047
1048
1049
1050
1051
1052 %%%%%%%%%%%%%%%%%%%%
1053 %%    SLIDE 46   %%
1054 %%%%%%%%%%%%%%%%%%%%
1055 %\begin{frame}{The scaling algorithm for Asynch.  applications}
1056 %\vspace{-0.1 mm}
1057 %\centering
1058 %\includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
1059 %\end{frame}
1060
1061
1062
1063 %%%%%%%%%%%%%%%%%%%%
1064 %%    SLIDE 47   %%
1065 %%%%%%%%%%%%%%%%%%%%
1066 \begin{frame}{The experiments} % architecture picture followed by two bullet points on the evaluation
1067    \vspace{-5 mm}
1068    \begin{figure}[!t]
1069    \begin{itemize} % one-item list used as a heading for the image that follows inside the same figure
1070       \small
1071         \item The architecture of the grid:
1072    \end{itemize}
1073     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf} 
1074    \end{figure}
1075    \begin{itemize}
1076       \small
1077         \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1078         \item Evaluating the application over the simulator and Grid'5000.
1079    \end{itemize}
1080 \end{frame} 
1081
1082
1083
1084 %%%%%%%%%%%%%%%%%%%%
1085 %%    SLIDE 48   %%
1086 %%%%%%%%%%%%%%%%%%%%
1087 %\begin{frame}{The simulation results}
1088 %\centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1089
1090 %\centering
1091    % \includegraphics[scale=0.42]{c3/energy_saving.eps}
1092
1093  %\centering  The average energy saving  = \textcolor{red}{22\%}
1094 %\end{frame} 
1095
1096
1097
1098 %%%%%%%%%%%%%%%%%%%%
1099 %%    SLIDE 49   %%
1100 %%%%%%%%%%%%%%%%%%%%
1101 %\begin{frame}{The simulation results}
1102 %\centering
1103    
1104    %  \includegraphics[scale=0.42]{c3/perf_degra.eps}
1105      
1106 %\centering    The average speed-up  = \textcolor{red}{5.72\%}
1107 %\end{frame} 
1108
1109
1110
1111 %%%%%%%%%%%%%%%%%%%%
1112 %%    SLIDE 50   %%
1113 %%%%%%%%%%%%%%%%%%%%
1114  \begin{frame}{The Grid'5000 results} % two comparison plots on one row, averages summarised underneath
1115    \vspace{-10 mm}
1116    \begin{figure}[!t]
1117    \centering
1118    \hspace{-8 mm} % the two plots total 1.06\textwidth; presumably this negative space is what lets them share one row
1119     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}                    
1120     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1121    \end{figure}
1122     \vspace{-5 mm}
1123      \centering \footnotesize
1124      
1125      %\small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1126      
1127 The average energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1128 \end{frame} 
1129
1130
1131 %%%%%%%%%%%%%%%%%%%%
1132 %%    SLIDE 51   %%
1133 %%%%%%%%%%%%%%%%%%%%
1134 \begin{frame}{The comparison results} % two centred plots stacked vertically
1135  \centering
1136     \includegraphics[width=.5\textwidth]{c3/compare.eps} % the blank line below ends the paragraph, so the second plot starts a new row
1137     
1138     \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1139 \end{frame} 
1140
1141
1142
1143
1144 %%%%%%%%%%%%%%%%%%%%
1145 %%    SLIDE 52  %%
1146 %%%%%%%%%%%%%%%%%%%%
1147 \section{Conclusions and Perspectives} % issued before the frame: beamer sectioning commands belong outside frame environments
1148 \begin{frame}{Conclusions} % summary of the contributions; \barrow is not defined in the visible part of the file -- presumably a theme-provided bullet macro
1149 \begin{itemize}
1150
1151 \small  \barrow  Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over 
1152 \textcolor{blue}{homogeneous and  heterogeneous clusters or grids}.  
1153       
1154
1155
1156 \small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
1157
1158 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1159
1160 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1161 Multi-splitting} method.
1162
1163 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over  the \textcolor{blue}{Grid'5000 testbed}.
1164
1165 \small  \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's  method} or to the \textcolor{blue}{EDP objective function}.
1166
1167
1168 \end{itemize}
1169 \end{frame}
1170
1171
1172
1173 %%%%%%%%%%%%%%%%%%%%
1174 %%    SLIDE 53   %%
1175 %%%%%%%%%%%%%%%%%%%%
1176 \begin{frame}{Publications} % two blocks (journals, conferences), each with its own bracket-numbered list
1177
1178 \begin{block}{\small Journal Articles }\scriptsize
1179 \begin{enumerate}[$\lbrack$1$\rbrack$] % label template: the 1 is replaced by the item number, giving [1], [2], ...
1180
1181 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational 
1182       Science}, 2016.
1183
1184 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for     
1185       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016, (Accepted with minor revisions)
1186  
1187 \end{enumerate}
1188 \end{block}
1189
1190
1191 \begin{block}{\small Conference Articles }\scriptsize
1192
1193 \begin{enumerate}[$\lbrack$1$\rbrack$] % numbering restarts at [1] for the conference list
1194
1195 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1196       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014},
1197       pp.~225--230. IEEE Computer Society, Milan, Italy (2014).
1198
1199 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1200       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1201       \textit{The 16\textsuperscript{th} PDSEC}. pp.~922--931. IEEE Computer Society, India (2015).
1202
1203 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
1204       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society, 
1205       Paris (2016).  
1206
1207 \end{enumerate}
1208
1209 \end{block}
1210 \end{frame}
1211
1212
1213 %%%%%%%%%%%%%%%%%%%%
1214 %%    SLIDE 54   %%
1215 %%%%%%%%%%%%%%%%%%%%
1216 \begin{frame}{Perspectives} % future-work bullets; \barrow is not defined in the visible part of the file -- presumably a theme-provided bullet macro
1217
1218 \begin{itemize}
1219
1220 \small  \barrow The proposed algorithms should  take into consideration the
1221 \textcolor{blue}{variability between some iterations}.
1222
1223 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1224
1225 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1226
1227 \small \barrow Comparing the results returned by the energy models to the values given by  \textcolor{blue}{real instruments that measure the energy consumptions} of CPUs during the execution time.
1228 \small \barrow  Considering the power consumed by the other devices in the node such as 
1229 \textcolor{blue}{the memory and the hard drive}  in the energy consumption model.
1230
1231 \end{itemize}
1232
1233 \end{frame}
1234
1235 %%%%%%%%%%%%%%%%%%%%
1236 %%    SLIDE 55  %%
1237 %%%%%%%%%%%%%%%%%%%%
1238 \begin{frame}{Fin} \vspace{-10 mm} % closing slide: thanks, then a prompt for questions 2cm lower
1239
1240             \centering \Large \textcolor{blue}{Thank you for your attention}
1241             
1242             \vspace{2cm}
1243             \centering \textcolor{blue}{ {\Large Questions?}} % the \Large declared earlier in this frame is still in force; the inner \Large group repeats it
1244         
1245 \end{frame}
1246 \end{document}
1247 %  _____ ___ _   _ 
1248 % |  ___|_ _| \ | |
1249 % | |_   | ||  \| |
1250 % |  _|  | || |\  |
1251 % |_|   |___|_| \_|
1252 %