]> AND Private Git Repository - ThesisAhmed.git/blob - thesis-presentation/AhmedSlides.tex
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
2a57d28c6b77ecb4a5e766fa6b1eb06b6d8fde9e
[ThesisAhmed.git] / thesis-presentation / AhmedSlides.tex
1  \documentclass{beamer}
2 \usepackage{beamerthemefemto}
3 \usepackage[latin1]{inputenc}
4 \usepackage[T1]{fontenc}
5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
6 \usepackage{algorithm,algorithmicx,algpseudocode}
7 \usepackage{graphicx,graphics}
8 \usepackage{subfig}
9 \usepackage{listings}
10 \usepackage{colortbl}
11 \usepackage{amsmath}
12 \usepackage{xspace}
13  \usepackage{movie15}
14  \usepackage{animate}
15 \usepackage{xmpmulti} 
% Review-note macros, one per reviewer:
%   \AG[<todo options>]{<text>}   note with a green!50 background, prefixed "AG:"
%   \JC[<todo options>]{<text>}   note with a red!10 background, prefixed "JC:"
% The optional argument defaults to "inline" and is appended to the \todo
% option list after the colour; the note text is set in sans-serif with the
% reviewer tag in bold, and \xspace follows the note.
% \todo is only needed when one of these macros is used; the todonotes
% package that provides it is loaded later in this preamble.
16  \newcommand{\AG}[2][inline]{%
17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}
18 \newcommand{\JC}[2][inline]{%
19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
20 \definecolor{myblue}{RGB}{0,29,119}
% \Xsub{<symbol>}{<subscript>}: typesets <symbol> with <subscript> as its
% subscript, the subscript being set with \mathit.  \ensuremath switches to
% math mode when needed, and the outer brace group keeps the result together
% so that a further subscript can be attached after it (the notation macros
% defined below in this preamble do exactly that).
21 \newcommand{\Xsub}[2]{{\ensuremath{#1_\mathit{#2}}}}
% NOTE(review): fixltx2e is reported as obsolete on LaTeX releases from 2015
% onward (its fixes were merged into the kernel) -- confirm whether this line
% is still needed.
22 \usepackage{fixltx2e}
23 %% used to put some subscripts lower, and make them more legible
% \fxheight{<index>}: produces nothing when <index> is empty; otherwise it
% puts a zero-width \rule of height 1.52ex in front of <index>.
% NOTE(review): the emptiness test is \ifx#1\relax, which compares the first
% two tokens following \ifx.  An <index> that starts with two identical
% tokens (e.g. "ii") would therefore also take the "empty" branch and be
% dropped -- confirm that no caller passes such an index.
24 \newcommand{\fxheight}[1]{\ifx#1\relax\relax\else\rule{0pt}{1.52ex}#1\fi}
25 \usepackage{ragged2e}
% Notation shortcuts (first group).
% Most entries expand to \Xsub{<letter>}{<label>}, i.e. <letter> with an
% italic label as subscript.  Entries declared with "[1][]" take an optional
% index that is attached as an additional subscript: \Fdiff inserts a
% negative thin space (\!) before it, while \Fmax and \Pd pass it through
% \fxheight.
% \Dist, \MaxDist and \MinTcm call \mathit directly rather than \Xsub, so
% they are presumably meant for math mode only -- confirm.  \MinTcm relies on
% \Tcm, which is defined further down in this preamble.
26 \newcommand{\CL}{\Xsub{C}{L}}
27 \newcommand{\Dist}{\mathit{Dist}}
28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
29 \newcommand{\Eind}{\Xsub{E}{ind}}
30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
33 \newcommand{\Es}{\Xsub{E}{S}}
34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}}
35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}}
36 \newcommand{\Fnew}{\Xsub{F}{new}}
37 \newcommand{\Vnew}{\Xsub{V}{new}}
38 \newcommand{\Vmax}{\Xsub{V}{max}}
39 \newcommand{\Ileak}{\Xsub{I}{leak}}
40 \newcommand{\Kdesign}{\Xsub{K}{design}}
41 \newcommand{\MaxDist}{\mathit{Max}\Dist}
42 \newcommand{\MinTcm}{\mathit{Min}\Tcm}
43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
46
% Notation shortcuts (second group).
% Every entry expands to \Xsub{<letter>}{<label>}, i.e. <letter> with an
% italic label as subscript.  Entries declared with "[1][]" take an optional
% index that is attached as an additional subscript: \Scp, \Sopt, \Tcp and
% \TcpOld attach it directly, the others pass it through \fxheight first.
47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}}
52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
56 \newcommand{\Tnew}{\Xsub{T}{New}}
57 \newcommand{\Told}{\Xsub{T}{Old}}
58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
63  \usepackage{pifont}
64 \usepackage{xcolor}
65 \definecolor{myblue}{RGB}{0,29,119}
66 \usepackage[textsize=footnotesize]{todonotes}
% List-item shortcuts: each one starts an \item whose label is a \ding glyph
% (pifont) coloured with the "myblue" colour defined in this preamble.
% Presumably a filled square (\bsquare) and two arrowhead variants (\barrow,
% \bwarrow), as the names suggest -- confirm against the pifont glyph table.
67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]} 
68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
71
72
73
74 %\title{Energy Consumption Optimization of Parallel Applications with
75 %Iterations using CPU Frequency Scaling} 
76 \vspace{2cm}
77
78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm}
79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}} 
80
81 \date{}
82 \vspace{-3cm}
83 %  ____  _____ ____  _   _ _____ 
84 % |  _ \| ____| __ )| | | |_   _|
85 % | | | |  _| |  _ \| | | | | |  
86 % | |_| | |___| |_) | |_| | | |  
87 % |____/|_____|____/ \___/  |_|  
88
89 \begin{document}
90 \setbeamertemplate{background}{\titrefemto}
91
92 %%%%%%%%%%%%%%%%%%%%
93 %%    SLIDE 01    %%
94 %%%%%%%%%%%%%%%%%%%% 
95 \begin{frame}[plain]
96 \vspace{1cm}
97 \centering
98    \titlepage
99 \end{frame}
100
101
102 %%%%%%%%%%%%%%%%%%%%
103 %%    SLIDE 02    %%
104 %%%%%%%%%%%%%%%%%%%% 
105 \setbeamertemplate{background}{\pagefemto}
106 \begin{frame}{Outline}
107
108 \setbeamertemplate{section in toc}[sections numbered] 
109 \tableofcontents
110 \end{frame}
111
112 %%%%%%%%%%%%%%%%%%%%
113 %%    SLIDE 03    %%
114 %%%%%%%%%%%%%%%%%%%% 
115 \begin{frame}{Introduction and problem definition}
116 \section{\small {Introduction and Problem definition}}
117  \centering
118  \includegraphics[width=0.99\textwidth]{para.pdf} 
119 \end{frame}
120
121  
122  
123  
124
125  
126
127
128 \begin{frame}{Execution of synchronous parallel tasks}
129 \vspace{-0.5 cm}
130 \begin{figure}
131   \centering
132   \subfloat[Synchronous imbalanced communications]{%
133     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
134   \subfloat[Synchronous imbalanced computations]{%
135     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
136  % \caption{Parallel tasks on homogeneous platform}
137   \label{fig:homo}
138 \end{figure}
139
140  \end{frame}
141  
142  
143 %%%%%%%%%%%%%%%%%%%%
144 %%    SLIDE 07   %%
145 %%%%%%%%%%%%%%%%%%%% 
146
147
148 \begin{frame}{\large Synchronous and asynchronous iterative methods }
149 \vspace{-0.5 cm}
150 \begin{figure}
151
152 \includegraphics[scale=0.42]{syn_tasks.pdf}
153 \vspace{0.6 cm}
154 \includegraphics[scale=0.42]{Asyn_tasks.pdf}
155 \end{figure}
156
157  
158  \end{frame}
159  
160  %%%%%%%%%%%%%%%%%%%%
161 %%    SLIDE 03    %%
162 %%%%%%%%%%%%%%%%%%%% 
163 \begin{frame}{Approaches to get more computing power}
164  
165    %\bf \textcolor{blue}{}
166      \begin{minipage}{0.5\textwidth} 
167       \textcolor{blue}{1)} \small  \bf \textcolor{black}{Increase the frequency of a  processor.\\ (limited due to overheating)}
168     \end{minipage}%
169     \begin{minipage}{0.6\textwidth} 
170     
171 \begin{figure}[h!]
172         
173     \includegraphics[width=0.7\textwidth]{fig/freq-years} 
174     \end{figure}
175     \end{minipage}%
176     \vspace{0.2cm}
177     \begin{minipage}{0.5\textwidth} 
178      \textcolor{blue}{2)} \small \bf \textcolor{black}{Increase the number of nodes.}
179      
180  \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}  
181           
182     \end{minipage}%
183     \begin{minipage}{0.6\textwidth} 
184     \begin{figure}[h!]
185      \includegraphics[width=0.7\textwidth]{fig/clusters} 
186     \end{figure}
187     \end{minipage}%
188  \end{frame}
189  
190  
191  
192  %%%%%%%%%%%%%%%%%%%
193 %%    SLIDE 04   %%
194 %%%%%%%%%%%%%%%%%%%% 
195 \begin{frame}{Techniques for energy consumption reduction}
196
197      \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}  
198     \vspace{-0.9cm}
199     \begin{figure}
200      \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{200}{on-off/a-}{0}{111}
201      %\includegraphics[width=0.6\textwidth]{on-off/a-69}
202     \end{figure}
203  \end{frame}
204
205 %%%%%%%%%%%%%%%%%%%%
206 %%    SLIDE 05    %%
207 %%%%%%%%%%%%%%%%%%%% 
208 \begin{frame}{Techniques for energy consumption reduction}
209  
210   \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic Voltage and Frequency Scaling (DVFS)}
211      \vspace{-0.9cm}
212     \begin{figure}
213     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{175}
214      %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
215     \end{figure}
216     \end{frame}
217  
218 %%%%%%%%%%%%%%%%%%%%
219 %%    SLIDE 06   %%
220 %%%%%%%%%%%%%%%%%%%% 
221 %%%%%%%%%%%%%%%%%%%%
222 %%    SLIDE 07    %%
223 %%%%%%%%%%%%%%%%%%%% 
224 \begin{frame}{Motivations}
225 \vspace{0.05cm}
226 \section{\small {Motivations}}
227 \textcolor{blue}{Why we used the DVFS method:}
228 \vspace{-0.49cm}
229 \begin{minipage}{0.5\textwidth} 
230     \vspace{-0.49cm} 
231       \begin{itemize} 
232        \item  \small \textcolor{black}{ The CPU is the component that consumes the  highest amount of energy in a node \textsuperscript{1}. }
233                 
234          \end{itemize}
235
236     \end{minipage}%
237     \begin{minipage}{0.5\textwidth}
238      \vspace{-0.49cm} 
239     \begin{figure}[h!]
240      \includegraphics[width=0.85\textwidth]{fig/node-power} 
241      
242     \end{figure}
243     \end{minipage}%
244     
245   \begin{itemize} \item \small  \textcolor{black}{DVFS reduces the energy consumption while 
246    keeping all the nodes working.}
247                 \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes.}  \end{itemize} 
248     
249 \vspace{-0.12cm}
250
251  \begin{block}{\textcolor{white}{Challenge and Objective}}
252
253         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it also degrades the performance of the CPU.}
254                 
255                 \vspace{0.1cm}
256  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
257 \end{block}
258  
259  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
260 for a warehouse-sized computer.
261
262     \end{frame}
263
264
265
266 %%%%%%%%%%%%%%%%%%%%
267 %%    SLIDE 08    %%
268 %%%%%%%%%%%%%%%%%%%% 
269
270
271 \begin{frame}{The first contribution}
272
273 \section{\small {Energy optimization of a homogeneous platform}}
274 %\vspace{-3cm}
275  % \includegraphics[width=0.6\textwidth]{white.pdf} 
276
277 \begin{center}
278 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
279 \end{center}
280  \end{frame}
281
282
283
284 %%%%%%%%%%%%%%%%%%%%
285 %%    SLIDE 09    %%
286 %%%%%%%%%%%%%%%%%%%% 
287  
288 \begin{frame}{Objectives}
289         
290                 \begin{itemize}   \small \justifying
291                  
292                    \item   Studying the effect of the scaling factor on the \textbf{energy consumption and performance } of parallel  applications with iterations. \medskip
293                    
294                    \item   Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
295                    \item   Proposing an algorithm for selecting the scaling factor that produces  \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
296                    \item   Comparing the proposed algorithm to existing methods.
297                    
298                    
299                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the  
300                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on. 
301                 \end{itemize}
302                  %\let\thefootnote\relax\footnote{}
303         
304         
305 \end{frame}
306
307
308
309
310  
311  
312  
313
314 %%%%%%%%%%%%%%%%%%%%
315 %%    SLIDE 11   %%
316 %%%%%%%%%%%%%%%%%%%% 
317 \begin{frame}{Energy model for a homogeneous platform}    
318       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and  the static   
319        (\textcolor{red}{$P_s$}) power. 
320     \begin{equation}
321      \label{eq:pd}
322      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot C_L \cdot V^2 \cdot F}
323    \end{equation}
324     \scriptsize \underline{Where}: \\
325     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm}  \textcolor{blue}{$C_L$}: load capacitance\\
326     \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
327    \begin{equation}
328      \label{eq:ps}
329      \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
330    \end{equation}
331     \underline{Where}:\\ 
332         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage.  \hspace{28 mm}   \textcolor{blue}{$N_{trans}$}: number of transistors. \\   
333         \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent  
334              parameter.} 
335              
336              The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.
337 \end{frame}
338
339 %%%%%%%%%%%%%%%%%%%%
340 %%    SLIDE 12   %%
341 %%%%%%%%%%%%%%%%%%%% 
342
343 \begin{frame}{Energy model for a homogeneous platform}
344        \vspace{-0.77cm}
345             \begin{figure}
346   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{homo-model/a-}{0}{441}
347   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
348   \end{figure}  
349               
350       %  \begin{block}{\small Rauber and Rünger's energy model}
351          %$ E = P_{d} \cdot S_1^{-2} \cdot
352          %\left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
353           %  P_{s} \cdot S_1  \cdot T_1 \cdot N$
354         %\end{block}     
355           % \textcolor{blue}{$S_1$}: the maximum scaling factor.\\ 
356           % \textcolor{blue}{$P_{d}$}: the dynamic power.\\
357           % \textcolor{blue}{$P_{s}$}: the static power.\\
358           % \textcolor{blue}{$T_I$}: the execution time of the slower task.\\ 
359           % \textcolor{blue}{$T_i$}: the execution time of task i.\\ 
360           % \textcolor{blue}{$N$}:  the number of  nodes.
361           
362           
363        
364 \end{frame}
365   
366   
367 %%%%%%%%%%%%%%%%%%%%
368 %%    SLIDE 13   %%
369 %%%%%%%%%%%%%%%%%%%% 
370 \begin{frame}{Performance evaluation of MPI programs}      
371         \begin{femtoBlock}{}
372               \vspace{-5 mm}
373               \begin{block}{\small Execution time prediction model}
374                      \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
375           \end{block}   
376           \vspace{10 mm}
377            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
378            \quad%
379            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
380             \vspace{5 mm}
381             
382            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
383            \end{femtoBlock}
384 \end{frame}
385
386
387
388
389  %%%%%%%%%%%%%%%%%%%%
390 %%    SLIDE 14   %%
391 %%%%%%%%%%%%%%%%%%%%  
392 \begin{frame}{Performance and energy reduction trade-off}      
393         \begin{femtoBlock}{} \vspace{-15 mm}
394                \begin{figure}
395      \centering
396      \subfloat[\small  Real relation.]{%
397      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
398      \quad%
399      \subfloat[\small Converted relation.]{%
400      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
401   \label{fig:rel}
402  % \caption{The energy and performance relation}
403 \end{figure}
404
405  Where:~~~ $\textcolor{blue}{Performance} = execution~time^{-1}$
406
407 %\vspace{-0.3cm}
408       \small 
409          \begin{block}{\small Our objective function}
410          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}             
411                     (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} - 
412                      \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$}
413                                          
414         \end{block}                
415         \end{femtoBlock}
416        
417 \end{frame}
418
419 %%%%%%%%%%%%%%%%%%%%
420 %%    SLIDE 15   %%
421 %%%%%%%%%%%%%%%%%%%% 
422  %\begin{frame}{Scaling factor selection algorithm}
423 %\vspace{-0.75cm}
424     % \begin{center}
425       %\includegraphics[width=.56 \textwidth]{c1/algo-homo}
426      %\end{center}
427      
428 %\end{frame}
429
430
431 %%%%%%%%%%%%%%%%%%%%
432 %%    SLIDE 16   %%
433 %%%%%%%%%%%%%%%%%%%% 
434 \begin{frame}{Scaling factor selection algorithm}
435 \vspace{-0.75cm}
436      
437      \begin{figure}
438   \animategraphics[autopause,controls,scale=0.29,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{335}
439   %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
440   \end{figure}
441 \end{frame}
442
443 %%%%%%%%%%%%%%%%%%%%
444 %%    SLIDE 17   %%
445 %%%%%%%%%%%%%%%%%%%% 
446 \begin{frame}{Experimental results }
447       \begin{femtoBlock}{}      
448         \begin{itemize}
449          \small
450            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
451            \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
452            \item Each node in the cluster has 18 frequency values from \textbf{2.5~GHz} to \textbf{800~MHz}.\medskip
453            \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
454            \item $P_d = 20$~W,  $P_s = 4$~W.
455                 \end{itemize}
456         \end{femtoBlock}
457 \end{frame}
458
459
460 %%%%%%%%%%%%%%%%%%%%
461 %%    SLIDE 18   %%
462 %%%%%%%%%%%%%%%%%%%% 
463 \begin{frame}{Experimental results}
464   \begin{femtoBlock}{}  
465       \centering { 
466      \includegraphics[width=.35\textwidth]{c1/ep}
467      \includegraphics[width=.35\textwidth]{c1/cg}
468      \includegraphics[width=.35\textwidth]{c1/bt}}
469      
470 \hspace{0.5cm}     
471      
472      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
473  \end{femtoBlock}
474 \end{frame}
475
476
477   %%%%%%%%%%%%%%%%%%%%
478 %%    SLIDE 19   %%
479 %%%%%%%%%%%%%%%%%%%% 
480 \begin{frame}{Results comparison}
481          \begin{block}{\small Rauber and Rünger's optimal scaling factor} 
482            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
483             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
484         \end{block}   
485         
486         
487     \centering {
488          %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
489          %\qquad
490          %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
491            
492          
493             \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}}
494         
495 \end{frame}
496
497
498 %%%%%%%%%%%%%%%%%%%%
499 %%    SLIDE 20   %%
500 %%%%%%%%%%%%%%%%%%%% 
501 %\begin{frame}{The proposed new energy model}
502    % \vspace{-0.75cm}     
503   %\begin{figure}
504  % \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
505   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
506  % \end{figure}
507 %\end{frame}
508
509
510 %%%%%%%%%%%%%%%%%%%%
511 %%    SLIDE 21   %%
512 %%%%%%%%%%%%%%%%%%%% 
513 %\begin{frame}{\large Comparing the new model with Rauber's model }
514 % \vspace{0.1cm}    
515 % \centering
516     %\includegraphics[width=.45\textwidth]{c1/energy_con}
517     
518    %\includegraphics[width=.5\textwidth]{c1/compare-scales}
519 %\end{frame}
520
521
522
523
524    % \begin{frame}{Summary}
525      % \begin{femtoBlock}{}    
526      % \begin{itemize}
527       %\small
528        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
529        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
530          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip    
531          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
532                % performance}. \medskip
533          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
534          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
535          %\end{itemize}      
536          
537         %\end{femtoBlock}
538 %\end{frame}
539
540
541 %%%%%%%%%%%%%%%%%%%%
542 %%    SLIDE 22    %%
543 %%%%%%%%%%%%%%%%%%%% 
544
545
546 \begin{frame}{The second contribution}
547
548 \section{\small {Energy optimization of a heterogeneous platform}}
549 \begin{center}
550
551
552 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
553 \end{center}
554  \end{frame}
555  
556
557
558 %%%%%%%%%%%%%%%%%%%%
559 %%    SLIDE 23    %%
560 %%%%%%%%%%%%%%%%%%%% 
561  
562 \begin{frame}{Objectives}
563         \begin{femtoBlock}{} \vspace{-12 mm}
564                 \begin{itemize} \small
565                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running  
566                           over a heterogeneous platform (cluster or Grid). \medskip
567                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
568                           message passing iterative applications.    \medskip                      
569                    
570                    \item  Computing  the vector of scaling factors ($S_1, S_2, \dots, S_n$)  producing \textcolor{blue} {the optimal trade-off} between
571                           the energy consumption and the performance. 
572                 \end{itemize}
573                  
574           \vspace{-10 mm}
575         \end{femtoBlock}      
576 \end{frame}
577
578
579 %%%%%%%%%%%%%%%%%%%%
580 %%    SLIDE 24    %%
581 %%%%%%%%%%%%%%%%%%%%
582 \begin{frame}{The execution time model}    
583       \vspace{-8 mm}
584      \begin{figure}[!t]
585        \centering
586        \includegraphics[scale=0.5]{c2/commtasks}
587        \label{fig:heter}
588      \end{figure}     
589        \vspace{-12 mm}
590        \medskip
591        
592     \begin{block}{\small The execution time prediction model}
593     \begin{equation}
594      \label{eq:perf}
595      \small\textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
596     \end{equation}
597     \end{block}   
598  \small  Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{communication~times + slack~times}$
599   
600 \end{frame}
601  
602  %%%%%%%%%%%%%%%%%%%%
603 %%    SLIDE 25    %%
604 %%%%%%%%%%%%%%%%%%%%
605  %\begin{frame}{The energy consumption model} 
606    % The overall energy consumption of a message passing synchronous  application executed over
607    %  a heterogeneous platform can be computed as  follows:
608    % \begin{multline}
609     % \label{eq:energy}
610    %  \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
611   %   \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i))}}   
612    %   \hspace{10 mm}
613    % \end{multline}
614    % \underline{where}:\\
615    % \textcolor{blue}{N} : is the number of nodes.
616 %\end{frame}
617  
618  
619 %%%%%%%%%%%%%%%%%%%%
620 %%    SLIDE 26    %%
621 %%%%%%%%%%%%%%%%%%%%
622   \begin{frame}{The energy  model  for heterogeneous cluster}
623   \vspace{-0.77cm}
624  \begin{figure}
625   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{heter-model/a-}{0}{350}
626   %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
627   \end{figure}
628  \end{frame}
629  
630  
631  
632  
633 %%%%%%%%%%%%%%%%%%%%
634 %%    SLIDE 27    %%
635 %%%%%%%%%%%%%%%%%%%%
636 %\begin{frame}{The trade-off between energy  and performance}
637    % \vspace{-7 mm}
638     %\begin{figure}
639    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
640    % \end{figure}
641    % \vspace{-7 mm}
642    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}} 
643     %{E_{Max}}$}. \\
644     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
645    
646    %  \begin{block}{\small The tradeoff model}
647     % \begin{equation}
648     %  \label{eq:max}
649     %  \textcolor{red}{MaxDist} =
650      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
651       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
652       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
653       %\end{equation}
654     % \end{block}  
655 %\end{frame}
656    
657  
658 %%%%%%%%%%%%%%%%%%%%
659 %%    SLIDE 28    %%
660 %%%%%%%%%%%%%%%%%%%%
661  %\begin{frame}{The scaling algorithm for heter. cluster}
662
663  %\centering
664    %\includegraphics[width=.52\textwidth]{algo-heter}
665  %\end{frame}
666  
667  
668  %%%%%%%%%%%%%%%%%%%%
669 %%    SLIDE 29    %%
670 %%%%%%%%%%%%%%%%%%%%
671  \begin{frame}{The scaling algorithm for heter. cluster}
672  \vspace{-0.77cm}
673  \centering
674  
675   \begin{figure}
676   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{836}
677  % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
678   \end{figure}
679 \end{frame}
680
681
682
683
684 %%%%%%%%%%%%%%%%%%%%
685 %%    SLIDE 30    %%
686 %%%%%%%%%%%%%%%%%%%%
687 %\begin{frame}{Experiments over a heterogeneous cluster  }   
688       %  \begin{itemize}
689         % \small
690           % \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
691           % \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
692           % \item Four types of processors with different computing powers were used.\medskip
693           % \item The benchmarks were executed with different number of nodes ranging from 4 to 144 nodes.\medskip
694           % \item It was assumed that the total power consumption of the CPU consist of 80\% dynamic power and 20\% static power.
695                  % \medskip
696          
697         %\end{itemize}
698
699 %\end{frame}  
700
701
702 %%%%%%%%%%%%%%%%%%%%
703 %%    SLIDE 31    %%
704 %%%%%%%%%%%%%%%%%%%%
705 %\begin{frame}{The simulation results}
706   % \vspace{-5 mm}
707   % \begin{figure}[!t]
708    %\centering
709     %\includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
710     
711    % \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%} 
712      %for the class C of the NAS Benchmarks executed over 8 nodes}
713     
714   % \end{figure}
715 %\end{frame} 
716  
717  
718  
719 %%%%%%%%%%%%%%%%%%%%
720 %%    SLIDE 32    %%
721 %%%%%%%%%%%%%%%%%%%%
722 %\begin{frame}{The simulation results}
723  %  \vspace{-5 mm}
724   % \begin{figure}[!t]
725   % \centering
726     
727    % \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
728    
729   % \textcolor{blue}{On average, it degrades  by \textcolor{red}{3.8\%} the performance
730     % of NAS Benchmarks class C executed over 8 nodes}
731   %   \end{figure}
732 %\end{frame} 
733  
734  
735  
736
737
738
739
740
741 %%%%%%%%%%%%%%%%%%%%
742 %%    SLIDE 35    %%
743 %%%%%%%%%%%%%%%%%%%%
744 %\begin{frame}{Energy optimization of grid platform} 
745   % \begin{figure}[!t]
746    % \centering
747          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
748              
749         %   \small  10 sites distributed over France and Luxembourg
750         %\end{figure}
751 %\end{frame} 
752
753
754 %%%%%%%%%%%%%%%%%%%%
755 %%    SLIDE 36    %%
756 %%%%%%%%%%%%%%%%%%%%
757 %\begin{frame}{The grid architecture}
758 %\begin{center}
759 %\includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
760 %\end{center}
761
762  %\begin{frame}{Performance, Energy and trade-off models} \small
763   %\begin{block}{\small The performance model of grid}
764    % \begin{equation}
765   %\label{eq:perf}
766   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij}) 
767  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
768 %\end{equation}
769     %\end{block}   
770  
771  
772  %\begin{block}{\small The energy model of grid}\small
773   %  \begin{equation}
774   %\label{eq:energy}
775  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  
776 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
777 %\end{equation}
778    % \end{block}  
779
780 %\begin{block}{\small The trade-off model of grid}
781 %\small
782     %\begin{equation}
783    %\label{eq:max}
784   %\MaxDist =
785   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
786    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
787     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
788 %\end{equation}
789    % \end{block}  
790      
791      
792  %\end{frame}
793   
794   
795   
796 %%%%%%%%%%%%%%%%%%%%
797 %%    SLIDE 37    %%
798 %%%%%%%%%%%%%%%%%%%%
799  \begin{frame}{Experiments over Grid'5000}
800  
801    \textcolor{blue}{The experiments were conducted using three 
802           clusters distributed over one or two sites.}
803            \vspace{-7 mm}
804           \begin{center}
805           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
806           \end{center}          
807       \vspace{-10 mm}
808   \textcolor{blue}{Grid'5000 power measurement tools were used.} 
809         \vspace{-9 mm}
810   \begin{center}
811           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
812           \end{center}
813           
814       
815 \end{frame}   
816
817
818
819
820 %%%%%%%%%%%%%%%%%%%%
821 %%    SLIDE 38    %%
822 %%%%%%%%%%%%%%%%%%%%
823 \begin{frame}{Experiments over Grid'5000}
824
825    \begin{minipage}{0.4\textwidth}
826        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by  
827         %\textcolor{red}{30\%}}
828      \small \textcolor{blue}{The average energy saving =  \textcolor{red}{30\%}}
829    \end{minipage}  
830      \begin{minipage}{0.55\textwidth}
831         \begin{figure}[h!]
832           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
833      \end{figure}
834 \end{minipage}
835
836          \begin{minipage}{0.4\textwidth}
837            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the 
838                 %performance by \textcolor{red}{3.2\%}}
839       \small  \textcolor{blue}{The average performance degradation  =  \textcolor{red}{3.2\%}}  
840         \end{minipage}
841        \begin{minipage}{0.55\textwidth}
842          \begin{figure}[h!] 
843            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
844          \end{figure}  
845           \end{minipage}
846  \end{frame}
847
848
849
850 %%%%%%%%%%%%%%%%%%%%
851 %%    SLIDE 33    %%
852 %%%%%%%%%%%%%%%%%%%%
853 \begin{frame}{The results of the three power scenarios}
854    \vspace{-5 mm}
855    \begin{figure}[!t]
856    \centering
857    \includegraphics[width=.45\textwidth]{c2/eng_pow.eps}
858    \hspace{0.3cm}
859    \includegraphics[width=.45\textwidth]{c2/per_pow.eps}
860    \vspace{4 mm}
861    \includegraphics[width=.7\textwidth]{c2/three_scenarios.pdf}
862    \end{figure}
863 \end{frame}  
864
865
866
867
868
869
870
871 %%%%%%%%%%%%%%%%%%%%
872 %%    SLIDE 39    %%  Graphics c2/eng_s_mc.eps and c2/per_d_mc.eps with a one-line remark
873 %%%%%%%%%%%%%%%%%%%%
874 \begin{frame}{One core and Multi-cores per node results}
875   %\textcolor{blue}{One core  and Multi-cores per node results:}
876
877   \begin{figure}[h!]
878     \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
879     \hspace{0.3cm}
880     \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
881   \end{figure}
882
883   \centering \small \textcolor{blue}{Using the multi-cores per node scenario decreases the computation to communication ratio.} % full stop kept inside the coloured group so it matches the sentence
884 \end{frame}
885
886
887 %%%%%%%%%%%%%%%%%%%%
888 %%    SLIDE 34    %%  One-sentence definition of EDP above the graphic c2/edp_dist.eps
889 %%%%%%%%%%%%%%%%%%%%
890 \begin{frame}{Comparing the objective function to EDP}
891
892   EDP is the product of the energy consumption and the delay.
893   \vspace{-5mm}
894   \begin{figure}[!t]
895     \centering
896     \includegraphics[width=.6\textwidth]{c2/edp_dist.eps}
897
898
899   \end{figure}
900 \end{frame}
901 %\begin{frame}{Summary}
902 %\begin{itemize}
903      % \small
904         % \item  Two scaling algorithms were applied to \textcolor{blue}{heterogeneous cluster} and \textcolor{blue}{grid}.
905         % \item  A new \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
906       %   \item  The experiments were conducted over the \textcolor{blue}{SimGrid}  simulator and the real 
907           %test-bed \textcolor{blue}{Grid'5000}.
908          
909          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
910         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
911       %    clusters.
912          
913          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
914         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
915          
916        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
917      %    \end{itemize}   
918 %\end{frame}
919
920
921 %%%%%%%%%%%%%%%%%%%%
922 %%    SLIDE 40    %%  Title slide opening the third contribution
923 %%%%%%%%%%%%%%%%%%%%
924 \section{\small {Energy optimization of asynchronous applications}} % declared between frames: beamer sectioning commands belong outside the frame environment
925 \begin{frame}{The third contribution}
926   \begin{center}
927     \bfseries \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing  applications}
928   \end{center}
929 \end{frame}
930
931
932
933 %%%%%%%%%%%%%%%%%%%%
934 %%    SLIDE 41   %%  Animation built from frames syn/a-0 .. syn/a-647, frame rate 10
935 %%%%%%%%%%%%%%%%%%%%
936 \begin{frame}{Problem definition}\vspace{0.8mm}
937   \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
938   \vspace{-8mm}
939   \begin{figure}
940     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{syn/a-}{0}{647}
941     %\includegraphics[width=0.6\textwidth]{syn/a-503}
942   \end{figure}
943 \end{frame}
944
945
946
947 %%%%%%%%%%%%%%%%%%%%
948 %%    SLIDE 42   %%  Animation built from frames asyn/a-0 .. asyn/a-556, frame rate 10
949 %%%%%%%%%%%%%%%%%%%%
950 \begin{frame}{Problem definition}\vspace{0.8mm}
951   \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
952   \vspace{-8mm}
953   \begin{figure}
954     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn/a-}{0}{556}
955     %\includegraphics[width=0.6\textwidth]{asyn/a-440}
956   \end{figure}
957 \end{frame}
958
959
960
961 %%%%%%%%%%%%%%%%%%%%
962 %%    SLIDE 43   %%  Animation built from frames asyn+dvfs/a-0 .. asyn+dvfs/a-344, frame rate 10
963 %%%%%%%%%%%%%%%%%%%%
964 \begin{frame}{Solution}\vspace{0.8mm}
965   \textcolor{blue}{Using asynchronous communications with DVFS }
966   \vspace{-8mm}
967   \begin{figure}
968     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{344}
969     %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
970   \end{figure}
971 \end{frame}
972
973
974
975
976 %%%%%%%%%%%%%%%%%%%%
977 %%    SLIDE 44   %%
978 %%%%%%%%%%%%%%%%%%%%
979 %\begin{frame}{The performance models}
980
981 %\begin{block}{\small The performance model of Asynch. Applications}\small
982 %\begin{equation}
983   %\label{eq:asyn_time}
984  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
985 %\end{equation}
986 %\end{block}
987
988
989 %\begin{block}{\small The performance model of Hybrid Applications}\small
990 %\begin{equation}
991   %\label{eq:asyn_perf}
992   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +  
993    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
994 %\end{equation}
995 %\end{block}
996
997
998 %\end{frame}
999
1000
1001
1002 %%%%%%%%%%%%%%%%%%%%
1003 %%    SLIDE 45   %%
1004 %%%%%%%%%%%%%%%%%%%%
1005 %\begin{frame}{The energy consumption models}
1006
1007 %\begin{block}{\small The energy model of Asynch. Applications}\small
1008 %\begin{equation}
1009   %\label{eq:asyn_energy1}
1010 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )} 
1011 %\end{equation} 
1012 %\end{block}
1013
1014
1015 %\begin{block}{\small The energy model of Hybrid Applications}\small
1016 %\begin{multline}
1017   %\label{eq:asyn_energy}
1018  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
1019 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]}))) 
1020 %\end{multline}
1021 %\end{block}
1022 %\end{frame}
1023
1024
1025
1026 %%%%%%%%%%%%%%%%%%%%
1027 %%    SLIDE 44   %%  Single centred graphic: syn-vs-asyn.pdf
1028 %%%%%%%%%%%%%%%%%%%%
1029 \begin{frame}{The performance and the energy models }
1030
1031   \centering
1032   \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
1033 \end{frame}
1034
1035
1036
1037
1038
1039 %%%%%%%%%%%%%%%%%%%%
1040 %%    SLIDE 46   %%  Single centred graphic: algo-hybrid.pdf
1041 %%%%%%%%%%%%%%%%%%%%
1042 \begin{frame}{The scaling algorithm for Asynch.  applications}
1043   \vspace{-0.1mm}
1044   \centering
1045   \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
1046 \end{frame}
1047
1048
1049
1050 %%%%%%%%%%%%%%%%%%%%
1051 %%    SLIDE 47   %%  Graphic c3/hybrid-model.pdf followed by two bullet points
1052 %%%%%%%%%%%%%%%%%%%%
1053 \begin{frame}{The experiments}
1054   \vspace{-5mm}
1055   \begin{figure}[!t]
1056     \begin{itemize}
1057       \small
1058       \item The architecture of the grid:
1059     \end{itemize}
1060     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
1061   \end{figure}
1062   \begin{itemize}
1063     \small
1064     \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1065     \item Evaluating the application over the simulator and Grid'5000.
1066   \end{itemize}
1067 \end{frame}
1068
1069
1070
1071 %%%%%%%%%%%%%%%%%%%%
1072 %%    SLIDE 48   %%
1073 %%%%%%%%%%%%%%%%%%%%
1074 %\begin{frame}{The simulation results}
1075 %\centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1076
1077 %\centering
1078    % \includegraphics[scale=0.42]{c3/energy_saving.eps}
1079
1080  %\centering  The average energy saving  = \textcolor{red}{22\%}
1081 %\end{frame} 
1082
1083
1084
1085 %%%%%%%%%%%%%%%%%%%%
1086 %%    SLIDE 49   %%
1087 %%%%%%%%%%%%%%%%%%%%
1088 %\begin{frame}{The simulation results}
1089 %\centering
1090    
1091    %  \includegraphics[scale=0.42]{c3/perf_degra.eps}
1092      
1093 %\centering    The average speed-up  = \textcolor{red}{5.72\%}
1094 %\end{frame} 
1095
1096
1097
1098 %%%%%%%%%%%%%%%%%%%%
1099 %%    SLIDE 50   %%  Graphics c3/energy-s-compare.eps and c3/perf-deg-compare.eps with the stated averages
1100 %%%%%%%%%%%%%%%%%%%%
1101 \begin{frame}{The Grid'5000 results}
1102   \vspace{-10mm}
1103   \begin{figure}[!t]
1104     \centering
1105     \hspace{-8mm}
1106     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1107     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1108   \end{figure}
1109   \vspace{-5mm}
1110   \centering \footnotesize
1111
1112   %\small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1113
1114   The average energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1115 \end{frame}
1116
1117
1118 %%%%%%%%%%%%%%%%%%%%
1119 %%    SLIDE 51   %%  Two centred graphics, one per paragraph: c3/compare.eps, c3/compare_scales.eps
1120 %%%%%%%%%%%%%%%%%%%%
1121 \begin{frame}{The comparison results}
1122   \centering
1123   \includegraphics[width=0.5\textwidth]{c3/compare.eps}
1124
1125   \includegraphics[width=0.5\textwidth]{c3/compare_scales.eps}
1126 \end{frame}
1127
1128
1129
1130
1131 %%%%%%%%%%%%%%%%%%%%
1132 %%    SLIDE 52  %%  Summary of the contributions as a \barrow list
1133 %%%%%%%%%%%%%%%%%%%%
1134 \section{Conclusions and Perspectives} % declared between frames: beamer sectioning commands belong outside the frame environment
1135 \begin{frame}{Conclusions}
1136 \begin{itemize}
1137
1138 \small \barrow Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over 
1139 \textcolor{blue}{homogeneous and  heterogeneous clusters or grids}.  
1140
1141
1142
1143 \small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
1144
1145 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1146
1147 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1148 Multi-splitting} method.
1149
1150 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over  the \textcolor{blue}{Grid'5000 testbed}.
1151
1152 \small \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's  method} or to the \textcolor{blue}{EDP objective function}.
1153
1154
1155 \end{itemize}
1156 \end{frame}
1157
1158
1159
1160 %%%%%%%%%%%%%%%%%%%%
1161 %%    SLIDE 53   %%  Publication list: one block of journal articles, one of conference articles
1162 %%%%%%%%%%%%%%%%%%%%
1163 \begin{frame}{Publications}
1164
1165 \begin{block}{\small Journal Articles }\scriptsize
1166 \begin{enumerate}[$\lbrack$1$\rbrack$]
1167
1168 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational 
1169       Science}, 2016.
1170
1171 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for     
1172       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016 (submitted).
1173  
1174 \end{enumerate}
1175 \end{block}
1176
1177
1178 \begin{block}{\small Conference Articles }\scriptsize
1179
1180 \begin{enumerate}[$\lbrack$1$\rbrack$]
1181
1182 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1183       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014},
1184       pp.~225--230. IEEE Computer Society, Milan, Italy (2014).
1185
1186 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1187       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1188       \textit{The 16\textsuperscript{th} PDSEC}, pp.~922--931. IEEE Computer Society, India (2015).
1189
1190 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
1191       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society, 
1192       Paris (2016).  
1193
1194 \end{enumerate}
1195
1196 \end{block}
1197 \end{frame}
1198
1199
1200 %%%%%%%%%%%%%%%%%%%%
1201 %%    SLIDE 54   %%  Future work as a \barrow list
1202 %%%%%%%%%%%%%%%%%%%%
1203 \begin{frame}{Perspectives}
1204
1205 \begin{itemize}
1206
1207 \small \barrow The proposed algorithms should  take into consideration the
1208 \textcolor{blue}{variability between some iterations}.
1209
1210 \small \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1211
1212 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1213
1214 \small \barrow The results returned by the energy models should be compared to the values given by  \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1215 \end{itemize}
1216
1217 \end{frame}
1218
1219 %%%%%%%%%%%%%%%%%%%%
1220 %%    SLIDE 55  %%  Closing slide: thanks and questions; the document ends right after it
1221 %%%%%%%%%%%%%%%%%%%%
1222 \begin{frame}{Fin} \vspace{-10mm}
1223
1224   \centering \Large \textcolor{blue}{Thank you for your attention}
1225
1226   \vspace{2cm}
1227   \centering \textcolor{blue}{ {\Large Questions?}}
1228
1229 \end{frame}
1230 \end{document}
1231 %  _____ ___ _   _ 
1232 % |  ___|_ _| \ | |
1233 % | |_   | ||  \| |
1234 % |  _|  | || |\  |
1235 % |_|   |___|_| \_|
1236 %