]> AND Private Git Repository - ThesisAhmed.git/blob - thesis-presentation/AhmedSlides.tex
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
adding the presentation corrections
[ThesisAhmed.git] / thesis-presentation / AhmedSlides.tex
1  \documentclass{beamer}
2 \usepackage{beamerthemefemto}
3 \usepackage[latin1]{inputenc}
4 \usepackage[T1]{fontenc}
5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
6 \usepackage{algorithm,algorithmicx,algpseudocode}
7 \usepackage{graphicx,graphics}
8 \usepackage{subfig}
9 \usepackage{listings}
10 \usepackage{colortbl}
11 \usepackage{amsmath}
12 \usepackage{xspace}
13  \usepackage{movie15}
14  \usepackage{animate}
15 \usepackage{xmpmulti} 
16  \newcommand{\AG}[2][inline]{% review note signed "AG": #1 = extra \todo options (default: inline), #2 = note text
17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}% green note; \todo is presumably the todonotes command (package loaded further down)
18 \newcommand{\JC}[2][inline]{% review note signed "JC": same interface as \AG
19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}% pale-red note
20 \definecolor{myblue}{RGB}{0,29,119}
21 \newcommand{\Xsub}[2]{{\ensuremath{#1_{\mathit{#2}}}}}% #1 = base symbol, #2 = multi-letter subscript set in math italic; the subscript is braced so it never depends on how \mathit expands; \ensuremath makes it usable in text or math
22 \usepackage{fixltx2e}
23 %% \fxheight{<sub>}: puts an invisible 1.52ex-high strut before a non-empty subscript so it sits lower and is more legible; an empty argument expands to nothing (tested with \detokenize so that arguments such as "ii" or "11" are not mistaken for empty)
24 \newcommand{\fxheight}[1]{\if\relax\detokenize{#1}\relax\else\rule{0pt}{1.52ex}#1\fi}
25
26 \newcommand{\CL}{\Xsub{C}{L}}% notation shortcuts start here; most are built on \Xsub{<base>}{<subscript>}. C_L: load capacitance (see the dynamic-power slide)
27 \newcommand{\Dist}{\mathit{Dist}}
28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
29 \newcommand{\Eind}{\Xsub{E}{ind}}
30 \newcommand{\Enorm}{\Xsub{E}{Norm}}% normalized energy, as used in the MaxDist objective
31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
33 \newcommand{\Es}{\Xsub{E}{S}}
34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}}% optional #1 becomes a second subscript, pulled left by \!; the trailing _{...} lies outside \Xsub's \ensuremath, so use in math mode
35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}}% optional #1 becomes a second subscript via \fxheight; needs math mode for the same reason
36 \newcommand{\Fnew}{\Xsub{F}{new}}% new frequency (denominator of the scaling factor S)
37 \newcommand{\Vnew}{\Xsub{V}{new}}
38 \newcommand{\Vmax}{\Xsub{V}{max}}
39 \newcommand{\Ileak}{\Xsub{I}{leak}}% technology dependent parameter of the static-power formula
40 \newcommand{\Kdesign}{\Xsub{K}{design}}% design dependent parameter of the static-power formula
41 \newcommand{\MaxDist}{\mathit{Max}\Dist}% "Max" followed by \Dist
42 \newcommand{\MinTcm}{\mathit{Min}\Tcm}% "Min" followed by \Tcm, which is defined further down and only resolved when \MinTcm is used
43 \newcommand{\Ntrans}{\Xsub{N}{trans}}% number of transistors
44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}% dynamic power; optional #1 = index subscript
45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
46
47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}% normalized performance, as used in the MaxDist objective
49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}% static power; optional #1 = index subscript
51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}}% optional #1 is appended as a plain second subscript (no \fxheight strut)
52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}% communication times + slack times (see the execution-time slide)
54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}% presumably the computation time -- confirm against the thesis text
55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
56 \newcommand{\Tnew}{\Xsub{T}{New}}
57 \newcommand{\Told}{\Xsub{T}{Old}}
58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
63  \usepackage{pifont}
64 \usepackage{xcolor}
65 \definecolor{myblue}{RGB}{0,29,119}
66 \usepackage[textsize=footnotesize]{todonotes}
67 \newcommand{\bsquare}{\item[\textcolor{myblue}{\ding{110}}]}% list item whose label is pifont dingbat 110 in myblue
68 \newcommand{\barrow}{\item[\textcolor{myblue}{\ding{228}}]}% same, with dingbat 228
69 \newcommand{\bwarrow}{\item[\textcolor{myblue}{\ding{227}}]}% same, with dingbat 227
70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
71
72
73
74 %\title{Energy Consumption Optimization of Parallel Applications with
75 %Iterations using CPU Frequency Scaling} 
76 \vspace{2cm}
77
78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-1cm}
79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under Supervision: \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ University of Franche-Comté - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}} 
80
81 \date{}
82 \vspace{-3cm}
83 %  ____  _____ ____  _   _ _____ 
84 % |  _ \| ____| __ )| | | |_   _|
85 % | | | |  _| |  _ \| | | | | |  
86 % | |_| | |___| |_) | |_| | | |  
87 % |____/|_____|____/ \___/  |_|  
88
89 \begin{document}
90 \setbeamertemplate{background}{\titrefemto}
91
92 %%%%%%%%%%%%%%%%%%%%
93 %%    SLIDE 01    %%
94 %%%%%%%%%%%%%%%%%%%% 
95 \begin{frame}[plain]
96 \vspace{1cm}
97 \centering
98    \titlepage
99 \end{frame}
100
101
102 %%%%%%%%%%%%%%%%%%%%
103 %%    SLIDE 02    %%
104 %%%%%%%%%%%%%%%%%%%% 
105 \setbeamertemplate{background}{\pagefemto}
106 \begin{frame}{Outline}
107
108 \setbeamertemplate{section in toc}[sections numbered] 
109 \tableofcontents
110 \end{frame}
111
112
113 %%%%%%%%%%%%%%%%%%%%
114 %%    SLIDE 03    %%
115 %%%%%%%%%%%%%%%%%%%% 
116 \begin{frame}{Introduction and problem definition}
117  \section{\small {Introduction and Problem definition}}
118    \bf \textcolor{blue}{Approaches to increase the computing power:}
119      \begin{minipage}{0.5\textwidth} 
120       \textcolor{blue}{1)} \small  \bf \textcolor{black}{Increasing the frequency of a  processor}
121     \end{minipage}%
122     \begin{minipage}{0.6\textwidth} 
123     
124 \begin{figure}[h!]
125         
126     \includegraphics[width=0.7\textwidth]{fig/freq-years} 
127     \end{figure}
128     \end{minipage}%
129     \vspace{0.2cm}
130     \begin{minipage}{0.5\textwidth} 
131      \textcolor{blue}{2)} \small \bf \textcolor{black}{Increasing the number of nodes}        
132     \end{minipage}%
133     \begin{minipage}{0.6\textwidth} 
134     \begin{figure}[h!]
135      \includegraphics[width=0.7\textwidth]{fig/clusters} 
136     \end{figure}
137     \end{minipage}%
138  \end{frame}
139  
140  
141  
142
143  %%%%%%%%%%%%%%%%%%%
144 %%    SLIDE 04   %%
145 %%%%%%%%%%%%%%%%%%%% 
146 \begin{frame}{Introduction and problem definition}
147  \vspace{0.1cm}
148  \bf \textcolor{blue}{Techniques for energy consumption reduction}
149  
150      \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}  
151     \vspace{-0.9cm}
152     \begin{figure}
153      \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
154     \end{figure}
155  \end{frame}
156
157 %%%%%%%%%%%%%%%%%%%%
158 %%    SLIDE 06    %%
159 %%%%%%%%%%%%%%%%%%%% 
160 \begin{frame}{Techniques for energy consumption reduction}
161  
162   \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency Scaling (DVFS)}
163      \vspace{-0.5cm}
164     \begin{figure}
165      \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
166     \end{figure}
167     \end{frame}
168  
169
170
171 %%%%%%%%%%%%%%%%%%%%
172 %%    SLIDE 07    %%
173 %%%%%%%%%%%%%%%%%%%% 
174 \begin{frame}{Motivations}
175 \vspace{0.05cm}
176 \section{\small {Motivations}}
177 \textcolor{blue}{Why we used the DVFS method:}
178 \vspace{-0.49cm}
179 \begin{minipage}{0.5\textwidth} 
180     \vspace{-0.49cm} 
181       \begin{itemize} 
182        \item  \small \textcolor{black}{The processor consumes the largest share of a node's power\textsuperscript{1}. }
183                 
184          \end{itemize}
185
186     \end{minipage}%
187     \begin{minipage}{0.5\textwidth}
188      \vspace{-0.49cm} 
189     \begin{figure}[h!]
190      \includegraphics[width=0.85\textwidth]{fig/node-power} 
191      
192     \end{figure}
193     \end{minipage}%
194     
195   \begin{itemize} \item \small  \textcolor{black}{It is used to reduce the energy consumption while keeping all the nodes working; thus, it is better suited to parallel computing.}
196                 \item \small \textcolor{black}{It has a very small overhead compared to the method of switching off the idle nodes.}  \end{itemize} 
197     
198 \vspace{-0.12cm}
199
200  \begin{block}{\textcolor{white}{Challenge and Objective}}
201
202         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it degrades the performance simultaneously.}
203                 
204                 \vspace{0.1cm}
205  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel applications.}
206 \end{block}
207  
208  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
209 for a warehouse-sized computer.
210
211     \end{frame}
212
213
214
215 %%%%%%%%%%%%%%%%%%%%
216 %%    SLIDE 08    %%
217 %%%%%%%%%%%%%%%%%%%% 
218
219
220 \begin{frame}{Contribution}
221
222 \section{\small {Energy optimization of homogeneous platform}}
223 \begin{center}
224 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
225 \end{center}
226  \end{frame}
227
228
229
230 %%%%%%%%%%%%%%%%%%%%
231 %%    SLIDE 09    %%
232 %%%%%%%%%%%%%%%%%%%% 
233  
234 \begin{frame}{Objectives}
235         \begin{femtoBlock}{} \vspace{-12 mm}
236                 \begin{itemize} \small
237                    \item  Studying the effect of the scaling factor $S$ on the \textbf{energy consumption and performance} of parallel  applications with iterations such as NAS 
238                           Benchmarks. \includegraphics[width=.06\textwidth]{c1/nasa.pdf} \medskip
239                    
240                    \item  Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
241                    \item  Proposing an algorithm for selecting the scaling factor $S$ producing \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
242                    \item  Comparing the proposed algorithm to existing methods.
243                    
244                    
245                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the  
246                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on. 
247                 \end{itemize}
248                  %\let\thefootnote\relax\footnote{}
249           \vspace{-10 mm}
250         \end{femtoBlock}      
251 \end{frame}
252
253
254
255 %%%%%%%%%%%%%%%%%%%%
256 %%    SLIDE 10    %%
257 %%%%%%%%%%%%%%%%%%%% 
258
259
260 \begin{frame}{Execution of synchronous parallel tasks}
261 \vspace{-0.5 cm}
262 \begin{figure}
263   \centering
264   \subfloat[Sync. imbalanced communications]{%
265     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
266   \subfloat[Sync. imbalanced computations]{%
267     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
268  % \caption{Parallel tasks on homogeneous platform}
269   \label{fig:homo}
270 \end{figure}
271
272  \end{frame}
273  
274  
275  
276
277 %%%%%%%%%%%%%%%%%%%%
278 %%    SLIDE 11   %%
279 %%%%%%%%%%%%%%%%%%%% 
280 \begin{frame}{Energy model for homogeneous platform}    
281       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static   
282        (\textcolor{red}{$P_s$}) power. 
283     \begin{equation}
284      \label{eq:pd}
285      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot CL \cdot V^2 \cdot F}
286    \end{equation}
287     \scriptsize \underline{Where}: \\ 
288     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm}  \textcolor{blue}{$CL$}: load capacitance\\     
289     \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
290    \begin{equation}
291      \label{eq:ps}
292      \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
293    \end{equation}
294     \underline{Where}:\\ 
295         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage.  \hspace{28 mm}   \textcolor{blue}{$N_{trans}$}: number of transistors. \\   
296         \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent  
297              parameter.} 
298 \end{frame}
299
300 %%%%%%%%%%%%%%%%%%%%
301 %%    SLIDE 12   %%
302 %%%%%%%%%%%%%%%%%%%% 
303
304 \begin{frame}{Energy model for homogeneous platform}
305        
306           The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.  \medskip     
307               
308               
309               
310         \begin{block}{\small Rauber and Rünger's energy model}
311          $ E = P_{d} \cdot S_1^{-2} \cdot
312          \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
313             P_{s} \cdot S_1  \cdot T_1 \cdot N$
314         \end{block}     
315            \textcolor{blue}{$S_1$}: the max. scaling factor\\ 
316            \textcolor{blue}{$P_{d}$}: the dynamic power\\
317            \textcolor{blue}{$P_{s}$}: the static power\\
318            \textcolor{blue}{$T_1$}: the time of the slowest task\\ 
319            \textcolor{blue}{$T_i$}: the time of the other tasks\\ 
320            \textcolor{blue}{$N$}:  the number of  nodes
321        
322 \end{frame}
323   
324   
325 %%%%%%%%%%%%%%%%%%%%
326 %%    SLIDE 13   %%
327 %%%%%%%%%%%%%%%%%%%% 
328 \begin{frame}{Performance evaluation of MPI programs}      
329         \begin{femtoBlock}{}
330               \vspace{-5 mm}
331               \begin{block}{\small Execution time prediction model}
332                      \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
333           \end{block}   
334           \vspace{10 mm}
335            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
336            \quad%
337            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
338             \vspace{5 mm}
339             
340            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
341            \end{femtoBlock}
342 \end{frame}
343
344
345
346
347  %%%%%%%%%%%%%%%%%%%%
348 %%    SLIDE 14   %%
349 %%%%%%%%%%%%%%%%%%%%  
350 \begin{frame}{Performance and energy reduction trade-off}      
351         \begin{femtoBlock}{} \vspace{-15 mm}
352                \begin{figure}
353      \centering
354      \subfloat[\small  Real relation.]{%
355      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
356      \quad%
357      \subfloat[\small Converted relation.]{%
358      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
359   \label{fig:rel}
360  % \caption{The energy and performance relation}
361 \end{figure}
362
363  Where:~~~ $\textcolor{blue}{Performance} = execution~time^{-1}$
364
365 %\vspace{-0.3cm}
366       \small 
367          \begin{block}{\small Our objective function}
368          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}             
369                     (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} - 
370                      \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$}
371                                          
372         \end{block}                
373         \end{femtoBlock}
374        
375 \end{frame}
376
377 %%%%%%%%%%%%%%%%%%%%
378 %%    SLIDE 15   %%
379 %%%%%%%%%%%%%%%%%%%% 
380  \begin{frame}{Scaling factor selection algorithm}
381 \vspace{-0.75cm}
382      \begin{center}
383       \includegraphics[width=.56 \textwidth]{c1/algo-homo}
384      \end{center}
385      
386 \end{frame}
387
388
389 %%%%%%%%%%%%%%%%%%%%
390 %%    SLIDE 16   %%
391 %%%%%%%%%%%%%%%%%%%% 
392 \begin{frame}{Scaling algorithm example}
393 \vspace{-0.75cm}
394      
395      \begin{figure}
396   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
397
398   \end{figure}
399 \end{frame}
400
401 %%%%%%%%%%%%%%%%%%%%
402 %%    SLIDE 17   %%
403 %%%%%%%%%%%%%%%%%%%% 
404 \begin{frame}{Experimental results }
405       \begin{femtoBlock}{}      
406         \begin{itemize}
407          \small
408            \item The experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip
409            \item The proposed algorithm is applied to the NAS parallel benchmarks.\medskip
410            \item Each node in the cluster has 18 frequency values from \textbf{2.5\,GHz} to \textbf{800\,MHz}.\medskip
411            \item The proposed algorithm was evaluated over the A, B, C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
412            \item $P_d=20\,\mathrm{W}$,  $P_s=4\,\mathrm{W}$.
413                 \end{itemize}
414         \end{femtoBlock}
415 \end{frame}
416
417
418 %%%%%%%%%%%%%%%%%%%%
419 %%    SLIDE 18   %%
420 %%%%%%%%%%%%%%%%%%%% 
421 \begin{frame}{Experimental results}
422   \begin{femtoBlock}{}  
423       \centering { 
424      \includegraphics[width=.35\textwidth]{c1/ep}
425      \includegraphics[width=.35\textwidth]{c1/cg}
426      \includegraphics[width=.35\textwidth]{c1/bt}}
427      
428      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
429  \end{femtoBlock}
430 \end{frame}
431
432
433   %%%%%%%%%%%%%%%%%%%%
434 %%    SLIDE 19   %%
435 %%%%%%%%%%%%%%%%%%%% 
436 \begin{frame}{Results comparison}
437          \begin{block}{\small Rauber and Rünger's optimal scaling factor} 
438            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
439             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
440         \end{block}   
441     \centering {
442          %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
443          %\qquad
444          %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
445            
446          
447             \includegraphics[width=.55\textwidth]{c1/compare_c.pdf}}
448         
449 \end{frame}
450
451
452 %%%%%%%%%%%%%%%%%%%%
453 %%    SLIDE 20   %%
454 %%%%%%%%%%%%%%%%%%%% 
455 \begin{frame}{The proposed new energy model}
456     \vspace{-0.75cm}     
457   \begin{figure}
458   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
459   \end{figure}
460 \end{frame}
461
462
463 %%%%%%%%%%%%%%%%%%%%
464 %%    SLIDE 21   %%
465 %%%%%%%%%%%%%%%%%%%% 
466 \begin{frame}{Comparing the new model with Rauber model }
467  \vspace{0.1cm}    
468  \centering
469     \includegraphics[width=.45\textwidth]{c1/energy_con}
470     
471     \includegraphics[width=.5\textwidth]{c1/compare-scales}
472 \end{frame}
473
474
475
476
477    % \begin{frame}{Summary}
478      % \begin{femtoBlock}{}    
479      % \begin{itemize}
480       %\small
481        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
482        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
483          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip    
484          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
485                % performance}. \medskip
486          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
487          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
488          %\end{itemize}      
489          
490         %\end{femtoBlock}
491 %\end{frame}
492
493
494 %%%%%%%%%%%%%%%%%%%%
495 %%    SLIDE 22    %%
496 %%%%%%%%%%%%%%%%%%%% 
497
498
499 \begin{frame}{Contribution}
500
501 \section{\small {Energy optimization of heterogeneous platform}}
502 \begin{center}
503
504
505 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
506 \end{center}
507  \end{frame}
508  
509
510
511 %%%%%%%%%%%%%%%%%%%%
512 %%    SLIDE 23    %%
513 %%%%%%%%%%%%%%%%%%%% 
514  
515 \begin{frame}{Objectives}
516         \begin{femtoBlock}{} \vspace{-12 mm}
517                 \begin{itemize} \small
518                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running  
519                           over a heterogeneous platform (cluster and Grid). \medskip
520                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
521                           message passing iterative applications.    \medskip                      
522                    
523                    \item  Computing  the vector of scaling factors ($S_1, S_2, ..., S_n$)  producing \textcolor{blue} {the optimal trade-off} between
524                           the energy consumption and the performance. 
525                 \end{itemize}
526                  
527           \vspace{-10 mm}
528         \end{femtoBlock}      
529 \end{frame}
530
531
532 %%%%%%%%%%%%%%%%%%%%
533 %%    SLIDE 24    %%
534 %%%%%%%%%%%%%%%%%%%%
535 \begin{frame}{The execution time model}    
536       \vspace{-8 mm}
537      \begin{figure}[!t]
538        \centering
539        \includegraphics[scale=0.5]{c2/commtasks}
540        \label{fig:heter}
541      \end{figure}     
542        \vspace{-12 mm}
543        \medskip
544        
545     \begin{block}{\small The execution time prediction model}
546     \begin{equation}
547      \label{eq:perf}
548      \small\textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
549     \end{equation}
550     \end{block}   
551  \small  Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{communication~times + slack~times}$
552   
553 \end{frame}
554  
555  %%%%%%%%%%%%%%%%%%%%
556 %%    SLIDE 25    %%
557 %%%%%%%%%%%%%%%%%%%%
558  \begin{frame}{The energy consumption model} 
559     The overall energy consumption of a message passing synchronous  application executed over
560      a heterogeneous platform can be computed as  follows:
561     \begin{multline}
562      \label{eq:energy}
563      \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
564      \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i)}))}   
565       \hspace{10 mm}
566     \end{multline}
567     \underline{where}:\\
568     \textcolor{blue}{N} : is the number of nodes.
569 \end{frame}
570  
571  
572 %%%%%%%%%%%%%%%%%%%%
573 %%    SLIDE 26    %%
574 %%%%%%%%%%%%%%%%%%%%
575   \begin{frame}{The  energy  model example for heter. cluster}
576   \vspace{-0.5cm}
577  \begin{figure}
578   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
579   \end{figure}
580  \end{frame}
581  
582  
583  
584  
585 %%%%%%%%%%%%%%%%%%%%
586 %%    SLIDE 27    %%
587 %%%%%%%%%%%%%%%%%%%%
588 %\begin{frame}{The trade-off between energy  and performance}
589    % \vspace{-7 mm}
590     %\begin{figure}
591    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
592    % \end{figure}
593    % \vspace{-7 mm}
594    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}} 
595     %{E_{Max}}$}. \\
596     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
597    
598    %  \begin{block}{\small The tradeoff model}
599     % \begin{equation}
600     %  \label{eq:max}
601     %  \textcolor{red}{MaxDist} =
602      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
603       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
604       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
605       %\end{equation}
606     % \end{block}  
607 %\end{frame}
608    
609  
610 %%%%%%%%%%%%%%%%%%%%
611 %%    SLIDE 28    %%
612 %%%%%%%%%%%%%%%%%%%%
613  \begin{frame}{The scaling algorithm for heter. cluster}
614
615  \centering
616    \includegraphics[width=.52\textwidth]{algo-heter}
617  \end{frame}
618  
619  
620  %%%%%%%%%%%%%%%%%%%%
621 %%    SLIDE 29    %%
622 %%%%%%%%%%%%%%%%%%%%
623  \begin{frame}{The scaling algorithm example}
624  \vspace{-0.5cm}
625  \centering
626  
627   \begin{figure}
628   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
629   \end{figure}
630 \end{frame}
631
632
633
634
635 %%%%%%%%%%%%%%%%%%%%
636 %%    SLIDE 30    %%
637 %%%%%%%%%%%%%%%%%%%%
638 \begin{frame}{Experiments over a heterogeneous cluster  }   
639         \begin{itemize}
640          \small
641            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
642            \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
643            \item Four types of processors with different computing powers were used.\medskip
644            \item We ran the benchmarks on different numbers of nodes, ranging from 4 to 144 nodes.\medskip
645            \item The total power consumption of the chosen CPUs was assumed to be composed of $80\%$ for the dynamic power and $20\%$ for the static power.
646                   \medskip
647          
648         \end{itemize}
649
650 \end{frame}  
651
652
653 %%%%%%%%%%%%%%%%%%%%
654 %%    SLIDE 31    %%
655 %%%%%%%%%%%%%%%%%%%%
656 \begin{frame}{The experimental results}
657    \vspace{-5 mm}
658    \begin{figure}[!t]
659    \centering
660     \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
661     
662     \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%} 
663      for the class C of the NAS benchmarks executed over 8 nodes}
664     
665    \end{figure}
666 \end{frame} 
667  
668  
669  
670 %%%%%%%%%%%%%%%%%%%%
671 %%    SLIDE 32    %%
672 %%%%%%%%%%%%%%%%%%%%
673 \begin{frame}{The experimental results}
674    \vspace{-5 mm}
675    \begin{figure}[!t]
676    \centering
677     
678     \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
679    
680    \textcolor{blue}{On average, it degrades  by \textcolor{red}{3.8\%} the performance
681      of NAS benchmarks class C executed over 8 nodes}
682      \end{figure}
683 \end{frame} 
684  
685  
686  
687 %%%%%%%%%%%%%%%%%%%%
688 %%    SLIDE 33    %%
689 %%%%%%%%%%%%%%%%%%%%
690 \begin{frame}{The results of the three power scenarios}
691    \vspace{-5 mm}
692    \begin{figure}[!t]
693    \centering
694    \includegraphics[width=.55\textwidth]{c2/three_power.pdf}
695    \vspace{10 mm}
696    \includegraphics[width=.55\textwidth]{c2/three_scenarios.pdf}
697    \end{figure}
698 \end{frame}  
699
700
701
702 %%%%%%%%%%%%%%%%%%%%
703 %%    SLIDE 34    %%
704 %%%%%%%%%%%%%%%%%%%%
705 \begin{frame}{Comparing the objective function to EDP}
706      
707      EDP is the product of the energy consumption and the delay.
708     \vspace{-5 mm}
709     \begin{figure}[!t]
710     \centering
711     \includegraphics[width=.55\textwidth]{c2/avg_compare.pdf}
712     
713     \includegraphics[width=.55\textwidth]{c2/compare_with_EDP.pdf}
714     \end{figure}
715 \end{frame} 
716
717
718
719
720 %%%%%%%%%%%%%%%%%%%%
721 %%    SLIDE 35    %%
722 %%%%%%%%%%%%%%%%%%%%
723 %\begin{frame}{Energy optimization of grid platform} 
724   % \begin{figure}[!t]
725    % \centering
726          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
727              
728         %   \small  10 sites distributed over France and Luxembourg
729         %\end{figure}
730 %\end{frame} 
731
732
733 %%%%%%%%%%%%%%%%%%%%
734 %%    SLIDE 36    %%
735 %%%%%%%%%%%%%%%%%%%%
736 \begin{frame}{The grid architecture}
737 \begin{center}
738 \includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
739 \end{center}
740
741  %\begin{frame}{Performance, Energy and trade-off models} \small
742   %\begin{block}{\small The performance model of grid}
743    % \begin{equation}
744   %\label{eq:perf}
745   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij}) 
746  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
747 %\end{equation}
748     %\end{block}   
749  
750  
751  %\begin{block}{\small The energy model of grid}\small
752   %  \begin{equation}
753   %\label{eq:energy}
754  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  
755 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
756 %\end{equation}
757    % \end{block}  
758
759 %\begin{block}{\small The trade-off model of grid}
760 %\small
761     %\begin{equation}
762    %\label{eq:max}
763   %\MaxDist =
764   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
765    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
766     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
767 %\end{equation}
768    % \end{block}  
769      
770      
771  \end{frame}
772   
773   
774   
775 %%%%%%%%%%%%%%%%%%%%
776 %%    SLIDE 37    %%
777 %%%%%%%%%%%%%%%%%%%%
778  \begin{frame}{Experiments over Grid'5000}
779   \centering
780
781           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
782           
783           \vspace{-3 mm}
784           \textcolor{blue}{Two experiments were conducted: over one site and two sites 
785           each one with three clusters }
786           
787               \vspace{1mm}
788
789           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
790           
791         \textcolor{blue}{Grid'5000 power measurement tools were used} 
792 \end{frame}   
793
794
795
796
797 %%%%%%%%%%%%%%%%%%%%
798 %%    SLIDE 38    %%
799 %%%%%%%%%%%%%%%%%%%%
800 \begin{frame}{Experiments over Grid'5000}
801
802    \begin{minipage}{0.4\textwidth}
803        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by  
804         %\textcolor{red}{30\%}}
805         \textcolor{blue}{The energy saving =  \textcolor{red}{30\%}}
806    \end{minipage}  
807      \begin{minipage}{0.55\textwidth}
808         \begin{figure}[h!]
809           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
810      \end{figure}
811 \end{minipage}
812
813          \begin{minipage}{0.4\textwidth}
814            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the 
815                 %performance by \textcolor{red}{3.2\%}}
816               \textcolor{blue}{The performance degradation  =  \textcolor{red}{3.2\%}}  
817         \end{minipage}
818        \begin{minipage}{0.55\textwidth}
819          \begin{figure}[h!] 
820            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
821          \end{figure}  
822           \end{minipage}
823  \end{frame}
824
825
826
827 %%%%%%%%%%%%%%%%%%%%
828 %%    SLIDE 39    %%
829 %%%%%%%%%%%%%%%%%%%%
830 \begin{frame}{Experiments over Grid'5000}
831    \textcolor{blue}{One core  and Multi-cores per node results:}
832    
833   \begin{figure}[h!] 
834   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
835   \hspace{0.3cm}
836   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
837   \end{figure} 
838   
839   \centering \small \textcolor{blue}{Using multiple cores per node decreases the computation-to-communication ratio}.
840 \end{frame}
841
842
843
844 %\begin{frame}{Summary}
845 %\begin{itemize}
846      % \small
847         % \item  Two scaling algorithm were applies to \textcolor{blue}{heterogeneous %cluster} and \textcolor{blue}{grid}.
848         % \item  A new \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
849       %   \item  The experimental results ere conducted over \textcolor{blue}{SimGrid}  simulators and real 
850           %test-bed \textcolor{blue}{Grid'5000}.
851          
852          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
853         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
854       %    clusters.
855          
856          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
857         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
858          
859        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
860      %    \end{itemize}   
861 %\end{frame}
862
863
864 %%%%%%%%%%%%%%%%%%%%
865 %%    SLIDE 40    %%
866 %%%%%%%%%%%%%%%%%%%%
867 \begin{frame}{Contribution}
868 \section{\small {Energy optimization of asynchronous applications}}
869 \begin{center}
870 \bf  \Large \textcolor{blue}{Energy optimization of asynchronous  message passing iterative applications}
871 \end{center}
872  \end{frame}
873
874
875
876 %%%%%%%%%%%%%%%%%%%%
877 %%    SLIDE 41   %%
878 %%%%%%%%%%%%%%%%%%%%
879 \begin{frame}{Problem definition}\vspace{0.8 mm}
880 \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
881 \vspace{-8 mm}
882 \begin{figure}
883   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
884   \end{figure}
885 \end{frame}
886
887
888
889 %%%%%%%%%%%%%%%%%%%%
890 %%    SLIDE 42   %%
891 %%%%%%%%%%%%%%%%%%%%
892 \begin{frame}{Problem definition}\vspace{0.8 mm}
893 \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
894 \vspace{-8 mm}
895 \begin{figure}
896   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
897   \end{figure}
898 \end{frame}
899
900
901
902 %%%%%%%%%%%%%%%%%%%%
903 %%    SLIDE 43   %%
904 %%%%%%%%%%%%%%%%%%%%
905 \begin{frame}{Solution}\vspace{0.8mm}
906 \textcolor{blue}{Using asynchronous communications with DVFS }
907 \vspace{-8 mm}
908 \begin{figure}
909   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
910   \end{figure}
911 \end{frame}
912
913
914
915
916 %%%%%%%%%%%%%%%%%%%%
917 %%    SLIDE 44   %%
918 %%%%%%%%%%%%%%%%%%%%
919 %\begin{frame}{The performance models}
920
921 %\begin{block}{\small The performance model of Asynch. Applications}\small
922 %\begin{equation}
923   %\label{eq:asyn_time}
924  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
925 %\end{equation}
926 %\end{block}
927
928
929 %\begin{block}{\small The performance model of Hybrid Applications}\small
930 %\begin{equation}
931   %\label{eq:asyn_perf}
932   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +  
933    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
934 %\end{equation}
935 %\end{block}
936
937
938 %\end{frame}
939
940
941
942 %%%%%%%%%%%%%%%%%%%%
943 %%    SLIDE 45   %%
944 %%%%%%%%%%%%%%%%%%%%
945 %\begin{frame}{The energy consumption models}
946
947 %\begin{block}{\small The energy model of Asynch. Applications}\small
948 %\begin{equation}
949   %\label{eq:asyn_energy1}
950 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )} 
951 %\end{equation} 
952 %\end{block}
953
954
955 %\begin{block}{\small The energy model of Hybrid Applications}\small
956 %\begin{multline}
957   %\label{eq:asyn_energy}
958  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
959 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]}))) 
960 %\end{multline}
961 %\end{block}
962 %\end{frame}
963
964
965
966 %%%%%%%%%%%%%%%%%%%%
967 %%    SLIDE 44   %%
968 %%%%%%%%%%%%%%%%%%%%
969 \begin{frame}{The performance and the energy models }
970
971 \centering
972 \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
973 \end{frame}
974
975
976
977
978
979 %%%%%%%%%%%%%%%%%%%%
980 %%    SLIDE 46   %%
981 %%%%%%%%%%%%%%%%%%%%
982 \begin{frame}{The scaling algorithm for Asynch.  applications}
983 \vspace{-0.1 mm}
984 \centering
985 \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
986 \end{frame}
987
988
989
990 %%%%%%%%%%%%%%%%%%%%
991 %%    SLIDE 47   %%
992 %%%%%%%%%%%%%%%%%%%%
993 \begin{frame}{The experiments}
994    \vspace{-5 mm}
995    \begin{figure}[!t]
996    \begin{itemize}
997       \small
998         \item The architecture of the grid:
999    \end{itemize}
1000     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf} 
1001    \end{figure}
1002    \begin{itemize}
1003       \small
1004         \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1005         \item Evaluating the application over the simulator and Grid'5000.
1006    \end{itemize}
1007 \end{frame} 
1008
1009
1010
1011 %%%%%%%%%%%%%%%%%%%%
1012 %%    SLIDE 48   %%
1013 %%%%%%%%%%%%%%%%%%%%
1014 \begin{frame}{The simulation results}
1015 \centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1016
1017 \centering
1018     \includegraphics[scale=0.42]{c3/energy_saving.eps}
1019
1020  \centering  The average energy saving  = \textcolor{red}{22\%}
1021 \end{frame} 
1022
1023
1024
1025 %%%%%%%%%%%%%%%%%%%%
1026 %%    SLIDE 49   %%
1027 %%%%%%%%%%%%%%%%%%%%
1028 \begin{frame}{The simulation results}
1029 \centering
1030    
1031      \includegraphics[scale=0.42]{c3/perf_degra.eps}
1032      
1033  \centering    The average speed-up  = \textcolor{red}{5.72\%}
1034 \end{frame} 
1035
1036
1037
1038 %%%%%%%%%%%%%%%%%%%%
1039 %%    SLIDE 50   %%
1040 %%%%%%%%%%%%%%%%%%%%
1041  \begin{frame}{The Grid'5000 results}
1042    \vspace{-20 mm}
1043    \begin{figure}[!t]
1044    \centering
1045    \hspace{-8 mm}
1046     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}                    
1047     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1048    \end{figure}
1049     \vspace{-5 mm}
1050      \centering
1051 The energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1052 \end{frame} 
1053
1054
1055 %%%%%%%%%%%%%%%%%%%%
1056 %%    SLIDE 51   %%
1057 %%%%%%%%%%%%%%%%%%%%
1058 \begin{frame}{The comparison results}
1059  \centering
1060     \includegraphics[width=.5\textwidth]{c3/compare.eps}
1061     
1062     \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1063 \end{frame} 
1064
1065
1066
1067
1068 %%%%%%%%%%%%%%%%%%%%
1069 %%    SLIDE 52  %%
1070 %%%%%%%%%%%%%%%%%%%%
1071 \begin{frame}{Conclusions}
1072 \section{Conclusions and Perspectives}
1073 \begin{itemize}
1074
1075 \small  \barrow  Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous and asynchronous parallel applications with iterations running over 
1076 \textcolor{blue}{homogeneous and  heterogeneous clusters and grids}.  
1077       
1078
1079
1080 \small \barrow \textcolor{blue}{A new objective function} was proposed to optimize both the energy consumption and the performance.
1081
1082 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1083
1084 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1085 Multi-splitting} method.
1086
1087 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over the \textcolor{blue}{Grid'5000 testbed}.
1088
1089 \small  \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's  method} or  \textcolor{blue}{the EDP objective function}.
1090
1091
1092 \end{itemize}
1093 \end{frame}
1094
1095
1096
1097 %%%%%%%%%%%%%%%%%%%%
1098 %%    SLIDE 53   %%
1099 %%%%%%%%%%%%%%%%%%%%
1100 \begin{frame}{Publications}
1101
1102 \begin{block}{\small Journal Articles }\scriptsize
1103 \begin{enumerate}[$\lbrack$1$\rbrack$]
1104
1105 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational 
1106       Science}, 2016.
1107
1108 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for     
1109       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016 (submitted).
1110  
1111 \end{enumerate}
1112 \end{block}
1113
1114
1115 \begin{block}{\small Conference Articles }\scriptsize
1116
1117 \begin{enumerate}[$\lbrack$1$\rbrack$]
1118
1119 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1120       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014}, pp.
1121       225-230. IEEE Computer Society, Milan, Italy (2014).
1122
1123 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1124       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1125       \textit{The 16\textsuperscript{th} PDSEC}, pp. 922-931. IEEE Computer Society, India (2015).
1126
1127 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
1128       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society, 
1129       Paris (2016).  
1130
1131 \end{enumerate}
1132
1133 \end{block}
1134 \end{frame}
1135
1136
1137 %%%%%%%%%%%%%%%%%%%%
1138 %%    SLIDE 54   %%
1139 %%%%%%%%%%%%%%%%%%%%
1140 \begin{frame}{Perspectives}
1141
1142 \begin{itemize}
1143
1144 \small  \barrow The proposed algorithms should  take into consideration the
1145 \textcolor{blue}{variability between some iterations}.
1146
1147 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1148
1149 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1150
1151 \small \barrow The results returned by the energy models should be compared to the values given by  \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1152 \end{itemize}
1153
1154 \end{frame}
1155
1156 %%%%%%%%%%%%%%%%%%%%
1157 %%    SLIDE 55  %%
1158 %%%%%%%%%%%%%%%%%%%%
1159 \begin{frame}{Fin} \vspace{-10 mm}
1160
1161             \centering \Large \textcolor{blue}{Thank you for your attention}
1162             
1163             \vspace{2cm}
1164             \centering \textcolor{blue}{ {\Large Questions?}}
1165         
1166 \end{frame}
1167 \end{document}
1168 %  _____ ___ _   _ 
1169 % |  ___|_ _| \ | |
1170 % | |_   | ||  \| |
1171 % |  _|  | || |\  |
1172 % |_|   |___|_| \_|
1173 %