2 \usepackage{beamerthemefemto}
3 \usepackage[latin1]{inputenc}
4 \usepackage[T1]{fontenc}
5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
6 \usepackage{algorithm,algorithmicx,algpseudocode}
7 \usepackage{graphicx,graphics}
%% Reviewer note macros built on \todo. The optional argument (default
%% `inline`) is passed through as an extra \todo option; the mandatory
%% argument is the note text, prefixed with the reviewer's initials in bold
%% sans-serif. \AG uses a green!50 background, \JC a red!10 one.
16 \newcommand{\AG}[2][inline]{%
17 \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}
18 \newcommand{\JC}[2][inline]{%
19 \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
20 \definecolor{myblue}{RGB}{0,29,119}
%% \Xsub{<base>}{<sub>}: typeset <base> with <sub> as an italic subscript,
%% usable in text or math thanks to \ensuremath.
%% The subscript is braced so that `_` receives one explicit group instead of
%% relying on how \mathit happens to expand.
21 \newcommand{\Xsub}[2]{{\ensuremath{#1_{\mathit{#2}}}}}
23 %% used to put some subscripts lower, and make them more legible
%% An empty argument expands to nothing; otherwise a zero-width rule of
%% height 1.52ex is placed in front of the argument.
%% Emptiness is tested on the detokenized argument: the former
%% \ifx#1\relax test compared the first two tokens of the argument, so an
%% argument such as `ii` was wrongly treated as empty and dropped.
24 \newcommand{\fxheight}[1]{\if\relax\detokenize{#1}\relax\else\rule{0pt}{1.52ex}#1\fi}
%% Notation shortcuts. Most are built on \Xsub (base symbol + italic
%% subscript); \Dist, \MaxDist and \MinTcm are built from \mathit words.
%% Macros declared with [1][] take an optional argument that is appended as a
%% further subscript, in several cases wrapped in \fxheight.
26 \newcommand{\CL}{\Xsub{C}{L}}
27 \newcommand{\Dist}{\mathit{Dist}}
28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
29 \newcommand{\Eind}{\Xsub{E}{ind}}
30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
33 \newcommand{\Es}{\Xsub{E}{S}}
34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}}
35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}}
36 \newcommand{\Fnew}{\Xsub{F}{new}}
37 \newcommand{\Vnew}{\Xsub{V}{new}}
38 \newcommand{\Vmax}{\Xsub{V}{max}}
39 \newcommand{\Ileak}{\Xsub{I}{leak}}
40 \newcommand{\Kdesign}{\Xsub{K}{design}}
41 \newcommand{\MaxDist}{\mathit{Max}\Dist}
42 \newcommand{\MinTcm}{\mathit{Min}\Tcm}
43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}}
52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
56 \newcommand{\Tnew}{\Xsub{T}{New}}
57 \newcommand{\Told}{\Xsub{T}{Old}}
58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
65 \definecolor{myblue}{RGB}{0,29,119}
66 \usepackage[textsize=footnotesize]{todonotes}
%% List-item shortcuts: each expands to an \item whose label is a \ding
%% symbol (110, 228 or 227 respectively) coloured with myblue.
67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]}
68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
74 %\title{Energy Consumption Optimization of Parallel Applications with
75 %Iterations using CPU Frequency Scaling}
78 \title{ \textbf{Energy Consumption Optimization of Parallel Applications with Iterations using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-1cm}
79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under Supervision: \textcolor{cyan}{\small Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ University of Franche-Comté - FEMTO-ST - DISC Dept. - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
83 % ____ _____ ____ _ _ _____
84 % | _ \| ____| __ )| | | |_ _|
85 % | | | | _| | _ \| | | | | |
86 % | |_| | |___| |_) | |_| | | |
87 % |____/|_____|____/ \___/ |_|
90 \setbeamertemplate{background}{\titrefemto}
105 \setbeamertemplate{background}{\pagefemto}
106 \begin{frame}{Outline}
108 \setbeamertemplate{section in toc}[sections numbered]
116 \begin{frame}{Introduction and problem definition}
117 \section{\small {Introduction and Problem definition}}
118 \bf \textcolor{blue}{Approaches to increase the computing power:}
119 \begin{minipage}{0.5\textwidth}
120 \textcolor{blue}{1)} \small \bf \textcolor{black}{Increasing the frequency of processor}
122 \begin{minipage}{0.6\textwidth}
126 \includegraphics[width=0.7\textwidth]{fig/freq-years}
130 \begin{minipage}{0.5\textwidth}
131 \textcolor{blue}{2)} \small \bf \textcolor{black}{Increasing the number of nodes}
133 \begin{minipage}{0.6\textwidth}
135 \includegraphics[width=0.7\textwidth]{fig/clusters}
145 \begin{frame}{Introduction and problem definition}
146 \bf \textcolor{blue}{Processor frequency and its energy consumption}
148 \begin{minipage}{0.5\textwidth}
149 \textcolor{blue}{$\blacktriangleright$}
150 \small \bf \textcolor{black}{ The power consumption of a processor increases exponentially when its
151 frequency is increased}
153 \begin{minipage}{0.5\textwidth}
155 \includegraphics[width=0.7\textwidth]{fig/freq-power}
159 \begin{minipage}{0.5\textwidth}
160 \textcolor{blue}{$\blacktriangleright$}
161 \small \bf \textcolor{black}{The processor is the biggest power consumer in a computing node}
164 \begin{minipage}{0.6\textwidth}
166 \includegraphics[width=0.9\textwidth]{fig/node-power}
175 \begin{frame}{Introduction and problem definition}
177 \bf \textcolor{blue}{Techniques for energy consumption reduction}
179 \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
182 \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
189 \begin{frame}{Techniques for energy consumption reduction}
191 \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency Scaling (DVFS)}
194 \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
203 \begin{frame}{Using the energy reduction method}
204 \section{\small {Using the energy reduction method}}
205 \begin{block}{\textcolor{white}{Why we used the DVFS method:}}
207 \item \textcolor{black}{It is used to reduce the energy while keeping all nodes working, thus it is better suited to parallel computing.}
208 \item \textcolor{black}{It has a very small overhead compared to the switch-off idle nodes method.}
213 \begin{block}{\textcolor{white}{Challenge and Objective}}
215 \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy, \textcolor{blue}{but} it degrades the performance simultaneously.}
218 \textcolor{blue}{Objective:} \textcolor{black}{Optimizing both energy consumption and performance of a parallel application at the same time when DVFS is used.}
230 \begin{frame}{Contributions}
231 \section{\small {Contributions}}
232 \subsection{\small {3.1 Energy optimization of homogeneous platform}}
234 \bf \textcolor{black}{First contribution} \\
236 \bf \Large \textcolor{blue}{Energy optimization of homogeneous platform}
246 \begin{frame}{Objectives}
247 \begin{femtoBlock}{} \vspace{-12 mm}
248 \begin{itemize} \small
249 \item Study the effect of the scaling factor $S$ on \textbf{energy consumption} of parallel iterative applications such as NAS
250 Benchmarks. \includegraphics[width=.06\textwidth]{c1/nasa.pdf} \medskip
251 \item Study the effect of the scaling factor $S$ on \textbf{performance} of these benchmarks.\medskip
252 \item Discovering the \textbf{energy-performance trade-off relation} when changing the frequency.\medskip
253 \item We propose an algorithm for selecting the scaling factor $S$ producing the \textbf{optimal trade-off} between the energy and performance. \medskip
254 \item Improving Rauber and Rünger's\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
255 energy consumption \\ \quad ~ ~\quad of independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method is based on.
257 \let\thefootnote\relax\footnote{}
269 \begin{frame}{Parallel tasks execution over Homo. Platform}
273 \subfloat[Sync. imbalanced communications]{%
274 \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
275 \subfloat[Sync. imbalanced computations]{%
276 \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
277 \caption{Parallel tasks on homogeneous platform}
289 \begin{frame}{Energy model for homogeneous platform}
290 The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
291 (\textcolor{red}{$P_s$}) power.
294 \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot CL \cdot V^2 \cdot F}
296 \scriptsize \underline{Where}: \\
297 \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm} \textcolor{blue}{$CL$}: load capacitance\\
298 \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
301 \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
304 \scriptsize{ \textcolor{blue}{$V$}: the supply voltage. \hspace{28 mm} \textcolor{blue}{$N_{trans}$}: number of transistors. \\
305 \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent
313 \begin{frame}{Energy model for homogeneous platform}
315 The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}. \medskip
319 \begin{block}{\small Rauber and Rünger's energy model}
320 $ E = P_{d} \cdot S_1^{-2} \cdot
321 \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
322 P_{s} \cdot S_1 \cdot T_1 \cdot N$
324 \textcolor{blue}{$S_1$}: the max. scaling factor\\
325 \textcolor{blue}{$P_{d}$}: the dynamic power\\
326 \textcolor{blue}{$P_{s}$}: the static power\\
327 \textcolor{blue}{$T_1$}: the time of the slowest task\\
328 \textcolor{blue}{$T_i$}: the time of the other tasks\\
329 \textcolor{blue}{$N$}: the number of nodes
337 \begin{frame}{Performance evaluation of MPI programs}
340 \begin{block}{\small Execution time prediction model}
341 \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
344 \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
346 \includegraphics[width=.4\textwidth]{c1/lu_pre}}
349 \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
359 \begin{frame}{Performance and energy reduction trade-off}
360 \begin{femtoBlock}{} \vspace{-15 mm}
363 \subfloat[\small Real relation.]{%
364 \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
366 \subfloat[\small Converted relation.]{%
367 \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
369 % \caption{The energy and performance relation}
372 Where:~~~ $\textcolor{blue}{Performance} = execution~time^{-1}$
376 \begin{block}{\small Our objective function}
377 \centering{$\textbf{\emph {MaxDist}} = \max_{j=1,2,\dots ,F}
378 (\overbrace{P_{Norm}(S_j)}^{{Maximize}} -
379 \overbrace{E_{Norm}(S_j)}^{{Minimize}} )$}
389 \begin{frame}{Scaling factor selection algorithm}
392 \includegraphics[width=.56 \textwidth]{c1/algo-homo}
401 \begin{frame}{Scaling algorithm example}
405 \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
413 \begin{frame}{Experimental results }
417 \item Our experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip
418 \item Our algorithm is applied to NAS parallel benchmarks.\medskip
419 \item Each node in the cluster has 18 frequency values from \textbf{2.5~GHz} to \textbf{800~MHz}.\medskip
420 \item We run the classes A, B and C on 4, 8 or 9 and 16 nodes respectively.\medskip
421 \item The dynamic power with the highest frequency is equal to \textbf{20 $W$} and the static power is equal to \textbf{4 $W$}.
430 \begin{frame}{Experimental results}
433 \includegraphics[width=.35\textwidth]{c1/ep}
434 \includegraphics[width=.35\textwidth]{c1/cg}
435 \includegraphics[width=.35\textwidth]{c1/bt}}
437 \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
445 \begin{frame}{Results comparison}
446 \begin{block}{\small Rauber and Rünger's optimal scaling factor}
447 $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
448 \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
451 %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
453 %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
456 \includegraphics[width=.55\textwidth]{c1/compare_c.pdf}}
464 \begin{frame}{The proposed new energy model}
467 \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
475 \begin{frame}{Comparing the new model with Rauber model }
478 \includegraphics[width=.45\textwidth]{c1/energy_con}
480 \includegraphics[width=.5\textwidth]{c1/compare-scales}
486 % \begin{frame}{Summary}
487 % \begin{femtoBlock}{}
490 %\item We have presented a new online scaling factor selection method that \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
491 % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
492 %\item Our algorithm \textcolor{blue}{saves more energy} when the communication and the other slacks times are big. \medskip
493 %\item It gives the \textcolor{blue}{best trade-off between energy reduction and
494 % performance}. \medskip
495 %\item Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of energy-performance ratio.
496 %\item The proposed new energy model is \textcolor{blue}{more accurate} then Rauber energy model.
508 \begin{frame}{Contribution}
510 \subsection{\small {3.2 Energy optimization of heterogeneous platform}}
512 \bf \textcolor{black}{Second contribution} \\
514 \bf \Large \textcolor{blue}{Energy optimization of Heterogeneous platform}
524 \begin{frame}{Objectives}
525 \begin{femtoBlock}{} \vspace{-12 mm}
526 \begin{itemize} \small
527 \item Evaluating the \textcolor{blue}{new energy and performance models} of message passing applications with iterations running
528 over a heterogeneous platform (cluster and Grid). \medskip
529 \item Study the effect of the scaling factor $S$ on both \textcolor{blue}{energy consumption and the performance} of
530 message passing iterative applications. \medskip
532 \item Computing the vector of scaling factors ($S_1, S_2, \dots, S_n$) producing the \textcolor{blue}{optimal trade-off} between
533 energy consumption and performance.
544 \begin{frame}{The execution time model}
548 \includegraphics[scale=0.5]{c2/commtasks}
554 \begin{block}{\small The execution time prediction model}
557 \small\textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
560 \small Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{communication~times + slack~times}$
567 \begin{frame}{The energy consumption model}
568 -The overall energy consumption of a message passing synchronous distributed application executed over a
569 heterogeneous platform is computed as follows:
572 \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot Tcp_i)}} + {} \\
573 \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i)}))}
577 \textcolor{blue}{N} : is the number of nodes.
584 \begin{frame}{The energy model example for heter. cluster}
587 \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
597 \begin{frame}{The trade-off between energy and performance}
600 \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
603 \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}{$E_{norm} = \frac{E_{reduced}}
605 \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
607 \begin{block}{\small The tradeoff model}
610 \textcolor{red}{MaxDist} =
611 \mathop {\max_{i=1,\dots,F}}_{j=1,\dots,N}
612 (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
613 \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
622 \begin{frame}{The scaling algorithm for heter. cluster}
625 \includegraphics[width=.52\textwidth]{algo-heter}
632 \begin{frame}{The scaling algorithm example}
637 \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
647 \begin{frame}{Experiments over heterogeneous cluster }
650 \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
651 \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
652 \item Four types of processors with different computing powers were used.\medskip
653 \item We ran the benchmarks on different number of nodes ranging from 4 to 144 nodes.\medskip
654 \item The total power consumption of the chosen CPUs is composed of $80\%$ for dynamic power and $20\%$ for static power.
665 \begin{frame}{The experimental results}
669 \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
671 \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
672 for the NAS benchmarks class C executed over 8 nodes}
682 \begin{frame}{The experimental results}
687 \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
689 \textcolor{blue}{On average, it degrades the performance by \textcolor{red}{3.8\%}
690 for the NAS benchmarks class C executed over 8 nodes}
699 \begin{frame}{The results of the three power scenarios}
703 \includegraphics[width=.55\textwidth]{c2/three_power.pdf}
705 \includegraphics[width=.55\textwidth]{c2/three_scenarios.pdf}
714 \begin{frame}{Comparing our method}
715 The proposed method (MaxDist) was compared to the EDP algorithm that minimizes the \textcolor{blue}{
716 $\mathit{energy}\times \mathit{delay}$} value.
720 \includegraphics[width=.55\textwidth]{c2/avg_compare.pdf}
722 \includegraphics[width=.55\textwidth]{c2/compare_with_EDP.pdf}
732 \begin{frame}{Energy optimization of grid platform}
735 \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
737 \small 10 sites distributed over France and Luxembourg
745 \begin{frame}{Performance, Energy and trade-off models} \small
746 \begin{block}{\small The performance model of grid}
749 \Tnew = \mathop{\max_{i=1,\dots,N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
750 +\mathop{\min_{j=1,\dots,M_h}} (\Tcm[hj])
755 \begin{block}{\small The energy model of grid}\small
758 E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} +
759 \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
763 \begin{block}{\small The trade-off model of grid}
768 \mathop{ \mathop{\max_{i=1,\dots,N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
769 (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
770 \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
781 \begin{frame}{Experiments over Grid'5000}
784 \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
787 \textcolor{blue}{The experiments were executed over one-site and two-site scenarios}
791 \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
793 \textcolor{blue}{We used Grid'5000 power measurement tools}
802 \begin{frame}{Experiments over Grid'5000}
804 \begin{minipage}{0.4\textwidth}
805 \textcolor{blue}{Executing the NAS class D on 16 nodes reduces the energy consumption by
806 \textcolor{red}{30\%}}
808 \begin{minipage}{0.55\textwidth}
810 \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
814 \begin{minipage}{0.4\textwidth}
815 \textcolor{blue}{Executing the NAS class D on 16 nodes degrades the
816 performance by \textcolor{red}{3.2\%}}
818 \begin{minipage}{0.55\textwidth}
820 \includegraphics[width=.83\textwidth]{c2/per_d.eps}
830 \begin{frame}{Experiments over Grid'5000}
831 \textcolor{blue}{One core and Multi-cores per node results:}
834 \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
836 \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
839 \centering \small \textcolor{blue}{Using the multi-core per node scenario decreases the computation to communication ratio}.
844 %\begin{frame}{Summary}
847 % \item Two scaling algorithm were applies to \textcolor{blue}{heterogeneous %cluster} and \textcolor{blue}{grid}.
848 % \item A new \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
849 % \item The experimental results ere conducted over \textcolor{blue}{SimGrid} simulators and real
850 %test-bed \textcolor{blue}{Grid'5000}.
852 %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
853 % degrades the performance by \textcolor{blue}{3.8\%} for simulated heterogeneous
856 %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
857 % degrades the performance by \textcolor{blue}{3.2\%} for Grid'5000 results.
859 % \item The proposed method \textcolor{blue}{outperforms the EDP method} in terms of energy-performance ratio.
867 \begin{frame}{Contribution}
868 \subsection{\small {3.3 Energy optimization of asynchronous applications}}
870 \bf \textcolor{black}{Third contribution} \\
872 \bf \Large \textcolor{blue}{Energy optimization of asynchronous applications}
881 \begin{frame}{Problem definition}\vspace{0.8 mm}
882 \textcolor{blue}{Executing the parallel iterative application with synchronous communications }
885 \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
894 \begin{frame}{Problem definition}\vspace{0.8 mm}
895 \textcolor{blue}{Executing the parallel iterative application with asynchronous communications }
898 \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
907 \begin{frame}{Solution}\vspace{0.8mm}
908 \textcolor{blue}{Using asynchronous communications with DVFS }
911 \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
921 \begin{frame}{The performance models}
923 \begin{block}{\small The performance model of Asynch. Applications}\small
926 \Tnew = \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N \cdot M_i }
931 \begin{block}{\small The performance model of Hybrid Applications}\small
934 \Tnew = \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
935 \min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
947 \begin{frame}{The energy consumption models}
949 \begin{block}{\small The energy model of Asynch. Applications}\small
951 \label{eq:asyn_energy1}
952 E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
957 \begin{block}{\small The energy model of Hybrid Applications}\small
959 \label{eq:asyn_energy}
960 E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} + \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
961 ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
971 \begin{frame}{The scaling algorithm for Asynch. applications}
974 \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
982 \begin{frame}{The experimental results}
986 \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
990 \item Executing the iterative multi-splitting method over a simulated grid.
991 \item Executing the iterative multi-splitting method over the Grid'5000 test-bed.
1000 \begin{frame}{The simulation results}
1001 \centering \small \textcolor{blue}{The best scenario in terms of energy and performance is the Async. MS with Sync. DVFS}
1004 \includegraphics[scale=0.46]{c3/energy_saving.eps}
1006 \centering The average of energy saving = \textcolor{red}{22\%}
1011 %%%%%%%%%%%%%%%%%%%%
1013 %%%%%%%%%%%%%%%%%%%%
1014 \begin{frame}{The simulation results}
1017 \includegraphics[scale=0.46]{c3/perf_degra.eps}
1019 \centering The average of speed-up = \textcolor{red}{5.72\%}
1024 %%%%%%%%%%%%%%%%%%%%
1026 %%%%%%%%%%%%%%%%%%%%
1027 \begin{frame}{The Grid'5000 results}
1032 \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1033 \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1037 The energy saving = \textcolor{red}{26.93\%}, speed-up = \textcolor{red}{21.48\%}
1041 %%%%%%%%%%%%%%%%%%%%
1043 %%%%%%%%%%%%%%%%%%%%
1044 \begin{frame}{The comparison results}
1046 \includegraphics[width=.5\textwidth]{c3/compare.eps}
1048 \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1054 %%%%%%%%%%%%%%%%%%%%
1056 %%%%%%%%%%%%%%%%%%%%
1057 \begin{frame}{Conclusions}
1058 \section{Conclusions}
1061 \small \barrow We have proposed \textcolor{blue}{new energy consumption and performance} models for
1062 synchronous and asynchronous parallel applications with iterations.
1065 \small \barrow The parallel applications with iterations were executed over different parallel architectures such as: \textcolor{blue}{homogeneous cluster, heterogeneous cluster and
1068 \small \barrow We have proposed \textcolor{blue}{a new objective function} to optimize both the energy consumption and the performance.
1070 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1072 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1073 Multi-splitting} method.
1075 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator and over Grid'5000 testbed}.
1077 \small \barrow All the proposed methods were compared with either \textcolor{blue}{Rauber and Rünger's method} or \textcolor{blue}{the EDP objective function}.
1085 %%%%%%%%%%%%%%%%%%%%
1087 %%%%%%%%%%%%%%%%%%%%
1088 \begin{frame}{Publications}
1090 \begin{block}{\small Journal Articles }\scriptsize
1091 \begin{enumerate}[$\lbrack$1$\rbrack$]
1093 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier, Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational
1096 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier, Arnaud Giersch. Energy Consumption Reduction for
1097 Asynchronous Message Passing Applications. \textit{Journal of Supercomputing}, 2016, (Submitted)
1103 \begin{block}{\small Conference Articles }\scriptsize
1105 \begin{enumerate}[$\lbrack$1$\rbrack$]
1107 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1108 Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014}, pp.
1109 225-230. IEEE Computer Society, Milan, Italy (2014).
1111 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1112 with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1113 \textit{The 16\textsuperscript{th} PDSEC}. pp. 922-931. IEEE Computer Society, India (2015).
1115 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier, Arnaud Giersch. CPUs Energy Consumption
1116 Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society,
1125 %%%%%%%%%%%%%%%%%%%%
1127 %%%%%%%%%%%%%%%%%%%%
1128 \begin{frame}{Perspectives}
1129 \section{Perspectives}
1133 \small \barrow We will adapt the proposed algorithms to take into consideration the
1134 \textcolor{blue}{variability between some iterations}.
1136 \small \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1138 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1140 \small \barrow Comparing the results returned by the energy models to the values given by \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1145 %%%%%%%%%%%%%%%%%%%%
1147 %%%%%%%%%%%%%%%%%%%%
1148 \begin{frame}{Fin} \vspace{-10 mm}
1150 \centering \Large \textcolor{blue}{Thank You for Listening}
1153 \centering \textcolor{blue}{ {\Large Questions?}}