X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/ThesisAhmed.git/blobdiff_plain/240aac4a27721a2cb9d0e2ccd75e6e8970ce90c8..68ca79118192b40ef64bacc059bad602c13c82ba:/thesis-presentation/AhmedSlides.tex?ds=inline diff --git a/thesis-presentation/AhmedSlides.tex b/thesis-presentation/AhmedSlides.tex index d4fd8c3..b9ac39c 100644 --- a/thesis-presentation/AhmedSlides.tex +++ b/thesis-presentation/AhmedSlides.tex @@ -19,10 +19,10 @@ \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace} \definecolor{myblue}{RGB}{0,29,119} \newcommand{\Xsub}[2]{{\ensuremath{#1_\mathit{#2}}}} - +\usepackage{fixltx2e} %% used to put some subscripts lower, and make them more legible \newcommand{\fxheight}[1]{\ifx#1\relax\relax\else\rule{0pt}{1.52ex}#1\fi} - +\usepackage{ragged2e} \newcommand{\CL}{\Xsub{C}{L}} \newcommand{\Dist}{\mathit{Dist}} \newcommand{\EdNew}{\Xsub{E}{dNew}} @@ -75,8 +75,8 @@ %Iterations using CPU Frequency Scaling} \vspace{2cm} -\title{ \textbf{Energy Consumption Optimization of Parallel Applications with Iterations using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-1cm} -\author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under Supervision: \textcolor{cyan}{\small Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ University of Franche-Comté - FEMTO-ST - DISC Dept. - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}} +\title{ \textbf{Energy Consumption Optimization of Parallel Applications with Iterations using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm} +\author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept. 
- AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}} \date{} \vspace{-3cm} @@ -115,9 +115,9 @@ %%%%%%%%%%%%%%%%%%%% \begin{frame}{Introduction and problem definition} \section{\small {Introduction and Problem definition}} - \bf \textcolor{blue}{Approaches to increase the computing power:} + \bf \textcolor{blue}{To get more computing power:} \begin{minipage}{0.5\textwidth} - \textcolor{blue}{1)} \small \bf \textcolor{black}{Increasing the frequency of processor} + \textcolor{blue}{1)} \small \bf \textcolor{black}{Increase the frequency of a processor.\\ (limited due to overheating)} \end{minipage}% \begin{minipage}{0.6\textwidth} @@ -128,7 +128,10 @@ \end{minipage}% \vspace{0.2cm} \begin{minipage}{0.5\textwidth} - \textcolor{blue}{2)} \small \bf \textcolor{black}{Increasing the number of nodes} + \textcolor{blue}{2)} \small \bf \textcolor{black}{Use more nodes.} + + \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.} + \end{minipage}% \begin{minipage}{0.6\textwidth} \begin{figure}[h!] @@ -139,84 +142,75 @@ -%%%%%%%%%%%%%%%%%%%% -%% SLIDE 04 %% -%%%%%%%%%%%%%%%%%%%% -\begin{frame}{Introduction and problem definition} - \bf \textcolor{blue}{Processor frequency and its energy consumption} - \vspace{0.4cm} - \begin{minipage}{0.5\textwidth} - \textcolor{blue}{$\blacktriangleright$} - \small \bf \textcolor{black}{ The power consumption of a processor increases exponentially when its - frequency is increased} - \end{minipage}% - \begin{minipage}{0.5\textwidth} - \begin{figure}[h!] - \includegraphics[width=0.7\textwidth]{fig/freq-power} - \end{figure} - \end{minipage}% - - \begin{minipage}{0.5\textwidth} - \textcolor{blue}{$\blacktriangleright$} - \small \bf \textcolor{black}{The biggest power consumption is consumed by a processor in the computing node} - - \end{minipage}% - \begin{minipage}{0.6\textwidth} - \begin{figure}[h!] 
- \includegraphics[width=0.9\textwidth]{fig/node-power} - \end{figure} - \end{minipage}% - - \end{frame} - + %%%%%%%%%%%%%%%%%%% -%% SLIDE 05 %% +%% SLIDE 04 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Introduction and problem definition} - \vspace{0.1cm} - \bf \textcolor{blue}{Techniques for energy consumption reduction} - +\begin{frame}{Techniques for energy consumption reduction} + \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method} \vspace{-0.9cm} \begin{figure} \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69} + %\includegraphics[width=0.6\textwidth]{on-off/a-69} \end{figure} \end{frame} %%%%%%%%%%%%%%%%%%%% -%% SLIDE 06 %% +%% SLIDE 05 %% %%%%%%%%%%%%%%%%%%%% \begin{frame}{Techniques for energy consumption reduction} - \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency Scaling (DVFS)} - \vspace{-0.5cm} + \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic Voltage and Frequency Scaling (DVFS)} + \vspace{-0.9cm} \begin{figure} - \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109} + \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109} + %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109} \end{figure} \end{frame} %%%%%%%%%%%%%%%%%%%% -%% SLIDE 07 %% +%% SLIDE 06 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Using the energy reduction method} -\section{\small {Using the energy reduction method}} -\begin{block}{\textcolor{white}{Why we used DVFS method:}} -\begin{itemize} - \item \textcolor{black}{It used to reduce the energy while keeping all node working, thus it is more conventional with parallel computing.} - \item \textcolor{black}{It has a very small overhead compared to switch-off idle nodes method.} +\begin{frame}{Motivations} +\vspace{0.05cm} +\section{\small {Motivations}} +\textcolor{blue}{Why we used the DVFS method:} +\vspace{-0.49cm} +\begin{minipage}{0.5\textwidth} + \vspace{-0.49cm} + \begin{itemize} 
+ \item \small \textcolor{black}{ The CPU is the component that consumes the highest amount of energy in a node \textsuperscript{1}. } + \end{itemize} -\end{block} - \vspace{0.1cm} + \end{minipage}% + \begin{minipage}{0.5\textwidth} + \vspace{-0.49cm} + \begin{figure}[h!] + \includegraphics[width=0.85\textwidth]{fig/node-power} + + \end{figure} + \end{minipage}% + + \begin{itemize} \item \small \textcolor{black}{DVFS reduces the energy consumption while + keeping all the nodes working.} + \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes.} \end{itemize} + +\vspace{-0.12cm} + \begin{block}{\textcolor{white}{Challenge and Objective}} - \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy, \textcolor{blue}{but} it degrades the performance simultaneously.} + \small \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it also degrades the performance of the CPU.} \vspace{0.1cm} - \textcolor{blue}{Objective:} \textcolor{black}{Optimizing both energy consumption and performance of a parallel application at the same time when DVFS is used.} + \small \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.} \end{block} + + \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007. Power provisioning +for a warehouse-sized computer. 
\end{frame} @@ -227,13 +221,14 @@ %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Contributions} -\section{\small {Contributions}} -\subsection{\small {3.1 Energy optimization of homogeneous platform}} +\begin{frame}{The first contribution} + +\section{\small {Energy optimization of a homogeneous platform}} +%\vspace{-3cm} + % \includegraphics[width=0.6\textwidth]{white.pdf} + \begin{center} -\bf \textcolor{black}{First contribution} \\ -\vspace{1cm} -\bf \Large \textcolor{blue}{Energy optimization of homogeneous platform} +\bf \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform} \end{center} \end{frame} @@ -244,19 +239,22 @@ %%%%%%%%%%%%%%%%%%%% \begin{frame}{Objectives} - \begin{femtoBlock}{} \vspace{-12 mm} - \begin{itemize} \small - \item Study the effect of the scaling factor $S$ on \textbf{energy consumption} of parallel iterative applications such as NAS - Benchmarks. \includegraphics[width=.06\textwidth]{c1/nasa.pdf} \medskip - \item Study the effect of the scaling factor $S$ on \textbf{performance} of these benchmarks.\medskip - \item Discovering the \textbf{energy-performance trade-off relation} when changing the frequency.\medskip - \item We propose an algorithm for selecting the scaling factor $S$ producing \textbf {optimal trade-off} between the energy and performance. \medskip - \item Improving Rauber and Rünger's\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the - energy consumption \\ \quad ~ ~\quad of independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on. + + \begin{itemize} \small \justifying + + \item Study the effect of the scaling factor on the \textbf{energy consumption and performance } of parallel applications with iterations. 
\medskip + + \item Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip + \item Proposing an algorithm for selecting the scaling factor that produces \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip + \item Comparing the proposed algorithm to existing methods. + + + %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the + %energy consumption \\ \quad ~ ~\quad of independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on. \end{itemize} - \let\thefootnote\relax\footnote{} - \vspace{-10 mm} - \end{femtoBlock} + %\let\thefootnote\relax\footnote{} + + \end{frame} @@ -266,15 +264,15 @@ %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Parallel tasks execution over Homo. Platform} +\begin{frame}{Execution of synchronous parallel tasks} \vspace{-0.5 cm} \begin{figure} \centering - \subfloat[Sync. imbalanced communications]{% + \subfloat[Synchronous imbalanced communications]{% \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}} - \subfloat[Sync. imbalanced computations]{% + \subfloat[Synchronous imbalanced computations]{% \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}} - \caption{Parallel tasks on homogeneous platform} + % \caption{Parallel tasks on homogeneous platform} \label{fig:homo} \end{figure} @@ -286,7 +284,7 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 11 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Energy model for homogeneous platform} +\begin{frame}{Energy model for a homogeneous platform} The power consumed by a processor divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static (\textcolor{red}{$P_s$}) power. 
\begin{equation} @@ -295,7 +293,7 @@ \end{equation} \scriptsize \underline{Where}: \\ \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm} \textcolor{blue}{$CL$}: load capacitance\\ - \textcolor{blue}{$V$} the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency} + \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency} \begin{equation} \label{eq:ps} \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{Leak}} @@ -310,7 +308,7 @@ %% SLIDE 12 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Energy model for homogeneous platform} +\begin{frame}{Energy model for a homogeneous platform} The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}. \medskip @@ -321,12 +319,12 @@ \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) + P_{s} \cdot S_1 \cdot T_1 \cdot N$ \end{block} - \textcolor{blue}{$S_1$}: the max. scaling factor\\ - \textcolor{blue}{$P_{d}$}: the dynamic power\\ - \textcolor{blue}{$P_{s}$}: the static power\\ - \textcolor{blue}{$T_I$}: the time of the slower task\\ - \textcolor{blue}{$T_i$}: the time of the other tasks\\ - \textcolor{blue}{$N$}: the number of nodes + \textcolor{blue}{$S_1$}: the maximum scaling factor.\\ + \textcolor{blue}{$P_{d}$}: the dynamic power.\\ + \textcolor{blue}{$P_{s}$}: the static power.\\ + \textcolor{blue}{$T_1$}: the execution time of the slowest task.\\ + \textcolor{blue}{$T_i$}: the execution time of task $i$.\\ + \textcolor{blue}{$N$}: the number of nodes.
\end{frame} @@ -374,9 +372,9 @@ %\vspace{-0.3cm} \small \begin{block}{\small Our objective function} - \centering{$\textbf{\emph {MaxDist}} = \max_{j=1,2,\dots ,F} - (\overbrace{P_{Norm}(S_j)}^{{Maximize}} - - \overbrace{E_{Norm}(S_j)}^{{Minimize}} )$} + \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F} + (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} - + \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$} \end{block} \end{femtoBlock} @@ -403,7 +401,7 @@ \begin{figure} \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159} - + %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159} \end{figure} \end{frame} @@ -414,11 +412,11 @@ \begin{femtoBlock}{} \begin{itemize} \small - \item Our experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip - \item Our algorithm is applied to NAS parallel benchmarks.\medskip + \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip + \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip \item Each node in the cluster has 18 frequency values from \textbf{2.5$GHz$} to \textbf{800$MHz$}.\medskip - \item We run the classes A, B and C on 4, 8 or 9 and 16 nodes respectively.\medskip - \item The dynamic power with the highest frequency is equal to \textbf{20 $W$} and the power static is equal to \textbf{4 $W$}. + \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip + \item $P_d=20W$, $P_s=4W$. 
\end{itemize} \end{femtoBlock} \end{frame} @@ -447,13 +445,15 @@ $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $ \end{block} + + \centering { %\includegraphics[width=.33\textwidth]{c1/c1.pdf} %\qquad %\includegraphics[width=.33\textwidth]{c1/c2.pdf}} - \includegraphics[width=.55\textwidth]{c1/compare_c.pdf}} + \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}} \end{frame} @@ -465,6 +465,7 @@ \vspace{-0.75cm} \begin{figure} \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356} + %\includegraphics[width=0.6\textwidth]{homo-model/a-356} \end{figure} \end{frame} @@ -472,7 +473,7 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 21 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Comparing the new model with Rauber model } +\begin{frame}{\large Comparing the new model with Rauber's model } \vspace{0.1cm} \centering \includegraphics[width=.45\textwidth]{c1/energy_con} @@ -505,13 +506,13 @@ %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Contribution} +\begin{frame}{The second contribution} -\subsection{\small {3.2 Energy optimization of heterogeneous platform}} +\section{\small {Energy optimization of a heterogeneous platform}} \begin{center} -\bf \textcolor{black}{Second contribution} \\ -\vspace{1cm} -\bf \Large \textcolor{blue}{Energy optimization of Heterogeneous platform} + + +\bf \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a Heterogeneous platform} \end{center} \end{frame} @@ -524,13 +525,13 @@ \begin{frame}{Objectives} \begin{femtoBlock}{} \vspace{-12 mm} \begin{itemize} \small - \item Evaluating the \textcolor{blue}{new energy and performance models} of message passing applications with iterations running - over a heterogeneous platform (cluster and Grid). 
\medskip - \item Study the effect of the scaling factor $S$ on both \textcolor{blue}{energy consumption and the performance} of + \item Proposing \textcolor{blue}{new energy and performance models} for message passing applications with iterations running + over a heterogeneous platform (cluster or Grid). \medskip + \item Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption and the performance} of message passing iterative applications. \medskip - \item Computing the vector of scaling factors ($S_1, S_2, ..., S_n$) producing \textcolor{blue} {optimal trade-off} between - energy consumption and performance. + \item Computing the vector of scaling factors ($S_1, S_2, ..., S_n$) producing \textcolor{blue} {the optimal trade-off} between + the energy consumption and the performance. \end{itemize} \vspace{-10 mm} @@ -565,8 +566,8 @@ %% SLIDE 25 %% %%%%%%%%%%%%%%%%%%%% \begin{frame}{The energy consumption model} - -The overall energy consumption of a message passing synchronous distributed application executed over a - heterogeneous platform is computed as follows: + The overall energy consumption of a message passing synchronous application executed over + a heterogeneous platform can be computed as follows: \begin{multline} \label{eq:energy} \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot Tcp_i)}} + {} \\ @@ -585,6 +586,7 @@ \vspace{-0.5cm} \begin{figure} \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272} + %\includegraphics[width=0.6\textwidth]{heter-model/a-272} \end{figure} \end{frame} @@ -594,26 +596,26 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 27 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{The trade-off between energy and performance} - \vspace{-7 mm} - \begin{figure} - \centering{ \includegraphics[width=.4\textwidth]{c2/heter}} - \end{figure} - \vspace{-7 mm} - \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}{$E_{norm} = 
\frac{E_{reduced}} - {E_{Max}}$}. \\ - \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}. +%\begin{frame}{The trade-off between energy and performance} + % \vspace{-7 mm} + %\begin{figure} + % \centering{ \includegraphics[width=.4\textwidth]{c2/heter}} + % \end{figure} + % \vspace{-7 mm} + % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}} + %{E_{Max}}$}. \\ + % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}. - \begin{block}{\small The tradeoff model} - \begin{equation} - \label{eq:max} - \textcolor{red}{MaxDist} = - \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N} - (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} - - \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} ) - \end{equation} - \end{block} -\end{frame} + % \begin{block}{\small The tradeoff model} + % \begin{equation} + % \label{eq:max} + % \textcolor{red}{MaxDist} = + % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N} + % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} - + % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} ) + %\end{equation} + % \end{block} +%\end{frame} %%%%%%%%%%%%%%%%%%%% @@ -635,6 +637,7 @@ \begin{figure} \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650} + % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650} \end{figure} \end{frame} @@ -644,14 +647,14 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 30 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Experiments over heterogeneous cluster } +\begin{frame}{Experiments over a heterogeneous cluster } \begin{itemize} \small - \item The experiments executed on the simulator SimGrid/SMPI v3.10.\medskip + \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip \item The scaling algorithm was applied to the NAS parallel 
benchmarks class C.\medskip \item Four types of processors with different computing powers were used.\medskip - \item We ran the benchmarks on different number of nodes ranging from 4 to 144 nodes.\medskip - \item The total power consumption of the chosen CPUs is composed of $80\%$ for dynamic power and $20\%$ for static power. + \item The benchmarks were executed with different numbers of nodes ranging from 4 to 144 nodes.\medskip + \item It was assumed that the total power consumption of the CPU consists of 80\% dynamic power and 20\% static power. \medskip \end{itemize} @@ -668,8 +671,8 @@ \centering \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf} - \textcolor{blue}{On average, it saves the energy consumption by \textcolor{red}{29\%} - of NAS benchmarks class C executed over 8 nodes} + \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%} + for the class C of the NAS Benchmarks executed over 8 nodes} \end{figure} \end{frame} @@ -686,8 +689,8 @@ \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf} - \textcolor{blue}{On average, it degrades the performance by \textcolor{red}{3.8\%} - of NAS benchmarks class C executed over 8 nodes} + \textcolor{blue}{On average, it degrades by \textcolor{red}{3.8\%} the performance + of NAS Benchmarks class C executed over 8 nodes} \end{figure} \end{frame} @@ -696,7 +699,7 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 33 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{The results of the three powers scenarios} +\begin{frame}{The results of the three power scenarios} \vspace{-5 mm} \begin{figure}[!t] \centering @@ -711,9 +714,9 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 34 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{The comparing our method} - The proposed method (MaxDist) was compared to the EDP algorithm that minimizes the \textcolor{blue}{ - $\mathit{energy}\times \mathit{delay}$} value. +\begin{frame}{Comparing the objective function to EDP} + + EDP is the product of the energy consumption and the delay.
\vspace{-5 mm} \begin{figure}[!t] \centering @@ -729,47 +732,53 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 35 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Energy optimization of grid platform} - \begin{figure}[!t] - \centering - \includegraphics[width=.6\textwidth]{c2/grid5000.pdf} +%\begin{frame}{Energy optimization of grid platform} + % \begin{figure}[!t] + % \centering + % \includegraphics[width=.6\textwidth]{c2/grid5000.pdf} - \small 10 sites distributed over France and Luxembourg - \end{figure} -\end{frame} + % \small 10 sites distributed over France and Luxembourg + %\end{figure} +%\end{frame} %%%%%%%%%%%%%%%%%%%% %% SLIDE 36 %% %%%%%%%%%%%%%%%%%%%% - \begin{frame}{Performance, Energy and trade-off models} \small - \begin{block}{\small The performance model of grid} - \begin{equation} - \label{eq:perf} - \Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij}) - +\mathop{\min_{j=1,\dots,M_h}} (\Tcm[hj]) -\end{equation} - \end{block} +\begin{frame}{The grid architecture} +\begin{center} +\includegraphics[width=.8\textwidth]{c2/init_freq.pdf} +\end{center} + + %\begin{frame}{Performance, Energy and trade-off models} \small + %\begin{block}{\small The performance model of grid} + % \begin{equation} + %\label{eq:perf} + %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij}) + % +\mathop{\min_{j=1,\dots,M_h}} (\Tcm[hj]) +%\end{equation} + %\end{block} - \begin{block}{\small The energy model of grid}\small - \begin{equation} - \label{eq:energy} - E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} + - \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew) -\end{equation} - \end{block} - -\begin{block}{\small The trade-off model of grid} -\small - \begin{equation} - \label{eq:max} - \MaxDist = - \mathop{ \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j} - (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} - - \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} ) -\end{equation} - \end{block} + 
%\begin{block}{\small The energy model of grid}\small + % \begin{equation} + %\label{eq:energy} + %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} + +% \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew) +%\end{equation} + % \end{block} + +%\begin{block}{\small The trade-off model of grid} +%\small + %\begin{equation} + %\label{eq:max} + %\MaxDist = + %\mathop{ \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j} + % (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} - + % \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} ) +%\end{equation} + % \end{block} + \end{frame} @@ -779,18 +788,21 @@ %% SLIDE 37 %% %%%%%%%%%%%%%%%%%%%% \begin{frame}{Experiments over Grid'5000} - \centering - + + \textcolor{blue}{The experiments were conducted using three + clusters distributed over one or two sites.} + \vspace{-7 mm} + \begin{center} \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf} - - \vspace{-3 mm} - \textcolor{blue}{The experiments executed over one site and two sites scenarios} - - \vspace{1mm} - + \end{center} + \vspace{-10 mm} + \textcolor{blue}{Grid'5000 power measurement tools were used.} + \vspace{-9 mm} + \begin{center} \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf} + \end{center} - \textcolor{blue}{We used Grid'5000 power measurement tools} + \end{frame} @@ -802,8 +814,9 @@ \begin{frame}{Experiments over Grid'5000} \begin{minipage}{0.4\textwidth} - \textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by - \textcolor{red}{30\%}} + %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by + %\textcolor{red}{30\%}} + \small \textcolor{blue}{The average energy saving = \textcolor{red}{30\%}} \end{minipage} \begin{minipage}{0.55\textwidth} \begin{figure}[h!] 
@@ -812,8 +825,9 @@ \end{minipage} \begin{minipage}{0.4\textwidth} - \textcolor{blue}{Execution the NAS class D on 16 nodes degrades the - performance by \textcolor{red}{3.2\%}} + %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the + %performance by \textcolor{red}{3.2\%}} + \small \textcolor{blue}{The average performance degradation = \textcolor{red}{3.2\%}} \end{minipage} \begin{minipage}{0.55\textwidth} \begin{figure}[h!] @@ -836,7 +850,7 @@ \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps} \end{figure} - \centering \small \textcolor{blue}{Using multi-core per node scenario decreases the computations to communications ratio}. + \centering \small \textcolor{blue}{Using multi-cores per node scenario decreases the computations to communications ratio}. \end{frame} @@ -864,12 +878,10 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 40 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Continuation} -\subsection{\small {3.3 Energy optimization of asynchronous applications}} +\begin{frame}{The third contribution} +\section{\small {Energy optimization of asynchronous applications}} \begin{center} -\bf \textcolor{black}{Third contribution} \\ -\vspace{1cm} -\bf \Large \textcolor{blue}{Energy optimization of asynchronous applications} +\bf \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing applications} \end{center} \end{frame} @@ -879,10 +891,11 @@ %% SLIDE 41 %% %%%%%%%%%%%%%%%%%%%% \begin{frame}{Problem definition}\vspace{0.8 mm} -\textcolor{blue}{Execution the parallel iterative application with synchronous communications } +\textcolor{blue}{The execution of a synchronous parallel iterative application over a grid } \vspace{-8 mm} \begin{figure} - \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503} + \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503} + %\includegraphics[width=0.6\textwidth]{syn/a-503} \end{figure} \end{frame} @@ -892,10 +905,11 @@ %% SLIDE 42 %% 
%%%%%%%%%%%%%%%%%%%% \begin{frame}{Problem definition}\vspace{0.8 mm} -\textcolor{blue}{Execution the parallel iterative application with synchronous communications } +\textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid } \vspace{-8 mm} \begin{figure} - \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440} + \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440} + %\includegraphics[width=0.6\textwidth]{asyn/a-440} \end{figure} \end{frame} @@ -909,6 +923,7 @@ \vspace{-8 mm} \begin{figure} \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314} + %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314} \end{figure} \end{frame} @@ -918,53 +933,66 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 44 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{The performance models} +%\begin{frame}{The performance models} -\begin{block}{\small The performance model of Asynch. Applications}\small -\begin{equation} - \label{eq:asyn_time} - \Tnew = \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N \cdot M_i } -\end{equation} -\end{block} +%\begin{block}{\small The performance model of Asynch. 
Applications}\small +%\begin{equation} + %\label{eq:asyn_time} + %\Tnew = \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N \cdot M_i } +%\end{equation} +%\end{block} -\begin{block}{\small The performance model of Hybrid Applications}\small -\begin{equation} - \label{eq:asyn_perf} - \Tnew = \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) + - \min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N} -\end{equation} -\end{block} +%\begin{block}{\small The performance model of Hybrid Applications}\small +%\begin{equation} + %\label{eq:asyn_perf} + %\Tnew = \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) + + %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N} +%\end{equation} +%\end{block} -\end{frame} +%\end{frame} %%%%%%%%%%%%%%%%%%%% %% SLIDE 45 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{The energy consumption models} +%\begin{frame}{The energy consumption models} -\begin{block}{\small The energy model of Asynch. Applications}\small -\begin{equation} - \label{eq:asyn_energy1} - E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )} -\end{equation} -\end{block} +%\begin{block}{\small The energy model of Asynch. 
Applications}\small +%\begin{equation} + %\label{eq:asyn_energy1} +% E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )} +%\end{equation} +%\end{block} -\begin{block}{\small The energy model of Hybrid Applications}\small -\begin{multline} - \label{eq:asyn_energy} - E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} + \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\ - ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]}))) -\end{multline} -\end{block} +%\begin{block}{\small The energy model of Hybrid Applications}\small +%\begin{multline} + %\label{eq:asyn_energy} + %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} + \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\ +% ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]}))) +%\end{multline} +%\end{block} +%\end{frame} + + + +%%%%%%%%%%%%%%%%%%%% +%% SLIDE 44 %% +%%%%%%%%%%%%%%%%%%%% +\begin{frame}{The performance and the energy models } + +\centering +\includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf} \end{frame} + + %%%%%%%%%%%%%%%%%%%% %% SLIDE 46 %% %%%%%%%%%%%%%%%%%%%% @@ -979,16 +1007,19 @@ %%%%%%%%%%%%%%%%%%%% %% SLIDE 47 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{The experimental results} +\begin{frame}{The experiments} \vspace{-5 mm} \begin{figure}[!t] - \centering + \begin{itemize} + \small + \item The architecture of the grid: + \end{itemize} \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf} \end{figure} \begin{itemize} \small - \item Execution the iterative multi-splitting method over simulated Grid. - \item Execution the iterative multi-splitting method over Grid'5000 test-bed. + \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method. + \item Evaluating the application over the simulator and Grid'5000. 
\end{itemize} \end{frame} @@ -998,12 +1029,12 @@ %% SLIDE 48 %% %%%%%%%%%%%%%%%%%%%% \begin{frame}{The simulation results} -\centering \small \textcolor{blue}{The best scenario in term of energy and performance is the Async. MS with Sync. DVFS} +\centering \small \textcolor{blue}{The best scenario in terms of energy and performance is the Async. MS with Sync. DVFS} \centering - \includegraphics[scale=0.46]{c3/energy_saving.eps} + \includegraphics[scale=0.42]{c3/energy_saving.eps} - \centering The average of energy saving = \textcolor{red}{22\%} + \centering The average energy saving = \textcolor{red}{22\%} \end{frame} @@ -1014,9 +1045,9 @@ \begin{frame}{The simulation results} \centering - \includegraphics[scale=0.46]{c3/perf_degra.eps} + \includegraphics[scale=0.42]{c3/perf_degra.eps} - \centering The average of speed-up = \textcolor{red}{5.72\%} + \centering The average speed-up = \textcolor{red}{5.72\%} \end{frame} @@ -1033,8 +1064,8 @@ \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps} \end{figure} \vspace{-5 mm} - \centering - The energy saving = \textcolor{red}{26.93\%}, speeds up = \textcolor{red}{21.48\%} + \centering \footnotesize +The average energy saving = \textcolor{red}{26.93\%}, the average speed-up = \textcolor{red}{21.48\%} \end{frame} @@ -1055,26 +1086,24 @@ %% SLIDE 52 %% %%%%%%%%%%%%%%%%%%%% \begin{frame}{Conclusions} -\section{Conclusions} +\section{Conclusions and Perspectives} \begin{itemize} -\small \barrow We have proposed \textcolor{blue}{a new energy consumption and performance} models for - synchronous and asynchronous parallel applications with iterations. - +\small \barrow Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over +\textcolor{blue}{homogeneous and heterogeneous clusters or grids}. 
-\small \barrow The parallel applications with iterations were executed over different parallel architectures such as: \textcolor{blue}{homogeneous cluster, heterogeneous cluster and -grid}. -\small \barrow We have proposed \textcolor{blue}{new objective function} to optimize both the energy consumption and the performance. + +\small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed. \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed. \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the Multi-splitting} method. -\small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator and over Grid'5000 testbed}. +\small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over the \textcolor{blue}{Grid'5000 testbed}. -\small \barrow All the proposed methods were compared with either \textcolor{blue}{Rauber and Rünger method} or \textcolor{blue}{EDP objective function}. +\small \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's method} or to the \textcolor{blue}{EDP objective function}. \end{itemize} @@ -1085,7 +1114,7 @@ Multi-splitting} method. %%%%%%%%%%%%%%%%%%%% %% SLIDE 53 %% %%%%%%%%%%%%%%%%%%%% -\begin{frame}{Publication} +\begin{frame}{Publications} \begin{block}{\small Journal Articles }\scriptsize \begin{enumerate}[$\lbrack$1$\rbrack$] @@ -1126,11 +1155,10 @@ Multi-splitting} method. %% SLIDE 54 %% %%%%%%%%%%%%%%%%%%%% \begin{frame}{Perspectives} -\section{Perspectives} \begin{itemize} -\small \barrow We will adapt the proposed algorithms to take into consideration the +\small \barrow The proposed algorithms should take into consideration the \textcolor{blue}{variability between some iterations}. 
\small \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods. @@ -1147,7 +1175,7 @@ Multi-splitting} method. %%%%%%%%%%%%%%%%%%%% \begin{frame}{Fin} \vspace{-10 mm} - \centering \Large \textcolor{blue}{Thanks for Your Listening} + \centering \Large \textcolor{blue}{Thank you for your attention} \vspace{2cm} \centering \textcolor{blue}{ {\Large Questions?}}