\todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
\definecolor{myblue}{RGB}{0,29,119}
\newcommand{\Xsub}[2]{{\ensuremath{#1_\mathit{#2}}}}
-
+\usepackage{fixltx2e}
%% used to put some subscripts lower, and make them more legible
\newcommand{\fxheight}[1]{\ifx#1\relax\relax\else\rule{0pt}{1.52ex}#1\fi}
%Iterations using CPU Frequency Scaling}
\vspace{2cm}
-\title{ \textbf{Energy Consumption Optimization of Parallel Applications with Iterations using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-1cm}
-\author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under Supervision: \textcolor{cyan}{\small Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ University of Franche-Comté - FEMTO-ST - DISC Dept. - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
+\title{ \textbf{Energy Consumption Optimization of Parallel Applications with Iterations using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm}
+\author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept. - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
\date{}
\vspace{-3cm}
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Introduction and problem definition}
\section{\small {Introduction and Problem definition}}
- \bf \textcolor{blue}{Approaches to increase the computing power:}
+ \bf \textcolor{blue}{To get more computing power:}
\begin{minipage}{0.5\textwidth}
- \textcolor{blue}{1)} \small \bf \textcolor{black}{Increasing the frequency of processor}
+ \textcolor{blue}{1)} \small \bf \textcolor{black}{Increase the frequency of a processor.\\ (limited due to overheating)}
\end{minipage}%
\begin{minipage}{0.6\textwidth}
\end{minipage}%
\vspace{0.2cm}
\begin{minipage}{0.5\textwidth}
- \textcolor{blue}{2)} \small \bf \textcolor{black}{Increasing the number of nodes}
+ \textcolor{blue}{2)} \small \bf \textcolor{black}{Use more nodes.}
+
+ \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}
+
\end{minipage}%
\begin{minipage}{0.6\textwidth}
\begin{figure}[h!]
-%%%%%%%%%%%%%%%%%%%%
-%% SLIDE 04 %%
-%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Introduction and problem definition}
- \bf \textcolor{blue}{Processor frequency and its energy consumption}
- \vspace{0.4cm}
- \begin{minipage}{0.5\textwidth}
- \textcolor{blue}{$\blacktriangleright$}
- \small \bf \textcolor{black}{ The power consumption of a processor increases exponentially when its
- frequency is increased}
- \end{minipage}%
- \begin{minipage}{0.5\textwidth}
- \begin{figure}[h!]
- \includegraphics[width=0.7\textwidth]{fig/freq-power}
- \end{figure}
- \end{minipage}%
-
- \begin{minipage}{0.5\textwidth}
- \textcolor{blue}{$\blacktriangleright$}
- \small \bf \textcolor{black}{The biggest power consumption is consumed by a processor in the computing node}
-
- \end{minipage}%
- \begin{minipage}{0.6\textwidth}
- \begin{figure}[h!]
- \includegraphics[width=0.9\textwidth]{fig/node-power}
- \end{figure}
- \end{minipage}%
-
- \end{frame}
-
+
%%%%%%%%%%%%%%%%%%%
-%% SLIDE 05 %%
+%% SLIDE 04 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Introduction and problem definition}
+\begin{frame}{Techniques for energy consumption reduction}
\vspace{0.1cm}
- \bf \textcolor{blue}{Techniques for energy consumption reduction}
+
\textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
\vspace{-0.9cm}
\begin{figure}
\animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
+ %\includegraphics[width=0.6\textwidth]{on-off/a-69}
\end{figure}
\end{frame}
\textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency scaling (DVFS)}
\vspace{-0.5cm}
\begin{figure}
- \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
+ \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
+ %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 07 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Using the energy reduction method}
-\section{\small {Using the energy reduction method}}
-\begin{block}{\textcolor{white}{Why we used DVFS method:}}
-\begin{itemize}
- \item \textcolor{black}{It used to reduce the energy while keeping all node working, thus it is more conventional with parallel computing.}
- \item \textcolor{black}{It has a very small overhead compared to switch-off idle nodes method.}
+\begin{frame}{Motivations}
+\vspace{0.05cm}
+\section{\small {Motivations}}
+\textcolor{blue}{Why we used the DVFS method:}
+\vspace{-0.49cm}
+\begin{minipage}{0.5\textwidth}
+ \vspace{-0.49cm}
+ \begin{itemize}
+ \item \small \textcolor{black}{The CPU is the component that consumes the highest amount of energy in a node\textsuperscript{1}.}
+
\end{itemize}
-\end{block}
- \vspace{0.1cm}
+ \end{minipage}%
+ \begin{minipage}{0.5\textwidth}
+ \vspace{-0.49cm}
+ \begin{figure}[h!]
+ \includegraphics[width=0.85\textwidth]{fig/node-power}
+
+ \end{figure}
+ \end{minipage}%
+
+ \begin{itemize} \item \small \textcolor{black}{DVFS reduces the energy consumption while
+ keeping all the nodes working.}
+ \item \small \textcolor{black}{It has a very small overhead compared to switching off the idle nodes.} \end{itemize}
+
+\vspace{-0.12cm}
+
\begin{block}{\textcolor{white}{Challenge and Objective}}
- \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy, \textcolor{blue}{but} it degrades the performance simultaneously.}
+ \small \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it degrades the performance simultaneously.}
\vspace{0.1cm}
- \textcolor{blue}{Objective:} \textcolor{black}{Optimizing both energy consumption and performance of a parallel application at the same time when DVFS is used.}
+ \small \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
\end{block}
+
+ \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007. Power provisioning
+for a warehouse-sized computer.
\end{frame}
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Contributions}
-\section{\small {Contributions}}
-\subsection{\small {3.1 Energy optimization of homogeneous platform}}
+\begin{frame}{The first contribution}
+
+\section{\small {Energy optimization of a homogeneous platform}}
+%\vspace{-3cm}
+ % \includegraphics[width=0.6\textwidth]{white.pdf}
+
\begin{center}
-\bf \textcolor{black}{First contribution} \\
-\vspace{1cm}
-\bf \Large \textcolor{blue}{Energy optimization of homogeneous platform}
+\bf \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
\end{center}
\end{frame}
\begin{frame}{Objectives}
\begin{femtoBlock}{} \vspace{-12 mm}
\begin{itemize} \small
- \item Study the effect of the scaling factor $S$ on \textbf{energy consumption} of parallel iterative applications such as NAS
- Benchmarks. \includegraphics[width=.06\textwidth]{c1/nasa.pdf} \medskip
- \item Study the effect of the scaling factor $S$ on \textbf{performance} of these benchmarks.\medskip
- \item Discovering the \textbf{energy-performance trade-off relation} when changing the frequency.\medskip
- \item We propose an algorithm for selecting the scaling factor $S$ producing \textbf {optimal trade-off} between the energy and performance. \medskip
- \item Improving Rauber and Rünger's\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
- energy consumption \\ \quad ~ ~\quad of independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
+ \item Studying the effect of the scaling factor on the \textbf{energy consumption and performance} of parallel applications with iterations. \medskip
+
+ \item Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
+ \item Proposing an algorithm for selecting the scaling factor that produces \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
+ \item Comparing the proposed algorithm to existing methods.
+
+
+ %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
+ %energy consumption \\ \quad ~ ~\quad of independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
\end{itemize}
- \let\thefootnote\relax\footnote{}
+ %\let\thefootnote\relax\footnote{}
\vspace{-10 mm}
\end{femtoBlock}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Parallel tasks execution over Homo. Platform}
+\begin{frame}{Execution of synchronous parallel tasks}
\vspace{-0.5 cm}
\begin{figure}
\centering
- \subfloat[Sync. imbalanced communications]{%
+ \subfloat[Synchronous imbalanced communications]{%
\includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
- \subfloat[Sync. imbalanced computations]{%
+ \subfloat[Synchronous imbalanced computations]{%
\includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
- \caption{Parallel tasks on homogeneous platform}
+ % \caption{Parallel tasks on homogeneous platform}
\label{fig:homo}
\end{figure}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 11 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Energy model for homogeneous platform}
+\begin{frame}{Energy model for a homogeneous platform}
The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
(\textcolor{red}{$P_s$}) power.
\begin{equation}
%% SLIDE 12 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Energy model for homogeneous platform}
+\begin{frame}{Energy model for a homogeneous platform}
The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}. \medskip
\left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
P_{s} \cdot S_1 \cdot T_1 \cdot N$
\end{block}
- \textcolor{blue}{$S_1$}: the max. scaling factor\\
- \textcolor{blue}{$P_{d}$}: the dynamic power\\
- \textcolor{blue}{$P_{s}$}: the static power\\
- \textcolor{blue}{$T_I$}: the time of the slower task\\
- \textcolor{blue}{$T_i$}: the time of the other tasks\\
- \textcolor{blue}{$N$}: the number of nodes
+ \textcolor{blue}{$S_1$}: the maximum scaling factor.\\
+ \textcolor{blue}{$P_{d}$}: the dynamic power.\\
+ \textcolor{blue}{$P_{s}$}: the static power.\\
+ \textcolor{blue}{$T_1$}: the execution time of the slowest task.\\
+ \textcolor{blue}{$T_i$}: the execution time of task $i$.\\
+ \textcolor{blue}{$N$}: the number of nodes.
\end{frame}
%\vspace{-0.3cm}
\small
\begin{block}{\small Our objective function}
- \centering{$\textbf{\emph {MaxDist}} = \max_{j=1,2,\dots ,F}
- (\overbrace{P_{Norm}(S_j)}^{{Maximize}} -
- \overbrace{E_{Norm}(S_j)}^{{Minimize}} )$}
+ \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}
+ (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} -
+ \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$}
\end{block}
\end{femtoBlock}
\begin{figure}
\animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
-
+ %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
\end{figure}
\end{frame}
\begin{femtoBlock}{}
\begin{itemize}
\small
- \item Our experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip
- \item Our algorithm is applied to NAS parallel benchmarks.\medskip
+ \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
+ \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
\item Each node in the cluster has 18 frequency values from \textbf{2.5$GHz$} to \textbf{800$MHz$}.\medskip
- \item We run the classes A, B and C on 4, 8 or 9 and 16 nodes respectively.\medskip
- \item The dynamic power with the highest frequency is equal to \textbf{20 $W$} and the power static is equal to \textbf{4 $W$}.
+ \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
+ \item $P_d = 20\,\mathrm{W}$, $P_s = 4\,\mathrm{W}$.
\end{itemize}
\end{femtoBlock}
\end{frame}
$S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
\left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
\end{block}
+
+
\centering {
%\includegraphics[width=.33\textwidth]{c1/c1.pdf}
%\qquad
%\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
- \includegraphics[width=.55\textwidth]{c1/compare_c.pdf}}
+ \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}}
\end{frame}
\vspace{-0.75cm}
\begin{figure}
\animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
+ %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 21 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Comparing the new model with Rauber model }
+\begin{frame}{\large Comparing the new model with Rauber's model }
\vspace{0.1cm}
\centering
\includegraphics[width=.45\textwidth]{c1/energy_con}
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Contribution}
+\begin{frame}{The second contribution}
-\subsection{\small {3.2 Energy optimization of heterogeneous platform}}
+\section{\small {Energy optimization of a heterogeneous platform}}
\begin{center}
-\bf \textcolor{black}{Second contribution} \\
-\vspace{1cm}
-\bf \Large \textcolor{blue}{Energy optimization of Heterogeneous platform}
+
+
+\bf \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
\end{center}
\end{frame}
\begin{frame}{Objectives}
\begin{femtoBlock}{} \vspace{-12 mm}
\begin{itemize} \small
- \item Evaluating the \textcolor{blue}{new energy and performance models} of message passing applications with iterations running
- over a heterogeneous platform (cluster and Grid). \medskip
- \item Study the effect of the scaling factor $S$ on both \textcolor{blue}{energy consumption and the performance} of
+ \item Proposing \textcolor{blue}{new energy and performance models} for message passing applications with iterations running
+ over a heterogeneous platform (cluster or Grid). \medskip
+ \item Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption and the performance} of
message passing iterative applications. \medskip
- \item Computing the vector of scaling factors ($S_1, S_2, ..., S_n$) producing \textcolor{blue} {optimal trade-off} between
- energy consumption and performance.
+ \item Computing the vector of scaling factors ($S_1, S_2, \dots, S_n$) producing \textcolor{blue}{the optimal trade-off} between
+ the energy consumption and the performance.
\end{itemize}
\vspace{-10 mm}
%% SLIDE 25 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{The energy consumption model}
- -The overall energy consumption of a message passing synchronous distributed application executed over a
- heterogeneous platform is computed as follows:
+ The overall energy consumption of a message passing synchronous application executed over
+ a heterogeneous platform can be computed as follows:
\begin{multline}
\label{eq:energy}
\textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot Tcp_i)}} + {} \\
\vspace{-0.5cm}
\begin{figure}
\animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
+ %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 27 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The trade-off between energy and performance}
- \vspace{-7 mm}
- \begin{figure}
- \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
- \end{figure}
- \vspace{-7 mm}
- \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}{$E_{norm} = \frac{E_{reduced}}
- {E_{Max}}$}. \\
- \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
+%\begin{frame}{The trade-off between energy and performance}
+ % \vspace{-7 mm}
+ %\begin{figure}
+ % \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
+ % \end{figure}
+ % \vspace{-7 mm}
+ % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}}
+ %{E_{Max}}$}. \\
+ % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
- \begin{block}{\small The tradeoff model}
- \begin{equation}
- \label{eq:max}
- \textcolor{red}{MaxDist} =
- \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
- (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
- \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
- \end{equation}
- \end{block}
-\end{frame}
+ % \begin{block}{\small The tradeoff model}
+ % \begin{equation}
+ % \label{eq:max}
+ % \textcolor{red}{MaxDist} =
+ % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
+ % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
+ % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
+ %\end{equation}
+ % \end{block}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
\begin{figure}
\animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
+ % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 30 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Experiments over heterogeneous cluster }
+\begin{frame}{Experiments over a heterogeneous cluster }
\begin{itemize}
\small
- \item The experiments executed on the simulator SimGrid/SMPI v3.10.\medskip
+ \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
\item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
\item Four types of processors with different computing powers were used.\medskip
- \item We ran the benchmarks on different number of nodes ranging from 4 to 144 nodes.\medskip
- \item The total power consumption of the chosen CPUs is composed of $80\%$ for dynamic power and $20\%$ for static power.
+ \item The benchmarks were executed with different numbers of nodes ranging from 4 to 144 nodes.\medskip
+ \item It was assumed that the total power consumption of the CPU consists of 80\% dynamic power and 20\% static power.
\medskip
\end{itemize}
\centering
\includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
- \textcolor{blue}{On average, it saves the energy consumption by \textcolor{red}{29\%}
- of NAS benchmarks class C executed over 8 nodes}
+ \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
+ for the class C of the NAS Benchmarks executed over 8 nodes}
\end{figure}
\end{frame}
\includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
- \textcolor{blue}{On average, it degrades the performance by \textcolor{red}{3.8\%}
- of NAS benchmarks class C executed over 8 nodes}
+ \textcolor{blue}{On average, it degrades the performance by \textcolor{red}{3.8\%}
+ for the class C of the NAS Benchmarks executed over 8 nodes}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 33 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The results of the three powers scenarios}
+\begin{frame}{The results of the three power scenarios}
\vspace{-5 mm}
\begin{figure}[!t]
\centering
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 34 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The comparing our method}
- The proposed method (MaxDist) was compared to the EDP algorithm that minimizes the \textcolor{blue}{
- $\mathit{energy}\times \mathit{delay}$} value.
+\begin{frame}{Comparing the objective function to EDP}
+
+ EDP is the product of the energy consumption and the delay.
\vspace{-5 mm}
\begin{figure}[!t]
\centering
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 35 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Energy optimization of grid platform}
- \begin{figure}[!t]
- \centering
- \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
+%\begin{frame}{Energy optimization of grid platform}
+ % \begin{figure}[!t]
+ % \centering
+ % \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
- \small 10 sites distributed over France and Luxembourg
- \end{figure}
-\end{frame}
+ % \small 10 sites distributed over France and Luxembourg
+ %\end{figure}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 36 %%
%%%%%%%%%%%%%%%%%%%%
- \begin{frame}{Performance, Energy and trade-off models} \small
- \begin{block}{\small The performance model of grid}
- \begin{equation}
- \label{eq:perf}
- \Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
- +\mathop{\min_{j=1,\dots,M_h}} (\Tcm[hj])
-\end{equation}
- \end{block}
+\begin{frame}{The grid architecture}
+\begin{center}
+\includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
+\end{center}
+
+ %\begin{frame}{Performance, Energy and trade-off models} \small
+ %\begin{block}{\small The performance model of grid}
+ % \begin{equation}
+ %\label{eq:perf}
+ %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
+ % +\mathop{\min_{j=1,\dots,M_h}} (\Tcm[hj])
+%\end{equation}
+ %\end{block}
- \begin{block}{\small The energy model of grid}\small
- \begin{equation}
- \label{eq:energy}
- E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} +
- \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
-\end{equation}
- \end{block}
-
-\begin{block}{\small The trade-off model of grid}
-\small
- \begin{equation}
- \label{eq:max}
- \MaxDist =
- \mathop{ \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
- (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
- \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
-\end{equation}
- \end{block}
+ %\begin{block}{\small The energy model of grid}\small
+ % \begin{equation}
+ %\label{eq:energy}
+ %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} +
+% \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
+%\end{equation}
+ % \end{block}
+
+%\begin{block}{\small The trade-off model of grid}
+%\small
+ %\begin{equation}
+ %\label{eq:max}
+ %\MaxDist =
+ %\mathop{ \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
+ % (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
+ % \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
+%\end{equation}
+ % \end{block}
+
\end{frame}
%% SLIDE 37 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Experiments over Grid'5000}
- \centering
-
+
+ \textcolor{blue}{The experiments were conducted using three
+ clusters distributed over one or two sites.}
+ \vspace{-7 mm}
+ \begin{center}
\includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
-
- \vspace{-3 mm}
- \textcolor{blue}{The experiments executed over one site and two sites scenarios}
-
- \vspace{1mm}
-
+ \end{center}
+ \vspace{-10 mm}
+ \textcolor{blue}{Grid'5000 power measurement tools were used.}
+ \vspace{-9 mm}
+ \begin{center}
\includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
+ \end{center}
- \textcolor{blue}{We used Grid'5000 power measurement tools}
+
\end{frame}
\begin{frame}{Experiments over Grid'5000}
\begin{minipage}{0.4\textwidth}
- \textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
- \textcolor{red}{30\%}}
+ %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
+ %\textcolor{red}{30\%}}
+ \small \textcolor{blue}{The average energy saving = \textcolor{red}{30\%}}
\end{minipage}
\begin{minipage}{0.55\textwidth}
\begin{figure}[h!]
\end{minipage}
\begin{minipage}{0.4\textwidth}
- \textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
- performance by \textcolor{red}{3.2\%}}
+ %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
+ %performance by \textcolor{red}{3.2\%}}
+ \small \textcolor{blue}{The average performance degradation = \textcolor{red}{3.2\%}}
\end{minipage}
\begin{minipage}{0.55\textwidth}
\begin{figure}[h!]
\includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
\end{figure}
- \centering \small \textcolor{blue}{Using multi-core per node scenario decreases the computations to communications ratio}.
+ \centering \small \textcolor{blue}{Using the multi-core per node scenario decreases the computation to communication ratio}.
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 40 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Continuation}
-\subsection{\small {3.3 Energy optimization of asynchronous applications}}
+\begin{frame}{The third contribution}
+\section{\small {Energy optimization of asynchronous applications}}
\begin{center}
-\bf \textcolor{black}{Third contribution} \\
-\vspace{1cm}
-\bf \Large \textcolor{blue}{Energy optimization of asynchronous applications}
+\bf \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing applications}
\end{center}
\end{frame}
%% SLIDE 41 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Problem definition}\vspace{0.8 mm}
-\textcolor{blue}{Execution the parallel iterative application with synchronous communications }
+\textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
\vspace{-8 mm}
\begin{figure}
- \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
+ \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
+ %\includegraphics[width=0.6\textwidth]{syn/a-503}
\end{figure}
\end{frame}
%% SLIDE 42 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Problem definition}\vspace{0.8 mm}
-\textcolor{blue}{Execution the parallel iterative application with synchronous communications }
+\textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
\vspace{-8 mm}
\begin{figure}
- \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
+ \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
+ %\includegraphics[width=0.6\textwidth]{asyn/a-440}
\end{figure}
\end{frame}
\vspace{-8 mm}
\begin{figure}
\animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
+ %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 44 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The performance models}
+%\begin{frame}{The performance models}
-\begin{block}{\small The performance model of Asynch. Applications}\small
-\begin{equation}
- \label{eq:asyn_time}
- \Tnew = \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N \cdot M_i }
-\end{equation}
-\end{block}
+%\begin{block}{\small The performance model of Asynch. Applications}\small
+%\begin{equation}
+ %\label{eq:asyn_time}
+ %\Tnew = \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N \cdot M_i }
+%\end{equation}
+%\end{block}
-\begin{block}{\small The performance model of Hybrid Applications}\small
-\begin{equation}
- \label{eq:asyn_perf}
- \Tnew = \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
- \min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
-\end{equation}
-\end{block}
+%\begin{block}{\small The performance model of Hybrid Applications}\small
+%\begin{equation}
+ %\label{eq:asyn_perf}
+ %\Tnew = \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
+ %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
+%\end{equation}
+%\end{block}
-\end{frame}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 45 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The energy consumption models}
+%\begin{frame}{The energy consumption models}
-\begin{block}{\small The energy model of Asynch. Applications}\small
-\begin{equation}
- \label{eq:asyn_energy1}
- E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
-\end{equation}
-\end{block}
+%\begin{block}{\small The energy model of Asynch. Applications}\small
+%\begin{equation}
+ %\label{eq:asyn_energy1}
+% E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
+%\end{equation}
+%\end{block}
-\begin{block}{\small The energy model of Hybrid Applications}\small
-\begin{multline}
- \label{eq:asyn_energy}
- E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} + \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
- ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
-\end{multline}
-\end{block}
+%\begin{block}{\small The energy model of Hybrid Applications}\small
+%\begin{multline}
+ %\label{eq:asyn_energy}
+ %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot \Tcp[ij])} + \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
+% ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
+%\end{multline}
+%\end{block}
+%\end{frame}
+
+
+
+%%%%%%%%%%%%%%%%%%%%
+%% SLIDE 44 %%
+%%%%%%%%%%%%%%%%%%%%
+\begin{frame}{The performance and the energy models }
+
+\centering
+\includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
\end{frame}
+
+
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 46 %%
%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 47 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The experimental results}
+\begin{frame}{The experiments}
\vspace{-5 mm}
\begin{figure}[!t]
- \centering
+ \begin{itemize}
+ \small
+ \item The architecture of the grid:
+ \end{itemize}
\includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
\end{figure}
\begin{itemize}
\small
- \item Execution the iterative multi-splitting method over simulated Grid.
- \item Execution the iterative multi-splitting method over Grid'5000 test-bed.
+ \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
+ \item Evaluating the application over the simulator and Grid'5000.
\end{itemize}
\end{frame}
%% SLIDE 48 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{The simulation results}
-\centering \small \textcolor{blue}{The best scenario in term of energy and performance is the Async. MS with Sync. DVFS}
+\centering \small \textcolor{blue}{The best scenario in terms of energy and performance is the Async. MS with Sync. DVFS}
\centering
- \includegraphics[scale=0.46]{c3/energy_saving.eps}
+ \includegraphics[scale=0.42]{c3/energy_saving.eps}
- \centering The average of energy saving = \textcolor{red}{22\%}
+ \centering The average energy saving = \textcolor{red}{22\%}
\end{frame}
\begin{frame}{The simulation results}
\centering
- \includegraphics[scale=0.46]{c3/perf_degra.eps}
+ \includegraphics[scale=0.42]{c3/perf_degra.eps}
- \centering The average of speed-up = \textcolor{red}{5.72\%}
+ \centering The average speed-up = \textcolor{red}{5.72\%}
\end{frame}
\includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
\end{figure}
\vspace{-5 mm}
- \centering
- The energy saving = \textcolor{red}{26.93\%}, speeds up = \textcolor{red}{21.48\%}
+ \centering \footnotesize
+The average energy saving = \textcolor{red}{26.93\%}, the average speed-up = \textcolor{red}{21.48\%}
\end{frame}
%% SLIDE 52 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Conclusions}
-\section{Conclusions}
+\section{Conclusions and Perspectives}
\begin{itemize}
-\small \barrow We have proposed \textcolor{blue}{a new energy consumption and performance} models for
- synchronous and asynchronous parallel applications with iterations.
-
+\small \barrow Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over
+\textcolor{blue}{homogeneous and heterogeneous clusters or grids}.
-\small \barrow The parallel applications with iterations were executed over different parallel architectures such as: \textcolor{blue}{homogeneous cluster, heterogeneous cluster and
-grid}.
-\small \barrow We have proposed \textcolor{blue}{new objective function} to optimize both the energy consumption and the performance.
+
+\small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
\small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
\small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
Multi-splitting} method.
-\small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator and over Grid'5000 testbed}.
+\small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over the \textcolor{blue}{Grid'5000 testbed}.
-\small \barrow All the proposed methods were compared with either \textcolor{blue}{Rauber and Rünger method} or \textcolor{blue}{EDP objective function}.
+\small \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's method} or to the \textcolor{blue}{EDP objective function}.
\end{itemize}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 53 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Publication}
+\begin{frame}{Publications}
\begin{block}{\small Journal Articles }\scriptsize
\begin{enumerate}[$\lbrack$1$\rbrack$]
%% SLIDE 54 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Perspectives}
-\section{Perspectives}
\begin{itemize}
-\small \barrow We will adapt the proposed algorithms to take into consideration the
+\small \barrow The proposed algorithms should take into consideration the
\textcolor{blue}{variability between some iterations}.
\small \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Fin} \vspace{-10 mm}
- \centering \Large \textcolor{blue}{Thanks for Your Listening}
+ \centering \Large \textcolor{blue}{Thank you for listening}
\vspace{2cm}
\centering \textcolor{blue}{ {\Large Questions?}}