\usepackage{fixltx2e}
%% used to put some subscripts lower, and make them more legible
%% \fxheight{<text>}: typesets <text> preceded by an invisible, zero-width
%% rule of height 1.52ex. When <text> is empty, the \ifx test ends up
%% comparing the two \relax tokens, succeeds, and nothing is typeset.
\newcommand{\fxheight}[1]{\ifx#1\relax\relax\else\rule{0pt}{1.52ex}#1\fi}
-
+\usepackage{ragged2e}
%% Notation shorthands. \CL and \EdNew are built with \Xsub, which is defined
%% outside this excerpt (presumably a base-with-subscript helper -- TODO confirm).
%% \Dist sets the multi-letter name "Dist" in math italic.
\newcommand{\CL}{\Xsub{C}{L}}
\newcommand{\Dist}{\mathit{Dist}}
\newcommand{\EdNew}{\Xsub{E}{dNew}}
%Iterations using CPU Frequency Scaling}
\vspace{2cm}
-\title{ \textbf{Energy Consumption Optimization of Parallel Applications with Iterations using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-1cm}
-\author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under Supervision: \textcolor{cyan}{\small Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ University of Franche-Comté - FEMTO-ST - DISC Dept. - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
+\title{ \textbf{Energy Consumption Optimization of Parallel Applications with Iterations using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm}
+\author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept. - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
\date{}
\vspace{-3cm}
\tableofcontents
\end{frame}
+%%%%%%%%%%%%%%%%%%%%
+%% SLIDE 03 %%
+%%%%%%%%%%%%%%%%%%%%
+\begin{frame}{Definition of parallel computing}
+\section{\small {Introduction and Problem definition}}
+ \centering
+ \includegraphics[width=0.99\textwidth]{para.pdf}
+\end{frame}
+
+
+\begin{frame}{Execution of synchronous parallel tasks}
+\vspace{-0.5 cm}
+\begin{figure}
+ \centering
+ \subfloat[Synchronous imbalanced communications]{%
+ \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
+ \subfloat[Synchronous imbalanced computations]{%
+ \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
+ % \caption{Parallel tasks on homogeneous platform}
+ \label{fig:homo}
+\end{figure}
+
+ \end{frame}
+
+
%%%%%%%%%%%%%%%%%%%%
+%% SLIDE 07 %%
+%%%%%%%%%%%%%%%%%%%%
+
+
+\begin{frame}{\large Synchronous and asynchronous iterative methods }
+\vspace{-0.5 cm}
+\begin{figure}
+
+\includegraphics[scale=0.42]{syn_tasks.pdf}
+\vspace{0.6 cm}
+\includegraphics[scale=0.42]{Asyn_tasks.pdf}
+\end{figure}
+
+
+ \end{frame}
+
+ %%%%%%%%%%%%%%%%%%%%
%% SLIDE 03 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Introduction and problem definition}
- \section{\small {Introduction and Problem definition}}
- \bf \textcolor{blue}{Approaches to increase the computing power:}
+\begin{frame}{Approaches to get more computing power}
+
+ %\bf \textcolor{blue}{}
\begin{minipage}{0.5\textwidth}
- \textcolor{blue}{1)} \small \bf \textcolor{black}{Increasing the frequency of a processor}
+ \textcolor{blue}{1)} \small \bf \textcolor{black}{Increase the frequency of a processor.\\ (limited due to overheating)}
\end{minipage}%
\begin{minipage}{0.6\textwidth}
\end{minipage}%
\vspace{0.2cm}
\begin{minipage}{0.5\textwidth}
- \textcolor{blue}{2)} \small \bf \textcolor{black}{Increasing the number of nodes}
+ \textcolor{blue}{2)} \small \bf \textcolor{black}{Increase the number of computing
+ units.}
+
+ \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}
+
\end{minipage}%
\begin{minipage}{0.6\textwidth}
\begin{figure}[h!]
-
%%%%%%%%%%%%%%%%%%%
%% SLIDE 04 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Introduction and problem definition}
- \vspace{0.1cm}
- \bf \textcolor{blue}{Techniques for energy consumption reduction}
-
+\begin{frame}{Techniques for energy consumption reduction}
+
\textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
\vspace{-0.9cm}
\begin{figure}
- \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
+ \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{200}{on-off/a-}{0}{111}
+ %\includegraphics[width=0.6\textwidth]{on-off/a-69}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
-%% SLIDE 06 %%
+%% SLIDE 05 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Techniques for energy consumption reduction}
- \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency Scaling (DVFS)}
- \vspace{-0.5cm}
+ \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic Voltage and Frequency Scaling (DVFS)}
+ \vspace{-0.9cm}
\begin{figure}
- \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
+ \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{175}
+ %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
\end{figure}
\end{frame}
-
-
+%%%%%%%%%%%%%%%%%%%%
+%% SLIDE 06 %%
+%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 07 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Motivations}
\vspace{0.05cm}
\section{\small {Motivations}}
-\textcolor{blue}{Why we used DVFS method:}
+\textcolor{blue}{Why we used the DVFS method:}
\vspace{-0.49cm}
\begin{minipage}{0.5\textwidth}
\vspace{-0.49cm}
\begin{itemize}
- \item \small \textcolor{black}{The biggest power consumption is consumed by a processor \textsuperscript{1}. }
+ \item \small \textcolor{black}{ The CPU is the component that consumes the highest amount of energy in a node \textsuperscript{1}. }
\end{itemize}
\end{figure}
\end{minipage}%
- \begin{itemize} \item \small \textcolor{black}{It used to reduce the energy consumption while keeping all the node working, thus it is more adapted to parallel computing.}
- \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes method.} \end{itemize}
+ \begin{itemize} \item \small \textcolor{black}{DVFS reduces the energy consumption while
+ keeping all the nodes working.}
+ \item \small \textcolor{black}{It has a very small overhead compared to switching off the idle nodes.} \end{itemize}
\vspace{-0.12cm}
\begin{block}{\textcolor{white}{Challenge and Objective}}
- \small \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it degrades the performance simultaneously.}
+ \small \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it also degrades the performance of the CPU.}
\vspace{0.1cm}
- \small \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel applications.}
+ \small \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
\end{block}
\tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007. Power provisioning
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Contribution}
+\begin{frame}{The first contribution}
+
+\section{\small {Energy optimization of a homogeneous platform}}
+%\vspace{-3cm}
+ % \includegraphics[width=0.6\textwidth]{white.pdf}
-\section{\small {Energy optimization of homogeneous platform}}
\begin{center}
\bf \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
\end{center}
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Objectives}
- \begin{femtoBlock}{} \vspace{-12 mm}
- \begin{itemize} \small
- \item Study the effect of the scaling factor $S$ on \textbf{energy consumption and performance } of parallel applications with iterations such as NAS
- Benchmarks. \includegraphics[width=.06\textwidth]{c1/nasa.pdf} \medskip
+
+ \begin{itemize} \small \justifying
+
+ \item Studying the effect of the frequency scaling on the \textbf{energy consumption and performance } of parallel applications with iterations. \medskip
- \item Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
- \item Proposing an algorithm for selecting the scaling factor $S$ producing \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
- \item Comparing the proposed algorithm to existing methods.
+ \item Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
+ \item Proposing an algorithm for selecting the scaling factor that produces \textbf {a good trade-off} between the energy consumption and the performance. \medskip
+ \item Comparing the proposed algorithm to existing methods.
%\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
%energy consumption \\ \quad ~ ~\quad of independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
\end{itemize}
%\let\thefootnote\relax\footnote{}
- \vspace{-10 mm}
- \end{femtoBlock}
+
+
\end{frame}
+
%%%%%%%%%%%%%%%%%%%%
-%% SLIDE 10 %%
+%% SLIDE 13 %%
%%%%%%%%%%%%%%%%%%%%
+\begin{frame}{Performance evaluation of MPI programs}
+\small The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.
+ \vspace{5 mm}
+
+ \begin{femtoBlock}{}
+ \vspace{-5 mm}
+ \begin{block}{\small Execution time prediction model}
+ \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
+ \end{block}
+ \vspace{5 mm}
+ \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
+ \quad%
+ \includegraphics[width=.4\textwidth]{c1/lu_pre}}
+ \vspace{1 mm}
+
+ \small The maximum normalized error is 0.0073 for CG \textbf{(the smallest)} and 0.031 for LU \textbf{(the largest)}.
+ \end{femtoBlock}
+\end{frame}
+
+
+
+
+
+
+
+
-\begin{frame}{Execution of synchronous parallel tasks}
-\vspace{-0.5 cm}
-\begin{figure}
- \centering
- \subfloat[Sync. imbalanced communications]{%
- \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
- \subfloat[Sync. imbalanced computations]{%
- \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
- % \caption{Parallel tasks on homogeneous platform}
- \label{fig:homo}
-\end{figure}
- \end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 11 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Energy model for homogeneous platform}
- The power consumed by a processor divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
- (\textcolor{red}{$P_s$}) power.
+\begin{frame}{Energy model for a homogeneous platform}
+ The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and the static
+ (\textcolor{red}{$P_s$}) powers.
\begin{equation}
\label{eq:pd}
\textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot CL \cdot V^2 \cdot F}
\end{equation}
\scriptsize \underline{Where}: \\
- \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm} \textcolor{blue}{$CL$}: load capacitance\\
- \textcolor{blue}{$V$} the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
+ \scriptsize {\textcolor{blue}{$\alpha$}: switching activity. \hspace{15 mm} \textcolor{blue}{$CL$}: load capacitance [F].\\
+ \textcolor{blue}{$V$}: the supply voltage [V]. \hspace{8 mm} \textcolor{blue}{$F$}: operational frequency [Hz].}
\begin{equation}
\label{eq:ps}
\small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
\end{equation}
\underline{Where}:\\
- \scriptsize{ \textcolor{blue}{$V$}: the supply voltage. \hspace{28 mm} \textcolor{blue}{$N_{trans}$}: number of transistors. \\
- \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent
- parameter.}
+ \scriptsize{ \textcolor{blue}{$V$}: the supply voltage [V]. \hspace{19 mm} \textcolor{blue}{$N_{trans}$}: number of transistors. \\
+ \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{3 mm} \textcolor{blue}{$I_{leak}$}: technology dependent
+ parameter [A].}
+
+
\end{frame}
+
+
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 12 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Energy model for homogeneous platform}
-
- The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}. \medskip
+\begin{frame}{Energy model for a homogeneous platform}
+ \vspace{-0.77cm}
+ \begin{figure}
+ \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{homo-model/a-}{0}{441}
+ %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
+ \end{figure}
-
-
- \begin{block}{\small Rauber and Rünger's energy model}
- $ E = P_{d} \cdot S_1^{-2} \cdot
- \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
- P_{s} \cdot S_1 \cdot T_1 \cdot N$
- \end{block}
- \textcolor{blue}{$S_1$}: the max. scaling factor\\
- \textcolor{blue}{$P_{d}$}: the dynamic power\\
- \textcolor{blue}{$P_{s}$}: the static power\\
- \textcolor{blue}{$T_I$}: the time of the slower task\\
- \textcolor{blue}{$T_i$}: the time of the other tasks\\
- \textcolor{blue}{$N$}: the number of nodes
+ % \begin{block}{\small Rauber and Rünger's energy model}
+ %$ E = P_{d} \cdot S_1^{-2} \cdot
+ %\left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
+ % P_{s} \cdot S_1 \cdot T_1 \cdot N$
+ %\end{block}
+ % \textcolor{blue}{$S_1$}: the maximum scaling factor.\\
+ % \textcolor{blue}{$P_{d}$}: the dynamic power.\\
+ % \textcolor{blue}{$P_{s}$}: the static power.\\
+ % \textcolor{blue}{$T_1$}: the execution time of the slowest task.\\
+ % \textcolor{blue}{$T_i$}: the execution time of task i.\\
+ % \textcolor{blue}{$N$}: the number of nodes.
+
+
\end{frame}
-
-
-%%%%%%%%%%%%%%%%%%%%
-%% SLIDE 13 %%
-%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Performance evaluation of MPI programs}
- \begin{femtoBlock}{}
- \vspace{-5 mm}
- \begin{block}{\small Execution time prediction model}
- \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
- \end{block}
- \vspace{10 mm}
- \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
- \quad%
- \includegraphics[width=.4\textwidth]{c1/lu_pre}}
- \vspace{5 mm}
-
- \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
- \end{femtoBlock}
-\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 15 %%
%%%%%%%%%%%%%%%%%%%%
- \begin{frame}{Scaling factor selection algorithm}
-\vspace{-0.75cm}
- \begin{center}
- \includegraphics[width=.56 \textwidth]{c1/algo-homo}
- \end{center}
+ %\begin{frame}{Scaling factor selection algorithm}
+%\vspace{-0.75cm}
+ % \begin{center}
+ %\includegraphics[width=.56 \textwidth]{c1/algo-homo}
+ %\end{center}
-\end{frame}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 16 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Scaling algorithm example}
+\begin{frame}{Scaling factor selection algorithm}
\vspace{-0.75cm}
\begin{figure}
- \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
-
+ \animategraphics[autopause,controls,scale=0.29,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{335}
+ %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 17 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Experimental results }
+\begin{frame}{Experiment over SimGrid }
\begin{femtoBlock}{}
\begin{itemize}
\small
- \item The experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip
- \item The proposed algorithm is applied to the NAS parallel benchmarks.\medskip
+ \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
+ \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
\item Each node in the cluster has 18 frequency values from \textbf{2.5~GHz} to \textbf{800~MHz}.\medskip
- \item The proposed algorithm was evaluated over the A, B, C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
+ \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
\item $P_d=20\,\mathrm{W}$, $P_s=4\,\mathrm{W}$.
\end{itemize}
\end{femtoBlock}
\includegraphics[width=.35\textwidth]{c1/cg}
\includegraphics[width=.35\textwidth]{c1/bt}}
+\hspace{0.5cm}
+
\centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
\end{femtoBlock}
\end{frame}
%% SLIDE 19 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Results comparison}
- \begin{block}{\small Rauber and Rünger's optimal scaling factor}
- $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
- \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
- \end{block}
- \centering {
- %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
- %\qquad
- %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
-
+ \small \textcolor{blue}{Rauber and Rünger's scaling factor \textcolor{black}{ \tiny \textsuperscript{2}}}
+
+ \vspace{2 mm}
- \includegraphics[width=.55\textwidth]{c1/compare_c.pdf}}
+ $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
+ \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
+
+ \begin{center}
+ \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}
+ \end{center}
+
+
+\vspace{-2 mm}
+ \tiny \textsuperscript{2} Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the energy consumption of independent tasks. In Proceedings of the Winter Simulation Conference, 2012.
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 20 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The proposed new energy model}
- \vspace{-0.75cm}
- \begin{figure}
- \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
- \end{figure}
-\end{frame}
+%\begin{frame}{The proposed new energy model}
+ % \vspace{-0.75cm}
+ %\begin{figure}
+ % \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
+ %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
+ % \end{figure}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 21 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Comparing the new model with Rauber model }
- \vspace{0.1cm}
- \centering
- \includegraphics[width=.45\textwidth]{c1/energy_con}
+%\begin{frame}{\large Comparing the new model with Rauber's model }
+% \vspace{0.1cm}
+% \centering
+ %\includegraphics[width=.45\textwidth]{c1/energy_con}
- \includegraphics[width=.5\textwidth]{c1/compare-scales}
-\end{frame}
+ %\includegraphics[width=.5\textwidth]{c1/compare-scales}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Contribution}
+\begin{frame}{The second contribution}
-\section{\small {Energy optimization of heterogeneous platform}}
+\section{\small {Energy optimization of a heterogeneous platform}}
\begin{center}
-\bf \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over Heterogeneous platform}
+\bf \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
\end{center}
\end{frame}
\begin{femtoBlock}{} \vspace{-12 mm}
\begin{itemize} \small
\item Proposing \textcolor{blue}{new energy and performance models} for message passing applications with iterations running
- over a heterogeneous platform (cluster and Grid). \medskip
+ over a heterogeneous platform (cluster or Grid). \medskip
\item Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption and the performance} of
message passing iterative applications. \medskip
- \item Computing the vector of scaling factors ($S_1, S_2, ..., S_n$) producing \textcolor{blue} {the optimal trade-off} between
+ \item Computing the vector of scaling factors ($S_1, S_2, \dots, S_n$) producing \textcolor{blue} {a good trade-off} between
the energy consumption and the performance.
\end{itemize}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 25 %%
%%%%%%%%%%%%%%%%%%%%
- \begin{frame}{The energy consumption model}
- The overall energy consumption of a message passing synchronous application executed over
- a heterogeneous platform can be computed as follows:
- \begin{multline}
- \label{eq:energy}
- \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot Tcp_i)}} + {} \\
- \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i))}}
- \hspace{10 mm}
- \end{multline}
- \underline{where}:\\
- \textcolor{blue}{N} : is the number of nodes.
-\end{frame}
+ %\begin{frame}{The energy consumption model}
+ % The overall energy consumption of a message passing synchronous application executed over
+ % a heterogeneous platform can be computed as follows:
+ % \begin{multline}
+ % \label{eq:energy}
+ % \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot Tcp_i)}} + {} \\
+ % \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i))}}
+ % \hspace{10 mm}
+ % \end{multline}
+ % \underline{where}:\\
+ % \textcolor{blue}{N} : is the number of nodes.
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 26 %%
%%%%%%%%%%%%%%%%%%%%
- \begin{frame}{The energy model example for heter. cluster}
- \vspace{-0.5cm}
+ \begin{frame}{The energy model for a heterogeneous cluster}
+ \vspace{-0.77cm}
\begin{figure}
- \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
+ \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{heter-model/a-}{0}{350}
+ %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 28 %%
%%%%%%%%%%%%%%%%%%%%
- \begin{frame}{The scaling algorithm for heter. cluster}
+ %\begin{frame}{The scaling algorithm for heter. cluster}
- \centering
- \includegraphics[width=.52\textwidth]{algo-heter}
- \end{frame}
+ %\centering
+ %\includegraphics[width=.52\textwidth]{algo-heter}
+ %\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 29 %%
%%%%%%%%%%%%%%%%%%%%
- \begin{frame}{The scaling algorithm example}
- \vspace{-0.5cm}
+ \begin{frame}{The scaling algorithm for heter. cluster}
+ \vspace{-0.77cm}
\centering
\begin{figure}
- \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
+ \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{836}
+ % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 30 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Experiments over a heterogeneous cluster }
- \begin{itemize}
- \small
- \item The experiments executed on the simulator SimGrid/SMPI v3.10.\medskip
- \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
- \item Four types of processors with different computing powers were used.\medskip
- \item We ran the benchmarks on different number of nodes ranging from 4 to 144 nodes.\medskip
- \item The total power consumption of the chosen CPUs assumed to be composed of $80\%$ for the dynamic power and $20\%$ for the static power.
- \medskip
+%\begin{frame}{Experiments over a heterogeneous cluster }
+ % \begin{itemize}
+ % \small
+ % \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
+ % \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
+ % \item Four types of processors with different computing powers were used.\medskip
+ % \item The benchmarks were executed with different numbers of nodes ranging from 4 to 144 nodes.\medskip
+ % \item It was assumed that the total power consumption of the CPU consists of 80\% dynamic power and 20\% static power.
+ % \medskip
- \end{itemize}
+ %\end{itemize}
-\end{frame}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 31 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The experimental results}
- \vspace{-5 mm}
- \begin{figure}[!t]
- \centering
- \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
+%\begin{frame}{The simulation results}
+ % \vspace{-5 mm}
+ % \begin{figure}[!t]
+ %\centering
+ %\includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
- \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
- for the class C of the NAS benchmarks executed over 8 nodes}
+ % \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
+ %for the class C of the NAS Benchmarks executed over 8 nodes}
- \end{figure}
-\end{frame}
+ % \end{figure}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 32 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The experimental results}
- \vspace{-5 mm}
- \begin{figure}[!t]
- \centering
+%\begin{frame}{The simulation results}
+ % \vspace{-5 mm}
+ % \begin{figure}[!t]
+ % \centering
- \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
+ % \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
- \textcolor{blue}{On average, it degrades by \textcolor{red}{3.8\%} the performance
- of NAS benchmarks class C executed over 8 nodes}
- \end{figure}
-\end{frame}
+ % \textcolor{blue}{On average, it degrades by \textcolor{red}{3.8\%} the performance
+ % of NAS Benchmarks class C executed over 8 nodes}
+ % \end{figure}
+%\end{frame}
-%%%%%%%%%%%%%%%%%%%%
-%% SLIDE 33 %%
-%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The results of the three power scenarios}
- \vspace{-5 mm}
- \begin{figure}[!t]
- \centering
- \includegraphics[width=.55\textwidth]{c2/three_power.pdf}
- \vspace{10 mm}
- \includegraphics[width=.55\textwidth]{c2/three_scenarios.pdf}
- \end{figure}
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%
-%% SLIDE 34 %%
-%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Comparing the objective function to EDP}
-
- EDP is the products between the energy consumption and the delay.
- \vspace{-5 mm}
- \begin{figure}[!t]
- \centering
- \includegraphics[width=.55\textwidth]{c2/avg_compare.pdf}
-
- \includegraphics[width=.55\textwidth]{c2/compare_with_EDP.pdf}
- \end{figure}
-\end{frame}
-
-
%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 36 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The grid architecture}
-\begin{center}
-\includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
-\end{center}
+%\begin{frame}{The grid architecture}
+%\begin{center}
+%\includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
+%\end{center}
%\begin{frame}{Performance, Energy and trade-off models} \small
%\begin{block}{\small The performance model of grid}
% \end{block}
- \end{frame}
+ %\end{frame}
%% SLIDE 37 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Experiments over Grid'5000}
- \centering
-
+
+ \textcolor{blue}{The experiments were conducted using three
+ clusters distributed over one or two sites.}
+ \vspace{-7 mm}
+ \begin{center}
\includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
-
- \vspace{-3 mm}
- \textcolor{blue}{Two experiments were conducted: over one site and two sites
- each one with three clusters }
-
- \vspace{1mm}
-
+ \end{center}
+ \vspace{-10 mm}
+ \textcolor{blue}{Grid'5000 power measurement tools were used.}
+ \vspace{-9 mm}
+ \begin{center}
\includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
+ \end{center}
- \textcolor{blue}{Grid'5000 power measurement tools were used}
+
\end{frame}
\begin{minipage}{0.4\textwidth}
%\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
%\textcolor{red}{30\%}}
- \textcolor{blue}{The energy saving = \textcolor{red}{30\%}}
+ \small \textcolor{blue}{The average energy saving = \textcolor{red}{30\%}}
\end{minipage}
\begin{minipage}{0.55\textwidth}
\begin{figure}[h!]
\begin{minipage}{0.4\textwidth}
%\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
%performance by \textcolor{red}{3.2\%}}
- \textcolor{blue}{The performance degradation = \textcolor{red}{3.2\%}}
+ \small \textcolor{blue}{The average performance degradation = \textcolor{red}{3.2\%}}
\end{minipage}
\begin{minipage}{0.55\textwidth}
\begin{figure}[h!]
+%%%%%%%%%%%%%%%%%%%%
+%% SLIDE 33 %%
+%%%%%%%%%%%%%%%%%%%%
+\begin{frame}{The results of the three power scenarios}
+ \vspace{-5 mm}
+ \begin{figure}[!t]
+ \centering
+ \includegraphics[width=.45\textwidth]{c2/eng_pow.eps}
+ \hspace{0.3cm}
+ \includegraphics[width=.45\textwidth]{c2/per_pow.eps}
+ \vspace{4 mm}
+ \includegraphics[width=.7\textwidth]{c2/three_scenarios.pdf}
+ \end{figure}
+\end{frame}
+
+
+
+
+
+
+
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 39 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Experiments over Grid'5000}
- \textcolor{blue}{One core and Multi-cores per node results:}
+\begin{frame}{One core and Multi-cores per node results}
+ %\textcolor{blue}{One core and Multi-cores per node results:}
\begin{figure}[h!]
\includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
\end{frame}
-
+%%%%%%%%%%%%%%%%%%%%
+%% SLIDE 34 %%
+%%%%%%%%%%%%%%%%%%%%
+\begin{frame}{Comparing the objective function to EDP}
+
+ EDP is the product of the energy consumption and the delay{\tiny\textsuperscript{3}}.
+ \vspace{-5 mm}
+ \begin{figure}[!t]
+ \centering
+ \includegraphics[width=.6\textwidth]{c2/edp_dist.eps}
+
+
+ \end{figure}
+
+ \tiny \textsuperscript{3} Spiliopoulos et al., Green governors: A framework for continuously adaptive DVFS, in International Green Computing Conference and Workshops (IGCC), 2011.
+\end{frame}
%\begin{frame}{Summary}
%\begin{itemize}
% \small
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 40 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Contribution}
+\begin{frame}{The third contribution}
\section{\small {Energy optimization of asynchronous applications}}
\begin{center}
-\bf \Large \textcolor{blue}{Energy optimization of asynchronous message passing iterative applications}
+\bf \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing applications}
\end{center}
\end{frame}
\textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
\vspace{-8 mm}
\begin{figure}
- \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
+ \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{syn/a-}{0}{647}
+ %\includegraphics[width=0.6\textwidth]{syn/a-503}
\end{figure}
\end{frame}
\textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
\vspace{-8 mm}
\begin{figure}
- \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
+ \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn/a-}{0}{556}
+ %\includegraphics[width=0.6\textwidth]{asyn/a-440}
\end{figure}
\end{frame}
\textcolor{blue}{Using asynchronous communications with DVFS }
\vspace{-8 mm}
\begin{figure}
- \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
+ \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{344}
+ %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
\end{figure}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 46 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The scaling algorithm for Asynch. applications}
-\vspace{-0.1 mm}
-\centering
-\includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
-\end{frame}
+%\begin{frame}{The scaling algorithm for Asynch. applications}
+%\vspace{-0.1 mm}
+%\centering
+%\includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 48 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The simulation results}
-\centering \small \textcolor{blue}{The best scenario in terms of energy and performance is the Async. MS with Sync. DVFS}
+%\begin{frame}{The simulation results}
+%\centering \small \textcolor{blue}{The best scenario in terms of energy and performance is %the Async. MS with Sync. DVFS}
-\centering
- \includegraphics[scale=0.42]{c3/energy_saving.eps}
+%\centering
+ % \includegraphics[scale=0.42]{c3/energy_saving.eps}
- \centering The average of energy saving = \textcolor{red}{22\%}
-\end{frame}
+ %\centering The average energy saving = \textcolor{red}{22\%}
+%\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 49 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{The simulation results}
-\centering
+%\begin{frame}{The simulation results}
+%\centering
- \includegraphics[scale=0.42]{c3/perf_degra.eps}
+ % \includegraphics[scale=0.42]{c3/perf_degra.eps}
- \centering The average speed-up = \textcolor{red}{5.72\%}
-\end{frame}
+%\centering The average speed-up = \textcolor{red}{5.72\%}
+%\end{frame}
%% SLIDE 50 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{The Grid'5000 results}
- \vspace{-20 mm}
+ \vspace{-10 mm}
\begin{figure}[!t]
\centering
\hspace{-8 mm}
\includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
\end{figure}
\vspace{-5 mm}
- \centering
-The energy saving = \textcolor{red}{26.93\%}, the average speed-up = \textcolor{red}{21.48\%}
+ \centering \footnotesize
+
+ %\small \textcolor{blue}{The best scenario in terms of energy and performance is the Async. MS with Sync. DVFS}
+
+The average energy saving = \textcolor{red}{26.93\%}, the average speed-up = \textcolor{red}{21.48\%}
\end{frame}
\section{Conclusions and Perspectives}
\begin{itemize}
-\small \barrow Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous and asynchronous parallel applications with iterations running over
-\textcolor{blue}{homogeneous and heterogeneous clusters and grids}.
+\small \barrow Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over
+\textcolor{blue}{homogeneous and heterogeneous clusters or grids}.
-\small \barrow \textcolor{blue}{A new objective function} was proposed to optimize both the energy consumption and the performance.
+\small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
\small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
\small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
Multi-splitting} method.
-\small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over \textcolor{blue}{Grid'5000 testbed}.
+\small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over the \textcolor{blue}{Grid'5000 testbed}.
-\small \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's method} or \textcolor{blue}{the EDP objective function}.
+\small \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's method} or to the \textcolor{blue}{EDP objective function}.
\end{itemize}
Science}, 2016.
\item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier, Arnaud Giersch. Energy Consumption Reduction for
- Asynchronous Message Passing Applications. \textit{Journal of Supercomputing}, 2016, (Submitted)
+ Asynchronous Message Passing Applications. \textit{Journal of Supercomputing}, 2016, (Accepted with minor revisions)
\end{enumerate}
\end{block}
\small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
\small \barrow Comparing the results returned by the energy models to the values given by \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
+\small \barrow Considering the power consumed by the other devices in the node such as
+\textcolor{blue}{the memory and the hard drive} in the energy consumption model.
+
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Fin} \vspace{-10 mm}
- \centering \Large \textcolor{blue}{Thank you for your listening}
+ \centering \Large \textcolor{blue}{Thank you for your attention}
\vspace{2cm}
\centering \textcolor{blue}{ {\Large Questions?}}