%Iterations using CPU Frequency Scaling}
\vspace{2cm}
-\title{ \textbf{Energy Consumption Optimization of Parallel Applications with Iterations using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-1cm}
-\author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under Supervision: \textcolor{cyan}{\small Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ University of Bourgogne Franche-Comté - FEMTO-ST - DISC Dept. - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
+\title{ \textbf{Energy Consumption Optimization of Parallel Applications with Iterations using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm}
+\author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept. - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
\date{}
\vspace{-3cm}
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Introduction and problem definition}
\section{\small {Introduction and Problem definition}}
- \bf \textcolor{blue}{Approaches to increase the computing power of the parallel platform :}
+ \bf \textcolor{blue}{To get more computing power:}
\begin{minipage}{0.5\textwidth}
- \textcolor{blue}{1)} \small \bf \textcolor{black}{Increasing the frequency of a processor.}
+ \textcolor{blue}{1)} \small \bf \textcolor{black}{Increase the frequency of a processor.\\ (limited due to overheating)}
\end{minipage}%
\begin{minipage}{0.6\textwidth}
\end{minipage}%
\vspace{0.2cm}
\begin{minipage}{0.5\textwidth}
- \textcolor{blue}{2)} \small \bf \textcolor{black}{Increasing the number of nodes.}
+ \textcolor{blue}{2)} \small \bf \textcolor{black}{Use more nodes.}
- \tiny \textcolor{blue}{Recently, Tianhe-2 supercomputer had more than 3 million cores while consuming around 17.8 megawatts.}
+ \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}
\end{minipage}%
\begin{minipage}{0.6\textwidth}
%%%%%%%%%%%%%%%%%%%
%% SLIDE 04 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Introduction and problem definition}
+\begin{frame}{Techniques for energy consumption reduction}
\vspace{0.1cm}
- \bf \textcolor{blue}{Techniques for energy consumption reduction}
+
\textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
\vspace{-0.9cm}
\begin{frame}{Motivations}
\vspace{0.05cm}
\section{\small {Motivations}}
-\textcolor{blue}{Why we used DVFS method:}
+\textcolor{blue}{Why we used the DVFS method:}
\vspace{-0.49cm}
\begin{minipage}{0.5\textwidth}
\vspace{-0.49cm}
\begin{itemize}
- \item \small \textcolor{black}{The biggest power consumption is consumed by the processor \textsuperscript{1}. }
+ \item \small \textcolor{black}{ The CPU is the component that consumes the highest amount of energy in a node \textsuperscript{1}. }
\end{itemize}
\end{figure}
\end{minipage}%
- \begin{itemize} \item \small \textcolor{black}{It uses to reduce the energy consumption while keeping all the nodes working, thus it is more adapted to parallel computing.}
- \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes method.} \end{itemize}
+ \begin{itemize} \item \small \textcolor{black}{DVFS reduces the energy consumption while
+ keeping all the nodes working.}
+ \item \small \textcolor{black}{It has a very small overhead compared to switching off the idle nodes.} \end{itemize}
\vspace{-0.12cm}
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Contribution}
+\begin{frame}{The first contribution}
+
+\section{\small {Energy optimization of a homogeneous platform}}
+%\vspace{-3cm}
+ % \includegraphics[width=0.6\textwidth]{white.pdf}
-\section{\small {Energy optimization of homogeneous platform}}
\begin{center}
\bf \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
\end{center}
\begin{frame}{Objectives}
\begin{femtoBlock}{} \vspace{-12 mm}
\begin{itemize} \small
- \item Study the effect of the scaling factor $S$ on \textbf{energy consumption and performance } of parallel applications with iterations such as NAS
- Benchmarks. \includegraphics[width=.06\textwidth]{c1/nasa.pdf} \medskip
+ \item Studying the effect of the scaling factor on the \textbf{energy consumption and performance} of parallel applications with iterations. \medskip
\item Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
- \item Proposing an algorithm for selecting the scaling factor $S$ producing \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
+ \item Proposing an algorithm for selecting the scaling factor that produces \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
\item Comparing the proposed algorithm to existing methods.
\vspace{-0.5 cm}
\begin{figure}
\centering
- \subfloat[Sync. imbalanced communications]{%
+ \subfloat[Synchronous imbalanced communications]{%
\includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
- \subfloat[Sync. imbalanced computations]{%
+ \subfloat[Synchronous imbalanced computations]{%
\includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
% \caption{Parallel tasks on homogeneous platform}
\label{fig:homo}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 11 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Energy model for homogeneous platform}
+\begin{frame}{Energy model for a homogeneous platform}
The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
(\textcolor{red}{$P_s$}) power.
\begin{equation}
%% SLIDE 12 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Energy model for homogeneous platform}
+\begin{frame}{Energy model for a homogeneous platform}
The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}. \medskip
\left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
P_{s} \cdot S_1 \cdot T_1 \cdot N$
\end{block}
- \textcolor{blue}{$S_1$}: the max. scaling factor\\
- \textcolor{blue}{$P_{d}$}: the dynamic power\\
- \textcolor{blue}{$P_{s}$}: the static power\\
- \textcolor{blue}{$T_I$}: the time of the slower task\\
- \textcolor{blue}{$T_i$}: the time of the other tasks\\
- \textcolor{blue}{$N$}: the number of nodes
+ \textcolor{blue}{$S_1$}: the maximum scaling factor.\\
+ \textcolor{blue}{$P_{d}$}: the dynamic power.\\
+ \textcolor{blue}{$P_{s}$}: the static power.\\
+ \textcolor{blue}{$T_1$}: the execution time of the slowest task.\\
+ \textcolor{blue}{$T_i$}: the execution time of task $i$.\\
+ \textcolor{blue}{$N$}: the number of nodes.
\end{frame}
\item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
\item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
\item Each node in the cluster has 18 frequency values from \textbf{2.5$GHz$} to \textbf{800$MHz$}.\medskip
- \item The proposed algorithm was evaluated over the A, B, C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
+ \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9, and 16 nodes, respectively. \medskip
\item $P_d=20W$, $P_s=4W$.
\end{itemize}
\end{femtoBlock}
$S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
\left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
\end{block}
+
+
\centering {
%\includegraphics[width=.33\textwidth]{c1/c1.pdf}
%\qquad
%\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
- \includegraphics[width=.55\textwidth]{c1/compare_c.pdf}}
+ \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 21 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Comparing the new model with Rauber model }
+\begin{frame}{\large Comparing the new model with Rauber's model }
\vspace{0.1cm}
\centering
\includegraphics[width=.45\textwidth]{c1/energy_con}
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Contribution}
+\begin{frame}{The second contribution}
-\section{\small {Energy optimization of heterogeneous platform}}
+\section{\small {Energy optimization of a heterogeneous platform}}
\begin{center}
\begin{femtoBlock}{} \vspace{-12 mm}
\begin{itemize} \small
\item Proposing \textcolor{blue}{new energy and performance models} for message passing applications with iterations running
- over a heterogeneous platform (cluster and Grid). \medskip
+ over a heterogeneous platform (cluster or Grid). \medskip
\item Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption and the performance} of
message passing iterative applications. \medskip
\begin{frame}{Experiments over a heterogeneous cluster }
\begin{itemize}
\small
- \item The experiments executed on the simulator SimGrid/SMPI v3.10.\medskip
+ \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
\item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
\item Four types of processors with different computing powers were used.\medskip
- \item We ran the benchmarks on different number of nodes ranging from 4 to 144 nodes.\medskip
- \item The total power consumption of the chosen CPUs assumed to be composed of $80\%$ for the dynamic power and $20\%$ for the static power.
+ \item The benchmarks were executed with different numbers of nodes, ranging from 4 to 144 nodes.\medskip
+ \item It was assumed that the total power consumption of the CPU consists of 80\% dynamic power and 20\% static power.
\medskip
\end{itemize}
%% SLIDE 37 %%
%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Experiments over Grid'5000}
- \centering
-
+
+ \textcolor{blue}{The experiments were conducted using three
+ clusters distributed over one or two sites.}
+ \vspace{-7 mm}
+ \begin{center}
\includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
-
- \vspace{-3 mm}
- \textcolor{blue}{Two experiments were conducted: over one site and two sites
- each one with three clusters }
-
- \vspace{1mm}
-
+ \end{center}
+ \vspace{-10 mm}
+ \textcolor{blue}{Grid'5000 power measurement tools were used.}
+ \vspace{-9 mm}
+ \begin{center}
\includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
+ \end{center}
- \textcolor{blue}{Grid'5000 power measurement tools were used}
+
\end{frame}
\begin{minipage}{0.4\textwidth}
%\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
%\textcolor{red}{30\%}}
- \textcolor{blue}{The energy saving = \textcolor{red}{30\%}}
+ \small \textcolor{blue}{The average energy saving = \textcolor{red}{30\%}}
\end{minipage}
\begin{minipage}{0.55\textwidth}
\begin{figure}[h!]
\begin{minipage}{0.4\textwidth}
%\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
%performance by \textcolor{red}{3.2\%}}
- \textcolor{blue}{The performance degradation = \textcolor{red}{3.2\%}}
+ \small \textcolor{blue}{The average performance degradation = \textcolor{red}{3.2\%}}
\end{minipage}
\begin{minipage}{0.55\textwidth}
\begin{figure}[h!]
%%%%%%%%%%%%%%%%%%%%
%% SLIDE 40 %%
%%%%%%%%%%%%%%%%%%%%
-\begin{frame}{Contribution}
+\begin{frame}{The third contribution}
\section{\small {Energy optimization of asynchronous applications}}
\begin{center}
\bf \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing applications}
\centering
\includegraphics[scale=0.42]{c3/energy_saving.eps}
- \centering The average of energy saving = \textcolor{red}{22\%}
+ \centering The average energy saving = \textcolor{red}{22\%}
\end{frame}
\includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
\end{figure}
\vspace{-5 mm}
- \centering
-The energy saving = \textcolor{red}{26.93\%}, the average speed-up = \textcolor{red}{21.48\%}
+ \centering \footnotesize
+The average energy saving = \textcolor{red}{26.93\%}, the average speed-up = \textcolor{red}{21.48\%}
\end{frame}
\section{Conclusions and Perspectives}
\begin{itemize}
-\small \barrow Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous and asynchronous parallel applications with iterations running over
-\textcolor{blue}{homogeneous and heterogeneous clusters and grids}.
+\small \barrow Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over
+\textcolor{blue}{homogeneous and heterogeneous clusters or grids}.
\small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
Multi-splitting} method.
-\small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over \textcolor{blue}{Grid'5000 testbed}.
+\small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over the \textcolor{blue}{Grid'5000 testbed}.
-\small \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's method} or \textcolor{blue}{the EDP objective function}.
+\small \barrow All the proposed methods were compared either to \textcolor{blue}{Rauber and Rünger's method} or to the \textcolor{blue}{EDP objective function}.
\end{itemize}