thesis-presentation/AhmedSlides.tex

   1  \documentclass{beamer}
   2 \usepackage{beamerthemefemto}
   3 \usepackage[latin1]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
   6 \usepackage{algorithm,algorithmicx,algpseudocode}
   7 \usepackage{graphicx,graphics}
   8 \usepackage{subfig}
   9 \usepackage{listings}
  10 \usepackage{colortbl}
  11 \usepackage{amsmath}
  12 \usepackage{xspace}
  13  \usepackage{movie15}
  14  \usepackage{animate}
  15 \usepackage{xmpmulti}
  16  \newcommand{\AG}[2][inline]{% reviewer note labelled AG: optional #1 = extra \todo options (default: inline), #2 = note text; \todo comes from todonotes, loaded further down the preamble
  17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}
  18 \newcommand{\JC}[2][inline]{% same interface as \AG, labelled JC and drawn with the red!10 background
  19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
  20 \definecolor{myblue}{RGB}{0,29,119}
  21 \newcommand{\Xsub}[2]{{\ensuremath{#1_{\mathit{#2}}}}} % \Xsub{<base>}{<sub>}: <base> with <sub> as a single italic word in the subscript; usable in text or math (\ensuremath); the subscript is braced explicitly rather than relying on _\mathit{..} parsing; the outer braces make the result one atom so callers can append a further _{..}
  22 \usepackage{fixltx2e}
  23 %% \fxheight{<sub>}: used to put some subscripts lower, and make them more legible, by prepending a zero-width rule of height 1.52ex to a non-empty <sub>; an empty <sub> expands to nothing. The emptiness test is \ifx\relax#1\relax, so arguments whose first two tokens are equal (e.g. 11 or ii) are no longer mistaken for empty.
  24 \newcommand{\fxheight}[1]{\ifx\relax#1\relax\else\rule{0pt}{1.52ex}#1\fi}
  25
  26 \newcommand{\CL}{\Xsub{C}{L}} % notation shorthands start here: each \Xsub-based macro prints a base letter with an italic word subscript
  27 \newcommand{\Dist}{\mathit{Dist}} % no \ensuremath here: math mode only
  28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
  29 \newcommand{\Eind}{\Xsub{E}{ind}}
  30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
  31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
  32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
  33 \newcommand{\Es}{\Xsub{E}{S}}
  34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}} % optional argument becomes a second subscript, pulled left by \!
  35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}} % optional argument becomes a second subscript, passed through \fxheight
  36 \newcommand{\Fnew}{\Xsub{F}{new}}
  37 \newcommand{\Vnew}{\Xsub{V}{new}}
  38 \newcommand{\Vmax}{\Xsub{V}{max}}
  39 \newcommand{\Ileak}{\Xsub{I}{leak}}
  40 \newcommand{\Kdesign}{\Xsub{K}{design}}
  41 \newcommand{\MaxDist}{\mathit{Max}\Dist} % math mode only (built from \mathit and \Dist)
  42 \newcommand{\MinTcm}{\mathit{Min}\Tcm} % \Tcm is defined further down; fine because it is only expanded at use time
  43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
  44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
  45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
  46
  47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
  48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
  49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
  50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
  51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}} % second subscript set directly, without \fxheight
  52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
  53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
  54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
  55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
  56 \newcommand{\Tnew}{\Xsub{T}{New}}
  57 \newcommand{\Told}{\Xsub{T}{Old}}
  58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
  59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
  60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
  61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
  62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
  63  \usepackage{pifont}
  64 \usepackage{xcolor}
  65 \definecolor{myblue}{RGB}{0,29,119}
  66 \usepackage[textsize=footnotesize]{todonotes}
  67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]}
  68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
  69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
  70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
  71
  72
  73
  74 %\title{Energy Consumption Optimization of Parallel Applications with
  75 %Iterations using CPU Frequency Scaling}
  76 \vspace{2cm}
  77
  78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-1cm}
  79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under Supervision: \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ University of Bourgogne Franche-Comté - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
  80
  81 \date{}
  82 \vspace{-3cm}
  83 %  ____  _____ ____  _   _ _____
  84 % |  _ \| ____| __ )| | | |_   _|
  85 % | | | |  _| |  _ \| | | | | |
  86 % | |_| | |___| |_) | |_| | | |
  87 % |____/|_____|____/ \___/  |_|
  88 %
  89 \begin{document}
  90 \setbeamertemplate{background}{\titrefemto}
  91
  92 %%%%%%%%%%%%%%%%%%%%
  93 %%    SLIDE 01    %%
  94 %%%%%%%%%%%%%%%%%%%%
  95 \begin{frame}[plain]
  96 \vspace{1cm}
  97 \centering
  98    \titlepage
  99 \end{frame}
 100
 101
 102 %%%%%%%%%%%%%%%%%%%%
 103 %%    SLIDE 02    %%
 104 %%%%%%%%%%%%%%%%%%%%
 105 \setbeamertemplate{background}{\pagefemto}
 106 \begin{frame}{Outline}
 107
 108 \setbeamertemplate{section in toc}[sections numbered]
 109 \tableofcontents
 110 \end{frame}
 111
 112
 113 %%%%%%%%%%%%%%%%%%%%
 114 %%    SLIDE 03    %%
 115 %%%%%%%%%%%%%%%%%%%%
 116 \begin{frame}{Introduction and problem definition}
 117  \section{\small {Introduction and Problem definition}}
 118    \bf \textcolor{blue}{Approaches to increase the computing power of the parallel platform :}
 119      \begin{minipage}{0.5\textwidth}
 120       \textcolor{blue}{1)} \small  \bf \textcolor{black}{Increasing the frequency of a  processor.}
 121     \end{minipage}%
 122     \begin{minipage}{0.6\textwidth}
 123
 124 \begin{figure}[h!]
 125
 126     \includegraphics[width=0.7\textwidth]{fig/freq-years}
 127     \end{figure}
 128     \end{minipage}%
 129     \vspace{0.2cm}
 130     \begin{minipage}{0.5\textwidth}
 131      \textcolor{blue}{2)} \small \bf \textcolor{black}{Increasing the number of nodes.}
 132
 133     \tiny  \textcolor{blue}{Recently, the Tianhe-2 supercomputer had more than 3 million cores while consuming around 17.8 megawatts.}
 134
 135     \end{minipage}%
 136     \begin{minipage}{0.6\textwidth}
 137     \begin{figure}[h!]
 138      \includegraphics[width=0.7\textwidth]{fig/clusters}
 139     \end{figure}
 140     \end{minipage}%
 141  \end{frame}
 142
 143
 144
 145
 146  %%%%%%%%%%%%%%%%%%%
 147 %%    SLIDE 04   %%
 148 %%%%%%%%%%%%%%%%%%%%
 149 \begin{frame}{Introduction and problem definition}
 150  \vspace{0.1cm}
 151  \bf \textcolor{blue}{Techniques for energy consumption reduction}
 152
 153      \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
 154     \vspace{-0.9cm}
 155     \begin{figure}
 156      \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
 157      %\includegraphics[width=0.6\textwidth]{on-off/a-69}
 158     \end{figure}
 159  \end{frame}
 160
 161 %%%%%%%%%%%%%%%%%%%%
 162 %%    SLIDE 06    %%
 163 %%%%%%%%%%%%%%%%%%%%
 164 \begin{frame}{Techniques for energy consumption reduction}
 165
 166   \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency Scaling (DVFS)}
 167      \vspace{-0.5cm}
 168     \begin{figure}
 169     \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
 170      %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
 171     \end{figure}
 172     \end{frame}
 173
 174
 175
 176 %%%%%%%%%%%%%%%%%%%%
 177 %%    SLIDE 07    %%
 178 %%%%%%%%%%%%%%%%%%%%
 179 \begin{frame}{Motivations}
 180 \vspace{0.05cm}
 181 \section{\small {Motivations}}
 182 \textcolor{blue}{Why we used the DVFS method:}
 183 \vspace{-0.49cm}
 184 \begin{minipage}{0.5\textwidth}
 185     \vspace{-0.49cm}
 186       \begin{itemize}
 187        \item  \small \textcolor{black}{The processor accounts for the largest share of the power consumption\textsuperscript{1}.}
 188
 189          \end{itemize}
 190
 191     \end{minipage}%
 192     \begin{minipage}{0.5\textwidth}
 193      \vspace{-0.49cm}
 194     \begin{figure}[h!]
 195      \includegraphics[width=0.85\textwidth]{fig/node-power}
 196
 197     \end{figure}
 198     \end{minipage}%
 199
 200   \begin{itemize} \item \small  \textcolor{black}{It is used to reduce the energy consumption while keeping all the nodes working; thus it is better suited to parallel computing.}
 201                 \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes method.}  \end{itemize}
 202
 203 \vspace{-0.12cm}
 204
 205  \begin{block}{\textcolor{white}{Challenge and Objective}}
 206
 207         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it degrades the performance simultaneously.}
 208
 209                 \vspace{0.1cm}
 210  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
 211 \end{block}
 212
 213  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
 214 for a warehouse-sized computer.
 215
 216     \end{frame}
 217
 218
 219
 220 %%%%%%%%%%%%%%%%%%%%
 221 %%    SLIDE 08    %%
 222 %%%%%%%%%%%%%%%%%%%%
 223
 224
 225 \begin{frame}{Contribution}
 226
 227 \section{\small {Energy optimization of homogeneous platform}}
 228 \begin{center}
 229 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
 230 \end{center}
 231  \end{frame}
 232
 233
 234
 235 %%%%%%%%%%%%%%%%%%%%
 236 %%    SLIDE 09    %%
 237 %%%%%%%%%%%%%%%%%%%%
 238
 239 \begin{frame}{Objectives}
 240         \begin{femtoBlock}{} \vspace{-12 mm}
 241                 \begin{itemize} \small
 242                    \item  Studying the effect of the scaling factor $S$ on the \textbf{energy consumption and performance} of parallel  applications with iterations such as NAS
 243                           Benchmarks. \includegraphics[width=.06\textwidth]{c1/nasa.pdf} \medskip
 244
 245                    \item  Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
 246                    \item  Proposing an algorithm for selecting the scaling factor $S$ producing \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
 247                    \item  Comparing the proposed algorithm to existing methods.
 248
 249
 250                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
 251                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
 252                 \end{itemize}
 253                  %\let\thefootnote\relax\footnote{}
 254           \vspace{-10 mm}
 255         \end{femtoBlock}
 256 \end{frame}
 257
 258
 259
 260 %%%%%%%%%%%%%%%%%%%%
 261 %%    SLIDE 10    %%
 262 %%%%%%%%%%%%%%%%%%%%
 263
 264
 265 \begin{frame}{Execution of synchronous parallel tasks}
 266 \vspace{-0.5 cm}
 267 \begin{figure}
 268   \centering
 269   \subfloat[Sync. imbalanced communications]{%
 270     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
 271   \subfloat[Sync. imbalanced computations]{%
 272     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
 273  % \caption{Parallel tasks on homogeneous platform}
 274   \label{fig:homo}
 275 \end{figure}
 276
 277  \end{frame}
 278
 279
 280
 281
 282 %%%%%%%%%%%%%%%%%%%%
 283 %%    SLIDE 11   %%
 284 %%%%%%%%%%%%%%%%%%%%
 285 \begin{frame}{Energy model for homogeneous platform}
 286       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
 287        (\textcolor{red}{$P_s$}) power.
 288     \begin{equation}
 289      \label{eq:pd}
 290      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot C_L \cdot V^2 \cdot F}
 291    \end{equation}
 292     \scriptsize \underline{Where}: \\
 293     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm}  \textcolor{blue}{$C_L$}: load capacitance\\
 294     \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
 295    \begin{equation}
 296      \label{eq:ps}
 297      \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
 298    \end{equation}
 299     \underline{Where}:\\
 300         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage.  \hspace{28 mm}   \textcolor{blue}{$N_{trans}$}: number of transistors. \\
 301         \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent
 302              parameter.}
 303 \end{frame}
 304
 305 %%%%%%%%%%%%%%%%%%%%
 306 %%    SLIDE 12   %%
 307 %%%%%%%%%%%%%%%%%%%%
 308
 309 \begin{frame}{Energy model for homogeneous platform}
 310
 311           The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.  \medskip
 312
 313
 314
 315         \begin{block}{\small Rauber and Rünger's energy model}
 316          $ E = P_{d} \cdot S_1^{-2} \cdot
 317          \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
 318             P_{s} \cdot S_1  \cdot T_1 \cdot N$
 319         \end{block}
 320            \textcolor{blue}{$S_1$}: the max. scaling factor\\
 321            \textcolor{blue}{$P_{d}$}: the dynamic power\\
 322            \textcolor{blue}{$P_{s}$}: the static power\\
 323            \textcolor{blue}{$T_1$}: the time of the slowest task\\
 324            \textcolor{blue}{$T_i$}: the time of the other tasks\\
 325            \textcolor{blue}{$N$}:  the number of  nodes
 326
 327 \end{frame}
 328
 329
 330 %%%%%%%%%%%%%%%%%%%%
 331 %%    SLIDE 13   %%
 332 %%%%%%%%%%%%%%%%%%%%
 333 \begin{frame}{Performance evaluation of MPI programs}
 334         \begin{femtoBlock}{}
 335               \vspace{-5 mm}
 336               \begin{block}{\small Execution time prediction model}
 337                      \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{\mathit{MaxCompOld}} \cdot S + T_{\mathit{MinCommOld}}}$}
 338           \end{block}
 339           \vspace{10 mm}
 340            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
 341            \quad%
 342            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
 343             \vspace{5 mm}
 344
 345            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
 346            \end{femtoBlock}
 347 \end{frame}
 348
 349
 350
 351
 352  %%%%%%%%%%%%%%%%%%%%
 353 %%    SLIDE 14   %%
 354 %%%%%%%%%%%%%%%%%%%%
 355 \begin{frame}{Performance and energy reduction trade-off}
 356         \begin{femtoBlock}{} \vspace{-15 mm}
 357                \begin{figure}
 358      \centering
 359      \subfloat[\small  Real relation.]{%
 360      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
 361      \quad%
 362      \subfloat[\small Converted relation.]{%
 363      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
 364   \label{fig:rel}
 365  % \caption{The energy and performance relation}
 366 \end{figure}
 367
 368  Where:~~~ $\textcolor{blue}{\text{Performance}} = (\text{execution time})^{-1}$
 369
 370 %\vspace{-0.3cm}
 371       \small
 372          \begin{block}{\small Our objective function}
 373          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}
 374                     (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} -
 375                      \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$}
 376
 377         \end{block}
 378         \end{femtoBlock}
 379
 380 \end{frame}
 381
 382 %%%%%%%%%%%%%%%%%%%%
 383 %%    SLIDE 15   %%
 384 %%%%%%%%%%%%%%%%%%%%
 385  \begin{frame}{Scaling factor selection algorithm}
 386 \vspace{-0.75cm}
 387      \begin{center}
 388       \includegraphics[width=.56 \textwidth]{c1/algo-homo}
 389      \end{center}
 390
 391 \end{frame}
 392
 393
 394 %%%%%%%%%%%%%%%%%%%%
 395 %%    SLIDE 16   %%
 396 %%%%%%%%%%%%%%%%%%%%
 397 \begin{frame}{Scaling algorithm example}
 398 \vspace{-0.75cm}
 399
 400      \begin{figure}
 401   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
 402   %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
 403   \end{figure}
 404 \end{frame}
 405
 406 %%%%%%%%%%%%%%%%%%%%
 407 %%    SLIDE 17   %%
 408 %%%%%%%%%%%%%%%%%%%%
 409 \begin{frame}{Experimental results }
 410       \begin{femtoBlock}{}
 411         \begin{itemize}
 412          \small
 413            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 414            \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
 415            \item Each node in the cluster has 18 frequency values from \textbf{2.5~GHz} to \textbf{800~MHz}.\medskip
 416            \item The proposed algorithm was evaluated over the A, B, C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
 417            \item $P_d = 20\,\mathrm{W}$,  $P_s = 4\,\mathrm{W}$.
 418                 \end{itemize}
 419         \end{femtoBlock}
 420 \end{frame}
 421
 422
 423 %%%%%%%%%%%%%%%%%%%%
 424 %%    SLIDE 18   %%
 425 %%%%%%%%%%%%%%%%%%%%
 426 \begin{frame}{Experimental results}
 427   \begin{femtoBlock}{}
 428       \centering {
 429      \includegraphics[width=.35\textwidth]{c1/ep}
 430      \includegraphics[width=.35\textwidth]{c1/cg}
 431      \includegraphics[width=.35\textwidth]{c1/bt}}
 432
 433      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
 434  \end{femtoBlock}
 435 \end{frame}
 436
 437
 438   %%%%%%%%%%%%%%%%%%%%
 439 %%    SLIDE 19   %%
 440 %%%%%%%%%%%%%%%%%%%%
 441 \begin{frame}{Results comparison}
 442          \begin{block}{\small Rauber and Rünger's optimal scaling factor}
 443            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
 444             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
 445         \end{block}
 446     \centering {
 447          %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
 448          %\qquad
 449          %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
 450
 451
 452             \includegraphics[width=.55\textwidth]{c1/compare_c.pdf}}
 453
 454 \end{frame}
 455
 456
 457 %%%%%%%%%%%%%%%%%%%%
 458 %%    SLIDE 20   %%
 459 %%%%%%%%%%%%%%%%%%%%
 460 \begin{frame}{The proposed new energy model}
 461     \vspace{-0.75cm}
 462   \begin{figure}
 463   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
 464   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
 465   \end{figure}
 466 \end{frame}
 467
 468
 469 %%%%%%%%%%%%%%%%%%%%
 470 %%    SLIDE 21   %%
 471 %%%%%%%%%%%%%%%%%%%%
 472 \begin{frame}{Comparing the new model with the Rauber model}
 473  \vspace{0.1cm}
 474  \centering
 475     \includegraphics[width=.45\textwidth]{c1/energy_con}
 476
 477     \includegraphics[width=.5\textwidth]{c1/compare-scales}
 478 \end{frame}
 479
 480
 481
 482
 483    % \begin{frame}{Summary}
 484      % \begin{femtoBlock}{}
 485      % \begin{itemize}
 486       %\small
 487        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
 488        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
 489          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip
 490          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
 491                % performance}. \medskip
 492          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
 493          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
 494          %\end{itemize}
 495
 496         %\end{femtoBlock}
 497 %\end{frame}
 498
 499
 500 %%%%%%%%%%%%%%%%%%%%
 501 %%    SLIDE 22    %%
 502 %%%%%%%%%%%%%%%%%%%%
 503
 504
 505 \begin{frame}{Contribution}
 506
 507 \section{\small {Energy optimization of heterogeneous platform}}
 508 \begin{center}
 509
 510
 511 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
 512 \end{center}
 513  \end{frame}
 514
 515
 516
 517 %%%%%%%%%%%%%%%%%%%%
 518 %%    SLIDE 23    %%
 519 %%%%%%%%%%%%%%%%%%%%
 520
 521 \begin{frame}{Objectives}
 522         \begin{femtoBlock}{} \vspace{-12 mm}
 523                 \begin{itemize} \small
 524                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running
 525                           over a heterogeneous platform (cluster and Grid). \medskip
 526                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
 527                           message passing iterative applications.    \medskip
 528
 529                    \item  Computing  the vector of scaling factors ($S_1, S_2, \dots, S_n$)  producing \textcolor{blue} {the optimal trade-off} between
 530                           the energy consumption and the performance.
 531                 \end{itemize}
 532
 533           \vspace{-10 mm}
 534         \end{femtoBlock}
 535 \end{frame}
 536
 537
 538 %%%%%%%%%%%%%%%%%%%%
 539 %%    SLIDE 24    %%
 540 %%%%%%%%%%%%%%%%%%%%
 541 \begin{frame}{The execution time model}
 542       \vspace{-8 mm}
 543      \begin{figure}[!t]
 544        \centering
 545        \includegraphics[scale=0.5]{c2/commtasks}
 546        \label{fig:heter}
 547      \end{figure}
 548        \vspace{-12 mm}
 549        \medskip
 550
 551     \begin{block}{\small The execution time prediction model}
 552     \begin{equation}
 553      \label{eq:perf}
 554      \small\textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
 555     \end{equation}
 556     \end{block}
 557  \small  Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{communication~times + slack~times}$
 558
 559 \end{frame}
 560
 561  %%%%%%%%%%%%%%%%%%%%
 562 %%    SLIDE 25    %%
 563 %%%%%%%%%%%%%%%%%%%%
 564  \begin{frame}{The energy consumption model}
 565     The overall energy consumption of a message passing synchronous  application executed over
 566      a heterogeneous platform can be computed as  follows:
 567     \begin{multline}
 568      \label{eq:energy}
 569      \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
 570      \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)))}
 571       \hspace{10 mm}
 572     \end{multline}
 573     \underline{where}:\\
 574     \textcolor{blue}{$N$}: the number of nodes.
 575 \end{frame}
 576
 577
 578 %%%%%%%%%%%%%%%%%%%%
 579 %%    SLIDE 26    %%
 580 %%%%%%%%%%%%%%%%%%%%
 581   \begin{frame}{The  energy  model example for heter. cluster}
 582   \vspace{-0.5cm}
 583  \begin{figure}
 584   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
 585   %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
 586   \end{figure}
 587  \end{frame}
 588
 589
 590
 591
 592 %%%%%%%%%%%%%%%%%%%%
 593 %%    SLIDE 27    %%
 594 %%%%%%%%%%%%%%%%%%%%
 595 %\begin{frame}{The trade-off between energy  and performance}
 596    % \vspace{-7 mm}
 597     %\begin{figure}
 598    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
 599    % \end{figure}
 600    % \vspace{-7 mm}
 601    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}}
 602     %{E_{Max}}$}. \\
 603     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
 604
 605    %  \begin{block}{\small The tradeoff model}
 606     % \begin{equation}
 607     %  \label{eq:max}
 608     %  \textcolor{red}{MaxDist} =
 609      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
 610       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
 611       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
 612       %\end{equation}
 613     % \end{block}
 614 %\end{frame}
 615
 616
 617 %%%%%%%%%%%%%%%%%%%%
 618 %%    SLIDE 28    %%
 619 %%%%%%%%%%%%%%%%%%%%
 620  \begin{frame}{The scaling algorithm for heter. cluster}
 621
 622  \centering
 623    \includegraphics[width=.52\textwidth]{algo-heter}
 624  \end{frame}
 625
 626
 627  %%%%%%%%%%%%%%%%%%%%
 628 %%    SLIDE 29    %%
 629 %%%%%%%%%%%%%%%%%%%%
 630  \begin{frame}{The scaling algorithm example}
 631  \vspace{-0.5cm}
 632  \centering
 633
 634   \begin{figure}
 635   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
 636  % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
 637   \end{figure}
 638 \end{frame}
 639
 640
 641
 642
 643 %%%%%%%%%%%%%%%%%%%%
 644 %%    SLIDE 30    %%
 645 %%%%%%%%%%%%%%%%%%%%
 646 \begin{frame}{Experiments over a heterogeneous cluster  }
 647         \begin{itemize}
 648          \small
 649            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 650            \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
 651            \item Four types of processors with different computing powers were used.\medskip
 652            \item We ran the benchmarks on different numbers of nodes ranging from 4 to 144 nodes.\medskip
 653            \item The total power consumption of the chosen CPUs is assumed to be composed of $80\%$ for the dynamic power and $20\%$ for the static power.
 654                   \medskip
 655
 656         \end{itemize}
 657
 658 \end{frame}
 659
 660
 661 %%%%%%%%%%%%%%%%%%%%
 662 %%    SLIDE 31    %%
 663 %%%%%%%%%%%%%%%%%%%%
 664 \begin{frame}{The experimental results}
 665    \vspace{-5 mm}
 666    \begin{figure}[!t]
 667    \centering
 668     \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
 669
 670     \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
 671      for the class C of the NAS Benchmarks executed over 8 nodes}
 672
 673    \end{figure}
 674 \end{frame}
 675
 676
 677
 678 %%%%%%%%%%%%%%%%%%%%
 679 %%    SLIDE 32    %%
 680 %%%%%%%%%%%%%%%%%%%%
 681 \begin{frame}{The experimental results}
 682    \vspace{-5 mm}
 683    \begin{figure}[!t]
 684    \centering
 685
 686     \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
 687
 688    \textcolor{blue}{On average, it degrades  by \textcolor{red}{3.8\%} the performance
 689      of NAS Benchmarks class C executed over 8 nodes}
 690      \end{figure}
 691 \end{frame}
 692
 693
 694
 695 %%%%%%%%%%%%%%%%%%%%
 696 %%    SLIDE 33    %%
 697 %%%%%%%%%%%%%%%%%%%%
 698 \begin{frame}{The results of the three power scenarios}
 699    \vspace{-5 mm}
 700    \begin{figure}[!t]
 701    \centering
 702    \includegraphics[width=.55\textwidth]{c2/three_power.pdf}
 703    \vspace{10 mm}
 704    \includegraphics[width=.55\textwidth]{c2/three_scenarios.pdf}
 705    \end{figure}
 706 \end{frame}
 707
 708
 709
 710 %%%%%%%%%%%%%%%%%%%%
 711 %%    SLIDE 34    %%
 712 %%%%%%%%%%%%%%%%%%%%
 713 \begin{frame}{Comparing the objective function to EDP}
 714
 715      EDP is the product of the energy consumption and the delay.
 716     \vspace{-5 mm}
 717     \begin{figure}[!t]
 718     \centering
 719     \includegraphics[width=.55\textwidth]{c2/avg_compare.pdf}
 720
 721     \includegraphics[width=.55\textwidth]{c2/compare_with_EDP.pdf}
 722     \end{figure}
 723 \end{frame}
 724
 725
 726
 727
 728 %%%%%%%%%%%%%%%%%%%%
 729 %%    SLIDE 35    %%
 730 %%%%%%%%%%%%%%%%%%%%
 731 %\begin{frame}{Energy optimization of grid platform}
 732   % \begin{figure}[!t]
 733    % \centering
 734          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
 735
 736         %   \small  10 sites distributed over France and Luxembourg
 737         %\end{figure}
 738 %\end{frame}
 739
 740
 741 %%%%%%%%%%%%%%%%%%%%
 742 %%    SLIDE 36    %%
 743 %%%%%%%%%%%%%%%%%%%%
 744 \begin{frame}{The grid architecture}
 745 \begin{center}
 746 \includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
 747 \end{center}
 748
 749  %\begin{frame}{Performance, Energy and trade-off models} \small
 750   %\begin{block}{\small The performance model of grid}
 751    % \begin{equation}
 752   %\label{eq:perf}
 753   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
 754  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
 755 %\end{equation}
 756     %\end{block}
 757
 758
 759  %\begin{block}{\small The energy model of grid}\small
 760   %  \begin{equation}
 761   %\label{eq:energy}
 762  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +
 763 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
 764 %\end{equation}
 765    % \end{block}
 766
 767 %\begin{block}{\small The trade-off model of grid}
 768 %\small
 769     %\begin{equation}
 770    %\label{eq:max}
 771   %\MaxDist =
 772   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
 773    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
 774     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
 775 %\end{equation}
 776    % \end{block}
 777
 778
 779  \end{frame}
 780
 781
 782
 783 %%%%%%%%%%%%%%%%%%%%
 784 %%    SLIDE 37    %%
 785 %%%%%%%%%%%%%%%%%%%%
 786  \begin{frame}{Experiments over Grid'5000}
 787   \centering
 788
 789           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
 790
 791           \vspace{-3 mm}
 792           \textcolor{blue}{Two experiments were conducted: over one site and over two sites,
 793           each with three clusters}
 794
 795               \vspace{1mm}
 796
 797           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
 798
 799         \textcolor{blue}{Grid'5000 power measurement tools were used}
 800 \end{frame}
 801
 802
 803
 804
 805 %%%%%%%%%%%%%%%%%%%%
 806 %%    SLIDE 38    %%
 807 %%%%%%%%%%%%%%%%%%%%
 808 \begin{frame}{Experiments over Grid'5000}
 809
 810    \begin{minipage}{0.4\textwidth}
 811        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
 812         %\textcolor{red}{30\%}}
 813         \textcolor{blue}{The energy saving =  \textcolor{red}{30\%}}
 814    \end{minipage}
 815      \begin{minipage}{0.55\textwidth}
 816         \begin{figure}[h!]
 817           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
 818      \end{figure}
 819 \end{minipage}
 820
 821          \begin{minipage}{0.4\textwidth}
 822            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
 823                 %performance by \textcolor{red}{3.2\%}}
 824               \textcolor{blue}{The performance degradation  =  \textcolor{red}{3.2\%}}
 825         \end{minipage}
 826        \begin{minipage}{0.55\textwidth}
 827          \begin{figure}[h!]
 828            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
 829          \end{figure}
 830           \end{minipage}
 831  \end{frame}
 832
 833
 834
 835 %%%%%%%%%%%%%%%%%%%%
 836 %%    SLIDE 39    %%
 837 %%%%%%%%%%%%%%%%%%%%
 838 \begin{frame}{Experiments over Grid'5000}
 839    \textcolor{blue}{One core  and Multi-cores per node results:}
 840
 841   \begin{figure}[h!]
 842   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
 843   \hspace{0.3cm}
 844   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
 845   \end{figure}
 846
 847   \centering \small \textcolor{blue}{Using the multi-core-per-node scenario decreases the computation-to-communication ratio}.
 848 \end{frame}
 849
 850
 851
 852 %\begin{frame}{Summary}
 853 %\begin{itemize}
 854      % \small
 855         % \item  Two scaling algorithms were applied to \textcolor{blue}{heterogeneous clusters} and \textcolor{blue}{grids}.
 856         % \item  New \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
 857       %   \item  The experiments were conducted over the \textcolor{blue}{SimGrid} simulator and the real
 858           %test-bed \textcolor{blue}{Grid'5000}.
 859
 860          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
 861         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
 862       %    clusters.
 863
 864          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
 865         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
 866
 867        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
 868      %    \end{itemize}
 869 %\end{frame}
 870
 871
 872 %%%%%%%%%%%%%%%%%%%%
 873 %%    SLIDE 40    %%
 874 %%%%%%%%%%%%%%%%%%%%
 875 \begin{frame}{Contribution}
 876 \section{\small {Energy optimization of asynchronous applications}}
 877 \begin{center}
 878 \bf  \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing  applications}
 879 \end{center}
 880  \end{frame}
 881
 882
 883
 884 %%%%%%%%%%%%%%%%%%%%
 885 %%    SLIDE 41   %%
 886 %%%%%%%%%%%%%%%%%%%%
 887 \begin{frame}{Problem definition}\vspace{0.8 mm}
 888 \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
 889 \vspace{-8 mm}
 890 \begin{figure}
 891  \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
 892  %\includegraphics[width=0.6\textwidth]{syn/a-503}
 893   \end{figure}
 894 \end{frame}
 895
 896
 897
 898 %%%%%%%%%%%%%%%%%%%%
 899 %%    SLIDE 42   %%
 900 %%%%%%%%%%%%%%%%%%%%
 901 \begin{frame}{Problem definition}\vspace{0.8 mm}
 902 \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
 903 \vspace{-8 mm}
 904 \begin{figure}
 905  \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
 906  %\includegraphics[width=0.6\textwidth]{asyn/a-440}
 907   \end{figure}
 908 \end{frame}
 909
 910
 911
 912 %%%%%%%%%%%%%%%%%%%%
 913 %%    SLIDE 43   %%
 914 %%%%%%%%%%%%%%%%%%%%
 915 \begin{frame}{Solution}\vspace{0.8mm}
 916 \textcolor{blue}{Using asynchronous communications with DVFS }
 917 \vspace{-8 mm}
 918 \begin{figure}
 919   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
 920   %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
 921   \end{figure}
 922 \end{frame}
 923
 924
 925
 926
 927 %%%%%%%%%%%%%%%%%%%%
 928 %%    SLIDE 44   %%
 929 %%%%%%%%%%%%%%%%%%%%
 930 %\begin{frame}{The performance models}
 931
 932 %\begin{block}{\small The performance model of Asynch. Applications}\small
 933 %\begin{equation}
 934   %\label{eq:asyn_time}
 935  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
 936 %\end{equation}
 937 %\end{block}
 938
 939
 940 %\begin{block}{\small The performance model of Hybrid Applications}\small
 941 %\begin{equation}
 942   %\label{eq:asyn_perf}
 943   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
 944    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
 945 %\end{equation}
 946 %\end{block}
 947
 948
 949 %\end{frame}
 950
 951
 952
 953 %%%%%%%%%%%%%%%%%%%%
 954 %%    SLIDE 45   %%
 955 %%%%%%%%%%%%%%%%%%%%
 956 %\begin{frame}{The energy consumption models}
 957
 958 %\begin{block}{\small The energy model of Asynch. Applications}\small
 959 %\begin{equation}
 960   %\label{eq:asyn_energy1}
 961 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
 962 %\end{equation}
 963 %\end{block}
 964
 965
 966 %\begin{block}{\small The energy model of Hybrid Applications}\small
 967 %\begin{multline}
 968   %\label{eq:asyn_energy}
 969  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
 970 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
 971 %\end{multline}
 972 %\end{block}
 973 %\end{frame}
 974
 975
 976
 977 %%%%%%%%%%%%%%%%%%%%
 978 %%    SLIDE 44   %%
 979 %%%%%%%%%%%%%%%%%%%%
 980 \begin{frame}{The performance and the energy models }
 981
 982 \centering
 983 \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
 984 \end{frame}
 985
 986
 987
 988
 989
 990 %%%%%%%%%%%%%%%%%%%%
 991 %%    SLIDE 46   %%
 992 %%%%%%%%%%%%%%%%%%%%
 993 \begin{frame}{The scaling algorithm for Asynch.  applications}
 994 \vspace{-0.1 mm}
 995 \centering
 996 \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
 997 \end{frame}
 998
 999
1000
1001 %%%%%%%%%%%%%%%%%%%%
1002 %%    SLIDE 47   %%
1003 %%%%%%%%%%%%%%%%%%%%
1004 \begin{frame}{The experiments}
1005    \vspace{-5 mm}
1006    \begin{figure}[!t]
1007    \begin{itemize}
1008       \small
1009         \item The architecture of the grid:
1010    \end{itemize}
1011     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
1012    \end{figure}
1013    \begin{itemize}
1014       \small
1015         \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1016         \item Evaluating the application over the simulator and Grid'5000.
1017    \end{itemize}
1018 \end{frame}
1019
1020
1021
1022 %%%%%%%%%%%%%%%%%%%%
1023 %%    SLIDE 48   %%
1024 %%%%%%%%%%%%%%%%%%%%
1025 \begin{frame}{The simulation results}
1026 \centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1027
1028 \centering
1029     \includegraphics[scale=0.42]{c3/energy_saving.eps}
1030
1031  \centering  The average energy saving  = \textcolor{red}{22\%}
1032 \end{frame}
1033
1034
1035
1036 %%%%%%%%%%%%%%%%%%%%
1037 %%    SLIDE 49   %%
1038 %%%%%%%%%%%%%%%%%%%%
1039 \begin{frame}{The simulation results}
1040 \centering
1041
1042      \includegraphics[scale=0.42]{c3/perf_degra.eps}
1043
1044  \centering    The average speed-up  = \textcolor{red}{5.72\%}
1045 \end{frame}
1046
1047
1048
1049 %%%%%%%%%%%%%%%%%%%%
1050 %%    SLIDE 50   %%
1051 %%%%%%%%%%%%%%%%%%%%
1052  \begin{frame}{The Grid'5000 results}
1053    \vspace{-20 mm}
1054    \begin{figure}[!t]
1055    \centering
1056    \hspace{-8 mm}
1057     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1058     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1059    \end{figure}
1060     \vspace{-5 mm}
1061      \centering
1062 The energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1063 \end{frame}
1064
1065
1066 %%%%%%%%%%%%%%%%%%%%
1067 %%    SLIDE 51   %%
1068 %%%%%%%%%%%%%%%%%%%%
1069 \begin{frame}{The comparison results}
1070  \centering
1071     \includegraphics[width=.5\textwidth]{c3/compare.eps}
1072
1073     \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1074 \end{frame}
1075
1076
1077
1078
1079 %%%%%%%%%%%%%%%%%%%%
1080 %%    SLIDE 52  %%
1081 %%%%%%%%%%%%%%%%%%%%
1082 \begin{frame}{Conclusions}
1083 \section{Conclusions and Perspectives}
1084 \begin{itemize}
1085
1086 \small  \barrow  Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous and asynchronous parallel applications with iterations running over
1087 \textcolor{blue}{homogeneous and  heterogeneous clusters and grids}.
1088
1089
1090
1091 \small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
1092
1093 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1094
1095 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1096 Multi-splitting} method.
1097
1098 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over the \textcolor{blue}{Grid'5000 testbed}.
1099
1100 \small  \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's  method} or  \textcolor{blue}{the EDP objective function}.
1101
1102
1103 \end{itemize}
1104 \end{frame}
1105
1106
1107
1108 %%%%%%%%%%%%%%%%%%%%
1109 %%    SLIDE 53   %%
1110 %%%%%%%%%%%%%%%%%%%%
1111 \begin{frame}{Publications}
1112
1113 \begin{block}{\small Journal Articles }\scriptsize
1114 \begin{enumerate}[$\lbrack$1$\rbrack$]
1115
1116 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational
1117       Science}, 2016.
1118
1119 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for
1120       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016 (submitted).
1121
1122 \end{enumerate}
1123 \end{block}
1124
1125
1126 \begin{block}{\small Conference Articles }\scriptsize
1127
1128 \begin{enumerate}[$\lbrack$1$\rbrack$]
1129
1130 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1131       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014}, pp.~225--230.
1132       IEEE Computer Society, Milan, Italy (2014).
1133
1134 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1135       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1136       \textit{The 16\textsuperscript{th} PDSEC}, pp.~922--931. IEEE Computer Society, India (2015).
1137
1138 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
1139       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society,
1140       Paris (2016).
1141
1142 \end{enumerate}
1143
1144 \end{block}
1145 \end{frame}
1146
1147
1148 %%%%%%%%%%%%%%%%%%%%
1149 %%    SLIDE 54   %%
1150 %%%%%%%%%%%%%%%%%%%%
1151 \begin{frame}{Perspectives}
1152
1153 \begin{itemize}
1154
1155 \small  \barrow The proposed algorithms should  take into consideration the
1156 \textcolor{blue}{variability between some iterations}.
1157
1158 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1159
1160 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1161
1162 \small \barrow The results returned by the energy models should be compared to the values given by  \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1163 \end{itemize}
1164
1165 \end{frame}
1166
1167 %%%%%%%%%%%%%%%%%%%%
1168 %%    SLIDE 55  %%
1169 %%%%%%%%%%%%%%%%%%%%
1170 \begin{frame}{Fin} \vspace{-10 mm}
1171
1172             \centering \Large \textcolor{blue}{Thank you for your attention}
1173
1174             \vspace{2cm}
1175             \centering \textcolor{blue}{ {\Large Questions?}}
1176
1177 \end{frame}
1178 \end{document}
1179 %  _____ ___ _   _
1180 % |  ___|_ _| \ | |
1181 % | |_   | ||  \| |
1182 % |  _|  | || |\  |
1183 % |_|   |___|_| \_|
1184 %