thesis-presentation/AhmedSlides.tex

   1  \documentclass{beamer}
   2 \usepackage{beamerthemefemto}
   3 \usepackage[latin1]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
   6 \usepackage{algorithm,algorithmicx,algpseudocode}
   7 \usepackage{graphicx,graphics}
   8 \usepackage{subfig}
   9 \usepackage{listings}
  10 \usepackage{colortbl}
  11 \usepackage{amsmath}
  12 \usepackage{xspace}
  13  \usepackage{movie15}
  14  \usepackage{animate}
  15 \usepackage{xmpmulti}
  16  \newcommand{\AG}[2][inline]{%
  17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}
  18 \newcommand{\JC}[2][inline]{%
  19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
  20 \definecolor{myblue}{RGB}{0,29,119}
  21 \newcommand{\Xsub}[2]{{\ensuremath{#1_\mathit{#2}}}}
  22 \usepackage{fixltx2e}
  23 %% used to put some subscripts lower, and make them more legible
  24 \newcommand{\fxheight}[1]{\ifx#1\relax\relax\else\rule{0pt}{1.52ex}#1\fi}
  25 \usepackage{ragged2e}
  26 \newcommand{\CL}{\Xsub{C}{L}}
  27 \newcommand{\Dist}{\mathit{Dist}}
  28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
  29 \newcommand{\Eind}{\Xsub{E}{ind}}
  30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
  31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
  32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
  33 \newcommand{\Es}{\Xsub{E}{S}}
  34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}}
  35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}}
  36 \newcommand{\Fnew}{\Xsub{F}{new}}
  37 \newcommand{\Vnew}{\Xsub{V}{new}}
  38 \newcommand{\Vmax}{\Xsub{V}{max}}
  39 \newcommand{\Ileak}{\Xsub{I}{leak}}
  40 \newcommand{\Kdesign}{\Xsub{K}{design}}
  41 \newcommand{\MaxDist}{\mathit{Max}\Dist}
  42 \newcommand{\MinTcm}{\mathit{Min}\Tcm}
  43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
  44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
  45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
  46
  47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
  48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
  49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
  50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
  51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}}
  52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
  53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
  54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
  55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
  56 \newcommand{\Tnew}{\Xsub{T}{New}}
  57 \newcommand{\Told}{\Xsub{T}{Old}}
  58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
  59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
  60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
  61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
  62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
  63  \usepackage{pifont}
  64 \usepackage{xcolor}
  65 \definecolor{myblue}{RGB}{0,29,119}
  66 \usepackage[textsize=footnotesize]{todonotes}
  67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]}
  68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
  69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
  70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
  71
  72
  73
  74 %\title{Energy Consumption Optimization of Parallel Applications with
  75 %Iterations using CPU Frequency Scaling}
  76 \vspace{2cm}
  77
  78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm}
  79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
  80
  81 \date{}
  82 \vspace{-3cm}
  83 %  ____  _____ ____  _   _ _____
  84 % |  _ \| ____| __ )| | | |_   _|
  85 % | | | |  _| |  _ \| | | | | |
  86 % | |_| | |___| |_) | |_| | | |
  87 % |____/|_____|____/ \___/  |_|
  88 %
  89 \begin{document}
  90 \setbeamertemplate{background}{\titrefemto}
  91
  92 %%%%%%%%%%%%%%%%%%%%
  93 %%    SLIDE 01    %%
  94 %%%%%%%%%%%%%%%%%%%%
  95 \begin{frame}[plain]
  96 \vspace{1cm}
  97 \centering
  98    \titlepage
  99 \end{frame}
 100
 101
 102 %%%%%%%%%%%%%%%%%%%%
 103 %%    SLIDE 02    %%
 104 %%%%%%%%%%%%%%%%%%%%
 105 \setbeamertemplate{background}{\pagefemto}
 106 \begin{frame}{Outline}
 107
 108 \setbeamertemplate{section in toc}[sections numbered]
 109 \tableofcontents
 110 \end{frame}
 111
 112
 113 %%%%%%%%%%%%%%%%%%%%
 114 %%    SLIDE 03    %%
 115 %%%%%%%%%%%%%%%%%%%%
 116 \begin{frame}{Introduction and problem definition}
 117  \section{\small {Introduction and Problem definition}}
 118    \bf \textcolor{blue}{To get more computing power:}
 119      \begin{minipage}{0.5\textwidth}
 120       \textcolor{blue}{1)} \small  \bf \textcolor{black}{Increase the frequency of a  processor.\\ (limited due to overheating)}
 121     \end{minipage}%
 122     \begin{minipage}{0.6\textwidth}
 123
 124 \begin{figure}[h!]
 125
 126     \includegraphics[width=0.7\textwidth]{fig/freq-years}
 127     \end{figure}
 128     \end{minipage}%
 129     \vspace{0.2cm}
 130     \begin{minipage}{0.5\textwidth}
 131      \textcolor{blue}{2)} \small \bf \textcolor{black}{Use more nodes.}
 132
 133  \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}
 134
 135     \end{minipage}%
 136     \begin{minipage}{0.6\textwidth}
 137     \begin{figure}[h!]
 138      \includegraphics[width=0.7\textwidth]{fig/clusters}
 139     \end{figure}
 140     \end{minipage}%
 141  \end{frame}
 142
 143
 144
 145
 146  %%%%%%%%%%%%%%%%%%%
 147 %%    SLIDE 04   %%
 148 %%%%%%%%%%%%%%%%%%%%
 149 \begin{frame}{Techniques for energy consumption reduction}
 150
 151      \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
 152     \vspace{-0.9cm}
 153     \begin{figure}
 154      \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
 155      %\includegraphics[width=0.6\textwidth]{on-off/a-69}
 156     \end{figure}
 157  \end{frame}
 158
 159 %%%%%%%%%%%%%%%%%%%%
 160 %%    SLIDE 05    %%
 161 %%%%%%%%%%%%%%%%%%%%
 162 \begin{frame}{Techniques for energy consumption reduction}
 163
 164   \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic Voltage and Frequency Scaling (DVFS)}
 165      \vspace{-0.9cm}
 166     \begin{figure}
 167     \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
 168      %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
 169     \end{figure}
 170     \end{frame}
 171
 172
 173
 174 %%%%%%%%%%%%%%%%%%%%
 175 %%    SLIDE 06    %%
 176 %%%%%%%%%%%%%%%%%%%%
 177 \begin{frame}{Motivations}
 178 \vspace{0.05cm}
 179 \section{\small {Motivations}}
 180 \textcolor{blue}{Why we used the DVFS method:}
 181 \vspace{-0.49cm}
 182 \begin{minipage}{0.5\textwidth}
 183     \vspace{-0.49cm}
 184       \begin{itemize}
 185        \item  \small \textcolor{black}{ The CPU is the component that consumes the  highest amount of energy in a node \textsuperscript{1}. }
 186
 187          \end{itemize}
 188
 189     \end{minipage}%
 190     \begin{minipage}{0.5\textwidth}
 191      \vspace{-0.49cm}
 192     \begin{figure}[h!]
 193      \includegraphics[width=0.85\textwidth]{fig/node-power}
 194
 195     \end{figure}
 196     \end{minipage}%
 197
 198   \begin{itemize} \item \small  \textcolor{black}{DVFS reduces the energy consumption while
 199    keeping all the nodes working.}
 200                 \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes.}  \end{itemize}
 201
 202 \vspace{-0.12cm}
 203
 204  \begin{block}{\textcolor{white}{Challenge and Objective}}
 205
 206         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it also degrades the performance of the CPU.}
 207
 208                 \vspace{0.1cm}
 209  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
 210 \end{block}
 211
 212  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
 213 for a warehouse-sized computer.
 214
 215     \end{frame}
 216
 217
 218
 219 %%%%%%%%%%%%%%%%%%%%
 220 %%    SLIDE 08    %%
 221 %%%%%%%%%%%%%%%%%%%%
 222
 223
 224 \begin{frame}{The first contribution}
 225
 226 \section{\small {Energy optimization of a homogeneous platform}}
 227 %\vspace{-3cm}
 228  % \includegraphics[width=0.6\textwidth]{white.pdf}
 229
 230 \begin{center}
 231 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
 232 \end{center}
 233  \end{frame}
 234
 235
 236
 237 %%%%%%%%%%%%%%%%%%%%
 238 %%    SLIDE 09    %%
 239 %%%%%%%%%%%%%%%%%%%%
 240
 241 \begin{frame}{Objectives}
 242
 243                 \begin{itemize}   \small \justifying
 244
 245                    \item   Study the effect of the scaling factor on the \textbf{energy consumption and performance } of parallel  applications with iterations. \medskip
 246
 247                    \item   Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
 248                    \item   Proposing an algorithm for selecting the scaling factor that produces  \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
 249                    \item   Comparing the proposed algorithm to existing methods.
 250
 251
 252                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
 253                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
 254                 \end{itemize}
 255                  %\let\thefootnote\relax\footnote{}
 256
 257
 258 \end{frame}
 259
 260
 261
 262 %%%%%%%%%%%%%%%%%%%%
 263 %%    SLIDE 10    %%
 264 %%%%%%%%%%%%%%%%%%%%
 265
 266
 267 \begin{frame}{Execution of synchronous parallel tasks}
 268 \vspace{-0.5 cm}
 269 \begin{figure}
 270   \centering
 271   \subfloat[Synchronous imbalanced communications]{%
 272     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
 273   \subfloat[Synchronous imbalanced computations]{%
 274     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
 275  % \caption{Parallel tasks on homogeneous platform}
 276   \label{fig:homo}
 277 \end{figure}
 278
 279  \end{frame}
 280
 281
 282
 283
 284 %%%%%%%%%%%%%%%%%%%%
 285 %%    SLIDE 11   %%
 286 %%%%%%%%%%%%%%%%%%%%
 287 \begin{frame}{Energy model for a homogeneous platform} % slide: definitions of the dynamic (P_d) and static (P_s) power
 288       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
 289        (\textcolor{red}{$P_s$}) power.
 290     \begin{equation}
 291      \label{eq:pd}
 292      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot C_L \cdot V^2 \cdot F} % C_L is one symbol (load capacitance), not the product C*L
 293    \end{equation}
 294     \scriptsize \underline{Where}: \\
 295     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm}  \textcolor{blue}{$C_L$}: load capacitance\\
 296     \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
 297    \begin{equation}
 298      \label{eq:ps}
 299      \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}} % subscript spelled as in the legend entry below
 300    \end{equation}
 301     \underline{Where}:\\
 302         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage.  \hspace{28 mm}   \textcolor{blue}{$N_{trans}$}: number of transistors. \\
 303         \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent
 304              parameter.}
 305 \end{frame}
 306
 307 %%%%%%%%%%%%%%%%%%%%
 308 %%    SLIDE 12   %%
 309 %%%%%%%%%%%%%%%%%%%%
 310
 311 \begin{frame}{Energy model for a homogeneous platform}
 312
 313           The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.  \medskip
 314
 315
 316
 317         \begin{block}{\small Rauber and Rünger's energy model}
 318          $ E = P_{d} \cdot S_1^{-2} \cdot
 319          \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
 320             P_{s} \cdot S_1  \cdot T_1 \cdot N$
 321         \end{block}
 322            \textcolor{blue}{$S_1$}: the maximum scaling factor.\\
 323            \textcolor{blue}{$P_{d}$}: the dynamic power.\\
 324            \textcolor{blue}{$P_{s}$}: the static power.\\
 325            \textcolor{blue}{$T_1$}: the execution time of the slowest task.\\
 326            \textcolor{blue}{$T_i$}: the execution time of task i.\\
 327            \textcolor{blue}{$N$}:  the number of  nodes.
 328
 329 \end{frame}
 330
 331
 332 %%%%%%%%%%%%%%%%%%%%
 333 %%    SLIDE 13   %%
 334 %%%%%%%%%%%%%%%%%%%%
 335 \begin{frame}{Performance evaluation of MPI programs}
 336         \begin{femtoBlock}{}
 337               \vspace{-5 mm}
 338               \begin{block}{\small Execution time prediction model}
 339                      \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
 340           \end{block}
 341           \vspace{10 mm}
 342            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
 343            \quad%
 344            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
 345             \vspace{5 mm}
 346
 347            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
 348            \end{femtoBlock}
 349 \end{frame}
 350
 351
 352
 353
 354  %%%%%%%%%%%%%%%%%%%%
 355 %%    SLIDE 14   %%
 356 %%%%%%%%%%%%%%%%%%%%
 357 \begin{frame}{Performance and energy reduction trade-off}
 358         \begin{femtoBlock}{} \vspace{-15 mm}
 359                \begin{figure}
 360      \centering
 361      \subfloat[\small  Real relation.]{%
 362      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
 363      \quad%
 364      \subfloat[\small Converted relation.]{%
 365      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
 366   \label{fig:rel}
 367  % \caption{The energy and performance relation}
 368 \end{figure}
 369
 370  Where:~~~ $\textcolor{blue}{\text{Performance}} = (\text{execution time})^{-1}$
 371
 372 %\vspace{-0.3cm}
 373       \small
 374          \begin{block}{\small Our objective function}
 375          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}
 376                     (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} -
 377                      \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$}
 378
 379         \end{block}
 380         \end{femtoBlock}
 381
 382 \end{frame}
 383
 384 %%%%%%%%%%%%%%%%%%%%
 385 %%    SLIDE 15   %%
 386 %%%%%%%%%%%%%%%%%%%%
 387  \begin{frame}{Scaling factor selection algorithm}
 388 \vspace{-0.75cm}
 389      \begin{center}
 390       \includegraphics[width=.56 \textwidth]{c1/algo-homo}
 391      \end{center}
 392
 393 \end{frame}
 394
 395
 396 %%%%%%%%%%%%%%%%%%%%
 397 %%    SLIDE 16   %%
 398 %%%%%%%%%%%%%%%%%%%%
 399 \begin{frame}{Scaling algorithm example}
 400 \vspace{-0.75cm}
 401
 402      \begin{figure}
 403   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
 404   %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
 405   \end{figure}
 406 \end{frame}
 407
 408 %%%%%%%%%%%%%%%%%%%%
 409 %%    SLIDE 17   %%
 410 %%%%%%%%%%%%%%%%%%%%
 411 \begin{frame}{Experimental results } % slide: simulation setup used for the homogeneous-platform experiments
 412       \begin{femtoBlock}{}
 413         \begin{itemize}
 414          \small
 415            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 416            \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
 417            \item Each node in the cluster has 18 frequency values from \textbf{2.5~GHz} to \textbf{800~MHz}.\medskip
 418            \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
 419            \item $P_d=20\,\mathrm{W}$,  $P_s=4\,\mathrm{W}$. % units upright and thin-spaced from the value; math italic would read as variables
 420                 \end{itemize}
 421         \end{femtoBlock}
 422 \end{frame}
 423
 424
 425 %%%%%%%%%%%%%%%%%%%%
 426 %%    SLIDE 18   %%
 427 %%%%%%%%%%%%%%%%%%%%
 428 \begin{frame}{Experimental results}
 429   \begin{femtoBlock}{}
 430       \centering {
 431      \includegraphics[width=.35\textwidth]{c1/ep}
 432      \includegraphics[width=.35\textwidth]{c1/cg}
 433      \includegraphics[width=.35\textwidth]{c1/bt}}
 434
 435      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
 436  \end{femtoBlock}
 437 \end{frame}
 438
 439
 440   %%%%%%%%%%%%%%%%%%%%
 441 %%    SLIDE 19   %%
 442 %%%%%%%%%%%%%%%%%%%%
 443 \begin{frame}{Results comparison}
 444          \begin{block}{\small Rauber and Rünger's optimal scaling factor}
 445            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
 446             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
 447         \end{block}
 448
 449
 450     \centering {
 451          %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
 452          %\qquad
 453          %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
 454
 455
 456             \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}}
 457
 458 \end{frame}
 459
 460
 461 %%%%%%%%%%%%%%%%%%%%
 462 %%    SLIDE 20   %%
 463 %%%%%%%%%%%%%%%%%%%%
 464 \begin{frame}{The proposed new energy model}
 465     \vspace{-0.75cm}
 466   \begin{figure}
 467   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
 468   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
 469   \end{figure}
 470 \end{frame}
 471
 472
 473 %%%%%%%%%%%%%%%%%%%%
 474 %%    SLIDE 21   %%
 475 %%%%%%%%%%%%%%%%%%%%
 476 \begin{frame}{\large Comparing the new model with Rauber's model }
 477  \vspace{0.1cm}
 478  \centering
 479     \includegraphics[width=.45\textwidth]{c1/energy_con}
 480
 481     \includegraphics[width=.5\textwidth]{c1/compare-scales}
 482 \end{frame}
 483
 484
 485
 486
 487    % \begin{frame}{Summary}
 488      % \begin{femtoBlock}{}
 489      % \begin{itemize}
 490       %\small
 491        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
 492        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
 493          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip
 494          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
 495                % performance}. \medskip
 496          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
 497          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
 498          %\end{itemize}
 499
 500         %\end{femtoBlock}
 501 %\end{frame}
 502
 503
 504 %%%%%%%%%%%%%%%%%%%%
 505 %%    SLIDE 22    %%
 506 %%%%%%%%%%%%%%%%%%%%
 507
 508
 509 \begin{frame}{The second contribution}
 510
 511 \section{\small {Energy optimization of a heterogeneous platform}}
 512 \begin{center}
 513
 514
 515 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
 516 \end{center}
 517  \end{frame}
 518
 519
 520
 521 %%%%%%%%%%%%%%%%%%%%
 522 %%    SLIDE 23    %%
 523 %%%%%%%%%%%%%%%%%%%%
 524
 525 \begin{frame}{Objectives}
 526         \begin{femtoBlock}{} \vspace{-12 mm}
 527                 \begin{itemize} \small
 528                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running
 529                           over a heterogeneous platform (cluster or Grid). \medskip
 530                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
 531                           message passing iterative applications.    \medskip
 532
 533                    \item  Computing  the vector of scaling factors ($S_1, S_2, \dots, S_N$)  producing \textcolor{blue} {the optimal trade-off} between
 534                           the energy consumption and the performance.
 535                 \end{itemize}
 536
 537           \vspace{-10 mm}
 538         \end{femtoBlock}
 539 \end{frame}
 540
 541
 542 %%%%%%%%%%%%%%%%%%%%
 543 %%    SLIDE 24    %%
 544 %%%%%%%%%%%%%%%%%%%%
 545 \begin{frame}{The execution time model}
 546       \vspace{-8 mm}
 547      \begin{figure}[!t]
 548        \centering
 549        \includegraphics[scale=0.5]{c2/commtasks}
 550        \label{fig:heter}
 551      \end{figure}
 552        \vspace{-12 mm}
 553        \medskip
 554
 555     \begin{block}{\small The execution time prediction model}
 556     \begin{equation}
 557      \label{eq:perf}
 558      \small\textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
 559     \end{equation}
 560     \end{block}
 561  \small  Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{communication~times + slack~times}$
 562
 563 \end{frame}
 564
 565  %%%%%%%%%%%%%%%%%%%%
 566 %%    SLIDE 25    %%
 567 %%%%%%%%%%%%%%%%%%%%
 568  \begin{frame}{The energy consumption model} % slide: overall energy = dynamic part (first sum) + static part (second sum)
 569     The overall energy consumption of a message passing synchronous  application executed over
 570      a heterogeneous platform can be computed as  follows:
 571     \begin{multline}
 572      \label{eq:energy}
 573      \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
 574      \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{j=1,2,\dots,N} (Tcp_j \cdot S_{j}) + \min_{j=1,2,\dots,N} (Tcm_j)))} % parentheses balanced; inner max/min use j so they do not shadow the summation index i
 575       \hspace{10 mm}
 576     \end{multline}
 577     \underline{where}:\\
 578     \textcolor{blue}{N} : is the number of nodes.
 579 \end{frame}
 580
 581
 582 %%%%%%%%%%%%%%%%%%%%
 583 %%    SLIDE 26    %%
 584 %%%%%%%%%%%%%%%%%%%%
 585   \begin{frame}{The  energy  model example for heter. cluster}
 586   \vspace{-0.5cm}
 587  \begin{figure}
 588   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
 589   %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
 590   \end{figure}
 591  \end{frame}
 592
 593
 594
 595
 596 %%%%%%%%%%%%%%%%%%%%
 597 %%    SLIDE 27    %%
 598 %%%%%%%%%%%%%%%%%%%%
 599 %\begin{frame}{The trade-off between energy  and performance}
 600    % \vspace{-7 mm}
 601     %\begin{figure}
 602    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
 603    % \end{figure}
 604    % \vspace{-7 mm}
 605    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}}
 606     %{E_{Max}}$}. \\
 607     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
 608
 609    %  \begin{block}{\small The tradeoff model}
 610     % \begin{equation}
 611     %  \label{eq:max}
 612     %  \textcolor{red}{MaxDist} =
 613      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
 614       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
 615       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
 616       %\end{equation}
 617     % \end{block}
 618 %\end{frame}
 619
 620
 621 %%%%%%%%%%%%%%%%%%%%
 622 %%    SLIDE 28    %%
 623 %%%%%%%%%%%%%%%%%%%%
 624  \begin{frame}{The scaling algorithm for heter. cluster}
 625
 626  \centering
 627    \includegraphics[width=.52\textwidth]{algo-heter}
 628  \end{frame}
 629
 630
 631  %%%%%%%%%%%%%%%%%%%%
 632 %%    SLIDE 29    %%
 633 %%%%%%%%%%%%%%%%%%%%
 634  \begin{frame}{The scaling algorithm example}
 635  \vspace{-0.5cm}
 636  \centering
 637
 638   \begin{figure}
 639   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
 640  % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
 641   \end{figure}
 642 \end{frame}
 643
 644
 645
 646
 647 %%%%%%%%%%%%%%%%%%%%
 648 %%    SLIDE 30    %%
 649 %%%%%%%%%%%%%%%%%%%%
 650 \begin{frame}{Experiments over a heterogeneous cluster  }
 651         \begin{itemize}
 652          \small
 653            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 654            \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
 655            \item Four types of processors with different computing powers were used.\medskip
 656            \item The benchmarks were executed with different numbers of nodes, ranging from 4 to 144 nodes.\medskip
 657            \item It was assumed that the total power consumption of the CPU consists of 80\% dynamic power and 20\% static power.
 658                   \medskip
 659
 660         \end{itemize}
 661
 662 \end{frame}
 663
 664
 665 %%%%%%%%%%%%%%%%%%%%
 666 %%    SLIDE 31    %%
 667 %%%%%%%%%%%%%%%%%%%%
 668 \begin{frame}{The experimental results}
 669    \vspace{-5 mm}
 670    \begin{figure}[!t]
 671    \centering
 672     \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
 673
 674     \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
 675      for the class C of the NAS Benchmarks executed over 8 nodes}
 676
 677    \end{figure}
 678 \end{frame}
 679
 680
 681
 682 %%%%%%%%%%%%%%%%%%%%
 683 %%    SLIDE 32    %%
 684 %%%%%%%%%%%%%%%%%%%%
 685 \begin{frame}{The experimental results}
 686    \vspace{-5 mm}
 687    \begin{figure}[!t]
 688    \centering
 689
 690     \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
 691
 692    \textcolor{blue}{On average, it degrades  by \textcolor{red}{3.8\%} the performance
 693      of NAS Benchmarks class C executed over 8 nodes}
 694      \end{figure}
 695 \end{frame}
 696
 697
 698
 699 %%%%%%%%%%%%%%%%%%%%
 700 %%    SLIDE 33    %%
 701 %%%%%%%%%%%%%%%%%%%%
 702 \begin{frame}{The results of the three power scenarios}
 703    \vspace{-5 mm}
 704    \begin{figure}[!t]
 705    \centering
 706    \includegraphics[width=.55\textwidth]{c2/three_power.pdf}
 707    \vspace{10 mm}
 708    \includegraphics[width=.55\textwidth]{c2/three_scenarios.pdf}
 709    \end{figure}
 710 \end{frame}
 711
 712
 713
 714 %%%%%%%%%%%%%%%%%%%%
 715 %%    SLIDE 34    %%
 716 %%%%%%%%%%%%%%%%%%%%
 717 \begin{frame}{Comparing the objective function to EDP}
 718
 719      EDP (energy-delay product) is the product of the energy consumption and the delay.
 720     \vspace{-5 mm}
 721     \begin{figure}[!t]
 722     \centering
 723     \includegraphics[width=.55\textwidth]{c2/avg_compare.pdf}
 724
 725     \includegraphics[width=.55\textwidth]{c2/compare_with_EDP.pdf}
 726     \end{figure}
 727 \end{frame}
 728
 729
 730
 731
 732 %%%%%%%%%%%%%%%%%%%%
 733 %%    SLIDE 35    %%
 734 %%%%%%%%%%%%%%%%%%%%
 735 %\begin{frame}{Energy optimization of grid platform}
 736   % \begin{figure}[!t]
 737    % \centering
 738          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
 739
 740         %   \small  10 sites distributed over France and Luxembourg
 741         %\end{figure}
 742 %\end{frame}
 743
 744
 745 %%%%%%%%%%%%%%%%%%%%
 746 %%    SLIDE 36    %%
 747 %%%%%%%%%%%%%%%%%%%%
 748 \begin{frame}{The grid architecture}
 749 \begin{center}
 750 \includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
 751 \end{center}
 752
 753  %\begin{frame}{Performance, Energy and trade-off models} \small
 754   %\begin{block}{\small The performance model of grid}
 755    % \begin{equation}
 756   %\label{eq:perf}
 757   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
 758  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
 759 %\end{equation}
 760     %\end{block}
 761
 762
 763  %\begin{block}{\small The energy model of grid}\small
 764   %  \begin{equation}
 765   %\label{eq:energy}
 766  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +
 767 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
 768 %\end{equation}
 769    % \end{block}
 770
 771 %\begin{block}{\small The trade-off model of grid}
 772 %\small
 773     %\begin{equation}
 774    %\label{eq:max}
 775   %\MaxDist =
 776   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
 777    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
 778     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
 779 %\end{equation}
 780    % \end{block}
 781
 782
 783  \end{frame}
 784
 785
 786
 787 %%%%%%%%%%%%%%%%%%%%
 788 %%    SLIDE 37    %%
 789 %%%%%%%%%%%%%%%%%%%%
 790  \begin{frame}{Experiments over Grid'5000}
 791
 792    \textcolor{blue}{The experiments were conducted using three
 793           clusters distributed over one or two sites.}
 794            \vspace{-7 mm}
 795           \begin{center}
 796           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
 797           \end{center}
 798       \vspace{-10 mm}
 799   \textcolor{blue}{Grid'5000 power measurement tools were used.}
 800         \vspace{-9 mm}
 801   \begin{center}
 802           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
 803           \end{center}
 804
 805
 806 \end{frame}
 807
 808
 809
 810
 811 %%%%%%%%%%%%%%%%%%%%
 812 %%    SLIDE 38    %%
 813 %%%%%%%%%%%%%%%%%%%%
 814 \begin{frame}{Experiments over Grid'5000}
 815
 816    \begin{minipage}{0.4\textwidth}
 817        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
 818         %\textcolor{red}{30\%}}
 819      \small \textcolor{blue}{The average energy saving =  \textcolor{red}{30\%}}
 820    \end{minipage}
 821      \begin{minipage}{0.55\textwidth}
 822         \begin{figure}[h!]
 823           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
 824      \end{figure}
 825 \end{minipage}
 826
 827          \begin{minipage}{0.4\textwidth}
 828            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
 829                 %performance by \textcolor{red}{3.2\%}}
 830       \small  \textcolor{blue}{The average performance degradation  =  \textcolor{red}{3.2\%}}
 831         \end{minipage}
 832        \begin{minipage}{0.55\textwidth}
 833          \begin{figure}[h!]
 834            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
 835          \end{figure}
 836           \end{minipage}
 837  \end{frame}
 838
 839
 840
 841 %%%%%%%%%%%%%%%%%%%%
 842 %%    SLIDE 39    %%
 843 %%%%%%%%%%%%%%%%%%%%
 844 \begin{frame}{Experiments over Grid'5000}
 845    \textcolor{blue}{One core  and Multi-cores per node results:}
 846
 847   \begin{figure}[h!]
 848   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
 849   \hspace{0.3cm}
 850   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
 851   \end{figure}
 852
 853   \centering \small \textcolor{blue}{Using the multi-core per node scenario decreases the computation-to-communication ratio}.
 854 \end{frame}
 855
 856
 857
 858 %\begin{frame}{Summary}
 859 %\begin{itemize}
 860      % \small
 861         % \item  Two scaling algorithm were applies to \textcolor{blue}{heterogeneous %cluster} and \textcolor{blue}{grid}.
 862         % \item  A new \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
 863       %   \item  The experimental results ere conducted over \textcolor{blue}{SimGrid}  simulators and real
 864           %test-bed \textcolor{blue}{Grid'5000}.
 865
 866          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
 867         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
 868       %    clusters.
 869
 870          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
 871         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
 872
 873        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
 874      %    \end{itemize}
 875 %\end{frame}
 876
 877
 878 %%%%%%%%%%%%%%%%%%%%
 879 %%    SLIDE 40    %%
 880 %%%%%%%%%%%%%%%%%%%%
 881 \section{\small {Energy optimization of asynchronous applications}}
 882 \begin{frame}{The third contribution}
 883 \begin{center}
 884 \bf  \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing  applications}
 885 \end{center}
 886  \end{frame}
 887
 888
 889
 890 %%%%%%%%%%%%%%%%%%%%
 891 %%    SLIDE 41   %%
 892 %%%%%%%%%%%%%%%%%%%%
 893 \begin{frame}{Problem definition}\vspace{0.8 mm}
 894 \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
 895 \vspace{-8 mm}
 896 \begin{figure}
 897  \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
 898  %\includegraphics[width=0.6\textwidth]{syn/a-503}
 899   \end{figure}
 900 \end{frame}
 901
 902
 903
 904 %%%%%%%%%%%%%%%%%%%%
 905 %%    SLIDE 42   %%
 906 %%%%%%%%%%%%%%%%%%%%
 907 \begin{frame}{Problem definition}\vspace{0.8 mm}
 908 \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
 909 \vspace{-8 mm}
 910 \begin{figure}
 911  \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
 912  %\includegraphics[width=0.6\textwidth]{asyn/a-440}
 913   \end{figure}
 914 \end{frame}
 915
 916
 917
 918 %%%%%%%%%%%%%%%%%%%%
 919 %%    SLIDE 43   %%
 920 %%%%%%%%%%%%%%%%%%%%
 921 \begin{frame}{Solution}\vspace{0.8mm}
 922 \textcolor{blue}{Using asynchronous communications with DVFS }
 923 \vspace{-8 mm}
 924 \begin{figure}
 925   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
 926   %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
 927   \end{figure}
 928 \end{frame}
 929
 930
 931
 932
 933 %%%%%%%%%%%%%%%%%%%%
 934 %%    SLIDE 44   %%
 935 %%%%%%%%%%%%%%%%%%%%
 936 %\begin{frame}{The performance models}
 937
 938 %\begin{block}{\small The performance model of Asynch. Applications}\small
 939 %\begin{equation}
 940   %\label{eq:asyn_time}
 941  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
 942 %\end{equation}
 943 %\end{block}
 944
 945
 946 %\begin{block}{\small The performance model of Hybrid Applications}\small
 947 %\begin{equation}
 948   %\label{eq:asyn_perf}
 949   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
 950    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
 951 %\end{equation}
 952 %\end{block}
 953
 954
 955 %\end{frame}
 956
 957
 958
 959 %%%%%%%%%%%%%%%%%%%%
 960 %%    SLIDE 45   %%
 961 %%%%%%%%%%%%%%%%%%%%
 962 %\begin{frame}{The energy consumption models}
 963
 964 %\begin{block}{\small The energy model of Asynch. Applications}\small
 965 %\begin{equation}
 966   %\label{eq:asyn_energy1}
 967 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
 968 %\end{equation}
 969 %\end{block}
 970
 971
 972 %\begin{block}{\small The energy model of Hybrid Applications}\small
 973 %\begin{multline}
 974   %\label{eq:asyn_energy}
 975  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
 976 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
 977 %\end{multline}
 978 %\end{block}
 979 %\end{frame}
 980
 981
 982
 983 %%%%%%%%%%%%%%%%%%%%
 984 %%    SLIDE 44   %%
 985 %%%%%%%%%%%%%%%%%%%%
 986 \begin{frame}{The performance and the energy models }
 987
 988 \centering
 989 \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
 990 \end{frame}
 991
 992
 993
 994
 995
 996 %%%%%%%%%%%%%%%%%%%%
 997 %%    SLIDE 46   %%
 998 %%%%%%%%%%%%%%%%%%%%
 999 \begin{frame}{The scaling algorithm for Asynch.  applications}
1000 \vspace{-0.1 mm}
1001 \centering
1002 \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
1003 \end{frame}
1004
1005
1006
1007 %%%%%%%%%%%%%%%%%%%%
1008 %%    SLIDE 47   %%
1009 %%%%%%%%%%%%%%%%%%%%
1010 \begin{frame}{The experiments}
1011    \vspace{-5 mm}
1012    \begin{figure}[!t]
1013    \begin{itemize}
1014       \small
1015         \item The architecture of the grid:
1016    \end{itemize}
1017     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
1018    \end{figure}
1019    \begin{itemize}
1020       \small
1021         \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1022         \item Evaluating the application over the simulator and Grid'5000.
1023    \end{itemize}
1024 \end{frame}
1025
1026
1027
1028 %%%%%%%%%%%%%%%%%%%%
1029 %%    SLIDE 48   %%
1030 %%%%%%%%%%%%%%%%%%%%
1031 \begin{frame}{The simulation results}
1032 \centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1033
1034 \centering
1035     \includegraphics[scale=0.42]{c3/energy_saving.eps}
1036
1037  \centering  The average energy saving  = \textcolor{red}{22\%}
1038 \end{frame}
1039
1040
1041
1042 %%%%%%%%%%%%%%%%%%%%
1043 %%    SLIDE 49   %%
1044 %%%%%%%%%%%%%%%%%%%%
1045 \begin{frame}{The simulation results}
1046 \centering
1047
1048      \includegraphics[scale=0.42]{c3/perf_degra.eps}
1049
1050  \centering    The average speed-up  = \textcolor{red}{5.72\%}
1051 \end{frame}
1052
1053
1054
1055 %%%%%%%%%%%%%%%%%%%%
1056 %%    SLIDE 50   %%
1057 %%%%%%%%%%%%%%%%%%%%
1058  \begin{frame}{The Grid'5000 results}
1059    \vspace{-20 mm}
1060    \begin{figure}[!t]
1061    \centering
1062    \hspace{-8 mm}
1063     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1064     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1065    \end{figure}
1066     \vspace{-5 mm}
1067      \centering \footnotesize
1068 The average energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1069 \end{frame}
1070
1071
1072 %%%%%%%%%%%%%%%%%%%%
1073 %%    SLIDE 51   %%
1074 %%%%%%%%%%%%%%%%%%%%
1075 \begin{frame}{The comparison results}
1076  \centering
1077     \includegraphics[width=.5\textwidth]{c3/compare.eps}
1078
1079     \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1080 \end{frame}
1081
1082
1083
1084
1085 %%%%%%%%%%%%%%%%%%%%
1086 %%    SLIDE 52  %%
1087 %%%%%%%%%%%%%%%%%%%%
1088 \section{Conclusions and Perspectives}
1089 \begin{frame}{Conclusions}
1090 \begin{itemize}
1091
1092 \small  \barrow  Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over
1093 \textcolor{blue}{homogeneous and  heterogeneous clusters or grids}.
1094
1095
1096
1097 \small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
1098
1099 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1100
1101 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1102 Multi-splitting} method.
1103
1104 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over  the \textcolor{blue}{Grid'5000 testbed}.
1105
1106 \small  \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's  method} or to the \textcolor{blue}{EDP objective function}.
1107
1108
1109 \end{itemize}
1110 \end{frame}
1111
1112
1113
1114 %%%%%%%%%%%%%%%%%%%%
1115 %%    SLIDE 53   %%
1116 %%%%%%%%%%%%%%%%%%%%
1117 \begin{frame}{Publications}
1118
1119 \begin{block}{\small Journal Articles }\scriptsize
1120 \begin{enumerate}[$\lbrack$1$\rbrack$]
1121
1122 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational
1123       Science}, 2016.
1124
1125 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for
1126       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016 (submitted).
1127
1128 \end{enumerate}
1129 \end{block}
1130
1131
1132 \begin{block}{\small Conference Articles }\scriptsize
1133
1134 \begin{enumerate}[$\lbrack$1$\rbrack$]
1135
1136 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1137       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014},
1138       pp.~225--230. IEEE Computer Society, Milan, Italy (2014).
1139
1140 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1141       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1142       \textit{The 16\textsuperscript{th} PDSEC}, pp.~922--931. IEEE Computer Society, India (2015).
1143
1144 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
1145       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society,
1146       Paris (2016).
1147
1148 \end{enumerate}
1149
1150 \end{block}
1151 \end{frame}
1152
1153
1154 %%%%%%%%%%%%%%%%%%%%
1155 %%    SLIDE 54   %%
1156 %%%%%%%%%%%%%%%%%%%%
1157 \begin{frame}{Perspectives}
1158
1159 \begin{itemize}
1160
1161 \small  \barrow The proposed algorithms should  take into consideration the
1162 \textcolor{blue}{variability between some iterations}.
1163
1164 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1165
1166 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1167
1168 \small \barrow The results returned by the energy models should be compared to the values given by \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1169 \end{itemize}
1170
1171 \end{frame}
1172
1173 %%%%%%%%%%%%%%%%%%%%
1174 %%    SLIDE 55  %%
1175 %%%%%%%%%%%%%%%%%%%%
1176 \begin{frame}{Fin} \vspace{-10 mm}
1177
1178             \centering \Large \textcolor{blue}{Thank you for your attention}
1179
1180             \vspace{2cm}
1181             \centering \textcolor{blue}{ {\Large Questions?}}
1182
1183 \end{frame}
1184 \end{document}
1185 %  _____ ___ _   _
1186 % |  ___|_ _| \ | |
1187 % | |_   | ||  \| |
1188 % |  _|  | || |\  |
1189 % |_|   |___|_| \_|
1190 %