thesis-presentation/AhmedSlides.tex

   1  \documentclass{beamer}
   2 \usepackage{beamerthemefemto}
   3 \usepackage[latin1]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
   6 \usepackage{algorithm,algorithmicx,algpseudocode}
   7 \usepackage{graphicx,graphics}
   8 \usepackage{subfig}
   9 \usepackage{listings}
  10 \usepackage{colortbl}
  11 \usepackage{amsmath}
  12 \usepackage{xspace}
  13  \usepackage{movie15}
  14  \usepackage{animate}
  15 \usepackage{xmpmulti}
  16  \newcommand{\AG}[2][inline]{% Review note tagged "AG:" on a green background; optional #1 = extra \todo options (default: inline), #2 = note text
  17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace} % \todo is not defined yet here (todonotes is loaded further down the preamble); fine because the body is only expanded at use time
  18 \newcommand{\JC}[2][inline]{% Same interface as \AG, but tagged "JC:" on a light red background
  19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
  20 \definecolor{myblue}{RGB}{0,29,119}
  21 \newcommand{\Xsub}[2]{{\ensuremath{#1_\mathit{#2}}}}
  22 \usepackage{fixltx2e}
  23 %% used to put some subscripts lower, and make them more legible
  24 \newcommand{\fxheight}[1]{\ifx#1\relax\relax\else\rule{0pt}{1.52ex}#1\fi}
  25
  26 \newcommand{\CL}{\Xsub{C}{L}}
  27 \newcommand{\Dist}{\mathit{Dist}}
  28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
  29 \newcommand{\Eind}{\Xsub{E}{ind}}
  30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
  31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
  32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
  33 \newcommand{\Es}{\Xsub{E}{S}}
  34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}}
  35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}}
  36 \newcommand{\Fnew}{\Xsub{F}{new}}
  37 \newcommand{\Vnew}{\Xsub{V}{new}}
  38 \newcommand{\Vmax}{\Xsub{V}{max}}
  39 \newcommand{\Ileak}{\Xsub{I}{leak}}
  40 \newcommand{\Kdesign}{\Xsub{K}{design}}
  41 \newcommand{\MaxDist}{\mathit{Max}\Dist}
  42 \newcommand{\MinTcm}{\mathit{Min}\Tcm}
  43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
  44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
  45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
  46
  47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
  48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
  49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
  50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
  51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}}
  52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
  53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
  54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
  55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
  56 \newcommand{\Tnew}{\Xsub{T}{New}}
  57 \newcommand{\Told}{\Xsub{T}{Old}}
  58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
  59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
  60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
  61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
  62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
  63  \usepackage{pifont}
  64 \usepackage{xcolor}
  65 \definecolor{myblue}{RGB}{0,29,119}
  66 \usepackage[textsize=footnotesize]{todonotes}
  67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]}
  68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
  69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
  70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
  71
  72
  73
  74 %\title{Energy Consumption Optimization of Parallel Applications with
  75 %Iterations using CPU Frequency Scaling}
  76 \vspace{2cm}
  77
  78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm}
  79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
  80
  81 \date{}
  82 \vspace{-3cm}
  83 %  ____  _____ ____  _   _ _____
  84 % |  _ \| ____| __ )| | | |_   _|
  85 % | | | |  _| |  _ \| | | | | |
  86 % | |_| | |___| |_) | |_| | | |
  87 % |____/|_____|____/ \___/  |_|
  88 %
  89 \begin{document}
  90 \setbeamertemplate{background}{\titrefemto}
  91
  92 %%%%%%%%%%%%%%%%%%%%
  93 %%    SLIDE 01    %%
  94 %%%%%%%%%%%%%%%%%%%%
  95 \begin{frame}[plain]
  96 \vspace{1cm}
  97 \centering
  98    \titlepage
  99 \end{frame}
 100
 101
 102 %%%%%%%%%%%%%%%%%%%%
 103 %%    SLIDE 02    %%
 104 %%%%%%%%%%%%%%%%%%%%
 105 \setbeamertemplate{background}{\pagefemto}
 106 \begin{frame}{Outline}
 107
 108 \setbeamertemplate{section in toc}[sections numbered]
 109 \tableofcontents
 110 \end{frame}
 111
 112
 113 %%%%%%%%%%%%%%%%%%%%
 114 %%    SLIDE 03    %%
 115 %%%%%%%%%%%%%%%%%%%%
 116 \begin{frame}{Introduction and problem definition}
 117  \section{\small {Introduction and Problem definition}}
 118    \bf \textcolor{blue}{To get more computing power:}
 119      \begin{minipage}{0.5\textwidth}
 120       \textcolor{blue}{1)} \small  \bf \textcolor{black}{Increase the frequency of a  processor.\\ (limited due to overheating)}
 121     \end{minipage}%
 122     \begin{minipage}{0.6\textwidth}
 123
 124 \begin{figure}[h!]
 125
 126     \includegraphics[width=0.7\textwidth]{fig/freq-years}
 127     \end{figure}
 128     \end{minipage}%
 129     \vspace{0.2cm}
 130     \begin{minipage}{0.5\textwidth}
 131      \textcolor{blue}{2)} \small \bf \textcolor{black}{Use more nodes.}
 132
 133  \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}
 134
 135     \end{minipage}%
 136     \begin{minipage}{0.6\textwidth}
 137     \begin{figure}[h!]
 138      \includegraphics[width=0.7\textwidth]{fig/clusters}
 139     \end{figure}
 140     \end{minipage}%
 141  \end{frame}
 142
 143
 144
 145
 146  %%%%%%%%%%%%%%%%%%%
 147 %%    SLIDE 04   %%
 148 %%%%%%%%%%%%%%%%%%%%
 149 \begin{frame}{Techniques for energy consumption reduction}
 150
 151      \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
 152     \vspace{-0.9cm}
 153     \begin{figure}
 154      \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
 155      %\includegraphics[width=0.6\textwidth]{on-off/a-69}
 156     \end{figure}
 157  \end{frame}
 158
 159 %%%%%%%%%%%%%%%%%%%%
 160 %%    SLIDE 06    %%
 161 %%%%%%%%%%%%%%%%%%%%
 162 \begin{frame}{Techniques for energy consumption reduction}
 163
 164   \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency Scaling (DVFS)}
 165      \vspace{-0.9cm}
 166     \begin{figure}
 167     \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
 168      %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
 169     \end{figure}
 170     \end{frame}
 171
 172
 173
 174 %%%%%%%%%%%%%%%%%%%%
 175 %%    SLIDE 07    %%
 176 %%%%%%%%%%%%%%%%%%%%
 177 \begin{frame}{Motivations}
 178 \vspace{0.05cm}
 179 \section{\small {Motivations}}
 180 \textcolor{blue}{Why we used the DVFS method:}
 181 \vspace{-0.49cm}
 182 \begin{minipage}{0.5\textwidth}
 183     \vspace{-0.49cm}
 184       \begin{itemize}
 185        \item  \small \textcolor{black}{ The CPU is the component that consumes the  highest amount of energy in a node \textsuperscript{1}. }
 186
 187          \end{itemize}
 188
 189     \end{minipage}%
 190     \begin{minipage}{0.5\textwidth}
 191      \vspace{-0.49cm}
 192     \begin{figure}[h!]
 193      \includegraphics[width=0.85\textwidth]{fig/node-power}
 194
 195     \end{figure}
 196     \end{minipage}%
 197
 198   \begin{itemize} \item \small  \textcolor{black}{DVFS reduces the energy consumption while
 199    keeping all the nodes working.}
 200                 \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes.}  \end{itemize}
 201
 202 \vspace{-0.12cm}
 203
 204  \begin{block}{\textcolor{white}{Challenge and Objective}}
 205
 206         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it degrades the performance simultaneously.}
 207
 208                 \vspace{0.1cm}
 209  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
 210 \end{block}
 211
 212  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
 213 for a warehouse-sized computer.
 214
 215     \end{frame}
 216
 217
 218
 219 %%%%%%%%%%%%%%%%%%%%
 220 %%    SLIDE 08    %%
 221 %%%%%%%%%%%%%%%%%%%%
 222
 223
 224 \begin{frame}{The first contribution} % divider slide introducing the homogeneous-platform part
 225
 226 \section{\small{Energy optimization of a homogeneous platform}} % registers the outline entry
 227 %\vspace{-3cm}
 228  % \includegraphics[width=0.6\textwidth]{white.pdf}
 229
 230 \begin{center}
 231 \normalfont\bfseries \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
 232 \end{center}
 233  \end{frame}
 234
 235
 236
 237 %%%%%%%%%%%%%%%%%%%%
 238 %%    SLIDE 09    %%
 239 %%%%%%%%%%%%%%%%%%%%
 240
 241 \begin{frame}{Objectives}
 242         \begin{femtoBlock}{} \vspace{-12 mm}
 243                 \begin{itemize} \small
 244                    \item  Studying the effect of the scaling factor on the \textbf{energy consumption and performance} of parallel  applications with iterations. \medskip
 245
 246                    \item  Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
 247                    \item  Proposing an algorithm for selecting the scaling factor that produces  \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
 248                    \item  Comparing the proposed algorithm to existing methods.
 249
 250
 251                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
 252                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
 253                 \end{itemize}
 254                  %\let\thefootnote\relax\footnote{}
 255           \vspace{-10 mm}
 256         \end{femtoBlock}
 257 \end{frame}
 258
 259
 260
 261 %%%%%%%%%%%%%%%%%%%%
 262 %%    SLIDE 10    %%
 263 %%%%%%%%%%%%%%%%%%%%
 264
 265
 266 \begin{frame}{Execution of synchronous parallel tasks}
 267 \vspace{-0.5 cm}
 268 \begin{figure}
 269   \centering
 270   \subfloat[Synchronous imbalanced communications]{%
 271     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
 272   \subfloat[Synchronous imbalanced computations]{%
 273     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
 274  % \caption{Parallel tasks on homogeneous platform}
 275   \label{fig:homo}
 276 \end{figure}
 277
 278  \end{frame}
 279
 280
 281
 282
 283 %%%%%%%%%%%%%%%%%%%%
 284 %%    SLIDE 11   %%
 285 %%%%%%%%%%%%%%%%%%%%
 286 \begin{frame}{Energy model for a homogeneous platform} % defines dynamic power (eq:pd) and static power (eq:ps)
 287       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
 288        (\textcolor{red}{$P_s$}) power.
 289     \begin{equation}
 290      \label{eq:pd}
 291      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot C_L \cdot V^2 \cdot F}
 292    \end{equation}
 293     \scriptsize \underline{Where}: \\
 294     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm}  \textcolor{blue}{$C_L$}: load capacitance\\
 295     \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
 296    \begin{equation}
 297      \label{eq:ps} % no size switch inside the equation: \small is invalid in math mode and only altered the equation tag
 298      \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
 299    \end{equation}
 300     \underline{Where}:\\
 301         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage.  \hspace{28 mm}   \textcolor{blue}{$N_{trans}$}: number of transistors. \\
 302         \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent
 303              parameter.}
 304 \end{frame}
 305
 306 %%%%%%%%%%%%%%%%%%%%
 307 %%    SLIDE 12   %%
 308 %%%%%%%%%%%%%%%%%%%%
 309
 310 \begin{frame}{Energy model for a homogeneous platform}
 311
 312           The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.  \medskip
 313
 314
 315
 316         \begin{block}{\small Rauber and Rünger's energy model}
 317          $ E = P_{d} \cdot S_1^{-2} \cdot
 318          \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
 319             P_{s} \cdot S_1  \cdot T_1 \cdot N$
 320         \end{block}
 321            \textcolor{blue}{$S_1$}: the maximum scaling factor.\\
 322            \textcolor{blue}{$P_{d}$}: the dynamic power.\\
 323            \textcolor{blue}{$P_{s}$}: the static power.\\
 324            \textcolor{blue}{$T_1$}: the execution time of the slowest task.\\
 325            \textcolor{blue}{$T_i$}: the execution time of task i.\\
 326            \textcolor{blue}{$N$}:  the number of  nodes.
 327
 328 \end{frame}
 329
 330
 331 %%%%%%%%%%%%%%%%%%%%
 332 %%    SLIDE 13   %%
 333 %%%%%%%%%%%%%%%%%%%%
 334 \begin{frame}{Performance evaluation of MPI programs}
 335         \begin{femtoBlock}{}
 336               \vspace{-5 mm}
 337               \begin{block}{\small Execution time prediction model}
 338                      \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
 339           \end{block}
 340           \vspace{10 mm}
 341            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
 342            \quad%
 343            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
 344             \vspace{5 mm}
 345
 346            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
 347            \end{femtoBlock}
 348 \end{frame}
 349
 350
 351
 352
 353  %%%%%%%%%%%%%%%%%%%%
 354 %%    SLIDE 14   %%
 355 %%%%%%%%%%%%%%%%%%%%
 356 \begin{frame}{Performance and energy reduction trade-off}
 357         \begin{femtoBlock}{} \vspace{-15 mm}
 358                \begin{figure}
 359      \centering
 360      \subfloat[\small  Real relation.]{%
 361      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
 362      \quad%
 363      \subfloat[\small Converted relation.]{%
 364      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
 365   \label{fig:rel}
 366  % \caption{The energy and performance relation}
 367 \end{figure}
 368
 369  Where:~~~ $\textcolor{blue}{Performance} = execution~time^{-1}$
 370
 371 %\vspace{-0.3cm}
 372       \small
 373          \begin{block}{\small Our objective function}
 374          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}
 375                     (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} -
 376                      \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$}
 377
 378         \end{block}
 379         \end{femtoBlock}
 380
 381 \end{frame}
 382
 383 %%%%%%%%%%%%%%%%%%%%
 384 %%    SLIDE 15   %%
 385 %%%%%%%%%%%%%%%%%%%%
 386  \begin{frame}{Scaling factor selection algorithm}
 387 \vspace{-0.75cm}
 388      \begin{center}
 389       \includegraphics[width=.56 \textwidth]{c1/algo-homo}
 390      \end{center}
 391
 392 \end{frame}
 393
 394
 395 %%%%%%%%%%%%%%%%%%%%
 396 %%    SLIDE 16   %%
 397 %%%%%%%%%%%%%%%%%%%%
 398 \begin{frame}{Scaling algorithm example}
 399 \vspace{-0.75cm}
 400
 401      \begin{figure}
 402   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
 403   %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
 404   \end{figure}
 405 \end{frame}
 406
 407 %%%%%%%%%%%%%%%%%%%%
 408 %%    SLIDE 17   %%
 409 %%%%%%%%%%%%%%%%%%%%
 410 \begin{frame}{Experimental results } % experimental setup for the homogeneous platform
 411       \begin{femtoBlock}{}
 412         \begin{itemize}
 413          \small
 414            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 415            \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
 416            \item Each node in the cluster has 18 frequency values from \textbf{2.5\,GHz} to \textbf{800\,MHz}.\medskip % units are upright text, not math italics
 417            \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
 418            \item $P_d=20$\,W,  $P_s=4$\,W. % keep the unit outside math so it is not read as a variable W
 419                 \end{itemize}
 420         \end{femtoBlock}
 421 \end{frame}
 422
 423
 424 %%%%%%%%%%%%%%%%%%%%
 425 %%    SLIDE 18   %%
 426 %%%%%%%%%%%%%%%%%%%%
 427 \begin{frame}{Experimental results}
 428   \begin{femtoBlock}{}
 429       \centering {
 430      \includegraphics[width=.35\textwidth]{c1/ep}
 431      \includegraphics[width=.35\textwidth]{c1/cg}
 432      \includegraphics[width=.35\textwidth]{c1/bt}}
 433
 434      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
 435  \end{femtoBlock}
 436 \end{frame}
 437
 438
 439   %%%%%%%%%%%%%%%%%%%%
 440 %%    SLIDE 19   %%
 441 %%%%%%%%%%%%%%%%%%%%
 442 \begin{frame}{Results comparison}
 443          \begin{block}{\small Rauber and Rünger's optimal scaling factor}
 444            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
 445             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
 446         \end{block}
 447
 448
 449     \centering {
 450          %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
 451          %\qquad
 452          %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
 453
 454
 455             \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}}
 456
 457 \end{frame}
 458
 459
 460 %%%%%%%%%%%%%%%%%%%%
 461 %%    SLIDE 20   %%
 462 %%%%%%%%%%%%%%%%%%%%
 463 \begin{frame}{The proposed new energy model}
 464     \vspace{-0.75cm}
 465   \begin{figure}
 466   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
 467   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
 468   \end{figure}
 469 \end{frame}
 470
 471
 472 %%%%%%%%%%%%%%%%%%%%
 473 %%    SLIDE 21   %%
 474 %%%%%%%%%%%%%%%%%%%%
 475 \begin{frame}{\large Comparing the new model with Rauber's model }
 476  \vspace{0.1cm}
 477  \centering
 478     \includegraphics[width=.45\textwidth]{c1/energy_con}
 479
 480     \includegraphics[width=.5\textwidth]{c1/compare-scales}
 481 \end{frame}
 482
 483
 484
 485
 486    % \begin{frame}{Summary}
 487      % \begin{femtoBlock}{}
 488      % \begin{itemize}
 489       %\small
 490        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
 491        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
 492          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip
 493          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
 494                % performance}. \medskip
 495          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
 496          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
 497          %\end{itemize}
 498
 499         %\end{femtoBlock}
 500 %\end{frame}
 501
 502
 503 %%%%%%%%%%%%%%%%%%%%
 504 %%    SLIDE 22    %%
 505 %%%%%%%%%%%%%%%%%%%%
 506
 507
 508 \begin{frame}{The second contribution}
 509
 510 \section{\small {Energy optimization of a heterogeneous platform}}
 511 \begin{center}
 512
 513
 514 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
 515 \end{center}
 516  \end{frame}
 517
 518
 519
 520 %%%%%%%%%%%%%%%%%%%%
 521 %%    SLIDE 23    %%
 522 %%%%%%%%%%%%%%%%%%%%
 523
 524 \begin{frame}{Objectives}
 525         \begin{femtoBlock}{} \vspace{-12 mm}
 526                 \begin{itemize} \small
 527                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running
 528                           over a heterogeneous platform (cluster or Grid). \medskip
 529                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
 530                           message passing iterative applications.    \medskip
 531
 532                    \item  Computing  the vector of scaling factors ($S_1, S_2, \dots, S_n$)  producing \textcolor{blue} {the optimal trade-off} between
 533                           the energy consumption and the performance.
 534                 \end{itemize}
 535
 536           \vspace{-10 mm}
 537         \end{femtoBlock}
 538 \end{frame}
 539
 540
 541 %%%%%%%%%%%%%%%%%%%%
 542 %%    SLIDE 24    %%
 543 %%%%%%%%%%%%%%%%%%%%
 544 \begin{frame}{The execution time model} % task diagram followed by the execution time prediction formula (eq:perf)
 545       \vspace{-8 mm}
 546      \begin{figure}[!t]
 547        \centering
 548        \includegraphics[scale=0.5]{c2/commtasks}
 549        \label{fig:heter}
 550      \end{figure}
 551        \vspace{-12 mm}
 552        \medskip
 553
 554     \begin{block}{\small The execution time prediction model}
 555     \begin{equation}
 556      \label{eq:perf} % no size switch inside the equation: \small is invalid in math mode and only altered the equation tag
 557      \textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
 558     \end{equation}
 559     \end{block}
 560  \small  Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{\text{communication times} + \text{slack times}}$ % words go through \text (amsmath) instead of math italics
 561
 562 \end{frame}
 563
 564  %%%%%%%%%%%%%%%%%%%%
 565 %%    SLIDE 25    %%
 566 %%%%%%%%%%%%%%%%%%%%
 567  \begin{frame}{The energy consumption model} % overall energy = dynamic term (first line) + static term (second line)
 568     The overall energy consumption of a message passing synchronous  application executed over
 569      a heterogeneous platform can be computed as  follows:
 570     \begin{multline}
 571      \label{eq:energy} % second line: the parenthesis opened before Ps_i is now closed after the \min term
 572      \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
 573      \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i)))}}
 574       \hspace{10 mm}
 575     \end{multline}
 576     \underline{where}:\\
 577     \textcolor{blue}{$N$}: the number of nodes.
 578 \end{frame}
 579
 580
 581 %%%%%%%%%%%%%%%%%%%%
 582 %%    SLIDE 26    %%
 583 %%%%%%%%%%%%%%%%%%%%
 584   \begin{frame}{The  energy  model example for heter. cluster}
 585   \vspace{-0.5cm}
 586  \begin{figure}
 587   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
 588   %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
 589   \end{figure}
 590  \end{frame}
 591
 592
 593
 594
 595 %%%%%%%%%%%%%%%%%%%%
 596 %%    SLIDE 27    %%
 597 %%%%%%%%%%%%%%%%%%%%
 598 %\begin{frame}{The trade-off between energy  and performance}
 599    % \vspace{-7 mm}
 600     %\begin{figure}
 601    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
 602    % \end{figure}
 603    % \vspace{-7 mm}
 604    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}}
 605     %{E_{Max}}$}. \\
 606     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
 607
 608    %  \begin{block}{\small The tradeoff model}
 609     % \begin{equation}
 610     %  \label{eq:max}
 611     %  \textcolor{red}{MaxDist} =
 612      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
 613       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
 614       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
 615       %\end{equation}
 616     % \end{block}
 617 %\end{frame}
 618
 619
 620 %%%%%%%%%%%%%%%%%%%%
 621 %%    SLIDE 28    %%
 622 %%%%%%%%%%%%%%%%%%%%
 623  \begin{frame}{The scaling algorithm for heter. cluster}
 624
 625  \centering
 626    \includegraphics[width=.52\textwidth]{algo-heter}
 627  \end{frame}
 628
 629
 630  %%%%%%%%%%%%%%%%%%%%
 631 %%    SLIDE 29    %%
 632 %%%%%%%%%%%%%%%%%%%%
 633  \begin{frame}{The scaling algorithm example}
 634  \vspace{-0.5cm}
 635  \centering
 636
 637   \begin{figure}
 638   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
 639  % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
 640   \end{figure}
 641 \end{frame}
 642
 643
 644
 645
 646 %%%%%%%%%%%%%%%%%%%%
 647 %%    SLIDE 30    %%
 648 %%%%%%%%%%%%%%%%%%%%
 649 \begin{frame}{Experiments over a heterogeneous cluster  }
 650         \begin{itemize}
 651          \small
 652            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 653            \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
 654            \item Four types of processors with different computing powers were used.\medskip
 655            \item The benchmarks were executed with different numbers of nodes ranging from 4 to 144 nodes.\medskip
 656            \item It was assumed that the total power consumption of the CPU consists of 80\% dynamic power and 20\% static power.
 657                   \medskip
 658
 659         \end{itemize}
 660
 661 \end{frame}
 662
 663
 664 %%%%%%%%%%%%%%%%%%%%
 665 %%    SLIDE 31    %%
 666 %%%%%%%%%%%%%%%%%%%%
 667 \begin{frame}{The experimental results}
 668    \vspace{-5 mm}
 669    \begin{figure}[!t]
 670    \centering
 671     \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
 672
 673     \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
 674      for the class C of the NAS Benchmarks executed over 8 nodes}
 675
 676    \end{figure}
 677 \end{frame}
 678
 679
 680
 681 %%%%%%%%%%%%%%%%%%%%
 682 %%    SLIDE 32    %%
 683 %%%%%%%%%%%%%%%%%%%%
 684 \begin{frame}{The experimental results}
 685    \vspace{-5 mm}
 686    \begin{figure}[!t]
 687    \centering
 688
 689     \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
 690
 691    \textcolor{blue}{On average, it degrades  by \textcolor{red}{3.8\%} the performance
 692      of NAS Benchmarks class C executed over 8 nodes}
 693      \end{figure}
 694 \end{frame}
 695
 696
 697
 698 %%%%%%%%%%%%%%%%%%%%
 699 %%    SLIDE 33    %%
 700 %%%%%%%%%%%%%%%%%%%%
 701 \begin{frame}{The results of the three power scenarios}
 702    \vspace{-5 mm}
 703    \begin{figure}[!t]
 704    \centering
 705    \includegraphics[width=.55\textwidth]{c2/three_power.pdf}
 706    \vspace{10 mm}
 707    \includegraphics[width=.55\textwidth]{c2/three_scenarios.pdf}
 708    \end{figure}
 709 \end{frame}
 710
 711
 712
 713 %%%%%%%%%%%%%%%%%%%%
 714 %%    SLIDE 34    %%
 715 %%%%%%%%%%%%%%%%%%%%
 716 \begin{frame}{Comparing the objective function to EDP}
 717
 718      EDP is the product of the energy consumption and the delay.
 719     \vspace{-5 mm}
 720     \begin{figure}[!t]
 721     \centering
 722     \includegraphics[width=.55\textwidth]{c2/avg_compare.pdf}
 723
 724     \includegraphics[width=.55\textwidth]{c2/compare_with_EDP.pdf}
 725     \end{figure}
 726 \end{frame}
 727
 728
 729
 730
 731 %%%%%%%%%%%%%%%%%%%%
 732 %%    SLIDE 35    %%
 733 %%%%%%%%%%%%%%%%%%%%
 734 %\begin{frame}{Energy optimization of grid platform}
 735   % \begin{figure}[!t]
 736    % \centering
 737          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
 738
 739         %   \small  10 sites distributed over France and Luxembourg
 740         %\end{figure}
 741 %\end{frame}
 742
 743
 744 %%%%%%%%%%%%%%%%%%%%
 745 %%    SLIDE 36    %%
 746 %%%%%%%%%%%%%%%%%%%%
 747 \begin{frame}{The grid architecture}
 748 \begin{center}
 749 \includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
 750 \end{center}
 751
 752  %\begin{frame}{Performance, Energy and trade-off models} \small
 753   %\begin{block}{\small The performance model of grid}
 754    % \begin{equation}
 755   %\label{eq:perf}
 756   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
 757  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
 758 %\end{equation}
 759     %\end{block}
 760
 761
 762  %\begin{block}{\small The energy model of grid}\small
 763   %  \begin{equation}
 764   %\label{eq:energy}
 765  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +
 766 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
 767 %\end{equation}
 768    % \end{block}
 769
 770 %\begin{block}{\small The trade-off model of grid}
 771 %\small
 772     %\begin{equation}
 773    %\label{eq:max}
 774   %\MaxDist =
 775   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
 776    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
 777     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
 778 %\end{equation}
 779    % \end{block}
 780
 781
 782  \end{frame}
 783
 784
 785
 786 %%%%%%%%%%%%%%%%%%%%
 787 %%    SLIDE 37    %%
 788 %%%%%%%%%%%%%%%%%%%%
 789  \begin{frame}{Experiments over Grid'5000}
 790
 791    \textcolor{blue}{The experiments were conducted using three
 792           clusters distributed over one or two sites.}
 793            \vspace{-7 mm}
 794           \begin{center}
 795           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
 796           \end{center}
 797       \vspace{-10 mm}
 798   \textcolor{blue}{Grid'5000 power measurement tools were used.}
 799         \vspace{-9 mm}
 800   \begin{center}
 801           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
 802           \end{center}
 803
 804
 805 \end{frame}
 806
 807
 808
 809
 810 %%%%%%%%%%%%%%%%%%%%
 811 %%    SLIDE 38    %%
 812 %%%%%%%%%%%%%%%%%%%%
 813 \begin{frame}{Experiments over Grid'5000}
 814
 815    \begin{minipage}{0.4\textwidth}
 816        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
 817         %\textcolor{red}{30\%}}
 818      \small \textcolor{blue}{The average energy saving =  \textcolor{red}{30\%}}
 819    \end{minipage}
 820      \begin{minipage}{0.55\textwidth}
 821         \begin{figure}[h!]
 822           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
 823      \end{figure}
 824 \end{minipage}
 825
 826          \begin{minipage}{0.4\textwidth}
 827            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
 828                 %performance by \textcolor{red}{3.2\%}}
 829       \small  \textcolor{blue}{The average performance degradation  =  \textcolor{red}{3.2\%}}
 830         \end{minipage}
 831        \begin{minipage}{0.55\textwidth}
 832          \begin{figure}[h!]
 833            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
 834          \end{figure}
 835           \end{minipage}
 836  \end{frame}
 837
 838
 839
 840 %%%%%%%%%%%%%%%%%%%%
 841 %%    SLIDE 39    %%
 842 %%%%%%%%%%%%%%%%%%%%
 843 \begin{frame}{Experiments over Grid'5000}
 844    \textcolor{blue}{One core  and Multi-cores per node results:}
 845
 846   \begin{figure}[h!]
 847   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
 848   \hspace{0.3cm}
 849   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
 850   \end{figure}
 851
 852   \centering \small \textcolor{blue}{Using multiple cores per node decreases the computations to communications ratio}.
 853 \end{frame}
 854
 855
 856
 857 %\begin{frame}{Summary}
 858 %\begin{itemize}
 859      % \small
 860         % \item  Two scaling algorithms were applied to \textcolor{blue}{heterogeneous clusters} and \textcolor{blue}{grids}.
 861         % \item  New \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
 862       %   \item  The experiments were conducted over the \textcolor{blue}{SimGrid} simulator and the real
 863           %test-bed \textcolor{blue}{Grid'5000}.
 864
 865          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
 866         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
 867       %    clusters.
 868
 869          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
 870         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
 871
 872        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
 873      %    \end{itemize}
 874 %\end{frame}
 875
 876
 877 %%%%%%%%%%%%%%%%%%%%
 878 %%    SLIDE 40    %%
 879 %%%%%%%%%%%%%%%%%%%%
 880 \begin{frame}{The third contribution}
 881 \section{\small {Energy optimization of asynchronous applications}}
 882 \begin{center}
 883 \bf  \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing  applications}
 884 \end{center}
 885  \end{frame}
 886
 887
 888
 889 %%%%%%%%%%%%%%%%%%%%
 890 %%    SLIDE 41   %%
 891 %%%%%%%%%%%%%%%%%%%%
 892 \begin{frame}{Problem definition}\vspace{0.8 mm}
 893 \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
 894 \vspace{-8 mm}
 895 \begin{figure}
 896  \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
 897  %\includegraphics[width=0.6\textwidth]{syn/a-503}
 898   \end{figure}
 899 \end{frame}
 900
 901
 902
 903 %%%%%%%%%%%%%%%%%%%%
 904 %%    SLIDE 42   %%
 905 %%%%%%%%%%%%%%%%%%%%
 906 \begin{frame}{Problem definition}\vspace{0.8 mm}
 907 \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
 908 \vspace{-8 mm}
 909 \begin{figure}
 910  \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
 911  %\includegraphics[width=0.6\textwidth]{asyn/a-440}
 912   \end{figure}
 913 \end{frame}
 914
 915
 916
 917 %%%%%%%%%%%%%%%%%%%%
 918 %%    SLIDE 43   %%
 919 %%%%%%%%%%%%%%%%%%%%
 920 \begin{frame}{Solution}\vspace{0.8mm}
 921 \textcolor{blue}{Using asynchronous communications with DVFS }
 922 \vspace{-8 mm}
 923 \begin{figure}
 924   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
 925   %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
 926   \end{figure}
 927 \end{frame}
 928
 929
 930
 931
 932 %%%%%%%%%%%%%%%%%%%%
 933 %%    SLIDE 44   %%
 934 %%%%%%%%%%%%%%%%%%%%
 935 %\begin{frame}{The performance models}
 936
 937 %\begin{block}{\small The performance model of Asynch. Applications}\small
 938 %\begin{equation}
 939   %\label{eq:asyn_time}
 940  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
 941 %\end{equation}
 942 %\end{block}
 943
 944
 945 %\begin{block}{\small The performance model of Hybrid Applications}\small
 946 %\begin{equation}
 947   %\label{eq:asyn_perf}
 948   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
 949    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
 950 %\end{equation}
 951 %\end{block}
 952
 953
 954 %\end{frame}
 955
 956
 957
 958 %%%%%%%%%%%%%%%%%%%%
 959 %%    SLIDE 45   %%
 960 %%%%%%%%%%%%%%%%%%%%
 961 %\begin{frame}{The energy consumption models}
 962
 963 %\begin{block}{\small The energy model of Asynch. Applications}\small
 964 %\begin{equation}
 965   %\label{eq:asyn_energy1}
 966 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
 967 %\end{equation}
 968 %\end{block}
 969
 970
 971 %\begin{block}{\small The energy model of Hybrid Applications}\small
 972 %\begin{multline}
 973   %\label{eq:asyn_energy}
 974  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
 975 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
 976 %\end{multline}
 977 %\end{block}
 978 %\end{frame}
 979
 980
 981
 982 %%%%%%%%%%%%%%%%%%%%
 983 %%    SLIDE 44   %%
 984 %%%%%%%%%%%%%%%%%%%%
 985 \begin{frame}{The performance and the energy models }
 986
 987 \centering
 988 \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
 989 \end{frame}
 990
 991
 992
 993
 994
 995 %%%%%%%%%%%%%%%%%%%%
 996 %%    SLIDE 46   %%
 997 %%%%%%%%%%%%%%%%%%%%
 998 \begin{frame}{The scaling algorithm for Asynch.  applications}
 999 \vspace{-0.1 mm}
1000 \centering
1001 \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
1002 \end{frame}
1003
1004
1005
1006 %%%%%%%%%%%%%%%%%%%%
1007 %%    SLIDE 47   %%
1008 %%%%%%%%%%%%%%%%%%%%
1009 \begin{frame}{The experiments}
1010    \vspace{-5 mm}
1011    \begin{figure}[!t]
1012    \begin{itemize}
1013       \small
1014         \item The architecture of the grid:
1015    \end{itemize}
1016     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
1017    \end{figure}
1018    \begin{itemize}
1019       \small
1020         \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1021         \item Evaluating the application over the simulator and Grid'5000.
1022    \end{itemize}
1023 \end{frame}
1024
1025
1026
1027 %%%%%%%%%%%%%%%%%%%%
1028 %%    SLIDE 48   %%
1029 %%%%%%%%%%%%%%%%%%%%
1030 \begin{frame}{The simulation results}
1031 \centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1032
1033 \centering
1034     \includegraphics[scale=0.42]{c3/energy_saving.eps}
1035
1036  \centering  The average energy saving  = \textcolor{red}{22\%}
1037 \end{frame}
1038
1039
1040
1041 %%%%%%%%%%%%%%%%%%%%
1042 %%    SLIDE 49   %%
1043 %%%%%%%%%%%%%%%%%%%%
1044 \begin{frame}{The simulation results}
1045 \centering
1046
1047      \includegraphics[scale=0.42]{c3/perf_degra.eps}
1048
1049  \centering    The average speed-up  = \textcolor{red}{5.72\%}
1050 \end{frame}
1051
1052
1053
1054 %%%%%%%%%%%%%%%%%%%%
1055 %%    SLIDE 50   %%
1056 %%%%%%%%%%%%%%%%%%%%
1057  \begin{frame}{The Grid'5000 results}
1058    \vspace{-20 mm}
1059    \begin{figure}[!t]
1060    \centering
1061    \hspace{-8 mm}
1062     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1063     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1064    \end{figure}
1065     \vspace{-5 mm}
1066      \centering \footnotesize
1067 The average energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1068 \end{frame}
1069
1070
1071 %%%%%%%%%%%%%%%%%%%%
1072 %%    SLIDE 51   %%
1073 %%%%%%%%%%%%%%%%%%%%
1074 \begin{frame}{The comparison results}
1075  \centering
1076     \includegraphics[width=.5\textwidth]{c3/compare.eps}
1077
1078     \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1079 \end{frame}
1080
1081
1082
1083
1084 %%%%%%%%%%%%%%%%%%%%
1085 %%    SLIDE 52  %%
1086 %%%%%%%%%%%%%%%%%%%%
1087 \begin{frame}{Conclusions}
1088 \section{Conclusions and Perspectives}
1089 \begin{itemize}
1090
1091 \small  \barrow  Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over
1092 \textcolor{blue}{homogeneous and  heterogeneous clusters or grids}.
1093
1094
1095
1096 \small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
1097
1098 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1099
1100 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1101 Multi-splitting} method.
1102
1103 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over  the \textcolor{blue}{Grid'5000 testbed}.
1104
1105 \small  \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's  method} or to the \textcolor{blue}{EDP objective function}.
1106
1107
1108 \end{itemize}
1109 \end{frame}
1110
1111
1112
1113 %%%%%%%%%%%%%%%%%%%%
1114 %%    SLIDE 53   %%
1115 %%%%%%%%%%%%%%%%%%%%
1116 \begin{frame}{Publications}
1117
1118 \begin{block}{\small Journal Articles }\scriptsize
1119 \begin{enumerate}[$\lbrack$1$\rbrack$]
1120
1121 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational
1122       Science}, 2016.
1123
1124 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for
1125       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016 (submitted).
1126
1127 \end{enumerate}
1128 \end{block}
1129
1130
1131 \begin{block}{\small Conference Articles }\scriptsize
1132
1133 \begin{enumerate}[$\lbrack$1$\rbrack$]
1134
1135 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1136       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014},
1137       pp.~225--230. IEEE Computer Society, Milan, Italy (2014).
1138
1139 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1140       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1141       \textit{The 16\textsuperscript{th} PDSEC}, pp.~922--931. IEEE Computer Society, India (2015).
1142
1143 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
1144       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society,
1145       Paris (2016).
1146
1147 \end{enumerate}
1148
1149 \end{block}
1150 \end{frame}
1151
1152
1153 %%%%%%%%%%%%%%%%%%%%
1154 %%    SLIDE 54   %%
1155 %%%%%%%%%%%%%%%%%%%%
1156 \begin{frame}{Perspectives}
1157
1158 \begin{itemize}
1159
1160 \small  \barrow The proposed algorithms should  take into consideration the
1161 \textcolor{blue}{variability between some iterations}.
1162
1163 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1164
1165 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1166
1167 \small \barrow The results returned by the energy models should be compared to the values given by \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution.
1168 \end{itemize}
1169
1170 \end{frame}
1171
1172 %%%%%%%%%%%%%%%%%%%%
1173 %%    SLIDE 55  %%
1174 %%%%%%%%%%%%%%%%%%%%
1175 \begin{frame}{Fin} \vspace{-10 mm}
1176
1177             \centering \Large \textcolor{blue}{Thank you for listening}
1178
1179             \vspace{2cm}
1180             \centering \textcolor{blue}{ {\Large Questions?}}
1181
1182 \end{frame}
1183 \end{document}
1184 %  _____ ___ _   _
1185 % |  ___|_ _| \ | |
1186 % | |_   | ||  \| |
1187 % |  _|  | || |\  |
1188 % |_|   |___|_| \_|
1189 %