thesis-presentation/AhmedSlides.tex

   1  \documentclass{beamer}
   2 \usepackage{beamerthemefemto}
   3 \usepackage[latin1]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
   6 \usepackage{algorithm,algorithmicx,algpseudocode}
   7 \usepackage{graphicx,graphics}
   8 \usepackage{subfig}
   9 \usepackage{listings}
  10 \usepackage{colortbl}
  11 \usepackage{amsmath}
  12 \usepackage{xspace}
  13  \usepackage{movie15}
  14  \usepackage{animate}
  15 \usepackage{xmpmulti}
  16  \newcommand{\AG}[2][inline]{% Review note labelled "AG:" on a green background; optional #1 (default: inline) is appended to the \todo options, #2 is the note text.
  17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace} % \todo comes from todonotes, which is loaded further down the preamble.
  18 \newcommand{\JC}[2][inline]{% Same pattern as \AG, labelled "JC:" on a light red background.
  19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace} % \xspace comes from the xspace package loaded above.
  20 \definecolor{myblue}{RGB}{0,29,119}
  21 \newcommand{\Xsub}[2]{{\ensuremath{#1_\mathit{#2}}}} % Typesets #1 with #2 as a \mathit subscript; \ensuremath allows use in text or math, and the outer braces keep the result one group so a caller can attach a further subscript.
  22 \usepackage{fixltx2e}
  23 %% Strut helper: used to put some subscripts lower, and make them more legible.
  24 \newcommand{\fxheight}[1]{\ifx#1\relax\relax\else\rule{0pt}{1.52ex}#1\fi} % An empty #1 produces nothing; otherwise a zero-width rule of height 1.52ex is inserted before #1.
  25
  26 \newcommand{\CL}{\Xsub{C}{L}} % Notation shortcuts: most are a base letter plus an italic word subscript built with \Xsub.
  27 \newcommand{\Dist}{\mathit{Dist}} % Plain italic word; uses \mathit directly, without \ensuremath.
  28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
  29 \newcommand{\Eind}{\Xsub{E}{ind}}
  30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
  31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
  32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
  33 \newcommand{\Es}{\Xsub{E}{S}}
  34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}} % Optional index #1 is set as a second subscript, pulled left with \!.
  35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}} % Optional index #1 is passed through \fxheight before being set as a second subscript.
  36 \newcommand{\Fnew}{\Xsub{F}{new}}
  37 \newcommand{\Vnew}{\Xsub{V}{new}}
  38 \newcommand{\Vmax}{\Xsub{V}{max}}
  39 \newcommand{\Ileak}{\Xsub{I}{leak}}
  40 \newcommand{\Kdesign}{\Xsub{K}{design}}
  41 \newcommand{\MaxDist}{\mathit{Max}\Dist} % Italic "Max" immediately followed by \Dist.
  42 \newcommand{\MinTcm}{\mathit{Min}\Tcm} % \Tcm is defined further down; the body is only expanded when \MinTcm is used.
  43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
  44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
  45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
  46
  47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
  48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
  49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
  50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
  51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}} % Optional index #1 is set as a second subscript with no extra adjustment.
  52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
  53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
  54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
  55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
  56 \newcommand{\Tnew}{\Xsub{T}{New}}
  57 \newcommand{\Told}{\Xsub{T}{Old}}
  58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
  59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
  60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
  61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
  62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
  63  \usepackage{pifont}
  64 \usepackage{xcolor}
  65 \definecolor{myblue}{RGB}{0,29,119}
  66 \usepackage[textsize=footnotesize]{todonotes}
  67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]}
  68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
  69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
  70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
  71
  72
  73
  74 %\title{Energy Consumption Optimization of Parallel Applications with
  75 %Iterations using CPU Frequency Scaling}
  76 \vspace{2cm}
  77
  78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm}
  79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
  80
  81 \date{}
  82 \vspace{-3cm}
  83 %  ____  _____ ____  _   _ _____
  84 % |  _ \| ____| __ )| | | |_   _|
  85 % | | | |  _| |  _ \| | | | | |
  86 % | |_| | |___| |_) | |_| | | |
  87 % |____/|_____|____/ \___/  |_|
  88 %
  89 \begin{document}
  90 \setbeamertemplate{background}{\titrefemto}
  91
  92 %%%%%%%%%%%%%%%%%%%%
  93 %%    SLIDE 01    %%
  94 %%%%%%%%%%%%%%%%%%%%
  95 \begin{frame}[plain]
  96 \vspace{1cm}
  97 \centering
  98    \titlepage
  99 \end{frame}
 100
 101
 102 %%%%%%%%%%%%%%%%%%%%
 103 %%    SLIDE 02    %%
 104 %%%%%%%%%%%%%%%%%%%%
 105 \setbeamertemplate{background}{\pagefemto}
 106 \begin{frame}{Outline}
 107
 108 \setbeamertemplate{section in toc}[sections numbered]
 109 \tableofcontents
 110 \end{frame}
 111
 112
 113 %%%%%%%%%%%%%%%%%%%%
 114 %%    SLIDE 03    %%
 115 %%%%%%%%%%%%%%%%%%%%
 116 \begin{frame}{Introduction and problem definition}
 117  \section{\small {Introduction and Problem definition}}
 118    \bf \textcolor{blue}{To get more computing power:}
 119      \begin{minipage}{0.5\textwidth}
 120       \textcolor{blue}{1)} \small  \bf \textcolor{black}{Increase the frequency of a  processor.\\ (limited due to overheating)}
 121     \end{minipage}%
 122     \begin{minipage}{0.6\textwidth}
 123
 124 \begin{figure}[h!]
 125
 126     \includegraphics[width=0.7\textwidth]{fig/freq-years}
 127     \end{figure}
 128     \end{minipage}%
 129     \vspace{0.2cm}
 130     \begin{minipage}{0.5\textwidth}
 131      \textcolor{blue}{2)} \small \bf \textcolor{black}{Use more nodes.}
 132
 133  \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}
 134
 135     \end{minipage}%
 136     \begin{minipage}{0.6\textwidth}
 137     \begin{figure}[h!]
 138      \includegraphics[width=0.7\textwidth]{fig/clusters}
 139     \end{figure}
 140     \end{minipage}%
 141  \end{frame}
 142
 143
 144
 145
 146  %%%%%%%%%%%%%%%%%%%
 147 %%    SLIDE 04   %%
 148 %%%%%%%%%%%%%%%%%%%%
 149 \begin{frame}{Techniques for energy consumption reduction}
 150  \vspace{0.1cm}
 151
 152
 153      \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
 154     \vspace{-0.9cm}
 155     \begin{figure}
 156      \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
 157      %\includegraphics[width=0.6\textwidth]{on-off/a-69}
 158     \end{figure}
 159  \end{frame}
 160
 161 %%%%%%%%%%%%%%%%%%%%
 162 %%    SLIDE 06    %%
 163 %%%%%%%%%%%%%%%%%%%%
 164 \begin{frame}{Techniques for energy consumption reduction}
 165
 166   \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency scaling (DVFS)}
 167      \vspace{-0.5cm}
 168     \begin{figure}
 169     \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
 170      %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
 171     \end{figure}
 172     \end{frame}
 173
 174
 175
 176 %%%%%%%%%%%%%%%%%%%%
 177 %%    SLIDE 07    %%
 178 %%%%%%%%%%%%%%%%%%%%
 179 \begin{frame}{Motivations}
 180 \vspace{0.05cm}
 181 \section{\small {Motivations}}
 182 \textcolor{blue}{Why we used the DVFS method:}
 183 \vspace{-0.49cm}
 184 \begin{minipage}{0.5\textwidth}
 185     \vspace{-0.49cm}
 186       \begin{itemize}
 187        \item  \small \textcolor{black}{ The CPU is the component that consumes the  highest amount of energy in a node \textsuperscript{1}. }
 188
 189          \end{itemize}
 190
 191     \end{minipage}%
 192     \begin{minipage}{0.5\textwidth}
 193      \vspace{-0.49cm}
 194     \begin{figure}[h!]
 195      \includegraphics[width=0.85\textwidth]{fig/node-power}
 196
 197     \end{figure}
 198     \end{minipage}%
 199
 200   \begin{itemize} \item \small  \textcolor{black}{DVFS reduces the energy consumption while
 201    keeping all the nodes working.}
 202                 \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes.}  \end{itemize}
 203
 204 \vspace{-0.12cm}
 205
 206  \begin{block}{\textcolor{white}{Challenge and Objective}}
 207
 208         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it degrades the performance simultaneously.}
 209
 210                 \vspace{0.1cm}
 211  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
 212 \end{block}
 213
 214  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
 215 for a warehouse-sized computer.
 216
 217     \end{frame}
 218
 219
 220
 221 %%%%%%%%%%%%%%%%%%%%
 222 %%    SLIDE 08    %%
 223 %%%%%%%%%%%%%%%%%%%%
 224
 225
 226 \begin{frame}{The first contribution}
 227
 228 \section{\small {Energy optimization of a homogeneous platform}}
 229 %\vspace{-3cm}
 230  % \includegraphics[width=0.6\textwidth]{white.pdf}
 231
 232 \begin{center}
 233 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
 234 \end{center}
 235  \end{frame}
 236
 237
 238
 239 %%%%%%%%%%%%%%%%%%%%
 240 %%    SLIDE 09    %%
 241 %%%%%%%%%%%%%%%%%%%%
 242
 243 \begin{frame}{Objectives}
 244         \begin{femtoBlock}{} \vspace{-12 mm}
 245                 \begin{itemize} \small
 246                    \item  Studying the effect of the scaling factor on the \textbf{energy consumption and performance} of parallel  applications with iterations. \medskip
 247
 248                    \item  Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
 249                    \item  Proposing an algorithm for selecting the scaling factor that produces  \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
 250                    \item  Comparing the proposed algorithm to existing methods.
 251
 252
 253                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
 254                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
 255                 \end{itemize}
 256                  %\let\thefootnote\relax\footnote{}
 257           \vspace{-10 mm}
 258         \end{femtoBlock}
 259 \end{frame}
 260
 261
 262
 263 %%%%%%%%%%%%%%%%%%%%
 264 %%    SLIDE 10    %%
 265 %%%%%%%%%%%%%%%%%%%%
 266
 267
 268 \begin{frame}{Execution of synchronous parallel tasks}
 269 \vspace{-0.5 cm}
 270 \begin{figure}
 271   \centering
 272   \subfloat[Synchronous imbalanced communications]{%
 273     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
 274   \subfloat[Synchronous imbalanced computations]{%
 275     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
 276  % \caption{Parallel tasks on homogeneous platform}
 277   \label{fig:homo}
 278 \end{figure}
 279
 280  \end{frame}
 281
 282
 283
 284
 285 %%%%%%%%%%%%%%%%%%%%
 286 %%    SLIDE 11   %%
 287 %%%%%%%%%%%%%%%%%%%%
 288 \begin{frame}{Energy model for a homogeneous platform}
 289       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
 290        (\textcolor{red}{$P_s$}) power.
 291     \begin{equation}
 292      \label{eq:pd}
 293      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot CL \cdot V^2 \cdot F}
 294    \end{equation}
 295     \scriptsize \underline{Where}: \\
 296     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm}  \textcolor{blue}{$CL$}: load capacitance\\
 297     \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
 298    \begin{equation}
 299      \label{eq:ps}
 300      \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
 301    \end{equation}
 302     \underline{Where}:\\
 303         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage.  \hspace{28 mm}   \textcolor{blue}{$N_{trans}$}: number of transistors. \\
 304         \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent
 305              parameter.}
 306 \end{frame}
 307
 308 %%%%%%%%%%%%%%%%%%%%
 309 %%    SLIDE 12   %%
 310 %%%%%%%%%%%%%%%%%%%%
 311
 312 \begin{frame}{Energy model for a homogeneous platform}
 313
 314           The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.  \medskip
 315
 316
 317
 318         \begin{block}{\small Rauber and Rünger's energy model}
 319          $ E = P_{d} \cdot S_1^{-2} \cdot
 320          \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
 321             P_{s} \cdot S_1  \cdot T_1 \cdot N$
 322         \end{block}
 323            \textcolor{blue}{$S_1$}: the maximum scaling factor.\\
 324            \textcolor{blue}{$P_{d}$}: the dynamic power.\\
 325            \textcolor{blue}{$P_{s}$}: the static power.\\
 326            \textcolor{blue}{$T_1$}: the execution time of the slowest task.\\
 327            \textcolor{blue}{$T_i$}: the execution time of task i.\\
 328            \textcolor{blue}{$N$}:  the number of  nodes.
 329
 330 \end{frame}
 331
 332
 333 %%%%%%%%%%%%%%%%%%%%
 334 %%    SLIDE 13   %%
 335 %%%%%%%%%%%%%%%%%%%%
 336 \begin{frame}{Performance evaluation of MPI programs}
 337         \begin{femtoBlock}{}
 338               \vspace{-5 mm}
 339               \begin{block}{\small Execution time prediction model}
 340                      \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
 341           \end{block}
 342           \vspace{10 mm}
 343            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
 344            \quad%
 345            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
 346             \vspace{5 mm}
 347
 348            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
 349            \end{femtoBlock}
 350 \end{frame}
 351
 352
 353
 354
 355  %%%%%%%%%%%%%%%%%%%%
 356 %%    SLIDE 14   %%
 357 %%%%%%%%%%%%%%%%%%%%
 358 \begin{frame}{Performance and energy reduction trade-off}
 359         \begin{femtoBlock}{} \vspace{-15 mm}
 360                \begin{figure}
 361      \centering
 362      \subfloat[\small  Real relation.]{%
 363      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
 364      \quad%
 365      \subfloat[\small Converted relation.]{%
 366      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
 367   \label{fig:rel}
 368  % \caption{The energy and performance relation}
 369 \end{figure}
 370
 371  Where:~~~ $\textcolor{blue}{Performance} = execution~time^{-1}$
 372
 373 %\vspace{-0.3cm}
 374       \small
 375          \begin{block}{\small Our objective function}
 376          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}
 377                     (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} -
 378                      \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$}
 379
 380         \end{block}
 381         \end{femtoBlock}
 382
 383 \end{frame}
 384
 385 %%%%%%%%%%%%%%%%%%%%
 386 %%    SLIDE 15   %%
 387 %%%%%%%%%%%%%%%%%%%%
 388  \begin{frame}{Scaling factor selection algorithm}
 389 \vspace{-0.75cm}
 390      \begin{center}
 391       \includegraphics[width=.56 \textwidth]{c1/algo-homo}
 392      \end{center}
 393
 394 \end{frame}
 395
 396
 397 %%%%%%%%%%%%%%%%%%%%
 398 %%    SLIDE 16   %%
 399 %%%%%%%%%%%%%%%%%%%%
 400 \begin{frame}{Scaling algorithm example}
 401 \vspace{-0.75cm}
 402
 403      \begin{figure}
 404   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
 405   %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
 406   \end{figure}
 407 \end{frame}
 408
 409 %%%%%%%%%%%%%%%%%%%%
 410 %%    SLIDE 17   %%
 411 %%%%%%%%%%%%%%%%%%%%
 412 \begin{frame}{Experimental results }
 413       \begin{femtoBlock}{}
 414         \begin{itemize}
 415          \small
 416            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 417            \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
 418            \item Each node in the cluster has 18 frequency values from \textbf{2.5$GHz$} to \textbf{800$MHz$}.\medskip
 419            \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
 420            \item $P_d=20W$,  $P_s=4W$.
 421                 \end{itemize}
 422         \end{femtoBlock}
 423 \end{frame}
 424
 425
 426 %%%%%%%%%%%%%%%%%%%%
 427 %%    SLIDE 18   %%
 428 %%%%%%%%%%%%%%%%%%%%
 429 \begin{frame}{Experimental results}
 430   \begin{femtoBlock}{}
 431       \centering {
 432      \includegraphics[width=.35\textwidth]{c1/ep}
 433      \includegraphics[width=.35\textwidth]{c1/cg}
 434      \includegraphics[width=.35\textwidth]{c1/bt}}
 435
 436      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
 437  \end{femtoBlock}
 438 \end{frame}
 439
 440
 441   %%%%%%%%%%%%%%%%%%%%
 442 %%    SLIDE 19   %%
 443 %%%%%%%%%%%%%%%%%%%%
 444 \begin{frame}{Results comparison}
 445          \begin{block}{\small Rauber and Rünger's optimal scaling factor}
 446            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
 447             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
 448         \end{block}
 449
 450
 451     \centering {
 452          %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
 453          %\qquad
 454          %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
 455
 456
 457             \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}}
 458
 459 \end{frame}
 460
 461
 462 %%%%%%%%%%%%%%%%%%%%
 463 %%    SLIDE 20   %%
 464 %%%%%%%%%%%%%%%%%%%%
 465 \begin{frame}{The proposed new energy model}
 466     \vspace{-0.75cm}
 467   \begin{figure}
 468   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
 469   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
 470   \end{figure}
 471 \end{frame}
 472
 473
 474 %%%%%%%%%%%%%%%%%%%%
 475 %%    SLIDE 21   %%
 476 %%%%%%%%%%%%%%%%%%%%
 477 \begin{frame}{\large Comparing the new model with Rauber's model }
 478  \vspace{0.1cm}
 479  \centering
 480     \includegraphics[width=.45\textwidth]{c1/energy_con}
 481
 482     \includegraphics[width=.5\textwidth]{c1/compare-scales}
 483 \end{frame}
 484
 485
 486
 487
 488    % \begin{frame}{Summary}
 489      % \begin{femtoBlock}{}
 490      % \begin{itemize}
 491       %\small
 492        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
 493        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
 494          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip
 495          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
 496                % performance}. \medskip
 497          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
 498          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
 499          %\end{itemize}
 500
 501         %\end{femtoBlock}
 502 %\end{frame}
 503
 504
 505 %%%%%%%%%%%%%%%%%%%%
 506 %%    SLIDE 22    %%
 507 %%%%%%%%%%%%%%%%%%%%
 508
 509
 510 \begin{frame}{The second contribution}
 511
 512 \section{\small {Energy optimization of a heterogeneous platform}}
 513 \begin{center}
 514
 515
 516 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
 517 \end{center}
 518  \end{frame}
 519
 520
 521
 522 %%%%%%%%%%%%%%%%%%%%
 523 %%    SLIDE 23    %%
 524 %%%%%%%%%%%%%%%%%%%%
 525
 526 \begin{frame}{Objectives}
 527         \begin{femtoBlock}{} \vspace{-12 mm}
 528                 \begin{itemize} \small
 529                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running
 530                           over a heterogeneous platform (cluster or Grid). \medskip
 531                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
 532                           message passing iterative applications.    \medskip
 533
 534                    \item  Computing  the vector of scaling factors ($S_1, S_2, \dots, S_N$)  producing \textcolor{blue} {the optimal trade-off} between
 535                           the energy consumption and the performance.
 536                 \end{itemize}
 537
 538           \vspace{-10 mm}
 539         \end{femtoBlock}
 540 \end{frame}
 541
 542
 543 %%%%%%%%%%%%%%%%%%%%
 544 %%    SLIDE 24    %%
 545 %%%%%%%%%%%%%%%%%%%%
 546 \begin{frame}{The execution time model}
 547       \vspace{-8 mm}
 548      \begin{figure}[!t]
 549        \centering
 550        \includegraphics[scale=0.5]{c2/commtasks}
 551        \label{fig:heter}
 552      \end{figure}
 553        \vspace{-12 mm}
 554        \medskip
 555
 556     \begin{block}{\small The execution time prediction model}
 557     \begin{equation}
 558      \label{eq:perf}
 559      \small\textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
 560     \end{equation}
 561     \end{block}
 562  \small  Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{communication~times + slack~times}$
 563
 564 \end{frame}
 565
 566  %%%%%%%%%%%%%%%%%%%%
 567 %%    SLIDE 25    %%
 568 %%%%%%%%%%%%%%%%%%%%
 569  \begin{frame}{The energy consumption model}
 570     The overall energy consumption of a message passing synchronous  application executed over
 571      a heterogeneous platform can be computed as  follows:
 572     \begin{multline}
 573      \label{eq:energy}
 574      \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
 575      \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i)}))}
 576       \hspace{10 mm}
 577     \end{multline}
 578     \underline{where}:\\
 579     \textcolor{blue}{N} : is the number of nodes.
 580 \end{frame}
 581
 582
 583 %%%%%%%%%%%%%%%%%%%%
 584 %%    SLIDE 26    %%
 585 %%%%%%%%%%%%%%%%%%%%
 586   \begin{frame}{The  energy  model example for heter. cluster}
 587   \vspace{-0.5cm}
 588  \begin{figure}
 589   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
 590   %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
 591   \end{figure}
 592  \end{frame}
 593
 594
 595
 596
 597 %%%%%%%%%%%%%%%%%%%%
 598 %%    SLIDE 27    %%
 599 %%%%%%%%%%%%%%%%%%%%
 600 %\begin{frame}{The trade-off between energy  and performance}
 601    % \vspace{-7 mm}
 602     %\begin{figure}
 603    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
 604    % \end{figure}
 605    % \vspace{-7 mm}
 606    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}}
 607     %{E_{Max}}$}. \\
 608     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
 609
 610    %  \begin{block}{\small The tradeoff model}
 611     % \begin{equation}
 612     %  \label{eq:max}
 613     %  \textcolor{red}{MaxDist} =
 614      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
 615       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
 616       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
 617       %\end{equation}
 618     % \end{block}
 619 %\end{frame}
 620
 621
 622 %%%%%%%%%%%%%%%%%%%%
 623 %%    SLIDE 28    %%
 624 %%%%%%%%%%%%%%%%%%%%
 625  \begin{frame}{The scaling algorithm for heter. cluster}
 626
 627  \centering
 628    \includegraphics[width=.52\textwidth]{algo-heter}
 629  \end{frame}
 630
 631
 632  %%%%%%%%%%%%%%%%%%%%
 633 %%    SLIDE 29    %%
 634 %%%%%%%%%%%%%%%%%%%%
 635  \begin{frame}{The scaling algorithm example}
 636  \vspace{-0.5cm}
 637  \centering
 638
 639   \begin{figure}
 640   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
 641  % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
 642   \end{figure}
 643 \end{frame}
 644
 645
 646
 647
 648 %%%%%%%%%%%%%%%%%%%%
 649 %%    SLIDE 30    %%
 650 %%%%%%%%%%%%%%%%%%%%
 651 \begin{frame}{Experiments over a heterogeneous cluster  }
 652         \begin{itemize}
 653          \small
 654            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 655            \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
 656            \item Four types of processors with different computing powers were used.\medskip
 657            \item The benchmarks were executed with different numbers of nodes ranging from 4 to 144 nodes.\medskip
 658            \item It was assumed that the total power consumption of the CPU consists of 80\% dynamic power and 20\% static power.
 659                   \medskip
 660
 661         \end{itemize}
 662
 663 \end{frame}
 664
 665
 666 %%%%%%%%%%%%%%%%%%%%
 667 %%    SLIDE 31    %%
 668 %%%%%%%%%%%%%%%%%%%%
 669 \begin{frame}{The experimental results}
 670    \vspace{-5 mm}
 671    \begin{figure}[!t]
 672    \centering
 673     \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
 674
 675     \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
 676      for the class C of the NAS Benchmarks executed over 8 nodes}
 677
 678    \end{figure}
 679 \end{frame}
 680
 681
 682
 683 %%%%%%%%%%%%%%%%%%%%
 684 %%    SLIDE 32    %%
 685 %%%%%%%%%%%%%%%%%%%%
 686 \begin{frame}{The experimental results}
 687    \vspace{-5 mm}
 688    \begin{figure}[!t]
 689    \centering
 690
 691     \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
 692
 693    \textcolor{blue}{On average, it degrades  by \textcolor{red}{3.8\%} the performance
 694      of NAS Benchmarks class C executed over 8 nodes}
 695      \end{figure}
 696 \end{frame}
 697
 698
 699
 700 %%%%%%%%%%%%%%%%%%%%
 701 %%    SLIDE 33    %%
 702 %%%%%%%%%%%%%%%%%%%%
 703 \begin{frame}{The results of the three power scenarios}
 704    \vspace{-5 mm}
 705    \begin{figure}[!t]
 706    \centering
 707    \includegraphics[width=.55\textwidth]{c2/three_power.pdf}
 708    \vspace{10 mm}
 709    \includegraphics[width=.55\textwidth]{c2/three_scenarios.pdf}
 710    \end{figure}
 711 \end{frame}
 712
 713
 714
 715 %%%%%%%%%%%%%%%%%%%%
 716 %%    SLIDE 34    %%
 717 %%%%%%%%%%%%%%%%%%%%
 718 \begin{frame}{Comparing the objective function to EDP}
 719
 720      EDP is the product of the energy consumption and the delay.
 721     \vspace{-5 mm}
 722     \begin{figure}[!t]
 723     \centering
 724     \includegraphics[width=.55\textwidth]{c2/avg_compare.pdf}
 725
 726     \includegraphics[width=.55\textwidth]{c2/compare_with_EDP.pdf}
 727     \end{figure}
 728 \end{frame}
 729
 730
 731
 732
 733 %%%%%%%%%%%%%%%%%%%%
 734 %%    SLIDE 35    %%
 735 %%%%%%%%%%%%%%%%%%%%
 736 %\begin{frame}{Energy optimization of grid platform}
 737   % \begin{figure}[!t]
 738    % \centering
 739          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
 740
 741         %   \small  10 sites distributed over France and Luxembourg
 742         %\end{figure}
 743 %\end{frame}
 744
 745
 746 %%%%%%%%%%%%%%%%%%%%
 747 %%    SLIDE 36    %%
 748 %%%%%%%%%%%%%%%%%%%%
 749 \begin{frame}{The grid architecture}
 750 \begin{center}
 751 \includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
 752 \end{center}
 753
 754  %\begin{frame}{Performance, Energy and trade-off models} \small
 755   %\begin{block}{\small The performance model of grid}
 756    % \begin{equation}
 757   %\label{eq:perf}
 758   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
 759  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
 760 %\end{equation}
 761     %\end{block}
 762
 763
 764  %\begin{block}{\small The energy model of grid}\small
 765   %  \begin{equation}
 766   %\label{eq:energy}
 767  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +
 768 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
 769 %\end{equation}
 770    % \end{block}
 771
 772 %\begin{block}{\small The trade-off model of grid}
 773 %\small
 774     %\begin{equation}
 775    %\label{eq:max}
 776   %\MaxDist =
 777   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
 778    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
 779     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
 780 %\end{equation}
 781    % \end{block}
 782
 783
 784  \end{frame}
 785
 786
 787
 788 %%%%%%%%%%%%%%%%%%%%
 789 %%    SLIDE 37    %%
 790 %%%%%%%%%%%%%%%%%%%%
 791  \begin{frame}{Experiments over Grid'5000}
 792
 793    \textcolor{blue}{The experiments were conducted using three
 794           clusters distributed over one or two sites.}
 795            \vspace{-7 mm}
 796           \begin{center}
 797           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
 798           \end{center}
 799       \vspace{-10 mm}
 800   \textcolor{blue}{Grid'5000 power measurement tools were used.}
 801         \vspace{-9 mm}
 802   \begin{center}
 803           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
 804           \end{center}
 805
 806
 807 \end{frame}
 808
 809
 810
 811
 812 %%%%%%%%%%%%%%%%%%%%
 813 %%    SLIDE 38    %%
 814 %%%%%%%%%%%%%%%%%%%%
 815 \begin{frame}{Experiments over Grid'5000}
 816
 817    \begin{minipage}{0.4\textwidth}
 818        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
 819         %\textcolor{red}{30\%}}
 820      \small \textcolor{blue}{The average energy saving =  \textcolor{red}{30\%}}
 821    \end{minipage}
 822      \begin{minipage}{0.55\textwidth}
 823         \begin{figure}[h!]
 824           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
 825      \end{figure}
 826 \end{minipage}
 827
 828          \begin{minipage}{0.4\textwidth}
 829            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
 830                 %performance by \textcolor{red}{3.2\%}}
 831       \small  \textcolor{blue}{The average performance degradation  =  \textcolor{red}{3.2\%}}
 832         \end{minipage}
 833        \begin{minipage}{0.55\textwidth}
 834          \begin{figure}[h!]
 835            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
 836          \end{figure}
 837           \end{minipage}
 838  \end{frame}
 839
 840
 841
 842 %%%%%%%%%%%%%%%%%%%%
 843 %%    SLIDE 39    %%
 844 %%%%%%%%%%%%%%%%%%%%
 845 \begin{frame}{Experiments over Grid'5000}
 846    \textcolor{blue}{One core  and Multi-cores per node results:}
 847
 848   \begin{figure}[h!]
 849   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
 850   \hspace{0.3cm}
 851   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
 852   \end{figure}
 853
 854   \centering \small \textcolor{blue}{Using the multi-core per node scenario decreases the computation to communication ratio}.
 855 \end{frame}
 856
 857
 858
 859 %\begin{frame}{Summary}
 860 %\begin{itemize}
 861      % \small
 862         % \item  Two scaling algorithm were applies to \textcolor{blue}{heterogeneous %cluster} and \textcolor{blue}{grid}.
 863         % \item  A new \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
 864       %   \item  The experimental results ere conducted over \textcolor{blue}{SimGrid}  simulators and real
 865           %test-bed \textcolor{blue}{Grid'5000}.
 866
 867          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
 868         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
 869       %    clusters.
 870
 871          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
 872         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
 873
 874        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
 875      %    \end{itemize}
 876 %\end{frame}
 877
 878
 879 %%%%%%%%%%%%%%%%%%%%
 880 %%    SLIDE 40    %%
 881 %%%%%%%%%%%%%%%%%%%%
 882 \begin{frame}{The third contribution}
 883 \section{\small {Energy optimization of asynchronous applications}}
 884 \begin{center}
 885 \bf  \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing  applications}
 886 \end{center}
 887  \end{frame}
 888
 889
 890
 891 %%%%%%%%%%%%%%%%%%%%
 892 %%    SLIDE 41   %%
 893 %%%%%%%%%%%%%%%%%%%%
 894 \begin{frame}{Problem definition}\vspace{0.8 mm}
 895 \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
 896 \vspace{-8 mm}
 897 \begin{figure}
 898  \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
 899  %\includegraphics[width=0.6\textwidth]{syn/a-503}
 900   \end{figure}
 901 \end{frame}
 902
 903
 904
 905 %%%%%%%%%%%%%%%%%%%%
 906 %%    SLIDE 42   %%
 907 %%%%%%%%%%%%%%%%%%%%
 908 \begin{frame}{Problem definition}\vspace{0.8 mm}
 909 \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
 910 \vspace{-8 mm}
 911 \begin{figure}
 912  \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
 913  %\includegraphics[width=0.6\textwidth]{asyn/a-440}
 914   \end{figure}
 915 \end{frame}
 916
 917
 918
 919 %%%%%%%%%%%%%%%%%%%%
 920 %%    SLIDE 43   %%
 921 %%%%%%%%%%%%%%%%%%%%
 922 \begin{frame}{Solution}\vspace{0.8mm}
 923 \textcolor{blue}{Using asynchronous communications with DVFS }
 924 \vspace{-8 mm}
 925 \begin{figure}
 926   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
 927   %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
 928   \end{figure}
 929 \end{frame}
 930
 931
 932
 933
 934 %%%%%%%%%%%%%%%%%%%%
 935 %%    SLIDE 44   %%
 936 %%%%%%%%%%%%%%%%%%%%
 937 %\begin{frame}{The performance models}
 938
 939 %\begin{block}{\small The performance model of Asynch. Applications}\small
 940 %\begin{equation}
 941   %\label{eq:asyn_time}
 942  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
 943 %\end{equation}
 944 %\end{block}
 945
 946
 947 %\begin{block}{\small The performance model of Hybrid Applications}\small
 948 %\begin{equation}
 949   %\label{eq:asyn_perf}
 950   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
 951    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
 952 %\end{equation}
 953 %\end{block}
 954
 955
 956 %\end{frame}
 957
 958
 959
 960 %%%%%%%%%%%%%%%%%%%%
 961 %%    SLIDE 45   %%
 962 %%%%%%%%%%%%%%%%%%%%
 963 %\begin{frame}{The energy consumption models}
 964
 965 %\begin{block}{\small The energy model of Asynch. Applications}\small
 966 %\begin{equation}
 967   %\label{eq:asyn_energy1}
 968 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
 969 %\end{equation}
 970 %\end{block}
 971
 972
 973 %\begin{block}{\small The energy model of Hybrid Applications}\small
 974 %\begin{multline}
 975   %\label{eq:asyn_energy}
 976  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
 977 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
 978 %\end{multline}
 979 %\end{block}
 980 %\end{frame}
 981
 982
 983
 984 %%%%%%%%%%%%%%%%%%%%
 985 %%    SLIDE 44   %%
 986 %%%%%%%%%%%%%%%%%%%%
 987 \begin{frame}{The performance and the energy models }
 988
 989 \centering
 990 \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
 991 \end{frame}
 992
 993
 994
 995
 996
 997 %%%%%%%%%%%%%%%%%%%%
 998 %%    SLIDE 46   %%
 999 %%%%%%%%%%%%%%%%%%%%
1000 \begin{frame}{The scaling algorithm for Asynch.  applications}
1001 \vspace{-0.1 mm}
1002 \centering
1003 \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
1004 \end{frame}
1005
1006
1007
1008 %%%%%%%%%%%%%%%%%%%%
1009 %%    SLIDE 47   %%
1010 %%%%%%%%%%%%%%%%%%%%
1011 \begin{frame}{The experiments}
1012    \vspace{-5 mm}
1013    \begin{figure}[!t]
1014    \begin{itemize}
1015       \small
1016         \item The architecture of the grid:
1017    \end{itemize}
1018     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
1019    \end{figure}
1020    \begin{itemize}
1021       \small
1022         \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1023         \item Evaluating the application over the simulator and Grid'5000.
1024    \end{itemize}
1025 \end{frame}
1026
1027
1028
1029 %%%%%%%%%%%%%%%%%%%%
1030 %%    SLIDE 48   %%
1031 %%%%%%%%%%%%%%%%%%%%
1032 \begin{frame}{The simulation results}
1033 \centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1034
1035 \centering
1036     \includegraphics[scale=0.42]{c3/energy_saving.eps}
1037
1038  \centering  The average energy saving  = \textcolor{red}{22\%}
1039 \end{frame}
1040
1041
1042
1043 %%%%%%%%%%%%%%%%%%%%
1044 %%    SLIDE 49   %%
1045 %%%%%%%%%%%%%%%%%%%%
1046 \begin{frame}{The simulation results}
1047 \centering
1048
1049      \includegraphics[scale=0.42]{c3/perf_degra.eps}
1050
1051  \centering    The average speed-up  = \textcolor{red}{5.72\%}
1052 \end{frame}
1053
1054
1055
1056 %%%%%%%%%%%%%%%%%%%%
1057 %%    SLIDE 50   %%
1058 %%%%%%%%%%%%%%%%%%%%
1059  \begin{frame}{The Grid'5000 results}
1060    \vspace{-20 mm}
1061    \begin{figure}[!t]
1062    \centering
1063    \hspace{-8 mm}
1064     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1065     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1066    \end{figure}
1067     \vspace{-5 mm}
1068      \centering \footnotesize
1069 The average energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1070 \end{frame}
1071
1072
1073 %%%%%%%%%%%%%%%%%%%%
1074 %%    SLIDE 51   %%
1075 %%%%%%%%%%%%%%%%%%%%
1076 \begin{frame}{The comparison results}
1077  \centering
1078     \includegraphics[width=.5\textwidth]{c3/compare.eps}
1079
1080     \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1081 \end{frame}
1082
1083
1084
1085
1086 %%%%%%%%%%%%%%%%%%%%
1087 %%    SLIDE 52  %%
1088 %%%%%%%%%%%%%%%%%%%%
1089 \begin{frame}{Conclusions}
1090 \section{Conclusions and Perspectives}
1091 \begin{itemize}
1092
1093 \small  \barrow  Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over
1094 \textcolor{blue}{homogeneous and  heterogeneous clusters or grids}.
1095
1096
1097
1098 \small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
1099
1100 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1101
1102 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1103 Multi-splitting} method.
1104
1105 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over  the \textcolor{blue}{Grid'5000 testbed}.
1106
1107 \small  \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's  method} or to the \textcolor{blue}{EDP objective function}.
1108
1109
1110 \end{itemize}
1111 \end{frame}
1112
1113
1114
1115 %%%%%%%%%%%%%%%%%%%%
1116 %%    SLIDE 53   %%
1117 %%%%%%%%%%%%%%%%%%%%
1118 \begin{frame}{Publications}
1119
1120 \begin{block}{\small Journal Articles }\scriptsize
1121 \begin{enumerate}[$\lbrack$1$\rbrack$]
1122
1123 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational
1124       Science}, 2016.
1125
1126 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for
1127       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016 (submitted).
1128
1129 \end{enumerate}
1130 \end{block}
1131
1132
1133 \begin{block}{\small Conference Articles }\scriptsize
1134
1135 \begin{enumerate}[$\lbrack$1$\rbrack$]
1136
1137 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1138       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014}, pp.
1139       225-230. IEEE Computer Society, Milan, Italy (2014).
1140
1141 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1142       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1143       \textit{The $16^{th}$ PDSEC}, pp. 922-931. IEEE Computer Society, India (2015).
1144
1145 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
1146       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The $19^{th}$ CSE conference}. IEEE Computer Society,
1147       Paris (2016).
1148
1149 \end{enumerate}
1150
1151 \end{block}
1152 \end{frame}
1153
1154
1155 %%%%%%%%%%%%%%%%%%%%
1156 %%    SLIDE 54   %%
1157 %%%%%%%%%%%%%%%%%%%%
1158 \begin{frame}{Perspectives}
1159
1160 \begin{itemize}
1161
1162 \small  \barrow The proposed algorithms should  take into consideration the
1163 \textcolor{blue}{variability between some iterations}.
1164
1165 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1166
1167 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1168
1169 \small \barrow The results returned by the energy models should be compared to the values given by  \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1170 \end{itemize}
1171
1172 \end{frame}
1173
1174 %%%%%%%%%%%%%%%%%%%%
1175 %%    SLIDE 55  %%
1176 %%%%%%%%%%%%%%%%%%%%
1177 \begin{frame}{Fin} \vspace{-10 mm}
1178
1179             \centering \Large \textcolor{blue}{Thank you for your attention}
1180
1181             \vspace{2cm}
1182             \centering \textcolor{blue}{ {\Large Questions?}}
1183
1184 \end{frame}
1185 \end{document}
1186 %  _____ ___ _   _
1187 % |  ___|_ _| \ | |
1188 % | |_   | ||  \| |
1189 % |  _|  | || |\  |
1190 % |_|   |___|_| \_|
1191 %