thesis-presentation/AhmedSlides.tex

   1  \documentclass{beamer}
   2 \usepackage{beamerthemefemto}
   3 \usepackage[latin1]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
   6 \usepackage{algorithm,algorithmicx,algpseudocode}
   7 \usepackage{graphicx,graphics}
   8 \usepackage{subfig}
   9 \usepackage{listings}
  10 \usepackage{colortbl}
  11 \usepackage{amsmath}
  12 \usepackage{xspace}
  13  \usepackage{movie15}
  14  \usepackage{animate}
  15 \usepackage{xmpmulti}
  16  \newcommand{\AG}[2][inline]{% AG's review note: #1 = extra \todo options (default `inline'), #2 = note text
  17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}
  18 \newcommand{\JC}[2][inline]{% JC's review note: same interface as \AG, with a light red background
  19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
  20 \definecolor{myblue}{RGB}{0,29,119}
  21 \newcommand{\Xsub}[2]{{\ensuremath{#1_\mathit{#2}}}} % symbol #1 with the word #2 as a math-italic subscript; \ensuremath switches to math mode when used in text
  22
  23 %% \fxheight{x}: puts a zero-width, 1.52ex-high strut before a non-empty subscript x so that it is set lower and more legible; yields nothing when x is empty (tested via \detokenize, so subscripts such as `ii' are not mistaken for empty)
  24 \newcommand{\fxheight}[1]{\if\relax\detokenize{#1}\relax\else\rule{0pt}{1.52ex}#1\fi}
  25
  26 \newcommand{\CL}{\Xsub{C}{L}}
  27 \newcommand{\Dist}{\mathit{Dist}}
  28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
  29 \newcommand{\Eind}{\Xsub{E}{ind}}
  30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
  31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
  32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
  33 \newcommand{\Es}{\Xsub{E}{S}}
  34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}}
  35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}}
  36 \newcommand{\Fnew}{\Xsub{F}{new}}
  37 \newcommand{\Vnew}{\Xsub{V}{new}}
  38 \newcommand{\Vmax}{\Xsub{V}{max}}
  39 \newcommand{\Ileak}{\Xsub{I}{leak}}
  40 \newcommand{\Kdesign}{\Xsub{K}{design}}
  41 \newcommand{\MaxDist}{\mathit{Max}\Dist}
  42 \newcommand{\MinTcm}{\mathit{Min}\Tcm}
  43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
  44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
  45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
  46
  47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
  48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
  49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
  50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
  51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}}
  52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
  53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
  54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
  55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
  56 \newcommand{\Tnew}{\Xsub{T}{New}}
  57 \newcommand{\Told}{\Xsub{T}{Old}}
  58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
  59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
  60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
  61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
  62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
  63  \usepackage{pifont}
  64 \usepackage{xcolor}
  65 \definecolor{myblue}{RGB}{0,29,119}
  66 \usepackage[textsize=footnotesize]{todonotes}
  67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]}
  68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
  69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
  70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
  71
  72
  73
  74 %\title{Energy Consumption Optimization of Parallel Applications with
  75 %Iterations using CPU Frequency Scaling}
  76 \vspace{2cm}
  77
  78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-1cm}
  79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under Supervision: \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ University of Franche-Comté - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
  80
  81 \date{}
  82 \vspace{-3cm}
  83 %  ____  _____ ____  _   _ _____
  84 % |  _ \| ____| __ )| | | |_   _|
  85 % | | | |  _| |  _ \| | | | | |
  86 % | |_| | |___| |_) | |_| | | |
  87 % |____/|_____|____/ \___/  |_|
  88 %
  89 \begin{document}
  90 \setbeamertemplate{background}{\titrefemto}
  91
  92 %%%%%%%%%%%%%%%%%%%%
  93 %%    SLIDE 01    %%
  94 %%%%%%%%%%%%%%%%%%%%
  95 \begin{frame}[plain]
  96 \vspace{1cm}
  97 \centering
  98    \titlepage
  99 \end{frame}
 100
 101
 102 %%%%%%%%%%%%%%%%%%%%
 103 %%    SLIDE 02    %%
 104 %%%%%%%%%%%%%%%%%%%%
 105 \setbeamertemplate{background}{\pagefemto}
 106 \begin{frame}{Outline}
 107
 108 \setbeamertemplate{section in toc}[sections numbered]
 109 \tableofcontents
 110 \end{frame}
 111
 112
 113 %%%%%%%%%%%%%%%%%%%%
 114 %%    SLIDE 03    %%
 115 %%%%%%%%%%%%%%%%%%%%
 116 \begin{frame}{Introduction and problem definition}
 117  \section{\small {Introduction and Problem definition}}
 118    \bf \textcolor{blue}{Approaches to increase the computing power:}
 119      \begin{minipage}{0.5\textwidth}
 120       \textcolor{blue}{1)} \small  \bf \textcolor{black}{Increasing the frequency of processor}
 121     \end{minipage}%
 122     \begin{minipage}{0.6\textwidth}
 123
 124 \begin{figure}[h!]
 125
 126     \includegraphics[width=0.7\textwidth]{fig/freq-years}
 127     \end{figure}
 128     \end{minipage}%
 129     \vspace{0.2cm}
 130     \begin{minipage}{0.5\textwidth}
 131      \textcolor{blue}{2)} \small \bf \textcolor{black}{Increasing the number of nodes}
 132     \end{minipage}%
 133     \begin{minipage}{0.6\textwidth}
 134     \begin{figure}[h!]
 135      \includegraphics[width=0.7\textwidth]{fig/clusters}
 136     \end{figure}
 137     \end{minipage}%
 138  \end{frame}
 139
 140
 141
 142 %%%%%%%%%%%%%%%%%%%%
 143 %%    SLIDE 04    %%
 144 %%%%%%%%%%%%%%%%%%%%
 145 \begin{frame}{Introduction and problem definition}
 146  \bf \textcolor{blue}{Processor frequency and its energy consumption}
 147  \vspace{0.4cm}
 148    \begin{minipage}{0.5\textwidth}
 149    \textcolor{blue}{$\blacktriangleright$}
 150   \small  \bf \textcolor{black}{ The power consumption of a processor increases exponentially  when its
 151       frequency is increased}
 152     \end{minipage}%
 153     \begin{minipage}{0.5\textwidth}
 154     \begin{figure}[h!]
 155      \includegraphics[width=0.7\textwidth]{fig/freq-power}
 156     \end{figure}
 157     \end{minipage}%
 158
 159     \begin{minipage}{0.5\textwidth}
 160      \textcolor{blue}{$\blacktriangleright$}
  161      \small \bf \textcolor{black}{The processor is the biggest power consumer in a computing node}
 162
 163     \end{minipage}%
 164     \begin{minipage}{0.6\textwidth}
 165     \begin{figure}[h!]
 166      \includegraphics[width=0.9\textwidth]{fig/node-power}
 167     \end{figure}
 168     \end{minipage}%
 169
 170  \end{frame}
 171
 172  %%%%%%%%%%%%%%%%%%%
 173 %%    SLIDE 05   %%
 174 %%%%%%%%%%%%%%%%%%%%
 175 \begin{frame}{Introduction and problem definition}
 176  \vspace{0.1cm}
 177  \bf \textcolor{blue}{Techniques for energy consumption reduction}
 178
 179      \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
 180     \vspace{-0.9cm}
 181     \begin{figure}
 182      \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
 183     \end{figure}
 184  \end{frame}
 185
 186 %%%%%%%%%%%%%%%%%%%%
 187 %%    SLIDE 06    %%
 188 %%%%%%%%%%%%%%%%%%%%
 189 \begin{frame}{Techniques for energy consumption reduction}
 190
 191   \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency Scaling (DVFS)}
 192      \vspace{-0.5cm}
 193     \begin{figure}
 194      \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
 195     \end{figure}
 196     \end{frame}
 197
 198
 199
 200 %%%%%%%%%%%%%%%%%%%%
 201 %%    SLIDE 07    %%
 202 %%%%%%%%%%%%%%%%%%%%
 203 \begin{frame}{Using the energy reduction method}
 204 \section{\small {Using the energy reduction method}}
 205 \begin{block}{\textcolor{white}{Why we used DVFS method:}}
 206 \begin{itemize}
  207                 \item \textcolor{black}{It is used to reduce the energy while keeping all nodes working, thus  it is more suitable for parallel computing.}
 208                 \item \textcolor{black}{It has a very small overhead compared to switch-off idle nodes method.}
 209          \end{itemize}
 210 \end{block}
 211
 212  \vspace{0.1cm}
 213  \begin{block}{\textcolor{white}{Challenge and Objective}}
 214
 215                 \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy, \textcolor{blue}{but} it degrades the performance simultaneously.}
 216
 217                 \vspace{0.1cm}
 218          \textcolor{blue}{Objective:} \textcolor{black}{Optimizing both energy consumption and performance of a parallel application at the same time when DVFS is used.}
 219 \end{block}
 220
 221     \end{frame}
 222
 223
 224
 225 %%%%%%%%%%%%%%%%%%%%
 226 %%    SLIDE 08    %%
 227 %%%%%%%%%%%%%%%%%%%%
 228
 229
 230 \begin{frame}{Contributions}
 231 \section{\small {Contributions}}
 232 \subsection{\small {3.1 Energy optimization of homogeneous platform}}
 233 \begin{center}
 234 \bf \textcolor{black}{First contribution} \\
 235 \vspace{1cm}
 236 \bf  \Large \textcolor{blue}{Energy optimization of homogeneous platform}
 237 \end{center}
 238  \end{frame}
 239
 240
 241
 242 %%%%%%%%%%%%%%%%%%%%
 243 %%    SLIDE 09    %%
 244 %%%%%%%%%%%%%%%%%%%%
 245
 246 \begin{frame}{Objectives}
 247         \begin{femtoBlock}{} \vspace{-12 mm}
 248                 \begin{itemize} \small
 249                    \item  Study the effect of the scaling factor $S$ on \textbf{energy consumption} of parallel iterative applications such as NAS
 250                           Benchmarks. \includegraphics[width=.06\textwidth]{c1/nasa.pdf} \medskip
 251                    \item  Study the effect of the scaling factor $S$ on \textbf{performance} of these benchmarks.\medskip
 252                    \item  Discovering the \textbf{energy-performance trade-off relation} when changing the frequency.\medskip
 253                    \item  We propose an algorithm for selecting the scaling factor $S$ producing \textbf {optimal trade-off} between the energy and performance. \medskip
 254                    \item  Improving Rauber and Rünger's\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
  255                           energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method is based on.
 256                 \end{itemize}
 257                  \let\thefootnote\relax\footnote{}
 258           \vspace{-10 mm}
 259         \end{femtoBlock}
 260 \end{frame}
 261
 262
 263
 264 %%%%%%%%%%%%%%%%%%%%
 265 %%    SLIDE 10    %%
 266 %%%%%%%%%%%%%%%%%%%%
 267
 268
 269 \begin{frame}{Parallel tasks execution over Homo. Platform}
 270 \vspace{-0.5 cm}
 271 \begin{figure}
 272   \centering
 273   \subfloat[Sync. imbalanced communications]{%
 274     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
 275   \subfloat[Sync. imbalanced computations]{%
 276     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
 277   \caption{Parallel tasks on homogeneous platform}
 278   \label{fig:homo}
 279 \end{figure}
 280
 281  \end{frame}
 282
 283
 284
 285
 286 %%%%%%%%%%%%%%%%%%%%
 287 %%    SLIDE 11   %%
 288 %%%%%%%%%%%%%%%%%%%%
 289 \begin{frame}{Energy model for homogeneous platform}
  290       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
 291        (\textcolor{red}{$P_s$}) power.
 292     \begin{equation}
 293      \label{eq:pd}
  294      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot \CL \cdot V^2 \cdot F}
  295    \end{equation}
  296     \scriptsize \underline{Where}: \\
  297     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm}  \textcolor{blue}{$\CL$}: load capacitance\\
  298     \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
 299    \begin{equation}
 300      \label{eq:ps}
  301      \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
 302    \end{equation}
 303     \underline{Where}:\\
 304         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage.  \hspace{28 mm}   \textcolor{blue}{$N_{trans}$}: number of transistors. \\
 305         \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent
 306              parameter.}
 307 \end{frame}
 308
 309 %%%%%%%%%%%%%%%%%%%%
 310 %%    SLIDE 12   %%
 311 %%%%%%%%%%%%%%%%%%%%
 312
 313 \begin{frame}{Energy model for homogeneous platform}
 314
 315           The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.  \medskip
 316
 317
 318
 319         \begin{block}{\small Rauber and Rünger's energy model}
 320          $ E = P_{d} \cdot S_1^{-2} \cdot
 321          \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
 322             P_{s} \cdot S_1  \cdot T_1 \cdot N$
 323         \end{block}
 324            \textcolor{blue}{$S_1$}: the max. scaling factor\\
 325            \textcolor{blue}{$P_{d}$}: the dynamic power\\
 326            \textcolor{blue}{$P_{s}$}: the static power\\
  327            \textcolor{blue}{$T_1$}: the time of the slowest task\\
 328            \textcolor{blue}{$T_i$}: the time of the other tasks\\
 329            \textcolor{blue}{$N$}:  the number of  nodes
 330
 331 \end{frame}
 332
 333
 334 %%%%%%%%%%%%%%%%%%%%
 335 %%    SLIDE 13   %%
 336 %%%%%%%%%%%%%%%%%%%%
 337 \begin{frame}{Performance evaluation of MPI programs}
 338         \begin{femtoBlock}{}
 339               \vspace{-5 mm}
 340               \begin{block}{\small Execution time prediction model}
 341                      \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
 342           \end{block}
 343           \vspace{10 mm}
 344            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
 345            \quad%
 346            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
 347             \vspace{5 mm}
 348
 349            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
 350            \end{femtoBlock}
 351 \end{frame}
 352
 353
 354
 355
 356  %%%%%%%%%%%%%%%%%%%%
 357 %%    SLIDE 14   %%
 358 %%%%%%%%%%%%%%%%%%%%
 359 \begin{frame}{Performance and energy reduction trade-off}
 360         \begin{femtoBlock}{} \vspace{-15 mm}
 361                \begin{figure}
 362      \centering
 363      \subfloat[\small  Real relation.]{%
 364      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
 365      \quad%
 366      \subfloat[\small Converted relation.]{%
 367      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
 368   \label{fig:rel}
 369  % \caption{The energy and performance relation}
 370 \end{figure}
 371
 372  Where:~~~ $\textcolor{blue}{Performance} = execution~time^{-1}$
 373
 374 %\vspace{-0.3cm}
 375       \small
 376          \begin{block}{\small Our objective function}
 377          \centering{$\textbf{\emph {MaxDist}} = \max_{j=1,2,\dots ,F}
 378                     (\overbrace{P_{Norm}(S_j)}^{{Maximize}} -
 379                      \overbrace{E_{Norm}(S_j)}^{{Minimize}} )$}
 380
 381         \end{block}
 382         \end{femtoBlock}
 383
 384 \end{frame}
 385
 386 %%%%%%%%%%%%%%%%%%%%
 387 %%    SLIDE 15   %%
 388 %%%%%%%%%%%%%%%%%%%%
 389  \begin{frame}{Scaling factor selection algorithm}
 390 \vspace{-0.75cm}
 391      \begin{center}
 392       \includegraphics[width=.56 \textwidth]{c1/algo-homo}
 393      \end{center}
 394
 395 \end{frame}
 396
 397
 398 %%%%%%%%%%%%%%%%%%%%
 399 %%    SLIDE 16   %%
 400 %%%%%%%%%%%%%%%%%%%%
 401 \begin{frame}{Scaling algorithm example}
 402 \vspace{-0.75cm}
 403
 404      \begin{figure}
 405   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
 406
 407   \end{figure}
 408 \end{frame}
 409
 410 %%%%%%%%%%%%%%%%%%%%
 411 %%    SLIDE 17   %%
 412 %%%%%%%%%%%%%%%%%%%%
 413 \begin{frame}{Experimental results }
 414       \begin{femtoBlock}{}
 415         \begin{itemize}
 416          \small
 417            \item Our experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip
 418            \item Our algorithm is applied to  NAS parallel benchmarks.\medskip
  419            \item Each node in the cluster has 18 frequency values from \textbf{2.5~GHz} to \textbf{800~MHz}.\medskip
 420            \item We run the classes A, B and C on 4, 8 or 9 and 16 nodes respectively.\medskip
  421            \item The dynamic power with the highest frequency is equal to \textbf{20~W} and the static power is equal to \textbf{4~W}.
 422                 \end{itemize}
 423         \end{femtoBlock}
 424 \end{frame}
 425
 426
 427 %%%%%%%%%%%%%%%%%%%%
 428 %%    SLIDE 18   %%
 429 %%%%%%%%%%%%%%%%%%%%
 430 \begin{frame}{Experimental results}
 431   \begin{femtoBlock}{}
 432       \centering {
 433      \includegraphics[width=.35\textwidth]{c1/ep}
 434      \includegraphics[width=.35\textwidth]{c1/cg}
 435      \includegraphics[width=.35\textwidth]{c1/bt}}
 436
 437      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
 438  \end{femtoBlock}
 439 \end{frame}
 440
 441
 442   %%%%%%%%%%%%%%%%%%%%
 443 %%    SLIDE 19   %%
 444 %%%%%%%%%%%%%%%%%%%%
 445 \begin{frame}{Results comparison}
 446          \begin{block}{\small Rauber and Rünger's optimal scaling factor}
 447            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
 448             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
 449         \end{block}
 450     \centering {
 451          %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
 452          %\qquad
 453          %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
 454
 455
 456             \includegraphics[width=.55\textwidth]{c1/compare_c.pdf}}
 457
 458 \end{frame}
 459
 460
 461 %%%%%%%%%%%%%%%%%%%%
 462 %%    SLIDE 20   %%
 463 %%%%%%%%%%%%%%%%%%%%
 464 \begin{frame}{The proposed new energy model}
 465     \vspace{-0.75cm}
 466   \begin{figure}
 467   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
 468   \end{figure}
 469 \end{frame}
 470
 471
 472 %%%%%%%%%%%%%%%%%%%%
 473 %%    SLIDE 21   %%
 474 %%%%%%%%%%%%%%%%%%%%
 475 \begin{frame}{Comparing the new model with Rauber model }
 476  \vspace{0.1cm}
 477  \centering
 478     \includegraphics[width=.45\textwidth]{c1/energy_con}
 479
 480     \includegraphics[width=.5\textwidth]{c1/compare-scales}
 481 \end{frame}
 482
 483
 484
 485
 486    % \begin{frame}{Summary}
 487      % \begin{femtoBlock}{}
 488      % \begin{itemize}
 489       %\small
 490        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
 491        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
 492          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip
 493          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
 494                % performance}. \medskip
 495          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
 496          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
 497          %\end{itemize}
 498
 499         %\end{femtoBlock}
 500 %\end{frame}
 501
 502
 503 %%%%%%%%%%%%%%%%%%%%
 504 %%    SLIDE 22    %%
 505 %%%%%%%%%%%%%%%%%%%%
 506
 507
 508 \begin{frame}{Contribution}
 509
 510 \subsection{\small {3.2 Energy optimization of heterogeneous platform}}
 511 \begin{center}
 512 \bf \textcolor{black}{Second contribution} \\
 513 \vspace{1cm}
 514 \bf  \Large \textcolor{blue}{Energy optimization of Heterogeneous platform}
 515 \end{center}
 516  \end{frame}
 517
 518
 519
 520 %%%%%%%%%%%%%%%%%%%%
 521 %%    SLIDE 23    %%
 522 %%%%%%%%%%%%%%%%%%%%
 523
 524 \begin{frame}{Objectives}
 525         \begin{femtoBlock}{} \vspace{-12 mm}
 526                 \begin{itemize} \small
 527                   \item   Evaluating the  \textcolor{blue}{new energy and performance models} of message passing  applications with iterations running
 528                           over a heterogeneous platform (cluster and Grid). \medskip
 529                    \item  Study the effect of the scaling factor $S$ on both \textcolor{blue}{energy consumption  and the performance} of
 530                           message passing iterative applications.    \medskip
 531
 532                    \item  Computing  the vector of scaling factors ($S_1, S_2, ..., S_n$)  producing \textcolor{blue} {optimal trade-off} between
 533                            energy consumption and performance.
 534                 \end{itemize}
 535
 536           \vspace{-10 mm}
 537         \end{femtoBlock}
 538 \end{frame}
 539
 540
 541 %%%%%%%%%%%%%%%%%%%%
 542 %%    SLIDE 24    %%
 543 %%%%%%%%%%%%%%%%%%%%
 544 \begin{frame}{The execution time model}
 545       \vspace{-8 mm}
 546      \begin{figure}[!t]
 547        \centering
 548        \includegraphics[scale=0.5]{c2/commtasks}
 549        \label{fig:heter}
 550      \end{figure}
 551        \vspace{-12 mm}
 552        \medskip
 553
 554     \begin{block}{\small The execution time prediction model}
 555     \begin{equation}
 556      \label{eq:perf}
 557      \small\textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
 558     \end{equation}
 559     \end{block}
 560  \small  Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{communication~times + slack~times}$
 561
 562 \end{frame}
 563
 564  %%%%%%%%%%%%%%%%%%%%
 565 %%    SLIDE 25    %%
 566 %%%%%%%%%%%%%%%%%%%%
 567  \begin{frame}{The energy consumption model}
 568     -The overall energy consumption of a message passing synchronous distributed application executed over a
 569     heterogeneous platform is computed as  follows:
 570     \begin{multline}
 571      \label{eq:energy}
 572      \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
  573      \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i)}))}
 574       \hspace{10 mm}
 575     \end{multline}
 576     \underline{where}:\\
 577     \textcolor{blue}{N} : is the number of nodes.
 578 \end{frame}
 579
 580
 581 %%%%%%%%%%%%%%%%%%%%
 582 %%    SLIDE 26    %%
 583 %%%%%%%%%%%%%%%%%%%%
 584   \begin{frame}{The  energy  model example for heter. cluster}
 585   \vspace{-0.5cm}
 586  \begin{figure}
 587   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
 588   \end{figure}
 589  \end{frame}
 590
 591
 592
 593
 594 %%%%%%%%%%%%%%%%%%%%
 595 %%    SLIDE 27    %%
 596 %%%%%%%%%%%%%%%%%%%%
 597 \begin{frame}{The trade-off between energy  and performance}
 598     \vspace{-7 mm}
 599     \begin{figure}
 600      \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
 601     \end{figure}
 602     \vspace{-7 mm}
 603     \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}{$E_{norm} = \frac{E_{reduced}}
 604      {E_{Max}}$}. \\
 605      \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
 606
 607      \begin{block}{\small The tradeoff model}
 608      \begin{equation}
 609       \label{eq:max}
 610       \textcolor{red}{MaxDist} =
 611       \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
 612        (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
 613        \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
 614       \end{equation}
 615      \end{block}
 616 \end{frame}
 617
 618
 619 %%%%%%%%%%%%%%%%%%%%
 620 %%    SLIDE 28    %%
 621 %%%%%%%%%%%%%%%%%%%%
 622  \begin{frame}{The scaling algorithm for heter. cluster}
 623
 624  \centering
 625    \includegraphics[width=.52\textwidth]{algo-heter}
 626  \end{frame}
 627
 628
 629  %%%%%%%%%%%%%%%%%%%%
 630 %%    SLIDE 29    %%
 631 %%%%%%%%%%%%%%%%%%%%
 632  \begin{frame}{The scaling algorithm example}
 633  \vspace{-0.5cm}
 634  \centering
 635
 636   \begin{figure}
 637   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
 638   \end{figure}
 639 \end{frame}
 640
 641
 642
 643
 644 %%%%%%%%%%%%%%%%%%%%
 645 %%    SLIDE 30    %%
 646 %%%%%%%%%%%%%%%%%%%%
 647 \begin{frame}{Experiments over heterogeneous cluster  }
 648         \begin{itemize}
 649          \small
  650            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 651            \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
 652            \item Four types of processors with different computing powers were used.\medskip
  653            \item We ran the benchmarks on different numbers of nodes ranging from 4 to 144 nodes.\medskip
 654            \item The total power consumption of the chosen CPUs  is composed of $80\%$ for dynamic power and $20\%$ for static power.
 655                   \medskip
 656
 657         \end{itemize}
 658
 659 \end{frame}
 660
 661
 662 %%%%%%%%%%%%%%%%%%%%
 663 %%    SLIDE 31    %%
 664 %%%%%%%%%%%%%%%%%%%%
 665 \begin{frame}{The experimental results}
 666    \vspace{-5 mm}
 667    \begin{figure}[!t]
 668    \centering
 669     \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
 670
  671     \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
  672      for the NAS benchmarks class C executed over 8 nodes}
 673
 674    \end{figure}
 675 \end{frame}
 676
 677
 678
 679 %%%%%%%%%%%%%%%%%%%%
 680 %%    SLIDE 32    %%
 681 %%%%%%%%%%%%%%%%%%%%
 682 \begin{frame}{The experimental results}
 683    \vspace{-5 mm}
 684    \begin{figure}[!t]
 685    \centering
 686
 687     \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
 688
  689    \textcolor{blue}{On average, it degrades the performance by \textcolor{red}{3.8\%}
  690      for the NAS benchmarks class C executed over 8 nodes}
 691      \end{figure}
 692 \end{frame}
 693
 694
 695
 696 %%%%%%%%%%%%%%%%%%%%
 697 %%    SLIDE 33    %%
 698 %%%%%%%%%%%%%%%%%%%%
 699 \begin{frame}{The results of the three powers scenarios}
 700    \vspace{-5 mm}
 701    \begin{figure}[!t]
 702    \centering
 703    \includegraphics[width=.55\textwidth]{c2/three_power.pdf}
 704    \vspace{10 mm}
 705    \includegraphics[width=.55\textwidth]{c2/three_scenarios.pdf}
 706    \end{figure}
 707 \end{frame}
 708
 709
 710
 711 %%%%%%%%%%%%%%%%%%%%
 712 %%    SLIDE 34    %%
 713 %%%%%%%%%%%%%%%%%%%%
  714 \begin{frame}{Comparing our method}
 715     The proposed method (MaxDist) was compared to the EDP algorithm that minimizes  the \textcolor{blue}{
 716     $\mathit{energy}\times \mathit{delay}$} value.
 717     \vspace{-5 mm}
 718     \begin{figure}[!t]
 719     \centering
 720     \includegraphics[width=.55\textwidth]{c2/avg_compare.pdf}
 721
 722     \includegraphics[width=.55\textwidth]{c2/compare_with_EDP.pdf}
 723     \end{figure}
 724 \end{frame}
 725
 726
 727
 728
 729 %%%%%%%%%%%%%%%%%%%%
 730 %%    SLIDE 35    %%
 731 %%%%%%%%%%%%%%%%%%%%
 732 \begin{frame}{Energy optimization of grid platform}
 733    \begin{figure}[!t]
 734     \centering
 735              \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
 736
 737            \small  10 sites distributed over France and Luxembourg
 738         \end{figure}
 739 \end{frame}
 740
 741
 742 %%%%%%%%%%%%%%%%%%%%
 743 %%    SLIDE 36    %%
 744 %%%%%%%%%%%%%%%%%%%%
 745  \begin{frame}{Performance, Energy and trade-off models} \small
 746   \begin{block}{\small The performance model of grid}
 747     \begin{equation}
 748   \label{eq:perf}
 749   \Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
 750   +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
 751 \end{equation}
 752     \end{block}
 753
 754
 755  \begin{block}{\small The energy model of grid}\small
 756     \begin{equation}
 757   \label{eq:energy}
  758  E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +
 759  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
 760 \end{equation}
 761     \end{block}
 762
 763 \begin{block}{\small The trade-off model of grid}
 764 \small
 765     \begin{equation}
 766    \label{eq:max}
 767   \MaxDist =
 768   \mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
 769       (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
 770        \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
 771 \end{equation}
 772     \end{block}
 773
 774  \end{frame}
 775
 776
 777
 778 %%%%%%%%%%%%%%%%%%%%
 779 %%    SLIDE 37    %%
 780 %%%%%%%%%%%%%%%%%%%%
 781  \begin{frame}{Experiments over Grid'5000}
 782   \centering
 783
 784           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
 785
 786           \vspace{-3 mm}
  787           \textcolor{blue}{The experiments were executed over one-site and two-site scenarios}
 788
 789               \vspace{1mm}
 790
 791           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
 792
 793         \textcolor{blue}{We used Grid'5000 power measurement tools}
 794 \end{frame}
 795
 796
 797
 798
 799 %%%%%%%%%%%%%%%%%%%%
 800 %%    SLIDE 38    %%
 801 %%%%%%%%%%%%%%%%%%%%
 802 \begin{frame}{Experiments over Grid'5000}
 803
 804    \begin{minipage}{0.4\textwidth}
  805        \textcolor{blue}{Executing the NAS class D on 16 nodes saves energy by
 806         \textcolor{red}{30\%}}
 807    \end{minipage}
 808      \begin{minipage}{0.55\textwidth}
 809         \begin{figure}[h!]
 810           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
 811      \end{figure}
 812 \end{minipage}
 813
 814          \begin{minipage}{0.4\textwidth}
  815            \textcolor{blue}{Executing the NAS class D on 16 nodes degrades the
 816                 performance by \textcolor{red}{3.2\%}}
 817         \end{minipage}
 818        \begin{minipage}{0.55\textwidth}
 819          \begin{figure}[h!]
 820            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
 821          \end{figure}
 822           \end{minipage}
 823  \end{frame}
 824
 825
 826
 827 %%%%%%%%%%%%%%%%%%%%
 828 %%    SLIDE 39    %%
 829 %%%%%%%%%%%%%%%%%%%%
 830 \begin{frame}{Experiments over Grid'5000}
 831    \textcolor{blue}{One core  and Multi-cores per node results:}
 832
 833   \begin{figure}[h!]
 834   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
 835   \hspace{0.3cm}
 836   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
 837   \end{figure}
 838
 839   \centering \small \textcolor{blue}{Using multi-core per node scenario decreases the computations to communications ratio}.
 840 \end{frame}
 841
 842
 843
 844 %\begin{frame}{Summary}
 845 %\begin{itemize}
 846      % \small
 847         % \item  Two scaling algorithm were applies to \textcolor{blue}{heterogeneous %cluster} and \textcolor{blue}{grid}.
 848         % \item  A new \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
 849       %   \item  The experimental results ere conducted over \textcolor{blue}{SimGrid}  simulators and real
 850           %test-bed \textcolor{blue}{Grid'5000}.
 851
 852          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
 853         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
 854       %    clusters.
 855
 856          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
 857         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
 858
 859        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
 860      %    \end{itemize}
 861 %\end{frame}
 862
 863
 864 %%%%%%%%%%%%%%%%%%%%
 865 %%    SLIDE 40    %%
 866 %%%%%%%%%%%%%%%%%%%%
 867 \begin{frame}{Continuation}
 868 \subsection{\small {3.3 Energy optimization of asynchronous applications}}
 869 \begin{center}
 870 \bf \textcolor{black}{Third contribution} \\
 871 \vspace{1cm}
 872 \bf  \Large \textcolor{blue}{Energy optimization of asynchronous applications}
 873 \end{center}
 874  \end{frame}
 875
 876
 877
 878 %%%%%%%%%%%%%%%%%%%%
 879 %%    SLIDE 41   %%
 880 %%%%%%%%%%%%%%%%%%%%
 881 \begin{frame}{Problem definition}\vspace{0.8 mm}
 882 \textcolor{blue}{Executing the parallel iterative application with synchronous communications }
 883 \vspace{-8 mm}
 884 \begin{figure}
 885   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
 886   \end{figure}
 887 \end{frame}
 888
 889
 890
 891 %%%%%%%%%%%%%%%%%%%%
 892 %%    SLIDE 42   %%
 893 %%%%%%%%%%%%%%%%%%%%
 894 \begin{frame}{Problem definition}\vspace{0.8 mm}
 895 \textcolor{blue}{Executing the parallel iterative application with asynchronous communications }
 896 \vspace{-8 mm}
 897 \begin{figure}
 898   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
 899   \end{figure}
 900 \end{frame}
 901
 902
 903
 904 %%%%%%%%%%%%%%%%%%%%
 905 %%    SLIDE 43   %%
 906 %%%%%%%%%%%%%%%%%%%%
 907 \begin{frame}{Solution}\vspace{0.8mm}
 908 \textcolor{blue}{Using asynchronous communications with DVFS }
 909 \vspace{-8 mm}
 910 \begin{figure}
 911   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
 912   \end{figure}
 913 \end{frame}
 914
 915
 916
 917
 918 %%%%%%%%%%%%%%%%%%%%
 919 %%    SLIDE 44   %%
 920 %%%%%%%%%%%%%%%%%%%%
 921 \begin{frame}{The performance models}
 922
 923 \begin{block}{\small The performance model of Asynch. Applications}\small
 924 \begin{equation}
 925   \label{eq:asyn_time}
 926   \Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
 927 \end{equation}
 928 \end{block}
 929
 930
 931 \begin{block}{\small The performance model of Hybrid Applications}\small
 932 \begin{equation}
 933   \label{eq:asyn_perf}
 934   \Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
 935    \min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
 936 \end{equation}
 937 \end{block}
 938
 939
 940 \end{frame}
 941
 942
 943
 944 %%%%%%%%%%%%%%%%%%%%
 945 %%    SLIDE 45   %%
 946 %%%%%%%%%%%%%%%%%%%%
 947 \begin{frame}{The energy consumption models}
 948
 949 \begin{block}{\small The energy model of Asynch. Applications}\small
 950 \begin{equation}
 951   \label{eq:asyn_energy1}
 952  E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
 953 \end{equation}
 954 \end{block}
 955
 956
 957 \begin{block}{\small The energy model of Hybrid Applications}\small
 958 \begin{multline}
 959   \label{eq:asyn_energy}
 960  E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
 961  ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
 962 \end{multline}
 963 \end{block}
 964 \end{frame}
 965
 966
 967
 968 %%%%%%%%%%%%%%%%%%%%
 969 %%    SLIDE 46   %%
 970 %%%%%%%%%%%%%%%%%%%%
 971 \begin{frame}{The scaling algorithm for Asynch.  applications}
 972 \vspace{-0.1 mm}
 973 \centering
 974 \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
 975 \end{frame}
 976
 977
 978
 979 %%%%%%%%%%%%%%%%%%%%
 980 %%    SLIDE 47   %%
 981 %%%%%%%%%%%%%%%%%%%%
 982 \begin{frame}{The experimental results}
 983    \vspace{-5 mm}
 984    \begin{figure}[!t]
 985    \centering
 986     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
 987    \end{figure}
 988    \begin{itemize}
 989       \small
 990         \item Executing the iterative multi-splitting method over a simulated grid.
 991         \item Executing the iterative multi-splitting method over the Grid'5000 test-bed.
 992    \end{itemize}
 993 \end{frame}
 994
 995
 996
 997 %%%%%%%%%%%%%%%%%%%%
 998 %%    SLIDE 48   %%
 999 %%%%%%%%%%%%%%%%%%%%
1000 \begin{frame}{The simulation results}
1001 \centering \small \textcolor{blue}{The best scenario in terms of energy and performance is the Async. MS with Sync. DVFS}
1002
1003 \centering
1004     \includegraphics[scale=0.46]{c3/energy_saving.eps}
1005
1006  \centering  The average energy saving = \textcolor{red}{22\%}
1007 \end{frame}
1008
1009
1010
1011 %%%%%%%%%%%%%%%%%%%%
1012 %%    SLIDE 49   %%
1013 %%%%%%%%%%%%%%%%%%%%
1014 \begin{frame}{The simulation results}
1015 \centering
1016
1017      \includegraphics[scale=0.46]{c3/perf_degra.eps}
1018
1019  \centering    The average speed-up = \textcolor{red}{5.72\%}
1020 \end{frame}
1021
1022
1023
1024 %%%%%%%%%%%%%%%%%%%%
1025 %%    SLIDE 50   %%
1026 %%%%%%%%%%%%%%%%%%%%
1027  \begin{frame}{The Grid'5000 results}
1028    \vspace{-20 mm}
1029    \begin{figure}[!t]
1030    \centering
1031    \hspace{-8 mm}
1032     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1033     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1034    \end{figure}
1035     \vspace{-5 mm}
1036      \centering
1037    The energy saving = \textcolor{red}{26.93\%}, speed-up = \textcolor{red}{21.48\%}
1038 \end{frame}
1039
1040
1041 %%%%%%%%%%%%%%%%%%%%
1042 %%    SLIDE 51   %%
1043 %%%%%%%%%%%%%%%%%%%%
1044 \begin{frame}{The comparison results}
1045  \centering
1046     \includegraphics[width=.5\textwidth]{c3/compare.eps}
1047
1048     \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1049 \end{frame}
1050
1051
1052
1053
1054 %%%%%%%%%%%%%%%%%%%%
1055 %%    SLIDE 52  %%
1056 %%%%%%%%%%%%%%%%%%%%
1057 \begin{frame}{Conclusions}
1058 \section{Conclusions}
1059 \begin{itemize}
1060
1061 \small  \barrow  We have proposed \textcolor{blue}{new energy consumption and performance} models for
1062      synchronous and asynchronous parallel applications with iterations.
1063
1064
1065 \small \barrow The parallel applications with iterations were executed over different parallel architectures such as \textcolor{blue}{homogeneous clusters, heterogeneous clusters and
1066 grids}.
1067
1068 \small \barrow We have proposed a \textcolor{blue}{new objective function} to optimize both the energy consumption and the performance.
1069
1070 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1071
1072 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1073 Multi-splitting} method.
1074
1075 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator and over the Grid'5000 testbed}.
1076
1077 \small  \barrow All the proposed methods were compared with either the \textcolor{blue}{Rauber and Rünger method} or the \textcolor{blue}{EDP objective function}.
1078
1079
1080 \end{itemize}
1081 \end{frame}
1082
1083
1084
1085 %%%%%%%%%%%%%%%%%%%%
1086 %%    SLIDE 53   %%
1087 %%%%%%%%%%%%%%%%%%%%
1088 \begin{frame}{Publications}
1089
1090 \begin{block}{\small Journal Articles }\scriptsize
1091 \begin{enumerate}[$\lbrack$1$\rbrack$]
1092
1093 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational
1094       Science}, 2016.
1095
1096 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for
1097       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016 (submitted).
1098
1099 \end{enumerate}
1100 \end{block}
1101
1102
1103 \begin{block}{\small Conference Articles }\scriptsize
1104
1105 \begin{enumerate}[$\lbrack$1$\rbrack$]
1106
1107 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1108       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014}, pp.
1109       225-230. IEEE Computer Society, Milan, Italy (2014).
1110
1111 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1112       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1113       \textit{The 16\textsuperscript{th} PDSEC}, pp. 922-931. IEEE Computer Society, India (2015).
1114
1115 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
1116       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society,
1117       Paris (2016).
1118
1119 \end{enumerate}
1120
1121 \end{block}
1122 \end{frame}
1123
1124
1125 %%%%%%%%%%%%%%%%%%%%
1126 %%    SLIDE 54   %%
1127 %%%%%%%%%%%%%%%%%%%%
1128 \begin{frame}{Perspectives}
1129 \section{Perspectives}
1130
1131 \begin{itemize}
1132
1133 \small  \barrow We will adapt the proposed algorithms to take into consideration the
1134 \textcolor{blue}{variability between some iterations}.
1135
1136 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1137
1138 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1139
1140 \small \barrow The results returned by the energy models should be compared to the values given by  \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1141 \end{itemize}
1142
1143 \end{frame}
1144
1145 %%%%%%%%%%%%%%%%%%%%
1146 %%    SLIDE 55  %%
1147 %%%%%%%%%%%%%%%%%%%%
1148 \begin{frame}{Fin} \vspace{-10 mm}
1149
1150             \centering \Large \textcolor{blue}{Thank You for Listening}
1151
1152             \vspace{2cm}
1153             \centering \textcolor{blue}{ {\Large Questions?}}
1154
1155 \end{frame}
1156 \end{document}
1157 %  _____ ___ _   _
1158 % |  ___|_ _| \ | |
1159 % | |_   | ||  \| |
1160 % |  _|  | || |\  |
1161 % |_|   |___|_| \_|
1162 %