thesis-presentation/AhmedSlides.tex

   1  \documentclass{beamer}
   2 \usepackage{beamerthemefemto}
   3 \usepackage[latin1]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
   6 \usepackage{algorithm,algorithmicx,algpseudocode}
   7 \usepackage{graphicx,graphics}
   8 \usepackage{subfig}
   9 \usepackage{listings}
  10 \usepackage{colortbl}
  11 \usepackage{amsmath}
  12 \usepackage{xspace}
  13  \usepackage{movie15}
  14  \usepackage{animate}
  15 \usepackage{xmpmulti}
  16  \newcommand{\AG}[2][inline]{% \AG[opts]{text}: green \todo note prefixed with a bold sans-serif AG: tag; [opts] defaults to inline and is passed on to \todo (todonotes is loaded further down in this preamble)
  17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}
  18 \newcommand{\JC}[2][inline]{% \JC[opts]{text}: same construction as \AG, with a red!10 background and a JC: tag
  19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
  20 \definecolor{myblue}{RGB}{0,29,119}
  21 \newcommand{\Xsub}[2]{{\ensuremath{#1_{\mathit{#2}}}}} % \Xsub{X}{word}: symbol X with an italic word subscript, usable in text or math; the subscript is braced so _ always receives a proper group
  22 \usepackage{fixltx2e}
  23 %% \fxheight{sub}: used to put some subscripts lower, and make them more legible, by prefixing a non-empty argument with a zero-width strut of height 1.52ex; an empty argument produces nothing. Emptiness is tested with \detokenize so that arguments whose first two tokens are equal (e.g. 11 or ii) are not mistaken for empty.
  24 \newcommand{\fxheight}[1]{\if\relax\detokenize{#1}\relax\else\rule{0pt}{1.52ex}#1\fi}
  25
  26 \newcommand{\CL}{\Xsub{C}{L}} % from here on: shorthand symbols, most of them built with \Xsub (letter + italic word subscript)
  27 \newcommand{\Dist}{\mathit{Dist}} % plain italic word, no subscript
  28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
  29 \newcommand{\Eind}{\Xsub{E}{ind}}
  30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
  31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
  32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
  33 \newcommand{\Es}{\Xsub{E}{S}}
  34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}} % optional [idx] is appended as a further subscript, pulled closer with \!; the trailing _ sits outside \Xsub's \ensuremath, so use in math mode
  35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}} % optional [idx] goes through \fxheight before being appended as a subscript; math mode only, for the same reason as \Fdiff
  36 \newcommand{\Fnew}{\Xsub{F}{new}}
  37 \newcommand{\Vnew}{\Xsub{V}{new}}
  38 \newcommand{\Vmax}{\Xsub{V}{max}}
  39 \newcommand{\Ileak}{\Xsub{I}{leak}}
  40 \newcommand{\Kdesign}{\Xsub{K}{design}}
  41 \newcommand{\MaxDist}{\mathit{Max}\Dist} % italic Max followed by \Dist
  42 \newcommand{\MinTcm}{\mathit{Min}\Tcm} % italic Min followed by \Tcm, which is defined further down
  43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
  44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
  45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
  46
  47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
  48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
  49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
  50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
  51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}} % optional [idx] appended as a plain subscript, without \fxheight
  52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
  53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
  54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
  55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
  56 \newcommand{\Tnew}{\Xsub{T}{New}}
  57 \newcommand{\Told}{\Xsub{T}{Old}}
  58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
  59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
  60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
  61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
  62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
  63  \usepackage{pifont}
  64 \usepackage{xcolor}
  65 \definecolor{myblue}{RGB}{0,29,119}
  66 \usepackage[textsize=footnotesize]{todonotes}
  67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]} % list item whose label is pifont dingbat 110, coloured myblue
  68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]} % list item whose label is pifont dingbat 228, coloured myblue
  69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]} % list item whose label is pifont dingbat 227, coloured myblue
  70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
  71
  72
  73
  74 %\title{Energy Consumption Optimization of Parallel Applications with
  75 %Iterations using CPU Frequency Scaling}
  76 \vspace{2cm}
  77
  78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-1cm}
  79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under Supervision: \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ University of Franche-Comté - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
  80
  81 \date{}
  82 \vspace{-3cm}
  83 %  ____  _____ ____  _   _ _____
  84 % |  _ \| ____| __ )| | | |_   _|
  85 % | | | |  _| |  _ \| | | | | |
  86 % | |_| | |___| |_) | |_| | | |
  87 % |____/|_____|____/ \___/  |_|
  88 %
  89 \begin{document}
  90 \setbeamertemplate{background}{\titrefemto}
  91
  92 %%%%%%%%%%%%%%%%%%%%
  93 %%    SLIDE 01    %%
  94 %%%%%%%%%%%%%%%%%%%%
  95 \begin{frame}[plain]
  96 \vspace{1cm}
  97 \centering
  98    \titlepage
  99 \end{frame}
 100
 101
 102 %%%%%%%%%%%%%%%%%%%%
 103 %%    SLIDE 02    %%
 104 %%%%%%%%%%%%%%%%%%%%
 105 \setbeamertemplate{background}{\pagefemto}
 106 \begin{frame}{Outline}
 107
 108 \setbeamertemplate{section in toc}[sections numbered]
 109 \tableofcontents
 110 \end{frame}
 111
 112
 113 %%%%%%%%%%%%%%%%%%%%
 114 %%    SLIDE 03    %%
 115 %%%%%%%%%%%%%%%%%%%%
 116 \begin{frame}{Introduction and problem definition}
 117  \section{\small {Introduction and Problem definition}}
 118    \bf \textcolor{blue}{Approaches to increase the computing power:}
 119      \begin{minipage}{0.5\textwidth}
 120       \textcolor{blue}{1)} \small  \bf \textcolor{black}{Increasing the frequency of a  processor}
 121     \end{minipage}%
 122     \begin{minipage}{0.6\textwidth}
 123
 124 \begin{figure}[h!]
 125
 126     \includegraphics[width=0.7\textwidth]{fig/freq-years}
 127     \end{figure}
 128     \end{minipage}%
 129     \vspace{0.2cm}
 130     \begin{minipage}{0.5\textwidth}
 131      \textcolor{blue}{2)} \small \bf \textcolor{black}{Increasing the number of nodes}
 132     \end{minipage}%
 133     \begin{minipage}{0.6\textwidth}
 134     \begin{figure}[h!]
 135      \includegraphics[width=0.7\textwidth]{fig/clusters}
 136     \end{figure}
 137     \end{minipage}%
 138  \end{frame}
 139
 140
 141
 142
 143  %%%%%%%%%%%%%%%%%%%
 144 %%    SLIDE 04   %%
 145 %%%%%%%%%%%%%%%%%%%%
 146 \begin{frame}{Introduction and problem definition}
 147  \vspace{0.1cm}
 148  \bf \textcolor{blue}{Techniques for energy consumption reduction}
 149
 150      \textcolor{blue}{1)} \bf \textcolor{black}{Switch-off idle nodes method}
 151     \vspace{-0.9cm}
 152     \begin{figure}
 153      \animategraphics[autopause,loop,controls,scale=0.25,buttonsize=0.2cm]{200}{on-off/a-}{0}{69}
 154     \end{figure}
 155  \end{frame}
 156
 157 %%%%%%%%%%%%%%%%%%%%
 158 %%    SLIDE 06    %%
 159 %%%%%%%%%%%%%%%%%%%%
 160 \begin{frame}{Techniques for energy consumption reduction}
 161
 162   \textcolor{blue}{2)} \bf \textcolor{black}{Dynamic voltage and frequency scaling (DVFS)}
 163      \vspace{-0.5cm}
 164     \begin{figure}
 165      \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{109}
 166     \end{figure}
 167     \end{frame}
 168
 169
 170
 171 %%%%%%%%%%%%%%%%%%%%
 172 %%    SLIDE 07    %%
 173 %%%%%%%%%%%%%%%%%%%%
 174 \begin{frame}{Motivations}
 175 \vspace{0.05cm}
 176 \section{\small {Motivations}}
 177 \textcolor{blue}{Why we used the DVFS method:}
 178 \vspace{-0.49cm}
 179 \begin{minipage}{0.5\textwidth}
 180     \vspace{-0.49cm}
 181       \begin{itemize}
 182        \item  \small \textcolor{black}{The largest share of the power is consumed by the processor\textsuperscript{1}. }
 183
 184          \end{itemize}
 185
 186     \end{minipage}%
 187     \begin{minipage}{0.5\textwidth}
 188      \vspace{-0.49cm}
 189     \begin{figure}[h!]
 190      \includegraphics[width=0.85\textwidth]{fig/node-power}
 191
 192     \end{figure}
 193     \end{minipage}%
 194
 195   \begin{itemize} \item \small  \textcolor{black}{It is used to reduce the energy consumption  while keeping all the nodes working, thus  it is better suited to parallel computing.}
 196                 \item \small \textcolor{black}{It has a very small overhead compared to the method of switching off the idle nodes.}  \end{itemize}
 197
 198 \vspace{-0.12cm}
 199
 200  \begin{block}{\textcolor{white}{Challenge and Objective}}
 201
 202         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it degrades the performance simultaneously.}
 203
 204                 \vspace{0.1cm}
 205  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel applications.}
 206 \end{block}
 207
 208  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
 209 for a warehouse-sized computer.
 210
 211     \end{frame}
 212
 213
 214
 215 %%%%%%%%%%%%%%%%%%%%
 216 %%    SLIDE 08    %%
 217 %%%%%%%%%%%%%%%%%%%%
 218
 219
 220 \begin{frame}{Contribution}
 221
 222 \section{\small {Energy optimization of homogeneous platform}}
 223 \begin{center}
 224 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
 225 \end{center}
 226  \end{frame}
 227
 228
 229
 230 %%%%%%%%%%%%%%%%%%%%
 231 %%    SLIDE 09    %%
 232 %%%%%%%%%%%%%%%%%%%%
 233
 234 \begin{frame}{Objectives}
 235         \begin{femtoBlock}{} \vspace{-12 mm}
 236                 \begin{itemize} \small
 237                    \item  Studying the effect of the scaling factor $S$ on \textbf{energy consumption and performance} of parallel  applications with iterations such as NAS
 238                           Benchmarks. \includegraphics[width=.06\textwidth]{c1/nasa.pdf} \medskip
 239
 240                    \item  Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
 241                    \item  Proposing an algorithm for selecting the scaling factor $S$ producing \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
 242                    \item  Comparing the proposed algorithm to existing methods.
 243
 244
 245                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
 246                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
 247                 \end{itemize}
 248                  %\let\thefootnote\relax\footnote{}
 249           \vspace{-10 mm}
 250         \end{femtoBlock}
 251 \end{frame}
 252
 253
 254
 255 %%%%%%%%%%%%%%%%%%%%
 256 %%    SLIDE 10    %%
 257 %%%%%%%%%%%%%%%%%%%%
 258
 259
 260 \begin{frame}{Execution of synchronous parallel tasks}
 261 \vspace{-0.5 cm}
 262 \begin{figure}
 263   \centering
 264   \subfloat[Sync. imbalanced communications]{%
 265     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
 266   \subfloat[Sync. imbalanced computations]{%
 267     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
 268  % \caption{Parallel tasks on homogeneous platform}
 269   \label{fig:homo}
 270 \end{figure}
 271
 272  \end{frame}
 273
 274
 275
 276
 277 %%%%%%%%%%%%%%%%%%%%
 278 %%    SLIDE 11   %%
 279 %%%%%%%%%%%%%%%%%%%%
 280 \begin{frame}{Energy model for homogeneous platform}
 281       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and static
 282        (\textcolor{red}{$P_s$}) power.
 283     \begin{equation}
 284      \label{eq:pd}
 285      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot C_L \cdot V^2 \cdot F}
 286    \end{equation}
 287     \scriptsize \underline{Where}: \\
 288     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm}  \textcolor{blue}{$C_L$}: load capacitance\\
 289     \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency}
 290    \begin{equation}
 291      \label{eq:ps}
 292      \small \textcolor{red}{P_s} = \textcolor{blue}{V \cdot N_{trans} \cdot K_{design} \cdot I_{leak}}
 293    \end{equation}
 294     \underline{Where}:\\
 295         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage.  \hspace{28 mm}   \textcolor{blue}{$N_{trans}$}: number of transistors. \\
 296         \textcolor{blue}{$K_{design}$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$I_{leak}$}: technology dependent
 297              parameter.}
 298 \end{frame}
 299
 300 %%%%%%%%%%%%%%%%%%%%
 301 %%    SLIDE 12   %%
 302 %%%%%%%%%%%%%%%%%%%%
 303
 304 \begin{frame}{Energy model for homogeneous platform}
 305
 306           The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{F_{max}}{F_{new}}$}.  \medskip
 307
 308
 309
 310         \begin{block}{\small Rauber and Rünger's energy model}
 311          $ E = P_{d} \cdot S_1^{-2} \cdot
 312          \left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
 313             P_{s} \cdot S_1  \cdot T_1 \cdot N$
 314         \end{block}
 315            \textcolor{blue}{$S_1$}: the max. scaling factor\\
 316            \textcolor{blue}{$P_{d}$}: the dynamic power\\
 317            \textcolor{blue}{$P_{s}$}: the static power\\
 318            \textcolor{blue}{$T_1$}: the time of the slowest task\\
 319            \textcolor{blue}{$T_i$}: the time of the other tasks\\
 320            \textcolor{blue}{$N$}:  the number of  nodes
 321
 322 \end{frame}
 323
 324
 325 %%%%%%%%%%%%%%%%%%%%
 326 %%    SLIDE 13   %%
 327 %%%%%%%%%%%%%%%%%%%%
 328 \begin{frame}{Performance evaluation of MPI programs}
 329         \begin{femtoBlock}{}
 330               \vspace{-5 mm}
 331               \begin{block}{\small Execution time prediction model}
 332                      \centering{ $ \textcolor{red}{T_{new}} = \textcolor{blue}{T_{Max Comp Old} \cdot S + T_{{Min Comm Old}}}$}
 333           \end{block}
 334           \vspace{10 mm}
 335            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
 336            \quad%
 337            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
 338             \vspace{5 mm}
 339
 340            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
 341            \end{femtoBlock}
 342 \end{frame}
 343
 344
 345
 346
 347  %%%%%%%%%%%%%%%%%%%%
 348 %%    SLIDE 14   %%
 349 %%%%%%%%%%%%%%%%%%%%
 350 \begin{frame}{Performance and energy reduction trade-off}
 351         \begin{femtoBlock}{} \vspace{-15 mm}
 352                \begin{figure}
 353      \centering
 354      \subfloat[\small  Real relation.]{%
 355      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
 356      \quad%
 357      \subfloat[\small Converted relation.]{%
 358      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
 359   \label{fig:rel}
 360  % \caption{The energy and performance relation}
 361 \end{figure}
 362
 363  Where:~~~ $\textcolor{blue}{Performance} = execution~time^{-1}$
 364
 365 %\vspace{-0.3cm}
 366       \small
 367          \begin{block}{\small Our objective function}
 368          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}
 369                     (\overbrace{P_{Norm}(S_j)}^{{\textcolor{blue}{Maximize}}} -
 370                      \overbrace{E_{Norm}(S_j)}^{{\textcolor{blue}{Minimize}}} )$}
 371
 372         \end{block}
 373         \end{femtoBlock}
 374
 375 \end{frame}
 376
 377 %%%%%%%%%%%%%%%%%%%%
 378 %%    SLIDE 15   %%
 379 %%%%%%%%%%%%%%%%%%%%
 380  \begin{frame}{Scaling factor selection algorithm}
 381 \vspace{-0.75cm}
 382      \begin{center}
 383       \includegraphics[width=.56 \textwidth]{c1/algo-homo}
 384      \end{center}
 385
 386 \end{frame}
 387
 388
 389 %%%%%%%%%%%%%%%%%%%%
 390 %%    SLIDE 16   %%
 391 %%%%%%%%%%%%%%%%%%%%
 392 \begin{frame}{Scaling algorithm example}
 393 \vspace{-0.75cm}
 394
 395      \begin{figure}
 396   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{159}
 397
 398   \end{figure}
 399 \end{frame}
 400
 401 %%%%%%%%%%%%%%%%%%%%
 402 %%    SLIDE 17   %%
 403 %%%%%%%%%%%%%%%%%%%%
 404 \begin{frame}{Experimental results }
 405       \begin{femtoBlock}{}
 406         \begin{itemize}
 407          \small
 408            \item The experiments are executed on the simulator SimGrid/SMPI v3.10.\medskip
 409            \item The proposed algorithm is applied to the NAS parallel benchmarks.\medskip
 410            \item Each node in the cluster has 18 frequency values from \textbf{2.5~GHz} to \textbf{800~MHz}.\medskip
 411            \item The proposed algorithm was evaluated over the A, B, C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
 412            \item $P_d=20\,\mathrm{W}$,  $P_s=4\,\mathrm{W}$.
 413                 \end{itemize}
 414         \end{femtoBlock}
 415 \end{frame}
 416
 417
 418 %%%%%%%%%%%%%%%%%%%%
 419 %%    SLIDE 18   %%
 420 %%%%%%%%%%%%%%%%%%%%
 421 \begin{frame}{Experimental results}
 422   \begin{femtoBlock}{}
 423       \centering {
 424      \includegraphics[width=.35\textwidth]{c1/ep}
 425      \includegraphics[width=.35\textwidth]{c1/cg}
 426      \includegraphics[width=.35\textwidth]{c1/bt}}
 427
 428      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
 429  \end{femtoBlock}
 430 \end{frame}
 431
 432
 433   %%%%%%%%%%%%%%%%%%%%
 434 %%    SLIDE 19   %%
 435 %%%%%%%%%%%%%%%%%%%%
 436 \begin{frame}{Results comparison}
 437          \begin{block}{\small Rauber and Rünger's optimal scaling factor}
 438            $S_{opt} = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{dyn}}{P_{static}} \cdot
 439             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
 440         \end{block}
 441     \centering {
 442          %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
 443          %\qquad
 444          %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
 445
 446
 447             \includegraphics[width=.55\textwidth]{c1/compare_c.pdf}}
 448
 449 \end{frame}
 450
 451
 452 %%%%%%%%%%%%%%%%%%%%
 453 %%    SLIDE 20   %%
 454 %%%%%%%%%%%%%%%%%%%%
 455 \begin{frame}{The proposed new energy model}
 456     \vspace{-0.75cm}
 457   \begin{figure}
 458   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
 459   \end{figure}
 460 \end{frame}
 461
 462
 463 %%%%%%%%%%%%%%%%%%%%
 464 %%    SLIDE 21   %%
 465 %%%%%%%%%%%%%%%%%%%%
 466 \begin{frame}{Comparing the new model with the Rauber model }
 467  \vspace{0.1cm}
 468  \centering
 469     \includegraphics[width=.45\textwidth]{c1/energy_con}
 470
 471     \includegraphics[width=.5\textwidth]{c1/compare-scales}
 472 \end{frame}
 473
 474
 475
 476
 477    % \begin{frame}{Summary}
 478      % \begin{femtoBlock}{}
 479      % \begin{itemize}
 480       %\small
 481        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
 482        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
 483          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip
 484          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
 485                % performance}. \medskip
 486          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
 487          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
 488          %\end{itemize}
 489
 490         %\end{femtoBlock}
 491 %\end{frame}
 492
 493
 494 %%%%%%%%%%%%%%%%%%%%
 495 %%    SLIDE 22    %%
 496 %%%%%%%%%%%%%%%%%%%%
 497
 498
 499 \begin{frame}{Contribution}
 500
 501 \section{\small {Energy optimization of heterogeneous platform}}
 502 \begin{center}
 503
 504
 505 \bf  \Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
 506 \end{center}
 507  \end{frame}
 508
 509
 510
 511 %%%%%%%%%%%%%%%%%%%%
 512 %%    SLIDE 23    %%
 513 %%%%%%%%%%%%%%%%%%%%
 514
 515 \begin{frame}{Objectives}
 516         \begin{femtoBlock}{} \vspace{-12 mm}
 517                 \begin{itemize} \small
 518                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running
 519                           over a heterogeneous platform (cluster and Grid). \medskip
 520                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
 521                           message passing iterative applications.    \medskip
 522
 523                    \item  Computing  the vector of scaling factors ($S_1, S_2, \dots, S_n$)  producing \textcolor{blue} {the optimal trade-off} between
 524                           the energy consumption and the performance.
 525                 \end{itemize}
 526
 527           \vspace{-10 mm}
 528         \end{femtoBlock}
 529 \end{frame}
 530
 531
 532 %%%%%%%%%%%%%%%%%%%%
 533 %%    SLIDE 24    %%
 534 %%%%%%%%%%%%%%%%%%%%
 535 \begin{frame}{The execution time model}
 536       \vspace{-8 mm}
 537      \begin{figure}[!t]
 538        \centering
 539        \includegraphics[scale=0.5]{c2/commtasks}
 540        \label{fig:heter}
 541      \end{figure}
 542        \vspace{-12 mm}
 543        \medskip
 544
 545     \begin{block}{\small The execution time prediction model}
 546     \begin{equation}
 547      \label{eq:perf}
 548      \small\textcolor{red}{ T_{new}} = \textcolor{blue}{\max_{i=1,2,\dots,N} ({TcpOld_i} \cdot S_{i}) + \min_{i=1,2,\dots,N} (Tcm_i)}
 549     \end{equation}
 550     \end{block}
 551  \small  Where: $ \textcolor{red}{Tcm} = \textcolor{blue}{communication~times + slack~times}$
 552
 553 \end{frame}
 554
 555  %%%%%%%%%%%%%%%%%%%%
 556 %%    SLIDE 25    %%
 557 %%%%%%%%%%%%%%%%%%%%
 558  \begin{frame}{The energy consumption model}
 559     The overall energy consumption of a message passing synchronous  application executed over
 560      a heterogeneous platform can be computed as  follows:
 561     \begin{multline}
 562      \label{eq:energy}
 563      \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
 564      \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i)}))}
 565       \hspace{10 mm}
 566     \end{multline}
 567     \underline{where}:\\
 568     \textcolor{blue}{$N$}: the number of nodes.
 569 \end{frame}
 570
 571
 572 %%%%%%%%%%%%%%%%%%%%
 573 %%    SLIDE 26    %%
 574 %%%%%%%%%%%%%%%%%%%%
 575   \begin{frame}{The  energy  model example for heter. cluster}
 576   \vspace{-0.5cm}
 577  \begin{figure}
 578   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{heter-model/a-}{0}{272}
 579   \end{figure}
 580  \end{frame}
 581
 582
 583
 584
 585 %%%%%%%%%%%%%%%%%%%%
 586 %%    SLIDE 27    %%
 587 %%%%%%%%%%%%%%%%%%%%
 588 %\begin{frame}{The trade-off between energy  and performance}
 589    % \vspace{-7 mm}
 590     %\begin{figure}
 591    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
 592    % \end{figure}
 593    % \vspace{-7 mm}
 594    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}}
 595     %{E_{Max}}$}. \\
 596     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
 597
 598    %  \begin{block}{\small The tradeoff model}
 599     % \begin{equation}
 600     %  \label{eq:max}
 601     %  \textcolor{red}{MaxDist} =
 602      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
 603       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
 604       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
 605       %\end{equation}
 606     % \end{block}
 607 %\end{frame}
 608
 609
 610 %%%%%%%%%%%%%%%%%%%%
 611 %%    SLIDE 28    %%
 612 %%%%%%%%%%%%%%%%%%%%
 613  \begin{frame}{The scaling algorithm for heter. cluster}
 614
 615  \centering
 616    \includegraphics[width=.52\textwidth]{algo-heter}
 617  \end{frame}
 618
 619
 620  %%%%%%%%%%%%%%%%%%%%
 621 %%    SLIDE 29    %%
 622 %%%%%%%%%%%%%%%%%%%%
 623  \begin{frame}{The scaling algorithm example}
 624  \vspace{-0.5cm}
 625  \centering
 626
 627   \begin{figure}
 628   \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{650}
 629   \end{figure}
 630 \end{frame}
 631
 632
 633
 634
 635 %%%%%%%%%%%%%%%%%%%%
 636 %%    SLIDE 30    %%
 637 %%%%%%%%%%%%%%%%%%%%
 638 \begin{frame}{Experiments over a heterogeneous cluster  }
 639         \begin{itemize}
 640          \small
 641            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 642            \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
 643            \item Four types of processors with different computing powers were used.\medskip
 644            \item We ran the benchmarks on different numbers of nodes ranging from 4 to 144 nodes.\medskip
 645            \item The total power consumption of the chosen CPUs  is assumed to be composed of $80\%$ for the dynamic power and $20\%$ for the static power.
 646                   \medskip
 647
 648         \end{itemize}
 649
 650 \end{frame}
 651
 652
 653 %%%%%%%%%%%%%%%%%%%%
 654 %%    SLIDE 31    %%
 655 %%%%%%%%%%%%%%%%%%%%
 656 \begin{frame}{The experimental results}
 657    \vspace{-5 mm}
 658    \begin{figure}[!t]
 659    \centering
 660     \includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
 661
 662     \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
 663      for the class C of the NAS benchmarks executed over 8 nodes}
 664
 665    \end{figure}
 666 \end{frame}
 667
 668
 669
 670 %%%%%%%%%%%%%%%%%%%%
 671 %%    SLIDE 32    %%
 672 %%%%%%%%%%%%%%%%%%%%
 673 \begin{frame}{The experimental results}
 674    \vspace{-5 mm}
 675    \begin{figure}[!t]
 676    \centering
 677
 678     \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
 679
 680    \textcolor{blue}{On average, it degrades the performance by \textcolor{red}{3.8\%}
 681      for the class C of the NAS benchmarks executed over 8 nodes}
 682      \end{figure}
 683 \end{frame}
 684
 685
 686
 687 %%%%%%%%%%%%%%%%%%%%
 688 %%    SLIDE 33    %%
 689 %%%%%%%%%%%%%%%%%%%%
 690 \begin{frame}{The results of the three power scenarios}
 691    \vspace{-5 mm}
 692    \begin{figure}[!t]
 693    \centering
 694    \includegraphics[width=.55\textwidth]{c2/three_power.pdf}
 695    \vspace{10 mm}
 696    \includegraphics[width=.55\textwidth]{c2/three_scenarios.pdf}
 697    \end{figure}
 698 \end{frame}
 699
 700
 701
 702 %%%%%%%%%%%%%%%%%%%%
 703 %%    SLIDE 34    %%
 704 %%%%%%%%%%%%%%%%%%%%
 705 \begin{frame}{Comparing the objective function to EDP}
 706
 707      EDP is the product of the energy consumption and the delay.
 708     \vspace{-5 mm}
 709     \begin{figure}[!t]
 710     \centering
 711     \includegraphics[width=.55\textwidth]{c2/avg_compare.pdf}
 712
 713     \includegraphics[width=.55\textwidth]{c2/compare_with_EDP.pdf}
 714     \end{figure}
 715 \end{frame}
 716
 717
 718
 719
 720 %%%%%%%%%%%%%%%%%%%%
 721 %%    SLIDE 35    %%
 722 %%%%%%%%%%%%%%%%%%%%
 723 %\begin{frame}{Energy optimization of grid platform}
 724   % \begin{figure}[!t]
 725    % \centering
 726          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
 727
 728         %   \small  10 sites distributed over France and Luxembourg
 729         %\end{figure}
 730 %\end{frame}
 731
 732
 733 %%%%%%%%%%%%%%%%%%%%
 734 %%    SLIDE 36    %%
 735 %%%%%%%%%%%%%%%%%%%%
 736 \begin{frame}{The grid architecture}
 737 \begin{center}
 738 \includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
 739 \end{center}
 740
 741  %\begin{frame}{Performance, Energy and trade-off models} \small
 742   %\begin{block}{\small The performance model of grid}
 743    % \begin{equation}
 744   %\label{eq:perf}
 745   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
 746  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
 747 %\end{equation}
 748     %\end{block}
 749
 750
 751  %\begin{block}{\small The energy model of grid}\small
 752   %  \begin{equation}
 753   %\label{eq:energy}
 754  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +
 755 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
 756 %\end{equation}
 757    % \end{block}
 758
 759 %\begin{block}{\small The trade-off model of grid}
 760 %\small
 761     %\begin{equation}
 762    %\label{eq:max}
 763   %\MaxDist =
 764   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
 765    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
 766     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
 767 %\end{equation}
 768    % \end{block}
 769
 770
 771  \end{frame}
 772
 773
 774
 775 %%%%%%%%%%%%%%%%%%%%
 776 %%    SLIDE 37    %%
 777 %%%%%%%%%%%%%%%%%%%%
 778  \begin{frame}{Experiments over Grid'5000}
 779   \centering
 780
 781           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
 782
 783           \vspace{-3 mm}
 784           \textcolor{blue}{Two experiments were conducted: over one site and two sites
 785           each one with three clusters }
 786
 787               \vspace{1mm}
 788
 789           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
 790
 791         \textcolor{blue}{Grid'5000 power measurement tools were used}
 792 \end{frame}
 793
 794
 795
 796
 797 %%%%%%%%%%%%%%%%%%%%
 798 %%    SLIDE 38    %%
 799 %%%%%%%%%%%%%%%%%%%%
 800 \begin{frame}{Experiments over Grid'5000}
 801
 802    \begin{minipage}{0.4\textwidth}
 803        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
 804         %\textcolor{red}{30\%}}
 805         \textcolor{blue}{The energy saving =  \textcolor{red}{30\%}}
 806    \end{minipage}
 807      \begin{minipage}{0.55\textwidth}
 808         \begin{figure}[h!]
 809           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
 810      \end{figure}
 811 \end{minipage}
 812
 813          \begin{minipage}{0.4\textwidth}
 814            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
 815                 %performance by \textcolor{red}{3.2\%}}
 816               \textcolor{blue}{The performance degradation  =  \textcolor{red}{3.2\%}}
 817         \end{minipage}
 818        \begin{minipage}{0.55\textwidth}
 819          \begin{figure}[h!]
 820            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
 821          \end{figure}
 822           \end{minipage}
 823  \end{frame}
 824
 825
 826
 827 %%%%%%%%%%%%%%%%%%%%
 828 %%    SLIDE 39    %%
 829 %%%%%%%%%%%%%%%%%%%%
 830 \begin{frame}{Experiments over Grid'5000}
 831    \textcolor{blue}{One core  and Multi-cores per node results:}
 832
 833   \begin{figure}[h!]
 834   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
 835   \hspace{0.3cm}
 836   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
 837   \end{figure}
 838
 839   \centering \small \textcolor{blue}{Using the multi-cores per node scenario decreases the computation-to-communication ratio}.
 840 \end{frame}
 841
 842
 843
 844 %\begin{frame}{Summary}
 845 %\begin{itemize}
 846      % \small
 847         % \item  Two scaling algorithm were applies to \textcolor{blue}{heterogeneous %cluster} and \textcolor{blue}{grid}.
 848         % \item  A new \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
 849       %   \item  The experimental results ere conducted over \textcolor{blue}{SimGrid}  simulators and real
 850           %test-bed \textcolor{blue}{Grid'5000}.
 851
 852          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
 853         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
 854       %    clusters.
 855
 856          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
 857         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
 858
 859        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
 860      %    \end{itemize}
 861 %\end{frame}
 862
 863
 864 %%%%%%%%%%%%%%%%%%%%
 865 %%    SLIDE 40    %%
 866 %%%%%%%%%%%%%%%%%%%%
 867 \begin{frame}{Contribution}
 868 \section{\small {Energy optimization of asynchronous applications}}
 869 \begin{center}
 870 \bf  \Large \textcolor{blue}{Energy optimization of asynchronous  message passing iterative applications}
 871 \end{center}
 872  \end{frame}
 873
 874
 875
 876 %%%%%%%%%%%%%%%%%%%%
 877 %%    SLIDE 41   %%
 878 %%%%%%%%%%%%%%%%%%%%
 879 \begin{frame}{Problem definition}\vspace{0.8 mm}
 880 \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
 881 \vspace{-8 mm}
 882 \begin{figure}
 883   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{syn/a-}{0}{503}
 884   \end{figure}
 885 \end{frame}
 886
 887
 888
 889 %%%%%%%%%%%%%%%%%%%%
 890 %%    SLIDE 42   %%
 891 %%%%%%%%%%%%%%%%%%%%
 892 \begin{frame}{Problem definition}\vspace{0.8 mm}
 893 \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
 894 \vspace{-8 mm}
 895 \begin{figure}
 896   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn/a-}{0}{440}
 897   \end{figure}
 898 \end{frame}
 899
 900
 901
 902 %%%%%%%%%%%%%%%%%%%%
 903 %%    SLIDE 43   %%
 904 %%%%%%%%%%%%%%%%%%%%
 905 \begin{frame}{Solution}\vspace{0.8mm}
 906 \textcolor{blue}{Using asynchronous communications with DVFS }
 907 \vspace{-8 mm}
 908 \begin{figure}
 909   \animategraphics[autopause,controls,scale=0.25,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{314}
 910   \end{figure}
 911 \end{frame}
 912
 913
 914
 915
 916 %%%%%%%%%%%%%%%%%%%%
 917 %%    SLIDE 44   %%
 918 %%%%%%%%%%%%%%%%%%%%
 919 %\begin{frame}{The performance models}
 920
 921 %\begin{block}{\small The performance model of Asynch. Applications}\small
 922 %\begin{equation}
 923   %\label{eq:asyn_time}
 924  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
 925 %\end{equation}
 926 %\end{block}
 927
 928
 929 %\begin{block}{\small The performance model of Hybrid Applications}\small
 930 %\begin{equation}
 931   %\label{eq:asyn_perf}
 932   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
 933    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
 934 %\end{equation}
 935 %\end{block}
 936
 937
 938 %\end{frame}
 939
 940
 941
 942 %%%%%%%%%%%%%%%%%%%%
 943 %%    SLIDE 45   %%
 944 %%%%%%%%%%%%%%%%%%%%
 945 %\begin{frame}{The energy consumption models}
 946
 947 %\begin{block}{\small The energy model of Asynch. Applications}\small
 948 %\begin{equation}
 949   %\label{eq:asyn_energy1}
 950 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
 951 %\end{equation}
 952 %\end{block}
 953
 954
 955 %\begin{block}{\small The energy model of Hybrid Applications}\small
 956 %\begin{multline}
 957   %\label{eq:asyn_energy}
 958  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
 959 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
 960 %\end{multline}
 961 %\end{block}
 962 %\end{frame}
 963
 964
 965
 966 %%%%%%%%%%%%%%%%%%%%
 967 %%    SLIDE 44   %%
 968 %%%%%%%%%%%%%%%%%%%%
 969 \begin{frame}{The performance and the energy models }
 970
 971 \centering
 972 \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
 973 \end{frame}
 974
 975
 976
 977
 978
 979 %%%%%%%%%%%%%%%%%%%%
 980 %%    SLIDE 46   %%
 981 %%%%%%%%%%%%%%%%%%%%
 982 \begin{frame}{The scaling algorithm for Asynch.  applications}
 983 \vspace{-0.1 mm}
 984 \centering
 985 \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
 986 \end{frame}
 987
 988
 989
 990 %%%%%%%%%%%%%%%%%%%%
 991 %%    SLIDE 47   %%
 992 %%%%%%%%%%%%%%%%%%%%
 993 \begin{frame}{The experiments}
 994    \vspace{-5 mm}
 995    \begin{figure}[!t]
 996    \begin{itemize}
 997       \small
 998         \item The architecture of the grid:
 999    \end{itemize}
1000     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
1001    \end{figure}
1002    \begin{itemize}
1003       \small
1004         \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1005         \item Evaluating the application over the simulator and Grid'5000.
1006    \end{itemize}
1007 \end{frame}
1008
1009
1010
1011 %%%%%%%%%%%%%%%%%%%%
1012 %%    SLIDE 48   %%
1013 %%%%%%%%%%%%%%%%%%%%
1014 \begin{frame}{The simulation results}
1015 \centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1016
1017 \centering
1018     \includegraphics[scale=0.42]{c3/energy_saving.eps}
1019
 1020  \centering  The average energy saving  = \textcolor{red}{22\%}
1021 \end{frame}
1022
1023
1024
1025 %%%%%%%%%%%%%%%%%%%%
1026 %%    SLIDE 49   %%
1027 %%%%%%%%%%%%%%%%%%%%
1028 \begin{frame}{The simulation results}
1029 \centering
1030
1031      \includegraphics[scale=0.42]{c3/perf_degra.eps}
1032
1033  \centering    The average speed-up  = \textcolor{red}{5.72\%}
1034 \end{frame}
1035
1036
1037
1038 %%%%%%%%%%%%%%%%%%%%
1039 %%    SLIDE 50   %%
1040 %%%%%%%%%%%%%%%%%%%%
1041  \begin{frame}{The Grid'5000 results}
1042    \vspace{-20 mm}
1043    \begin{figure}[!t]
1044    \centering
1045    \hspace{-8 mm}
1046     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1047     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1048    \end{figure}
1049     \vspace{-5 mm}
1050      \centering
1051 The energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1052 \end{frame}
1053
1054
1055 %%%%%%%%%%%%%%%%%%%%
1056 %%    SLIDE 51   %%
1057 %%%%%%%%%%%%%%%%%%%%
1058 \begin{frame}{The comparison results}
1059  \centering
1060     \includegraphics[width=.5\textwidth]{c3/compare.eps}
1061
1062     \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1063 \end{frame}
1064
1065
1066
1067
1068 %%%%%%%%%%%%%%%%%%%%
1069 %%    SLIDE 52  %%
1070 %%%%%%%%%%%%%%%%%%%%
1071 \begin{frame}{Conclusions}
1072 \section{Conclusions and Perspectives}
1073 \begin{itemize}
1074
1075 \small  \barrow  Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous and asynchronous parallel applications with iterations running over
1076 \textcolor{blue}{homogeneous and  heterogeneous clusters and grids}.
1077
1078
1079
1080 \small \barrow \textcolor{blue}{A new objective function} was proposed to optimize both the energy consumption and the performance.
1081
1082 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1083
1084 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1085 Multi-splitting} method.
1086
 1087 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over the \textcolor{blue}{Grid'5000 testbed}.
1088
1089 \small  \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's  method} or  \textcolor{blue}{the EDP objective function}.
1090
1091
1092 \end{itemize}
1093 \end{frame}
1094
1095
1096
1097 %%%%%%%%%%%%%%%%%%%%
1098 %%    SLIDE 53   %%
1099 %%%%%%%%%%%%%%%%%%%%
1100 \begin{frame}{Publications}
1101
1102 \begin{block}{\small Journal Articles }\scriptsize
1103 \begin{enumerate}[$\lbrack$1$\rbrack$]
1104
1105 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational
1106       Science}, 2016.
1107
1108 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for
 1109       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016 (submitted).
1110
1111 \end{enumerate}
1112 \end{block}
1113
1114
1115 \begin{block}{\small Conference Articles }\scriptsize
1116
1117 \begin{enumerate}[$\lbrack$1$\rbrack$]
1118
1119 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
 1120       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014},
 1121       pp.~225--230. IEEE Computer Society, Milan, Italy (2014).
1122
1123 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1124       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
 1125       \textit{The 16\textsuperscript{th} PDSEC}, pp.~922--931. IEEE Computer Society, India (2015).
1126
1127 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
 1128       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society,
1129       Paris (2016).
1130
1131 \end{enumerate}
1132
1133 \end{block}
1134 \end{frame}
1135
1136
1137 %%%%%%%%%%%%%%%%%%%%
1138 %%    SLIDE 54   %%
1139 %%%%%%%%%%%%%%%%%%%%
1140 \begin{frame}{Perspectives}
1141
1142 \begin{itemize}
1143
1144 \small  \barrow The proposed algorithms should  take into consideration the
1145 \textcolor{blue}{variability between some iterations}.
1146
1147 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1148
1149 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1150
 1151 \small \barrow The results returned by the energy models should be compared to the values given by \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1152 \end{itemize}
1153
1154 \end{frame}
1155
1156 %%%%%%%%%%%%%%%%%%%%
1157 %%    SLIDE 55  %%
1158 %%%%%%%%%%%%%%%%%%%%
1159 \begin{frame}{Fin} \vspace{-10 mm}
1160
 1161             \centering \Large \textcolor{blue}{Thank you for your attention}
1162
1163             \vspace{2cm}
1164             \centering \textcolor{blue}{ {\Large Questions?}}
1165
1166 \end{frame}
1167 \end{document}
1168 %  _____ ___ _   _
1169 % |  ___|_ _| \ | |
1170 % | |_   | ||  \| |
1171 % |  _|  | || |\  |
1172 % |_|   |___|_| \_|
1173 %