thesis-presentation/AhmedSlides.tex

   1  \documentclass{beamer}
   2 \usepackage{beamerthemefemto}
   3 \usepackage[latin1]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
   6 \usepackage{algorithm,algorithmicx,algpseudocode}
   7 \usepackage{graphicx,graphics}
   8 \usepackage{subfig}
   9 \usepackage{listings}
  10 \usepackage{colortbl}
  11 \usepackage{amsmath}
  12 \usepackage{xspace}
  13  \usepackage{movie15}
  14  \usepackage{animate}
  15 \usepackage{xmpmulti}
  16  \newcommand{\AG}[2][inline]{%
  17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}
  18 \newcommand{\JC}[2][inline]{%
  19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
  20 \definecolor{myblue}{RGB}{0,29,119}
  21 \newcommand{\Xsub}[2]{{\ensuremath{#1_{\mathit{#2}}}}}% \Xsub{base}{label}: base symbol with an italic multi-letter subscript; the subscript is braced so it forms a proper math group
  22 \usepackage{fixltx2e}
  23 %% Used to put some subscripts lower and make them more legible: a non-empty argument is preceded by a zero-width rule of height 1.52ex, an empty argument expands to nothing. Emptiness is tested with \detokenize so that arguments starting with two identical tokens (e.g. ii, 11) are not mistaken for empty.
  24 \newcommand{\fxheight}[1]{\if\relax\detokenize{#1}\relax\else\rule{0pt}{1.52ex}#1\fi}
  25 \usepackage{ragged2e}
  26 \newcommand{\CL}{\Xsub{C}{L}}
  27 \newcommand{\Dist}{\mathit{Dist}}
  28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
  29 \newcommand{\Eind}{\Xsub{E}{ind}}
  30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
  31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
  32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
  33 \newcommand{\Es}{\Xsub{E}{S}}
  34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}}
  35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}}
  36 \newcommand{\Fnew}{\Xsub{F}{new}}
  37 \newcommand{\Vnew}{\Xsub{V}{new}}
  38 \newcommand{\Vmax}{\Xsub{V}{max}}
  39 \newcommand{\Ileak}{\Xsub{I}{leak}}
  40 \newcommand{\Kdesign}{\Xsub{K}{design}}
  41 \newcommand{\MaxDist}{\mathit{Max}\Dist}
  42 \newcommand{\MinTcm}{\mathit{Min}\Tcm}
  43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
  44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
  45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
  46
  47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
  48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
  49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
  50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
  51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}}
  52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
  53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
  54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
  55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
  56 \newcommand{\Tnew}{\Xsub{T}{New}}
  57 \newcommand{\Told}{\Xsub{T}{Old}}
  58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
  59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
  60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
  61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
  62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
  63  \usepackage{pifont}
  64 \usepackage{xcolor}
  65 \definecolor{myblue}{RGB}{0,29,119}
  66 \usepackage[textsize=footnotesize]{todonotes}
  67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]}
  68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
  69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
  70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
  71
  72
  73
  74 %\title{Energy Consumption Optimization of Parallel Applications with
  75 %Iterations using CPU Frequency Scaling}
  76 \vspace{2cm}
  77
  78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}\vspace{-0.5cm}
  79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
  80
  81 \date{}
  82 \vspace{-3cm}
  83 %  ____  _____ ____  _   _ _____
  84 % |  _ \| ____| __ )| | | |_   _|
  85 % | | | |  _| |  _ \| | | | | |
  86 % | |_| | |___| |_) | |_| | | |
  87 % |____/|_____|____/ \___/  |_|
  88 %
  89 \begin{document}
  90 \setbeamertemplate{background}{\titrefemto}
  91
  92 %%%%%%%%%%%%%%%%%%%%
  93 %%    SLIDE 01    %%
  94 %%%%%%%%%%%%%%%%%%%%
  95 \begin{frame}[plain]
  96 \vspace{1cm}
  97 \centering
  98    \titlepage
  99 \end{frame}
 100
 101
 102 %%%%%%%%%%%%%%%%%%%%
 103 %%    SLIDE 02    %%
 104 %%%%%%%%%%%%%%%%%%%%
 105 \setbeamertemplate{background}{\pagefemto}
 106 \begin{frame}{Outline}
 107
 108 \setbeamertemplate{section in toc}[sections numbered]
 109 \tableofcontents
 110 \end{frame}
 111
 112 %%%%%%%%%%%%%%%%%%%%
 113 %%    SLIDE 03    %%
 114 %%%%%%%%%%%%%%%%%%%%
 115 \section{\texorpdfstring{\small Introduction and Problem definition}{Introduction and Problem definition}}% section declared outside the frame; plain-text variant is used for the PDF bookmark
 116 \begin{frame}{Introduction and problem definition}
 117  \centering
 118  \includegraphics[width=0.99\textwidth]{para.pdf}
 119 \end{frame}
 120
 121
 122
 123
 124
 125
 126
 127
 128 \begin{frame}{Execution of synchronous parallel tasks}
 129 \vspace{-0.5 cm}
 130 \begin{figure}
 131   \centering
 132   \subfloat[Synchronous imbalanced communications]{%
 133     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
 134   \subfloat[Synchronous imbalanced computations]{%
 135     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
 136  % \caption{Parallel tasks on homogeneous platform}  (disabled; NOTE(review): \label{fig:homo} below therefore has no caption of its own -- confirm nothing references it)
 137   \label{fig:homo}
 138 \end{figure}
 139
 140  \end{frame}
 141
 142
 143 %%%%%%%%%%%%%%%%%%%%
 144 %%    SLIDE 07   %%
 145 %%%%%%%%%%%%%%%%%%%%
 146
 147
 148 \begin{frame}{\large Synchronous and asynchronous iterative methods }
 149 \vspace{-0.5 cm}
 150 \begin{figure}
 151
 152 \includegraphics[scale=0.42]{syn_tasks.pdf}
 153 \vspace{0.6 cm}
 154 \includegraphics[scale=0.42]{Asyn_tasks.pdf}
 155 \end{figure}
 156
 157
 158  \end{frame}
 159
 160  %%%%%%%%%%%%%%%%%%%%
 161 %%    SLIDE 03    %%
 162 %%%%%%%%%%%%%%%%%%%%
 163 \begin{frame}{Approaches to get more computing power}
 164
 165    % Two rows of (text column | picture column); each pair of minipages adds up to exactly \textwidth.
 166      \begin{minipage}{0.5\textwidth}
 167       \textcolor{blue}{1)} \small\bfseries \textcolor{black}{Increase the frequency of a  processor.\\ (limited due to overheating)}
 168     \end{minipage}%
 169     \begin{minipage}{0.5\textwidth}
 170
 171 \begin{figure}[h!]
 172     % 0.84 of this 0.5\textwidth column keeps the picture at 0.42 of the frame width, the same size as with the former 0.7 x 0.6
 173     \includegraphics[width=0.84\textwidth]{fig/freq-years}
 174     \end{figure}
 175     \end{minipage}%
 176     \vspace{0.2cm}
 177     \begin{minipage}{0.5\textwidth}
 178      \textcolor{blue}{2)} \small\bfseries \textcolor{black}{Increase the number of nodes.}
 179
 180  \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}
 181
 182     \end{minipage}%
 183     \begin{minipage}{0.5\textwidth}
 184     \begin{figure}[h!]
 185      \includegraphics[width=0.84\textwidth]{fig/clusters}
 186     \end{figure}
 187     \end{minipage}%
 188  \end{frame}
 189
 190
 191
 192  %%%%%%%%%%%%%%%%%%%
 193 %%    SLIDE 04   %%
 194 %%%%%%%%%%%%%%%%%%%%
 195 \begin{frame}{Techniques for energy consumption reduction}
 196 % Animated illustration built from the images on-off/a-0 .. on-off/a-111. NOTE(review): the frame rate is 200 here but 10 in the other animations of this file -- confirm this is intended.
 197      \textcolor{blue}{1)} \bfseries \textcolor{black}{Switch-off idle nodes method}
 198     \vspace{-0.9cm}
 199     \begin{figure}
 200      \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{200}{on-off/a-}{0}{111}
 201      %\includegraphics[width=0.6\textwidth]{on-off/a-69}
 202     \end{figure}
 203  \end{frame}
 204
 205 %%%%%%%%%%%%%%%%%%%%
 206 %%    SLIDE 05    %%
 207 %%%%%%%%%%%%%%%%%%%%
 208 \begin{frame}{Techniques for energy consumption reduction}
 209 % Animated illustration of DVFS built from the images DVFS-meq/a-0 .. DVFS-meq/a-175, played at 10 frames per second.
 210   \textcolor{blue}{2)} \bfseries \textcolor{black}{Dynamic Voltage and Frequency Scaling (DVFS)}
 211      \vspace{-0.9cm}
 212     \begin{figure}
 213     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{175}
 214      %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
 215     \end{figure}
 216     \end{frame}
 217
 218 %%%%%%%%%%%%%%%%%%%%
 219 %%    SLIDE 06   %%
 220 %%%%%%%%%%%%%%%%%%%%
 221 %%%%%%%%%%%%%%%%%%%%
 222 %%    SLIDE 07    %%
 223 %%%%%%%%%%%%%%%%%%%%
 224 \section{\texorpdfstring{\small Motivations}{Motivations}}% section declared outside the frame; plain-text variant is used for the PDF bookmark
 225 \begin{frame}{Motivations}
 226 \vspace{0.05cm}
 227 \textcolor{blue}{Why we used the DVFS method:}
 228 \vspace{-0.49cm}
 229 \begin{minipage}{0.5\textwidth}
 230     \vspace{-0.49cm}
 231       \begin{itemize}
 232        \item  \small \textcolor{black}{ The CPU is the component that consumes the  highest amount of energy in a node\textsuperscript{1}. }
 233
 234          \end{itemize}
 235
 236     \end{minipage}%
 237     \begin{minipage}{0.5\textwidth}
 238      \vspace{-0.49cm}
 239     \begin{figure}[h!]
 240      \includegraphics[width=0.85\textwidth]{fig/node-power}
 241
 242     \end{figure}
 243     \end{minipage}%
 244
 245   \begin{itemize} \item \small  \textcolor{black}{DVFS reduces the energy consumption while
 246    keeping all the nodes working.}
 247                 \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes.}  \end{itemize}
 248
 249 \vspace{-0.12cm}
 250
 251  \begin{block}{\textcolor{white}{Challenge and Objective}}
 252
 253         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it also degrades the performance of the CPU.}
 254
 255                 \vspace{0.1cm}
 256  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
 257 \end{block}
 258
 259  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
 260 for a warehouse-sized computer.
 261
 262     \end{frame}
 263
 264
 265
 266 %%%%%%%%%%%%%%%%%%%%
 267 %%    SLIDE 08    %%
 268 %%%%%%%%%%%%%%%%%%%%
 269
 270
 271 \section{\texorpdfstring{\small Energy optimization of a homogeneous platform}{Energy optimization of a homogeneous platform}}% section declared outside the frame; plain-text variant is used for the PDF bookmark
 272 \begin{frame}{The first contribution}
 273
 274 %\vspace{-3cm}
 275  % \includegraphics[width=0.6\textwidth]{white.pdf}
 276
 277 \begin{center}
 278 \bfseries\Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
 279 \end{center}
 280  \end{frame}
 281
 282
 283
 284 %%%%%%%%%%%%%%%%%%%%
 285 %%    SLIDE 09    %%
 286 %%%%%%%%%%%%%%%%%%%%
 287
 288 \begin{frame}{Objectives}
 289
 290                 \begin{itemize}   \small \justifying
 291
 292                    \item   Studying the effect of the scaling factor on the \textbf{energy consumption and performance } of parallel  applications with iterations. \medskip
 293
 294                    \item   Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
 295                    \item   Proposing an algorithm for selecting the scaling factor that produces  \textbf {the optimal trade-off} between the energy consumption and the performance. \medskip
 296                    \item   Comparing the proposed algorithm to existing methods.
 297
 298
 299                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
 300                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
 301                 \end{itemize}
 302                  %\let\thefootnote\relax\footnote{}
 303
 304
 305 \end{frame}
 306
 307
 308
 309
 310
 311
 312
 313
 314 %%%%%%%%%%%%%%%%%%%%
 315 %%    SLIDE 11   %%
 316 %%%%%%%%%%%%%%%%%%%%
 317 \begin{frame}{Energy model for a homogeneous platform}
 318       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and  the static
 319        (\textcolor{red}{$P_s$}) power.
 320     \begin{equation}
 321      \label{eq:pd}
 322      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot \CL \cdot V^2 \cdot F}
 323    \end{equation}
 324     % \scriptsize is a declaration: it stays in force until the end of the frame, so the second equation, both legends and the closing sentence are typeset at that size.
 325     \scriptsize \underline{Where}: \\ \textcolor{blue}{$\alpha$}: switching activity \hspace{15 mm}  \textcolor{blue}{$\CL$}: load capacitance\\
 326     \textcolor{blue}{$V$}: the supply voltage \hspace{14 mm} \textcolor{blue}{$F$}: operational frequency
 327    \begin{equation}
 328      \label{eq:ps}
 329      \textcolor{red}{P_s} = \textcolor{blue}{V \cdot \Ntrans \cdot \Kdesign \cdot \Ileak}
 330    \end{equation}
 331     \underline{Where}:\\
 332         \textcolor{blue}{$V$}: the supply voltage.  \hspace{28 mm}   \textcolor{blue}{$\Ntrans$}: number of transistors. \\
 333         \textcolor{blue}{$\Kdesign$}: design dependent parameter. \hspace{8 mm} \textcolor{blue}{$\Ileak$}: technology dependent
 334              parameter.
 335
 336              The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{\Fmax}{\Fnew}$}.
 337 \end{frame}
 338
 339 %%%%%%%%%%%%%%%%%%%%
 340 %%    SLIDE 12   %%
 341 %%%%%%%%%%%%%%%%%%%%
 342
 343 \begin{frame}{Energy model for a homogeneous platform}
 344        \vspace{-0.77cm}
 345             \begin{figure}
 346   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{homo-model/a-}{0}{441}
 347   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
 348   \end{figure}
 349
 350       %  \begin{block}{\small Rauber and Rünger's energy model}
 351          %$ E = P_{d} \cdot S_1^{-2} \cdot
 352          %\left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
 353           %  P_{s} \cdot S_1  \cdot T_1 \cdot N$
 354         %\end{block}
 355           % \textcolor{blue}{$S_1$}: the maximum scaling factor.\\
 356           % \textcolor{blue}{$P_{d}$}: the dynamic power.\\
 357           % \textcolor{blue}{$P_{s}$}: the static power.\\
 358           % \textcolor{blue}{$T_I$}: the execution time of the slower task.\\
 359           % \textcolor{blue}{$T_i$}: the execution time of task i.\\
 360           % \textcolor{blue}{$N$}:  the number of  nodes.
 361
 362
 363
 364 \end{frame}
 365
 366
 367 %%%%%%%%%%%%%%%%%%%%
 368 %%    SLIDE 13   %%
 369 %%%%%%%%%%%%%%%%%%%%
 370 \begin{frame}{Performance evaluation of MPI programs}
 371         \begin{femtoBlock}{}% multi-letter subscripts below are wrapped in \mathit so they are typeset as words, not as products of letters
 372               \vspace{-5 mm}
 373               \begin{block}{\small Execution time prediction model}
 374                      \centering $ \textcolor{red}{T_{\mathit{new}}} = \textcolor{blue}{T_{\mathit{MaxCompOld}} \cdot S + T_{\mathit{MinCommOld}}}$
 375           \end{block}
 376           \vspace{10 mm}
 377            \centering \includegraphics[width=.4\textwidth]{c1/cg_per}
 378            \quad%
 379            \includegraphics[width=.4\textwidth]{c1/lu_pre}
 380             \vspace{5 mm}
 381
 382            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
 383            \end{femtoBlock}
 384 \end{frame}
 385
 386
 387
 388
 389  %%%%%%%%%%%%%%%%%%%%
 390 %%    SLIDE 14   %%
 391 %%%%%%%%%%%%%%%%%%%%
 392 \begin{frame}{Performance and energy reduction trade-off}
 393         \begin{femtoBlock}{} \vspace{-15 mm}
 394                \begin{figure}
 395      \centering
 396      \subfloat[\small  Real relation.]{%
 397      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
 398      \quad%
 399      \subfloat[\small Converted relation.]{%
 400      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
 401   \label{fig:rel}
 402  % \caption{The energy and performance relation}
 403 \end{figure}
 404
 405  Where:~~~ $\textcolor{blue}{\text{Performance}} = \text{execution time}^{-1}$
 406
 407 % Words inside formulas use \text; the normalized quantities use the preamble macros \Pnorm and \Enorm.
 408       \small
 409          \begin{block}{\small Our objective function}
 410          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}
 411                     (\overbrace{\Pnorm(S_j)}^{\text{\textcolor{blue}{Maximize}}} -
 412                      \overbrace{\Enorm(S_j)}^{\text{\textcolor{blue}{Minimize}}} )$}
 413
 414         \end{block}
 415         \end{femtoBlock}
 416
 417 \end{frame}
 418
 419 %%%%%%%%%%%%%%%%%%%%
 420 %%    SLIDE 15   %%
 421 %%%%%%%%%%%%%%%%%%%%
 422  %\begin{frame}{Scaling factor selection algorithm}
 423 %\vspace{-0.75cm}
 424     % \begin{center}
 425       %\includegraphics[width=.56 \textwidth]{c1/algo-homo}
 426      %\end{center}
 427
 428 %\end{frame}
 429
 430
 431 %%%%%%%%%%%%%%%%%%%%
 432 %%    SLIDE 16   %%
 433 %%%%%%%%%%%%%%%%%%%%
 434 \begin{frame}{Scaling factor selection algorithm}
 435 \vspace{-0.75cm}
 436
 437      \begin{figure}
 438   \animategraphics[autopause,controls,scale=0.29,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{335}
 439   %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
 440   \end{figure}
 441 \end{frame}
 442
 443 %%%%%%%%%%%%%%%%%%%%
 444 %%    SLIDE 17   %%
 445 %%%%%%%%%%%%%%%%%%%%
 446 \begin{frame}{Experimental results }
 447       \begin{femtoBlock}{}
 448         \begin{itemize}
 449          \small % units below are set upright with a thin space after the number, not as math italics
 450            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 451            \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
 452            \item Each node in the cluster has 18 frequency values from \textbf{2.5\,GHz} to \textbf{800\,MHz}.\medskip
 453            \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
 454            \item $P_d=20\,\text{W}$,  $P_s=4\,\text{W}$.
 455                 \end{itemize}
 456         \end{femtoBlock}
 457 \end{frame}
 458
 459
 460 %%%%%%%%%%%%%%%%%%%%
 461 %%    SLIDE 18   %%
 462 %%%%%%%%%%%%%%%%%%%%
 463 \begin{frame}{Experimental results}
 464   \begin{femtoBlock}{}
 465       \centering {
 466      \includegraphics[width=.35\textwidth]{c1/ep}
 467      \includegraphics[width=.35\textwidth]{c1/cg}
 468      \includegraphics[width=.35\textwidth]{c1/bt}}
 469
 470 \hspace{0.5cm}
 471
 472      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
 473  \end{femtoBlock}
 474 \end{frame}
 475
 476
 477   %%%%%%%%%%%%%%%%%%%%
 478 %%    SLIDE 19   %%
 479 %%%%%%%%%%%%%%%%%%%%
 480 \begin{frame}{Results comparison}
 481          \begin{block}{\small Rauber and Rünger's optimal scaling factor}
 482            $\Sopt = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{\mathit{dyn}}}{P_{\mathit{static}}} \cdot
 483             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
 484         \end{block}
 485 % Multi-letter subscripts above are typeset as words (\Sopt from the preamble, \mathit for dyn/static).
 486
 487     \centering {
 488          %\includegraphics[width=.33\textwidth]{c1/c1.pdf}
 489          %\qquad
 490          %\includegraphics[width=.33\textwidth]{c1/c2.pdf}}
 491
 492
 493             \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}}
 494
 495 \end{frame}
 496
 497
 498 %%%%%%%%%%%%%%%%%%%%
 499 %%    SLIDE 20   %%
 500 %%%%%%%%%%%%%%%%%%%%
 501 %\begin{frame}{The proposed new energy model}
 502    % \vspace{-0.75cm}
 503   %\begin{figure}
 504  % \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
 505   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
 506  % \end{figure}
 507 %\end{frame}
 508
 509
 510 %%%%%%%%%%%%%%%%%%%%
 511 %%    SLIDE 21   %%
 512 %%%%%%%%%%%%%%%%%%%%
 513 %\begin{frame}{\large Comparing the new model with Rauber's model }
 514 % \vspace{0.1cm}
 515 % \centering
 516     %\includegraphics[width=.45\textwidth]{c1/energy_con}
 517
 518    %\includegraphics[width=.5\textwidth]{c1/compare-scales}
 519 %\end{frame}
 520
 521
 522
 523
 524    % \begin{frame}{Summary}
 525      % \begin{femtoBlock}{}
 526      % \begin{itemize}
 527       %\small
 528        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
 529        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
 530          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip
 531          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
 532                % performance}. \medskip
 533          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
 534          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
 535          %\end{itemize}
 536
 537         %\end{femtoBlock}
 538 %\end{frame}
 539
 540
 541 %%%%%%%%%%%%%%%%%%%%
 542 %%    SLIDE 22    %%
 543 %%%%%%%%%%%%%%%%%%%%
 544
 545
 546 \section{\texorpdfstring{\small Energy optimization of a heterogeneous platform}{Energy optimization of a heterogeneous platform}}% section declared outside the frame; plain-text variant is used for the PDF bookmark
 547 \begin{frame}{The second contribution}
 548
 549 \begin{center}
 550
 551
 552 \bfseries\Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
 553 \end{center}
 554  \end{frame}
 555
 556
 557
 558 %%%%%%%%%%%%%%%%%%%%
 559 %%    SLIDE 23    %%
 560 %%%%%%%%%%%%%%%%%%%%
 561
 562 \begin{frame}{Objectives}
 563         \begin{femtoBlock}{} \vspace{-12 mm}
 564                 \begin{itemize} \small
 565                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running
 566                           over a heterogeneous platform (cluster or Grid). \medskip
 567                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
 568                           message passing iterative applications.    \medskip
 569 % \dots is used in the vector below instead of three literal periods, which get wrong spacing in math mode
 570                    \item  Computing  the vector of scaling factors ($S_1, S_2, \dots, S_n$)  producing \textcolor{blue} {the optimal trade-off} between
 571                           the energy consumption and the performance.
 572                 \end{itemize}
 573
 574           \vspace{-10 mm}
 575         \end{femtoBlock}
 576 \end{frame}
 577
 578
 579 %%%%%%%%%%%%%%%%%%%%
 580 %%    SLIDE 24    %%
 581 %%%%%%%%%%%%%%%%%%%%
 582 \begin{frame}{The execution time model}
 583       \vspace{-8 mm}
 584      \begin{figure}[!t]
 585        \centering
 586        \includegraphics[scale=0.5]{c2/commtasks}
 587        \label{fig:heter}
 588      \end{figure}
 589        \vspace{-12 mm}
 590        \medskip
 591
 592     \begin{block}{\small The execution time prediction model}
 593     \begin{equation}
 594      \label{eq:perf}% the terms below use the preamble macros \TcpOld[i] and \Tcm[i]; no size command inside math mode
 595      \textcolor{red}{T_{\mathit{new}}} = \textcolor{blue}{\max_{i=1,2,\dots,N} (\TcpOld[i] \cdot S_{i}) + \min_{i=1,2,\dots,N} (\Tcm[i])}
 596     \end{equation}
 597     \end{block}
 598  \small  Where: $ \textcolor{red}{\Tcm} = \textcolor{blue}{\text{communication times} + \text{slack times}}$
 599
 600 \end{frame}
 601
 602  %%%%%%%%%%%%%%%%%%%%
 603 %%    SLIDE 25    %%
 604 %%%%%%%%%%%%%%%%%%%%
 605  %\begin{frame}{The energy consumption model}
 606    % The overall energy consumption of a message passing synchronous  application executed over
 607    %  a heterogeneous platform can be computed as  follows:
 608    % \begin{multline}
 609     % \label{eq:energy}
 610    %  \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
 611   %   \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i))}}
 612    %   \hspace{10 mm}
 613    % \end{multline}
 614    % \underline{where}:\\
 615    % \textcolor{blue}{N} : is the number of nodes.
 616 %\end{frame}
 617
 618
 619 %%%%%%%%%%%%%%%%%%%%
 620 %%    SLIDE 26    %%
 621 %%%%%%%%%%%%%%%%%%%%
 622   \begin{frame}{The energy  model  for heterogeneous cluster}
 623   \vspace{-0.77cm}
 624  \begin{figure}
 625   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{heter-model/a-}{0}{350}
 626   %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
 627   \end{figure}
 628  \end{frame}
 629
 630
 631
 632
 633 %%%%%%%%%%%%%%%%%%%%
 634 %%    SLIDE 27    %%
 635 %%%%%%%%%%%%%%%%%%%%
 636 %\begin{frame}{The trade-off between energy  and performance}
 637    % \vspace{-7 mm}
 638     %\begin{figure}
 639    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
 640    % \end{figure}
 641    % \vspace{-7 mm}
 642    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}}
 643     %{E_{Max}}$}. \\
 644     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
 645
 646    %  \begin{block}{\small The tradeoff model}
 647     % \begin{equation}
 648     %  \label{eq:max}
 649     %  \textcolor{red}{MaxDist} =
 650      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
 651       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
 652       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
 653       %\end{equation}
 654     % \end{block}
 655 %\end{frame}
 656
 657
 658 %%%%%%%%%%%%%%%%%%%%
 659 %%    SLIDE 28    %%
 660 %%%%%%%%%%%%%%%%%%%%
 661  %\begin{frame}{The scaling algorithm for heter. cluster}
 662
 663  %\centering
 664    %\includegraphics[width=.52\textwidth]{algo-heter}
 665  %\end{frame}
 666
 667
 668  %%%%%%%%%%%%%%%%%%%%
 669 %%    SLIDE 29    %%
 670 %%%%%%%%%%%%%%%%%%%%
 671  \begin{frame}{The scaling algorithm for heter. cluster}
 672  \vspace{-0.77cm}
 673  \centering
 674
 675   \begin{figure}
 676   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{836}
 677  % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
 678   \end{figure}
 679 \end{frame}
 680
 681
 682
 683
 684 %%%%%%%%%%%%%%%%%%%%
 685 %%    SLIDE 30    %%
 686 %%%%%%%%%%%%%%%%%%%%
 687 %\begin{frame}{Experiments over a heterogeneous cluster  }
 688       %  \begin{itemize}
 689         % \small
 690           % \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 691           % \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
 692           % \item Four types of processors with different computing powers were used.\medskip
 693           % \item The benchmarks were executed with different number of nodes ranging from 4 to 144 nodes.\medskip
 694           % \item It was assumed that the total power consumption of the CPU consist of 80\% dynamic power and 20\% static power.
 695                  % \medskip
 696
 697         %\end{itemize}
 698
 699 %\end{frame}
 700
 701
 702 %%%%%%%%%%%%%%%%%%%%
 703 %%    SLIDE 31    %%
 704 %%%%%%%%%%%%%%%%%%%%
 705 %\begin{frame}{The simulation results}
 706   % \vspace{-5 mm}
 707   % \begin{figure}[!t]
 708    %\centering
 709     %\includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
 710
 711    % \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
 712      %for the class C of the NAS Benchmarks executed over 8 nodes}
 713
 714   % \end{figure}
 715 %\end{frame}
 716
 717
 718
 719 %%%%%%%%%%%%%%%%%%%%
 720 %%    SLIDE 32    %%
 721 %%%%%%%%%%%%%%%%%%%%
 722 %\begin{frame}{The simulation results}
 723  %  \vspace{-5 mm}
 724   % \begin{figure}[!t]
 725   % \centering
 726
 727    % \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
 728
 729   % \textcolor{blue}{On average, it degrades  by \textcolor{red}{3.8\%} the performance
 730     % of NAS Benchmarks class C executed over 8 nodes}
 731   %   \end{figure}
 732 %\end{frame}
 733
 734
 735
 736
 737
 738
 739
 740
 741 %%%%%%%%%%%%%%%%%%%%
 742 %%    SLIDE 35    %%
 743 %%%%%%%%%%%%%%%%%%%%
 744 %\begin{frame}{Energy optimization of grid platform}
 745   % \begin{figure}[!t]
 746    % \centering
 747          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
 748
 749         %   \small  10 sites distributed over France and Luxembourg
 750         %\end{figure}
 751 %\end{frame}
 752
 753
 754 %%%%%%%%%%%%%%%%%%%%
 755 %%    SLIDE 36    %%
 756 %%%%%%%%%%%%%%%%%%%%
 757 %\begin{frame}{The grid architecture}
 758 %\begin{center}
 759 %\includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
 760 %\end{center}
 761
 762  %\begin{frame}{Performance, Energy and trade-off models} \small
 763   %\begin{block}{\small The performance model of grid}
 764    % \begin{equation}
 765   %\label{eq:perf}
 766   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
 767  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
 768 %\end{equation}
 769     %\end{block}
 770
 771
 772  %\begin{block}{\small The energy model of grid}\small
 773   %  \begin{equation}
 774   %\label{eq:energy}
 775  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +
 776 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
 777 %\end{equation}
 778    % \end{block}
 779
 780 %\begin{block}{\small The trade-off model of grid}
 781 %\small
 782     %\begin{equation}
 783    %\label{eq:max}
 784   %\MaxDist =
 785   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
 786    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
 787     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
 788 %\end{equation}
 789    % \end{block}
 790
 791
 792  %\end{frame}
 793
 794
 795
 796 %%%%%%%%%%%%%%%%%%%%
 797 %%    SLIDE 37    %%
 798 %%%%%%%%%%%%%%%%%%%%
 799  \begin{frame}{Experiments over Grid'5000}
 800
 801    \textcolor{blue}{The experiments were conducted using three
 802           clusters distributed over one or two sites.}
 803            \vspace{-7 mm}
 804           \begin{center}
 805           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
 806           \end{center}
 807       \vspace{-10 mm}
 808   \textcolor{blue}{Grid'5000 power measurement tools were used.}
 809         \vspace{-9 mm}
 810   \begin{center}
 811           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
 812           \end{center}
 813
 814
 815 \end{frame}
 816
 817
 818
 819
 820 %%%%%%%%%%%%%%%%%%%%
 821 %%    SLIDE 38    %%
 822 %%%%%%%%%%%%%%%%%%%%
 823 \begin{frame}{Experiments over Grid'5000}
 824
 825    \begin{minipage}{0.4\textwidth}
 826        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
 827         %\textcolor{red}{30\%}}
 828      \small \textcolor{blue}{The average energy saving =  \textcolor{red}{30\%}}
 829    \end{minipage}
 830      \begin{minipage}{0.55\textwidth}
 831         \begin{figure}[h!]
 832           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
 833      \end{figure}
 834 \end{minipage}
 835
 836          \begin{minipage}{0.4\textwidth}
 837            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
 838                 %performance by \textcolor{red}{3.2\%}}
 839       \small  \textcolor{blue}{The average performance degradation  =  \textcolor{red}{3.2\%}}
 840         \end{minipage}
 841        \begin{minipage}{0.55\textwidth}
 842          \begin{figure}[h!]
 843            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
 844          \end{figure}
 845           \end{minipage}
 846  \end{frame}
 847
 848
 849
 850 %%%%%%%%%%%%%%%%%%%%
 851 %%    SLIDE 33    %%
 852 %%%%%%%%%%%%%%%%%%%%
 853 \begin{frame}{The results of the three power scenarios}
 854    \vspace{-5 mm}
 855    \begin{figure}[!t]
 856    \centering
 857    \includegraphics[width=.45\textwidth]{c2/eng_pow.eps}
 858    \hspace{0.3cm}
 859    \includegraphics[width=.45\textwidth]{c2/per_pow.eps}
 860    \vspace{4 mm}
 861    \includegraphics[width=.7\textwidth]{c2/three_scenarios.pdf}
 862    \end{figure}
 863 \end{frame}
 864
 865
 866
 867
 868
 869
 870
 871 %%%%%%%%%%%%%%%%%%%%
 872 %%    SLIDE 39    %%
 873 %%%%%%%%%%%%%%%%%%%%
 874 \begin{frame}{One core and Multi-cores per node results}
 875    %\textcolor{blue}{One core  and Multi-cores per node results:}
 876
 877   \begin{figure}[h!]
 878   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
 879   \hspace{0.3cm} % horizontal gap between the energy plot and the performance plot
 880   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
 881   \end{figure}
 882
 883   \centering \small \textcolor{blue}{Using the multi-cores per node scenario decreases the computation-to-communication ratio.} % full stop kept inside \textcolor so it shares the sentence colour
 884 \end{frame}
 885
 886
 887 %%%%%%%%%%%%%%%%%%%%
 888 %%    SLIDE 34    %%
 889 %%%%%%%%%%%%%%%%%%%%
 890 \begin{frame}{Comparing the objective function to EDP}
 891
 892     EDP is the product of the energy consumption and the delay.
 893     \vspace{-5 mm} % negative space to bring the plot closer to the sentence above
 894     \begin{figure}[!t]
 895       \centering
 896       \includegraphics[width=.6\textwidth]{c2/edp_dist.eps}
 897
 898
 899     \end{figure}
 900 \end{frame}
 901 %\begin{frame}{Summary}
 902 %\begin{itemize}
 903      % \small
 904         % \item  Two scaling algorithms were applied to \textcolor{blue}{heterogeneous clusters} and \textcolor{blue}{grids}.
 905         % \item  New \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
 906       %   \item  The experiments were conducted over the \textcolor{blue}{SimGrid}  simulator and the real
 907           %test-bed \textcolor{blue}{Grid'5000}.
 908
 909          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
 910         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
 911       %    clusters.
 912
 913          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
 914         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
 915
 916        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
 917      %    \end{itemize}
 918 %\end{frame}
 919
 920
 921 %%%%%%%%%%%%%%%%%%%%
 922 %%    SLIDE 40    %%
 923 %%%%%%%%%%%%%%%%%%%%
 924 \section{\small {Energy optimization of asynchronous applications}} % sectioning command issued outside the frame, as beamer expects
 925 \begin{frame}{The third contribution}
 926 \begin{center}
 927 \bfseries  \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing  applications}
 928 \end{center}
 929  \end{frame}
 930
 931
 932
 933 %%%%%%%%%%%%%%%%%%%%
 934 %%    SLIDE 41   %%
 935 %%%%%%%%%%%%%%%%%%%%
 936 \begin{frame}{Problem definition}\vspace{0.8mm}
 937   \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
 938   \vspace{-8mm} % negative space between the caption sentence and the animation
 939   \begin{figure}
 940     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{syn/a-}{0}{647} % 10 frames per second, image files syn/a-0 .. syn/a-647
 941     %\includegraphics[width=0.6\textwidth]{syn/a-503}
 942   \end{figure}
 943 \end{frame}
 944
 945
 946
 947 %%%%%%%%%%%%%%%%%%%%
 948 %%    SLIDE 42   %%
 949 %%%%%%%%%%%%%%%%%%%%
 950 \begin{frame}{Problem definition}\vspace{0.8mm}
 951   \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
 952   \vspace{-8mm} % negative space between the caption sentence and the animation
 953   \begin{figure}
 954     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn/a-}{0}{556} % 10 frames per second, image files asyn/a-0 .. asyn/a-556
 955     %\includegraphics[width=0.6\textwidth]{asyn/a-440}
 956   \end{figure}
 957 \end{frame}
 958
 959
 960
 961 %%%%%%%%%%%%%%%%%%%%
 962 %%    SLIDE 43   %%
 963 %%%%%%%%%%%%%%%%%%%%
 964 \begin{frame}{Solution}\vspace{0.8mm}
 965   \textcolor{blue}{Using asynchronous communications with DVFS }
 966   \vspace{-8mm} % negative space between the caption sentence and the animation
 967   \begin{figure}
 968     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{344} % 10 frames per second, image files asyn+dvfs/a-0 .. asyn+dvfs/a-344
 969     %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
 970   \end{figure}
 971 \end{frame}
 972
 973
 974
 975
 976 %%%%%%%%%%%%%%%%%%%%
 977 %%    SLIDE 44   %%
 978 %%%%%%%%%%%%%%%%%%%%
 979 %\begin{frame}{The performance models}
 980
 981 %\begin{block}{\small The performance model of Asynch. Applications}\small
 982 %\begin{equation}
 983   %\label{eq:asyn_time}
 984  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
 985 %\end{equation}
 986 %\end{block}
 987
 988
 989 %\begin{block}{\small The performance model of Hybrid Applications}\small
 990 %\begin{equation}
 991   %\label{eq:asyn_perf}
 992   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
 993    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
 994 %\end{equation}
 995 %\end{block}
 996
 997
 998 %\end{frame}
 999
1000
1001
1002 %%%%%%%%%%%%%%%%%%%%
1003 %%    SLIDE 45   %%
1004 %%%%%%%%%%%%%%%%%%%%
1005 %\begin{frame}{The energy consumption models}
1006
1007 %\begin{block}{\small The energy model of Asynch. Applications}\small
1008 %\begin{equation}
1009   %\label{eq:asyn_energy1}
1010 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
1011 %\end{equation}
1012 %\end{block}
1013
1014
1015 %\begin{block}{\small The energy model of Hybrid Applications}\small
1016 %\begin{multline}
1017   %\label{eq:asyn_energy}
1018  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
1019 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
1020 %\end{multline}
1021 %\end{block}
1022 %\end{frame}
1023
1024
1025
1026 %%%%%%%%%%%%%%%%%%%%
1027 %%    SLIDE 44   %%
1028 %%%%%%%%%%%%%%%%%%%%
1029 \begin{frame}{The performance and the energy models }
1030
1031   \centering % centre the single full-slide image
1032   \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
1033 \end{frame}
1034
1035
1036
1037
1038
1039 %%%%%%%%%%%%%%%%%%%%
1040 %%    SLIDE 46   %%
1041 %%%%%%%%%%%%%%%%%%%%
1042 \begin{frame}{The scaling algorithm for Asynch.  applications}
1043   \vspace{-0.1mm}
1044   \centering % centre the algorithm image
1045   \includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
1046 \end{frame}
1047
1048
1049
1050 %%%%%%%%%%%%%%%%%%%%
1051 %%    SLIDE 47   %%
1052 %%%%%%%%%%%%%%%%%%%%
1053 \begin{frame}{The experiments}
1054   \vspace{-5mm} % pull the content up towards the frame title
1055   \begin{figure}[!t]
1056     \begin{itemize}
1057       \small
1058       \item The architecture of the grid:
1059     \end{itemize}
1060     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
1061   \end{figure}
1062   \begin{itemize}
1063     \small
1064     \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1065     \item Evaluating the application over the simulator and Grid'5000.
1066   \end{itemize}
1067 \end{frame}
1068
1069
1070
1071 %%%%%%%%%%%%%%%%%%%%
1072 %%    SLIDE 48   %%
1073 %%%%%%%%%%%%%%%%%%%%
1074 %\begin{frame}{The simulation results}
1075 %\centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1076
1077 %\centering
1078    % \includegraphics[scale=0.42]{c3/energy_saving.eps}
1079
1080  %\centering  The average energy saving  = \textcolor{red}{22\%}
1081 %\end{frame}
1082
1083
1084
1085 %%%%%%%%%%%%%%%%%%%%
1086 %%    SLIDE 49   %%
1087 %%%%%%%%%%%%%%%%%%%%
1088 %\begin{frame}{The simulation results}
1089 %\centering
1090
1091    %  \includegraphics[scale=0.42]{c3/perf_degra.eps}
1092
1093 %\centering    The average speed-up  = \textcolor{red}{5.72\%}
1094 %\end{frame}
1095
1096
1097
1098 %%%%%%%%%%%%%%%%%%%%
1099 %%    SLIDE 50   %%
1100 %%%%%%%%%%%%%%%%%%%%
1101  \begin{frame}{The Grid'5000 results}
1102    \vspace{-10mm} % pull the figure up towards the frame title
1103    \begin{figure}[!t]
1104      \centering
1105      \hspace{-8mm} % shift the row left: the two plots add up to 1.06\textwidth
1106      \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1107      \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1108    \end{figure}
1109    \vspace{-5mm}
1110    \centering \footnotesize
1111
1112    %\small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1113
1114    The average energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1115 \end{frame}
1116
1117
1118 %%%%%%%%%%%%%%%%%%%%
1119 %%    SLIDE 51   %%
1120 %%%%%%%%%%%%%%%%%%%%
1121 \begin{frame}{The comparison results}
1122   \centering % both plots are centred, one per paragraph (stacked vertically)
1123   \includegraphics[width=0.5\textwidth]{c3/compare.eps}
1124
1125   \includegraphics[width=0.5\textwidth]{c3/compare_scales.eps}
1126 \end{frame}
1127
1128
1129
1130
1131 %%%%%%%%%%%%%%%%%%%%
1132 %%    SLIDE 52  %%
1133 %%%%%%%%%%%%%%%%%%%%
1134 \section{Conclusions and Perspectives} % sectioning command issued outside the frame, as beamer expects
1135 \begin{frame}{Conclusions}
1136 \begin{itemize} % entries are introduced by \barrow (defined outside this chunk; presumably it issues the \item -- verify)
1137
1138 \small  \barrow  Three \textcolor{blue}{ new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over
1139 \textcolor{blue}{homogeneous and  heterogeneous clusters or grids}.
1140
1141
1142
1143 \small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
1144
1145 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1146
1147 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1148 Multi-splitting} method.
1149
1150 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over  the \textcolor{blue}{Grid'5000 testbed}.
1151
1152 \small  \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and R\"unger's  method} or to the \textcolor{blue}{EDP objective function}. % accent macro keeps the umlaut independent of the file encoding
1153
1154
1155 \end{itemize}
1156 \end{frame}
1157
1158
1159
1160 %%%%%%%%%%%%%%%%%%%%
1161 %%    SLIDE 53   %%
1162 %%%%%%%%%%%%%%%%%%%%
1163 \begin{frame}{Publications} % accented names use \"e so they do not depend on the file encoding
1164
1165 \begin{block}{\small Journal Articles }\scriptsize
1166 \begin{enumerate}[$\lbrack$1$\rbrack$]
1167
1168 \item Ahmed Fanfakh, Jean-Claude Charr, Rapha\"el Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational
1169       Science}, 2016.
1170
1171 \item Ahmed Fanfakh, Jean-Claude Charr, Rapha\"el Couturier,  Arnaud Giersch. Energy Consumption Reduction for
1172       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016, (Submitted)
1173
1174 \end{enumerate}
1175 \end{block}
1176
1177
1178 \begin{block}{\small Conference Articles }\scriptsize
1179
1180 \begin{enumerate}[$\lbrack$1$\rbrack$]
1181
1182 \item Jean-Claude Charr, Rapha\"el Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1183       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014}, pp.
1184       225--230. IEEE Computer Society, Milan, Italy (2014).
1185
1186 \item Jean-Claude Charr, Rapha\"el Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1187       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1188       \textit{The 16\textsuperscript{th} PDSEC}. pp.~922--931. IEEE Computer Society, India (2015).
1189
1190 \item Ahmed Fanfakh, Jean-Claude Charr, Rapha\"el Couturier,  Arnaud Giersch. CPUs Energy Consumption
1191       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society,
1192       Paris (2016).
1193
1194 \end{enumerate}
1195
1196 \end{block}
1197 \end{frame}
1198
1199
1200 %%%%%%%%%%%%%%%%%%%%
1201 %%    SLIDE 54   %%
1202 %%%%%%%%%%%%%%%%%%%%
1203 \begin{frame}{Perspectives}
1204
1205 \begin{itemize} % entries are introduced by \barrow (defined outside this chunk; presumably it issues the \item -- verify)
1206
1207 \small  \barrow The proposed algorithms should  take into consideration the
1208 \textcolor{blue}{variability between some iterations}.
1209
1210 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1211
1212 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1213
1214 \small \barrow Comparing the results returned by the energy models to the values given by  \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1215 \end{itemize}
1216
1217 \end{frame}
1218
1219 %%%%%%%%%%%%%%%%%%%%
1220 %%    SLIDE 55  %%
1221 %%%%%%%%%%%%%%%%%%%%
1222 \begin{frame}{Fin} \vspace{-10mm}
1223
1224   \centering \Large \textcolor{blue}{Thank you for your attention}
1225
1226   \vspace{2cm} % vertical gap between the thanks line and the questions line
1227   \centering \textcolor{blue}{ {\Large Questions?}}
1228
1229 \end{frame}
1230 \end{document}
1231 %  _____ ___ _   _
1232 % |  ___|_ _| \ | |
1233 % | |_   | ||  \| |
1234 % |  _|  | || |\  |
1235 % |_|   |___|_| \_|
1236 %