thesis-presentation/AhmedSlides.tex

   1  \documentclass{beamer}
   2 \usepackage{beamerthemefemto}
   3 \usepackage[latin1]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
   6 \usepackage{algorithm,algorithmicx,algpseudocode}
   7 \usepackage{graphicx,graphics}
   8 \usepackage{subfig}
   9 \usepackage{listings}
  10 \usepackage{colortbl}
  11 \usepackage{amsmath}
  12 \usepackage{xspace}
  13  \usepackage{movie15}
  14  \usepackage{animate}
  15 \usepackage{xmpmulti}
  16  \newcommand{\AG}[2][inline]{%
  17   \todo[color=green!50,#1]{\sffamily\textbf{AG:} #2}\xspace}
  18 \newcommand{\JC}[2][inline]{%
  19   \todo[color=red!10,#1]{\sffamily\textbf{JC:} #2}\xspace}
  20 \definecolor{myblue}{RGB}{0,29,119}
  21 \newcommand{\Xsub}[2]{{\ensuremath{#1_{\mathit{#2}}}}}% #1 = base symbol, #2 = multi-letter subscript set in \mathit; the subscript is braced so it does not depend on how \mathit expands
  22 \usepackage{fixltx2e}
  23 %% used to put some subscripts lower, and make them more legible
  24 \newcommand{\fxheight}[1]{\ifx#1\relax\relax\else\rule{0pt}{1.52ex}#1\fi}
  25 \usepackage{ragged2e}
  26 \newcommand{\CL}{\Xsub{C}{L}}
  27 \newcommand{\Dist}{\mathit{Dist}}
  28 \newcommand{\EdNew}{\Xsub{E}{dNew}}
  29 \newcommand{\Eind}{\Xsub{E}{ind}}
  30 \newcommand{\Enorm}{\Xsub{E}{Norm}}
  31 \newcommand{\Eoriginal}{\Xsub{E}{Original}}
  32 \newcommand{\Ereduced}{\Xsub{E}{Reduced}}
  33 \newcommand{\Es}{\Xsub{E}{S}}
  34 \newcommand{\Fdiff}[1][]{\Xsub{F}{diff}_{\!#1}}
  35 \newcommand{\Fmax}[1][]{\Xsub{F}{max}_{\fxheight{#1}}}
  36 \newcommand{\Fnew}{\Xsub{F}{new}}
  37 \newcommand{\Vnew}{\Xsub{V}{new}}
  38 \newcommand{\Vmax}{\Xsub{V}{max}}
  39 \newcommand{\Ileak}{\Xsub{I}{leak}}
  40 \newcommand{\Kdesign}{\Xsub{K}{design}}
  41 \newcommand{\MaxDist}{\mathit{Max}\Dist}
  42 \newcommand{\MinTcm}{\mathit{Min}\Tcm}
  43 \newcommand{\Ntrans}{\Xsub{N}{trans}}
  44 \newcommand{\Pd}[1][]{\Xsub{P}{d}_{\fxheight{#1}}}
  45 \newcommand{\PdNew}{\Xsub{P}{dNew}}
  46
  47 \newcommand{\PdOld}{\Xsub{P}{dOld}}
  48 \newcommand{\Pnorm}{\Xsub{P}{Norm}}
  49 \newcommand{\Tnorm}{\Xsub{T}{Norm}}
  50 \newcommand{\Ps}[1][]{\Xsub{P}{s}_{\fxheight{#1}}}
  51 \newcommand{\Scp}[1][]{\Xsub{S}{cp}_{#1}}
  52 \newcommand{\Sopt}[1][]{\Xsub{S}{opt}_{#1}}
  53 \newcommand{\Tcm}[1][]{\Xsub{T}{cm}_{\fxheight{#1}}}
  54 \newcommand{\Tcp}[1][]{\Xsub{T}{cp}_{#1}}
  55 \newcommand{\TcpOld}[1][]{\Xsub{T}{cpOld}_{#1}}
  56 \newcommand{\Tnew}{\Xsub{T}{New}}
  57 \newcommand{\Told}{\Xsub{T}{Old}}
  58 \newcommand{\Ltcm}[1][]{\Xsub{L}{tcm}_{\fxheight{#1}}}
  59 \newcommand{\Etcm}[1][]{\Xsub{E}{tcm}_{\fxheight{#1}}}
  60 \newcommand{\Niter}[1][]{\Xsub{N}{iter}_{\fxheight{#1}}}
  61 \newcommand{\Pmax}[1][]{\Xsub{P}{max}_{\fxheight{#1}}}
  62 \newcommand{\Pidle}[1][]{\Xsub{P}{idle}_{\fxheight{#1}}}
  63  \usepackage{pifont}
  64 \usepackage{xcolor}
  65 \definecolor{myblue}{RGB}{0,29,119}
  66 \usepackage[textsize=footnotesize]{todonotes}
  67 \newcommand{\bsquare}{\item[\color{myblue}\ding{110}]}
  68 \newcommand{\barrow}{\item[\color{myblue}\ding{228}]}
  69 \newcommand{\bwarrow}{\item[\color{myblue}\ding{227}]}
  70 \DeclareGraphicsExtensions{.jpg, .png , .pdf, .bmp, .pdftex}
  71
  72
  73
  74 %\title{Energy Consumption Optimization of Parallel Applications with
  75 %Iterations using CPU Frequency Scaling}
  76 %\vspace{2cm}% disabled: vertical space issued in the preamble is discarded before the first slide is built
  77
  78 \title{   \textbf{Energy Consumption Optimization of   Parallel Applications with Iterations   using CPU Frequency Scaling} \\ \vspace{0.2cm} \hspace{1.8cm}\textbf{\textcolor{cyan}{\small PhD Dissertation Defense}}}% the \vspace{-0.5cm} that followed \title was dropped: it had no effect in the preamble
  79 \author{ \textbf{Ahmed Badri Muslim Fanfakh} \\ \vspace{0.5cm}\small Under the supervision of: \\ \textcolor{cyan}{\small  Raphaël COUTURIER and Jean-Claude CHARR} \\\vspace{0.1cm} \textcolor{blue}{ UBFC - FEMTO-ST - DISC Dept.  - AND Team} \\ ~~~~~~~~~~~~~~~~~~~~~ \textbf{\textcolor{blue}{ 17 October 2016 }}}
  80
  81 \date{}
  82 %\vspace{-3cm}% disabled for the same reason as the \vspace above
  83 %  ____  _____ ____  _   _ _____
  84 % |  _ \| ____| __ )| | | |_   _|
  85 % | | | |  _| |  _ \| | | | | |
  86 % | |_| | |___| |_) | |_| | | |
  87 % |____/|_____|____/ \___/  |_|
  88 %
  89 \begin{document}
  90 \setbeamertemplate{background}{\titrefemto} % background used for the title slide (\titrefemto is presumably provided by the femto theme)
  91
  92 %%%%%%%%%%%%%%%%%%%%
  93 %%    SLIDE 01    %%
  94 %%%%%%%%%%%%%%%%%%%%
  95 \begin{frame}[plain] % title slide; content comes from \title/\author/\date via \titlepage
  96 \vspace{1cm}
  97 \centering
  98    \titlepage
  99 \end{frame}
 100
 101
 102 %%%%%%%%%%%%%%%%%%%%
 103 %%    SLIDE 02    %%
 104 %%%%%%%%%%%%%%%%%%%%
 105 \setbeamertemplate{background}{\pagefemto} % switch from the title background to \pagefemto for the slides that follow
 106 \begin{frame}{Outline}
 107
 108 \setbeamertemplate{section in toc}[sections numbered] % number the section entries in the outline
 109 \tableofcontents
 110 \end{frame}
 111
 112 %%%%%%%%%%%%%%%%%%%%
 113 %%    SLIDE 03    %%
 114 %%%%%%%%%%%%%%%%%%%%
 115 \section{\small {Introduction and Problem definition}}% sectioning command placed between frames instead of inside the frame body
 116 \begin{frame}{Definition of parallel computing}
 117  \centering
 118  \includegraphics[width=0.99\textwidth]{para.pdf}
 119 \end{frame}
 120
 121
 122
 123 \begin{frame}{Execution of synchronous parallel tasks}
 124 \vspace{-0.5 cm}
 125 \begin{figure} % two sub-figures built with \subfloat
 126   \centering
 127   \subfloat[Synchronous imbalanced communications]{%
 128     \includegraphics[scale=0.49]{c1/commtasks}\label{fig:h1}}
 129   \subfloat[Synchronous imbalanced computations]{%
 130     \includegraphics[scale=0.49]{c1/compt}\label{fig:h2}}
 131  % \caption{Parallel tasks on homogeneous platform}
 132   \label{fig:homo} % NOTE(review): the \caption above is commented out, so this label has no figure number to point at
 133 \end{figure}
 134
 135  \end{frame}
 136
 137
 138 %%%%%%%%%%%%%%%%%%%%
 139 %%    SLIDE 07   %%
 140 %%%%%%%%%%%%%%%%%%%%
 141
 142
 143 \begin{frame}{\large Synchronous and asynchronous iterative methods }
 144 \vspace{-0.5 cm}
 145 \begin{figure}
 146
 147 \includegraphics[scale=0.42]{syn_tasks.pdf}
 148 \vspace{0.6 cm}
 149 \includegraphics[scale=0.42]{Asyn_tasks.pdf}
 150 \end{figure}
 151
 152
 153  \end{frame}
 154
 155  %%%%%%%%%%%%%%%%%%%%
 156 %%    SLIDE 03    %%
 157 %%%%%%%%%%%%%%%%%%%%
 158 \begin{frame}{Approaches to get more computing power}
 159
 160    %\bf \textcolor{blue}{}
 161      \begin{minipage}{0.5\textwidth} % row 1, text column. NOTE(review): text (0.5) + picture (0.6) columns exceed \textwidth -- confirm intended
 162       \textcolor{blue}{1)} \small\bfseries \textcolor{black}{Increase the frequency of a  processor.\\ (limited due to overheating)}
 163     \end{minipage}%
 164     \begin{minipage}{0.6\textwidth}
 165
 166 \begin{figure}[h!]
 167
 168     \includegraphics[width=0.7\textwidth]{fig/freq-years}
 169     \end{figure}
 170     \end{minipage}%
 171     \vspace{0.2cm}
 172     \begin{minipage}{0.5\textwidth} % row 2, text column
 173      \textcolor{blue}{2)} \small\bfseries \textcolor{black}{Increase the number of computing
 174      units.}
 175
 176  \textcolor{black}{The supercomputer Tianhe-2 has more than 3 million cores and consumes around 17.8 megawatts.}
 177
 178     \end{minipage}%
 179     \begin{minipage}{0.6\textwidth}
 180     \begin{figure}[h!]
 181      \includegraphics[width=0.7\textwidth]{fig/clusters}
 182     \end{figure}
 183     \end{minipage}%
 184  \end{frame}
 185
 186
 187
 188  %%%%%%%%%%%%%%%%%%%
 189 %%    SLIDE 04   %%
 190 %%%%%%%%%%%%%%%%%%%%
 191 \begin{frame}{Techniques for energy consumption reduction}
 192
 193      \textcolor{blue}{1)} \bfseries \textcolor{black}{Switch-off idle nodes method}
 194     \vspace{-0.9cm}
 195     \begin{figure}
 196      \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{200}{on-off/a-}{0}{111} % frames on-off/a-0 .. a-111. NOTE(review): 200 fps here, 10 fps in the other animations -- confirm intended
 197      %\includegraphics[width=0.6\textwidth]{on-off/a-69}
 198     \end{figure}
 199  \end{frame}
 200
 201 %%%%%%%%%%%%%%%%%%%%
 202 %%    SLIDE 05    %%
 203 %%%%%%%%%%%%%%%%%%%%
 204 \begin{frame}{Techniques for energy consumption reduction}
 205
 206   \textcolor{blue}{2)} \bfseries \textcolor{black}{Dynamic Voltage and Frequency Scaling (DVFS)}
 207      \vspace{-0.9cm}
 208     \begin{figure}
 209     \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{DVFS-meq/a-}{0}{175} % frames DVFS-meq/a-0 .. a-175 at 10 fps
 210      %\includegraphics[width=0.6\textwidth]{DVFS-meq/a-109}
 211     \end{figure}
 212     \end{frame}
 213
 214 %%%%%%%%%%%%%%%%%%%%
 215 %%    SLIDE 06   %%
 216 %%%%%%%%%%%%%%%%%%%%
 217 %%%%%%%%%%%%%%%%%%%%
 218 %%    SLIDE 07    %%
 219 %%%%%%%%%%%%%%%%%%%%
 220 \section{\small {Motivations}}% sectioning command placed between frames instead of inside the frame body
 221 \begin{frame}{Motivations}
 222 \vspace{0.05cm}
 223 \textcolor{blue}{Why we used the DVFS method:}
 224 \vspace{-0.49cm}
 225 \begin{minipage}{0.5\textwidth}
 226     \vspace{-0.49cm}
 227       \begin{itemize}
 228        \item  \small \textcolor{black}{ The CPU is the component that consumes the  highest amount of energy in a node\textsuperscript{1}. }
 229
 230          \end{itemize}
 231
 232     \end{minipage}%
 233     \begin{minipage}{0.5\textwidth}
 234      \vspace{-0.49cm}
 235     \begin{figure}[h!]
 236      \includegraphics[width=0.85\textwidth]{fig/node-power}
 237
 238     \end{figure}
 239     \end{minipage}%
 240
 241   \begin{itemize} \item \small  \textcolor{black}{DVFS reduces the energy consumption while
 242    keeping all the nodes working.}
 243                 \item \small \textcolor{black}{It has a very small overhead compared to switching-off the idle nodes.}  \end{itemize}
 244
 245 \vspace{-0.12cm}
 246
 247  \begin{block}{\textcolor{white}{Challenge and Objective}}
 248
 249         \small  \textcolor{blue}{Challenge:} \textcolor{black}{DVFS is used to reduce the energy consumption, \textcolor{blue}{but} it also degrades the performance of the CPU.}
 250
 251                 \vspace{0.1cm}
 252  \small  \textcolor{blue}{Objective:} \textcolor{black}{Applying the DVFS to minimize the energy consumption while maintaining the performance of the parallel application.}
 253 \end{block}
 254
 255  \tiny \textsuperscript{1} Fan, X., Weber, W., and Barroso, L. A. 2007.  Power provisioning
 256 for a warehouse-sized computer.
 257
 258     \end{frame}
 259
 260
 261
 262 %%%%%%%%%%%%%%%%%%%%
 263 %%    SLIDE 08    %%
 264 %%%%%%%%%%%%%%%%%%%%
 265
 266
 267 \section{\small {Energy optimization of a homogeneous platform}}% sectioning command placed between frames instead of inside the frame body
 268
 269 \begin{frame}{The first contribution}
 270 %\vspace{-3cm}
 271  % \includegraphics[width=0.6\textwidth]{white.pdf}
 272
 273 \begin{center}
 274 \bfseries\Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a homogeneous platform}
 275 \end{center}
 276  \end{frame}
 277
 278
 279
 280 %%%%%%%%%%%%%%%%%%%%
 281 %%    SLIDE 09    %%
 282 %%%%%%%%%%%%%%%%%%%%
 283
 284 \begin{frame}{Objectives}
 285
 286                 \begin{itemize}   \small \justifying
 287
 288                    \item   Studying the effect of the frequency scaling  on the \textbf{energy consumption and performance } of parallel  applications with iterations. \medskip
 289
 290                    \item   Discovering the \textbf{energy-performance trade-off relation} when changing the frequency of the processor.\medskip
 291                    \item   Proposing an algorithm for selecting the scaling factor that produces  \textbf {the good trade-off} between the energy consumption and the performance. \medskip
 292                    \item   Comparing the proposed algorithm to existing methods.
 293
 294
 295                    %\footnote{\tiny Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the
 296                           %energy consumption \\  \quad ~ ~\quad    of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.} method that our method best on.
 297                 \end{itemize}
 298                  %\let\thefootnote\relax\footnote{}
 299
 300
 301 \end{frame}
 302
 303
 304
 305
 306 %%%%%%%%%%%%%%%%%%%%
 307 %%    SLIDE 13   %%
 308 %%%%%%%%%%%%%%%%%%%%
 309 \begin{frame}{Performance evaluation of MPI programs}
 310
 311 \small The frequency scaling factor is the ratio between the maximum and the new frequency, \textcolor{blue}{$S = \frac{\Fmax}{\Fnew}$}.
 312     \vspace{5 mm}
 313
 314         \begin{femtoBlock}{}
 315               \vspace{-5 mm}
 316               \begin{block}{\small Execution time prediction model} % multi-letter subscripts below are wrapped in \mathit so they read as words, not as products of letters
 317                      \centering{ $ \textcolor{red}{T_{\mathit{new}}} = \textcolor{blue}{T_{\mathit{MaxCompOld}} \cdot S + T_{\mathit{MinCommOld}}}$}
 318           \end{block}
 319           \vspace{5 mm}
 320            \centering{\includegraphics[width=.4\textwidth]{c1/cg_per}
 321            \quad%
 322            \includegraphics[width=.4\textwidth]{c1/lu_pre}}
 323             \vspace{1 mm}
 324
 325            \small The maximum normalized error for CG=0.0073 \textbf{(the smallest)} and LU=0.031 \textbf{(the worst)}.
 326            \end{femtoBlock}
 327 \end{frame}
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342 %%%%%%%%%%%%%%%%%%%%
 343 %%    SLIDE 11   %%
 344 %%%%%%%%%%%%%%%%%%%%
 345 \begin{frame}{Energy model for a homogeneous platform}
 346       The power consumed by a processor is divided into two power metrics: the dynamic (\textcolor{red}{$P_d$}) and  the static
 347        (\textcolor{red}{$P_s$}) powers.
 348     \begin{equation}
 349      \label{eq:pd}
 350      \textcolor{red}{ P_d} = \textcolor{blue}{\alpha \cdot \CL \cdot V^2 \cdot F}
 351    \end{equation}
 352     \scriptsize \underline{Where}: \\
 353     \scriptsize {\textcolor{blue}{$\alpha$}: switching activity. \hspace{15 mm}  \textcolor{blue}{$\CL$}: load capacitance [F].\\
 354     \textcolor{blue}{$V$}: the supply voltage [V]. \hspace{8 mm} \textcolor{blue}{$F$}: operational frequency [Hz].}
 355    \begin{equation}
 356      \label{eq:ps}
 357      \textcolor{red}{P_s} = \textcolor{blue}{V \cdot \Ntrans \cdot \Kdesign \cdot \Ileak} % preamble symbol macros keep formula and legend identical; the former \small was dropped (size switches are invalid in math mode)
 358    \end{equation}
 359     \underline{Where}:\\
 360         \scriptsize{ \textcolor{blue}{$V$}: the supply voltage [V].  \hspace{19 mm}   \textcolor{blue}{$\Ntrans$}: number of transistors. \\
 361         \textcolor{blue}{$\Kdesign$}: design dependent parameter. \hspace{3 mm} \textcolor{blue}{$\Ileak$}: technology dependent
 362              parameter [A].}
 363
 364
 365 \end{frame}
 366
 367
 368
 369 %%%%%%%%%%%%%%%%%%%%
 370 %%    SLIDE 12   %%
 371 %%%%%%%%%%%%%%%%%%%%
 372
 373 \begin{frame}{Energy model for a homogeneous platform}
 374        \vspace{-0.77cm}
 375             \begin{figure}
 376   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{homo-model/a-}{0}{441} % animation built from frames homo-model/a-0 .. a-441 at 10 fps, with player controls
 377   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
 378   \end{figure}
 379
 380       %  \begin{block}{\small Rauber and Rünger's energy model}
 381          %$ E = P_{d} \cdot S_1^{-2} \cdot
 382          %\left( T_1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^2} \right) +
 383           %  P_{s} \cdot S_1  \cdot T_1 \cdot N$
 384         %\end{block}
 385           % \textcolor{blue}{$S_1$}: the maximum scaling factor.\\
 386           % \textcolor{blue}{$P_{d}$}: the dynamic power.\\
 387           % \textcolor{blue}{$P_{s}$}: the static power.\\
 388           % \textcolor{blue}{$T_I$}: the execution time of the slower task.\\
 389           % \textcolor{blue}{$T_i$}: the execution time of task i.\\
 390           % \textcolor{blue}{$N$}:  the number of  nodes.
 391
 392
 393
 394 \end{frame}
 395
 396
 397
 398
 399  %%%%%%%%%%%%%%%%%%%%
 400 %%    SLIDE 14   %%
 401 %%%%%%%%%%%%%%%%%%%%
 402 \begin{frame}{Performance and energy reduction trade-off}
 403         \begin{femtoBlock}{} \vspace{-15 mm}
 404                \begin{figure}
 405      \centering
 406      \subfloat[\small  Real relation.]{%
 407      \includegraphics[width=.43\textwidth]{c1/file3}\label{fig:r2}}
 408      \quad%
 409      \subfloat[\small Converted relation.]{%
 410      \includegraphics[width=.43\textwidth]{c1/file}\label{fig:r1}}%
 411   \label{fig:rel}
 412  % \caption{The energy and performance relation}
 413 \end{figure}
 414
 415  Where:~~~ $\textcolor{blue}{\text{Performance}} = \text{execution time}^{-1}$
 416
 417 %\vspace{-0.3cm}
 418       \small
 419          \begin{block}{\small Our objective function} % words inside math are set with \text; P/E symbols come from the preamble macros \Pnorm and \Enorm
 420          \centering{$\textbf{\emph {\textcolor{red}{MaxDist}}} = \max_{j=1,2,\dots ,F}
 421                     (\overbrace{\Pnorm(S_j)}^{\text{\textcolor{blue}{Maximize}}} -
 422                      \overbrace{\Enorm(S_j)}^{\text{\textcolor{blue}{Minimize}}} )$}
 423
 424         \end{block}
 425         \end{femtoBlock}
 426
 427 \end{frame}
 428
 429 %%%%%%%%%%%%%%%%%%%%
 430 %%    SLIDE 15   %%
 431 %%%%%%%%%%%%%%%%%%%%
 432  %\begin{frame}{Scaling factor selection algorithm}
 433 %\vspace{-0.75cm}
 434     % \begin{center}
 435       %\includegraphics[width=.56 \textwidth]{c1/algo-homo}
 436      %\end{center}
 437
 438 %\end{frame}
 439
 440
 441 %%%%%%%%%%%%%%%%%%%%
 442 %%    SLIDE 16   %%
 443 %%%%%%%%%%%%%%%%%%%%
 444 \begin{frame}{Scaling factor selection algorithm}
 445 \vspace{-0.75cm}
 446
 447      \begin{figure}
 448   \animategraphics[autopause,controls,scale=0.29,buttonsize=0.2cm]{10}{dvfs-homo/a-}{0}{335} % animation built from frames dvfs-homo/a-0 .. a-335 at 10 fps, with player controls
 449   %\includegraphics[width=0.6\textwidth]{dvfs-homo/a-159}
 450   \end{figure}
 451 \end{frame}
 452
 453 %%%%%%%%%%%%%%%%%%%%
 454 %%    SLIDE 17   %%
 455 %%%%%%%%%%%%%%%%%%%%
 456 \begin{frame}{Experiment over SimGrid }
 457       \begin{femtoBlock}{}
 458         \begin{itemize}
 459          \small
 460            \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 461            \item The proposed algorithm was applied to the NAS parallel benchmarks.\medskip
 462            \item Each node in the cluster has 18 frequency values from \textbf{2.5\,GHz} to \textbf{800\,MHz}.\medskip
 463            \item The proposed algorithm was evaluated over the A, B and C classes of the benchmarks using 4, 8 or 9 and 16 nodes respectively. \medskip
 464            \item $P_d=20\,\mathrm{W}$,  $P_s=4\,\mathrm{W}$. % units are upright and separated from the number by a thin space
 465                 \end{itemize}
 466         \end{femtoBlock}
 467 \end{frame}
 468
 469
 470 %%%%%%%%%%%%%%%%%%%%
 471 %%    SLIDE 18   %%
 472 %%%%%%%%%%%%%%%%%%%%
 473 \begin{frame}{Experimental results}
 474   \begin{femtoBlock}{}
 475       \centering { % NOTE(review): \centering is a declaration and takes no argument; the braces only open a group around the three plots
 476      \includegraphics[width=.35\textwidth]{c1/ep}
 477      \includegraphics[width=.35\textwidth]{c1/cg}
 478      \includegraphics[width=.35\textwidth]{c1/bt}}
 479
 480 \hspace{0.5cm}
 481
 482      \centering {\includegraphics[width=.55\textwidth]{c1/results.pdf}}
 483  \end{femtoBlock}
 484 \end{frame}
 485
 486
 487   %%%%%%%%%%%%%%%%%%%%
 488 %%    SLIDE 19   %%
 489 %%%%%%%%%%%%%%%%%%%%
 490 \begin{frame}{Results comparison}
 491          \small \textcolor{blue}{Rauber and Rünger's  scaling factor  \textcolor{black}{ \tiny \textsuperscript{2}}}
 492
 493          \vspace{2 mm}
 494
 495            $\Sopt = \sqrt[3]{\frac{2}{N} \cdot \frac{P_{\mathit{dyn}}}{P_{\mathit{static}}} \cdot
 496             \left( 1 + \sum_{i=2}^{N} \frac{T_i^3}{T_1^3}\right) } $
 497
 498
 499    \begin{center}
 500             \includegraphics[width=.55\textwidth]{c1/compare-c.pdf}
 501    \end{center}
 502
 503
 504 \vspace{-2 mm}
 505          \tiny \textsuperscript{2}  Thomas Rauber and Gudula Rünger. Analytical modeling and simulation of the energy consumption of  independent tasks. In Proceedings of the Winter Simulation Conference, 2012.
 506 \end{frame}
 507
 508
 509 %%%%%%%%%%%%%%%%%%%%
 510 %%    SLIDE 20   %%
 511 %%%%%%%%%%%%%%%%%%%%
 512 %\begin{frame}{The proposed new energy model}
 513    % \vspace{-0.75cm}
 514   %\begin{figure}
 515  % \animategraphics[autopause,controls,scale=0.28,buttonsize=0.2cm]{10}{homo-model/a-}{0}{356}
 516   %\includegraphics[width=0.6\textwidth]{homo-model/a-356}
 517  % \end{figure}
 518 %\end{frame}
 519
 520
 521 %%%%%%%%%%%%%%%%%%%%
 522 %%    SLIDE 21   %%
 523 %%%%%%%%%%%%%%%%%%%%
 524 %\begin{frame}{\large Comparing the new model with Rauber's model }
 525 % \vspace{0.1cm}
 526 % \centering
 527     %\includegraphics[width=.45\textwidth]{c1/energy_con}
 528
 529    %\includegraphics[width=.5\textwidth]{c1/compare-scales}
 530 %\end{frame}
 531
 532
 533
 534
 535    % \begin{frame}{Summary}
 536      % \begin{femtoBlock}{}
 537      % \begin{itemize}
 538       %\small
 539        %\item  We have presented a new online scaling factor selection method that  \textcolor{blue}{optimizes simultaneously the energy and performance}.\medskip
 540        % \item It predicts \textcolor{blue}{ the energy consumption and the performance} of the parallel applications. \medskip
 541          %\item Our algorithm  \textcolor{blue}{saves more energy} when the communication and the other slacks times are big.     \medskip
 542          %\item It gives the  \textcolor{blue}{best trade-off between energy reduction and
 543                % performance}. \medskip
 544          %\item  Our method \ \textcolor{blue}{outperforms Rauber and Rünger's method} in terms of  energy-performance ratio.
 545          %\item The proposed new energy model is  \textcolor{blue}{more accurate} then Rauber energy model.
 546          %\end{itemize}
 547
 548         %\end{femtoBlock}
 549 %\end{frame}
 550
 551
 552 %%%%%%%%%%%%%%%%%%%%
 553 %%    SLIDE 22    %%
 554 %%%%%%%%%%%%%%%%%%%%
 555
 556
 557 \section{\small {Energy optimization of a heterogeneous platform}}% sectioning command placed between frames instead of inside the frame body
 558
 559 \begin{frame}{The second contribution}
 560 \begin{center}
 561
 562
 563 \bfseries\Large \textcolor{blue}{Energy optimization of a parallel application with iterations running over a heterogeneous platform}
 564 \end{center}
 565  \end{frame}
 566
 567
 568
 569 %%%%%%%%%%%%%%%%%%%%
 570 %%    SLIDE 23    %%
 571 %%%%%%%%%%%%%%%%%%%%
 572
 573 \begin{frame}{Objectives}
 574         \begin{femtoBlock}{} \vspace{-12 mm}
 575                 \begin{itemize} \small
 576                   \item   Proposing  \textcolor{blue}{new energy and performance models} for message passing  applications with iterations running
 577                           over a heterogeneous platform (cluster or Grid). \medskip
 578                    \item  Studying the effect of the scaling factor $S$ on both the \textcolor{blue}{energy consumption  and the performance} of
 579                           message passing iterative applications.    \medskip
 580
 581                    \item  Computing  the vector of scaling factors ($S_1, S_2, \dots, S_n$)  producing \textcolor{blue} {the good trade-off} between
 582                           the energy consumption and the performance.
 583                 \end{itemize}
 584
 585           \vspace{-10 mm}
 586         \end{femtoBlock}
 587 \end{frame}
 588
 589
 590 %%%%%%%%%%%%%%%%%%%%
 591 %%    SLIDE 24    %%
 592 %%%%%%%%%%%%%%%%%%%%
 593 \begin{frame}{The execution time model}
 594       \vspace{-8 mm}
 595      \begin{figure}[!t]
 596        \centering
 597        \includegraphics[scale=0.5]{c2/commtasks}
 598        \label{fig:heter}
 599      \end{figure}
 600        \vspace{-12 mm}
 601        \medskip
 602
 603     \begin{block}{\small The execution time prediction model}
 604     \begin{equation}
 605      \label{eq:perf}
 606      \textcolor{red}{\Tnew} = \textcolor{blue}{\max_{i=1,2,\dots,N} (\TcpOld[i] \cdot S_{i}) + \min_{i=1,2,\dots,N} (\Tcm[i])} % symbols come from the preamble macros; the former \small was dropped (size switches are invalid in math mode)
 607     \end{equation}
 608     \end{block}
 609  \small  Where: $ \textcolor{red}{\Tcm} = \textcolor{blue}{\text{communication times} + \text{slack times}}$
 610
 611 \end{frame}
 612
 613  %%%%%%%%%%%%%%%%%%%%
 614 %%    SLIDE 25    %%
 615 %%%%%%%%%%%%%%%%%%%%
 616  %\begin{frame}{The energy consumption model}
 617    % The overall energy consumption of a message passing synchronous  application executed over
 618    %  a heterogeneous platform can be computed as  follows:
 619    % \begin{multline}
 620     % \label{eq:energy}
 621    %  \textcolor{red}{E} = \textcolor{blue}{\sum_{i=1}^{N} {(S_i^{-2} \cdot Pd_i \cdot  Tcp_i)}} + {} \\
 622   %   \textcolor{blue}{\sum_{i=1}^{N} (Ps_i \cdot (\max_{i=1,2,\dots,N} (Tcp_i \cdot S_{i}) + {\min_{i=1,2,\dots,N} (Tcm_i))}}
 623    %   \hspace{10 mm}
 624    % \end{multline}
 625    % \underline{where}:\\
 626    % \textcolor{blue}{N} : is the number of nodes.
 627 %\end{frame}
 628
 629
 630 %%%%%%%%%%%%%%%%%%%%
 631 %%    SLIDE 26    %%
 632 %%%%%%%%%%%%%%%%%%%%
 633   \begin{frame}{The energy  model  for heterogeneous cluster}
 634   \vspace{-0.77cm}
 635  \begin{figure}
 636   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{heter-model/a-}{0}{350} % animation built from frames heter-model/a-0 .. a-350 at 10 fps, with player controls
 637   %\includegraphics[width=0.6\textwidth]{heter-model/a-272}
 638   \end{figure}
 639  \end{frame}
 640
 641
 642
 643
 644 %%%%%%%%%%%%%%%%%%%%
 645 %%    SLIDE 27    %%
 646 %%%%%%%%%%%%%%%%%%%%
 647 %\begin{frame}{The trade-off between energy  and performance}
 648    % \vspace{-7 mm}
 649     %\begin{figure}
 650    %  \centering{ \includegraphics[width=.4\textwidth]{c2/heter}}
 651    % \end{figure}
 652    % \vspace{-7 mm}
 653    % \textcolor{red}{\underline{Step1}}: computing the normalized energy \textcolor{blue}%{$E_{norm} = \frac{E_{reduced}}
 654     %{E_{Max}}$}. \\
 655     % \textcolor{red}{\underline{Step2}}: computing the normalized performance \textcolor{blue}{$P_{norm} = \frac{T_{Max}}{T_{new}}$}.
 656
 657    %  \begin{block}{\small The tradeoff model}
 658     % \begin{equation}
 659     %  \label{eq:max}
 660     %  \textcolor{red}{MaxDist} =
 661      % \mathop {\max_{i=1,\dots F}}_{j=1,\dots,N}
 662       % (\overbrace{P_{norm}(S_{ij})}^{\text{\textcolor{blue}{Maximize}}} -
 663       % \overbrace{E_{norm}(S_{ij})}^{\text{\textcolor{blue}{Minimize}}} )
 664       %\end{equation}
 665     % \end{block}
 666 %\end{frame}
 667
 668
 669 %%%%%%%%%%%%%%%%%%%%
 670 %%    SLIDE 28    %%
 671 %%%%%%%%%%%%%%%%%%%%
 672  %\begin{frame}{The scaling algorithm for heter. cluster}
 673
 674  %\centering
 675    %\includegraphics[width=.52\textwidth]{algo-heter}
 676  %\end{frame}
 677
 678
 679  %%%%%%%%%%%%%%%%%%%%
 680 %%    SLIDE 29    %%
 681 %%%%%%%%%%%%%%%%%%%%
 682  \begin{frame}{The scaling algorithm for heter. cluster}
 683  \vspace{-0.77cm}
 684  \centering
 685
 686   \begin{figure}
 687   \animategraphics[autopause,controls,scale=0.3,buttonsize=0.2cm]{10}{dvfs-heter/a-}{0}{836} % animation built from frames dvfs-heter/a-0 .. a-836 at 10 fps, with player controls
 688  % \includegraphics[width=0.6\textwidth]{dvfs-heter/a-650}
 689   \end{figure}
 690 \end{frame}
 691
 692
 693
 694
 695 %%%%%%%%%%%%%%%%%%%%
 696 %%    SLIDE 30    %%
 697 %%%%%%%%%%%%%%%%%%%%
 698 %\begin{frame}{Experiments over a heterogeneous cluster  }
 699       %  \begin{itemize}
 700         % \small
 701           % \item The experiments were executed on the simulator SimGrid/SMPI v3.10.\medskip
 702           % \item The scaling algorithm was applied to the NAS parallel benchmarks class C.\medskip
 703           % \item Four types of processors with different computing powers were used.\medskip
 704           % \item The benchmarks were executed with different number of nodes ranging from 4 to 144 nodes.\medskip
 705           % \item It was assumed that the total power consumption of the CPU consist of 80\% dynamic power and 20\% static power.
 706                  % \medskip
 707
 708         %\end{itemize}
 709
 710 %\end{frame}
 711
 712
 713 %%%%%%%%%%%%%%%%%%%%
 714 %%    SLIDE 31    %%
 715 %%%%%%%%%%%%%%%%%%%%
 716 %\begin{frame}{The simulation results}
 717   % \vspace{-5 mm}
 718   % \begin{figure}[!t]
 719    %\centering
 720     %\includegraphics[width=0.8\textwidth]{c2/energy_saving.pdf}
 721
 722    % \textcolor{blue}{On average, it reduces the energy consumption by \textcolor{red}{29\%}
 723      %for the class C of the NAS Benchmarks executed over 8 nodes}
 724
 725   % \end{figure}
 726 %\end{frame}
 727
 728
 729
 730 %%%%%%%%%%%%%%%%%%%%
 731 %%    SLIDE 32    %%
 732 %%%%%%%%%%%%%%%%%%%%
 733 %\begin{frame}{The simulation results}
 734  %  \vspace{-5 mm}
 735   % \begin{figure}[!t]
 736   % \centering
 737
 738    % \includegraphics[width=.8\textwidth]{c2/perf_degra.pdf}
 739
 740   % \textcolor{blue}{On average, it degrades  by \textcolor{red}{3.8\%} the performance
 741     % of NAS Benchmarks class C executed over 8 nodes}
 742   %   \end{figure}
 743 %\end{frame}
 744
 745
 746
 747
 748
 749
 750
 751
 752 %%%%%%%%%%%%%%%%%%%%
 753 %%    SLIDE 35    %%
 754 %%%%%%%%%%%%%%%%%%%%
 755 %\begin{frame}{Energy optimization of grid platform}
 756   % \begin{figure}[!t]
 757    % \centering
 758          %    \includegraphics[width=.6\textwidth]{c2/grid5000.pdf}
 759
 760         %   \small  10 sites distributed over France and Luxembourg
 761         %\end{figure}
 762 %\end{frame}
 763
 764
 765 %%%%%%%%%%%%%%%%%%%%
 766 %%    SLIDE 36    %%
 767 %%%%%%%%%%%%%%%%%%%%
 768 %\begin{frame}{The grid architecture}
 769 %\begin{center}
 770 %\includegraphics[width=.8\textwidth]{c2/init_freq.pdf}
 771 %\end{center}
 772
 773  %\begin{frame}{Performance, Energy and trade-off models} \small
 774   %\begin{block}{\small The performance model of grid}
 775    % \begin{equation}
 776   %\label{eq:perf}
 777   %\Tnew = \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}({\TcpOld[ij]} \cdot S_{ij})
 778  % +\mathop{\min_{j=1,\dots,M_h}}  (\Tcm[hj])
 779 %\end{equation}
 780     %\end{block}
 781
 782
 783  %\begin{block}{\small The energy model of grid}\small
 784   %  \begin{equation}
 785   %\label{eq:energy}
 786  %E = \sum_{i=1}^{N} \sum_{i=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +
 787 % \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \Tnew)
 788 %\end{equation}
 789    % \end{block}
 790
 791 %\begin{block}{\small The trade-off model of grid}
 792 %\small
 793     %\begin{equation}
 794    %\label{eq:max}
 795   %\MaxDist =
 796   %\mathop{  \mathop{\max_{i=1,\dots N}}_{j=1,\dots,M_i}}_{k=1,\dots,F_j}
 797    %   (\overbrace{\Pnorm(S_{ijk})}^{\text{Maximize}} -
 798     %   \overbrace{\Enorm(S_{ijk})}^{\text{Minimize}} )
 799 %\end{equation}
 800    % \end{block}
 801
 802
 803  %\end{frame}
 804
 805
 806
 807 %%%%%%%%%%%%%%%%%%%%
 808 %%    SLIDE 37    %%
 809 %%%%%%%%%%%%%%%%%%%%
 810  \begin{frame}{Experiments over Grid'5000}
 811
 812    \textcolor{blue}{The experiments were conducted using three
 813           clusters distributed over one or two sites.}
 814            \vspace{-7 mm}
 815           \begin{center}
 816           \includegraphics[width=.5\textwidth]{c2/grid5000-2.pdf}
 817           \end{center}
 818       \vspace{-10 mm}
 819   \textcolor{blue}{Grid'5000 power measurement tools were used.}
 820         \vspace{-9 mm}
 821   \begin{center}
 822           \includegraphics[width=.5\textwidth]{c2/power_consumption.pdf}
 823           \end{center}
 824
 825
 826 \end{frame}
 827
 828
 829
 830
 831 %%%%%%%%%%%%%%%%%%%%
 832 %%    SLIDE 38    %%
 833 %%%%%%%%%%%%%%%%%%%%
 834 \begin{frame}{Experiments over Grid'5000}
 835
 836    \begin{minipage}{0.4\textwidth}
 837        %\textcolor{blue}{Execution the NAS class D on 16 nodes saves the energy by
 838         %\textcolor{red}{30\%}}
 839      \small \textcolor{blue}{The average energy saving =  \textcolor{red}{30\%}}
 840    \end{minipage}
 841      \begin{minipage}{0.55\textwidth}
 842         \begin{figure}[h!]
 843           \includegraphics[width=0.83 \textwidth]{c2/eng_s.eps}
 844      \end{figure}
 845 \end{minipage}
 846
 847          \begin{minipage}{0.4\textwidth}
 848            %\textcolor{blue}{Execution the NAS class D on 16 nodes degrades the
 849                 %performance by \textcolor{red}{3.2\%}}
 850       \small  \textcolor{blue}{The average performance degradation  =  \textcolor{red}{3.2\%}}
 851         \end{minipage}
 852        \begin{minipage}{0.55\textwidth}
 853          \begin{figure}[h!]
 854            \includegraphics[width=.83\textwidth]{c2/per_d.eps}
 855          \end{figure}
 856           \end{minipage}
 857  \end{frame}
 858
 859
 860
 861 %%%%%%%%%%%%%%%%%%%%
 862 %%    SLIDE 33    %%
 863 %%%%%%%%%%%%%%%%%%%%
 864 \begin{frame}{The results of the three power scenarios}
 865    \vspace{-5 mm}
 866    \begin{figure}[!t]
 867    \centering
 868    \includegraphics[width=.45\textwidth]{c2/eng_pow.eps} % first of two plots, each .45\textwidth wide (presumably the energy plot -- confirm)
 869    \hspace{0.3cm}
 870    \includegraphics[width=.45\textwidth]{c2/per_pow.eps} % second plot (presumably performance -- confirm)
 871    \vspace{4 mm}
 872    \includegraphics[width=.7\textwidth]{c2/three_scenarios.pdf} % third image, .7\textwidth wide
 873    \end{figure}
 874 \end{frame}
 875
 876
 877
 878
 879
 880
 881
 882 %%%%%%%%%%%%%%%%%%%%
 883 %%    SLIDE 39    %%
 884 %%%%%%%%%%%%%%%%%%%%
 885 \begin{frame}{One core and Multi-cores per node results}
 886    %\textcolor{blue}{One core  and Multi-cores per node results:}
 887
 888   \begin{figure}[h!]
 889   \includegraphics[width=.48\textwidth]{c2/eng_s_mc.eps}
 890   \hspace{0.3cm}
 891   \includegraphics[width=.48\textwidth]{c2/per_d_mc.eps}
 892   \end{figure}
 893
 894   \centering \small \textcolor{blue}{Using the multi-core per node scenario decreases the computation to communication ratio}.
 895 \end{frame}
 896
 897
 898 %%%%%%%%%%%%%%%%%%%%
 899 %%    SLIDE 34    %%
 900 %%%%%%%%%%%%%%%%%%%%
 901 \begin{frame}{Comparing the objective function to EDP}
 902
 903      EDP is the product of the energy consumption and the delay{\tiny\textsuperscript{3}}.
 904     \vspace{-5 mm}
 905     \begin{figure}[!t]
 906     \centering
 907     \includegraphics[width=.6\textwidth]{c2/edp_dist.eps}
 908
 909
 910     \end{figure}
 911
 912   \tiny  \textsuperscript{3} Spiliopoulos et al., Green governors: A framework for continuously adaptive DVFS, in International Green Computing Conference and Workshops (IGCC), 2011.
 913 \end{frame}
 914 %\begin{frame}{Summary}
 915 %\begin{itemize}
 916      % \small
 917         % \item  Two scaling algorithms were applied to \textcolor{blue}{heterogeneous clusters} and \textcolor{blue}{grids}.
 918         % \item  New \textcolor{blue}{energy} and \textcolor{blue}{performance} models were proposed.
 919       %   \item  The experiments were conducted over the \textcolor{blue}{SimGrid}  simulator and the real
 920           %test-bed \textcolor{blue}{Grid'5000}.
 921
 922          %\item The algorithm saves the energy by \textcolor{blue}{29\%} and only
 923         %  degrades the performance by \textcolor{blue}{3.8\%} for simulated  heterogeneous
 924       %    clusters.
 925
 926          %\item The algorithm saves the energy by \textcolor{blue}{30\%} and only
 927         % degrades the performance by \textcolor{blue}{3.2\%} for  Grid'5000 results.
 928
 929        %  \item  The proposed method \textcolor{blue}{outperforms the EDP method} in terms of  energy-performance ratio.
 930      %    \end{itemize}
 931 %\end{frame}
 932
 933
 934 %%%%%%%%%%%%%%%%%%%%
 935 %%    SLIDE 40    %%
 936 %%%%%%%%%%%%%%%%%%%%
 937 \begin{frame}{The third contribution}
 938 \section{\small {Energy optimization of asynchronous applications}}
 939 \begin{center}
 940 \bf  \Large \textcolor{blue}{Energy optimization of asynchronous iterative message passing  applications}
 941 \end{center}
 942  \end{frame}
 943
 944
 945
 946 %%%%%%%%%%%%%%%%%%%%
 947 %%    SLIDE 41   %%
 948 %%%%%%%%%%%%%%%%%%%%
 949 \begin{frame}{Problem definition}\vspace{0.8 mm}
 950 \textcolor{blue}{The execution of a synchronous parallel iterative application over a grid }
 951 \vspace{-8 mm}
 952 \begin{figure}
 953  \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{syn/a-}{0}{647}
 954  %\includegraphics[width=0.6\textwidth]{syn/a-503}
 955   \end{figure}
 956 \end{frame}
 957
 958
 959
 960 %%%%%%%%%%%%%%%%%%%%
 961 %%    SLIDE 42   %%
 962 %%%%%%%%%%%%%%%%%%%%
 963 \begin{frame}{Problem definition}\vspace{0.8 mm}
 964 \textcolor{blue}{The execution of an asynchronous parallel iterative application over a grid }
 965 \vspace{-8 mm}
 966 \begin{figure}
 967  \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn/a-}{0}{556}
 968  %\includegraphics[width=0.6\textwidth]{asyn/a-440}
 969   \end{figure}
 970 \end{frame}
 971
 972
 973
 974 %%%%%%%%%%%%%%%%%%%%
 975 %%    SLIDE 43   %%
 976 %%%%%%%%%%%%%%%%%%%%
 977 \begin{frame}{Solution}\vspace{0.8mm}
 978 \textcolor{blue}{Using asynchronous communications with DVFS }
 979 \vspace{-8 mm}
 980 \begin{figure}
 981   \animategraphics[autopause,controls,scale=0.26,buttonsize=0.2cm]{10}{asyn+dvfs/a-}{0}{344}
 982   %\includegraphics[width=0.6\textwidth]{asyn+dvfs/a-314}
 983   \end{figure}
 984 \end{frame}
 985
 986
 987
 988
 989 %%%%%%%%%%%%%%%%%%%%
 990 %%    SLIDE 44   %%
 991 %%%%%%%%%%%%%%%%%%%%
 992 %\begin{frame}{The performance models}
 993
 994 %\begin{block}{\small The performance model of Asynch. Applications}\small
 995 %\begin{equation}
 996   %\label{eq:asyn_time}
 997  %\Tnew =  \frac{\sum_{i=1}^{N} \sum_{j=1}^{M_i}({\TcpOld[ij]} \cdot S_{ij})} {N  \cdot M_i }
 998 %\end{equation}
 999 %\end{block}
1000
1001
1002 %\begin{block}{\small The performance model of Hybrid Applications}\small
1003 %\begin{equation}
1004   %\label{eq:asyn_perf}
1005   %\Tnew =  \frac{\sum_{i=1}^{N} (\max_{j=1,\dots, M_i} ({\TcpOld[ij]} \cdot S_{ij}) +
1006    %\min_{j=1,\dots,M_i} ({\Ltcm[ij]}))}{N}
1007 %\end{equation}
1008 %\end{block}
1009
1010
1011 %\end{frame}
1012
1013
1014
1015 %%%%%%%%%%%%%%%%%%%%
1016 %%    SLIDE 45   %%
1017 %%%%%%%%%%%%%%%%%%%%
1018 %\begin{frame}{The energy consumption models}
1019
1020 %\begin{block}{\small The energy model of Asynch. Applications}\small
1021 %\begin{equation}
1022   %\label{eq:asyn_energy1}
1023 % E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot  \Tcp[ij] \cdot (\Pd[ij]+\Ps[ij]) )}
1024 %\end{equation}
1025 %\end{block}
1026
1027
1028 %\begin{block}{\small The energy model of Hybrid Applications}\small
1029 %\begin{multline}
1030   %\label{eq:asyn_energy}
1031  %E = \sum_{i=1}^{N} \sum_{j=1}^{M_i} {(S_{ij}^{-2} \cdot \Pd[ij] \cdot  \Tcp[ij])} +  \sum_{i=1}^{N} \sum_{j=1}^{M_i} (\Ps[ij] \cdot \\
1032 % ( \mathop{\max_{j=1,\dots,M_i}} ({\Tcp[ij]} \cdot S_{ij}) + \mathop{\min_{j=1,\dots,M_i}} ({\Ltcm[ij]})))
1033 %\end{multline}
1034 %\end{block}
1035 %\end{frame}
1036
1037
1038
1039 %%%%%%%%%%%%%%%%%%%%
1040 %%    SLIDE 44   %%
1041 %%%%%%%%%%%%%%%%%%%%
1042 \begin{frame}{The performance and the energy models }
1043
1044 \centering
1045 \includegraphics[width=0.9\textwidth]{syn-vs-asyn.pdf}
1046 \end{frame}
1047
1048
1049
1050
1051
1052 %%%%%%%%%%%%%%%%%%%%
1053 %%    SLIDE 46   %%
1054 %%%%%%%%%%%%%%%%%%%%
1055 %\begin{frame}{The scaling algorithm for Asynch.  applications}
1056 %\vspace{-0.1 mm}
1057 %\centering
1058 %\includegraphics[width=0.55\textwidth]{algo-hybrid.pdf}
1059 %\end{frame}
1060
1061
1062
1063 %%%%%%%%%%%%%%%%%%%%
1064 %%    SLIDE 47   %%
1065 %%%%%%%%%%%%%%%%%%%%
1066 \begin{frame}{The experiments}
1067    \vspace{-5 mm}
1068    \begin{figure}[!t]
1069    \begin{itemize}
1070       \small
1071         \item The architecture of the grid:
1072    \end{itemize}
1073     \includegraphics[width=0.5\textwidth]{c3/hybrid-model.pdf}
1074    \end{figure}
1075    \begin{itemize}
1076       \small
1077         \item Applying the proposed algorithm to the asynchronous iterative message passing multi-splitting method.
1078         \item Evaluating the application over the simulator and Grid'5000.
1079    \end{itemize}
1080 \end{frame}
1081
1082
1083
1084 %%%%%%%%%%%%%%%%%%%%
1085 %%    SLIDE 48   %%
1086 %%%%%%%%%%%%%%%%%%%%
1087 %\begin{frame}{The simulation results}
1088 %\centering \small \textcolor{blue}{The best scenario in terms of energy and performance  is %the Async. MS with Sync. DVFS}
1089
1090 %\centering
1091    % \includegraphics[scale=0.42]{c3/energy_saving.eps}
1092
1093  %\centering  The average energy saving  = \textcolor{red}{22\%}
1094 %\end{frame}
1095
1096
1097
1098 %%%%%%%%%%%%%%%%%%%%
1099 %%    SLIDE 49   %%
1100 %%%%%%%%%%%%%%%%%%%%
1101 %\begin{frame}{The simulation results}
1102 %\centering
1103
1104    %  \includegraphics[scale=0.42]{c3/perf_degra.eps}
1105
1106 %\centering    The average speed-up  = \textcolor{red}{5.72\%}
1107 %\end{frame}
1108
1109
1110
1111 %%%%%%%%%%%%%%%%%%%%
1112 %%    SLIDE 50   %%
1113 %%%%%%%%%%%%%%%%%%%%
1114  \begin{frame}{The Grid'5000 results}
1115    \vspace{-10 mm}
1116    \begin{figure}[!t]
1117    \centering
1118    \hspace{-8 mm}
1119     \includegraphics[width=0.53\textwidth]{c3/energy-s-compare.eps}
1120     \includegraphics[width=0.53\textwidth]{c3/perf-deg-compare.eps}
1121    \end{figure}
1122     \vspace{-5 mm}
1123      \centering \footnotesize
1124
1125      %\small \textcolor{blue}{The best scenario in terms of energy and performance  is the Async. MS with Sync. DVFS}
1126
1127 The average energy saving = \textcolor{red}{26.93\%}, the average speed-up =  \textcolor{red}{21.48\%}
1128 \end{frame}
1129
1130
1131 %%%%%%%%%%%%%%%%%%%%
1132 %%    SLIDE 51   %%
1133 %%%%%%%%%%%%%%%%%%%%
1134 \begin{frame}{The comparison results}
1135  \centering
1136     \includegraphics[width=.5\textwidth]{c3/compare.eps}
1137
1138     \includegraphics[width=.5\textwidth]{c3/compare_scales.eps}
1139 \end{frame}
1140
1141
1142
1143
1144 %%%%%%%%%%%%%%%%%%%%
1145 %%    SLIDE 52  %%
1146 %%%%%%%%%%%%%%%%%%%%
1147 \begin{frame}{Conclusions}
1148 \section{Conclusions and Perspectives}
1149 \begin{itemize}
1150
1151 \small  \barrow  Three \textcolor{blue}{new energy consumption and performance} models were proposed for synchronous or asynchronous parallel applications with iterations running over
1152 \textcolor{blue}{homogeneous and  heterogeneous clusters or grids}.
1153
1154
1155
1156 \small \barrow \textcolor{blue}{A new objective function} to optimize both the energy consumption and the performance was proposed.
1157
1158 \small \barrow \textcolor{blue}{New online frequency selecting algorithms} for clusters and grids were developed.
1159
1160 \small \barrow The proposed algorithms were applied to the \textcolor{blue}{NAS parallel benchmarks} and \textcolor{blue}{the
1161 Multi-splitting} method.
1162
1163 \small \barrow The proposed algorithms were evaluated over the \textcolor{blue}{SimGrid simulator} and over  the \textcolor{blue}{Grid'5000 testbed}.
1164
1165 \small  \barrow All the proposed methods were compared to either \textcolor{blue}{Rauber and Rünger's  method} or to the \textcolor{blue}{EDP objective function}.
1166
1167
1168 \end{itemize}
1169 \end{frame}
1170
1171
1172
1173 %%%%%%%%%%%%%%%%%%%%
1174 %%    SLIDE 53   %%
1175 %%%%%%%%%%%%%%%%%%%%
1176 \begin{frame}{Publications}
1177
1178 \begin{block}{\small Journal Articles }\scriptsize
1179 \begin{enumerate}[$\lbrack$1$\rbrack$]
1180
1181 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Optimizing the energy consumption of message passing applications with iterations executed over grids. \textit{Journal of Computational
1182       Science}, 2016.
1183
1184 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. Energy Consumption Reduction for
1185       Asynchronous Message Passing Applications.  \textit{Journal of Supercomputing}, 2016 (accepted with minor revisions).
1186
1187 \end{enumerate}
1188 \end{block}
1189
1190
1191 \begin{block}{\small Conference Articles }\scriptsize
1192
1193 \begin{enumerate}[$\lbrack$1$\rbrack$]
1194
1195 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Dynamic Frequency Scaling for
1196       Energy Consumption Reduction in Distributed MPI Programs. \textit{ISPA 2014},
1197       pp.~225--230. IEEE Computer Society, Milan, Italy (2014).
1198
1199 \item Jean-Claude Charr, Raphaël Couturier, Ahmed Fanfakh, Arnaud Giersch. Energy Consumption Reduction
1200       with DVFS for Message Passing Iterative Applications on Heterogeneous Architectures.
1201       \textit{The 16\textsuperscript{th} PDSEC}, pp.~922--931. IEEE Computer Society, India (2015).
1202
1203 \item Ahmed Fanfakh, Jean-Claude Charr, Raphaël Couturier,  Arnaud Giersch. CPUs Energy Consumption
1204       Reduction for Asynchronous Parallel Methods Running over Grids. \textit{The 19\textsuperscript{th} CSE conference}. IEEE Computer Society,
1205       Paris (2016).
1206
1207 \end{enumerate}
1208
1209 \end{block}
1210 \end{frame}
1211
1212
1213 %%%%%%%%%%%%%%%%%%%%
1214 %%    SLIDE 54   %%
1215 %%%%%%%%%%%%%%%%%%%%
1216 \begin{frame}{Perspectives}
1217
1218 \begin{itemize}
1219
1220 \small  \barrow The proposed algorithms should  take into consideration the
1221 \textcolor{blue}{variability between some iterations}.
1222
1223 \small  \barrow The proposed algorithms should be applied to \textcolor{blue}{other message passing methods with iterations} in order to see how they adapt to the characteristics of these methods.
1224
1225 \small \barrow The proposed algorithms for heterogeneous platforms should be applied to heterogeneous platforms composed of \textcolor{blue}{CPUs and GPUs}.
1226
1227 \small \barrow Comparing the results returned by the energy models to the values given by  \textcolor{blue}{real instruments that measure the energy consumption} of CPUs during the execution time.
1228 \small \barrow  Considering the power consumed by the other devices in the node such as
1229 \textcolor{blue}{the memory and the hard drive}  in the energy consumption model.
1230
1231 \end{itemize}
1232
1233 \end{frame}
1234
1235 %%%%%%%%%%%%%%%%%%%%
1236 %%    SLIDE 55  %%
1237 %%%%%%%%%%%%%%%%%%%%
1238 \begin{frame}{Fin} \vspace{-10 mm}
1239
1240             \centering \Large \textcolor{blue}{Thank you for your attention}
1241
1242             \vspace{2cm}
1243             \centering \textcolor{blue}{ {\Large Questions?}}
1244
1245 \end{frame}
1246 \end{document}
1247 %  _____ ___ _   _
1248 % |  ___|_ _| \ | |
1249 % | |_   | ||  \| |
1250 % |  _|  | || |\  |
1251 % |_|   |___|_| \_|
1252 %