-\chapterauthor{Xavier Meyer}{Department of Computer Science, University of Geneva}
-\chapterauthor{Paul Albuquerque}{Institute for Informatics and Telecommunications, hepia, \\ University of Applied Sciences of Western Switzerland - Geneva}
-\chapterauthor{Bastien Chopard}{Department of Computer Science, University of Geneva}
+\chapterauthor{Xavier Meyer and Bastien Chopard}{Department of Computer Science, University of Geneva, Switzerland}
+\chapterauthor{Paul Albuquerque}{Institute for Informatics and Telecommunications, Hepia, \\ University of Applied Sciences of Western Switzerland - Geneva, Switzerland}
+%\chapterauthor{Bastien Chopard}{Department of Computer Science, University of Geneva}
%\chapter{Linear programming on a GPU: a study case based on the simplex method and the branch-cut-and bound algorithm}
\chapter{Linear programming on a GPU: a case study}
-\chapterauthor{Gleb Beliakov}{School of Information Technology, Deakin University, Burwood 3125, Australia}
-\chapterauthor{Shaowu Liu}{School of Information Technology, Deakin University, Burwood 3125, Australia}
+\chapterauthor{Gleb Beliakov and Shaowu Liu}{School of Information Technology, Deakin University, Burwood 3125, Australia}
+%\chapterauthor{Shaowu Liu}{School of Information Technology, Deakin University, Burwood 3125, Australia}
\chapter{Parallel Monotone Spline Interpolation and Approximation on GPUs}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\chapterauthor{}{}
-\chapterauthor{Lilia Ziane Khodja}{Femto-ST Institute, University of Franche-Comte, France}
+\chapterauthor{Lilia Ziane Khodja, Raphaël Couturier and Jacques Bahi}{Femto-ST Institute, University of Franche-Comte, France}
\chapterauthor{Ming Chau}{Advanced Solutions Accelerator, Castelnau Le Lez, France}
-\chapterauthor{Raphaël Couturier}{Femto-ST Institute, University of Franche-Comte, France}
+%\chapterauthor{Raphaël Couturier}{Femto-ST Institute, University of Franche-Comte, France}
\chapterauthor{Pierre Spitéri}{ENSEEIHT-IRIT, Toulouse, France}
-\chapterauthor{Jacques Bahi}{Femto-ST Institute, University of Franche-Comte, France}
+%\chapterauthor{Jacques Bahi}{Femto-ST Institute, University of Franche-Comte, France}
\chapter{Solving sparse nonlinear systems of obstacle problems on GPU clusters}
-\chapterauthor{Alan Gray and Kevin Stratford}{EPCC, The University of Edinburgh}
+\chapterauthor{Alan Gray and Kevin Stratford}{EPCC, The University of Edinburgh, United Kingdom}
\chapter[Ludwig: multiple GPUs for a fluid lattice Boltzmann
application]{Ludwig: multiple GPUs for a complex fluid lattice Boltzmann
-\chapterauthor{Pierre Fortin}{Laboratoire d'Informatique de Paris 6, University Paris 6}
-\chapterauthor{Rachid Habel}{T\'el\'ecom SudParis}
-\chapterauthor{Fabienne J\'ez\'equel}{Laboratoire d'Informatique de Paris 6, University Paris 6}
-\chapterauthor{Jean-Luc Lamotte}{Laboratoire d'Informatique de Paris 6, University Paris 6}
+\chapterauthor{Pierre Fortin, Fabienne J\'ez\'equel and Jean-Luc Lamotte}{Laboratoire d'Informatique de Paris 6, University Paris 6, France}
+\chapterauthor{Rachid Habel}{T\'el\'ecom SudParis, France}
+%\chapterauthor{Fabienne J\'ez\'equel}{Laboratoire d'Informatique de Paris 6, University Paris 6}
+%\chapterauthor{Jean-Luc Lamotte}{Laboratoire d'Informatique de Paris 6, University Paris 6}
\chapterauthor{Stan Scott}{School of Electronics, Electrical Engineering \& Computer Science,
-The Queen's University of Belfast}
+The Queen's University of Belfast, United Kingdom}
%\newcommand{\fixme}[1]{{\bf #1}}
-\chapterauthor{Guillaume Laville}{Femto-ST Institute, University of Franche-Comte, France}
-\chapterauthor{Christophe Lang}{Femto-ST Institute, University of Franche-Comte, France}
+\chapterauthor{Guillaume Laville, Christophe Lang, Bénédicte Herrmann and Laurent Philippe}{Femto-ST Institute, University of Franche-Comte, France}
+%\chapterauthor{Christophe Lang}{Femto-ST Institute, University of Franche-Comte, France}
\chapterauthor{Kamel Mazouzi}{Franche-Comte Computing Center, University of Franche-Comte, France}
\chapterauthor{Nicolas Marilleau}{UMMISCO, Institut de Recherche pour le Developpement (IRD), France}
-\chapterauthor{Bénédicte Herrmann}{Femto-ST Institute, University of Franche-Comte, France}
-\chapterauthor{Laurent Philippe}{Femto-ST Institute, University of Franche-Comte, France}
+%\chapterauthor{Bénédicte Herrmann}{Femto-ST Institute, University of Franche-Comte, France}
+%\chapterauthor{Laurent Philippe}{Femto-ST Institute, University of Franche-Comte, France}
\newlength\mylen
\newcommand\myinput[1]{%
-\chapterauthor{Raphaël Couturier}{Femto-ST Institute, University of Franche-Comt\'{e}}
-\chapterauthor{Christophe Guyeux}{Femto-ST Institute, University of Franche-Comt\'{e}}
+\chapterauthor{Raphaël Couturier and Christophe Guyeux}{Femto-ST Institute, University of Franche-Comte, France}
+%\chapterauthor{Christophe Guyeux}{Femto-ST Institute, University of Franche-Comt\'{e}}
\chapter{Pseudorandom Number Generator on GPU}
-\chapterauthor{Bertil Schmidt}{Johannes Gutenberg University of Mainz}
-\chapterauthor{Hoang-Vu Dang}{Johannes Gutenberg University of Mainz}
+\chapterauthor{Bertil Schmidt and Hoang-Vu Dang}{Johannes Gutenberg University of Mainz, Germany}
+%\chapterauthor{Hoang-Vu Dang}{Johannes Gutenberg University of Mainz}
\chapter{Solving large sparse linear systems for integer factorization on GPUs}
\label{chapter19}
-\chapterauthor{Stefan L. Glimberg}{Technical University of Denmark}
-\chapterauthor{Allan P. Engsig-Karup}{Technical University of Denmark}
-\chapterauthor{Allan S. Nielsen}{Technical University of Denmark}
-\chapterauthor{Bernd Dammann}{Technical University of Denmark}
+\chapterauthor{Stefan L. Glimberg, Allan P. Engsig-Karup, Allan S. Nielsen and Bernd Dammann}{Technical University of Denmark, Denmark}
+%\chapterauthor{Allan P. Engsig-Karup}{Technical University of Denmark}
+%\chapterauthor{Allan S. Nielsen}{Technical University of Denmark}
+%\chapterauthor{Bernd Dammann}{Technical University of Denmark}
-\chapterauthor{Allan P. Engsig-Karup}{Technical University of Denmark}
-\chapterauthor{Stefan L. Glimberg}{Technical University of Denmark}
-\chapterauthor{Allan S. Nielsen}{Technical University of Denmark}
-\chapterauthor{Ole Lindberg}{Technical University of Denmark}
+\chapterauthor{Allan P. Engsig-Karup, Stefan L. Glimberg, Allan S. Nielsen and Ole Lindberg}{Technical University of Denmark, Denmark}
+%\chapterauthor{Stefan L. Glimberg}{Technical University of Denmark}
+%\chapterauthor{Allan S. Nielsen}{Technical University of Denmark}
+%\chapterauthor{Ole Lindberg}{Technical University of Denmark}
\chapter{Fast hydrodynamics on heterogenous many-core hardware}
\label{ch7}
-\chapterauthor{Imen Chakroun}{Universit\'e Lille 1 CNRS/LIFL, INRIA Lille Nord Europe, Cit\'e scientifique - 59655, Villeneuve d'Ascq cedex, France\\}
-\chapterauthor{Nouredine Melab}{Universit\'e Lille 1 CNRS/LIFL, INRIA Lille Nord Europe, Cit\'e scientifique - 59655, Villeneuve d'Ascq cedex, France\\}
+\chapterauthor{Imen Chakroun and Nouredine Melab}{University of Lille 1 CNRS/LIFL, INRIA Lille Nord Europe, Cit\'e scientifique, 59655 Villeneuve d'Ascq cedex, France}
+%\chapterauthor{Nouredine Melab}{Universit\'e Lille 1 CNRS/LIFL, INRIA Lille Nord Europe, Cit\'e scientifique - 59655, Villeneuve d'Ascq cedex, France\\}
\chapter{GPU-accelerated Tree-based Exact Optimization Methods}
\label{ch8:GPU-accelerated-tree-based-exact-optimization-methods}
\vspace{-0.4cm}
-\section{Thread divergence \index{Thread divergence}}
+\section{Thread divergence}
+\label{ch8:ThreadDivergence}
\subsection{The thread divergence issue}
-During the execution of an application on GPU, to each GPU multiprocessor is assigned one or more thread block(s) to execute. Those threads are partitioned into warps that get scheduled for execution. For each instruction of the flow, the multiprocessor selects a warp that is ready to be run. A warp executes one common instruction at a time, so full efficiency is realized when all threads of a warp agree on their execution path. In this chapter, the G80 model, in which a warp is a pool of 32 threads, is used. If threads of a warp diverge via a data-dependent conditional branch, the warp serially executes each branch path taken. Threads that are not on the taken path are disabled, and when all paths complete, the threads converge back to the same execution path. This phenomenon is called thread/branch divergence and often causes serious performance degradations. Branch divergence occurs only within a warp; different warps execute independently regardless of whether they are executing common or disjointed code paths.
+During the execution of an application on a GPU, each GPU multiprocessor is assigned one or more thread blocks to execute. Those threads are partitioned into warps that get scheduled for execution. For each instruction of the flow, the multiprocessor selects a warp that is ready to be run. A warp executes one common instruction at a time, so full efficiency is realized when all threads of a warp agree on their execution path. In this chapter, the G80 model, in which a warp is a pool of 32 threads, is used. If threads of a warp diverge via a data-dependent conditional branch, the warp serially executes each branch path taken. Threads that are not on the taken path are disabled, and when all paths complete, the threads converge back to the same execution path. This phenomenon is called thread/branch divergence\index{Thread divergence} and often causes serious performance degradation. Branch divergence occurs only within a warp; different warps execute independently regardless of whether they are executing common or disjoint code paths.
\vspace{0.2cm}
-\chapterauthor{Malika Mehdi}{CERIST Research Center, DTISI, 3 rue des frères Aissou, 16030 Ben-Aknoun, Algiers, Algeria}
+\chapterauthor{Malika Mehdi and Ahc\`{e}ne Bendjoudi}{CERIST Research Center, DTISI, 3 rue des frères Aissou, 16030 Ben-Aknoun, Algiers, Algeria}
\chapterauthor{Lakhdar Loukil}{University of Oran, Algeria}
-\chapterauthor{Ahc\`{e}ne Bendjoudi}{CERIST Research Center, DTISI, 3 rue des frères Aissou, 16030 Ben-Aknoun, Algiers, Algeria}
+%\chapterauthor{Ahc\`{e}ne Bendjoudi}{CERIST Research Center, DTISI, 3 rue des frères Aissou, 16030 Ben-Aknoun, Algiers, Algeria}
\chapterauthor{Nouredine Melab}{Université Lille 1, LIFL/UMR CNRS 8022, 59655-Villeneuve d’Ascq cedex, France}
\stepcounter{numauthors}
%%\the\c@numauthors
\ifnum\c@numauthors=1 %
- \sbox\@AUonebox{\CAPlusOneFont#1}
+ \sbox\@AUonebox{\vbox{\hsize\textwidth\CAPlusOneFont\raggedright\noindent \CAPlusOneFont#1}}
\sbox\@AUaffonebox{\vbox{\hsize\textwidth\CAAPlusOneFont\noindent #2\par}}
\sbox\@finalAUboxfromone{\copy\@AUonebox}
\def\chapter@authorone{\copy\@finalAUboxfromone}
\def\chapter@affiliationone{\copy\@AUaffonebox}
\fi \ifnum\c@numauthors=2
- \sbox\@AUtwobox{\CAPlusOneFont#1}
+ \sbox\@AUtwobox{\vbox{\hsize\textwidth\CAPlusOneFont\raggedright\noindent \CAPlusOneFont#1}}
\sbox\@AUafftwobox{\vbox{\hsize\textwidth\CAAPlusOneFont\noindent #2\par}}
\sbox\@finalAUboxfromtwo{\copy\@AUtwobox}
\def\chapter@authortwo{\copy\@finalAUboxfromtwo}
\def\chapter@affiliationtwo{\copy\@AUafftwobox}
\fi \ifnum\c@numauthors=3
- \sbox\@AUthreebox{\CAPlusOneFont#1}
+ \sbox\@AUthreebox{\vbox{\hsize\textwidth\CAPlusOneFont\raggedright\noindent \CAPlusOneFont#1}}
\sbox\@AUaffthreebox{\vbox{\hsize\textwidth\CAAPlusOneFont\noindent #2\par}}
\sbox\@finalAUboxfromthree{\copy\@AUthreebox}
\def\chapter@authorthree{\copy\@finalAUboxfromthree}
\def\chapter@affiliationthree{\copy\@AUaffthreebox}
\fi \ifnum\c@numauthors=4
- \sbox\@AUfourbox{\CAPlusOneFont#1}
+ \sbox\@AUfourbox{\vbox{\hsize\textwidth\CAPlusOneFont\raggedright\noindent \CAPlusOneFont#1}}
\sbox\@AUafffourbox{\vbox{\hsize\textwidth\CAAPlusOneFont\noindent #2\par}}
\sbox\@finalAUboxfromfour{\copy\@AUfourbox}
\def\chapter@authorfour{\copy\@finalAUboxfromfour}
\def\chapter@affiliationfour{\copy\@AUafffourbox}
\fi \ifnum\c@numauthors=5
- \sbox\@AUfivebox{\CAPlusOneFont#1}
+ \sbox\@AUfivebox{\vbox{\hsize\textwidth\CAPlusOneFont\raggedright\noindent \CAPlusOneFont#1}}
\sbox\@AUafffivebox{\vbox{\hsize\textwidth\CAAPlusOneFont\noindent #2\par}}
\sbox\@finalAUboxfromfive{\copy\@AUfivebox}
\def\chapter@authorfive{\copy\@finalAUboxfromfive}