@article{ref1,
title = {Asynchronous grid computation for {A}merican options derivatives},
-author = {Chau, Ming and Couturier, Rapha\"el and Bahi, Jacques M. and Spiteri, Pierre},
+author = {Chau, M. and Couturier, R. and Bahi, J. M. and Spiteri, P.},
journal = {Advances in Engineering Software},
volume = {},
number = {0},
@article{ref2,
title = {Matrix iterative analysis},
-author = {Varga, Richard S.},
+author = {Varga, R. S.},
journal = {Prentice Hall},
volume = {},
number = {},
}
@article{ref3,
- author = {Baudet, G\'erard M.},
+ author = {Baudet, G. M.},
title = {Asynchronous iterative methods for multiprocessors},
journal = {Journal Assoc. Comput. Mach.},
volume = {25},
}
@article{ref4,
- author = {Bertsekas, Dimitri P. and Tsitsiklis, John N.},
+ author = {Bertsekas, D. P. and Tsitsiklis, J. N.},
title = {Parallel and distributed computation, numerical methods},
journal = {Prentice Hall Englewood Cliffs N. J. (1989)},
volume = {},
}
@article{ref5,
- author = {Bahi, Jacques M. and Contassot-Vivier, Sylvain and Couturier, Rapha\"el},
+ author = {Bahi, J. M. and Contassot-Vivier, S. and Couturier, R.},
title = {Parallel iterative algorithms: from sequential to grid computing},
journal = {Chapman \& Hall/CRC, Numerical Analysis \& Scientific Computing 1 (2007)},
volume = {},
}
@article{ref6,
- author = {Miellou, Jean-Claude and Spiteri, Pierre},
+ author = {Miellou, J.-C. and Spiteri, P.},
title = {Two criteria for the convergence of asynchronous iterations},
journal = {in Computers and computing, P. Chenin et al. ed., Wiley Masson},
volume = {},
}
@article{ref7,
- author = {Miellou, Jean-Claude},
+ author = {Miellou, J.-C.},
title = {Algorithmes de relaxation chaotique \`a retards},
journal = {RAIRO Analyse num\'erique},
volume = {R1},
}
@article{ref9,
- author = {Micikevicius, Paulius},
+ author = {Micikevicius, P.},
title = {{3D} finite difference computation on {GPUs} using {CUDA}},
journal = {Proceedings of 2nd Workshop on General Purpose Processing on Graphics Processing Units},
volume = {},
}
@article{ref10,
- author = {Leist, Aron and Playne, Daniel P. and Hawick, Ken A.},
+ author = {Leist, A. and Playne, D. P. and Hawick, K. A.},
title = {Exploiting graphical processing units for data-parallel scientific applications},
journal = {Concurrency and Computation: Practice and Experience},
volume = {21},
}
@article{ref11,
- author = {Chau, Ming and Couturier, Rapha\"el and Bahi, Jacques M. and Spiteri, Pierre},
+ author = {Chau, M. and Couturier, R. and Bahi, J. M. and Spiteri, P.},
title = {Parallel solution of the obstacle problem in grid environments},
journal = {International Journal of High Performance Computing Applications},
volume = {25},
}
@article{ref13,
- author = {Evans, David J.},
+ author = {Evans, D. J.},
title = {Parallel {S}.{O}.{R}. iterative methods},
journal = {Parallel Computing},
volume = {1},
}
@article{ref14,
- author = {Iwashita, Takeshi and Shimasaki, Masaaki},
+ author = {Iwashita, T. and Shimasaki, M.},
title = {Block red-black ordering method for parallel processing of {ICCG} solver},
journal = {High Performance Computing},
volume = {2327},
@article{ref15,
title = {Iterative methods for sparse linear systems},
-author = {Saad, Yousef},
+author = {Saad, Y.},
journal = {Society for Industrial and Applied Mathematics, 2nd edition},
volume = {},
number = {},
\begin{figure}[!t]
\centering
\includegraphics[width=12cm]{Chapters/chapter14/figures/decomphalo}
-\caption{Left: the lattice is decomposed between MPI tasks. For
+\caption[The lattice is decomposed between MPI tasks.]{Left: the lattice is decomposed between MPI tasks. For
clarity we show a 2D decomposition of a 3D lattice, but in practice
we decompose in all 3 dimensions. Halo cells are added to each
sub-domain (as shown on the upper right for a single slice) which store
\begin{figure}[!t]
\centering
\includegraphics[width=10cm]{Chapters/chapter14/figures/two_graphs_ud}
-\caption{The weak (top) and strong (bottom) scaling of \textit{Ludwig}. Closed
+\caption[The weak (top) and strong (bottom) scaling of \textit{Ludwig}.]{The weak (top) and strong (bottom) scaling of \textit{Ludwig}. Closed
shapes denote results using the CPU version run on the Cray
XE6 (using two 16-core AMD Interlagos CPUs per node), while open
shapes denote results using the GPU version on the Cray XK6 (using a
\centering
%\includegraphics[width=10cm]{Chapters/chapter14/figures/bbl}
\includegraphics[width=10cm]{Chapters/chapter14/figures/colloid_new}
-\caption{
+\caption[A two-dimensional schematic picture of spherical particles on the lattice.]{
A two-dimensional schematic picture of spherical particles on the lattice.
Left: a particle is allowed
to move continuously across the lattice, and the position of the
introducing the associated logic.
Ultimately, the GPU might favour other boundary methods which treat solid and
fluid on a somewhat more equal basis, for example, the immersed boundary
-method \cite{ch14:immersed,ch14:immersed-lb} or smoothed profile method
+method \cite{ch14:immersed1,ch14:immersed2,ch14:immersed-lb} or smoothed profile method
\cite{ch14:spm}.
However, the approach adopted here allows us to exploit
the GPU for the intensive fluid simulation whilst maintaining the complex
% set second argument of \begin to the number of references
% (used to reserve space for the reference number labels box)
-
-\begin{thebibliography}{1}
+\putbib[Chapters/chapter14/biblio14]
+%\begin{thebibliography}{1}
%\bibitem{IEEEhowto:kopka}
%H.~Kopka and P.~W. Daly, \emph{A Guide to \LaTeX}, 3rd~ed.\hskip 1em plus
-\bibitem{succi-book}
-S. Succi, \textit{The lattice Boltzmann equation and beyond},
-Oxford University Press, Oxford, 2001.
+%% \bibitem{succi-book}
+%% S. Succi, \textit{The lattice Boltzmann equation and beyond},
+%% Oxford University Press, Oxford, 2001.
%\bibitem{mpi-standard}
%Message Passing Interface Forum, http://www.mpi-forum.org
-\bibitem{desplat}
-Desplat, J.-C., I. Pagonabarraga, and P. Bladon,
-\textit{LUDWIG: A parallel lattice-Boltzmann code for complex fluids}.
-Comput. Phys. Comms., \textbf{134}, 273, 2001.
+%% \bibitem{desplat}
+%% Desplat, J.-C., I. Pagonabarraga, and P. Bladon,
+%% \textit{LUDWIG: A parallel lattice-Boltzmann code for complex fluids}.
+%% Comput. Phys. Comms., \textbf{134}, 273, 2001.
-\bibitem{aidun2010}
-C.K. Aidun and J.R. Clausen,
-\textit{Lattice Boltzmann method for complex flows},
-Ann. Rev. Fluid Mech., \textbf{42} 439--472 (2010).
+%% \bibitem{aidun2010}
+%% C.K. Aidun and J.R. Clausen,
+%% \textit{Lattice Boltzmann method for complex flows},
+%% Ann. Rev. Fluid Mech., \textbf{42} 439--472 (2010).
%\bibitem{bray1994}
%A.J. Bray,
%fluid systems},
%Phys. Rev. E, \textbf{54}, 5041--5052 (1996).
-\bibitem{stratford2008}
-K. Stratford and I. Pagonabarraga,
-Parallel domain decomposition for lattice Boltzmann with moving particles,
-\textit{Comput. Math. with Applications} \textbf{55}, 1585 (2008).
+%% \bibitem{stratford2008}
+%% K. Stratford and I. Pagonabarraga,
+%% Parallel domain decomposition for lattice Boltzmann with moving particles,
+%% \textit{Comput. Math. with Applications} \textbf{55}, 1585 (2008).
%\bibitem{xe6}
%Cray XE6 Product Brochure, available from
%http://www.cray.com/Products/XK6/XK6.aspx (2011)
-\bibitem{wei2004}
-X. Wei, W. Li, K. M\"uller, and A.E. Kaufman,
-\textit{The lattice Boltzmann method for simulating gaseous phenomena},
-IEEE Transactions on Visualization and Computer Graphics,
-\textbf{10}, 164--176 (2004).
+%% \bibitem{wei2004}
+%% X. Wei, W. Li, K. M\"uller, and A.E. Kaufman,
+%% \textit{The lattice Boltzmann method for simulating gaseous phenomena},
+%% IEEE Transactions on Visualization and Computer Graphics,
+%% \textbf{10}, 164--176 (2004).
%% Apparently first LB via GPU; a serious contribution using single fluid
%%d3q19 single relaxation time.
-\bibitem{zhu2006}
-H. Zhu, X. Liu, Y. Liu, and E. Wu,
-\textit{Simulation of miscible binary mixtures based on lattice Boltzmann method},
-Comp. Anim. Virtual Worlds, \textbf{17}, 403--410 (2006).
+%% \bibitem{zhu2006}
+%% H. Zhu, X. Liu, Y. Liu, and E. Wu,
+%% \textit{Simulation of miscible binary mixtures based on lattice Boltzmann method},
+%% Comp. Anim. Virtual Worlds, \textbf{17}, 403--410 (2006).
%% Single relaxation (MRT mentioned) time d3q19 apparently sound although
%%not a lot of detail. Pre-cuda so graphics code.
-\bibitem{zhao2007}
-Y. Zhao,
-\textit{Lattice Boltzmann based PDE solver on the GPU},
-Visual Comput., doi 10.1007/s00371-0070191-y (2007).
-
-\bibitem{toelke2010}
-J. T\"olke,
-Implementation of a lattice Boltzmann kernel using the compute unified
-device architecture developed by nVIDIA,
-Comput. Visual Sci. 13 29--39 (2010).
-
-\bibitem{fan2004}
-Z. Fan, F. Qiu, A. Kaufman, and S. Yoakum-Stover,
-\textit{GPU cluster for high performance computing},
-Proceedings of ACM/IEEE Supercomputing Conference, pp. 47--59,
-IEEE Computer Society Press, Pittsburgh, PA (2004).
-
-\bibitem{myre2011}
-J. Myre, S.D.C. Walsh, D. Lilja, and M.O. Saar,
-\textit{Performance analysis of single-phase, multiphase, and multicomponent
-lattice Boltzmann fluid flow simulations on GPU clusters},
-Concurrency Computat.: Pract. Exper., \textbf{23}, 332--350 (2011).
-
-\bibitem{obrecht2011}
-C. Obrecht, F. Kuznik, B. Tourancheau, and J.-J. Roux,
-\textit{Multi-GPU implementation of the lattice Boltzmann method},
-Comput. Math. with Applications,
-doi:10.1016/j.camwa.2011.02.020 (2011).
-
-\bibitem{bernaschi2010}
-M. Bernaschi, M. Fatica, S. Melchionna, S. Succi, and E. Kaxiras,
-\textit{A flexible high-performance lattice Boltzmann GPU code for the
-simulations of fluid flow in complex geometries},
-Concurrency Computat.: Pract. Exper., \textbf{22}, 1--14 (2010).
-
-\bibitem{xian2011}
-W. Xian and A. Takayuki,
-\textit{Multi-GPU performance of incompressible flow computation by
-lattice Boltzmann method on GPU cluster},
-Parallel Comput., doi:10.1016/j.parco.2011.02.007 (2011).
-
-\bibitem{feichtinger2011}
-C. Feichtinger, J. Habich, H. K\"ostler, G. Hager, U. R\"ude, and
-G. Wellein,
-A flexible patch-based lattice Boltzmann parallelization approach
-for heterogeneous GPU-CPU clusters,
-\textit{Parallel Computing} \textbf{37} 536--549 (2011).
-
-\bibitem{wellein2006}
-G. Wellein, T. Zeiser, G Hager, and S. Donath,
-On the single processor performance of simple lattice Boltzmann kernels,
-\textit{Computers and Fluids}, \textbf{35}, 910--919 (2006).
-
-\bibitem{pohl2003}
-T. Pohl, M. Kowarschik, J. Wilke, K. Igelberger, and U. R\"ude,
-Optimization and profiling of the cache performance of parallel
-lattice Boltzmann code,
-\textit{Parallel Process Lett.} \textit{13} 549--560 (2003).
-
-\bibitem{mattila2007}
-K. Mattila, J. Hyv\"aluoma, T. Rossi, M. Aspn\"as and J. Westerholm,
-An efficient swap algorithm for the lattice Boltzmann method,
-\textit{Comput. Phys. Comms.} \textit{176} 200-210 (2007).
-
-\bibitem{wittmann2012}
-M. Wittmann, T. Zeiser, G. Hager, and G. Wellein,
-Comparison of different propagation steps for lattice Boltzmann methods,
-\textit{Comput. Math with Appl.} doi:10.1016/j.camwa.2012.05.002 (2012).
-
-\bibitem{walshsaar2012}
-S.D.C. Walsh and M.O. Saar,
-Developing extensible lattice Boltzmann simulators for general-purpose
-graphics-processing units,
-\textit{Comm. Comput. Phys.}, \textbf{13} 867--879 (2013).
-
-
-\bibitem{williams2011}
-S. Williams, L. Oliker, J. Carter, and J Shalf,
-Extracting ultra-scale lattice Boltzmann performance via
-hierarchical and distributed auto-tuning,
-\textit{Proc. SC2011}.
-
-
-\bibitem{ch14:stratford-jsp2005}
-K. Stratford, R. Adhikari, I. Pagonabarraga, and J.-C. Desplat,
-\textit{Lattice Boltzmann for Binary Fluids with Suspended Colloids},
-J. Stat. Phys. \textbf{121}, 163 (2005).
-
-\bibitem{ladd1994}
-A.J.C. Ladd,
-Numerical simulations of particle suspensions via a discretized
-Boltzmann equation. Part 1. Theoretical foundation,
-\textit{J. Fluid Mech.} \textbf{271} 285--309 (1994);
-Part II. Numerical results,
-\textit{ibid.} \textbf{271} 311--339 (1994).
+%% \bibitem{zhao2007}
+%% Y. Zhao,
+%% \textit{Lattice Boltzmann based PDE solver on the GPU},
+%% Visual Comput., doi 10.1007/s00371-0070191-y (2007).
+
+%% \bibitem{toelke2010}
+%% J. T\"olke,
+%% Implementation of a lattice Boltzmann kernel using the compute unified
+%% device architecture developed by nVIDIA,
+%% Comput. Visual Sci. 13 29--39 (2010).
+
+%% \bibitem{fan2004}
+%% Z. Fan, F. Qiu, A. Kaufman, and S. Yoakum-Stover,
+%% \textit{GPU cluster for high performance computing},
+%% Proceedings of ACM/IEEE Supercomputing Conference, pp. 47--59,
+%% IEEE Computer Society Press, Pittsburgh, PA (2004).
+
+%% \bibitem{myre2011}
+%% J. Myre, S.D.C. Walsh, D. Lilja, and M.O. Saar,
+%% \textit{Performance analysis of single-phase, multiphase, and multicomponent
+%% lattice Boltzmann fluid flow simulations on GPU clusters},
+%% Concurrency Computat.: Pract. Exper., \textbf{23}, 332--350 (2011).
+
+%% \bibitem{obrecht2011}
+%% C. Obrecht, F. Kuznik, B. Tourancheau, and J.-J. Roux,
+%% \textit{Multi-GPU implementation of the lattice Boltzmann method},
+%% Comput. Math. with Applications,
+%% doi:10.1016/j.camwa.2011.02.020 (2011).
+
+%% \bibitem{bernaschi2010}
+%% M. Bernaschi, M. Fatica, S. Melchionna, S. Succi, and E. Kaxiras,
+%% \textit{A flexible high-performance lattice Boltzmann GPU code for the
+%% simulations of fluid flow in complex geometries},
+%% Concurrency Computat.: Pract. Exper., \textbf{22}, 1--14 (2010).
+
+%% \bibitem{xian2011}
+%% W. Xian and A. Takayuki,
+%% \textit{Multi-GPU performance of incompressible flow computation by
+%% lattice Boltzmann method on GPU cluster},
+%% Parallel Comput., doi:10.1016/j.parco.2011.02.007 (2011).
+
+%% \bibitem{feichtinger2011}
+%% C. Feichtinger, J. Habich, H. K\"ostler, G. Hager, U. R\"ude, and
+%% G. Wellein,
+%% A flexible patch-based lattice Boltzmann parallelization approach
+%% for heterogeneous GPU-CPU clusters,
+%% \textit{Parallel Computing} \textbf{37} 536--549 (2011).
+
+%% \bibitem{wellein2006}
+%% G. Wellein, T. Zeiser, G. Hager, and S. Donath,
+%% On the single processor performance of simple lattice Boltzmann kernels,
+%% \textit{Computers and Fluids}, \textbf{35}, 910--919 (2006).
+
+%% \bibitem{pohl2003}
+%% T. Pohl, M. Kowarschik, J. Wilke, K. Igelberger, and U. R\"ude,
+%% Optimization and profiling of the cache performance of parallel
+%% lattice Boltzmann code,
+%% \textit{Parallel Process Lett.} \textbf{13} 549--560 (2003).
+
+%% \bibitem{mattila2007}
+%% K. Mattila, J. Hyv\"aluoma, T. Rossi, M. Aspn\"as and J. Westerholm,
+%% An efficient swap algorithm for the lattice Boltzmann method,
+%% \textit{Comput. Phys. Comms.} \textbf{176} 200--210 (2007).
+
+%% \bibitem{wittmann2012}
+%% M. Wittmann, T. Zeiser, G. Hager, and G. Wellein,
+%% Comparison of different propagation steps for lattice Boltzmann methods,
+%% \textit{Comput. Math with Appl.} doi:10.1016/j.camwa.2012.05.002 (2012).
+
+%% \bibitem{walshsaar2012}
+%% S.D.C. Walsh and M.O. Saar,
+%% Developing extensible lattice Boltzmann simulators for general-purpose
+%% graphics-processing units,
+%% \textit{Comm. Comput. Phys.}, \textbf{13} 867--879 (2013).
+
+
+%% \bibitem{williams2011}
+%% S. Williams, L. Oliker, J. Carter, and J. Shalf,
+%% Extracting ultra-scale lattice Boltzmann performance via
+%% hierarchical and distributed auto-tuning,
+%% \textit{Proc. SC2011}.
+
+
+%% \bibitem{ch14:stratford-jsp2005}
+%% K. Stratford, R. Adhikari, I. Pagonabarraga, and J.-C. Desplat,
+%% \textit{Lattice Boltzmann for Binary Fluids with Suspended Colloids},
+%% J. Stat. Phys. \textbf{121}, 163 (2005).
+
+%% \bibitem{ladd1994}
+%% A.J.C. Ladd,
+%% Numerical simulations of particle suspensions via a discretized
+%% Boltzmann equation. Part 1. Theoretical foundation,
+%% \textit{J. Fluid Mech.} \textbf{271} 285--309 (1994);
+%% Part II. Numerical results,
+%% \textit{ibid.} \textbf{271} 311--339 (1994).
-\bibitem{nguyen2002}
-N.-Q. Nguyen and A.J.C. Ladd,
-Lubrication corrections for lattice Boltzmann simulations of particle
-suspensions,
-\textit{Phys. Rev. E} \textbf{66} 046708 (2002).
-
-\bibitem{ch14:immersed}
-C.S. Peskin,
-Flow patterns around heart valves; a numerical method,
-\textit{J. Comp. Phys.}, \textbf{10}, 252--271 (1972);
-C.S. Peskin,
-The immersed boundary method,
-\textit{Acta Nummerica} \textbf{11} 479--517 (2002).
-
-\bibitem{ch14:immersed-lb}
-Z.-G. Feng and E.E. Michaelides,
-The immersed boundary-lattice Boltzmann method for solving
-fluid-particles interaction problem,
-\textit{J. Comp. Phys.}, \textbf{195} 602--628 (2004).
-
-\bibitem{ch14:spm}
-Y. Nakayama and R. Yammamoto,
-Simulation method to resolve hydrodynamic interactions in colloidal
-dispersions,
-\textit{Phys. Rev. E}, \textbf{71} 036707 (2005).
-
-
-\end{thebibliography}
+%% \bibitem{nguyen2002}
+%% N.-Q. Nguyen and A.J.C. Ladd,
+%% Lubrication corrections for lattice Boltzmann simulations of particle
+%% suspensions,
+%% \textit{Phys. Rev. E} \textbf{66} 046708 (2002).
+
+%% \bibitem{ch14:immersed}
+%% C.S. Peskin,
+%% Flow patterns around heart valves; a numerical method,
+%% \textit{J. Comp. Phys.}, \textbf{10}, 252--271 (1972);
+%% C.S. Peskin,
+%% The immersed boundary method,
+%% \textit{Acta Numerica} \textbf{11} 479--517 (2002).
+
+%% \bibitem{ch14:immersed-lb}
+%% Z.-G. Feng and E.E. Michaelides,
+%% The immersed boundary-lattice Boltzmann method for solving
+%% fluid-particles interaction problem,
+%% \textit{J. Comp. Phys.}, \textbf{195} 602--628 (2004).
+
+%% \bibitem{ch14:spm}
+%% Y. Nakayama and R. Yamamoto,
+%% Simulation method to resolve hydrodynamic interactions in colloidal
+%% dispersions,
+%% \textit{Phys. Rev. E}, \textbf{71} 036707 (2005).
+
+
+%\end{thebibliography}