X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/ab6faec96e0c06da1a3dc2eb8eeb3cdfc1a4528f..ccff43db77ed9a71b6d9fc52aaf03585104713ce:/BookGPU/Chapters/chapter14/ch14.tex diff --git a/BookGPU/Chapters/chapter14/ch14.tex b/BookGPU/Chapters/chapter14/ch14.tex index 916717d..318015f 100755 --- a/BookGPU/Chapters/chapter14/ch14.tex +++ b/BookGPU/Chapters/chapter14/ch14.tex @@ -1,13 +1,14 @@ -\chapterauthor{Alan Gray and Kevin Stratford}{EPCC, The University of Edinburgh} +\chapterauthor{Alan Gray and Kevin Stratford}{EPCC, The University of Edinburgh, United Kingdom} -\chapter{Ludwig: multiple GPUs for a complex fluid lattice Boltzmann +\chapter[Ludwig: multiple GPUs for a fluid lattice Boltzmann +application]{Ludwig: multiple GPUs for a complex fluid lattice Boltzmann application} %\putbib[biblio] \section{Introduction} -The lattice Boltzmann (LB) method (for an overview see, e.g., +The lattice Boltzmann (LB) method \index{Lattice Boltzmann method} (for an overview see, e.g., \cite{succi-book}) has become a popular approach to a variety of fluid dynamics problems. It provides a way to solve the incompressible, isothermal Navier-Stokes equations and has the attractive features of @@ -55,7 +56,7 @@ and particle suspensions, and typically require additional physics beyond the bare Navier-Stokes equations to provide a full description~\cite{aidun2010}. The representation of this extra physics raises additional design questions for the application -programmer. Here, we consider the \textit{Ludwig} code \cite{desplat}, +programmer. Here, we consider the \textit{Ludwig} code~\cite{desplat}\index{Ludwig code}, an LB application developed specifically for complex fluids (\textit{Ludwig} was named for Boltzmann, 1844--1906). 
We will present the steps @@ -217,7 +218,7 @@ for (is = 0; is < nsites; is++) { \begin{figure}[!t] \centering \includegraphics[width=12cm]{Chapters/chapter14/figures/decomphalo} -\caption{Left: the lattice is decomposed between MPI tasks. For +\caption[The lattice is decomposed between MPI tasks.]{Left: the lattice is decomposed between MPI tasks. For clarity we show a 2D decomposition of a 3D lattice, but in practice we decompose in all 3 dimensions. Halo cells are added to each sub-domain (as shown on the upper right for a single slice) which store @@ -437,7 +438,7 @@ a simulation in terms of accounting budgets, or electricity. \begin{figure}[!t] \centering \includegraphics[width=10cm]{Chapters/chapter14/figures/two_graphs_ud} -\caption{The weak (top) and strong (bottom) scaling of \textit{Ludwig}. Closed +\caption[The weak (top) and strong (bottom) scaling of \textit{Ludwig}.]{The weak (top) and strong (bottom) scaling of \textit{Ludwig}. Closed shapes denote results using the CPU version run on the Cray XE6 (using two 16-core AMD Interlagos CPUs per node), while open shapes denote results using the GPU version on the Cray XK6 (using a @@ -498,7 +499,7 @@ scaling for the larger systems. \centering %\includegraphics[width=10cm]{Chapters/chapter14/figures/bbl} \includegraphics[width=10cm]{Chapters/chapter14/figures/colloid_new} -\caption{ +\caption[A two-dimensional schematic picture of spherical particles on the lattice.]{ A two-dimensional schematic picture of spherical particles on the lattice. Left: a particle is allowed to move continuously across the lattice, and the position of the @@ -619,7 +620,7 @@ stage everywhere instead of moving the look-up table to the GPU and introducing the associated logic. 
Ultimately, the GPU might favour other boundary methods which treat solid and fluid on a somewhat more equal basis, for example, the immersed boundary -method \cite{ch14:immersed,ch14:immersed-lb} or smoothed profile method +method~\cite{ch14:immersed1,ch14:immersed2,ch14:immersed-lb} or smoothed profile method \cite{ch14:spm}. However, the approach adopted here allows us to exploit the GPU for the intensive fluid simulation whilst maintaining the complex @@ -958,8 +959,8 @@ by UK EPSRC under grant EV/J007404/1. % set second argument of \begin to the number of references % (used to reserve space for the reference number labels box) - -\begin{thebibliography}{1} +\putbib[Chapters/chapter14/biblio14] +%\begin{thebibliography}{1} %\bibitem{IEEEhowto:kopka} %H.~Kopka and P.~W. Daly, \emph{A Guide to \LaTeX}, 3rd~ed.\hskip 1em plus @@ -967,22 +968,22 @@ by UK EPSRC under grant EV/J007404/1. -\bibitem{succi-book} -S. Succi, \textit{The lattice Boltzmann equation and beyond}, -Oxford University Press, Oxford, 2001. +%% \bibitem{succi-book} +%% S. Succi, \textit{The lattice Boltzmann equation and beyond}, +%% Oxford University Press, Oxford, 2001. %\bibitem{mpi-standard} %Message Passing Interface Forum, http://www.mpi-forum.org -\bibitem{desplat} -Desplat, J.-C., I. Pagonabarraga, and P. Bladon, -\textit{LUDWIG: A parallel lattice-Boltzmann code for complex fluids}. -Comput. Phys. Comms., \textbf{134}, 273, 2001. +%% \bibitem{desplat} +%% Desplat, J.-C., I. Pagonabarraga, and P. Bladon, +%% \textit{LUDWIG: A parallel lattice-Boltzmann code for complex fluids}. +%% Comput. Phys. Comms., \textbf{134}, 273, 2001. -\bibitem{aidun2010} -C.K. Aidun and J.R. Clausen, -\textit{Lattice Boltzmann method for complex flows}, -Ann. Rev. Fluid Mech., \textbf{42} 439--472 (2010). +%% \bibitem{aidun2010} +%% C.K. Aidun and J.R. Clausen, +%% \textit{Lattice Boltzmann method for complex flows}, +%% Ann. Rev. Fluid Mech., \textbf{42} 439--472 (2010). %\bibitem{bray1994} %A.J. 
Bray, @@ -1001,10 +1002,10 @@ Ann. Rev. Fluid Mech., \textbf{42} 439--472 (2010). %fluid systems}, %Phys. Rev. E, \textbf{54}, 5041--5052 (1996). -\bibitem{stratford2008} -K. Stratford and I. Pagonabarraga, -Parallel domain decomposition for lattice Boltzmann with moving particles, -\textit{Comput. Math. with Applications} \textbf{55}, 1585 (2008). +%% \bibitem{stratford2008} +%% K. Stratford and I. Pagonabarraga, +%% Parallel domain decomposition for lattice Boltzmann with moving particles, +%% \textit{Comput. Math. with Applications} \textbf{55}, 1585 (2008). %\bibitem{xe6} %Cray XE6 Product Brochure, available from @@ -1017,146 +1018,146 @@ Parallel domain decomposition for lattice Boltzmann with moving particles, %http://www.cray.com/Products/XK6/XK6.aspx (2011) -\bibitem{wei2004} -X. Wei, W. Li, K. M\"uller, and A.E. Kaufman, -\textit{The lattice Boltzmann method for simulating gaseous phenomena}, -IEEE Transactions on Visualization and Computer Graphics, -\textbf{10}, 164--176 (2004). +%% \bibitem{wei2004} +%% X. Wei, W. Li, K. M\"uller, and A.E. Kaufman, +%% \textit{The lattice Boltzmann method for simulating gaseous phenomena}, +%% IEEE Transactions on Visualization and Computer Graphics, +%% \textbf{10}, 164--176 (2004). %% Apparently first LB via GPU; a serious contribution using single fluid %%d3q19 single relaxation time. -\bibitem{zhu2006} -H. Zhu, X. Liu, Y. Liu, and E. Wu, -\textit{Simulation of miscible binary mixtures based on lattice Boltzmann method}, -Comp. Anim. Virtual Worlds, \textbf{17}, 403--410 (2006). +%% \bibitem{zhu2006} +%% H. Zhu, X. Liu, Y. Liu, and E. Wu, +%% \textit{Simulation of miscible binary mixtures based on lattice Boltzmann method}, +%% Comp. Anim. Virtual Worlds, \textbf{17}, 403--410 (2006). %% Single relaxation (MRT mentioned) time d3q19 apparently sound although %%not a lot of detail. Pre-cuda so graphics code. -\bibitem{zhao2007} -Y. 
Zhao, -\textit{Lattice Boltzmann based PDE solver on the GPU}, -Visual Comput., doi 10.1007/s00371-0070191-y (2007). - -\bibitem{toelke2010} -J. T\"olke, -Implementation of a lattice Boltzmann kernel using the compute unified -device architecture developed by nVIDIA, -Comput. Visual Sci. 13 29--39 (2010). - -\bibitem{fan2004} -Z. Fan, F. Qiu, A. Kaufman, and S. Yoakum-Stover, -\textit{GPU cluster for high performance computing}, -Proceedings of ACM/IEEE Supercomputing Conference, pp. 47--59, -IEEE Computer Society Press, Pittsburgh, PA (2004). - -\bibitem{myre2011} -J. Myre, S.D.C. Walsh, D. Lilja, and M.O. Saar, -\textit{Performance analysis of single-phase, multiphase, and multicomponent -lattice Boltzmann fluid flow simulations on GPU clusters}, -Concurrency Computat.: Pract. Exper., \textbf{23}, 332--350 (2011). - -\bibitem{obrecht2011} -C. Obrecht, F. Kuznik, B. Tourancheau, and J.-J. Roux, -\textit{Multi-GPU implementation of the lattice Boltzmann method}, -Comput. Math. with Applications, -doi:10.1016/j.camwa.2011.02.020 (2011). - -\bibitem{bernaschi2010} -M. Bernaschi, M. Fatica, S. Melchionna, S. Succi, and E. Kaxiras, -\textit{A flexible high-performance lattice Boltzmann GPU code for the -simulations of fluid flow in complex geometries}, -Concurrency Computat.: Pract. Exper., \textbf{22}, 1--14 (2010). - -\bibitem{xian2011} -W. Xian and A. Takayuki, -\textit{Multi-GPU performance of incompressible flow computation by -lattice Boltzmann method on GPU cluster}, -Parallel Comput., doi:10.1016/j.parco.2011.02.007 (2011). - -\bibitem{feichtinger2011} -C. Feichtinger, J. Habich, H. K\"ostler, G. Hager, U. R\"ude, and -G. Wellein, -A flexible patch-based lattice Boltzmann parallelization approach -for heterogeneous GPU-CPU clusters, -\textit{Parallel Computing} \textbf{37} 536--549 (2011). - -\bibitem{wellein2006} -G. Wellein, T. Zeiser, G Hager, and S. 
Donath, -On the single processor performance of simple lattice Boltzmann kernels, -\textit{Computers and Fluids}, \textbf{35}, 910--919 (2006). - -\bibitem{pohl2003} -T. Pohl, M. Kowarschik, J. Wilke, K. Igelberger, and U. R\"ude, -Optimization and profiling of the cache performance of parallel -lattice Boltzmann code, -\textit{Parallel Process Lett.} \textit{13} 549--560 (2003). - -\bibitem{mattila2007} -K. Mattila, J. Hyv\"aluoma, T. Rossi, M. Aspn\"as and J. Westerholm, -An efficient swap algorithm for the lattice Boltzmann method, -\textit{Comput. Phys. Comms.} \textit{176} 200-210 (2007). - -\bibitem{wittmann2012} -M. Wittmann, T. Zeiser, G. Hager, and G. Wellein, -Comparison of different propagation steps for lattice Boltzmann methods, -\textit{Comput. Math with Appl.} doi:10.1016/j.camwa.2012.05.002 (2012). - -\bibitem{walshsaar2012} -S.D.C. Walsh and M.O. Saar, -Developing extensible lattice Boltzmann simulators for general-purpose -graphics-processing units, -\textit{Comm. Comput. Phys.}, \textbf{13} 867--879 (2013). - - -\bibitem{williams2011} -S. Williams, L. Oliker, J. Carter, and J Shalf, -Extracting ultra-scale lattice Boltzmann performance via -hierarchical and distributed auto-tuning, -\textit{Proc. SC2011}. - - -\bibitem{ch14:stratford-jsp2005} -K. Stratford, R. Adhikari, I. Pagonabarraga, and J.-C. Desplat, -\textit{Lattice Boltzmann for Binary Fluids with Suspended Colloids}, -J. Stat. Phys. \textbf{121}, 163 (2005). - -\bibitem{ladd1994} -A.J.C. Ladd, -Numerical simulations of particle suspensions via a discretized -Boltzmann equation. Part 1. Theoretical foundation, -\textit{J. Fluid Mech.} \textbf{271} 285--309 (1994); -Part II. Numerical results, -\textit{ibid.} \textbf{271} 311--339 (1994). +%% \bibitem{zhao2007} +%% Y. Zhao, +%% \textit{Lattice Boltzmann based PDE solver on the GPU}, +%% Visual Comput., doi 10.1007/s00371-0070191-y (2007). + +%% \bibitem{toelke2010} +%% J. 
T\"olke, +%% Implementation of a lattice Boltzmann kernel using the compute unified +%% device architecture developed by nVIDIA, +%% Comput. Visual Sci. 13 29--39 (2010). + +%% \bibitem{fan2004} +%% Z. Fan, F. Qiu, A. Kaufman, and S. Yoakum-Stover, +%% \textit{GPU cluster for high performance computing}, +%% Proceedings of ACM/IEEE Supercomputing Conference, pp. 47--59, +%% IEEE Computer Society Press, Pittsburgh, PA (2004). + +%% \bibitem{myre2011} +%% J. Myre, S.D.C. Walsh, D. Lilja, and M.O. Saar, +%% \textit{Performance analysis of single-phase, multiphase, and multicomponent +%% lattice Boltzmann fluid flow simulations on GPU clusters}, +%% Concurrency Computat.: Pract. Exper., \textbf{23}, 332--350 (2011). + +%% \bibitem{obrecht2011} +%% C. Obrecht, F. Kuznik, B. Tourancheau, and J.-J. Roux, +%% \textit{Multi-GPU implementation of the lattice Boltzmann method}, +%% Comput. Math. with Applications, +%% doi:10.1016/j.camwa.2011.02.020 (2011). + +%% \bibitem{bernaschi2010} +%% M. Bernaschi, M. Fatica, S. Melchionna, S. Succi, and E. Kaxiras, +%% \textit{A flexible high-performance lattice Boltzmann GPU code for the +%% simulations of fluid flow in complex geometries}, +%% Concurrency Computat.: Pract. Exper., \textbf{22}, 1--14 (2010). + +%% \bibitem{xian2011} +%% W. Xian and A. Takayuki, +%% \textit{Multi-GPU performance of incompressible flow computation by +%% lattice Boltzmann method on GPU cluster}, +%% Parallel Comput., doi:10.1016/j.parco.2011.02.007 (2011). + +%% \bibitem{feichtinger2011} +%% C. Feichtinger, J. Habich, H. K\"ostler, G. Hager, U. R\"ude, and +%% G. Wellein, +%% A flexible patch-based lattice Boltzmann parallelization approach +%% for heterogeneous GPU-CPU clusters, +%% \textit{Parallel Computing} \textbf{37} 536--549 (2011). + +%% \bibitem{wellein2006} +%% G. Wellein, T. Zeiser, G Hager, and S. 
Donath, +%% On the single processor performance of simple lattice Boltzmann kernels, +%% \textit{Computers and Fluids}, \textbf{35}, 910--919 (2006). + +%% \bibitem{pohl2003} +%% T. Pohl, M. Kowarschik, J. Wilke, K. Igelberger, and U. R\"ude, +%% Optimization and profiling of the cache performance of parallel +%% lattice Boltzmann code, +%% \textit{Parallel Process Lett.} \textbf{13} 549--560 (2003). + +%% \bibitem{mattila2007} +%% K. Mattila, J. Hyv\"aluoma, T. Rossi, M. Aspn\"as and J. Westerholm, +%% An efficient swap algorithm for the lattice Boltzmann method, +%% \textit{Comput. Phys. Comms.} \textbf{176} 200--210 (2007). + +%% \bibitem{wittmann2012} +%% M. Wittmann, T. Zeiser, G. Hager, and G. Wellein, +%% Comparison of different propagation steps for lattice Boltzmann methods, +%% \textit{Comput. Math. with Appl.} doi:10.1016/j.camwa.2012.05.002 (2012). + +%% \bibitem{walshsaar2012} +%% S.D.C. Walsh and M.O. Saar, +%% Developing extensible lattice Boltzmann simulators for general-purpose +%% graphics-processing units, +%% \textit{Comm. Comput. Phys.}, \textbf{13} 867--879 (2013). + + +%% \bibitem{williams2011} +%% S. Williams, L. Oliker, J. Carter, and J. Shalf, +%% Extracting ultra-scale lattice Boltzmann performance via +%% hierarchical and distributed auto-tuning, +%% \textit{Proc. SC2011}. + + +%% \bibitem{ch14:stratford-jsp2005} +%% K. Stratford, R. Adhikari, I. Pagonabarraga, and J.-C. Desplat, +%% \textit{Lattice Boltzmann for Binary Fluids with Suspended Colloids}, +%% J. Stat. Phys. \textbf{121}, 163 (2005). + +%% \bibitem{ladd1994} +%% A.J.C. Ladd, +%% Numerical simulations of particle suspensions via a discretized +%% Boltzmann equation. Part 1. Theoretical foundation, +%% \textit{J. Fluid Mech.} \textbf{271} 285--309 (1994); +%% Part 2. Numerical results, +%% \textit{ibid.} \textbf{271} 311--339 (1994). -\bibitem{nguyen2002} -N.-Q. Nguyen and A.J.C. 
Ladd, -Lubrication corrections for lattice Boltzmann simulations of particle -suspensions, -\textit{Phys. Rev. E} \textbf{66} 046708 (2002). - -\bibitem{ch14:immersed} -C.S. Peskin, -Flow patterns around heart valves; a numerical method, -\textit{J. Comp. Phys.}, \textbf{10}, 252--271 (1972); -C.S. Peskin, -The immersed boundary method, -\textit{Acta Nummerica} \textbf{11} 479--517 (2002). - -\bibitem{ch14:immersed-lb} -Z.-G. Feng and E.E. Michaelides, -The immersed boundary-lattice Boltzmann method for solving -fluid-particles interaction problem, -\textit{J. Comp. Phys.}, \textbf{195} 602--628 (2004). - -\bibitem{ch14:spm} -Y. Nakayama and R. Yammamoto, -Simulation method to resolve hydrodynamic interactions in colloidal -dispersions, -\textit{Phys. Rev. E}, \textbf{71} 036707 (2005). - - -\end{thebibliography} +%% \bibitem{nguyen2002} +%% N.-Q. Nguyen and A.J.C. Ladd, +%% Lubrication corrections for lattice Boltzmann simulations of particle +%% suspensions, +%% \textit{Phys. Rev. E} \textbf{66} 046708 (2002). + +%% \bibitem{ch14:immersed} +%% C.S. Peskin, +%% Flow patterns around heart valves; a numerical method, +%% \textit{J. Comp. Phys.}, \textbf{10}, 252--271 (1972); +%% C.S. Peskin, +%% The immersed boundary method, +%% \textit{Acta Numerica} \textbf{11} 479--517 (2002). + +%% \bibitem{ch14:immersed-lb} +%% Z.-G. Feng and E.E. Michaelides, +%% The immersed boundary-lattice Boltzmann method for solving +%% fluid-particles interaction problem, +%% \textit{J. Comp. Phys.}, \textbf{195} 602--628 (2004). + +%% \bibitem{ch14:spm} +%% Y. Nakayama and R. Yamamoto, +%% Simulation method to resolve hydrodynamic interactions in colloidal +%% dispersions, +%% \textit{Phys. Rev. E}, \textbf{71} 036707 (2005). + + +%\end{thebibliography}