-Y. Zhao,
-\textit{Lattice Boltzmann based PDE solver on the GPU},
-Visual Comput., doi 10.1007/s00371-0070191-y (2007).
-J. T\"olke,
-Implementation of a lattice Boltzmann kernel using the compute unified
-device architecture developed by nVIDIA,
-Comput. Visual Sci. 13 29--39 (2010).
-Z. Fan, F. Qiu, A. Kaufman, and S. Yoakum-Stover,
-\textit{GPU cluster for high performance computing},
-Proceedings of ACM/IEEE Supercomputing Conference, pp. 47--59,
-IEEE Computer Society Press, Pittsburgh, PA (2004).
-J. Myre, S.D.C. Walsh, D. Lilja, and M.O. Saar,
-\textit{Performance analysis of single-phase, multiphase, and multicomponent
-lattice Boltzmann fluid flow simulations on GPU clusters},
-Concurrency Computat.: Pract. Exper., \textbf{23}, 332--350 (2011).
-C. Obrecht, F. Kuznik, B. Tourancheau, and J.-J. Roux,
-\textit{Multi-GPU implementation of the lattice Boltzmann method},
-Comput. Math. with Applications,
-doi:10.1016/j.camwa.2011.02.020 (2011).
-M. Bernaschi, M. Fatica, S. Melchionna, S. Succi, and E. Kaxiras,
-\textit{A flexible high-performance lattice Boltzmann GPU code for the
-simulations of fluid flow in complex geometries},
-Concurrency Computat.: Pract. Exper., \textbf{22}, 1--14 (2010).
-W. Xian and A. Takayuki,
-\textit{Multi-GPU performance of incompressible flow computation by
-lattice Boltzmann method on GPU cluster},
-Parallel Comput., doi:10.1016/j.parco.2011.02.007 (2011).
-C. Feichtinger, J. Habich, H. K\"ostler, G. Hager, U. R\"ude, and
-G. Wellein,
-A flexible patch-based lattice Boltzmann parallelization approach
-for heterogeneous GPU-CPU clusters,
-\textit{Parallel Computing} \textbf{37} 536--549 (2011).
-G. Wellein, T. Zeiser, G Hager, and S. Donath,
-On the single processor performance of simple lattice Boltzmann kernels,
-\textit{Computers and Fluids}, \textbf{35}, 910--919 (2006).
-T. Pohl, M. Kowarschik, J. Wilke, K. Igelberger, and U. R\"ude,
-Optimization and profiling of the cache performance of parallel
-lattice Boltzmann code,
-\textit{Parallel Process Lett.} \textit{13} 549--560 (2003).
-K. Mattila, J. Hyv\"aluoma, T. Rossi, M. Aspn\"as and J. Westerholm,
-An efficient swap algorithm for the lattice Boltzmann method,
-\textit{Comput. Phys. Comms.} \textit{176} 200-210 (2007).
-M. Wittmann, T. Zeiser, G. Hager, and G. Wellein,
-Comparison of different propagation steps for lattice Boltzmann methods,
-\textit{Comput. Math with Appl.} doi:10.1016/j.camwa.2012.05.002 (2012).
-S.D.C. Walsh and M.O. Saar,
-Developing extensible lattice Boltzmann simulators for general-purpose
-graphics-processing units,
-\textit{Comm. Comput. Phys.}, \textbf{13} 867--879 (2013).
-S. Williams, L. Oliker, J. Carter, and J Shalf,
-Extracting ultra-scale lattice Boltzmann performance via
-hierarchical and distributed auto-tuning,
-\textit{Proc. SC2011}.
-K. Stratford, R. Adhikari, I. Pagonabarraga, and J.-C. Desplat,
-\textit{Lattice Boltzmann for Binary Fluids with Suspended Colloids},
-J. Stat. Phys. \textbf{121}, 163 (2005).
-A.J.C. Ladd,
-Numerical simulations of particle suspensions via a discretized
-Boltzmann equation. Part 1. Theoretical foundation,
-\textit{J. Fluid Mech.} \textbf{271} 285--309 (1994);
-Part II. Numerical results,
-\textit{ibid.} \textbf{271} 311--339 (1994).
+%% \bibitem{zhao2007}
+%% Y. Zhao,
+%% \textit{Lattice Boltzmann based PDE solver on the GPU},
+%% Visual Comput., doi:10.1007/s00371-007-0191-y (2007).
+%% \bibitem{toelke2010}
+%% J. T\"olke,
+%% Implementation of a lattice Boltzmann kernel using the compute unified
+%% device architecture developed by nVIDIA,
+%% Comput. Visual Sci. \textbf{13} 29--39 (2010).
+%% \bibitem{fan2004}
+%% Z. Fan, F. Qiu, A. Kaufman, and S. Yoakum-Stover,
+%% \textit{GPU cluster for high performance computing},
+%% Proceedings of ACM/IEEE Supercomputing Conference, pp. 47--59,
+%% IEEE Computer Society Press, Pittsburgh, PA (2004).
+%% \bibitem{myre2011}
+%% J. Myre, S.D.C. Walsh, D. Lilja, and M.O. Saar,
+%% \textit{Performance analysis of single-phase, multiphase, and multicomponent
+%% lattice Boltzmann fluid flow simulations on GPU clusters},
+%% Concurrency Computat.: Pract. Exper., \textbf{23}, 332--350 (2011).
+%% \bibitem{obrecht2011}
+%% C. Obrecht, F. Kuznik, B. Tourancheau, and J.-J. Roux,
+%% \textit{Multi-GPU implementation of the lattice Boltzmann method},
+%% Comput. Math. with Applications,
+%% doi:10.1016/j.camwa.2011.02.020 (2011).
+%% \bibitem{bernaschi2010}
+%% M. Bernaschi, M. Fatica, S. Melchionna, S. Succi, and E. Kaxiras,
+%% \textit{A flexible high-performance lattice Boltzmann GPU code for the
+%% simulations of fluid flow in complex geometries},
+%% Concurrency Computat.: Pract. Exper., \textbf{22}, 1--14 (2010).
+%% \bibitem{xian2011}
+%% W. Xian and A. Takayuki,
+%% \textit{Multi-GPU performance of incompressible flow computation by
+%% lattice Boltzmann method on GPU cluster},
+%% Parallel Comput., doi:10.1016/j.parco.2011.02.007 (2011).
+%% \bibitem{feichtinger2011}
+%% C. Feichtinger, J. Habich, H. K\"ostler, G. Hager, U. R\"ude, and
+%% G. Wellein,
+%% A flexible patch-based lattice Boltzmann parallelization approach
+%% for heterogeneous GPU-CPU clusters,
+%% \textit{Parallel Computing} \textbf{37} 536--549 (2011).
+%% \bibitem{wellein2006}
+%% G. Wellein, T. Zeiser, G. Hager, and S. Donath,
+%% On the single processor performance of simple lattice Boltzmann kernels,
+%% \textit{Computers and Fluids}, \textbf{35}, 910--919 (2006).
+%% \bibitem{pohl2003}
+%% T. Pohl, M. Kowarschik, J. Wilke, K. Igelberger, and U. R\"ude,
+%% Optimization and profiling of the cache performance of parallel
+%% lattice Boltzmann code,
+%% \textit{Parallel Process Lett.} \textbf{13} 549--560 (2003).
+%% \bibitem{mattila2007}
+%% K. Mattila, J. Hyv\"aluoma, T. Rossi, M. Aspn\"as, and J. Westerholm,
+%% An efficient swap algorithm for the lattice Boltzmann method,
+%% \textit{Comput. Phys. Comms.} \textbf{176} 200--210 (2007).
+%% \bibitem{wittmann2012}
+%% M. Wittmann, T. Zeiser, G. Hager, and G. Wellein,
+%% Comparison of different propagation steps for lattice Boltzmann methods,
+%% \textit{Comput. Math. with Appl.} doi:10.1016/j.camwa.2012.05.002 (2012).
+%% \bibitem{walshsaar2012}
+%% S.D.C. Walsh and M.O. Saar,
+%% Developing extensible lattice Boltzmann simulators for general-purpose
+%% graphics-processing units,
+%% \textit{Comm. Comput. Phys.}, \textbf{13} 867--879 (2013).
+%% \bibitem{williams2011}
+%% S. Williams, L. Oliker, J. Carter, and J. Shalf,
+%% Extracting ultra-scale lattice Boltzmann performance via
+%% hierarchical and distributed auto-tuning,
+%% \textit{Proc. SC2011}.
+%% \bibitem{ch14:stratford-jsp2005}
+%% K. Stratford, R. Adhikari, I. Pagonabarraga, and J.-C. Desplat,
+%% \textit{Lattice Boltzmann for Binary Fluids with Suspended Colloids},
+%% J. Stat. Phys. \textbf{121}, 163 (2005).
+%% \bibitem{ladd1994}
+%% A.J.C. Ladd,
+%% Numerical simulations of particle suspensions via a discretized
+%% Boltzmann equation. Part 1. Theoretical foundation,
+%% \textit{J. Fluid Mech.} \textbf{271} 285--309 (1994);
+%% Part 2. Numerical results,
+%% \textit{ibid.} \textbf{271} 311--339 (1994).