1 % This file was created with JabRef 2.8.1.
4 @INCOLLECTION{ch5:Bajrovic2011,
5 author = {Bajrovi{\'c}, Enes and Tr{\"a}ff, Jesper Larsson},
6 title = {Using {MPI} Derived Datatypes in Numerical Libraries},
7 booktitle = {Recent Advances in the Message Passing Interface},
8 publisher = {Springer Berlin / Heidelberg},
10 editor = {Cotronis, Yiannis and Danalis, Anthony and Nikolopoulos, Dimitrios
13 series = {Lecture Notes in Computer Science},
15 affiliation = {Faculty of Computer Science, University of Vienna, Nordbergstrasse
16 15/3C, A-1090 Vienna, Austria},
17 file = {Bajrovic2011.pdf:Bajrovic2011.pdf:PDF},
18 isbn = {978-3-642-24448-3},
19 keyword = {Computer Science},
21 timestamp = {2012.09.10},
22 url = {http://dx.doi.org/10.1007/978-3-642-24449-0_6}
25 @ARTICLE{ch5:Bell2011,
26 author = {Nathan Bell and Jared Hoberock},
27 title = {Thrust: A Productivity-Oriented Library for {CUDA}},
28 journal = {in GPU Computing Gems, Jade Edition, Edited by Wen-mei W. Hwu},
33 timestamp = {2012.09.12}
36 @ARTICLE{ch5:Engsig-Karup2011,
37 author = {Allan Peter Engsig-Karup and Morten Gorm Madsen and Stefan Lemvig
39 title = {A massively parallel {GPU}-accelerated model for analysis of fully
40 nonlinear free surface waves},
41 journal = {International Journal for Numerical Methods in Fluids},
44 timestamp = {2011.11.06}
48 title = {Design Patterns - Elements of Reusable Object-Oriented Software},
49 publisher = {Addison-Wesley Professional Computing Series},
51 author = {Erich Gamma and Richard Helm and Ralph Johnson and John Vlissides},
53 timestamp = {2012.09.10}
57 title = {Using {MPI}: Portable Parallel Programming with the Message Passing
58 Interface, {\rm 2nd edition}},
59 publisher = {MIT Press},
61 author = {William Gropp and Ewing Lusk and Anthony Skjellum},
62 address = {Cambridge, MA},
68 title = {Using {MPI-2}: Advanced Features of the Message-Passing Interface},
69 publisher = {MIT Press},
71 author = {William Gropp and Ewing Lusk and Rajeev Thakur},
72 address = {Cambridge, MA},
77 @INCOLLECTION{ch5:Hoefler2011,
78 author = {Hoefler, Torsten and Snir, Marc},
79 title = {Writing Parallel Libraries with {MPI} - Common Practice, Issues, and
81 booktitle = {Recent Advances in the Message Passing Interface},
82 publisher = {Springer Berlin / Heidelberg},
84 editor = {Cotronis, Yiannis and Danalis, Anthony and Nikolopoulos, Dimitrios
87 series = {Lecture Notes in Computer Science},
89 affiliation = {University of Illinois, Urbana, IL 61801, USA},
90 file = {Hoefler2011.pdf:Hoefler2011.pdf:PDF},
91 isbn = {978-3-642-24448-3},
92 keyword = {Computer Science},
94 timestamp = {2012.09.10},
95 url = {http://dx.doi.org/10.1007/978-3-642-24449-0_45}
98 @BOOK{ch5:LeVeque2007,
99 title = {Finite difference methods for ordinary and partial differential equations
100 - steady-state and time-dependent problems},
103 author = {Randall J. LeVeque},
104 pages = {I--XV, 1--341},
105 bibsource = {DBLP, http://dblp.uni-trier.de},
106 ee = {http://www.ec-securehost.com/SIAM/OT98.html},
107 isbn = {978-0-89871-629-0}
110 @TECHREPORT{ch5:Skjellum1994,
111 author = {Anthony Skjellum and Nathan E. Doss and Purushotham V. Bangalore},
112 title = {Writing Libraries in {MPI}},
113 institution = {Department of Computer Science and NSF Engineering Research Center
114 for Computational Field Simulation, Mississippi State University},
116 file = {Skjellum1994.pdf:Skjellum1994.pdf:PDF},
118 timestamp = {2012.09.10}
122 title = {Domain Decomposition: Parallel Multilevel Methods for Elliptic Partial
123 Differential Equations},
124 publisher = {Cambridge University Press},
126 author = {B. F. Smith and P. E. Bj{\o}rstad and W. D. Gropp},
127 address = {New York},
132 @BOOK{ch5:Vandevoorde2002,
133 title = {C++ Templates: The Complete Guide},
134 publisher = {Addison-Wesley Professional},
136 author = {David Vandevoorde and Nicolai M. Josuttis},
141 timestamp = {2011.07.15},
142 url = {http://www.amazon.com/Templates-Complete-Guide-David-Vandevoorde/dp/0201734842}
145 @article{ch5:Korson1992,
146 author = {Korson, Tim and McGregor, John D.},
147 title = {Technical criteria for the specification and evaluation of object-oriented libraries},
148 journal = {Software Engineering Journal},
149 issue_date = {March 1992},
157 url = {http://dx.doi.org/10.1049/sej.1992.0009},
158 doi = {10.1049/sej.1992.0009},
160 publisher = {Michael Faraday House},
161 address = {Herts, UK},
164 @MISC{ch5:Acklam1998,
165 author = {Elizabeth Acklam and Hans Petter Langtangen and Are Magnus Bruaset},
166 title = {Parallelization of Explicit Finite Difference Schemes via Domain Decomposition},
170 @INPROCEEDINGS{ch5:Glimberg2011,
171 AUTHOR = {Stefan L. Glimberg and Allan P. Engsig-Karup and Morten G. Madsen},
172 TITLE = {A Fast {GPU}-accelerated Mixed-precision Strategy for Fully Nonlinear Water Wave Computations},
173 BOOKTITLE = {Numerical Mathematics and Advanced Applications 2011, Proceedings of ENUMATH 2011, the 9th European Conference on Numerical Mathematics and Advanced Applications, Leicester, September 2011},
175 editor = {A. Cangiani and R. L. Davidchack and E. Georgoulis and A. N. Gorban and J. Levesley and M. V. Tretyakov},
176 publisher = {Springer},
179 @techreport{ch5:Asanovic:EECS-2006-183,
180 Author = {Asanovic, K. and Bodik, R. and Catanzaro, B. C. and Gebis, Joseph J. and Husbands, P. and Keutzer, K. and Patterson, D. A. and Plishker, W. L. and Shalf, J. and Williams, S. W. and Yelick, K. A.},
181 Title = {The Landscape of Parallel Computing Research: A View from {Berkeley}},
182 Institution = {EECS Department, University of California, Berkeley},
185 Number = {UCB/EECS-2006-183},
186 Abstract = {The recent switch to parallel microprocessors is a milestone in the history of computing. Industry has laid out a roadmap for multicore designs that preserves the programming paradigm of the past via binary compatibility and cache coherence. Conventional wisdom is now to double the number of cores on a chip with each silicon generation.
187 A multidisciplinary group of Berkeley researchers met nearly two years to discuss this change. Our view is that this evolutionary approach to parallel hardware and software may work from 2 or 8 processor systems, but is likely to face diminishing returns as 16 and 32 processor systems are realized, just as returns fell with greater instruction-level parallelism.
188 We believe that much can be learned by examining the success of parallelism at the extremes of the computing spectrum, namely embedded computing and high performance computing. This led us to frame the parallel landscape with seven questions, and to recommend the following:
190 <li>The overarching goal should be to make it easy to write programs that execute efficiently on highly parallel computing systems
191 <li>The target should be 1000s of cores per chip, as these chips are built from processing elements that are the most efficient in MIPS (Million Instructions per Second) per watt, MIPS per area of silicon, and MIPS per development dollar.
192 <li>Instead of traditional benchmarks, use 13 "Dwarfs" to design and evaluate parallel programming models and architectures. (A dwarf is an algorithmic method that captures a pattern of computation and communication.)
193 <li>"Autotuners" should play a larger role than conventional compilers in translating parallel programs.
194 <li>To maximize programmer productivity, future programming models must be more human-centric than the conventional focus on hardware or applications.
195 <li>To be successful, programming models should be independent of the number of processors.
196 <li>To maximize application efficiency, programming models should support a wide range of data types and successful models of parallelism: task-level parallelism, word-level parallelism, and bit-level parallelism.
197 <li>Architects should not include features that significantly affect performance or energy if programmers cannot accurately measure their impact via performance counters and energy counters.
198 <li>Traditional operating systems will be deconstructed and operating system functionality will be orchestrated using libraries and virtual machines.
199 <li>To explore the design space rapidly, use system emulators based on Field Programmable Gate Arrays (FPGAs) that are highly scalable and low cost.
201 Since real world applications are naturally parallel and hardware is naturally parallel, what we need is a programming model, system software, and a supporting architecture that are naturally parallel. Researchers have the rare opportunity to re-invent these cornerstones of computing, provided they simplify the efficient programming of highly parallel systems.}
204 @article{ch5:mooreslaw1965,
205 author = {Gordon E. Moore},
206 title = {Cramming more components onto integrated circuits},
207 journal = {Electronics},
214 @techreport{ch5:Kloeckner2011,
215 title = "High-Order Discontinuous {Galerkin} Methods by {GPU} Metaprogramming",
216 author = "A. Kloeckner and T. Warburton and J. S. Hesthaven",
217 institution = "Scientific Computing Group, Brown University",
219 address = "Providence, RI, USA",
224 @book{ch5:Ferziger1996,
225 title={Computational methods for fluid dynamics},
226 author={Ferziger, J. H. and Peri{\'c}, M.},
227 isbn={9783540594345},
229 series={Numerical methods: Research and development},
230 url={http://books.google.dk/books?id=SJkeAQAAIAAJ},
232 publisher={Springer-Verlag}
235 @book{ch5:chorin1993,
236 title={A Mathematical Introduction to Fluid Mechanics},
237 author={Chorin, A. J. and Marsden, J. E.},
238 isbn={9780387979182},
240 series={Texts in Applied Mathematics},
241 url={http://books.google.dk/books?id=0Iglq1WA5PQC},
247 author = {Saad, Yousef},
248 title = {Iterative Methods for Sparse Linear Systems},
252 publisher = {Society for Industrial and Applied Mathematics},
253 address = {Philadelphia, PA, USA},
256 @book{ch5:Kelley1995,
257 title={Iterative Methods for Linear and Nonlinear Equations},
258 author={Kelley, C. T.},
259 isbn={9780898713527},
261 series={Frontiers in Applied Mathematics Series},
262 url={http://books.google.dk/books?id=3J4XEAooQOoC},
264 publisher={Society for Industrial and Applied Mathematics (SIAM, 3600 Market Street, Floor 6, Philadelphia, PA 19104)}
267 @techreport{ch5:YMTR08,
268 author = {Yvon Maday},
269 title = {The parareal in time algorithm},
270 institution = {Universit{\'e} Pierre et Marie Curie},
272 type = {Technical Report},
277 author = {M. Gander and S. Vandewalle},
278 title = {Analysis of the parareal time-parallel time-integration method},
279 journal = {SIAM Journal on Scientific Computing},
287 author = {J.-L. Lions and Y. Maday and G. Turinici},
288 title = {R{\'e}solution d'{EDP} par un sch{\'e}ma en temps parar{\'e}el},
289 journal = {C. R. Acad. Sci. Paris S{\'e}r. I Math.},
296 author = {L. Baffico and S. Bernard and Y. Maday and G. Turinici and G. Z{\'e}rah},
297 title = {Parallel in time molecular dynamics simulations},
298 journal = {Physical Review E},
304 @mastersthesis{ch5:ASNP12,
305 author = {Allan S. Nielsen},
306 title = {Feasibility study of the Parareal algorithm},
307 school = {Technical University of Denmark, Department of Informatics and Mathematical Modeling},
309 type = {Master's Thesis}
313 author = {Aubanel, E.},
314 title = {Scheduling of tasks in the Parareal algorithm},
315 journal = {Parallel Computing},
321 @BOOK{ch5:Barrett1994,
322 AUTHOR = {R. Barrett and M. Berry and T. F. Chan and J. Demmel and J. Donato and J. Dongarra and V. Eijkhout and R. Pozo and C. Romine and H. van der Vorst},
323 TITLE = {Templates for the Solution of Linear Systems: Building Blocks for Iterative Methods, 2nd Edition},
326 ADDRESS = {Philadelphia, PA}
329 @TECHREPORT{ch5:ScientificGrandChallenges2010,
330 author = {David L. Brown and Paul Messina and others},
331 title = {Scientific Grand Challenges, Crosscutting technologies for computing at the exascale},
332 institution = {U.S. Department of Energy},
335 address = {Washington, D.C.},
338 @article{ch5:Keyes2011,
339 author = {David E. Keyes},
340 title = {Exaflop/s: The Why and the How},
341 journal = {Comptes Rendus M{\'e}canique},
346 doi = {10.1016/j.crme.2010.11.002},
350 @ARTICLE{ch5:Cai2005,
351 author = {X. Cai and G. K. Pedersen and H. P. Langtangen},
352 title = {A parallel multi-subdomain strategy for solving Boussinesq water wave equations},
353 journal = {Advances in Water Resources},
359 @ARTICLE{ch5:GlimbergEtAl2012,
360 AUTHOR = {Stefan L. Glimberg and Allan P. Engsig-Karup},
361 TITLE = {On a Multi-GPU Implementation of a Free Surface Water Wave Model for Large-scale Simulations},
362 JOURNAL = {Submitted to: Special Issue of the Journal Parallel Computing},
364 volume = {7th Special Issue devoted to PMAA 2012},
368 author = {{NVIDIA Corporation}},
369 title = {{CUDA C} Programming Guide},
370 publisher = {NVIDIA Corporation},
372 url = {http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html}
375 @misc{ch5:cudapractice,
376 author = {{NVIDIA Corporation}},
377 title = {{CUDA C} Best Practices Guide},
378 publisher = {NVIDIA Corporation},
380 url = {http://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html}
383 @inproceedings{ch5:Bell2009,
384 author = {Bell, Nathan and Garland, Michael},
385 title = {Implementing sparse matrix-vector multiplication on throughput-oriented processors},
386 booktitle = {SC '09: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis},
388 isbn = {978-1-60558-744-8},
390 location = {Portland, Oregon},
391 doi = {10.1145/1654059.1654078},
393 address = {New York, NY, USA},
397 author = {Kirk, David B. and Hwu, Wen-mei W.},
398 title = {Programming Massively Parallel Processors: A Hands-on Approach},
400 isbn = {0123814723, 9780123814722},
402 publisher = {Morgan Kaufmann Publishers Inc.},
403 address = {San Francisco, CA, USA},
406 @book{ch5:Trottenberg2001,
408 author={Trottenberg, U. and Oosterlee, C. W. and Sch{\"u}ller, A.},
409 isbn={9780127010700},
411 url={http://books.google.dk/books?id=9ysyNPZoR24C},
413 publisher={Academic Press}