BookGPU/Chapters/chapter5/biblio5.bib

   1 % This file was created with JabRef 2.8.1.
   2 % Encoding: Cp1252
   3
   4 @INCOLLECTION{ch5:Bajrovic2011,
   5   author = {Bajrovic, E. and Traff, J.},
   6   title = {Using MPI Derived Datatypes in Numerical Libraries},
   7   booktitle = {Recent Advances in the Message Passing Interface},
   8   publisher = {Springer Berlin / Heidelberg},
   9   year = {2011},
  10   editor = {Cotronis, Y. and Danalis, A. and Nikolopoulos, D.
  11         and Dongarra, J.},
  12   volume = {6960},
  13   series = {Lecture Notes in Computer Science},
  14   pages = {29-38},
  15   affiliation = {Faculty of Computer Science, University of Vienna, Nordbergstrasse
  16         15/3C, A-1090 Vienna, Austria},
  17   file = {Bajrovic2011.pdf:Bajrovic2011.pdf:PDF},
  18   isbn = {978-3-642-24448-3},
  19   keyword = {Computer Science},
  20   owner = {slgl},
  21   timestamp = {2012.09.10},
  22   url = {http://dx.doi.org/10.1007/978-3-642-24449-0_6}
  23 }
  24
  25 @ARTICLE{ch5:Bell2011,
  26   author = {N. Bell and J. Hoberock},
  27   title = {Thrust: A Productivity-Oriented Library for CUDA},
  28   journal = {in GPU Computing Gems, Jade Edition, Edited by Wen-mei W. Hwu},
  29   year = {2011},
  30   volume = {2},
  31   pages = {359-371},
  32   owner = {slgl},
  33   timestamp = {2012.09.12}
  34 }
  35
  36 @ARTICLE{ch5:Engsig-Karup2011,
  37   author = {Allan Peter Engsig-Karup and Morten Gorm Madsen and Stefan Lemvig
  38         Glimberg},
  39   title = {A massively parallel GPU-accelerated model for analysis of fully
  40         nonlinear free surface waves},
  41   journal = {International Journal for Numerical Methods in Fluids},
  42   year = {2011},
  43   owner = {slgl},
  44   timestamp = {2011.11.06}
  45 }
  46
  47 @BOOK{ch5:Gamma1995,
  48   title = {Design Patterns - Elements of Reusable Object-Oriented Software},
  49   publisher = {Addison-Wesley Professional Computing Series},
  50   year = {1995},
  51   author = {E. Gamma and R. Helm and R. Johnson and J. Vlissides},
  52   owner = {slgl},
  53   timestamp = {2012.09.10}
  54 }
  55
  56 @BOOK{ch5:Gropp1999,
  57   title = {Using {MPI}: Portable Parallel Programming with the Message Passing
  58         Interface, {\rm 2nd edition}},
  59   publisher = {MIT Press},
  60   year = {1999},
  61   author = {W. Gropp and E. Lusk and A. Skjellum},
  62   address = {Cambridge, MA},
  63   area = {M},
  64   areaseq = {0}
  65 }
  66
  67 @BOOK{ch5:Gropp1999a,
  68   title = {Using {MPI-2}: Advanced Features of the Message-Passing Interface},
  69   publisher = {MIT Press},
  70   year = {1999},
  71   author = {William Gropp and Ewing Lusk and Rajeev Thakur},
  72   address = {Cambridge, MA},
  73   area = {M},
  74   areaseq = {0}
  75 }
  76
  77 @INCOLLECTION{ch5:Hoefler2011,
  78   author = {Hoefler, T. and Snir, M.},
  79   title = {Writing Parallel Libraries with MPI - Common Practice, Issues, and
  80         Extensions},
  81   booktitle = {Recent Advances in the Message Passing Interface},
  82   publisher = {Springer Berlin / Heidelberg},
  83   year = {2011},
  84   editor = {Cotronis, Y. and Danalis, A. and Nikolopoulos, D.
  85         and Dongarra, J.},
  86   volume = {6960},
  87   series = {Lecture Notes in Computer Science},
  88   pages = {345-355},
  89   affiliation = {University of Illinois, Urbana, IL 61801, USA},
  90   file = {Hoefler2011.pdf:Hoefler2011.pdf:PDF},
  91   isbn = {978-3-642-24448-3},
  92   keyword = {Computer Science},
  93   owner = {slgl},
  94   timestamp = {2012.09.10},
  95   url = {http://dx.doi.org/10.1007/978-3-642-24449-0_45}
  96 }
  97
  98 @BOOK{ch5:LeVeque2007,
  99   title = {Finite difference methods for ordinary and partial differential equations
 100         - steady-state and time-dependent problems},
 101   publisher = {SIAM},
 102   year = {2007},
 103   author = {R. J. LeVeque},
 104   pages = {I-XV, 1-341},
 105   bibsource = {DBLP, http://dblp.uni-trier.de},
 106   ee = {http://www.ec-securehost.com/SIAM/OT98.html},
 107   isbn = {978-0-89871-629-0}
 108 }
 109
 110 @TECHREPORT{ch5:Skjellum1994,
 111   author = {A. Skjellum and N. E. Doss and P. V. Bangaloret},
 112   title = {Writing Libraries in MPI},
 113   institution = {Department of Computer Science and NSF Engineering Research Center
 114         for Computational Fiels Simulation. Mississippi State University},
 115   year = {1994},
 116   file = {Skjellum1994.pdf:Skjellum1994.pdf:PDF},
 117   owner = {slgl},
 118   timestamp = {2012.09.10}
 119 }
 120
 121 @BOOK{ch5:Smith1996,
 122   title = {Domain Decomposition: Parallel Multilevel Methods for Elliptic Partial
 123         Differential Equations},
 124   publisher = {Cambridge University Press},
 125   year = {1996},
 126   author = {B. F. Smith and P. E. Bj{\o}rstad and W. D. Gropp},
 127   address = {New York},
 128   area = {D},
 129   areaseq = {0}
 130 }
 131
 132 @BOOK{ch5:Vandevoorde2002,
 133   title = {C++ Templates: The Complete Guide},
 134   publisher = {Addison-Wesley Professional},
 135   year = {2002},
 136   author = {D. Vandevoorde and N. M. Josuttis},
 137   pages = {552},
 138   edition = {1st},
 139   month = {November},
 140   owner = {slgl},
 141   timestamp = {2011.07.15},
 142   url = {http://www.amazon.com/Templates-Complete-Guide-David-Vandevoorde/dp/0201734842/ref=sr_1_3?ie=UTF8&qid=1310721621&sr=8-3}
 143 }
 144
 145 @article{ch5:Korson1992,
 146  author = {Korson, T. and McGregor, J. D.},
 147  title = {Technical criteria for the specification and evaluation of object-oriented libraries},
 148  journal = {Softw. Eng. J.},
 149  issue_date = {March 1992},
 150  volume = {7},
 151  number = {2},
 152  month = mar,
 153  year = {1992},
 154  issn = {0268-6961},
 155  pages = {85--94},
 156  numpages = {10},
 157  url = {http://dx.doi.org/10.1049/sej.1992.0009},
 158  doi = {10.1049/sej.1992.0009},
 159  acmid = {148991},
 160  publisher = {Michael Faraday House},
 161  address = {Herts, UK, UK},
 162 }
 163
 164 @MISC{ch5:Acklam1998,
 165     author = {Elizabeth Acklam and Hans Petter Langtangen and Hans Petter Langtangen and Are Magnus Bruaset},
 166     title = {Parallelization of Explicit Finite Difference Schemes via Domain Decomposition},
 167     year = {1998}
 168 }
 169
 170 @INPROCEEDINGS{ch5:Glimberg2011,
 171   AUTHOR =       {S. L. Glimberg and A. P. Engsig-Karup and M. G. Madsen},
 172   TITLE =        {A Fast GPU-accelerated Mixed-precision Strategy for Fully Nonlinear Water Wave Computations},
 173   BOOKTITLE =    {Numerical Mathematics and Advanced Applications 2011, Proceedings of ENUMATH 2011, the 9th European Conference on Numerical Mathematics and Advanced Applications, Leicester, September 2011},
 174   YEAR =         {2011},
 175   editor =       {A. Cangiani and R. L. Davidchack and E. Georgoulis and A.N. Gorban and J. Levesley and M. V. Tretyakov},
 176   publisher =    {Springer},
 177 }
 178
 179 @techreport{ch5:Asanovic:EECS-2006-183,
 180   Author = {Asanovic, K. and Bodik, R. and Catanzaro, B. C. and Gebis, Joseph, J. and Husbands, P. and Keutzer, K. and Patterson, D. A. and Plishker, W. L. and Shalf, J. and Williams, S. W. and Yelick, K. A.},
 181   Title = {The Landscape of Parallel Computing Research: A View from Berkeley},
 182   Institution = {EECS Department, University of California, Berkeley},
 183   Year = {2006},
 184   Month = {Dec},
 185   Number = {UCB/EECS-2006-183},
 186   Abstract = {The recent switch to parallel microprocessors is a milestone in the history of computing. Industry has laid out a roadmap for multicore designs that preserves the programming paradigm of the past via binary compatibility and cache coherence. Conventional wisdom is now to double the number of cores on a chip with each silicon generation.
 187 A multidisciplinary group of Berkeley researchers met nearly two years to discuss this change. Our view is that this evolutionary approach to parallel hardware and software may work from 2 or 8 processor systems, but is likely to face diminishing returns as 16 and 32 processor systems are realized, just as returns fell with greater instruction-level parallelism.
 188 We believe that much can be learned by examining the success of parallelism at the extremes of the computing spectrum, namely embedded computing and high performance computing. This led us to frame the parallel landscape with seven questions, and to recommend the following:
 189 <ul>
 190 <li>The overarching goal should be to make it easy to write programs that execute efficiently on highly parallel computing systems
 191 <li>The target should be 1000s of cores per chip, as these chips are built from processing elements that are the most efficient in MIPS (Million Instructions per Second) per watt, MIPS per area of silicon, and MIPS per development dollar.
 192 <li>Instead of traditional benchmarks, use 13 "Dwarfs" to design and evaluate parallel programming models and architectures. (A dwarf is an algorithmic method that captures a pattern of computation and communication.)
 193 <li>"Autotuners" should play a larger role than conventional compilers in translating parallel programs.
 194 <li>To maximize programmer productivity, future programming models must be more human-centric than the conventional focus on hardware or applications.
 195 <li>To be successful, programming models should be independent of the number of processors.
 196 <li>To maximize application efficiency, programming models should support a wide range of data types and successful models of parallelism: task-level parallelism, word-level parallelism, and bit-level parallelism.
 197 <li>Architects should not include features that significantly affect performance or energy if programmers cannot accurately measure their impact via performance counters and energy counters.
 198 <li>Traditional operating systems will be deconstructed and operating system functionality will be orchestrated using libraries and virtual machines.
 199 <li>To explore the design space rapidly, use system emulators based on Field Programmable Gate Arrays (FPGAs) that are highly scalable and low cost.
 200 </ul>
 201 Since real world applications are naturally parallel and hardware is naturally parallel, what we need is a programming model, system software, and a supporting architecture that are naturally parallel. Researchers have the rare opportunity to re-invent these cornerstones of computing, provided they simplify the efficient programming of highly parallel systems.}
 202 }
 203
 204 @article{ch5:mooreslaw1965,
 205   author =  {Gordon E. Moore},
 206   title =   {Cramming more components onto integrated circuits},
 207   journal = {Electronics},
 208   year = {1965},
 209   volume =  {38},
 210   number =  {8},
 211   pages =   {114-117}
 212 }
 213
 214 @techreport{ch5:Kloeckner2011,
 215   title = "{High-Order Discontinuous Galerkin Methods by GPU Metaprogramming}",
 216   author = "A. Kloeckner, T. Warburton and J. S. Hesthaven",
 217   institution = "Scientific Computing Group, Brown University",
 218   number = "2011-13",
 219   address = "Providence, RI, USA",
 220   year = "2011",
 221   month = jun,
 222 }
 223
 224 @book{ch5:Ferziger1996,
 225   title={Computational methods for fluid dynamics},
 226   author={Ferziger, J.H. and Peri{\'c}, M.},
 227   isbn={9783540594345},
 228   lccn={98231766},
 229   series={Numerical methods: Research and development},
 230   url={http://books.google.dk/books?id=SJkeAQAAIAAJ},
 231   year={1996},
 232   publisher={Springer-Verlag}
 233 }
 234
 235 @book{ch5:chorin1993,
 236   title={A Mathematical Introduction to Fluid Mechanics},
 237   author={Chorin, A. J. and Marsden, J. E.},
 238   isbn={9780387979182},
 239   lccn={98115991},
 240   series={Texts in Applied Mathematics},
 241   url={http://books.google.dk/books?id=0Iglq1WA5PQC},
 242   year={1993},
 243   publisher={Springer}
 244 }
 245
 246 @book{ch5:Saad2003,
 247  author = {Saad, Y.},
 248  title = {Iterative Methods for Sparse Linear Systems},
 249  year = {2003},
 250  isbn = {0898715342},
 251  edition = {2nd},
 252  publisher = {Society for Industrial and Applied Mathematics},
 253  address = {Philadelphia, PA, USA},
 254 }
 255
 256 @book{ch5:Kelley1995,
 257   title={Iterative Methods for Linear and Nonlinear Equations},
 258   author={Kelley, C. T.},
 259   isbn={9780898713527},
 260   lccn={lc95032249},
 261   series={Frontiers in Applied Mathematics Series},
 262   url={http://books.google.dk/books?id=3J4XEAooQOoC},
 263   year={1995},
 264   publisher={Society for Industrial and Applied Mathematics (SIAM, 3600 Market Street, Floor 6, Philadelphia, PA 19104)}
 265 }
 266
 267 @techreport{ch5:YMTR08,
 268         author = {Y. Maday},
 269         title = {The parareal in time algorithm},
 270         institution = {Universite Pierr\'{e} et Marie Curie},
 271         year = {2008},
 272         type = {Technical Report},
 273         number = {R08030}
 274 }
 275
 276 @article{ch5:MS07,
 277         author = {M. Gander and S. Vandewalle},
 278         title = {Analysis of the parareal time-parallel time-integration method},
 279         journal = {SIAM Journal of scientific computing},
 280         year = {2007},
 281         volume = {29},
 282         number = {2},
 283         pages = {556-578}
 284 }
 285
 286 @article{ch5:LMT01,
 287         author = {J.-L. Lions and Y. Maday and G. Turinici},
 288         title = {R\'{e}solution d'EDP par un sch\'{e}ma en temps parar\'{e}el},
 289         journal = {C.R. Acad Sci. Paris S\'{e}r. I math},
 290         year = {2001},
 291         volume = {332},
 292         pages = {661-668}
 293 }
 294
 295 @article{ch5:LSY02,
 296         author = {L. Baffico and S. Bernard and Y. Maday and G. Turinici and G. Z\'{e}rah},
 297         title = {Parallel in time molecular dynamics simulations},
 298         journal = {Physical Review E.},
 299         year = {2002},
 300         volume = {66},
 301         number = {057701}
 302 }
 303
 304 @mastersthesis{ch5:ASNP12,
 305         author = {A. S. Nielsen},
 306         title = {Feasibility study of the Parareal algorithm},
 307         school = {Technical University of Denmark, Department of Informatics and Mathematical Modeling},
 308         year = {2012},
 309         type = {Master Thesis}
 310 }
 311
 312 @article{ch5:EA10,
 313         author = {Aubanel, E.},
 314         title = {Scheduling of tasks in the Parareal algorithm},
 315         journal = {Parallel Computing},
 316         year = {2010},
 317         volume = {37},
 318         pages = {172-182}
 319 }
 320
 321 @BOOK{ch5:Barrett1994,
 322   AUTHOR = {R. Barrett and M. Berry and T. F. Chan and J. Demmel and J. Donato and J. Dongarra and V. Eijkhout and R. Pozo and C. Romine and H. Van der Vorst },
 323   TITLE = {Templates for the Solution of Linear Systems: Building Blocks for Iterative Methods, 2nd Edition},
 324   PUBLISHER = {SIAM},
 325   YEAR = {1994},
 326   ADDRESS = {Philadelphia, PA}
 327 }
 328
 329 @TECHREPORT{ch5:ScientificGrandChallenges2010,
 330   author = {D. L. Brown and P. Messina et. al},
 331   title = {Scientific Grand Challenges, Crosscutting technologies for computing at the exascale},
 332   institution = {U.S. Department of Energy},
 333   year = {2010},
 334   month = {February},
 335   address = {Washington, D.C.},
 336 }
 337
 338 @article{ch5:Keyes2011,
 339 author = {D. E. Keyes},
 340 title = {{Exaflop/s: The why and the how}},
 341 journal = {Comptes Rendus Mecanique},
 342 volume = {339},
 343 year = {2011},
 344 pages = {70--77},
 345 issue = {2},
 346 doi = {10.1016/j.crme.2010.11.002},
 347 masid = {49649121}
 348 }
 349
 350 @ARTICLE{ch5:Cai2005,
 351   author = {X. Cai and G.K. Pedersen and H.P. Langtangen},
 352   title = {A parallel multi-subdomain strategy for solving Boussinesq water wave equations},
 353   journal = {Elsevier - Advances in Water Resources},
 354   year = {2005},
 355   volume = {28},
 356   pages = {215-233},
 357 }
 358
 359 @ARTICLE{ch5:GlimbergEtAl2012,
 360   AUTHOR =       {Stefan L. Glimberg and Allan P. Engsig-Karup},
 361   TITLE =        {On a Multi-GPU Implementation of a Free Surface Water Wave Model for Large-scale Simulations},
 362   JOURNAL =      {Submitted to: Special Issue of the Journal Parallel Computing},
 363   YEAR =         {2012},
 364   volume =       {7th Special Issue devoted to PMAA 2012},
 365 }
 366
 367 @misc{ch5:cudaguide,
 368    author = {{NVIDIA Corporation}},
 369    title = {CUDA C Programming Guide},
 370    publisher = {NVIDIA Corporation},
 371    year = {2012},
 372    url = {http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html}
 373 }
 374
 375 @misc{ch5:cudapractice,
 376    author = {{NVIDIA Corporation}},
 377    title = {CUDA C Best Practices Guide},
 378    publisher = {NVIDIA Corporation},
 379    year = {2012},
 380    url = {http://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html}
 381 }
 382
 383 @inproceedings{ch5:Bell2009,
 384  author = {Bell, N. and Garland, M.},
 385  title = {Implementing sparse matrix-vector multiplication on throughput-oriented processors},
 386  booktitle = {SC '09: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis},
 387  year = {2009},
 388  isbn = {978-1-60558-744-8},
 389  pages = {1--11},
 390  location = {Portland, Oregon},
 391  doi = {http://doi.acm.org/10.1145/1654059.1654078},
 392  publisher = {ACM},
 393  address = {New York, NY, USA},
 394  }
 395
 396 @book{ch5:Kirk2010,
 397  author = {Kirk, D. B. and Hwu, W.-M. W.},
 398  title = {Programming Massively Parallel Processors: A Hands-on Approach},
 399  year = {2010},
 400  isbn = {0123814723, 9780123814722},
 401  edition = {1st},
 402  publisher = {Morgan Kaufmann Publishers Inc.},
 403  address = {San Francisco, CA, USA},
 404 }
 405
 406 @book{ch5:Trottenberg2001,
 407   title={Multigrid},
 408   author={Trottenberg, U. and Oosterlee, C. W. and Sch{\"u}ller, A.},
 409   isbn={9780127010700},
 410   lccn={00103940},
 411   url={http://books.google.dk/books?id=9ysyNPZoR24C},
 412   year={2001},
 413   publisher={Academic Press}
 414 }