@manual{cudabestpractices,
  title        = {{NVIDIA {CUDA C} Best Practices Guide 4.0}},
  organization = {NVIDIA},
  howpublished = {\url{http://docs.nvidia.com/cuda/pdf/CUDA_C_Best_Practices_Guide.pdf}},
10 author = {A. Frommer and D. B. Szyld},
12 title = {On Asynchronous Iterations},
13 journal = {J. Comput. and Appl. Math.},
20 author = {D. P. Bertsekas and J. N. Tsitsiklis},
22 title = {Parallel and Distributed Computation},
23 publisher = {Prentice Hall},
25 address = {Englewood Cliffs, New Jersey},
29 author = {Vialle, S. and Contassot-Vivier, S. and Jost, T.},
editor = {Sanjay Ranka and Ishfaq Ahmad},
31 title = {Handbook of Energy-Aware and Green Computing},
32 chapter = {Optimizing Computing and Energy Performances in Heterogeneous Clusters of CPUs and GPUs},
33 publisher = {Chapman and Hall/CRC},
url = {http://www.crcpress.com/product/isbn/9781466501164},
36 isbn = {9781466501164},
40 series = {Computer \& Information Science Series},
50 author = {S. Vialle and S. Contassot-Vivier},
editor = {Magoul{\'e}s, Fr{\'e}d{\'e}ric},
52 title = {Patterns for parallel programming on {G}{P}{U}s},
53 chapter = {Optimization methodology for Parallel Programming of Homogeneous or Hybrid Clusters},
54 publisher = {Saxe-Coburg Publications},
70 author = {Bahi, J. M. and Contassot-Vivier, S. and Couturier, R.},
71 title = {Parallel Iterative Algorithms: from sequential to grid computing},
72 publisher = {Chapman \& Hall/CRC},
74 series = {Numerical Analysis \& Scientific Computing Series},
75 OPTdoi = {http://www.crcpress.com/shopping_cart/products/product_detail.asp?sku=C808X&isbn=9781584888086&parent_id=&pc=},
@inproceedings{HPCS2002,
  author    = {Bahi, J. M. and Contassot-Vivier, S. and Couturier, R.},
  title     = {Asynchronism for Iterative Algorithms in a Global Computing Environment},
  booktitle = {The 16th Annual International Symposium on High Performance Computing Systems and Applications (HPCS'2002)},
  address   = {Moncton, Canada},
@inproceedings{Vecpar08a,
  author    = {Bahi, J. M. and Contassot-Vivier, S. and Couturier, R.},
  title     = {An efficient and robust decentralized algorithm for detecting the global convergence in asynchronous iterative algorithms},
  booktitle = {8th International Meeting on High Performance Computing for Computational Science, VECPAR'08},
101 author = {Bahi, J. M. and Contassot-Vivier, S. and Couturier, R.},
102 title = {Evaluation of the Asynchronous Iterative Algorithms in the Context of Distant Heterogeneous Clusters},
103 journal = {Parallel Computing},
@inproceedings{ECost10,
  author       = {Contassot-Vivier, S. and Vialle, S. and Jost, T.},
  title        = {Optimizing computing and energy performances on {GPU} clusters: experimentation on a {PDE} solver},
  booktitle    = {COST Action IC0804 on Large Scale Distributed Systems, 1st Year},
  editor       = {Pierson, Jean-Marc and Hlavacs, Helmut},
  organization = {IRIT},
  note         = {ISBN: 978-2-917490-10-5},
122 author = {Bahi, J. M. and Contassot-Vivier, S. and Couturier, R.},
123 title = {Performance comparison of parallel programming environments for implementing {AIAC} algorithms},
124 journal = {Journal of Supercomputing. Special Issue on Performance Modelling and Evaluation of Parallel and Distributed Systems},
@inproceedings{Para10,
  author    = {Contassot-Vivier, S. and Jost, T. and Vialle, S.},
  title     = {Impact of asynchronism on {GPU} accelerated parallel iterative computations},
  booktitle = {PARA 2010 conference: State of the Art in Scientific and Parallel Computing},
  address   = {Reykjav{\'\i}k, Iceland},
% Duplicate of the ECost10 entry that appears earlier in this file. BibTeX
% reports a repeated-entry error when the same citation key is defined twice,
% so this copy is disabled by dropping its leading at-sign: the parser ignores
% everything from here up to the next entry.
InProceedings{ECost10,
  author       = {Contassot-Vivier, S. and Vialle, S. and Jost, T.},
  title        = {Optimizing computing and energy performances on {GPU} clusters: experimentation on a {PDE} solver},
  booktitle    = {COST Action IC0804 on Large Scale Distributed Systems,1st Year},
  editor       = {Jean-Marc Pierson and Helmut Hlavacs},
  organization = {IRIT},
  note         = {ISBN: 978-2-917490-10-5},
@incollection{JCVV10,
  author    = {Jost, T. and Contassot-Vivier, S. and Vialle, S.},
  title     = {An efficient multi-algorithm sparse linear solver for {GPU}s},
  booktitle = {Parallel Computing: From Multicores and GPU's to Petascale},
  publisher = {IOS Press},
  series    = {Advances in Parallel Computing},
  OPTannote = {Extended version of EuroGPU symposium article, in the International Conference on Parallel Computing (ParCo) 2009}
@inproceedings{ParCo09,
  author    = {Jost, T. and Contassot-Vivier, S. and Vialle, S.},
  title     = {An efficient multi-algorithms sparse linear solver for {GPU}s},
  booktitle = {EuroGPU mini-symposium of the International Conference on Parallel Computing, ParCo'2009},
@inproceedings{BCVG11,
  author    = {Bahi, J. M. and Contassot-Vivier, S. and Giersch, A.},
  title     = {Load Balancing in Dynamic Networks by Bounded Delays Asynchronous Diffusion},
  booktitle = {VECPAR 2010},
  editor    = {Palma, J. M. L. M. and others},
  publisher = {Springer, Heidelberg},
  note      = {DOI:~\url{10.1007/978-3-642-19328-6_33}}
197 title = {{NVIDIA {CUDA} C Programming Guide 4.0}},
198 organization = {NVIDIA},
199 howpublished = "\url{http://developer.download.nvidia.com/compute/DevZone/docs/html/C/doc/CUDA_C_Programming_Guide.pdf}",
205 title = {Open Source High Performance Computing},
206 howpublished = {\url{http://www.open-mpi.org}}
210 title = {Message Passing Interface},
211 howpublished = {\url{http://www.mpi-forum.org/docs}}
215 title = {Open{M}{P} multi-threaded programming {API}},
216 howpublished = {\url{http://www.openmp.org}}
220 author = {T. Hoefler and A. Lumsdaine},
221 title = {Overlapping Communication and Computation with High Level Communication Routines},
222 journal ={Cluster Computing and the Grid, IEEE International Symposium on},
224 isbn = {978-0-7695-3156-4},
227 note = "\url{http://doi.ieeecomputersociety.org/10.1109/CCGRID.2008.15}",
228 publisher = {IEEE Computer Society},
229 address = {Los Alamitos, CA, USA},
@article{Valiant:BSP,
  author  = {Valiant, L. G.},
  title   = {A bridging model for parallel computation},
  journal = {Communications of the ACM},
@inproceedings{gustedt:hal-00639289,
  author                   = {Gustedt, J. and Jeanvoine, E.},
  title                    = {Relaxed Synchronization with Ordered Read-Write Locks},
  booktitle                = {{Euro-Par} 2011: Parallel Processing Workshops},
  editor                   = {Alexander, Michael and others},
  publisher                = {Springer},
  address                  = {Bordeaux, France},
  x-international-audience = {yes},
  x-proceedings            = {yes},
  url                      = {http://hal.inria.fr/hal-00639289},
  x-id-hal                 = {hal-00639289},
@article{clauss:2010:inria-00330024:1,
  author                   = {Clauss, P.-N. and Gustedt, J.},
  title                    = {Iterative Computations with Ordered Read-Write Locks},
  journal                  = {Journal of Parallel and Distributed Computing},
  publisher                = {Elsevier},
  doi                      = {10.1016/j.jpdc.2009.09.002},
  x-international-audience = {yes},
  x-editorial-board        = {yes},
  url                      = {http://hal.inria.fr/inria-00330024/en},
  x-id-hal                 = {inria-00330024},
@inproceedings{GUSTEDT:2007:HAL-00280094:1,
  author                   = {Gustedt, J. and Vialle, S. and De Vivo, A.},
  title                    = {The {parXXL} Environment: Scalable Fine Grained Development for Large Coarse Grained Platforms},
  booktitle                = {PARA 06},
  long-booktitle           = {PARA 06: Workshop on state-of-the-art in scientific and parallel computing},
  editor                   = {K{\aa}gstr{\"o}m, Bo and others},
  pages                    = {1094--1104},
  address                  = {Ume{\aa}, Sweden},
  publisher                = {Springer},
  url                      = {http://hal-supelec.archives-ouvertes.fr/hal-00280094/en/},
  x-international-audience = {yes},
  x-proceedings            = {yes},
@inproceedings{suss04:users_exper,
  author    = {S{\"u}{\ss}, Michael and Leopold, Claudia},
  title     = {A User's Experience with Parallel Sorting and {OpenMP}},
  booktitle = {Proceedings of the 6th European Workshop on OpenMP (EWOMP)},
  editor    = {Ayguad{\'e}, Eduard and others},
  address   = {Stockholm, Sweden}
}
305 editor = {JTC1/SC22/WG14},
306 title = {Programming languages - C},
309 number = {ISO/IEC 9899},
310 edition = {Cor. 1:2012}}