-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutk.bib
248 lines (226 loc) · 9.3 KB
/
utk.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
@comment{DPLASMA paper in the IPDPS 2011 workshops proceedings (IPDPSW 2011).}
@inproceedings{dplasma,
  OPTok     = {yes},
  author    = {Bosilca, G. and Bouteiller, A. and Danalis, A. and Faverge, M. and
               Haidar, A. and Herault, T. and Kurzak, J. and Langou, J. and
               Lemarinier, P. and Ltaief, H. and Luszczek, P. and YarKhan, A. and
               Dongarra, J.},
  title     = {Flexible Development of Dense Linear Algebra Algorithms on Massively Parallel Architectures with {DPLASMA}},
  booktitle = {Proceedings of the Workshops of the 25th IEEE International Symposium on Parallel and Distributed Processing (IPDPSW 2011)},
  publisher = {IEEE},
  location  = {Anchorage, Alaska, USA},
  pages     = {1432--1441},
  month     = "16--20~" # may,
  year      = {2011},
}
@comment{DAGuE article in Parallel Computing, cited under the key parsec.}
@article{parsec,
  OPTok    = {yes},
  author   = {George Bosilca and Aurelien Bouteiller and Anthony Danalis and Thomas Herault and Pierre Lemarinier and Jack Dongarra},
  title    = {{DAGuE}: A generic distributed {DAG} engine for High Performance Computing},
  journal  = {Parallel Computing},
  volume   = {38},
  number   = {1--2},
  pages    = {37--51},
  year     = {2012},
  note     = {Extensions for Next-Generation Parallel Programming Models},
  issn     = {0167-8191},
  doi      = {10.1016/j.parco.2011.10.003},
  url      = {http://www.sciencedirect.com/science/article/pii/S0167819111001347},
  keywords = {HPC, Micro-task DAG, Heterogeneous architectures, Architecture aware scheduling},
}
@comment{Article on the HPCC benchmark suite in CT Watch Quarterly, volume 2, number 4A.}
@article{luszczek2006ctwq,
  OPTok   = {yes},
  author  = {Piotr Luszczek and Jack Dongarra and Jeremy Kepner},
  title   = {Design and Implementation of the {HPCC} Benchmark Suite},
  journal = {Cyberinfrastructure Technology Watch, CT Watch Quarterly},
  volume  = {2},
  number  = {4A},
  month   = nov,
  year    = {2006},
}
@comment{Book chapter (chapter 2) on the HPC Challenge benchmark in an edited volume.}
@incollection{Dongarra:2013:hpcc_history,
  OPTok     = {yes},
  author    = {Jack Dongarra and Piotr Luszczek},
  title     = {{HPC Challenge}: Design, History, and Implementation Highlights},
  booktitle = {Contemporary High Performance Computing: From Petascale Toward Exascale},
  editor    = {Jeffrey S. Vetter},
  series    = {CRC Computational Science Series},
  publisher = {Taylor and Francis},
  address   = {Boca Raton},
  volume    = 1,
  edition   = 1,
  chapter   = 2,
  pages     = {13--32},
  year      = 2013,
}
@comment{PPAM 2011 conference paper on shortening autotuning time for dense linear algebra routines.}
@inproceedings{luszczek_2011_ppam,
  OPTok     = {yes},
  author    = {Piotr Luszczek and Jack Dongarra},
  title     = {Reducing the time to tune parallel dense linear algebra routines with partial execution and performance modelling},
  booktitle = {Proceedings of PPAM 2011 9th International Conference On Parallel Processing and Applied Mathematics},
  address   = {Toru{\'n}, Poland},
  month     = sep # "~11--14",
  year      = 2011,
}
@comment{ICCS 2009 paper on auto-tuning GEMM for GPUs. The key names Tomov although Li is listed first; the key is kept so existing citations resolve.}
@inproceedings{Tomov_2009_iccs,
  OPTok     = {yes},
  author    = {{Li}, Y. and {Dongarra}, J. and {Tomov}, S.},
  title     = {A Note on Auto-tuning {GEMM} for {GPUs}},
  booktitle = {Proceedings of the 2009 International Conference on Computational Science, ICCS'09},
  series    = {Lecture Notes in Computer Science},
  volume    = {5544},
  publisher = {Springer},
  address   = {Baton Rouge, LA},
  pages     = {884--892},
  month     = may # "~25--27",
  year      = 2009,
  note      = {\href{http://dx.doi.org/10.1007/978-3-642-01970-8\_89}{DOI:~10.1007/978-3-642-01970-8\_89}},
}
@comment{PARA'10 paper on tile QR for a GPU plus multiple CPUs. NOTE(review): the DOI in the note has no chapter suffix, unlike the other LNCS entries in this file; confirm it identifies the chapter and not the whole volume.}
@inproceedings{Kurzak_2010_para,
  OPTok     = {yes},
  author    = {{Kurzak}, J. and {Nath}, R. and {Du}, P. and {Dongarra}, J.~J.},
  title     = {An Implementation of the Tile {QR} Factorization for a {GPU} and Multiple {CPUs}},
  booktitle = {Proceedings of the State of the Art in Scientific and Parallel Computing Conference, PARA'10},
  series    = {Lecture Notes in Computer Science},
  volume    = {7134},
  publisher = {Springer},
  address   = {Reykjav{\'\i}k},
  pages     = {248--257},
  month     = jun # "~6--9",
  year      = 2010,
  note      = {\href{http://dx.doi.org/10.1007/978-3-642-28145-7}{DOI:~10.1007/978-3-642-28145-7}},
}
@comment{IPDPS'10 paper on dense linear algebra solvers for multicore with GPU accelerators. NOTE(review): the DOI prefix IPDPSW suggests the workshops volume; confirm the booktitle against the publisher record.}
@inproceedings{Tomov_2010_ipdps,
  author    = {{Tomov}, S. and {Nath}, R. and {Ltaief}, H. and {Dongarra}, J.},
  title     = {Dense Linear Algebra Solvers for Multicore with {GPU} Accelerators},
  booktitle = {Proceedings of the 2010 IEEE International Parallel \& Distributed Processing Symposium, IPDPS'10},
  publisher = {IEEE Computer Society},
  address   = {Atlanta, GA},
  pages     = {1--8},
  month     = apr # "~19--23",
  year      = 2010,
  note      = {\href{http://dx.doi.org/10.1109/IPDPSW.2010.5470941}{DOI:~10.1109/IPDPSW.2010.5470941}},
}
@comment{Journal article on the MAGMA GEMM kernel for Fermi GPUs. The key names Tomov although Nath is listed first; the key is kept so existing citations resolve.}
@article{Tomov_2010_jhpca,
  OPTok   = {yes},
  author  = {{Nath}, R. and {Tomov}, S. and {Dongarra}, J.},
  title   = {An Improved {MAGMA} {GEMM} for {Fermi} Graphics Processing Units},
  journal = {Int. J. High Perf. Comput. Applic.},
  volume  = 24,
  number  = 4,
  pages   = {511--515},
  year    = 2010,
  note    = {\href{http://dx.doi.org/10.1177/1094342010385729}{DOI:~10.1177/1094342010385729}},
}
@comment{VECPAR'10 paper on GPU kernels for dense linear algebra. The key names Tomov although Nath is listed first; the key is kept so existing citations resolve.}
@inproceedings{Tomov_2010_vecpar,
  OPTok     = {yes},
  author    = {{Nath}, R. and {Tomov}, S. and {Dongarra}, J.},
  title     = {Accelerating {GPU} Kernels for Dense Linear Algebra},
  booktitle = {Proceedings of the 2010 International Meeting on High Performance Computing for Computational Science, VECPAR'10},
  series    = {Lecture Notes in Computer Science},
  volume    = {6449},
  publisher = {Springer},
  address   = {Berkeley, CA},
  pages     = {83--92},
  month     = jun # "~22--25",
  year      = 2010,
  note      = {\href{http://dx.doi.org/10.1007/978-3-642-19328-6\_10}{DOI:~10.1007/978-3-642-19328-6\_10}},
}
@comment{IEEE TPDS article on autotuning GEMM kernels. The key says 2011 while the issue data below is from 2012; the key is kept so existing citations resolve.}
@article{Kurzak_2011_tpds,
  author    = {Jakub Kurzak and Stanimire Tomov and Jack Dongarra},
  title     = {Autotuning {GEMM} Kernels for the {Fermi} {GPU}},
  journal   = {IEEE Transactions on Parallel and Distributed Systems},
  volume    = {23},
  number    = {11},
  pages     = {2045--2057},
  year      = {2012},
  issn      = {1045-9219},
  doi       = {10.1109/TPDS.2011.311},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  note      = {Also available as LAPACK Working Note 245, \url{http://www.netlib.org/lapack/lawnspdf/lawn245.pdf}},
}
@comment{IEEE TPDS article on LU factorization with partial pivoting. The key says 2012 while the year field is 2013; the key is kept so existing citations resolve.}
@article{Kurzak_2012_tpds,
  OPTok   = {yes},
  author  = {{Kurzak}, J. and {Luszczek}, P. and {Faverge}, M. and {Dongarra}, J.},
  title   = {{LU} Factorization with Partial Pivoting for a Multicore System with Accelerators},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = 24,
  number  = 8,
  pages   = {1613--1621},
  month   = aug,
  year    = 2013,
  note    = {\href{http://dx.doi.org/10.1109/TPDS.2012.242}{DOI:~10.1109/TPDS.2012.242}},
}
@comment{Procedia Computer Science article on multi-GPU LU factorization.}
@article{jia2012multi,
  OPTok   = {yes},
  author  = {Jia, Yulu and Luszczek, Piotr and Dongarra, Jack},
  title   = {Multi-{GPU} Implementation of {LU} Factorization},
  journal = {Procedia Computer Science},
  volume  = {9},
  pages   = {106--115},
  year    = {2012},
}
@comment{Journal article on parallel tiled QR factorization for multicore architectures.}
@article{blkd:ccpe:08,
  OPTok     = {yes},
  author    = {Buttari, Alfredo and Langou, Julien and Kurzak, Jakub and Dongarra, Jack},
  title     = {Parallel tiled {QR} factorization for multicore architectures},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {20},
  number    = {13},
  pages     = {1573--1590},
  year      = {2008},
  publisher = {John Wiley \& Sons, Ltd.},
  issn      = {1532-0634},
  doi       = {10.1002/cpe.1301},
  url       = {http://dx.doi.org/10.1002/cpe.1301},
  keywords  = {multicore, linear algebra, QR factorization},
}
@comment{Same dissertation as the entry petitet1996 (same author, title and year); both keys are kept so existing citations resolve.}
@phdthesis{Petitet:1996:Algorithmic-redistribution,
  OPTok    = {yes},
  author   = {Petitet, Antoine Paul},
  title    = {Algorithmic redistribution methods for block cyclic decompositions},
  school   = {The University of Tennessee},
  year     = 1996,
  isbn     = {0-591-45140-9},
  order_no = {AAI9735349},
}
@comment{Same dissertation as the entry Petitet:1996:Algorithmic-redistribution; both keys are kept so existing citations resolve. The type field replaces the default thesis label, so the department is given as part of the school.}
@phdthesis{petitet1996,
  OPTok   = {yes},
  author  = {Petitet, Antoine Paul},
  title   = {Algorithmic Redistribution Methods for Block Cyclic Decompositions},
  type    = {{PhD} dissertation},
  school  = {Computer Science Department, University of Tennessee},
  address = {Knoxville, Tennessee},
  month   = dec,
  year    = 1996,
  note    = {Major Professor: Jack Dongarra},
}
@comment{Cluster 2011 conference paper on distance-aware adaptive MPI collective communications.}
@inproceedings{ma:2011:dist_coll,
  OPTok     = {yes},
  author    = {Ma, T. and Herault, T. and Bosilca, G. and Dongarra, J.},
  title     = {Process Distance-aware Adaptive {MPI} Collective Communications},
  booktitle = {IEEE Int'l Conference on Cluster Computing (Cluster 2011)},
  address   = {Austin, TX},
  month     = sep,
  year      = 2011,
}
@comment{Euro-Par 2007 paper on decision trees for MPI collective algorithm selection.}
@inproceedings{pjesivac-grbovic:2007:mpi_coll,
  OPTok     = {yes},
  author    = {Pjesivac-Grbovic, J. and Bosilca, G. and Fagg, G. and Angskun, T. and Dongarra, J.},
  title     = {Decision Trees and {MPI} Collective Algorithm Selection Problem},
  booktitle = {Euro-Par 2007},
  publisher = {Springer},
  address   = {Rennes, France},
  pages     = {105--115},
  month     = aug,
  year      = 2007,
}
@comment{Pointer to the PLASMA project web page. The entry has no author, so the key field supplies the text used for sorting and labels.}
@misc{plasmawebsite,
  OPTok        = {yes},
  key          = {PLASMA},
  title        = {The {PLASMA} website},
  howpublished = {\url{http://icl.cs.utk.edu/plasma/}},
}
@comment{DARPA technical report: the 2008 exascale computing study on technology challenges.}
@techreport{kogge:exascale-hw,
  author      = {Peter Kogge and Keren Bergman and Shekhar Borkar and Dan Campbell and
                 William Carlson and William Dally and Monty Denneau and Paul Franzon and
                 William Harrod and Kerry Hill and Jon Hiller and Sherman Karp and
                 Stephen Keckler and Dean Klein and Robert Lucas and Mark Richards and
                 Al Scarpelli and Steven Scott and Allan Snavely and Thomas Sterling and
                 R. Stanley Williams and Katherine Yelick},
  title       = {Exascale {Computing} {Study}: Technology {Challenges} in {Achieving} {Exascale} {Systems}},
  institution = {DARPA},
  year        = {2008},
  note        = {{Available: }\url{http://users.ece.gatech.edu/mrichard/ExascaleComputingStudyReports/exascale_final_report_100208.pdf}},
}