# collect_performance.py
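"""Collate evaluation metrics from all submissions in the ViEWS Prediction Challenge.

get_eval() reads and aggregates the evaluation metrics of a single submission, and
evaluation_table() collates them across submissions and (optionally) writes result
tables in LaTeX, HTML, and Excel format. The file can also be run as a command-line
script; see the argparse block at the bottom.
"""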
from pathlib import Path
import os
import pandas as pd
import argparse
from utilities import (
    list_submissions,
    get_target_data,
    views_month_id_to_year,
    views_month_id_to_month,
    TargetType,
    get_submission_details,
    is_parquet_in_target,
)


def get_eval(
    submission: str | os.PathLike,
    target: TargetType,
    groupby: str | list[str] | None = None,
    aggregate_submissions: bool = False,
) -> pd.DataFrame:
"""Convenience function to read and aggregate evaluation metrics from a submission.
Parameters
----------
submission : str | os.PathLike
Path to a folder structured like a submission_template
target : TargetType
A string, either "pgm" for PRIO-GRID-months, or "cm" for country-months.
groupby : str | list[str], optional
A dimension to aggregate results across. Some options (all except None and "pooled" can be combined in a list):
None: no aggregation
"pooled": complete aggregation
"year": aggregate by calendar year
"month": aggregate by calendar month
"month_id": aggregate by month_id (1 is January 1980)
"country_id": aggregate by country (currently only works for target == "cm")
"priogrid_gid": aggregate by PRIO-GRID id.
aggregate_submissions : bool
Aggregate across submissions. Default false (i.e., aggregate by [team, model])
Returns
-------
pandas.DataFrame
Raises
------
ValueError
Target must be "cm" or "pgm".
FileNotFoundError
There must be .parquet-files in the submission/{target} sub-folders.
"""
    if target == "cm":
        unit = "country_id"
    elif target == "pgm":
        unit = "priogrid_gid"
    else:
        raise ValueError('Target must be either "cm" or "pgm".')

    submission = Path(submission)
    if not is_parquet_in_target(submission, target):
        raise FileNotFoundError(
            f"No .parquet files found for target {target} in {submission}."
        )
    # Work on a copy so the caller's groupby list is not mutated below.
    groupby_inner = groupby.copy() if isinstance(groupby, list) else groupby
    match groupby_inner:
        case None:
            # No aggregation: return the raw evaluation data with team/model identifiers.
            df = get_target_data(submission / "eval", target=target)
            sdetails = get_submission_details(submission)
            df["team"] = sdetails["team"]
            df["model"] = sdetails["even_shorter_identifier"]
            return df
        case str():
            if groupby_inner == "pooled":
                groupby_inner = []
            else:
                groupby_inner = [groupby_inner]
        case list():
            pass
        case _:
            raise ValueError("groupby must be None, a string, or a list of strings.")
    df = get_target_data(submission / "eval", target=target)
    if df.index.names != [None]:
        df = df.reset_index()

    if "year" in groupby_inner:
        df["year"] = views_month_id_to_year(df["month_id"])
    if "month" in groupby_inner:
        df["month"] = views_month_id_to_month(df["month_id"])

    for col in ["month_id", unit, "window"]:
        if col not in groupby_inner:
            df = df.drop(columns=col)

    if not aggregate_submissions:
        sdetails = get_submission_details(submission)
        df["team"] = sdetails["team"]
        df["model"] = sdetails["even_shorter_identifier"]
        groupby_inner.extend(["team", "model"])

    # Aggregate metric values
    groupby_inner.append("metric")
    df = df.set_index(groupby_inner)
    df = df.groupby(level=groupby_inner, observed=True).mean()
    groupby_inner.pop()

    # Pivot metrics to wide
    df = df.pivot_table(values=["value"], index=groupby_inner, columns="metric")
    df.columns = df.columns.get_level_values(1).to_list()
    return df


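# Illustrative usage sketch for get_eval; this helper is not called anywhere in the
# script. The "./submissions/example_team" path is a hypothetical placeholder for a
# folder laid out like submission_template with evaluation .parquet files for the
# chosen target.
def _example_get_eval() -> pd.DataFrame:
    # Mean metric values per calendar year and per (team, model) for the
    # country-month target.
    return get_eval(
        "./submissions/example_team",
        target="cm",
        groupby="year",
        aggregate_submissions=False,
    )

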
def evaluation_table(
    submissions: str | os.PathLike,
    target: TargetType,
    groupby: str | list[str],
    save_to: str | os.PathLike | None = None,
    aggregate_submissions: bool = False,
) -> None | pd.DataFrame:
"""Convenience function to make aggregated result tables of the evaulation metrics and store them to LaTeX, HTML, and excel format.
Parameters
----------
submissions : str | os.PathLike
Path to a folder only containing folders structured like a submission_template
target : TargetType
A string, either "pgm" for PRIO-GRID-months, or "cm" for country-months.
groupby : str | list[str], optional
A dimension to aggregate results across. Some options (all except "pooled" can be combined in a list):
"pooled": complete aggregation
"window": aggregate by test window
"year": aggregate by calendar year
"month": aggregate by calendar month
"month_id": aggregate by month_id (1 is January 1980)
"country_id": aggregate by country (currently only works for target == "cm")
"priogrid_gid": aggregate by PRIO-GRID id.
save_to : str | os.PathLike, optional
Folder to store evaulation tables in LaTeX, HTML, and excel format.
aggregate_submissions : bool
Aggregate across submissions
Returns
-------
pandas.DataFrame
If save_to is None, or if groupby is a list or None, the function returns the dataframe.
It can be useful to collate all evaluation data into one dataframe, but not to write everything out to a table.
"""
    # Work on a copy so the caller's groupby list is not mutated below.
    groupby_inner = groupby.copy() if isinstance(groupby, list) else groupby
    match groupby_inner:
        case None:
            # Edge case: no aggregation requested. Probably not something to make
            # tables for, so collate the raw evaluation data from every submission
            # and return it as one dataframe.
            submission_folders = [
                s for s in list_submissions(Path(submissions)) if is_parquet_in_target(s, target)
            ]
            return pd.concat(
                get_eval(s, target, None, aggregate_submissions) for s in submission_folders
            )
        case str():
            if groupby_inner == "pooled":
                groupby_inner = []
            else:
                groupby_inner = [groupby_inner]
        case list():
            pass
        case _:
            raise ValueError("groupby must be None, a string, or a list of strings.")
    if not aggregate_submissions:
        groupby_inner.extend(["team", "model"])

    submissions = list_submissions(Path(submissions))
    submissions = [
        submission for submission in submissions if is_parquet_in_target(submission, target)
    ]

    # Silently accept that there might not be evaluation data for all submissions
    # for all targets for all windows.
    eval_data = []
    for submission in submissions:
        try:
            eval_df = get_eval(submission, target, groupby, aggregate_submissions)
            eval_data.append(eval_df)
        except FileNotFoundError:
            pass

    df = pd.concat(eval_data)
    if df.index.names != [None]:
        df = df.reset_index()
    # Aggregate metric values
    if len(groupby_inner) > 0:
        df = df.set_index(groupby_inner)
        df = df.groupby(level=groupby_inner, observed=True).mean().reset_index()
    else:
        # This is the case where groupby is "pooled" and aggregate_submissions is True.
        # Should just return the global mean.
        df = df.mean()

    # Pull windows to wide
    if "window" in groupby_inner:
        sorting_column = df["window"].unique()[0]
        df = df.pivot_table(
            values=["crps", "ign", "mis"],
            index=[g for g in groupby_inner if g != "window"],
            aggfunc={"crps": "mean", "ign": "mean", "mis": "mean"},
            columns="window",
        )
        df = df.sort_values(("crps", sorting_column))
    elif isinstance(df, pd.Series):
        # A single pooled row of metrics: nothing to sort.
        pass
    else:
        df = df.sort_values("crps")
    if save_to is None:
        return df

    file_stem = f"metrics_{target}_by={groupby_inner}"
    css_alt_rows = "background-color: #e6e6e6; color: black;"
    highlight_props = "background-color: #00718f; color: #fafafa;"
    df = (
        df.style.format(decimal=".", thousands=" ", precision=3)
        .highlight_min(axis=0, props=highlight_props)
        .set_table_styles(
            [{"selector": "tr:nth-child(even)", "props": css_alt_rows}]
        )
    )
    df.to_latex(os.path.join(save_to, f"{file_stem}.tex"))
    df.to_html(os.path.join(save_to, f"{file_stem}.html"))
    df.to_excel(os.path.join(save_to, f"{file_stem}.xlsx"))


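# Illustrative usage sketch for evaluation_table; this helper is not called anywhere
# in the script. The "./submissions" and "./tables" paths are hypothetical placeholders;
# the folder passed as `submissions` is assumed to contain one sub-folder per submission
# laid out like submission_template.
def _example_evaluation_table() -> None:
    # Write per-window metric tables for the country-month target, one row per
    # (team, model), to ./tables in LaTeX, HTML, and Excel format.
    evaluation_table(
        submissions="./submissions",
        target="cm",
        groupby="window",
        save_to="./tables",
        aggregate_submissions=False,
    )

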
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Method for collating evaluations from all submissions in the ViEWS Prediction Challenge",
        epilog="Example usage: python collect_performance.py -s ./submissions",
    )
    parser.add_argument(
        "-s",
        metavar="submissions",
        type=str,
        help="path to folder with submissions complying with submission_template",
    )
    parser.add_argument(
        "-o",
        metavar="output_folder",
        type=str,
        help="path to folder to save result tables",
        default=None,
    )
    parser.add_argument(
        "-tt",
        metavar="target_type",
        type=str,
        help='target "pgm" or "cm"',
        default=None,
    )
    parser.add_argument(
        "-g",
        metavar="groupby",
        nargs="+",
        type=str,
        help="string or list of strings of dimensions to aggregate over",
        default=None,
    )
    args = parser.parse_args()
    evaluation_table(
        submissions=args.s,
        target=args.tt,
        groupby=args.g,
        save_to=args.o,
        aggregate_submissions=True,
    )
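
# A hedged example invocation (the folder paths are placeholders, not files shipped
# with the script): collate country-month metrics by test window and write the result
# tables to an existing ./tables folder.
#
#   python collect_performance.py -s ./submissions -tt cm -g window -o ./tables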