-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathentrypoint.py
163 lines (129 loc) · 4.72 KB
/
entrypoint.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import os
import pickle
from importlib.machinery import SourceFileLoader
from datakitpy.datakit import (
load_resource_by_variable,
load_resource,
update_resource,
load_run_configuration,
write_run_configuration,
load_algorithm,
load_view,
VIEWS_DIR,
VIEW_ARTEFACTS_DIR,
ALGORITHM_DIR,
get_algorithm_name,
)
# Root directory of the datakit. The container definition mounts the datakit
# at "datakit"; the path is resolved against the current working directory.
DATAKIT_PATH = f"{os.getcwd()}/datakit"
# Helpers
def execute():
    """Execute the run named by the RUN environment variable.

    Loads the run configuration and its algorithm from the datakit, builds
    keyword arguments from the run's input variables (literal values or
    resource data), calls the algorithm module's ``main`` with them and
    writes the returned outputs back to the run configuration and to the
    associated resource files.

    Raises:
        ValueError: If the RUN environment variable is not set.
    """
    import importlib.util
    import sys

    # Load requested execution parameters from env vars
    try:
        run_name = os.environ["RUN"]
    except KeyError:
        raise ValueError("RUN environment variable missing") from None

    algorithm_name = get_algorithm_name(run_name)

    # Load run configuration
    run = load_run_configuration(run_name, base_path=DATAKIT_PATH)

    # Load algorithm
    # TODO Validate run config variables against algorithm signature here
    algorithm = load_algorithm(algorithm_name, base_path=DATAKIT_PATH)

    # Populate dict of key: value variable pairs to pass to function.
    # Variables with neither a "value" nor a "resource" key are not passed.
    kwargs = {}
    for variable in run["data"]["inputs"]:
        variable_name = variable["name"]
        if "value" in variable:
            # Variable is a simple value
            kwargs[variable_name] = variable["value"]
        elif "resource" in variable:
            # Variable is a resource
            kwargs[variable_name] = load_resource_by_variable(
                run_name=run_name,
                variable_name=variable_name,
                base_path=DATAKIT_PATH,
            ).data

    # Import algorithm module
    # Import as "algorithm_module" here to avoid clashing with any library
    # names (e.g. bindfit.py algorithm vs. bindfit library)
    module_name = "algorithm_module"
    module_path = (
        ALGORITHM_DIR.format(
            base_path=DATAKIT_PATH, algorithm_name=algorithm_name
        )
        + f"/{algorithm['code']}"
    )
    # Spec-based loading replaces the deprecated Loader.load_module().
    # Passing the loader explicitly keeps any file extension loadable.
    loader = SourceFileLoader(module_name, module_path)
    spec = importlib.util.spec_from_file_location(
        module_name, module_path, loader=loader
    )
    algorithm_module = importlib.util.module_from_spec(spec)
    # Register before executing, as load_module() did, so the module can be
    # found by name while and after it runs.
    sys.modules[module_name] = algorithm_module
    try:
        loader.exec_module(algorithm_module)
    except BaseException:
        # Do not leave a half-initialised module registered.
        sys.modules.pop(module_name, None)
        raise

    # Execute algorithm with kwargs
    result: dict = algorithm_module.main(**kwargs)

    # Populate run configuration with outputs and save
    for variable in run["data"]["outputs"]:
        output_name = variable["name"]
        if output_name not in result:
            # Algorithm did not produce this output; leave it untouched
            continue

        # Update variable value or resource with algorithm output
        if "value" in variable:
            # Variable is a simple value
            variable["value"] = result[output_name]
        elif "resource" in variable:
            # Variable is a resource, update the associated resource file
            updated_data = result[output_name]
            # TODO: Validate updated_data against resource schema here
            update_resource(
                run_name=run_name,
                resource_name=variable["resource"],
                data=updated_data,
                base_path=DATAKIT_PATH,
            )

    # TODO: Validate outputs against algorithm signature - make sure they are
    # the right types

    # Save updated run configuration
    write_run_configuration(run, base_path=DATAKIT_PATH)
def view():
    """Render the view named by VIEW for the run named by RUN.

    Both names are read from environment variables. The view definition and
    the data of every resource it lists are loaded from the datakit. Only
    views whose ``specType`` is ``"matplotlib"`` are rendered: the view's
    spec file is imported, its ``main`` is called with the resources as
    keyword arguments, and the returned figure is saved both as a PNG image
    and as a pickled object in the run's view artefacts directory. Any other
    ``specType`` results in no output.
    """
    import importlib.util
    import sys

    view_name = os.environ.get("VIEW")
    run_name = os.environ.get("RUN")

    # Load view (named view_config so it does not shadow this function)
    view_config = load_view(
        run_name=run_name, view_name=view_name, base_path=DATAKIT_PATH
    )

    # Load associated resources
    # TODO: Handle single resource case
    # Resource data is presumably a pandas DataFrame (per the original
    # author's note) - confirm against datakitpy.
    resources = {}
    for resource_name in view_config["resources"]:
        resources[resource_name] = load_resource(
            run_name=run_name,
            resource_name=resource_name,
            base_path=DATAKIT_PATH,
        ).data

    if view_config["specType"] == "matplotlib":
        # Import matplotlib module
        module_name = "matplotlib_module"
        module_path = (
            VIEWS_DIR.format(
                base_path=DATAKIT_PATH,
                algorithm_name=get_algorithm_name(run_name),
            )
            + f"/{view_config['specFile']}"
        )
        # Spec-based loading replaces the deprecated Loader.load_module().
        # Passing the loader explicitly keeps any file extension loadable.
        loader = SourceFileLoader(module_name, module_path)
        spec = importlib.util.spec_from_file_location(
            module_name, module_path, loader=loader
        )
        matplotlib_module = importlib.util.module_from_spec(spec)
        # Register before executing, as load_module() did, so objects
        # defined in the module stay resolvable by name when pickled below.
        sys.modules[module_name] = matplotlib_module
        try:
            loader.exec_module(matplotlib_module)
        except BaseException:
            # Do not leave a half-initialised module registered.
            sys.modules.pop(module_name, None)
            raise

        # Pass resources and execute
        fig = matplotlib_module.main(**resources)

        # Save figure
        figpath = (
            VIEW_ARTEFACTS_DIR.format(
                base_path=DATAKIT_PATH,
                run_name=run_name,
            )
            + f"/{view_name}"
        )

        print(f"Saving image at {figpath}.png")
        fig.savefig(f"{figpath}.png")

        print(f"Saving object at {figpath}.p")
        with open(f"{figpath}.p", "wb") as f:
            pickle.dump(fig, f)
if __name__ == "__main__":
    # RUN is required in both modes; VIEW additionally selects rendering
    # a view instead of executing the run.
    if "RUN" not in os.environ:
        raise ValueError("RUN environment variable not provided")

    if "VIEW" in os.environ:
        view()
    else:
        execute()