-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdvc.lock
171 lines (171 loc) · 6.49 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
schema: '2.0'
stages:
preprocess:
cmd: "export PYTHONPATH=$PWD:$PWD/src && python src/preprocessing/preprocess_kaggle_survey.py\
\ -i \"data/raw/IT Salary Survey EU 2018.csv\" \"data/raw/T Salary Survey EU\
\ 2019.csv\" \"data/raw/IT Salary Survey EU 2020.csv\" -s -l data/interim/labels.json\
\ -o data/interim/cleaned_data.parquet\n"
deps:
- path: data/raw/IT Salary Survey EU 2020.csv
md5: e00c49d579e5109542b21802c86078f9
size: 238182
- path: data/raw/IT Salary Survey EU 2018.csv
md5: 76f8eb45fcf224799c84b77995da482d
size: 77161
- path: data/raw/T Salary Survey EU 2019.csv
md5: 3973056878ed7795424b918963e14c47
size: 141780
- path: src/preprocessing/kaggle_survey_mappings.py
md5: 5e406216b037bb17d94cb9503998663d
size: 4264
- path: src/preprocessing/kaggle_survey_utils.py
md5: f0d8aca058f48fb3e7aa5a90e4eb5d58
size: 11765
- path: src/preprocessing/preprocess_kaggle_survey.py
md5: 7bf9c5a82521588597a16be260d8ca2f
size: 6513
outs:
- path: data/interim/cleaned_data.parquet
md5: f30cff94c4cbd23d645b3a08370b2458
size: 21295
- path: data/interim/labels.json
md5: 94de0839e6ab5978801d20f9728e4484
size: 767
train:
cmd: "export PYTHONPATH=$PWD:$PWD/src && python src/modeling/train_kaggle.py -f\
\ data/interim/transformed_features.parquet -t data/interim/transformed_targets.parquet\
\ -m artefacts/model.joblib -p artefacts/hyperparameters.json -e artefacts/metrics.json\n"
deps:
- path: artefacts/hyperparameters.json
md5: 36f4070c5aae8593530216ba1432b9cb
size: 157
- path: data/interim/transformed_features.parquet
md5: bf12dc430cdf08ca0f974117a2e91775
size: 32992
- path: data/interim/transformed_targets.parquet
md5: f266b33d1db0265cddc4fe5e415abc9e
size: 21512
- path: src/modeling/train_kaggle.py
md5: bebf0d829f3fc854b0ad426bc8ce66cf
size: 5777
outs:
- path: artefacts/metrics.json
md5: 7891451a1dbc9bc5aa316ebb62112e58
size: 156
- path: artefacts/model.joblib
md5: f51f084bf176d0f56f9b2b80aa95d4f2
size: 112437266
load:
cmd: "export PYTHONPATH=$PWD:$PWD/src && python src/data_loading/load_raw_data.py\
\ -i \"data/raw/IT Salary Survey EU 2018.csv\" \"data/raw/IT Salary Survey EU\
\ 2019.csv\" \"data/raw/IT Salary Survey EU 2020.csv\" -o data/interim/raw_data.parquet\n"
deps:
- path: data/raw/IT Salary Survey EU 2020.csv
md5: e00c49d579e5109542b21802c86078f9
size: 238182
- path: data/raw/IT Salary Survey EU 2018.csv
md5: 76f8eb45fcf224799c84b77995da482d
size: 77161
- path: data/raw/IT Salary Survey EU 2019.csv
md5: 3973056878ed7795424b918963e14c47
size: 141780
- path: src/data_loading/load_raw_data.py
md5: c29fae1296dfe94958267861b371d339
size: 3075
outs:
- path: data/interim/raw_data.parquet
md5: 4c544b8d0f896aa7cf0ae9945c309833
size: 80962
clean:
cmd: "export PYTHONPATH=$PWD:$PWD/src && python src/preprocessing/clean_features.py\
\ -i data/interim/raw_data.parquet -o data/interim/cleaned_data.parquet -m train\n"
deps:
- path: data/interim/raw_data.parquet
md5: 5f554226e324265c54442bfe96ff39db
size: 63334
- path: src/preprocessing/clean_features.py
md5: 5032b7fcf4799e98048e07e4ad231f1f
size: 9190
outs:
- path: data/interim/cleaned_data.parquet
md5: a345f3272e833555aec0cdd247db47e8
size: 17749
transform:
cmd: "export PYTHONPATH=$PWD:$PWD/src && python src/preprocessing/transform_features.py\
\ -i data/interim/cleaned_data.parquet -o data/interim/transformed_data.parquet\
\ -m train -l data/interim/labels.json\n"
deps:
- path: data/interim/cleaned_data.parquet
md5: a345f3272e833555aec0cdd247db47e8
size: 17749
- path: src/preprocessing/transform_features.py
md5: 0c1a2d0428d2267923837ea72f9b06cc
size: 6320
outs:
- path: data/interim/labels.json
md5: 94de0839e6ab5978801d20f9728e4484
size: 767
- path: data/interim/transformed_data.parquet
md5: 2bad349f5f22e79894bd0f967ad938c8
size: 17152
clean-features:
cmd: "export PYTHONPATH=$PWD:$PWD/src && python src/preprocessing/clean_features.py\
\ -i data/interim/raw_data.parquet -o data/interim/cleaned_features.parquet\
\ -m train\n"
deps:
- path: data/interim/raw_data.parquet
md5: 4c544b8d0f896aa7cf0ae9945c309833
size: 80962
- path: src/preprocessing/clean_features.py
md5: dd824977177908b995e149a485dded6c
size: 9285
outs:
- path: data/interim/cleaned_features.parquet
md5: 1690dc8269dc5b9fe47edee2a3991c7f
size: 33603
transform-features:
cmd: "export PYTHONPATH=$PWD:$PWD/src && python src/preprocessing/transform_features.py\
\ -i data/interim/cleaned_features.parquet -o data/interim/transformed_features.parquet\
\ -m train -l artefacts/labels.json\n"
deps:
- path: data/interim/cleaned_features.parquet
md5: 1690dc8269dc5b9fe47edee2a3991c7f
size: 33603
- path: src/preprocessing/transform_features.py
md5: ba33a43b40279b803927f785fa6a4dfc
size: 6674
outs:
- path: artefacts/labels.json
md5: 94de0839e6ab5978801d20f9728e4484
size: 767
- path: data/interim/transformed_features.parquet
md5: bf12dc430cdf08ca0f974117a2e91775
size: 32992
clean-targets:
cmd: "export PYTHONPATH=$PWD:$PWD/src && python src/preprocessing/clean_targets.py\
\ -i data/interim/raw_data.parquet -o data/interim/cleaned_targets.parquet\n"
deps:
- path: data/interim/raw_data.parquet
md5: 4c544b8d0f896aa7cf0ae9945c309833
size: 80962
- path: src/preprocessing/clean_targets.py
md5: 8630c49e77106874b78ea12de695cfc9
size: 3292
outs:
- path: data/interim/cleaned_targets.parquet
md5: f266b33d1db0265cddc4fe5e415abc9e
size: 21512
transform-targets:
cmd: "export PYTHONPATH=$PWD:$PWD/src && python src/preprocessing/transform_targets.py\
\ -i data/interim/cleaned_targets.parquet -o data/interim/transformed_targets.parquet\n"
deps:
- path: data/interim/cleaned_targets.parquet
md5: f266b33d1db0265cddc4fe5e415abc9e
size: 21512
- path: src/preprocessing/transform_targets.py
md5: d02d41634c4fcede69b2531ee8e6a2ed
size: 2550
outs:
- path: data/interim/transformed_targets.parquet
md5: f266b33d1db0265cddc4fe5e415abc9e
size: 21512