-
Notifications
You must be signed in to change notification settings - Fork 2
/
dvc.yaml
162 lines (150 loc) · 4.11 KB
/
dvc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# dvc project pipeline
# NOTE: see scripts/README.MD for preprocessing from raw data up to the first
# tracked stage `createtiles`
# Pipeline stages, listed in execution order. Stages using `foreach` are
# expanded once per listed year; `${item}` is replaced by that year.
# `${source_dim}`, `${createdataset.tile_size}` and `${file_type}` are
# pipeline parameters (declared under `params:` where they are used).
#
# NOTE(review): no stage lists its scripts/*.py file as a dependency, so
# editing a script alone does not mark the stage as changed - confirm this
# is intentional.
stages:
  # Cut each year's raw orthomosaic into source_dim x source_dim pixel tiles
  # with gdal_retile.py. The commands are chained with `&&` so that a failing
  # `mkdir` aborts the stage instead of being ignored.
  createtiles:
    foreach:
      - 2017
      - 2018
      - 2019
      - 2020
    do:
      cmd: >-
        mkdir -p data/processed.images.${item} &&
        gdal_retile.py
        -csv locations.csv
        -v -ps ${source_dim} ${source_dim}
        -co "TILED=YES" -co "COMPRESS=LZW" -co "PREDICTOR=2" -co "ALPHA=NO" -co "NUM_THREADS=ALL_CPUS"
        -targetDir data/processed.images.${item}
        data/raw/ortho_ms_${item}_EPSG3044.tif
      deps:
        - data/raw/ortho_ms_${item}_EPSG3044.tif
      params:
        - source_dim
      outs:
        - data/processed.images.${item}

  # Run scripts/computestats.py over the tile directories of all four years.
  # NOTE(review): the output path is not passed on the command line; the
  # script is assumed to write data/processed.images.stats.json - confirm.
  computestats:
    cmd: >-
      python scripts/computestats.py --frac 0.1
      data/processed.images.2017
      data/processed.images.2018
      data/processed.images.2019
      data/processed.images.2020
    deps:
      - data/processed.images.2017
      - data/processed.images.2018
      - data/processed.images.2019
      - data/processed.images.2020
    outs:
      - data/processed.images.stats.json

  # Run scripts/createmasks.py per year: tile directory in, mask directory
  # out, using that year's deadtrees shapefile.
  createmasks:
    foreach:
      - 2017
      - 2018
      - 2019
      - 2020
    do:
      cmd: >-
        python scripts/createmasks.py
        data/processed.images.${item}
        data/processed.masks.${item}
        data/raw/shapefiles/deadtrees_${item}/deadtrees_${item}.shp
      deps:
        - data/processed.images.${item}
        - data/raw/shapefiles/deadtrees_${item}
      outs:
        - data/processed.masks.${item}

  # Same script as `createmasks`, run with `--simple` against the CORINE
  # forest shapefile; writes to data/processed.lus.<year>.
  # NOTE(review): only the .shp file is tracked here, whereas `createmasks`
  # tracks the whole shapefile directory - changes to sidecar files next to
  # the .shp would go unnoticed. Confirm whether the directory should be the
  # dependency instead.
  createforestmasks:
    foreach:
      - 2017
      - 2018
      - 2019
      - 2020
    do:
      cmd: >-
        python scripts/createmasks.py
        data/processed.images.${item}
        data/processed.lus.${item}
        data/raw/shapefiles/forestmask/CORINE_forest.shp
        --simple
      deps:
        - data/processed.images.${item}
        - data/raw/shapefiles/forestmask/CORINE_forest.shp
      outs:
        - data/processed.lus.${item}

  # Run scripts/createdataset.py per year on the tiles, masks and lus
  # directories; output goes to data/dataset under subdir train_<year>.
  # NOTE(review): `--stats stats_<year>.csv` is presumably resolved relative
  # to data/dataset, matching the declared output - confirm.
  createdataset:
    foreach:
      - 2017
      - 2018
      - 2019
      - 2020
    do:
      cmd: >-
        python scripts/createdataset.py
        data/processed.images.${item}
        data/processed.masks.${item}
        data/processed.lus.${item}
        data/dataset
        --subdir train_${item}
        --source_dim ${source_dim}
        --tile_size ${createdataset.tile_size}
        --format ${file_type}
        --stats stats_${item}.csv
      deps:
        - data/processed.images.${item}
        - data/processed.masks.${item}
        - data/processed.lus.${item}
      params:
        - source_dim
        - createdataset.tile_size
        - file_type
      outs:
        - data/dataset/train_${item}
        - data/dataset/stats_${item}.csv

  # Only 2017 and 2019 are merged into the train/val/test splits;
  # 2018 and 2020 are used for testing only.
  mergedatasets:
    cmd: >-
      python scripts/mergedatasets.py data/dataset/train_2017 data/dataset/train_2019
    deps:
      - data/dataset/train_2017
      - data/dataset/train_2019
    outs:
      - data/dataset/train
      - data/dataset/val
      - data/dataset/test

  # Training is not a pipeline stage: run it manually. The resulting
  # checkpoints/bestmodel.ckpt is a dependency of `inference` below.

  # Run scripts/inference.py on every tile directory, then mosaic the
  # predicted tiles into one GeoTIFF per year with gdal_merge.py.
  # The three commands are chained with `&&`: with `;` the shell would report
  # only the exit status of gdal_merge.py, so a failed inference run followed
  # by a successful merge of partial tiles would be recorded as a good stage.
  # `stdbuf -i0 -o0 -e0` disables stream buffering for the python process.
  inference:
    foreach:
      - 2017
      - 2018
      - 2019
      - 2020
    do:
      cmd: >-
        mkdir -p data/predicted.${item} &&
        stdbuf -i0 -o0 -e0 python scripts/inference.py --all --nopreview -o data/predicted.${item} data/processed.images.${item} &&
        gdal_merge.py
        -co "TILED=YES" -co "COMPRESS=LZW" -co "PREDICTOR=2" -co "NUM_THREADS=ALL_CPUS"
        -o data/predicted_mosaic_${item}.tif
        data/predicted.${item}/ortho_ms_${item}_EPSG3044_*
      deps:
        - data/processed.images.${item}
        - checkpoints/bestmodel.ckpt
      outs:
        - data/predicted.${item}
        - data/predicted_mosaic_${item}.tif

  # Run scripts/computestats_inference.py over the prediction directories of
  # all four years.
  # NOTE(review): the output path is not passed on the command line; the
  # script is assumed to write data/predicted.stats.csv - confirm.
  computestatsinference:
    cmd: >-
      python scripts/computestats_inference.py
      data/predicted.2017
      data/predicted.2018
      data/predicted.2019
      data/predicted.2020
    deps:
      - data/predicted.2017
      - data/predicted.2018
      - data/predicted.2019
      - data/predicted.2020
    outs:
      - data/predicted.stats.csv