-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path1_fetch.smk
85 lines (73 loc) · 2.8 KB
/
1_fetch.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
# Download lake_metadata.csv
# This is a checkpoint because lake_metadata.csv is needed to determine
# later outputs; namely, the lake_sequence file names.
checkpoint fetch_mntoha_metadata:
    # Fetches lake_metadata.csv by running the shared sb_fetch.py script.
    # sb_id selects what the script downloads (presumably a ScienceBase item
    # id -- confirm against 1_fetch/src/sb_fetch.py).
    params: sb_id = "5e5c1c1ce4b01d50924f27e7"
    # The pull-date file is the only declared input.
    input: "1_fetch/in/pull_date.txt"
    output: "1_fetch/out/lake_metadata.csv"
    script: "1_fetch/src/sb_fetch.py"
# Download MNTOHA temperature observations zip file
rule fetch_mntoha_obs:
    # Fetches the temperature observations zip archive by running the shared
    # sb_fetch.py script. sb_id selects what the script downloads (presumably
    # a ScienceBase item id -- confirm against 1_fetch/src/sb_fetch.py).
    params: sb_id = "5e5d0b68e4b01d50924f2b32"
    # The pull-date file is the only declared input.
    input: "1_fetch/in/pull_date.txt"
    output: "1_fetch/tmp/obs_mntoha/temperature_observations.zip"
    script: "1_fetch/src/sb_fetch.py"
# Download MNTOHA meteorological drivers, clarity, and ice flag zip files
rule fetch_mntoha_dynamic:
    # Fetches one file into 1_fetch/tmp/dynamic_mntoha/ by running the shared
    # sb_fetch.py script. sb_id selects what the script downloads from
    # (presumably a ScienceBase item id -- confirm against
    # 1_fetch/src/sb_fetch.py).
    params: sb_id = "5e5d0b96e4b01d50924f2b34"
    # The pull-date file is the only declared input.
    input: "1_fetch/in/pull_date.txt"
    # {file} is a wildcard: it takes its value from whichever target under
    # 1_fetch/tmp/dynamic_mntoha/ is requested.
    output: "1_fetch/tmp/dynamic_mntoha/{file}"
    script: "1_fetch/src/sb_fetch.py"
# Download metadata for lakes across CONUS compiled by Willard et al., 2022:
# Publication: https://doi.org/10.1002/lol2.10249
rule fetch_surface_metadata:
    # Fetches the surface lake_metadata.csv by running the shared sb_fetch.py
    # script. sb_id selects what the script downloads (presumably a
    # ScienceBase item id -- confirm against 1_fetch/src/sb_fetch.py).
    params: sb_id = "60341c3ed34eb12031172aa6"
    # The pull-date file is the only declared input.
    input: "1_fetch/in/pull_date.txt"
    output: "1_fetch/out/surface/lake_metadata.csv"
    script: "1_fetch/src/sb_fetch.py"
# Unzip files from a zipped archive.
#
# This is a checkpoint because otherwise Snakemake won't track unzipped files
# from an archive and will delete or ignore them. The output is a directory
# because we don't know how many unzipped files there will be, but we know
# which directory they'll be in after they are unzipped.
checkpoint unzip_archive:
    # Extracts a downloaded zip archive by running unzip_file.py.
    #
    # The output is declared with directory() because the number and names of
    # the extracted files are not known before unzipping; only the directory
    # they end up in is known.
    input:
        "1_fetch/tmp/{file_category}/{archive_name}.zip"
    output:
        # Name of the directory that the archive's files are extracted to.
        #
        # Both wildcards are constrained with Snakemake's {wildcard,regex}
        # syntax:
        #   [^/]+        one or more characters that are not "/", so a
        #                wildcard can never span a subdirectory of
        #                1_fetch/out/{file_category}
        #   $(?<!\.csv)  at the end of the string, a negative lookbehind
        #                rejects names whose final characters are ".csv", so
        #                csv outputs under 1_fetch/out/ (for example
        #                1_fetch/out/surface/lake_metadata.csv) are not
        #                mistaken for unzipped archive directories
        #
        # The pattern is a raw string so the regex escape "\." is passed
        # through unchanged; in a normal string literal it is an invalid
        # Python escape sequence and triggers a warning. The resulting
        # string value is identical.
        folder = directory(r"1_fetch/out/{file_category,[^/]+}/{archive_name,[^/]+$(?<!\.csv)}")
    params:
        # Root directories made available to the unzip script.
        source_dir = "1_fetch/tmp",
        destination_dir = "1_fetch/out"
    script:
        "1_fetch/src/unzip_file.py"