-
Notifications
You must be signed in to change notification settings - Fork 15
/
data_loader.py
126 lines (110 loc) · 4.78 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""Data Loader CLI STAC_API Ingestion Tool."""
import json
import os
import click
import requests
def load_data(data_dir, filename):
    """Load JSON data from a file within the specified data directory.

    Args:
        data_dir: Directory that contains the file.
        filename: Name of the JSON file inside ``data_dir``.

    Returns:
        The decoded JSON document.

    Raises:
        click.Abort: If the file does not exist or does not contain valid JSON.
            A red error message is written to stderr first.
    """
    filepath = os.path.join(data_dir, filename)
    try:
        # Open directly instead of checking os.path.exists() first: this avoids
        # the check-then-open race. UTF-8 is requested explicitly so decoding
        # does not depend on the platform's locale encoding.
        with open(filepath, encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        click.secho(f"File not found: {filepath}", fg="red", err=True)
        raise click.Abort() from None
    except json.JSONDecodeError as err:
        # Report malformed input the same way as a missing file rather than
        # letting a raw traceback reach the CLI user.
        click.secho(f"Invalid JSON in {filepath}: {err}", fg="red", err=True)
        raise click.Abort() from err
def load_collection(base_url, collection_id, data_dir):
    """Create a STAC collection through the API.

    Reads ``collection.json`` from ``data_dir``, overrides its ``id`` with
    ``collection_id`` and POSTs it to ``{base_url}/collections``. The outcome
    is reported on the console; a connection failure is reported on stderr
    and otherwise ignored.
    """
    payload = load_data(data_dir, "collection.json")
    payload["id"] = collection_id

    try:
        response = requests.post(f"{base_url}/collections", json=payload)
    except requests.ConnectionError:
        click.secho("Failed to connect", fg="red", err=True)
        return

    status = response.status_code
    # Every outcome reports the status code first, then a specific message.
    click.echo(f"Status code: {status}")
    if status in (200, 201):
        click.echo(f"Added collection: {payload['id']}")
    elif status == 409:
        click.echo(f"Collection: {payload['id']} already exists")
    else:
        click.echo(
            f"Error writing {payload['id']} collection. Message: {response.text}"
        )
def load_items(base_url, collection_id, use_bulk, data_dir):
    """Load STAC items into the database based on the method selected.

    Picks a feature collection file from ``data_dir`` (any ``.json`` file
    other than ``collection.json``), loads the collection itself, then loads
    the items either in one bulk request or one request per item.

    Args:
        base_url: Base URL of the STAC API.
        collection_id: ID of the collection the items are added to.
        use_bulk: When true, send all items in a single request.
        data_dir: Directory containing ``collection.json`` and the feature
            collection file.

    Raises:
        click.Abort: If no feature collection file is found in ``data_dir``.
    """
    # os.listdir() returns entries in arbitrary order. Sort the candidates so
    # the same file is chosen on every run and platform when several exist.
    feature_files = sorted(
        name
        for name in os.listdir(data_dir)
        if name.endswith(".json") and name != "collection.json"
    )
    if not feature_files:
        click.secho(
            "No feature collection files found in the specified directory.",
            fg="red",
            err=True,
        )
        raise click.Abort()

    # Use the first (alphabetically smallest) feature collection file.
    feature_collection = load_data(data_dir, feature_files[0])

    load_collection(base_url, collection_id, data_dir)
    if use_bulk:
        load_items_bulk_insert(base_url, collection_id, feature_collection, data_dir)
    else:
        load_items_one_by_one(base_url, collection_id, feature_collection, data_dir)
def load_items_one_by_one(base_url, collection_id, feature_collection, data_dir):
    """Load STAC items into the database one by one.

    Each feature's ``collection`` field is set to ``collection_id`` (the
    feature dicts are modified in place) and the feature is POSTed to
    ``{base_url}/collections/{collection_id}/items``.

    Args:
        base_url: Base URL of the STAC API.
        collection_id: ID of the collection the items are added to.
        feature_collection: Mapping with a ``"features"`` list of item dicts.
        data_dir: Unused; kept so both loader functions share a signature.
    """
    items_url = f"{base_url}/collections/{collection_id}/items"
    for feature in feature_collection["features"]:
        feature["collection"] = collection_id
        try:
            resp = requests.post(items_url, json=feature)
        except requests.ConnectionError:
            click.secho("Failed to connect", fg="red", err=True)
            # Best effort: keep trying the remaining items.
            continue

        click.echo(f"Status code: {resp.status_code}")
        if resp.status_code in (200, 201):
            # 201 Created is also a success response for a create request.
            click.echo(f"Added item: {feature['id']}")
        elif resp.status_code == 409:
            click.echo(f"Item: {feature['id']} already exists")
        else:
            # Report failures instead of dropping them silently, matching
            # the behaviour of load_collection.
            click.echo(
                f"Error writing {feature['id']} item. Message: {resp.text}"
            )
def load_items_bulk_insert(base_url, collection_id, feature_collection, data_dir):
    """Load STAC items into the database via bulk insert.

    Sets every feature's ``collection`` field to ``collection_id`` (the
    feature dicts are modified in place) and POSTs the whole feature
    collection to ``{base_url}/collections/{collection_id}/items`` in a
    single request.

    Args:
        base_url: Base URL of the STAC API.
        collection_id: ID of the collection the items are added to.
        feature_collection: Mapping with a ``"features"`` list of item dicts.
        data_dir: Unused; kept so both loader functions share a signature.
    """
    for feature in feature_collection["features"]:
        feature["collection"] = collection_id

    try:
        resp = requests.post(
            f"{base_url}/collections/{collection_id}/items", json=feature_collection
        )
    except requests.ConnectionError:
        click.secho("Failed to connect", fg="red", err=True)
        return

    click.echo(f"Status code: {resp.status_code}")
    if resp.status_code in (200, 201):
        # 201 Created is also a success response for a create request.
        click.echo("Bulk inserted items successfully.")
    elif resp.status_code == 204:
        click.echo("Bulk update successful, no content returned.")
    elif resp.status_code == 409:
        click.echo("Conflict detected, some items might already exist.")
    else:
        # Report failures instead of dropping them silently, matching the
        # behaviour of load_collection.
        click.echo(f"Error bulk inserting items. Message: {resp.text}")
# Command-line entry point. click parses the options declared below and passes
# them to main() as keyword arguments.
@click.command()
@click.option("--base-url", required=True, help="Base URL of the STAC API")
@click.option(
    "--collection-id",
    default="test-collection",
    help="ID of the collection to which items are added",
)
@click.option("--use-bulk", is_flag=True, help="Use bulk insert method for items")
@click.option(
    "--data-dir",
    # exists=True makes click reject a path that does not exist.
    type=click.Path(exists=True),
    default="sample_data/",
    help="Directory containing collection.json and feature collection file",
)
def main(base_url, collection_id, use_bulk, data_dir):
    """Load STAC items into the database."""
    # NOTE: click shows the docstring above as the command's --help text, so
    # rewording it changes user-visible output.
    load_items(base_url, collection_id, use_bulk, data_dir)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()