-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add notebooks for EIS Fire data (#10)
* Add notebooks for fetching data from S3 and inserting into the features database * Add annotation Co-authored-by: j08lue <[email protected]>
- Loading branch information
1 parent
627b035
commit 0211656
Showing
5 changed files
with
449 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,5 @@ cdk.out | |
.idea | ||
|
||
.env | ||
.ipynb_checkpoints | ||
data/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,249 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "fc060b63", | ||
"metadata": {}, | ||
"source": [ | ||
"# Ingest fire features" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "1ea8ffd5", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import geopandas\n", | ||
"import geoalchemy2\n", | ||
"from sqlalchemy import create_engine\n", | ||
"import os\n", | ||
"import boto3\n", | ||
"import json" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "6a44355b", | ||
"metadata": {}, | ||
"source": [ | ||
"## Manual method" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "b5308956", | ||
"metadata": {}, | ||
"source": [ | ||
"### List locally stored FlatGeobuf files" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 24, | ||
"id": "caeca9f7", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Prefix prepended to the name of every target table.\n", | ||
"table_prefix = 'eis_fire'\n", | ||
"# Local root directory; input files are read from <data_dir>/<year>/.\n", | ||
"data_dir = 'data/eis_fire'\n", | ||
"# Year sub-directories to process.\n", | ||
"years = ['2019', '2020']" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 19, | ||
"id": "0655c2e0", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['data/eis_fire/2019/newfirepix.fgb',\n", | ||
" 'data/eis_fire/2019/perimeter.fgb',\n", | ||
" 'data/eis_fire/2019/fireline.fgb',\n", | ||
" 'data/eis_fire/2020/newfirepix.fgb',\n", | ||
" 'data/eis_fire/2020/perimeter.fgb',\n", | ||
" 'data/eis_fire/2020/fireline.fgb']" | ||
] | ||
}, | ||
"execution_count": 19, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# Collect the FlatGeobuf files found in each <data_dir>/<year> directory.\n", | ||
"# Filtering on the extension keeps stray directory entries\n", | ||
"# (e.g. .ipynb_checkpoints) out of the list of files to insert.\n", | ||
"files_to_insert = [\n", | ||
"    f\"{data_dir}/{year}/{file}\"\n", | ||
"    for year in years\n", | ||
"    for file in os.listdir(f\"{data_dir}/{year}\")\n", | ||
"    if file.endswith('.fgb')\n", | ||
"]\n", | ||
"\n", | ||
"files_to_insert" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "2920b0e4", | ||
"metadata": {}, | ||
"source": [ | ||
"### Get database configuration from vault" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "42c477a0", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Fetch the database secret from AWS Secrets Manager.\n", | ||
"# NOTE(review): the SecretId looks redacted; set the real secret name before running.\n", | ||
"client = boto3.client(\"secretsmanager\")\n", | ||
"response = client.get_secret_value(SecretId=\"OMITTED\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"id": "82ed63e3", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# The secret payload is a JSON document; pull out the connection settings.\n", | ||
"secrets = json.loads(response['SecretString'])\n", | ||
"host, password, username, database = (\n", | ||
"    secrets[key] for key in ('host', 'password', 'username', 'dbname')\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "1ae3533f", | ||
"metadata": {}, | ||
"source": [ | ||
"### Run `ogr2ogr` on files" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 41, | ||
"id": "6de7d13b", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"inserting data/eis_fire/2020/fireline.fgb into eis_fire_fireline\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Insert one file per run: change the index and re-run the cell for the next file.\n", | ||
"file_to_insert = files_to_insert[5]\n", | ||
"# The target table is the prefix plus the file name up to its first dot.\n", | ||
"layer_name = file_to_insert.rsplit('/', 1)[-1].split('.', 1)[0]\n", | ||
"tablename = f\"{table_prefix}_{layer_name}\"\n", | ||
"print(f\"inserting {file_to_insert} into {tablename}\")\n", | ||
"# Wrapped in single quotes so the shell passes the whole connection string as one argument.\n", | ||
"connection_string = f\"'host={host} dbname={database} user={username} password={password}'\"\n", | ||
"# Reproject to EPSG:4326 and append the features to the target table.\n", | ||
"!ogr2ogr -f \"PostgreSQL\" PG:{connection_string} -t_srs EPSG:4326 {file_to_insert} -nln {tablename} -append" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "84b7ec5e", | ||
"metadata": {}, | ||
"source": [ | ||
"## Attempt at inserting with GeoPandas `to_postgis`" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 47, | ||
"id": "d787fbf2", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Check the number of rows against the database insertion" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 53, | ||
"id": "5775b0e8", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Input files for newfirepix have 44488 rows combined.\n", | ||
"Input files for perimeter have 44567 rows combined.\n", | ||
"Input files for fireline have 43661 rows combined.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"tables = ['newfirepix', 'perimeter', 'fireline']\n", | ||
"# Count the rows of the local input files behind each table so the totals\n", | ||
"# can be checked against the database after insertion.\n", | ||
"for table in tables:\n", | ||
"    # each table is fed by one <table>.fgb file per entry in `years`\n", | ||
"    input_files = [f\"{data_dir}/{year}/{table}.fgb\" for year in years]\n", | ||
"    rows = sum(len(geopandas.read_file(file)) for file in input_files)\n", | ||
"    print(f\"Input files for {table} have {rows} rows combined.\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 27, | ||
"id": "f2b9f5f4", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# NOTE: inserting with `to_postgis` (call kept below, commented out) results in an\n", | ||
"# internal server error when trying to load items in the VEDA Features API:\n", | ||
"#   File \"pydantic/main.py\", line 342, in pydantic.main.BaseModel.__init__\n", | ||
"#   pydantic.error_wrappers.ValidationError: 10 validation errors for FeatureCollection\n", | ||
"#   features -> 0 -> id\n", | ||
"#     str type expected (type=type_error.str)\n", | ||
"#   features -> 1 -> id\n", | ||
"# df_reproj.to_postgis(tablename, engine, index=False, if_exists=\"append\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 45, | ||
"id": "525c4eb8", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# from sqlalchemy import text\n", | ||
"# engine = create_engine(f\"postgresql://{username}:{password}@{host}:5432/{database}\") \n", | ||
"# sql = text('DROP TABLE IF EXISTS fire_boundaries3;')\n", | ||
"# result = engine.execute(sql)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
IDENTITY_POOL_ID=us-west-2:XXX | ||
USER_POOL_ID=us-west-XXX | ||
CLIENT_ID=XXX | ||
USERNAME=XXX | ||
PASSWORD=XXX | ||
|
Oops, something went wrong.