Skip to content

Commit

Permalink
Add notebooks for EIS Fire data (#10)
Browse files Browse the repository at this point in the history
* Add notebooks for fetching data from S3 and inserting into the features database

* Add annotation

Co-authored-by: j08lue <[email protected]>
  • Loading branch information
abarciauskas-bgse and j08lue authored Nov 7, 2022
1 parent 627b035 commit 0211656
Show file tree
Hide file tree
Showing 5 changed files with 449 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ cdk.out
.idea

.env
.ipynb_checkpoints
data/
249 changes: 249 additions & 0 deletions notebooks/add_fire_features.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "fc060b63",
"metadata": {},
"source": [
"# Ingest fire features"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "1ea8ffd5",
"metadata": {},
"outputs": [],
"source": [
"import geopandas\n",
"import geoalchemy2\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"import boto3\n",
"import json"
]
},
{
"cell_type": "markdown",
"id": "6a44355b",
"metadata": {},
"source": [
"## Manual method"
]
},
{
"cell_type": "markdown",
"id": "b5308956",
"metadata": {},
"source": [
"### List locally stored FlatGeobuf files"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "caeca9f7",
"metadata": {},
"outputs": [],
"source": [
"table_prefix = 'eis_fire'\n",
"data_dir = 'data/eis_fire'\n",
"years = ['2019', '2020']"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "0655c2e0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['data/eis_fire/2019/newfirepix.fgb',\n",
" 'data/eis_fire/2019/perimeter.fgb',\n",
" 'data/eis_fire/2019/fireline.fgb',\n",
" 'data/eis_fire/2020/newfirepix.fgb',\n",
" 'data/eis_fire/2020/perimeter.fgb',\n",
" 'data/eis_fire/2020/fireline.fgb']"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"files_to_insert = []\n",
"for year in years:\n",
" directory = f\"{data_dir}/{year}\"\n",
" files = os.listdir(directory)\n",
" for file in files:\n",
" files_to_insert.append(f\"{directory}/{file}\")\n",
"\n",
"files_to_insert"
]
},
{
"cell_type": "markdown",
"id": "2920b0e4",
"metadata": {},
"source": [
"### Get database configuration from vault"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "42c477a0",
"metadata": {},
"outputs": [],
"source": [
"client = boto3.client('secretsmanager')\n",
"response = client.get_secret_value(SecretId='OMITTED')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "82ed63e3",
"metadata": {},
"outputs": [],
"source": [
"secrets = json.loads(response['SecretString'])\n",
"host = secrets['host']\n",
"password = secrets['password']\n",
"username = secrets['username']\n",
"database = secrets['dbname']"
]
},
{
"cell_type": "markdown",
"id": "1ae3533f",
"metadata": {},
"source": [
"### Run `ogr2ogr` on files"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "6de7d13b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"inserting data/eis_fire/2020/fireline.fgb into eis_fire_fireline\n"
]
}
],
"source": [
"file_to_insert = files_to_insert[5]\n",
"tablename = f\"{table_prefix}_{file_to_insert.split('/')[-1].split('.')[0]}\"\n",
"print(f\"inserting {file_to_insert} into {tablename}\")\n",
"connection_string = f\"'host={host} dbname={database} user={username} password={password}'\"\n",
"!ogr2ogr -f \"PostgreSQL\" PG:{connection_string} -t_srs EPSG:4326 {file_to_insert} -nln {tablename} -append"
]
},
{
"cell_type": "markdown",
"id": "84b7ec5e",
"metadata": {},
"source": [
"## Attempt at "
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "d787fbf2",
"metadata": {},
"outputs": [],
"source": [
"# Check the number of rows against the database insertion"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "5775b0e8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Input files for newfirepix have 44488 rows combined.\n",
"Input files for perimeter have 44567 rows combined.\n",
"Input files for fireline have 43661 rows combined.\n"
]
}
],
"source": [
"tables = ['newfirepix', 'perimeter', 'fireline']\n",
"# check number of rows in data frames\n",
"for table in tables:\n",
" input_files = [f\"{data_dir}/{year}/{table}.fgb\" for year in years]\n",
" # count rows in both inputs from 2019 and 2020\n",
" rows = 0\n",
" for file in input_files:\n",
" df = geopandas.read_file(file)\n",
" rows += df.shape[0]\n",
" print(f\"Input files for {table} have {rows} rows combined.\")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "f2b9f5f4",
"metadata": {},
"outputs": [],
"source": [
"# this results in an internal server error when trying to load items in the VEDA Features API\n",
"# File \"pydantic/main.py\", line 342, in pydantic.main.BaseModel.__init__\n",
"#pydantic.error_wrappers.ValidationError: 10 validation errors for FeatureCollection\n",
"#features -> 0 -> id\n",
" # str type expected (type=type_error.str)\n",
"#features -> 1 -> id\n",
"#df_reproj.to_postgis(tablename, engine, index=False, if_exists=\"append\")"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "525c4eb8",
"metadata": {},
"outputs": [],
"source": [
"# from sqlalchemy import text\n",
"# engine = create_engine(f\"postgresql://{username}:{password}@{host}:5432/{database}\") \n",
"# sql = text('DROP TABLE IF EXISTS fire_boundaries3;')\n",
"# result = engine.execute(sql)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
6 changes: 6 additions & 0 deletions notebooks/env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
IDENTITY_POOL_ID=us-west-2:XXX
USER_POOL_ID=us-west-XXX
CLIENT_ID=XXX
USERNAME=XXX
PASSWORD=XXX

Loading

0 comments on commit 0211656

Please sign in to comment.