diff --git a/Makefile b/Makefile index 855a5e62..229ed485 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ SHELL := /bin/bash # -- Docker COMPOSE = bin/compose +COMPOSE_UP = $(COMPOSE) up -d --force-recreate COMPOSE_RUN = $(COMPOSE) run --rm --no-deps COMPOSE_RUN_API = $(COMPOSE_RUN) api COMPOSE_RUN_API_PIPENV = $(COMPOSE_RUN_API) pipenv run @@ -81,19 +82,23 @@ logs-opendata: ## display opendata logs (follow mode) .PHONY: logs-opendata run: ## run the api server (and dependencies) - $(COMPOSE) up -d --wait api + $(COMPOSE_UP) --wait api .PHONY: run run-all: ## run the whole stack - $(COMPOSE) up -d api keycloak metabase notebook opendata + $(COMPOSE_UP) api keycloak metabase notebook opendata .PHONY: run-all +run-metabase: ## run the metabase service + $(COMPOSE_UP) metabase +.PHONY: run-metabase + run-notebook: ## run the notebook service - $(COMPOSE) up -d notebook + $(COMPOSE_UP) notebook .PHONY: run-notebook run-opendata: ## run the opendata service - $(COMPOSE) up -d opendata + $(COMPOSE_UP) opendata .PHONY: run-opendata status: ## an alias for "docker compose ps" @@ -131,9 +136,20 @@ drop-metabase-db: ## drop Metabase database @$(COMPOSE) exec postgresql bash -c 'psql "postgresql://$${POSTGRES_USER}:$${POSTGRES_PASSWORD}@$${QUALICHARGE_DB_HOST}:$${QUALICHARGE_DB_PORT}/postgres" -c "drop database \"$${MB_DB_DBNAME}\";"' || echo "Duly noted, skipping database deletion." 
.PHONY: drop-metabase-db +dump-metabase: ## dump metabase objects + bin/pg_dump -a --inserts \ + -t Report_Card \ + -t Report_Dashboard \ + -t Report_DashboardCard \ + -t Dashboard_Tab \ + -t Setting \ + -U qualicharge \ + metabaseappdb > src/metabase/custom.sql +.PHONY: dump-metabase + migrate-api: ## run alembic database migrations for the api service @echo "Running api service database engine…" - @$(COMPOSE) up -d --wait postgresql + @$(COMPOSE_UP) --wait postgresql @echo "Creating api service database…" @$(COMPOSE) exec postgresql bash -c 'psql "postgresql://$${POSTGRES_USER}:$${POSTGRES_PASSWORD}@$${QUALICHARGE_DB_HOST}:$${QUALICHARGE_DB_PORT}/postgres" -c "create database \"$${QUALICHARGE_DB_NAME}\";"' || echo "Duly noted, skipping database creation." @$(COMPOSE) exec postgresql bash -c 'psql "postgresql://$${POSTGRES_USER}:$${POSTGRES_PASSWORD}@$${QUALICHARGE_DB_HOST}:$${QUALICHARGE_DB_PORT}/$${QUALICHARGE_DB_NAME}" -c "create extension postgis;"' || echo "Duly noted, skipping extension creation." 
@@ -164,9 +180,9 @@ jupytext--to-ipynb: ## convert remote md files into ipynb reset-db: ## Reset the PostgreSQL database $(COMPOSE) stop postgresql $(COMPOSE) down postgresql - $(COMPOSE) up -d --force-recreate postgresql + $(COMPOSE_UP) postgresql $(MAKE) migrate-api - $(COMPOSE) up -d --force-recreate api + $(COMPOSE_UP) api $(MAKE) create-superuser .PHONY: reset-db @@ -178,17 +194,21 @@ seed-api: run seed-metabase: ## seed the Metabase server @echo "Running metabase service …" - @$(COMPOSE) up -d --wait metabase + @$(COMPOSE_UP) --wait metabase @echo "Create metabase initial admin user…" bin/metabase-init @echo "Create API data source…" $(COMPOSE_RUN) terraform init $(COMPOSE_RUN) terraform apply -auto-approve + cat src/metabase/custom.sql | \ + bin/psql \ + -U qualicharge \ + -d metabaseappdb .PHONY: seed-metabase seed-oidc: ## seed the OIDC provider @echo 'Starting OIDC provider…' - @$(COMPOSE) up -d keycloak + @$(COMPOSE_UP) keycloak @$(COMPOSE_RUN) dockerize -wait http://keycloak:8080 -timeout 60s @echo 'Seeding OIDC client…' @$(COMPOSE) exec keycloak /usr/local/bin/kc-init diff --git a/bin/pg_dump b/bin/pg_dump new file mode 100755 index 00000000..6c1ab279 --- /dev/null +++ b/bin/pg_dump @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +docker compose exec postgresql \ + pg_dump \ + "$@" diff --git a/bin/psql b/bin/psql new file mode 100755 index 00000000..2491ffdf --- /dev/null +++ b/bin/psql @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +docker compose exec -T postgresql \ + psql \ + "$@" diff --git a/src/api/CHANGELOG.md b/src/api/CHANGELOG.md index d93a296d..16566773 100644 --- a/src/api/CHANGELOG.md +++ b/src/api/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to - Allow to configure `API_STATIQUE_PAGE_MAX_SIZE` and `API_STATIQUE_PAGE_SIZE` independently from `API_STATIQUE_BULK_CREATE_MAX_SIZE` +- Store french administrative levels and geographic boundaries (shapes) ### Changed diff --git a/src/api/Pipfile b/src/api/Pipfile index cd22b9d4..34422e2c 100644 --- 
a/src/api/Pipfile +++ b/src/api/Pipfile @@ -9,9 +9,12 @@ annotated-types = "==0.7.0" email-validator = "==2.2.0" fastapi = "==0.111.1" geoalchemy2 = {extras = ["shapely"], version = "==0.15.2"} +geopandas = "==1.0.1" httpx = {extras = ["cli"], version = "==0.27.0"} +pandas = "==2.2.2" passlib = {extras = ["bcrypt"], version = "==1.7.4"} psycopg = {extras = ["pool", "binary"], version = "==3.2.1"} +pyarrow = "==17.0.0" pydantic-extra-types = {extras = ["all"], version = "==2.9.0"} pydantic-settings = "==2.3.4" pyjwt = "==2.8.0" diff --git a/src/api/Pipfile.lock b/src/api/Pipfile.lock index 767f8641..c33374ed 100644 --- a/src/api/Pipfile.lock +++ b/src/api/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "37b0e532ffaa1c9b7ba7328a339be0f24c804f44c05657d4a656398403dd7d70" + "sha256": "0f537b49d66b18ae51f1eee13839ed8480e798858cdff88d6df8e276ecbdb6ce" }, "pipfile-spec": 6, "requires": { @@ -135,6 +135,15 @@ "markers": "python_version >= '3.7'", "version": "==0.15.2" }, + "geopandas": { + "hashes": [ + "sha256:01e147d9420cc374d26f51fc23716ac307f32b49406e4bd8462c07e82ed1d3d6", + "sha256:b8bf70a5534588205b7a56646e2082fb1de9a03599651b3d80c99ea4c2ca08ab" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==1.0.1" + }, "greenlet": { "hashes": [ "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67", @@ -421,7 +430,7 @@ "sha256:f1659887361a7151f89e79b276ed8dff3d75877df906328f14d8bb40bb4f5101", "sha256:f9cf5ea551aec449206954b075db819f52adc1638d46a6738253a712d553c7b4" ], - "markers": "python_version >= '3.9'", + "markers": "python_version >= '3.12'", "version": "==2.0.1" }, "packaging": { @@ -432,6 +441,42 @@ "markers": "python_version >= '3.8'", "version": "==24.1" }, + "pandas": { + "hashes": [ + "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863", + "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2", + 
"sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1", + "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad", + "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db", + "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76", + "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51", + "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32", + "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08", + "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b", + "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4", + "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921", + "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288", + "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee", + "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0", + "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24", + "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99", + "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151", + "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd", + "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce", + "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57", + "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef", + "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54", + "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a", + "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238", + "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23", + "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772", + "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce", 
+ "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==2.2.2" + }, "passlib": { "extras": [ "bcrypt" @@ -622,6 +667,49 @@ ], "version": "==3.2.2" }, + "pyarrow": { + "hashes": [ + "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", + "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", + "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", + "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", + "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", + "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", + "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", + "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", + "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", + "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", + "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", + "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", + "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", + "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", + "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", + "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", + "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", + "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", + "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", + "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", + "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", + "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", + 
"sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", + "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", + "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", + "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", + "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", + "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", + "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", + "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", + "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", + "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", + "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", + "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", + "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", + "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==17.0.0" + }, "pycountry": { "hashes": [ "sha256:b61b3faccea67f87d10c1f2b0fc0be714409e8fcdcc1315613174f6466c10221", @@ -769,6 +857,71 @@ "markers": "python_version >= '3.7'", "version": "==2.8.0" }, + "pyogrio": { + "hashes": [ + "sha256:019731a856a9abfe909e86f50eb13f8362f6742337caf757c54b7c8acfe75b89", + "sha256:083351b258b3e08b6c6085dac560bd321b68de5cb4a66229095da68d5f3d696b", + "sha256:13642608a1cd67797ae8b5d792b0518d8ef3eb76506c8232ab5eaa1ea1159dff", + "sha256:17420febc17651876d5140b54b24749aa751d482b5f9ef6267b8053e6e962876", + "sha256:1a495ca4fb77c69595747dd688f8f17bb7d2ea9cd86603aa71c7fc98cc8b4174", + "sha256:2829615cf58b1b24a9f96fea42abedaa1a800dd351c67374cc2f6341138608f3", + "sha256:2e98913fa183f7597c609e774820a149e9329fd2a0f8d33978252fbd00ae87e6", + "sha256:2f2ec57ab74785db9c2bf47c0a6731e5175595a13f8253f06fa84136adb310a9", + 
"sha256:30cbeeaedb9bced7012487e7438919aa0c7dfba18ac3d4315182b46eb3139b9d", + "sha256:3a2fcaa269031dbbc8ebd91243c6452c5d267d6df939c008ab7533413c9cf92d", + "sha256:3f964002d445521ad5b8e732a6b5ef0e2d2be7fe566768e5075c1d71398da64a", + "sha256:4a289584da6df7ca318947301fe0ba9177e7f863f63110e087c80ac5f3658de8", + "sha256:4da0b9deb380bd9a200fee13182c4f95b02b4c554c923e2e0032f32aaf1439ed", + "sha256:4e0f90a6c3771ee1f1fea857778b4b6a1b64000d851b819f435f9091b3c38c60", + "sha256:6a6fa2e8cf95b3d4a7c0fac48bce6e5037579e28d3eb33b53349d6e11f15e5a8", + "sha256:6dc94a67163218581c7df275223488ac9b31dc582ccd756da607c3338908566c", + "sha256:796e4f6a4e769b2eb6fea9a10546ea4bdee16182d1e29802b4d6349363c3c1d7", + "sha256:7fcafed24371fe6e23bcf5abebbb29269f8d79915f1dd818ac85453657ea714a", + "sha256:9440466c0211ac81f3417f274da5903f15546b486f76b2f290e74a56aaf0e737", + "sha256:959022f3ad04053f8072dc9a2ad110c46edd9e4f92352061ba835fc91df3ca96", + "sha256:d668cb10f2bf6ccd7c402f91e8b06290722dd09dbe265ae95b2c13db29ebeba0", + "sha256:e38c3c6d37cf2cc969407e4d051dcb507cfd948eb26c7b0840c4f7d7d4a71bd4", + "sha256:f47c9b6818cc0f420015b672d5dcc488530a5ee63e5ba35a184957b21ea3922a", + "sha256:f5d80eb846be4fc4e642cbedc1ed0c143e8d241653382ecc76a7620bbd2a5c3a", + "sha256:f8bf193269ea9d347ac3ddada960a59f1ab2e4a5c009be95dc70e6505346b2fc", + "sha256:fb04bd80964428491951766452f0071b0bc37c7d38c45ef02502dbd83e5d74a0" + ], + "markers": "python_version >= '3.8'", + "version": "==0.9.0" + }, + "pyproj": { + "hashes": [ + "sha256:18faa54a3ca475bfe6255156f2f2874e9a1c8917b0004eee9f664b86ccc513d3", + "sha256:1e9fbaf920f0f9b4ee62aab832be3ae3968f33f24e2e3f7fbb8c6728ef1d9746", + "sha256:2d6ff73cc6dbbce3766b6c0bce70ce070193105d8de17aa2470009463682a8eb", + "sha256:36b64c2cb6ea1cc091f329c5bd34f9c01bb5da8c8e4492c709bda6a09f96808f", + "sha256:38a3361941eb72b82bd9a18f60c78b0df8408416f9340521df442cebfc4306e2", + "sha256:447db19c7efad70ff161e5e46a54ab9cc2399acebb656b6ccf63e4bc4a04b97a", + 
"sha256:44aa7c704c2b7d8fb3d483bbf75af6cb2350d30a63b144279a09b75fead501bf", + "sha256:4ba1f9b03d04d8cab24d6375609070580a26ce76eaed54631f03bab00a9c737b", + "sha256:4bc0472302919e59114aa140fd7213c2370d848a7249d09704f10f5b062031fe", + "sha256:50100b2726a3ca946906cbaa789dd0749f213abf0cbb877e6de72ca7aa50e1ae", + "sha256:5279586013b8d6582e22b6f9e30c49796966770389a9d5b85e25a4223286cd3f", + "sha256:6420ea8e7d2a88cb148b124429fba8cd2e0fae700a2d96eab7083c0928a85110", + "sha256:65ad699e0c830e2b8565afe42bd58cc972b47d829b2e0e48ad9638386d994915", + "sha256:6d227a865356f225591b6732430b1d1781e946893789a609bb34f59d09b8b0f8", + "sha256:7a27151ddad8e1439ba70c9b4b2b617b290c39395fa9ddb7411ebb0eb86d6fb0", + "sha256:80fafd1f3eb421694857f254a9bdbacd1eb22fc6c24ca74b136679f376f97d35", + "sha256:83039e5ae04e5afc974f7d25ee0870a80a6bd6b7957c3aca5613ccbe0d3e72bf", + "sha256:8b8acc31fb8702c54625f4d5a2a6543557bec3c28a0ef638778b7ab1d1772132", + "sha256:9274880263256f6292ff644ca92c46d96aa7e57a75c6df3f11d636ce845a1877", + "sha256:ab7aa4d9ff3c3acf60d4b285ccec134167a948df02347585fdd934ebad8811b4", + "sha256:c41e80ddee130450dcb8829af7118f1ab69eaf8169c4bf0ee8d52b72f098dc2f", + "sha256:db3aedd458e7f7f21d8176f0a1d924f1ae06d725228302b872885a1c34f3119e", + "sha256:e7e13c40183884ec7f94eb8e0f622f08f1d5716150b8d7a134de48c6110fee85", + "sha256:ebfbdbd0936e178091309f6cd4fcb4decd9eab12aa513cdd9add89efa3ec2882", + "sha256:fd43bd9a9b9239805f406fd82ba6b106bf4838d9ef37c167d3ed70383943ade1", + "sha256:fd93c1a0c6c4aedc77c0fe275a9f2aba4d59b8acf88cebfc19fe3c430cfabf4f", + "sha256:fffb059ba3bced6f6725961ba758649261d85ed6ce670d3e3b0a26e81cf1aa8d" + ], + "markers": "python_version >= '3.9'", + "version": "==3.6.1" + }, "python-dateutil": { "hashes": [ "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", @@ -946,6 +1099,7 @@ "sha256:ff7731fea5face9ec08a861ed351734a79475631b7540ceb0b66fb9732a5f529", "sha256:ff9e520af0c5a578e174bca3c18713cd47a6c6a15b6cf1f50ac17dc8bb8db6a2" ], + "markers": "python_version >= 
'3.7'", "version": "==2.0.5" }, "shellingham": { diff --git a/src/api/qualicharge/migrations/env.py b/src/api/qualicharge/migrations/env.py index 35c54abc..f00bba8d 100644 --- a/src/api/qualicharge/migrations/env.py +++ b/src/api/qualicharge/migrations/env.py @@ -26,6 +26,7 @@ Station, Status, ) +from qualicharge.schemas.geo import Region, Department, EPCI, City # noqa: F401 # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/src/api/qualicharge/migrations/versions/7b8c33d8399d_add_admin_geo_boundaries.py b/src/api/qualicharge/migrations/versions/7b8c33d8399d_add_admin_geo_boundaries.py new file mode 100644 index 00000000..7cdbbb99 --- /dev/null +++ b/src/api/qualicharge/migrations/versions/7b8c33d8399d_add_admin_geo_boundaries.py @@ -0,0 +1,131 @@ +"""add admin geo boundaries + +Revision ID: 7b8c33d8399d +Revises: b5ef0fba88a7 +Create Date: 2024-07-29 08:57:49.939396 + +""" + +from typing import Sequence, Union + +from alembic import op +import geoalchemy2 +import sqlalchemy as sa +import sqlmodel + +# revision identifiers, used by Alembic. +revision: str = "7b8c33d8399d" +down_revision: Union[str, None] = "b5ef0fba88a7" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "epci", + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column( + "geometry", + geoalchemy2.types.Geometry( + srid=4326, from_text="ST_GeomFromEWKT", name="geometry", nullable=False + ), + nullable=False, + ), + sa.Column("code", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.CheckConstraint("created_at <= updated_at", name="pre-creation-update"), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index(op.f("ix_epci_code"), "epci", ["code"], unique=True) + op.create_table( + "region", + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column( + "geometry", + geoalchemy2.types.Geometry( + srid=4326, from_text="ST_GeomFromEWKT", name="geometry", nullable=False + ), + nullable=False, + ), + sa.Column("code", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.CheckConstraint("created_at <= updated_at", name="pre-creation-update"), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index(op.f("ix_region_code"), "region", ["code"], unique=True) + op.create_table( + "department", + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column( + "geometry", + geoalchemy2.types.Geometry( + srid=4326, from_text="ST_GeomFromEWKT", name="geometry", nullable=False + ), + nullable=False, + ), + sa.Column("code", sqlmodel.sql.sqltypes.AutoString(), 
nullable=False), + sa.Column("region_id", sqlmodel.sql.sqltypes.GUID(), nullable=True), + sa.CheckConstraint("created_at <= updated_at", name="pre-creation-update"), + sa.ForeignKeyConstraint( + ["region_id"], + ["region.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index(op.f("ix_department_code"), "department", ["code"], unique=True) + op.create_table( + "city", + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False), + sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column( + "geometry", + geoalchemy2.types.Geometry( + srid=4326, from_text="ST_GeomFromEWKT", name="geometry", nullable=False + ), + nullable=False, + ), + sa.Column("code", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.Column("department_id", sqlmodel.sql.sqltypes.GUID(), nullable=True), + sa.Column("epci_id", sqlmodel.sql.sqltypes.GUID(), nullable=True), + sa.CheckConstraint("created_at <= updated_at", name="pre-creation-update"), + sa.ForeignKeyConstraint( + ["department_id"], + ["department.id"], + ), + sa.ForeignKeyConstraint( + ["epci_id"], + ["epci.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index(op.f("ix_city_code"), "city", ["code"], unique=True) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_index(op.f("ix_city_code"), table_name="city") + op.drop_index("idx_city_geometry", table_name="city", postgresql_using="gist") + op.drop_table("city") + op.drop_index(op.f("ix_department_code"), table_name="department") + op.drop_index( + "idx_department_geometry", table_name="department", postgresql_using="gist" + ) + op.drop_table("department") + op.drop_index(op.f("ix_region_code"), table_name="region") + op.drop_index("idx_region_geometry", table_name="region", postgresql_using="gist") + op.drop_table("region") + op.drop_index(op.f("ix_epci_code"), table_name="epci") + op.drop_index("idx_epci_geometry", table_name="epci", postgresql_using="gist") + op.drop_table("epci") + # ### end Alembic commands ### diff --git a/src/api/qualicharge/migrations/versions/f5416bc7dd5f_import_admin_geo_fixtures.py b/src/api/qualicharge/migrations/versions/f5416bc7dd5f_import_admin_geo_fixtures.py new file mode 100644 index 00000000..2951d71c --- /dev/null +++ b/src/api/qualicharge/migrations/versions/f5416bc7dd5f_import_admin_geo_fixtures.py @@ -0,0 +1,164 @@ +"""import admin geo fixtures + +Revision ID: f5416bc7dd5f +Revises: 7b8c33d8399d +Create Date: 2024-07-29 09:03:55.369265 + +""" + +import gzip +import tempfile +import uuid +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Sequence, Union + + +import httpx +import geopandas as gp +import pandas as pd +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision: str = "f5416bc7dd5f" +down_revision: Union[str, None] = "7b8c33d8399d" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +@dataclass +class AdministrativeBoundary: + """An administrative boundary level.""" + + path: Path + url: str + table: str + + +def download_fixtures() -> Dict[str, AdministrativeBoundary]: + """Download GeoJSON files from Etalab.""" + levels = ("communes", "epci", "departements", "regions") + tables = ("city", "epci", "department", "region") + etalab_root_url = ( + "https://etalab-datasets.geo.data.gouv.fr/contours-administratifs/2024/geojson" + ) + resolution = "100m" + boundaries = {} + + tmp_dirname = tempfile.mkdtemp() + for level, table in zip(levels, tables): + boundaries[level] = AdministrativeBoundary( + path=Path(f"{tmp_dirname}/{level}.geojson.gz"), + url=f"{etalab_root_url}/{level}-{resolution}.geojson.gz", + table=table, + ) + + # Download + for level, ab in boundaries.items(): + print(f"Downloading {level} file to {ab.path}...") + response = httpx.get(ab.url) + with open(ab.path, "wb") as output_file: + output_file.write(gzip.decompress(response.content)) + + return boundaries + + +def load_level(input_file: Path) -> gp.GeoDataFrame: + """Load administrative boundaries level.""" + boundaries = gp.read_file(f"GeoJSON:{input_file}") + + # Add missing columns (to fit with the ORM) + boundaries["id"] = boundaries.apply(lambda x: uuid.uuid4(), axis=1) + now = pd.Timestamp.now(tz="utc") + boundaries["created_at"] = now + boundaries["updated_at"] = now + + return boundaries + + +def import_fixtures(): + """Import administrative boundaries.""" + boundaries = download_fixtures() + + # -- Regions + print("Importing regions...") + regions = load_level(boundaries["regions"].path) + regions.rename(columns={"nom": "name"}, inplace=True) + regions.to_postgis(boundaries["regions"].table, op.get_bind(), if_exists="append") + + # -- Departments + print("Importing 
departments...") + departments = load_level(boundaries["departements"].path) + departments.rename(columns={"nom": "name"}, inplace=True) + # Handle foreign keys + departments = departments.merge( + regions[["id", "code"]], + how="outer", + left_on="region", + right_on="code", + suffixes=("_dept", "_reg"), + ) + departments.rename( + columns={"code_dept": "code", "id_dept": "id", "id_reg": "region_id"}, + inplace=True, + ) + departments.drop(["code_reg", "region"], axis=1, inplace=True) + departments.to_postgis( + boundaries["departements"].table, op.get_bind(), if_exists="append" + ) + + # -- EPCI + print("Importing epci...") + epci = load_level(boundaries["epci"].path) + epci.rename(columns={"nom": "name"}, inplace=True) + epci.to_postgis(boundaries["epci"].table, op.get_bind(), if_exists="append") + + # -- Cities + print("Importing cities...") + cities = load_level(boundaries["communes"].path) + cities.rename(columns={"nom": "name"}, inplace=True) + # Handle foreign keys: department + cities = cities.merge( + departments[["id", "code"]], + how="outer", + left_on="departement", + right_on="code", + suffixes=("_city", "_dept"), + ) + cities.rename( + columns={"code_city": "code", "id_city": "id", "id_dept": "department_id"}, + inplace=True, + ) + cities.drop( + ["code_dept", "region", "commune", "departement", "plm"], axis=1, inplace=True + ) + # Handle foreign keys: epci + cities = cities.merge( + epci[["id", "code"]], + how="outer", + left_on="epci", + right_on="code", + suffixes=("_city", "_epci"), + ) + cities.rename( + columns={"code_city": "code", "id_city": "id", "id_epci": "epci_id"}, + inplace=True, + ) + cities.drop(["code_epci", "epci"], axis=1, inplace=True) + cities.to_postgis(boundaries["communes"].table, op.get_bind(), if_exists="append") + + +def remove_fixtures(): + """Remove database administrative boundaries.""" + tables = ("region", "department", "epci", "city") + for table in tables: + op.execute(f"TRUNCATE TABLE {table} CASCADE") + + +def 
upgrade() -> None: + import_fixtures() + + +def downgrade() -> None: + remove_fixtures() diff --git a/src/api/qualicharge/schemas/geo.py b/src/api/qualicharge/schemas/geo.py new file mode 100644 index 00000000..7202fe75 --- /dev/null +++ b/src/api/qualicharge/schemas/geo.py @@ -0,0 +1,71 @@ +"""QualiCharge schemas for administrative boundaries.""" + +from typing import List, Optional +from uuid import UUID, uuid4 + +from geoalchemy2.types import Geometry +from sqlmodel import Field, Relationship +from sqlmodel.main import SQLModelConfig + +from . import BaseTimestampedSQLModel + + +class BaseAdministrativeBoundaries(BaseTimestampedSQLModel): + """Base administrative boundaries model.""" + + model_config = SQLModelConfig( + validate_assignment=True, arbitrary_types_allowed=True + ) + + id: Optional[UUID] = Field(default_factory=uuid4, primary_key=True) + code: str = Field(index=True, unique=True) + name: str + geometry: Geometry = Field( + sa_type=Geometry( + srid=4326, + spatial_index=True, + ) + ) # type: ignore[call-overload] + + +class Region(BaseAdministrativeBoundaries, table=True): + """Region level (région in French).""" + + code: str = Field(regex=r"^\d{2,3}$", index=True, unique=True) + + # Relationships + departments: List["Department"] = Relationship(back_populates="region") + + +class Department(BaseAdministrativeBoundaries, table=True): + """Department level (département in French).""" + + code: str = Field(regex=r"^\d{2,3}$", index=True, unique=True) + + # Relationships + region_id: Optional[UUID] = Field(default=None, foreign_key="region.id") + region: Region = Relationship(back_populates="departments") + + cities: List["City"] = Relationship(back_populates="department") + + +class EPCI(BaseAdministrativeBoundaries, table=True): + """Groupment of cities level (French).""" + + code: str = Field(regex=r"^\d{9}$", index=True, unique=True) + + # Relationships + cities: List["City"] = Relationship(back_populates="epci") + + +class 
City(BaseAdministrativeBoundaries, table=True): + """City level (communes in French).""" + + code: str = Field(regex=r"^\d{5}$", index=True, unique=True) + + # Relationships + department_id: Optional[UUID] = Field(default=None, foreign_key="department.id") + department: Department = Relationship(back_populates="cities") + + epci_id: Optional[UUID] = Field(default=None, foreign_key="epci.id") + epci: EPCI = Relationship(back_populates="cities") diff --git a/src/metabase/custom.sql b/src/metabase/custom.sql new file mode 100644 index 00000000..90a8477e --- /dev/null +++ b/src/metabase/custom.sql @@ -0,0 +1,115 @@ +-- +-- PostgreSQL database dump +-- + +-- Dumped from database version 14.11 (Ubuntu 14.11-1.pgdg22.04+1) +-- Dumped by pg_dump version 14.11 (Ubuntu 14.11-1.pgdg22.04+1) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +-- +-- Data for Name: report_dashboard; Type: TABLE DATA; Schema: public; Owner: qualicharge +-- + +INSERT INTO public.report_dashboard VALUES (2, '2024-07-24 17:30:34.624319+00', '2024-07-24 19:27:50.720464+00', 'Etat des lieux réseau recharge électrique', NULL, 1, '[]', NULL, NULL, false, NULL, NULL, false, NULL, false, NULL, NULL, NULL, NULL, 'l1pKFuKIACYZv-IQ3AnU7', true, 'fixed', NULL, 0); +INSERT INTO public.report_dashboard VALUES (1, '2024-07-22 18:16:17.686908+00', '2024-07-25 09:57:55.824181+00', 'E-commerce insights', 'Quickly take an overview of an e-commerce reseller business and dive into separate tabs that focus on top selling products and demographic insights. 
Each vendor can log in as a tenant and see their own data sandboxed from all the others.', 13371338, '[{"id":"fc2cd1be","isMultiSelect":false,"name":"Vendor","sectionId":"string","slug":"vendor","type":"string/="},{"id":"afa56954","name":"Date Range","sectionId":"date","slug":"date_range","type":"date/range"},{"id":"5eeec658","name":"Category","sectionId":"string","slug":"category","type":"string/=","values_query_type":"list"},{"id":"512c560a","name":"Location","sectionId":"location","slug":"location","type":"string/=","values_query_type":"search"}]', NULL, NULL, false, NULL, NULL, false, NULL, true, NULL, 1, 2, NULL, 'DlK2jXoIHPXyVkEuo6Uy6', true, 'full', NULL, 0); + + +-- +-- Data for Name: dashboard_tab; Type: TABLE DATA; Schema: public; Owner: qualicharge +-- + +INSERT INTO public.dashboard_tab VALUES (1, 1, 'Overview', 0, 'PS7GW5IfD3ov0haogAPLu', '2024-07-22 18:16:17.686908+00', '2024-07-22 18:16:17.686908+00'); +INSERT INTO public.dashboard_tab VALUES (2, 1, 'Portfolio performance', 1, 'z5IymDhXzRY2kF79LxQIN', '2024-07-22 18:16:17.686908+00', '2024-07-22 18:16:17.686908+00'); +INSERT INTO public.dashboard_tab VALUES (3, 1, 'Demographics', 2, 'TKyJ0onLUPOuZgVfwZfU1', '2024-07-22 18:16:17.686908+00', '2024-07-22 18:16:17.686908+00'); +INSERT INTO public.dashboard_tab VALUES (4, 2, 'Région', 0, 'jAjJFc47q_kyfMU4akjUO', '2024-07-24 17:38:30.121495+00', '2024-07-24 17:38:30.121495+00'); +INSERT INTO public.dashboard_tab VALUES (5, 2, 'Département', 1, 'VLvs8eSpooLTgEHj4_GwZ', '2024-07-24 17:38:30.121495+00', '2024-07-24 17:38:55.643808+00'); +INSERT INTO public.dashboard_tab VALUES (6, 2, 'EPCI', 2, '-OWw8EY4qBqD1ubiigsI8', '2024-07-24 19:05:25.001515+00', '2024-07-24 19:05:25.001515+00'); + + +-- +-- Data for Name: report_card; Type: TABLE DATA; Schema: public; Owner: qualicharge +-- + +INSERT INTO public.report_card VALUES (28, '2024-07-24 17:30:07.941948+00', '2024-07-25 09:28:25.032997+00', 'Répartition points de charge par région', NULL, 'map', 
'{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n Region.code as code,\n COUNT(PointDeCharge.id) as num_pdc\nFROM\n Region,\n PointDeCharge\nINNER JOIN Station ON PointDeCharge.station_id = Station.id\nINNER JOIN Localisation ON Station.localisation_id = Localisation.id\nWHERE\n ST_CONTAINS (Region.geometry, Localisation.\"coordonneesXY\")\nGROUP BY code\n"}}', '{"map.region":"eb32b0c5-1ad0-c768-7941-be5e1ffb036f","map.type":"region","map.metric":"numloc","map.dimension":"code","table.cell_column":"num_pdc"}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, '[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":18,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":2.0555555555555554}}}},{"display_name":"num_pdc","field_ref":["field","num_pdc",{"base-type":"type/BigInteger"}],"base_type":"type/BigInteger","effective_type":"type/BigInteger","name":"num_pdc","semantic_type":"type/Quantity","fingerprint":{"global":{"distinct-count":18,"nil%":0.0},"type":{"type/Number":{"min":3.0,"q1":177.0,"q3":5079.0,"max":7922.0,"sd":2645.6216216053804,"avg":3132.3333333333335}}}}]', NULL, 'IYFRojMEDtpysWFSJpnxG', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); +INSERT INTO public.report_card VALUES (27, '2024-07-24 17:11:08.507318+00', '2024-07-25 13:08:50.012439+00', 'Répartition localisations par région', NULL, 'map', '{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n Region.code as code,\n COUNT(Localisation.id) as numloc\nFROM\n Region,\n Localisation\nWHERE\n ST_CONTAINS (Region.geometry, Localisation.\"coordonneesXY\")\nGROUP BY geometry, code\n"}}', 
'{"map.region":"42972c31-3ef6-e95f-3c8a-5253ba91e1fb","map.type":"region","map.metric":"numloc","map.dimension":"code"}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, '[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":18,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":2.0555555555555554}}}},{"display_name":"numloc","field_ref":["field","numloc",{"base-type":"type/BigInteger"}],"base_type":"type/BigInteger","effective_type":"type/BigInteger","name":"numloc","semantic_type":null,"fingerprint":{"global":{"distinct-count":17,"nil%":0.0},"type":{"type/Number":{"min":2.0,"q1":59.0,"q3":1296.0,"max":2420.0,"sd":727.6662332820065,"avg":820.5}}}}]', NULL, 'O-CDeyLmYUR0P7u41UOl5', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); +INSERT INTO public.report_card VALUES (31, '2024-07-24 18:59:59.4796+00', '2024-07-24 19:27:16.849137+00', 'Répartition puissance par département', NULL, 'map', '{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n Department.code as code,\n SUM(PointDeCharge.puissance_nominale) as puissance\nFROM\n Department,\n PointDeCharge\nINNER JOIN Station ON PointDeCharge.station_id = Station.id\nINNER JOIN Localisation ON Station.localisation_id = Localisation.id\nWHERE\n ST_CONTAINS (Department.geometry, Localisation.\"coordonneesXY\")\nGROUP BY code\n"}}', '{"map.region":"47e9a9a3-da5a-e129-5f07-44f6e3e7a408","map.type":"region","map.metric":"numloc","map.dimension":"code","table.cell_column":"num_pdc"}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, 
'[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":101,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":2.0495049504950495}}}},{"display_name":"puissance","field_ref":["field","puissance",{"base-type":"type/Float"}],"base_type":"type/Float","effective_type":"type/Float","name":"puissance","semantic_type":null,"fingerprint":{"global":{"distinct-count":101,"nil%":0.0},"type":{"type/Number":{"min":47.68,"q1":13621.9,"q3":51288.67000000002,"max":120995.73863999984,"sd":25968.98577683933,"avg":36841.12354871289}}}}]', NULL, '9UxGzJVb3xUddiK1JnJ6c', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); +INSERT INTO public.report_card VALUES (30, '2024-07-24 17:37:04.215216+00', '2024-07-25 09:28:25.074428+00', 'Répartition puissance par région', NULL, 'map', '{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n Region.code as code,\n SUM(PointDeCharge.puissance_nominale) as puissance\nFROM\n Region,\n PointDeCharge\nINNER JOIN Station ON PointDeCharge.station_id = Station.id\nINNER JOIN Localisation ON Station.localisation_id = Localisation.id\nWHERE\n ST_CONTAINS (Region.geometry, Localisation.\"coordonneesXY\")\nGROUP BY code\n"}}', '{"map.region":"eb32b0c5-1ad0-c768-7941-be5e1ffb036f","map.type":"region","map.metric":"numloc","map.dimension":"code","table.cell_column":"num_pdc"}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, 
'[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":18,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":2.0555555555555554}}}},{"display_name":"puissance","field_ref":["field","puissance",{"base-type":"type/Float"}],"base_type":"type/Float","effective_type":"type/Float","name":"puissance","semantic_type":null,"fingerprint":{"global":{"distinct-count":18,"nil%":0.0},"type":{"type/Number":{"min":47.68,"q1":6700.759999999998,"q3":370953.0502499941,"max":554256.800499998,"sd":177674.9207109036,"avg":206719.63768999843}}}}]', NULL, '9mMTGyW4_Ws5fBk4FViQE', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); +INSERT INTO public.report_card VALUES (33, '2024-07-24 19:02:19.559408+00', '2024-07-24 19:27:16.822126+00', 'Répartition points de charge par département', NULL, 'map', '{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n Department.code as code,\n COUNT(PointDeCharge.id) as num_pdc\nFROM\n Department,\n PointDeCharge\nINNER JOIN Station ON PointDeCharge.station_id = Station.id\nINNER JOIN Localisation ON Station.localisation_id = Localisation.id\nWHERE\n ST_CONTAINS (Department.geometry, Localisation.\"coordonneesXY\")\nGROUP BY code\n"}}', '{"map.region":"47e9a9a3-da5a-e129-5f07-44f6e3e7a408","map.type":"region","map.metric":"numloc","map.dimension":"code","table.cell_column":"num_pdc"}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, 
'[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":101,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":2.0495049504950495}}}},{"display_name":"num_pdc","field_ref":["field","num_pdc",{"base-type":"type/BigInteger"}],"base_type":"type/BigInteger","effective_type":"type/BigInteger","name":"num_pdc","semantic_type":"type/Quantity","fingerprint":{"global":{"distinct-count":93,"nil%":0.0},"type":{"type/Number":{"min":3.0,"q1":228.5,"q3":833.1886116991581,"max":1631.0,"sd":407.1471126062794,"avg":558.2376237623762}}}}]', NULL, '4RzkyQEUqW3HCCFp-HKdJ', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); +INSERT INTO public.report_card VALUES (35, '2024-07-24 19:20:21.06889+00', '2024-07-24 19:27:22.689109+00', 'Tables des puissances par EPCI', NULL, 'table', '{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n EPCI.code as code,\n EPCI.name as name,\n SUM(PointDeCharge.puissance_nominale) as puissance\nFROM\n EPCI,\n PointDeCharge\nINNER JOIN Station ON PointDeCharge.station_id = Station.id\nINNER JOIN Localisation ON Station.localisation_id = Localisation.id\nWHERE\n ST_CONTAINS (EPCI.geometry, Localisation.\"coordonneesXY\")\nGROUP BY EPCI.code, EPCI.name\nORDER BY puissance DESC "}}', '{"map.region":"47e9a9a3-da5a-e129-5f07-44f6e3e7a408","map.type":"region","map.metric":"numloc","map.dimension":"code","table.cell_column":"num_pdc","column_settings":{"[\"name\",\"puissance\"]":{"show_mini_bar":true}}}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, 
'[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":1089,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":9.0}}}},{"display_name":"name","field_ref":["field","name",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"name","semantic_type":"type/Name","fingerprint":{"global":{"distinct-count":1089,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":24.370982552800733}}}},{"display_name":"puissance","field_ref":["field","puissance",{"base-type":"type/Float"}],"base_type":"type/Float","effective_type":"type/Float","name":"puissance","semantic_type":null,"fingerprint":{"global":{"distinct-count":866,"nil%":0.0},"type":{"type/Number":{"min":3.68,"q1":306.8414041021311,"q3":3649.174969985125,"max":128294.57999999943,"sd":7577.414619681161,"avg":3416.4494751331495}}}}]', NULL, 'NWh8FaUgNfFedwlwOAyqK', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); +INSERT INTO public.report_card VALUES (36, '2024-07-24 19:22:32.769441+00', '2024-07-24 19:27:22.752719+00', 'Table des points de charge par EPCI', NULL, 'table', '{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n EPCI.code as code,\n EPCI.name as name,\n COUNT(PointDeCharge.id) as num_pdc\nFROM\n EPCI,\n PointDeCharge\nINNER JOIN Station ON PointDeCharge.station_id = Station.id\nINNER JOIN Localisation ON Station.localisation_id = Localisation.id\nWHERE\n ST_CONTAINS (EPCI.geometry, Localisation.\"coordonneesXY\")\nGROUP BY code, name\nORDER BY num_pdc DESC"}}', 
'{"map.region":"47e9a9a3-da5a-e129-5f07-44f6e3e7a408","map.type":"region","map.metric":"numloc","map.dimension":"code","table.cell_column":"num_pdc","column_settings":{"[\"name\",\"num_pdc\"]":{"show_mini_bar":true}}}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, '[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":1089,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":9.0}}}},{"display_name":"name","field_ref":["field","name",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"name","semantic_type":"type/Name","fingerprint":{"global":{"distinct-count":1089,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":24.370982552800733}}}},{"display_name":"num_pdc","field_ref":["field","num_pdc",{"base-type":"type/BigInteger"}],"base_type":"type/BigInteger","effective_type":"type/BigInteger","name":"num_pdc","semantic_type":"type/Quantity","fingerprint":{"global":{"distinct-count":196,"nil%":0.0},"type":{"type/Number":{"min":1.0,"q1":8.8119799196247,"q3":50.11119015177866,"max":3542.0,"sd":136.1169989595801,"avg":51.755739210284666}}}}]', NULL, 'xgUU-rFWIqS3jqQV_bKf0', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); +INSERT INTO public.report_card VALUES (32, '2024-07-24 19:01:25.601311+00', '2024-07-24 19:27:16.799431+00', 'Répartition stations par département', NULL, 'map', '{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n Department.code AS code,\n COUNT(Station.id) AS num_stations\nFROM\n Department,\n Station\n INNER JOIN Localisation ON Station.localisation_id = Localisation.id\nWHERE\n ST_CONTAINS (Department.geometry, 
Localisation.\"coordonneesXY\")\nGROUP BY\n code"}}', '{"map.region":"47e9a9a3-da5a-e129-5f07-44f6e3e7a408","map.type":"region","map.metric":"numloc","map.dimension":"code"}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, '[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":101,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":2.0495049504950495}}}},{"display_name":"num_stations","field_ref":["field","num_stations",{"base-type":"type/BigInteger"}],"base_type":"type/BigInteger","effective_type":"type/BigInteger","name":"num_stations","semantic_type":"type/Quantity","fingerprint":{"global":{"distinct-count":92,"nil%":0.0},"type":{"type/Number":{"min":2.0,"q1":95.2561512627574,"q3":367.25,"max":1008.0,"sd":203.31668532336852,"avg":248.05940594059405}}}}]', NULL, 'YXNY8Kwzt8Un33S4JVndF', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); +INSERT INTO public.report_card VALUES (34, '2024-07-24 19:08:35.016192+00', '2024-07-24 19:27:22.562504+00', 'Table des stations par EPCI', NULL, 'table', '{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n EPCI.code AS code,\n EPCI.name AS name,\n COUNT(Station.id) AS num_stations\nFROM\n EPCI,\n Station\n INNER JOIN Localisation ON Station.localisation_id = Localisation.id\nWHERE\n ST_CONTAINS (EPCI.geometry, Localisation.\"coordonneesXY\")\nGROUP BY\n EPCI.code, EPCI.name\nORDER BY num_stations DESC"}}', 
'{"map.region":"47e9a9a3-da5a-e129-5f07-44f6e3e7a408","map.dimension":"code","map.type":"region","pie.slice_threshold":1,"pie.percent_visibility":"inside","table.columns":[{"name":"code","enabled":true},{"name":"name","enabled":true},{"name":"num_stations","enabled":true}],"map.metric":"numloc","pie.dimension":"name","funnel.metric":"num_stations","table.cell_column":"num_stations","graph.series_order_dimension":null,"funnel.dimension":"code","graph.metrics":["num_stations"],"graph.series_order":null,"pie.metric":"num_stations","column_settings":{"[\"name\",\"num_stations\"]":{"show_mini_bar":true}},"graph.dimensions":["name"]}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, '[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":1089,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":9.0}}}},{"display_name":"name","field_ref":["field","name",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"name","semantic_type":"type/Name","fingerprint":{"global":{"distinct-count":1089,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":24.370982552800733}}}},{"display_name":"num_stations","field_ref":["field","num_stations",{"base-type":"type/BigInteger"}],"base_type":"type/BigInteger","effective_type":"type/BigInteger","name":"num_stations","semantic_type":"type/Quantity","fingerprint":{"global":{"distinct-count":123,"nil%":0.0},"type":{"type/Number":{"min":1.0,"q1":3.9992367748636877,"q3":21.230036068143907,"max":1738.0,"sd":65.51957575062008,"avg":22.997245179063363}}}}]', NULL, 'EDRumOULpUovhE0lTBk5p', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); +INSERT INTO public.report_card VALUES (29, 
'2024-07-24 17:33:02.243752+00', '2024-07-25 09:28:24.993463+00', 'Répartition stations par région', NULL, 'map', '{"database":2,"type":"native","native":{"template-tags":{},"query":"SELECT\n Region.code AS code,\n COUNT(Station.id) AS num_stations\nFROM\n Region,\n Station\n INNER JOIN Localisation ON Station.localisation_id = Localisation.id\nWHERE\n ST_CONTAINS (Region.geometry, Localisation.\"coordonneesXY\")\nGROUP BY\n code"}}', '{"map.region":"eb32b0c5-1ad0-c768-7941-be5e1ffb036f","map.type":"region","map.metric":"numloc","map.dimension":"code"}', 1, 2, NULL, 'native', false, NULL, NULL, NULL, false, NULL, NULL, '[{"display_name":"code","field_ref":["field","code",{"base-type":"type/Text"}],"base_type":"type/Text","effective_type":"type/Text","name":"code","semantic_type":null,"fingerprint":{"global":{"distinct-count":18,"nil%":0.0},"type":{"type/Text":{"percent-json":0.0,"percent-url":0.0,"percent-email":0.0,"percent-state":0.0,"average-length":2.0555555555555554}}}},{"display_name":"num_stations","field_ref":["field","num_stations",{"base-type":"type/BigInteger"}],"base_type":"type/BigInteger","effective_type":"type/BigInteger","name":"num_stations","semantic_type":"type/Quantity","fingerprint":{"global":{"distinct-count":17,"nil%":0.0},"type":{"type/Number":{"min":2.0,"q1":75.0,"q3":2355.0,"max":3552.0,"sd":1212.1465106985595,"avg":1391.888888888889}}}}]', NULL, 'jk6WWqVYid8of1GsaEDyI', '[]', '[]', true, 'v0.50.13 (2086968)', 'question', NULL, NULL, NULL, 0); + + +-- +-- Data for Name: report_dashboardcard; Type: TABLE DATA; Schema: public; Owner: qualicharge +-- + +INSERT INTO public.report_dashboardcard VALUES (3, '2024-07-22 18:16:17.686908+00', '2024-07-22 18:16:17.686908+00', 24, 1, 0, 0, NULL, 1, '[]', '{"column_settings":null,"dashcard.background":false,"text":"Overall business health","virtual_card":{"archived":false,"dataset_query":{},"display":"heading","name":null,"visualization_settings":{}}}', 'Cb92ioon7vWaER-4OcdDS', NULL, 1); +INSERT INTO 
public.report_dashboardcard VALUES (5, '2024-07-22 18:16:17.686908+00', '2024-07-22 18:16:17.686908+00', 24, 1, 0, 0, NULL, 1, '[]', '{"column_settings":null,"dashcard.background":false,"text":"Top performing products","virtual_card":{"archived":false,"dataset_query":{},"display":"heading","name":null,"visualization_settings":{}}}', 's2ZdKLKOql1B0gCbOh3XO', NULL, 2); +INSERT INTO public.report_dashboardcard VALUES (11, '2024-07-22 18:16:17.686908+00', '2024-07-22 18:16:17.686908+00', 7, 2, 1, 17, NULL, 1, '[]', '{"text":"### Top three all-time products \nMoM performance","virtual_card":{"archived":false,"dataset_query":{},"display":"text","name":null,"visualization_settings":{}}}', 'Jtd-7AGoT0MNS4CxhVjei', NULL, 2); +INSERT INTO public.report_dashboardcard VALUES (12, '2024-07-22 18:16:17.686908+00', '2024-07-22 18:16:17.686908+00', 24, 1, 0, 0, NULL, 1, '[]', '{"column_settings":null,"dashcard.background":false,"text":"Who and where are our customers","virtual_card":{"archived":false,"dataset_query":{},"display":"heading","name":null,"visualization_settings":{}}}', 'xmP43pM4LHbrbRP-rjT-j', NULL, 3); +INSERT INTO public.report_dashboardcard VALUES (16, '2024-07-22 18:16:17.686908+00', '2024-07-22 18:16:17.686908+00', 24, 1, 12, 0, NULL, 1, '[]', '{"column_settings":null,"dashcard.background":false,"text":"Product category insights","virtual_card":{"archived":false,"dataset_query":{},"display":"heading","name":null,"visualization_settings":{}}}', 'tNvUvy6ezg0rUhV9baI0O', NULL, 2); +INSERT INTO public.report_dashboardcard VALUES (20, '2024-07-22 18:16:17.686908+00', '2024-07-22 18:16:17.686908+00', 24, 1, 7, 0, NULL, 1, '[]', '{"column_settings":null,"dashcard.background":false,"text":"Age insights breakdowns","virtual_card":{"archived":false,"dataset_query":{},"display":"heading","name":null,"visualization_settings":{}}}', 'P37ixkaPafIr47ep0oypD', NULL, 3); +INSERT INTO public.report_dashboardcard VALUES (22, '2024-07-22 18:16:17.686908+00', '2024-07-22 
18:16:17.686908+00', 24, 1, 18, 0, NULL, 1, '[]', '{"column_settings":null,"dashcard.background":false,"text":"Customer satisfaction insights","virtual_card":{"archived":false,"dataset_query":{},"display":"heading","name":null,"visualization_settings":{}}}', 'wRqWXKjFJ_d3t34HXl7Vg', NULL, 2); +INSERT INTO public.report_dashboardcard VALUES (33, '2024-07-24 17:30:53.988857+00', '2024-07-24 17:38:30.121495+00', 12, 6, 0, 12, 28, 2, '[]', '{}', 'KlFwAcpWYE68kyD_Yy3hs', NULL, 4); +INSERT INTO public.report_dashboardcard VALUES (34, '2024-07-24 17:33:28.418787+00', '2024-07-24 17:38:30.121495+00', 12, 6, 0, 0, 29, 2, '[]', '{}', 'fGK6e249eaj1MxDJyZZLf', NULL, 4); +INSERT INTO public.report_dashboardcard VALUES (35, '2024-07-24 17:38:30.121495+00', '2024-07-24 17:38:30.121495+00', 12, 6, 6, 0, 30, 2, '[]', '{}', '8VNMmzxsC-Mzxd3P1gDAn', NULL, 4); +INSERT INTO public.report_dashboardcard VALUES (39, '2024-07-24 19:17:07.05775+00', '2024-07-24 19:17:07.05775+00', 12, 9, 0, 0, 34, 2, '[]', '{}', 's0g3zFAPflKGbWxlqea2S', NULL, 6); +INSERT INTO public.report_dashboardcard VALUES (36, '2024-07-24 19:00:37.979702+00', '2024-07-24 19:03:23.49872+00', 12, 6, 6, 0, 31, 2, '[]', '{}', 'riSRpCJ7XzN6AscB0g-PS', NULL, 5); +INSERT INTO public.report_dashboardcard VALUES (37, '2024-07-24 19:01:40.939461+00', '2024-07-24 19:03:23.49872+00', 12, 6, 0, 0, 32, 2, '[]', '{}', 'j_f7skSFFzS95Mwwb184X', NULL, 5); +INSERT INTO public.report_dashboardcard VALUES (38, '2024-07-24 19:02:38.327454+00', '2024-07-24 19:03:23.49872+00', 12, 6, 0, 12, 33, 2, '[]', '{}', 'uoBWZC-eqcXl-y3r2mQJT', NULL, 5); +INSERT INTO public.report_dashboardcard VALUES (40, '2024-07-24 19:20:57.430005+00', '2024-07-24 19:27:50.720464+00', 12, 9, 9, 0, 35, 2, '[]', '{}', 'N_UXNX1_fHTF1Od-EHa1y', NULL, 6); +INSERT INTO public.report_dashboardcard VALUES (41, '2024-07-24 19:22:49.46583+00', '2024-07-24 19:27:50.720464+00', 12, 9, 0, 12, 36, 2, '[]', '{}', '1HOdzyFY9e4lN-tA0oOXS', NULL, 6); + + +-- +-- Data for Name: 
setting; Type: TABLE DATA; Schema: public; Owner: qualicharge +-- + +INSERT INTO public.setting VALUES ('custom-geojson', '{"42972c31-3ef6-e95f-3c8a-5253ba91e1fb":{"name":"Régions FR","url":"https://etalab-datasets.geo.data.gouv.fr/contours-administratifs/2024/geojson/regions-100m.geojson","region_key":"code","region_name":"nom"},"eb32b0c5-1ad0-c768-7941-be5e1ffb036f":{"name":"Regions FR (simplifiée)","url":"https://raw.githubusercontent.com/gregoiredavid/france-geojson/master/regions-version-simplifiee.geojson","region_key":"code","region_name":"nom"},"47e9a9a3-da5a-e129-5f07-44f6e3e7a408":{"name":"Départements FR (simplifiée)","url":"https://raw.githubusercontent.com/gregoiredavid/france-geojson/master/departements-version-simplifiee.geojson","region_key":"code","region_name":"nom"},"aa3c312c-ced2-dca9-a654-0ead8efc884c":{"name":"Test","url":"https://raw.githubusercontent.com/betagouv/Ressources-techniques/master/cartes-geojson/France-DOM-TOM.geojson","region_key":"dep","region_name":"libgeo"}}'); + + +-- +-- Name: dashboard_tab_id_seq; Type: SEQUENCE SET; Schema: public; Owner: qualicharge +-- + +SELECT pg_catalog.setval('public.dashboard_tab_id_seq', 6, true); + + +-- +-- Name: report_card_id_seq; Type: SEQUENCE SET; Schema: public; Owner: qualicharge +-- + +SELECT pg_catalog.setval('public.report_card_id_seq', 36, true); + + +-- +-- Name: report_dashboard_id_seq; Type: SEQUENCE SET; Schema: public; Owner: qualicharge +-- + +SELECT pg_catalog.setval('public.report_dashboard_id_seq', 2, true); + + +-- +-- Name: report_dashboardcard_id_seq; Type: SEQUENCE SET; Schema: public; Owner: qualicharge +-- + +SELECT pg_catalog.setval('public.report_dashboardcard_id_seq', 41, true); + + +-- +-- PostgreSQL database dump complete +-- + diff --git a/src/notebook/Dockerfile b/src/notebook/Dockerfile index 9922d9f4..fb3dea62 100644 --- a/src/notebook/Dockerfile +++ b/src/notebook/Dockerfile @@ -7,8 +7,10 @@ FROM quay.io/jupyter/base-notebook:notebook-7.2.1 # see: 
https://github.com/mwouts/jupytext/issues/1260 RUN mamba install --yes \ duckdb \ + geoalchemy2 \ geopandas \ jupytext==1.16.2 \ + libgdal-arrow-parquet \ matplotlib \ pandas \ psycopg[binary,pool] \ diff --git a/src/notebook/example.md b/src/notebook/example.md index 69a924a3..5f2aa1d9 100644 --- a/src/notebook/example.md +++ b/src/notebook/example.md @@ -94,6 +94,34 @@ import seaborn as sns sns.barplot(data=pdcs.value_counts("nom_operateur")) ``` +### Example 3: use the GIS + +```python +import pandas as pd + +query = """ +SELECT + Region.code as code, + Region.name as name, + SUM(PointDeCharge.puissance_nominale) as puissance +FROM + Region, + PointDeCharge +INNER JOIN Station ON PointDeCharge.station_id = Station.id +INNER JOIN Localisation ON Station.localisation_id = Localisation.id +WHERE + ST_CONTAINS (Region.geometry, Localisation."coordonneesXY") +GROUP BY code, name +ORDER BY puissance DESC +""" + +with engine.connect() as conn: + # Query a PostgreSQL database using the PostGIS extension + region_power = pd.read_sql_query(query, conn) + +region_power +``` + ## Write data to the database ### Example 1: create a new table with calculated indicator diff --git a/src/notebook/clean-static-dataset.md b/src/notebook/misc/clean-static-dataset.md similarity index 100% rename from src/notebook/clean-static-dataset.md rename to src/notebook/misc/clean-static-dataset.md diff --git a/src/notebook/misc/import-geo-boundaries.md b/src/notebook/misc/import-geo-boundaries.md new file mode 100644 index 00000000..3f018a58 --- /dev/null +++ b/src/notebook/misc/import-geo-boundaries.md @@ -0,0 +1,223 @@ +--- +jupyter: + jupytext: + formats: ipynb,md + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.16.2 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Import French administrative boundaries + + +## Download official up-to-date geojson files + +```python +import gzip 
+import tempfile +from dataclasses import dataclass +from pathlib import Path + +import httpx + +@dataclass +class AdministrativeBoundary: + """An administrative boundary level.""" + path: Path + url: str + +# Define administrative boundaries files to download +levels = ("communes", "epci", "departements", "regions") +etalab_root_url = "https://etalab-datasets.geo.data.gouv.fr/contours-administratifs/2024/geojson" +resolution = "100m" +boundaries = {} + +tmp_dirname = tempfile.mkdtemp() +for level in levels: + boundaries[level] = AdministrativeBoundary( + path=Path(f"{tmp_dirname}/{level}.geojson.gz"), + url=f"{etalab_root_url}/{level}-{resolution}.geojson.gz", + ) + +# Download +for level, ab in boundaries.items(): + print(f"Downloading {level} file to {ab.path}...") + response = httpx.get(ab.url) + with open(ab.path, "wb") as output_file: + output_file.write(gzip.decompress(response.content)) +``` + +## Import administrative boundaries + +```python +import os +from sqlalchemy import create_engine + +# Get database URL from the environment +database_url = os.getenv("DATABASE_URL") + +# Create a database engine that will be used to generate connections +engine = create_engine(database_url) +``` + +```python +import uuid +import geopandas as gp +import pandas as pd +``` + +### Regions + +```python +regions = gp.read_file(f'GeoJSON:{boundaries["regions"].path}') + +# Add missing columns to regions (to fit with the ORM) +regions["id"] = regions.apply(lambda x: uuid.uuid4(), axis=1) +now = pd.Timestamp("now") +regions['created_at'] = now +regions['updated_at'] = now + +# Update column names (to fit with the ORM) +regions.rename(columns={"nom": "name"}, inplace=True) +regions +``` + +```python +regions.explore() +``` + +```python +import sqlalchemy + +# Here we append to keep existing database schema for this table +with engine.connect() as conn: + conn.execute(sqlalchemy.text("TRUNCATE TABLE region CASCADE")) + conn.commit() +regions.to_postgis("region", engine, 
if_exists="append") +``` + +### Departments + +```python +departments = gp.read_file(f'GeoJSON:{boundaries["departements"].path}') + +# Add missing columns to regions (to fit with the ORM) +departments["id"] = departments.apply(lambda x: uuid.uuid4(), axis=1) +now = pd.Timestamp("now") +departments['created_at'] = now +departments['updated_at'] = now + +# Update column names (to fit with the ORM) +departments.rename(columns={"nom": "name"}, inplace=True) +departments +``` + +```python +# Add foreign keys +departments = departments.merge(regions[["id", "code"]], how="outer", left_on="region", right_on="code", suffixes=("_dept", "_reg")) + +# Remove extra columns and rename columns used for merging +departments.rename(columns={"code_dept": "code", "id_dept": "id", "id_reg": "region_id"}, inplace=True) +departments.drop(["code_reg", "region"], axis=1, inplace=True) +departments.sort_values(by=["code"]) +``` + +```python +departments.explore() +``` + +```python +# Here we append to keep existing database schema for this table +with engine.connect() as conn: + conn.execute(sqlalchemy.text("TRUNCATE TABLE department CASCADE")) + conn.commit() + +departments.to_postgis("department", engine, if_exists="append") +``` + +### EPCI + +```python +epci = gp.read_file(f'GeoJSON:{boundaries["epci"].path}') + +# Add missing columns to regions (to fit with the ORM) +epci["id"] = epci.apply(lambda x: uuid.uuid4(), axis=1) +now = pd.Timestamp("now") +epci['created_at'] = now +epci['updated_at'] = now + +# Update column names (to fit with the ORM) +epci.rename(columns={"nom": "name"}, inplace=True) +epci +``` + +```python +epci.explore() +``` + +```python +# Here we append to keep existing database schema for this table +with engine.connect() as conn: + conn.execute(sqlalchemy.text("TRUNCATE TABLE epci CASCADE")) + conn.commit() + +epci.to_postgis("epci", engine, if_exists="append") +``` + +### Cities + +```python +cities = gp.read_file(f'GeoJSON:{boundaries["communes"].path}') + +# 
Add missing columns to regions (to fit with the ORM) +cities["id"] = cities.apply(lambda x: uuid.uuid4(), axis=1) +now = pd.Timestamp("now") +cities['created_at'] = now +cities['updated_at'] = now + +# Update column names (to fit with the ORM) +cities.rename(columns={"nom": "name"}, inplace=True) +cities +``` + +```python +# Add foreign keys + +# -- Department -- +cities = cities.merge(departments[["id", "code"]], how="outer", left_on="departement", right_on="code", suffixes=("_city", "_dept")) + +# Remove extra columns and rename columns used for merging +cities.rename(columns={"code_city": "code", "id_city": "id", "id_dept": "department_id"}, inplace=True) +cities.drop(["code_dept", "region", "commune", "departement", "plm"], axis=1, inplace=True) + +# -- EPCI -- +cities = cities.merge(epci[["id", "code"]], how="outer", left_on="epci", right_on="code", suffixes=("_city", "_epci")) + +# Remove extra columns and rename columns used for merging +cities.rename(columns={"code_city": "code", "id_city": "id", "id_epci": "epci_id"}, inplace=True) +cities.drop(["code_epci", "epci"], axis=1, inplace=True) +cities.sort_values(by=["code"]) +``` + +```python +cities.explore() +``` + +```python +# Here we append to keep existing database schema for this table +with engine.connect() as conn: + conn.execute(sqlalchemy.text("TRUNCATE TABLE city CASCADE")) + conn.commit() + +cities.to_postgis("city", engine, if_exists="append") +``` + +```python + +``` diff --git a/src/notebook/misc/import-static.md b/src/notebook/misc/import-static.md new file mode 100644 index 00000000..7dd956f4 --- /dev/null +++ b/src/notebook/misc/import-static.md @@ -0,0 +1,379 @@ +--- +jupyter: + jupytext: + formats: ipynb,md + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.16.2 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Import IRVE static data + +```python +import os +from sqlalchemy import 
create_engine, text + +# Get database URL from the environment +database_url = os.getenv("DATABASE_URL") + +# Create a database engine that will be used to generate connections +engine = create_engine(database_url) +``` + +```python +from pathlib import Path +import uuid + +import geopandas as gp +import pandas as pd +``` + +## Load development dataset + +```python +static_file = Path("../../../data/irve-statique.parquet") +static = gp.read_file(static_file) +``` + +## Handle Enums + +```python +from enum import StrEnum + +class ImplantationStationEnum(StrEnum): + """Statique.implantation_station field enum.""" + + VOIRIE = "Voirie" + PARKING_PUBLIC = "Parking public" + PARKING_PRIVE_USAGE_PUBLIC = "Parking privé à usage public" + PARKING_PRIVE_CLIENTELE = "Parking privé réservé à la clientèle" + STATION_RECHARGE_RAPIDE = "Station dédiée à la recharge rapide" + + +class ConditionAccesEnum(StrEnum): + """Statique.condition_acces field enum.""" + + ACCESS_LIBRE = "Accès libre" + ACCESS_RESERVE = "Accès réservé" + + +class AccessibilitePMREnum(StrEnum): + """Statique.accessibilite_pmr field enum.""" + + RESERVE_PMR = "Réservé PMR" + NON_RESERVE = "Accessible mais non réservé PMR" + NON_ACCESSIBLE = "Non accessible" + INCONNUE = "Accessibilité inconnue" + + +class RaccordementEnum(StrEnum): + """Statique.raccordement field enum.""" + + DIRECT = "Direct" + INDIRECT = "Indirect" + +enum_to_replace = [] +enum_value = [] + +for enm in (ImplantationStationEnum, ConditionAccesEnum, AccessibilitePMREnum, RaccordementEnum): + for k in enm: + enum_to_replace.append(str(k.name)) + enum_value.append(k.value) + +print(f"{enum_to_replace=}") +print(f"{enum_value=}") +``` + +## Transform dataframe and save to database + +```python +from typing import Union + +from geopandas import GeoDataFrame +from pandas import DataFrame +from sqlalchemy import Engine +from sqlalchemy import types as sa_types + + +def save(data: Union[DataFrame, GeoDataFrame], engine: Engine, table: str, truncate: 
bool = False, dtype: dict = None):
+    """Save dataframe to database.
+
+    Args:
+        data: DataFrame (or GeoDataFrame) to persist.
+        engine: SQLAlchemy engine used to connect to the database.
+        table: target table name (internal constant — not user input).
+        truncate: when True, run TRUNCATE TABLE … CASCADE before inserting.
+        dtype: optional column-name → SQLAlchemy-type mapping (left unmodified).
+    """
+    if truncate:
+        with engine.connect() as conn:
+            conn.execute(text(f"TRUNCATE TABLE {table} CASCADE"))
+            conn.commit()
+
+    # Work on a copy so the caller's mapping is not mutated by the "id" entry.
+    dtype = dict(dtype) if dtype else {}
+    dtype.update({"id": sa_types.UUID})
+
+    # GeoDataFrames need to_postgis so geometry columns are serialized properly.
+    to_database = data.to_sql
+    if isinstance(data, GeoDataFrame):
+        to_database = data.to_postgis
+
+    to_database(table, engine, if_exists="append", index=False, dtype=dtype)
+
+
+def add_timestamped_table_fields(data: Union[DataFrame, GeoDataFrame]) -> Union[DataFrame, GeoDataFrame]:
+    """Add required fields for a BaseTimestampedSQLModel."""
+    data["id"] = data.apply(lambda x: uuid.uuid4(), axis=1)
+    now = pd.Timestamp.now(tz="utc")
+    data['created_at'] = now
+    data['updated_at'] = now
+    return data
+```
+
+### Localisation
+
+```python
+import json
+
+from shapely.geometry import Point
+
+# Extract Localisation fields
+localisation_fields = ["adresse_station", "code_insee_commune", "coordonneesXY"]
+localisation = static[localisation_fields]
+
+# Remove duplicates
+localisation = localisation.drop_duplicates()
+
+# Transform coordinates to POINT()
+localisation["geometry"] = localisation.apply(lambda x: Point(*json.loads(x["coordonneesXY"])), axis=1)
+localisation.drop(columns="coordonneesXY", inplace=True)
+localisation.rename(columns={"geometry": "coordonneesXY"}, inplace=True)
+
+# Add missing columns (to fit with the ORM)
+localisation = add_timestamped_table_fields(localisation)
+
+# Convert to a GeoDataFrame
+localisation = gp.GeoDataFrame(localisation, crs="EPSG:4326", geometry="coordonneesXY")
+localisation
+```
+
+```python
+save(localisation, engine, "localisation", truncate=True)
+
+# Just to check. 
+saved = gp.GeoDataFrame.from_postgis("SELECT * FROM localisation", engine, geom_col="coordonneesXY")
+saved
+```
+
+### Amenageur
+
+```python
+# Extract model fields
+amenageur_fields = ["nom_amenageur", "siren_amenageur", "contact_amenageur"]
+amenageur = static[amenageur_fields]
+
+# Remove duplicates
+amenageur = amenageur.drop_duplicates()
+
+# Add missing columns (to fit with the ORM)
+amenageur = add_timestamped_table_fields(amenageur)
+```
+
+```python
+save(amenageur, engine, "amenageur", truncate=True)
+
+saved = pd.read_sql("SELECT * FROM amenageur", engine)
+saved
+```
+
+### Operateur
+
+```python
+# Extract model fields
+operateur_fields = ["nom_operateur", "telephone_operateur", "contact_operateur"]
+operateur = static[operateur_fields]
+
+# Remove duplicates
+operateur = operateur.drop_duplicates()
+
+# Add missing columns (to fit with the ORM)
+operateur = add_timestamped_table_fields(operateur)
+operateur
+```
+
+```python
+save(operateur, engine, "operateur", truncate=True)
+
+saved = pd.read_sql("SELECT * FROM operateur", engine)
+saved
+```
+
+### Enseigne
+
+```python
+# Extract model fields
+enseigne_fields = ["nom_enseigne",]
+enseigne = static[enseigne_fields]
+
+# Remove duplicates
+enseigne = enseigne.drop_duplicates()
+
+# Add missing columns (to fit with the ORM)
+enseigne = add_timestamped_table_fields(enseigne)
+enseigne
+```
+
+```python
+save(enseigne, engine, "enseigne", truncate=True)
+
+saved = pd.read_sql("SELECT * FROM enseigne", engine)
+saved
+```
+
+### Get operational units
+
+```python
+operational_units = pd.read_sql("SELECT * FROM operationalunit", engine)
+operational_units
+```
+
+### Handle foreign keys
+
+```python
+def add_ids(left: DataFrame, right: DataFrame, fields: list, fk_name: str) -> DataFrame:
+    """Add missing related-object ids.
+
+    Left-merge `right` into `left` on `fields`, drop the `created_at`/`updated_at`
+    columns brought in by the merge, and rename the merged `id` column to `fk_name`.
+    """
+    with_ids = left.merge(right, how="left", on=fields)
+    with_ids.drop(columns=["created_at", "updated_at"], inplace=True)
+    return with_ids.rename(columns={"id": fk_name})
+ 
+static_with_ids = add_ids(static, amenageur, amenageur_fields, "amenageur_id") +static_with_ids = add_ids(static_with_ids, operateur, operateur_fields, "operateur_id") +static_with_ids = add_ids(static_with_ids, enseigne, enseigne_fields, "enseigne_id") + +# Get back to coordinates as a string for comparison +localisation_with_geom_string = localisation.drop(columns="coordonneesXY") +localisation_with_geom_string["coordonneesXY"] = static.loc[localisation_with_geom_string.index]["coordonneesXY"] + +static_with_ids = add_ids(static_with_ids, localisation_with_geom_string, localisation_fields, "localisation_id") +static_with_ids +``` + +### Station + +```python +# Extract model fields +station_fields = [ + "id_station_itinerance", + "id_station_local", + "nom_station", + "implantation_station", + "nbre_pdc", + "condition_acces", + "horaires", + "station_deux_roues", + "raccordement", + "num_pdl", + "date_maj", + "date_mise_en_service", +] +station = static[station_fields] + +# Remove duplicates +station = station.drop_duplicates() + +# Add missing columns (to fit with the ORM) +station = add_timestamped_table_fields(station) + +# Add foreign keys +station["amenageur_id"] = static_with_ids.loc[station.index]["amenageur_id"] +station["operateur_id"] = static_with_ids.loc[station.index]["operateur_id"] +station["enseigne_id"] = static_with_ids.loc[station.index]["enseigne_id"] +station["localisation_id"] = static_with_ids.loc[station.index]["localisation_id"] + +# Add operational units +station["operational_unit"] = station.apply(lambda x: x["id_station_itinerance"][:5], axis=1) +station = station.merge(operational_units[["id", "code"]], how="left", left_on="operational_unit", right_on="code") +station.drop(columns=["operational_unit", "code"], inplace=True) +station.rename(columns={"id_x": "id", "id_y": "operational_unit_id"}, inplace=True) + +# Fix Enums +station = station.replace(to_replace=enum_value, value=enum_to_replace) +station +``` + +```python +dtype = { + 
"implantation_station": sa_types.Enum,
+    "condition_acces": sa_types.Enum,
+    "raccordement": sa_types.Enum,
+    # NOTE(review): "station_deux_roues" is a boolean field — confirm whether it
+    # needs an explicit sa_types.Boolean entry like the pointdecharge mapping below.
+    "date_maj": sa_types.Date,
+    "date_mise_en_service": sa_types.Date,
+    "amenageur_id": sa_types.UUID,
+    "operateur_id": sa_types.UUID,
+    "enseigne_id": sa_types.UUID,
+    "localisation_id": sa_types.UUID,
+    "operational_unit_id": sa_types.UUID,
+}
+save(station, engine, "station", truncate=True, dtype=dtype)
+
+saved = pd.read_sql("SELECT * FROM station", engine)
+saved
+```
+
+```python
+# add_ids declares `fields: list`, so pass the merge key as a list
+# (consistent with the other call sites).
+static_with_ids = add_ids(static_with_ids, station, ["id_station_itinerance"], "station_id")
+static_with_ids
+```
+
+### Point of charge
+
+```python
+# Extract model fields
+pdc_fields = [
+    "id_pdc_itinerance",
+    "id_pdc_local",
+    "puissance_nominale",
+    "prise_type_ef",
+    "prise_type_2",
+    "prise_type_combo_ccs",
+    "prise_type_chademo",
+    "prise_type_autre",
+    "gratuit",
+    "paiement_acte",
+    "paiement_cb",
+    "paiement_autre",
+    "tarification",
+    "reservation",
+    "accessibilite_pmr",
+    "restriction_gabarit",
+    "observations",
+    "cable_t2_attache",
+]
+pdc = static_with_ids[pdc_fields]
+
+# Remove duplicates
+pdc = pdc.drop_duplicates()
+
+# Add missing columns (to fit with the ORM)
+pdc = add_timestamped_table_fields(pdc)
+
+# Add foreign keys
+pdc["station_id"] = static_with_ids.loc[pdc.index]["station_id"]
+
+# Fix Enums
+pdc = pdc.replace(to_replace=enum_value, value=enum_to_replace)
+pdc
+```
+
+```python
+dtype = {
+    "accessibilite_pmr": sa_types.Enum,
+    "station_id": sa_types.UUID,
+    "gratuit": sa_types.Boolean,
+    "paiement_cb": sa_types.Boolean,
+    "paiement_autre": sa_types.Boolean,
+    "cable_t2_attache": sa_types.Boolean,
+}
+save(pdc, engine, "pointdecharge", truncate=True, dtype=dtype)
+
+saved = pd.read_sql("SELECT * FROM PointDeCharge", engine)
+saved
+```