diff --git a/Makefile b/Makefile index 3a1f445b..fe8c61aa 100644 --- a/Makefile +++ b/Makefile @@ -395,7 +395,7 @@ seed-oidc: ## seed the OIDC provider seed-dashboard: ## seed dashboard @echo "Running dashboard service database engine…" @$(COMPOSE_UP) --wait postgresql - @echo "Seeding dashboard…"# + @echo "Seeding dashboard…" @bin/manage loaddata dashboard/fixtures/dsfr_fixtures.json @bin/manage seed_consent .PHONY: seed-dashboard diff --git a/data/irve-statique.json.gz b/data/irve-statique.json.gz index 6e09b4f7..a3278745 100644 Binary files a/data/irve-statique.json.gz and b/data/irve-statique.json.gz differ diff --git a/data/irve-statique.parquet b/data/irve-statique.parquet index 2dec02a8..90a1c4bc 100644 Binary files a/data/irve-statique.parquet and b/data/irve-statique.parquet differ diff --git a/src/api/CHANGELOG.md b/src/api/CHANGELOG.md index e6b93287..e661c951 100644 --- a/src/api/CHANGELOG.md +++ b/src/api/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to - Activate and configure Sentry profiling by setting the `SENTRY_PROFILES_SAMPLE_RATE` configuration - Set request's user (`username`) in Sentry's context +- Add `Localisation.coordonneesXY` unique constraint [BC] 💥 ### Changed @@ -37,6 +38,10 @@ and this project adheres to - Renamed database `raccordementemum` to `raccordementenum` +### Removed + +- Remove `Localisation.adresse_station` unique constraint + ## [0.16.0] - 2024-12-12 ### Changed diff --git a/src/api/qualicharge/factories/static.py b/src/api/qualicharge/factories/static.py index 23230de3..d4663d96 100644 --- a/src/api/qualicharge/factories/static.py +++ b/src/api/qualicharge/factories/static.py @@ -62,6 +62,12 @@ class StatiqueFactory(ModelFactory[Statique]): lambda: DataclassFactory.__random__.choice(prefixes) + FrenchDataclassFactory.__faker__.pystr_format("E######") ) + coordonneesXY = Use( + lambda: ( + f"[{round(DataclassFactory.__random__.uniform(-180,180), 6)}, " # longitude + 
f"{round(DataclassFactory.__random__.uniform(-90,90), 6)}]" # latitude + ) + ) @post_generated @classmethod diff --git a/src/api/qualicharge/migrations/versions/8144a7d2553d_update_localisation_uniqueness_.py b/src/api/qualicharge/migrations/versions/8144a7d2553d_update_localisation_uniqueness_.py new file mode 100644 index 00000000..771cfe9c --- /dev/null +++ b/src/api/qualicharge/migrations/versions/8144a7d2553d_update_localisation_uniqueness_.py @@ -0,0 +1,33 @@ +"""Update localisation uniqueness criteria + +Revision ID: 8144a7d2553d +Revises: d3d2c20f8efd +Create Date: 2025-01-10 10:18:50.745037 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "8144a7d2553d" +down_revision: Union[str, None] = "d3d2c20f8efd" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.drop_constraint( + "localisation_adresse_station_key", "localisation", type_="unique" + ) + op.create_unique_constraint( + "localisation_coordonneesXY_key", "localisation", ["coordonneesXY"] + ) + + +def downgrade() -> None: + op.drop_constraint("localisation_coordonneesXY_key", "localisation", type_="unique") + op.create_unique_constraint( + "localisation_adresse_station_key", "localisation", ["adresse_station"] + ) diff --git a/src/api/qualicharge/schemas/core.py b/src/api/qualicharge/schemas/core.py index 1c88b3d6..ff15876e 100644 --- a/src/api/qualicharge/schemas/core.py +++ b/src/api/qualicharge/schemas/core.py @@ -122,7 +122,7 @@ class Localisation(BaseTimestampedSQLModel, table=True): ) id: UUID = Field(default_factory=uuid4, primary_key=True) - adresse_station: str = Field(unique=True) + adresse_station: str code_insee_commune: str = Field(regex=r"^([013-9]\d|2[AB1-9])\d{3}$") coordonneesXY: DataGouvCoordinate = Field( sa_type=Geometry( @@ -131,6 +131,7 @@ class Localisation(BaseTimestampedSQLModel, table=True): srid=4326, 
spatial_index=True, ), + unique=True, ) # type: ignore # Relationships diff --git a/src/api/qualicharge/schemas/sql.py b/src/api/qualicharge/schemas/sql.py index 674669de..c528d35e 100644 --- a/src/api/qualicharge/schemas/sql.py +++ b/src/api/qualicharge/schemas/sql.py @@ -289,7 +289,7 @@ def save(self): self._save_schema( self.localisation, Localisation, - constraint="localisation_adresse_station_key", + constraint="localisation_coordonneesXY_key", ) self._save_schema( self.station, diff --git a/src/notebook/misc/clean-static-dataset.md b/src/notebook/misc/clean-static-dataset.md index d12e8eac..c72faaa6 100644 --- a/src/notebook/misc/clean-static-dataset.md +++ b/src/notebook/misc/clean-static-dataset.md @@ -101,6 +101,52 @@ fixed.to_json("../../data/irve-statique.json.gz", orient="records", lines=True, fixed.to_parquet("../../data/irve-statique.parquet", compression="gzip") ``` +## Clean duplicated coordinates + +```python +import pandas as pd + +static = pd.read_parquet("../../../data/irve-statique.parquet") +static +``` + +Get a list of unique `coordonneesxy`/`adresse_station` couples. +```python +addr_crds = static[~static.duplicated(["coordonneesxy", "adresse_station"], keep='first')][["adresse_station", "coordonneesxy"]] +addr_crds +``` + +Remove duplicated coordinates, as it's supposed to be unique in the database (two different addresses are not supposed to have the same coordinates). + +```python +pd.set_option('display.max_rows', 50) +selected_addr_crds = addr_crds[~addr_crds.duplicated(["coordonneesxy"], keep="first")] +selected_addr_crds +``` + +Perform row selection: keep only the rows whose (`adresse_station`, `coordonneesxy`) pair was selected above, so that each coordinate maps to a single address. +```python +cleaned_static = static.merge(selected_addr_crds, on=["adresse_station", "coordonneesxy"], how="inner") +cleaned_static[["id_pdc_itinerance", "coordonneesxy", "adresse_station"]] +``` + +Clean column names and Enums. 
+ +```python +cleaned_static = cleaned_static.rename(columns={"coordonneesxy": "coordonneesXY"}) +cleaned_static = cleaned_static.replace(to_replace=enum_to_replace, value=enum_value) + +cleaned_static +``` + +Export to json + parquet + +```python +cleaned_static.to_json("../../../data/irve-statique.json.gz", orient="records", lines=True, compression="gzip") +``` + +```python +cleaned_static.to_parquet("../../../data/irve-statique.parquet") ```