
Commit

🔀 Merge branch 'main' into production
jh0ker committed Oct 30, 2024
2 parents 7800da6 + 2401064 commit fe21346
Showing 20 changed files with 4,671 additions and 2,048 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yml
@@ -12,7 +12,7 @@ jobs:
       matrix:
         node-version: ["20.x"]
         python-version: ["3.11"]
-        pipenv-version: ["2023.10.24"]
+        pipenv-version: ["2024.1.0"]
 
     steps:
       # Checkout repository
1 change: 1 addition & 0 deletions Pipfile
@@ -23,6 +23,7 @@ datawrapper = "~=0.6.0"
 fastparquet = "2023.10.1"
 python-slugify = "*"
 babel = "*"
+pydantic = "~=2.9.2"
 
 [dev-packages]
 click = "~=8.1"
1,970 changes: 1,030 additions & 940 deletions Pipfile.lock

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions ddj_cloud/scrapers/spnv_qualitaetsmonitor_nrw/.copier-answers.yml
@@ -0,0 +1,12 @@
# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY
_src_path: /home/jhoeke/git/ddj/wdr-ddj-cloud/scraper_template
contact_email: [email protected]
contact_name: Jannes Höke
description: Scraping the SPNV-Qualitätsmonitor NRW website at https://infoportal.mobil.nrw/information-service/qualitaetsmonitor-nrw.html
display_name: SPNV-Qualitätsmonitor NRW
ephemeral_storage: '512'
interval: daily
memory_size: '1024'
module_name: spnv_qualitaetsmonitor_nrw
preset: minimal

5 changes: 5 additions & 0 deletions ddj_cloud/scrapers/spnv_qualitaetsmonitor_nrw/README.md
@@ -0,0 +1,5 @@
# SPNV-Qualitätsmonitor NRW

**Contact:** Jannes Höke ([email protected])

Scraping the SPNV-Qualitätsmonitor NRW website at https://infoportal.mobil.nrw/information-service/qualitaetsmonitor-nrw.html
Empty file.
146 changes: 146 additions & 0 deletions ddj_cloud/scrapers/spnv_qualitaetsmonitor_nrw/models.py
@@ -0,0 +1,146 @@
from __future__ import annotations

import abc
from typing import Annotated, Any, Literal, Never, TypeAlias

from annotated_types import Len  # noqa: TCH002
from pydantic import BaseModel, Field


class Filters:
    class ItemBase(BaseModel, abc.ABC):
        class Config:
            arbitrary_types_allowed = True

        status: str
        selected: bool
        children: Annotated[list[Never], Len(max_length=0)]

    class ItemInt(ItemBase):
        title: int

    class ItemString(ItemBase):
        title: str

    class TargetBase(BaseModel, abc.ABC): ...

    class TargetYear(TargetBase):
        target: Literal["year"]
        items: list[Filters.ItemInt]

    class TargetQuarter(TargetBase):
        target: Literal["quarter"]
        items: list[Filters.ItemInt]

    class TargetRegion(TargetBase):
        target: Literal["region"]
        items: list[Filters.ItemString]

    class TargetEvu(TargetBase):
        target: Literal["evu"]
        items: list[Filters.ItemString]

    class TargetProductType(TargetBase):
        target: Literal["product_type"]
        items: list[Filters.ItemString]

    class TargetLines(TargetBase):
        target: Literal["lines"]
        items: list[Filters.ItemString]

    class TargetComplexity(TargetBase):
        target: Literal["complexity"]
        items: list[Filters.ItemInt]

    Target: TypeAlias = (
        TargetYear
        | TargetQuarter
        | TargetRegion
        | TargetEvu
        | TargetProductType
        | TargetLines
        | TargetComplexity
    )

    class Data(BaseModel):
        targets: list[
            Annotated[
                Filters.Target,
                Field(discriminator="target"),
            ]
        ]

    @staticmethod
    def from_json(json: list[dict[str, Any]]) -> Data:
        return Filters.Data.model_validate({"targets": json})

    # class Data(BaseModel):
    #     year: Filters.TargetYear
    #     quarter: Filters.TargetQuarter
    #     region: Filters.TargetRegion
    #     evu: Filters.TargetEvu
    #     product_type: Filters.TargetProductType
    #     lines: Filters.TargetLines
    #     complexity: Filters.TargetComplexity

    # @staticmethod
    # def from_json(json: list[dict[str, Any]]) -> Data:
    #     data_dict: dict[str, Filters.TargetBase] = {}

    #     for item in json:
    #         for model in Filters.TargetBase.__subclasses__():
    #             with contextlib.suppress(ValidationError):
    #                 data_dict[item["target"]] = model.model_validate(item)

    #     return Filters.Data.model_validate(data_dict)


class Results:
    class ColumnBase(BaseModel, abc.ABC):
        year: int

    class ColumnOverallRanking(ColumnBase):
        quarters: list[float]

    class ColumnComplexity(ColumnBase):
        quarters: list[int]

    class ColumnPunctuality(ColumnBase):
        quarters: list[float]

    class ColumnReliability(ColumnBase):
        quarters: list[float]

    class ColumnTrainFormation(ColumnBase):
        quarters: list[float]

    class ColumnPassengers(ColumnBase):
        quarters: list[int]

    Column: TypeAlias = (
        ColumnOverallRanking
        | ColumnComplexity
        | ColumnPunctuality
        | ColumnReliability
        | ColumnTrainFormation
        | ColumnPassengers
    )

    class Data(BaseModel):
        id: int = Field(alias="_id")
        evu: str = Field(alias="_evu")
        evutooltip: str = Field(alias="_evutooltip")
        producttype: str = Field(alias="_producttype")
        client: str = Field(alias="_client")
        fullname: str = Field(alias="_fullname")
        subnet: str = Field(alias="_subnet")
        runtime: str = Field(alias="_runtime")
        line_stations: str = Field(alias="_line_stations")
        line: str
        ranking: int
        overall_ranking: Results.ColumnOverallRanking
        complexity: Results.ColumnComplexity
        punctuality: Results.ColumnPunctuality
        reliability: Results.ColumnReliability
        train_formation: Results.ColumnTrainFormation
        passengers: Results.ColumnPassengers
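
A note on the discriminated union above: Field(discriminator="target") lets Pydantic dispatch each entry of targets to the matching Target subclass based on the literal value of its "target" key, instead of trying every union member in turn. A minimal usage sketch of Filters.from_json follows; the payload values are invented for illustration and are not taken from the live API.

# Hypothetical sketch, not part of the commit: sample values below are made up.
sample = [
    {"target": "year", "items": [{"title": 2023, "status": "", "selected": True, "children": []}]},
    {"target": "evu", "items": [{"title": "Beispiel-EVU", "status": "", "selected": False, "children": []}]},
]
data = Filters.from_json(sample)
assert isinstance(data.targets[0], Filters.TargetYear)
assert isinstance(data.targets[1], Filters.TargetEvu)
assert data.targets[0].items[0].title == 2023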
@@ -0,0 +1,100 @@
from collections.abc import Iterable
from typing import cast

import pandas as pd
import requests

from ddj_cloud.utils.storage import upload_dataframe

from .models import Filters, Results

BASE_URL = "https://infoportal.mobil.nrw"


def _load_filters():
    response = requests.get(f"{BASE_URL}/QmFilterShow.html")
    response.raise_for_status()

    response_json = response.json()
    assert isinstance(response_json, list), "Unexpected response type for QmFilterShow"

    return Filters.from_json(response_json)


def _list_param_raw(target: str, items: Iterable[str]):
    return f"tx_cpqualitymonitor_ajaxlistfilter[filter][{target}]", ",".join(items)


def _list_param(target: Filters.Target):
    return _list_param_raw(target.target, [str(item.title) for item in target.items])


def _load_year(targets: list[Filters.Target], year: int):
    url = f"{BASE_URL}/QmAjaxListFilter.html"
    post_data = dict(
        (
            _list_param_raw("year", [str(year)]),
            *map(_list_param, targets),
        )
    )

    response = requests.post(url, data=post_data)
    response.raise_for_status()

    response_json = response.json()
    assert isinstance(response_json, dict), "Unexpected response type for QmAjaxListFilter"
    assert "data" in response_json, "No data in response"

    for result in response_json["data"]:
        yield Results.Data.model_validate(result)


def _to_quarter_rows(data: Results.Data, year: int):
    quarterly_columns = (
        "overall_ranking",
        "complexity",
        "punctuality",
        "reliability",
        "train_formation",
        "passengers",
    )
    for quarter in range(4):
        # Skip quarters where data is unavailable
        # if data.overall_ranking.quarters[quarter] == 0:
        #     continue

        base_data = {
            "year": year,
            "quarter": quarter,
            **data.model_dump(),
        }
        quarterly_data = {
            column: cast(Results.Column, getattr(data, column)).quarters[quarter]
            for column in quarterly_columns
        }

        yield base_data | quarterly_data


def run():
    filters_data = _load_filters()

    targets_without_year: list[Filters.Target] = []
    years_available: list[int] = []

    for target in filters_data.targets:
        match target.target:
            case "year":
                years_available = [item.title for item in target.items]
            case _:
                targets_without_year.append(target)

    assert len(years_available) > 0, "No years available"

    rows: list[dict] = []
    for year in years_available:
        for result in _load_year(targets_without_year, year):
            rows.extend(_to_quarter_rows(result, year))

    df = pd.DataFrame(rows)
    upload_dataframe(df, "spnv_qualitaetsmonitor_nrw/data.csv")
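
A note on the request construction above: _list_param_raw builds the bracketed filter parameter name the endpoint expects and joins the selected values with commas, and _load_year collects those (key, value) pairs into a flat form dict for the POST body. A small sketch of the resulting mapping, using placeholder filter values rather than real ones:

# Hypothetical sketch, not part of the commit: placeholder values for illustration.
key, value = _list_param_raw("region", ["Beispielregion A", "Beispielregion B"])
# key   == "tx_cpqualitymonitor_ajaxlistfilter[filter][region]"
# value == "Beispielregion A,Beispielregion B"

post_data = dict((
    _list_param_raw("year", ["2023"]),
    _list_param_raw("region", ["Beispielregion A", "Beispielregion B"]),
))
# post_data == {
#     "tx_cpqualitymonitor_ajaxlistfilter[filter][year]": "2023",
#     "tx_cpqualitymonitor_ajaxlistfilter[filter][region]": "Beispielregion A,Beispielregion B",
# }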
1 change: 1 addition & 0 deletions ddj_cloud/scrapers/talsperren/common.py
@@ -72,6 +72,7 @@ class ReservoirMeta(TypedDict):
     capacity_mio_m3: float
     lat: float
     lon: float
+    main_purpose: str
 
 
 class Federation(Protocol):
3 changes: 3 additions & 0 deletions ddj_cloud/scrapers/talsperren/federations/agger.py
@@ -25,18 +25,21 @@ class AggerFederation(Federation):
             "capacity_mio_m3": 17.06,
             "lat": 51.0359,
             "lon": 7.6330,
+            "main_purpose": "Flussregulierung",
         },
         "Genkeltalsperre": {
             "url": "https://gis.aggerverband.de/public/pegel/genkeltalsperre_cm.json",
             "capacity_mio_m3": 8.19,
             "lat": 51.0618,
             "lon": 7.6262,
+            "main_purpose": "Trinkwasserversorgung",
         },
         "Wiehltalsperre": {
             "url": "https://gis.aggerverband.de/public/pegel/wiehltalsperre_cm.json",
             "capacity_mio_m3": 31.85,
             "lat": 50.9473,
             "lon": 7.6706,
+            "main_purpose": "Trinkwasserversorgung",
         },
     }
 
8 changes: 8 additions & 0 deletions ddj_cloud/scrapers/talsperren/federations/eifel_rur.py
@@ -27,6 +27,7 @@ class EifelRurFederation(Federation):
             "capacity_mio_m3": 19.30,
             "lat": 50.4952,
             "lon": 6.4216,
+            "main_purpose": "Trinkwasserversorgung",
         },
         # Doesn't seem to have data anymore
         "Rurtalsperre Gesamt": {
@@ -35,42 +36,49 @@
             "capacity_mio_m3": 203.20,
             "lat": 50.637222,
             "lon": 6.441944,
+            "main_purpose": "Trinkwasserversorgung; Flussregulierung",
         },
         "Rurtalsperre Obersee": {
             "id": 13,
             "capacity_mio_m3": 17.77,
             "lat": 50.6056,
             "lon": 6.3925,
+            "main_purpose": "Trinkwasserversorgung",
         },
         "Rurtalsperre Hauptsee": {
             "id": 12,
             "capacity_mio_m3": 184.83,
             "lat": 50.637222,
             "lon": 6.441944,
+            "main_purpose": "Flussregulierung",
         },
         "Urfttalsperre": {
             "id": 16,
             "capacity_mio_m3": 45.51,
             "lat": 50.6029,
             "lon": 6.4195,
+            "main_purpose": "Flussregulierung",
         },
         "Wehebachtalsperre": {
             "id": 17,
             "capacity_mio_m3": 25.06,
             "lat": 50.7550,
             "lon": 6.3401,
+            "main_purpose": "Trinkwasserversorgung; Flussregulierung",
         },
         "Stauanlage Heimbach": {
             "id": 2,
             "capacity_mio_m3": 1.21,
             "lat": 50.6285,
             "lon": 6.4792,
+            "main_purpose": "Flussregulierung",
         },
         "Stauanlage Obermaubach": {
             "id": 5,
             "capacity_mio_m3": 1.65,
             "lat": 50.7143,
             "lon": 6.4483,
+            "main_purpose": "Flussregulierung",
         },
     }
 
