diff --git a/.bumpversion.cfg b/.bumpversion.cfg index a9d9ae3e..5b335316 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.14.4 +current_version = 0.14.5 parse = (?P\d+)\.(?P\d+)\.(?P\d+)((?P(a|na))+(?P\d+))? serialize = {major}.{minor}.{patch}{release}{build} diff --git a/.github/ISSUE_TEMPLATE/issue_template_user_kudos.md b/.github/ISSUE_TEMPLATE/issue_template_user_kudos.md index 39fb7b5c..86890bed 100644 --- a/.github/ISSUE_TEMPLATE/issue_template_user_kudos.md +++ b/.github/ISSUE_TEMPLATE/issue_template_user_kudos.md @@ -12,12 +12,14 @@ It helps the project quite a bit! We will add you to the list of valued users. -Please, insert your information between the double quotes below - fill out at minimum "affiliation" :purple_heart: +Please, insert your information below - fill out at minimum affiliation :purple_heart: -family-names: "" -given-names: "" -alias: "" -affiliation: "" -orcid: "" +:pencil2: **Spaces** and the following special characters are allowed: @ ? ! | . , : ; - _ [ / ( ) \ ] § $ % & = + < > + +family-names: +given-names: +alias: +affiliation: +orcid: Thank you! diff --git a/.github/workflows/ci-production.yml b/.github/workflows/ci-production.yml index 4ba916bf..6300f75a 100644 --- a/.github/workflows/ci-production.yml +++ b/.github/workflows/ci-production.yml @@ -32,7 +32,7 @@ jobs: - name: create package run: python -m build --sdist - name: import open-mastr - run: python -m pip install ./dist/open_mastr-0.14.4.tar.gz + run: python -m pip install ./dist/open_mastr-0.14.5.tar.gz - name: Create credentials file env: MASTR_TOKEN: ${{ secrets.MASTR_TOKEN }} diff --git a/.github/workflows/extend_user_cff.yml b/.github/workflows/extend_user_cff.yml index 593b90ef..e7db64fb 100644 --- a/.github/workflows/extend_user_cff.yml +++ b/.github/workflows/extend_user_cff.yml @@ -85,4 +85,4 @@ jobs: Closes #${{ github.event.issue.number }} - Many thanks ${{ github.actor }}! 
+ Many thanks @${{ github.actor }}! diff --git a/.github/workflows/test-pypi-publish.yml b/.github/workflows/test-pypi-publish.yml index 24afe432..2abdf735 100644 --- a/.github/workflows/test-pypi-publish.yml +++ b/.github/workflows/test-pypi-publish.yml @@ -35,4 +35,4 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.PYPI_TEST }} - repository_url: https://test.pypi.org/legacy/ \ No newline at end of file + repository-url: https://test.pypi.org/legacy/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index d6179261..78c080ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,29 @@ For each version important additions, changes and removals are listed here. The format is inspired from [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [v0.14.5] New MaStR data model, battery export, various fixes - 2024-10-11 +### Added +- Replace values in NetzbetreiberpruefungStatus with their entries from + Katalogwerte + [#583](https://github.com/OpenEnergyPlatform/open-MaStR/pull/583) +- Add `deleted_market_actors` to data model and prevent crash on unknown tables + [#575](https://github.com/OpenEnergyPlatform/open-MaStR/pull/575) +- Extended documentation of data cleansing process for bulk download + [#568](https://github.com/OpenEnergyPlatform/open-MaStR/pull/568) +- Add OFFIS eV as partner organization + [#493](https://github.com/OpenEnergyPlatform/open-MaStR/pull/493) +### Changed +- Fix usercff workflow + [#545](https://github.com/OpenEnergyPlatform/open-MaStR/issues/544) +- Fix docs on user-defined output path for csv, xml, database + [#549](https://github.com/OpenEnergyPlatform/open-MaStR/issues/549) +- Set pandas version to >=2.2.2 for compatibility with numpy v2.0 + [#553](https://github.com/OpenEnergyPlatform/open-MaStR/issues/553) +- Allow to configure model/service port in `soap_api.download.MaStRAPI` + 
[#556](https://github.com/OpenEnergyPlatform/open-MaStR/issues/556) +- Allow CSV export of table `storage_units` + [#565](https://github.com/OpenEnergyPlatform/open-MaStR/pull/565) + ## [v0.14.4] Release for the Journal of Open Source Software JOSS - 2024-06-07 ### Added - Extend documentation section `getting started` based on the JOSS Review [#523](https://github.com/OpenEnergyPlatform/open-MaStR/pull/523) @@ -18,7 +41,6 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ - Fixed missing call to gen_url in case first bulk download fails as xml file for today is not yet available [#534](https://github.com/OpenEnergyPlatform/open-MaStR/pull/534) - Repair links in the documentation page [#536](https://github.com/OpenEnergyPlatform/open-MaStR/pull/536) - ## [v0.14.3] Fix Pypi Release - 2024-04-24 ### Added - Add new table `changed_dso_assignment` [#510](https://github.com/OpenEnergyPlatform/open-MaStR/pull/510) diff --git a/CITATION.cff b/CITATION.cff index 12415aa3..591a84ea 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -25,10 +25,15 @@ authors: alias: "@deniztepe" affiliation: "fortiss" orcid: " https://orcid.org/0000-0002-7605-0173" + - family-names: "Amme" + given-names: "Jonathan" + alias: "@nesnoj" + affiliation: "Reiner Lemoine Institut" + orcid: " https://orcid.org/0000-0002-8563-5261" title: "open-MaStR" type: software license: AGPL-3.0 -version: 0.14.4 +version: 0.14.5 doi: -date-released: 2024-06-07 +date-released: 2024-10-11 url: "https://github.com/OpenEnergyPlatform/open-MaStR/" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a998de13..8c023f6a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -129,6 +129,13 @@ git status ``` #### 2.3. Commit your changes +First, make sure you have the pre-commit hooks installed to have your code +automatically checked on commit for programmatic and stylistic errors: +```bash +pre-commit install +``` + +Now, let's add some file. 
If the file does not exist on the remote server yet, use: ```bash git add filename.md diff --git a/README.rst b/README.rst index a1d5e077..084c29b1 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,5 @@ -.. image:: https://user-images.githubusercontent.com/14353512/199113556-4b53660f-c628-4138-8d01-3719595ecda1.png +.. image:: docs/images/README_HeaderThreePartners.svg :align: left :target: https://github.com/OpenEnergyPlatform/open-MaStR :alt: MaStR logo @@ -59,8 +59,6 @@ Documentation | Find the `documentation `_ hosted on ReadTheDocs. | The original API documentation can be found on the `Webhilfe des Marktstammdatenregisters `_. -| If you are interested in browsing the MaStR online, check out the privately hosted `Marktstammdatenregister.dev `_. -| Also see the `bundesAPI/Marktstammdaten-API `_ for another implementation. Installation @@ -112,7 +110,13 @@ changes in a `Pull Request `_ - `EE-Status App `_ - `Digiplan Anhalt `_ +- `Data Quality Assessment of the MaStR `_ +External Resources +=================== +Besides open-mastr, some other resources exist that ease the process of working with the Marktstammdatenregister: +- If you are interested in browsing the MaStR online, check out the github organisation `Marktstammdatenregister.dev `_. +- The `bundesAPI/Marktstammdaten-API `_ is another implementation to access data via an official API. Collaboration ============= @@ -129,7 +133,7 @@ Software | This repository is licensed under the **GNU Affero General Public License v3.0 or later** (AGPL-3.0-or-later). | See `LICENSE.md `_ for rights and obligations. | See the *Cite this repository* function or `CITATION.cff `_ for citation of this repository. 
-| Copyright: `open-MaStR `_ © `Reiner Lemoine Institut `_ © `fortiss `_ | `AGPL-3.0-or-later `_ +| Copyright: `open-MaStR `_ © `Reiner Lemoine Institut `_ © `fortiss `_ © `OFFIS `_ | `AGPL-3.0-or-later `_ Data ---- diff --git a/RELEASE_PROCEDURE.md b/RELEASE_PROCEDURE.md index 521b3717..5cd7684b 100644 --- a/RELEASE_PROCEDURE.md +++ b/RELEASE_PROCEDURE.md @@ -48,13 +48,17 @@ It always has the format `YYYY-MM-DD`, e.g. `2022-05-16`. * On release day, start the release early to ensure sufficient time for reviews * Merge everything on the `develop` branch -### 5. 💠 Create a `release` branch +### 5. Run tests and apply code linting +* Run tests locally with `pytest` and fix errors +* Apply linting with `pre-commit run -a` and fix errors + +### 6. 💠 Create a `release` branch * Checkout `develop` and branch with `git checkout -b release-v0.12.1` * Update version for test release with `bump2version --current-version --new-version patch` -* Commit version update with `git commit -am "version update v0.12.1a1"` +* Commit version update with `git commit -am "version update v0.12.1"` * Push branch with `git push --set-upstream origin release-v0.12.1` -### 6. 📝 Update the version files +### 7. 📝 Update the version files * `📝CHANGELOG.md` * All Pull Request are included * Add a new section with correct version number @@ -62,8 +66,7 @@ It always has the format `YYYY-MM-DD`, e.g. `2022-05-16`. * `📝CITATION.cff` * Update `date-released` -### 7. Optional: Check release on Test-PyPI - +### 8. Optional: Check release on Test-PyPI * Check if the release it correctly displayed on [Test-PyPI](https://test.pypi.org/project/open-mastr/#history) * You can trigger the release manually within github actions using the `run workflow` button on branch `release-v0.12.1` on the workflow `Build and release on pypi tests` * Note: Pre-releases on Test-PyPI are only shown under `Release history` in the navigation bar. @@ -72,7 +75,7 @@ It always has the format `YYYY-MM-DD`, e.g. `2022-05-16`. 
* Note: The release on Test-PyPI might fail, but it will be the correct release version for the PyPI server. * Push commits to the `release-*` branch -### 8. 🐙 Create a `Release Pull Request` +### 9. 🐙 Create a `Release Pull Request` * Use `📝PR_TEMPLATE_RELEASE` (❗ToDo❗) * Merge `release` into `production` branch * Assign reviewers to check the release @@ -81,7 +84,7 @@ It always has the format `YYYY-MM-DD`, e.g. `2022-05-16`. * Wait for reviews and tests * Merge PR -### 9. 💠 Set the `Git Tag` +### 10. 💠 Set the `Git Tag` * Checkout `production` branch and pull * Check existing tags `git tag -n` * Create new tag: `git tag -a v0.12.1 -m "open-mastr release v0.12.1 with PyPI"` @@ -91,7 +94,7 @@ It always has the format `YYYY-MM-DD`, e.g. `2022-05-16`. * Delete local tag: `git tag -d v0.12.1` * Delete remote tag: `git push --delete origin v0.12.1` -### 10. 🐙 Publish `Release` on GitHub and PyPI +### 11. 🐙 Publish `Release` on GitHub and PyPI * Navigate to your [releases](https://github.com/OpenEnergyPlatform/open-MaStR/releases/) on GitHub and open your draft release. * Summarize key changes in the description * Use the `generate release notes` button provided by github (This only works after the release branch is merged on production) @@ -103,7 +106,7 @@ It always has the format `YYYY-MM-DD`, e.g. `2022-05-16`. ▶️ In the background the GitHub workflow (pypi-publish.yml) will publish the package 📦 on PyPI! -### 11. 🐙 Set up new development +### 12. 🐙 Set up new development * Create a Pull request from `release-*` to `develop` * Create a new **unreleased section** in the `📝CHANGELOG.md` ``` diff --git a/docs/advanced.md b/docs/advanced.md index 9f5589a7..a0db651f 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -1,5 +1,5 @@ For most users, the functionalites described in [Getting Started](getting_started.md) are sufficient. If you want -to examine how you can configure the package's behavior for your own needs, check out [Cofiguration](#configuration). 
Or you can explore the two main functionalities of the package, namely the [Bulk Download](#bulk-download) +to examine how you can configure the package's behavior for your own needs, check out [Configuration](#configuration). Or you can explore the two main functionalities of the package, namely the [Bulk Download](#bulk-download) or the [SOAP API download](#soap-api-download). ## Configuration @@ -28,6 +28,7 @@ The possible databases are: ### Project directory The directory `$HOME/.open-MaStR` is automatically created. It is used to store configuration files and save data. +You can change this default path, see [environment variables](#environment-variables). Default config files are copied to this directory which can be modified - but with caution. The project home directory is structured as follows (files and folders below `data/` just an example). @@ -87,6 +88,15 @@ The data can then be written to any sql database supported by [sqlalchemy](https For more information regarding the database see [Database settings](#database-settings). +### Environment variables + +There are some environment variables to customize open-MaStR: + +| Variable | Description | Example | +|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------| +| `SQLITE_DATABASE_PATH` | Path to the SQLite file. This allows you to use multiple instances of the MaStR database. The database instances exist in parallel and are independent of each other. | `/home/mastr-rabbit/.open-MaStR/data/sqlite/your_custom_instance_name.db` | +| `OUTPUT_PATH` | Path to user-defined output directory for CSV data, XML file and database. 
If not specified, output directory defaults to `$HOME/.open-MaStR/` | Linux: `/home/mastr-rabbit/open-mastr-user-defined-output-path`, Windows: `C:\\Users\\open-mastr-user-defined-output-path` | + ## Bulk download On the homepage [MaStR/Datendownload](https://www.marktstammdatenregister.de/MaStR/Datendownload) a zipped folder containing the whole @@ -195,10 +205,16 @@ if __name__ == "__main__": print(mastr_api.GetLokaleUhrzeit()) ``` -For API calls and their optional parameters refer to [API documentation](https://www.marktstammdatenregister. -de/MaStRHilfe/subpages/webdienst.html). +The MaStR API has different models to query from, the default are power units +("Anlage"). To change this, you can pass the desired model to +[`MaStRAPI`][open_mastr.soap_api.download.MaStRAPI]. +E.g. to query market actors instantiate it using +`MaStRAPI(service_port="Akteur")`. + +For API calls, models and optional parameters refer to the +[API documentation](https://www.marktstammdatenregister.de/MaStRHilfe/subpages/webdienst.html). -???+ example "Example queries and their responses" +???+ example "Example queries and their responses (for model 'Anlage')" === "mastr_api.GetLokaleUhrzeit()" diff --git a/docs/conf.py b/docs/conf.py index 42a70728..93b52ff0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,14 +12,15 @@ # import os import sys -sys.path.insert(0, os.path.abspath('../open_mastr')) + +sys.path.insert(0, os.path.abspath("../open_mastr")) # -- Project information ----------------------------------------------------- -project = 'open-MaStR' -copyright = '2022 Reiner Lemoine Institut and fortiss' -author = '' +project = "open-MaStR" +copyright = "2024 Reiner Lemoine Institut gGmbH and fortiss GmbH and OFFIS e.V." +author = "" # -- General configuration --------------------------------------------------- @@ -28,22 +29,22 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - 'sphinx.ext.autosectionlabel', - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx_tabs.tabs', - 'm2r2', + "sphinx.ext.autosectionlabel", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx_tabs.tabs", + "m2r2", ] source_suffix = [".rst", ".md"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -51,13 +52,13 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] -html_css_files = ['custom.css'] +html_static_path = ["_static"] +html_css_files = ["custom.css"] -# Autodoc config -autoclass_content = 'both' +# Autodoc config +autoclass_content = "both" diff --git a/docs/dataset.md b/docs/dataset.md index 8e102511..2063cdf0 100644 --- a/docs/dataset.md +++ b/docs/dataset.md @@ -81,6 +81,8 @@ After downloading the MaStR, you will find a database with a large number of tab | permit | | | storage_units | | | kwk | *short for: Combined heat and power (CHP)* | + | deleted_units | Units from all technologies that were deleted or deactivated | + | deleted_market_actors | Market actors that were deleted or deactivated | ### MaStR data model diff --git a/docs/images/README_HeaderThreePartners.svg b/docs/images/README_HeaderThreePartners.svg new file mode 100644 index 00000000..2382cf9f --- /dev/null +++ b/docs/images/README_HeaderThreePartners.svg @@ -0,0 +1,127 @@ + + + + + + + + + + + + + + + + diff --git a/mkdocs.yml b/mkdocs.yml index 76a21770..26d41af3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -93,4 +93,4 @@ site_dir: _build copyright: | - © 2023 RLI and fortiss GmbH + © RLI and fortiss GmbH and OFFIS e.V. 
diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index acd626b8..5cde2f75 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -77,7 +77,6 @@ class Mastr: """ def __init__(self, engine="sqlite", connect_to_translated_db=False) -> None: - validate_parameter_format_for_mastr_init(engine) self.output_dir = get_output_dir() @@ -142,6 +141,7 @@ def download( | "nuclear" | Yes | Yes | | "gas" | Yes | Yes | | "storage" | Yes | Yes | + | "storage_units" | Yes | Yes | | "electricity_consumer"| Yes | No | | "location" | Yes | Yes | | "market" | Yes | No | @@ -149,6 +149,7 @@ def download( | "balancing_area" | Yes | No | | "permit" | Yes | Yes | | "deleted_units" | Yes | No | + | "deleted_market_actors"| Yes | No | | "retrofit_units" | Yes | No | date : None or `datetime.datetime` or str, optional @@ -163,8 +164,10 @@ def download( Default to `None`. bulk_cleansing : bool, optional - If True, data cleansing is applied after the download (which is recommended). Default - to True. + If set to True, data cleansing is applied after the download (which is recommended). + In its original format, many entries in the MaStR are encoded with IDs. Columns like + `state` or `fueltype` do not contain entries such as "Hessen" or "Braunkohle", but instead + only contain IDs. Cleansing replaces these IDs with their corresponding original entries. api_processes : int or None or "max", optional Number of parallel processes used to download additional data. Defaults to `None`. If set to "max", the maximum number of possible processes @@ -304,7 +307,7 @@ def to_csv( "balancing_area", "electricity_consumer", "gas_consumer", "gas_producer", "gas_storage", "gas_storage_extended", "grid_connections", "grids", "market_actors", "market_roles", - "locations_extended, 'permit', 'deleted_units' ] + "locations_extended", "permit", "deleted_units", "storage_units"] chunksize: int Defines the chunksize of the tables export. Default value is 500.000 rows to include in each chunk. 
diff --git a/open_mastr/soap_api/download.py b/open_mastr/soap_api/download.py index 3d7a0b3f..dc96266c 100644 --- a/open_mastr/soap_api/download.py +++ b/open_mastr/soap_api/download.py @@ -39,7 +39,8 @@ class MaStRAPI(object): mastr_api = MaStRAPI( user="SOM123456789012", - key=""koo5eixeiQuoi'w8deighai8ahsh1Ha3eib3coqu7ceeg%ies..." + key="koo5eixeiQuoi'w8deighai8ahsh1Ha3eib3coqu7ceeg%ies...", + service_port="Anlage" ) ``` @@ -69,7 +70,7 @@ class MaStRAPI(object): wrapped SOAP queries. This is handled internally. """ - def __init__(self, user=None, key=None): + def __init__(self, user=None, key=None, service_port="Anlage"): """ Parameters ---------- @@ -80,10 +81,15 @@ def __init__(self, user=None, key=None): key : str , optional Access token of a role (Benutzerrolle). Might look like: "koo5eixeiQuoi'w8deighai8ahsh1Ha3eib3coqu7ceeg%ies..." + service_port : str , optional + Port/model to be used, e.g. "Anlage" or "Akteur", see docs for + full list: + https://www.marktstammdatenregister.de/MaStRHilfe/subpages/webdienst.html + Defaults to "Anlage". """ # Bind MaStR SOAP API functions as instance methods - client, client_bind = _mastr_bindings() + client, client_bind = _mastr_bindings(service_port=service_port) # First, all services of registered service_port (i.e. 'Anlage') for n, f in client_bind: @@ -140,19 +146,27 @@ def wrapper(*args, **kwargs): def _mastr_bindings( + service_port, + service_name="Marktstammdatenregister", + wsdl="https://www.marktstammdatenregister.de/MaStRAPI/wsdl/mastr.wsdl", max_retries=3, pool_connections=100, pool_maxsize=100, timeout=60, operation_timeout=600, - wsdl="https://www.marktstammdatenregister.de/MaStRAPI/wsdl/mastr.wsdl", - service_name="Marktstammdatenregister", - service_port="Anlage", ): """ Parameters ---------- + service_port : str + Port of service to be used. Parameters is passed to `zeep.Client.bind` + See :class:`MaStRAPI` for more information. + service_name : str + Service, defined in wsdl file, that is to be used. 
Parameters is + passed to zeep.Client.bind + wsdl : str + Url of wsdl file to be used. Parameters is passed to zeep.Client max_retries : int Maximum number of retries for a request. Parameters is passed to requests.adapters.HTTPAdapter @@ -168,14 +182,6 @@ def _mastr_bindings( operation_timeout : int Timeout for API requests (GET/POST in underlying requests package) in seconds. Parameter is passed to `zeep.transports.Transport`. - wsdl : str - Url of wsdl file to be used. Parameters is passed to zeep.Client - service_name : str - Service, defined in wsdl file, that is to be used. Parameters is - passed to zeep.Client.bind - service_port : str - Port of service to be used. Parameters is - passed to zeep.Client.bind Returns ------- @@ -460,7 +466,7 @@ def __init__(self, parallel_processes=None): multiprocessing package) choose False. Defaults to number of cores (including hyperthreading). """ - log.warn( + log.warning( """ The `MaStRDownload` class is deprecated and will not be maintained in the future. 
To get a full table of the Marktstammdatenregister, use the open_mastr.Mastr.download diff --git a/open_mastr/soap_api/metadata/description.py b/open_mastr/soap_api/metadata/description.py index 8fc55526..a4986959 100644 --- a/open_mastr/soap_api/metadata/description.py +++ b/open_mastr/soap_api/metadata/description.py @@ -33,19 +33,19 @@ def __init__(self, xml=None): self.xml = fh.read() else: # If no XML file is given, the file is read from an URL - zipurl = 'https://www.marktstammdatenregister.de/MaStRHilfe/files/' \ - 'webdienst/Dienstbeschreibung_1_2_39_Produktion.zip' + zipurl = ( + "https://www.marktstammdatenregister.de/MaStRHilfe/files/" + "webdienst/Dienstbeschreibung_1_2_39_Produktion.zip" + ) with urlopen(zipurl) as zipresp: with ZipFile(BytesIO(zipresp.read())) as zfile: - self.xml = zfile.read('xsd/mastrbasetypes.xsd') - - + self.xml = zfile.read("xsd/mastrbasetypes.xsd") # Parse XML and extract relevant data parsed = xmltodict.parse(self.xml, process_namespaces=False) - self.complex_types = parsed['schema']["complexType"] - self.simple_types = parsed['schema']["simpleType"] + self.complex_types = parsed["schema"]["complexType"] + self.simple_types = parsed["schema"]["simpleType"] # Prepare parsed data for documentational purposes abstract_types, parameters, responses, types = self._filter_type_descriptions() @@ -78,13 +78,17 @@ def _filter_type_descriptions(self): raise ValueError("Ohh...") else: # Filter all functions - if item["@name"].startswith(("Get", "Set", "Erneute", "Verschiebe", "Delete")): + if item["@name"].startswith( + ("Get", "Set", "Erneute", "Verschiebe", "Delete") + ): functions.append(item) # Further split the list of functions into paramters and responses if item["@name"].endswith("Parameter"): if "complexContent" in item.keys(): - parameters[item["@name"]] = item["complexContent"]["extension"] + parameters[item["@name"]] = item["complexContent"][ + "extension" + ] else: parameters[item["@name"]] = item elif 
item["@name"].endswith("Antwort"): @@ -111,12 +115,14 @@ def prepare_simple_type(self): for simple_type in self.simple_types: if "enumeration" in simple_type["restriction"]: - possible_values = [_["@value"] for _ in simple_type["restriction"]["enumeration"]] + possible_values = [ + _["@value"] for _ in simple_type["restriction"]["enumeration"] + ] else: possible_values = [] simple_types_doc[simple_type["@name"]] = { "type": simple_type["restriction"]["@base"], - "values": possible_values + "values": possible_values, } return simple_types_doc @@ -140,7 +146,9 @@ def functions_data_documentation(self): if "annotation" in fcn["sequence"]["element"]: fcn_data = [fcn["sequence"]["element"]] else: - fcn_data = self.types[fcn["sequence"]["element"]["@type"].split(":")[1]]["sequence"]["element"] + fcn_data = self.types[ + fcn["sequence"]["element"]["@type"].split(":")[1] + ]["sequence"]["element"] else: print(type(fcn["sequence"])) print(fcn["sequence"]) @@ -148,41 +156,51 @@ def functions_data_documentation(self): # Add data for inherited columns from base types if "@base" in fcn: - if not fcn["@base"] == 'mastr:AntwortBasis': - fcn_data = _collect_columns_of_base_type(self.types, fcn["@base"].split(":")[1], fcn_data) + if not fcn["@base"] == "mastr:AntwortBasis": + fcn_data = _collect_columns_of_base_type( + self.types, fcn["@base"].split(":")[1], fcn_data + ) function_docs[fcn_name] = {} for column in fcn_data: # Replace MaStR internal types with more general ones if column["@type"].startswith("mastr:"): try: - column_type = self.simple_types_prepared[column["@type"].split(":")[1]]["type"] + column_type = self.simple_types_prepared[ + column["@type"].split(":")[1] + ]["type"] except KeyError: column_type = column["@type"] else: column_type = column["@type"] if "annotation" in column.keys(): - description = column["annotation"]["documentation"].get("#text", None) + description = column["annotation"]["documentation"].get( + "#text", None + ) if description: - description 
= re.sub(" +", " ", description.replace("\n", "")) + description = re.sub( + " +", " ", description.replace("\n", "") + ) function_docs[fcn_name][column["@name"]] = { - "type": column_type, - "description": description, - "example": column["annotation"]["documentation"].get("m-ex", None) + "type": column_type, + "description": description, + "example": column["annotation"]["documentation"].get( + "m-ex", None + ), } else: function_docs[fcn_name][column["@name"]] = { "type": column_type, # TODO: insert information from simple type here "description": None, - "example": None + "example": None, } # Hack in a descrition for a column that gets created after download while flattening data function_docs["GetEinheitWind"]["HerstellerId"] = { "type": "str", "description": "Id des Herstellers der Einheit", - "example": 923 + "example": 923, } return function_docs @@ -193,7 +211,11 @@ def _collect_columns_of_base_type(base_types, base_type_name, fcn_data): fcn_data += type_description["extension"]["sequence"]["element"] if "@base" in type_description["extension"]: - if not type_description["extension"]["@base"] == 'mastr:AntwortBasis': - fcn_data = _collect_columns_of_base_type(base_types, type_description["extension"]["@base"].split(":")[1], fcn_data) + if not type_description["extension"]["@base"] == "mastr:AntwortBasis": + fcn_data = _collect_columns_of_base_type( + base_types, + type_description["extension"]["@base"].split(":")[1], + fcn_data, + ) return fcn_data diff --git a/open_mastr/soap_api/mirror.py b/open_mastr/soap_api/mirror.py index 9dda3c6e..ad8e9722 100644 --- a/open_mastr/soap_api/mirror.py +++ b/open_mastr/soap_api/mirror.py @@ -99,7 +99,7 @@ def __init__( Number of parallel processes used to download additional data. Defaults to `None`. """ - log.warn( + log.warning( """ The `MaStRMirror` class is deprecated and will not be maintained in the future. 
To get a full table of the Marktstammdatenregister, use the open_mastr.Mastr.download diff --git a/open_mastr/utils/config.py b/open_mastr/utils/config.py index b1146269..40f67ec8 100644 --- a/open_mastr/utils/config.py +++ b/open_mastr/utils/config.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- - """ Service functions for logging @@ -26,7 +25,11 @@ import logging import logging.config -from open_mastr.utils.constants import TECHNOLOGIES, API_LOCATION_TYPES, ADDITIONAL_TABLES +from open_mastr.utils.constants import ( + TECHNOLOGIES, + API_LOCATION_TYPES, + ADDITIONAL_TABLES, +) log = logging.getLogger(__name__) @@ -57,7 +60,7 @@ def get_output_dir(): """ if "OUTPUT_PATH" in os.environ: - return os.environ.get('OUTPUT_PATH') + return os.environ.get("OUTPUT_PATH") return get_project_home_dir() @@ -76,7 +79,7 @@ def get_data_version_dir(): data_version = get_data_config() if "OUTPUT_PATH" in os.environ: - return os.path.join(os.environ.get('OUTPUT_PATH'), "data", data_version) + return os.path.join(os.environ.get("OUTPUT_PATH"), "data", data_version) return os.path.join(get_project_home_dir(), "data", data_version) @@ -230,9 +233,7 @@ def _filenames_generator(): } # Add file names of processed data - filenames["postprocessed"] = { - tech: f"{prefix}_{tech}.csv" for tech in TECHNOLOGIES - } + filenames["postprocessed"] = {tech: f"{prefix}_{tech}.csv" for tech in TECHNOLOGIES} # Add filenames for location data filenames["raw"].update( @@ -240,8 +241,13 @@ def _filenames_generator(): ) # Add filenames for additional tables - filenames["raw"].update({"additional_table": - {addit_table: f"{prefix}_{addit_table}_raw.csv" for addit_table in ADDITIONAL_TABLES}} + filenames["raw"].update( + { + "additional_table": { + addit_table: f"{prefix}_{addit_table}_raw.csv" + for addit_table in ADDITIONAL_TABLES + } + } ) # Add metadata file diff --git a/open_mastr/utils/constants.py b/open_mastr/utils/constants.py index 18afb2c0..e5cc476b 100644 --- a/open_mastr/utils/constants.py +++ 
b/open_mastr/utils/constants.py @@ -16,8 +16,10 @@ "balancing_area", "permit", "deleted_units", + "deleted_market_actors", "retrofit_units", "changed_dso_assignment", + "storage_units", ] # Possible values for parameter 'data' with API download method @@ -62,8 +64,10 @@ "market_roles", "permit", "deleted_units", + "deleted_market_actors", "retrofit_units", "changed_dso_assignment", + "storage_units", ] # Possible data types for API download @@ -77,7 +81,7 @@ "location_gas_consumption", ] -# Map bulk data to bulk download tables (xml file names) +# Map bulk data to bulk download tables (XML file names) BULK_INCLUDE_TABLES_MAP = { "wind": ["anlageneegwind", "einheitenwind"], "solar": ["anlageneegsolar", "einheitensolar"], @@ -89,7 +93,8 @@ ], "combustion": ["anlagenkwk", "einheitenverbrennung"], "nuclear": ["einheitenkernkraft"], - "storage": ["anlageneegspeicher", "anlagenstromspeicher", "einheitenstromspeicher"], + "storage": ["anlageneegspeicher", "einheitenstromspeicher"], + "storage_units": ["anlagenstromspeicher"], "gas": [ "anlagengasspeicher", "einheitengaserzeuger", @@ -103,6 +108,7 @@ "balancing_area": ["bilanzierungsgebiete"], "permit": ["einheitengenehmigung"], "deleted_units": ["geloeschteunddeaktivierteeinheiten"], + "deleted_market_actors": ["geloeschteunddeaktiviertemarktakteure"], "retrofit_units": ["ertuechtigungen"], "changed_dso_assignment": ["einheitenaenderungnetzbetreiberzuordnungen"], } @@ -122,6 +128,7 @@ "balancing_area": ["balancing_area"], "permit": ["permit"], "deleted_units": ["deleted_units"], + "deleted_market_actors": ["deleted_market_actors"], "retrofit_units": ["retrofit_units"], "changed_dso_assignment": ["changed_dso_assignment"], } @@ -179,8 +186,10 @@ "balancing_area": "BalancingArea", "permit": "Permit", "deleted_units": "DeletedUnits", + "deleted_market_actors": "DeletedMarketActors", "retrofit_units": "RetrofitUnits", "changed_dso_assignment": "ChangedDSOAssignment", + "storage_units": "StorageUnits", } UNIT_TYPE_MAP = { diff 
--git a/open_mastr/utils/credentials.py b/open_mastr/utils/credentials.py index c00495f4..ee818828 100644 --- a/open_mastr/utils/credentials.py +++ b/open_mastr/utils/credentials.py @@ -20,12 +20,13 @@ import keyring import logging + log = logging.getLogger(__name__) def _load_config_file(): - config_file = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg') + config_file = os.path.join(get_project_home_dir(), "config", "credentials.cfg") cfg = cp.ConfigParser() # if not os.path.isdir(open_mastr_home): @@ -35,7 +36,7 @@ def _load_config_file(): cfg.read(config_file) return cfg else: - with open(config_file, 'w') as configfile: + with open(config_file, "w") as configfile: cfg.write(configfile) return cfg @@ -53,7 +54,7 @@ def get_mastr_user(): """ cfg = _load_config_file() section = "MaStR" - cfg_path = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg') + cfg_path = os.path.join(get_project_home_dir(), "config", "credentials.cfg") try: user = cfg.get(section, "user") @@ -66,10 +67,12 @@ def get_mastr_user(): # except cp.NoOptionError: # raise cp.Error(f"The option 'user' could not by found in the section " # f"{section} in file {cfg_path}.") - log.warning(f"The option 'user' could not by found in the section " - f"{section} in file {cfg_path}. " - f"You might run into trouble when downloading data via the MaStR API." - f"\n Bulk download works without option 'user'.") + log.warning( + f"The option 'user' could not by found in the section " + f"{section} in file {cfg_path}. " + f"You might run into trouble when downloading data via the MaStR API." + f"\n Bulk download works without option 'user'." 
+ ) return None @@ -79,15 +82,19 @@ def check_and_set_mastr_user(): user = get_mastr_user() if not user: - credentials_file = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg') + credentials_file = os.path.join( + get_project_home_dir(), "config", "credentials.cfg" + ) cfg = _load_config_file() - user = input('\n\nCannot not find a MaStR user name in {config_file}.\n\n' - 'Please enter MaStR-ID (pattern: SOM123456789012): ' - ''.format(config_file=credentials_file)) + user = input( + "\n\nCannot not find a MaStR user name in {config_file}.\n\n" + "Please enter MaStR-ID (pattern: SOM123456789012): " + "".format(config_file=credentials_file) + ) cfg["MaStR"] = {"user": user} - with open(credentials_file, 'w') as configfile: + with open(credentials_file, "w") as configfile: cfg.write(configfile) return user @@ -115,7 +122,7 @@ def get_mastr_token(user): # Retrieving password from keyring does currently fail on headless systems # Prevent from breaking program execution with following try/except clause section = "MaStR" - cfg_path = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg') + cfg_path = os.path.join(get_project_home_dir(), "config", "credentials.cfg") try: password = keyring.get_password(section, user) except: @@ -127,10 +134,12 @@ def get_mastr_token(user): try: password = cfg.get(section, "token") except (cp.NoSectionError, cp.NoOptionError): - log.warning(f"The option 'token' could not by found in the section " - f"{section} in file {cfg_path}. " - f"You might run into trouble when downloading data via the MaStR API." - f"\n Bulk download works without option 'token'.") + log.warning( + f"The option 'token' could not by found in the section " + f"{section} in file {cfg_path}. " + f"You might run into trouble when downloading data via the MaStR API." + f"\n Bulk download works without option 'token'." 
+ ) password = None return password @@ -142,17 +151,21 @@ def check_and_set_mastr_token(user): if not password: cfg = _load_config_file() - credentials_file = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg') + credentials_file = os.path.join( + get_project_home_dir(), "config", "credentials.cfg" + ) # If also no password in credentials file, ask the user to input password # Two options: (1) storing in keyring; (2) storing in config file - password = input('\n\nCannot not find a MaStR password, neither in keyring nor in {config_file}.\n\n' - "Please enter a valid access token of a role (Benutzerrolle) " - "associated to the user {user}.\n" - "The token might look like: " - "koo5eixeiQuoi'w8deighai8ahsh1Ha3eib3coqu7ceeg%ies...\n".format( - config_file=credentials_file, - user=user)) + password = input( + "\n\nCannot not find a MaStR password, neither in keyring nor in {config_file}.\n\n" + "Please enter a valid access token of a role (Benutzerrolle) " + "associated to the user {user}.\n" + "The token might look like: " + "koo5eixeiQuoi'w8deighai8ahsh1Ha3eib3coqu7ceeg%ies...\n".format( + config_file=credentials_file, user=user + ) + ) # let the user decide where to store the password # (1) keyring @@ -160,10 +173,15 @@ def check_and_set_mastr_token(user): # (0) don't store, abort # Wait for correct input while True: - choice = int(input("Where do you want to store your password?\n" - "\t(1) Keyring (default, hit ENTER to select)\n" - "\t(2) Config file (credendials.cfg)\n" - "\t(0) Abort. Don't store password\n") or "1\n") + choice = int( + input( + "Where do you want to store your password?\n" + "\t(1) Keyring (default, hit ENTER to select)\n" + "\t(2) Config file (credendials.cfg)\n" + "\t(0) Abort. 
Don't store password\n" + ) + or "1\n" + ) # check if choice is valid input if choice in [0, 1, 2]: break @@ -175,7 +193,7 @@ def check_and_set_mastr_token(user): keyring.set_password("MaStR", user, password) elif choice == 2: cfg["MaStR"] = {"user": user, "token": password} - with open(credentials_file, 'w') as configfile: + with open(credentials_file, "w") as configfile: cfg.write(configfile) else: log.error("No clue what happened here!?") @@ -199,4 +217,4 @@ def get_zenodo_token(): user = cfg.get(section, "token") return user except (cp.NoSectionError, cp.NoOptionError): - return None \ No newline at end of file + return None diff --git a/open_mastr/utils/helpers.py b/open_mastr/utils/helpers.py index 1ac061bd..ad4f4dd8 100644 --- a/open_mastr/utils/helpers.py +++ b/open_mastr/utils/helpers.py @@ -222,7 +222,7 @@ def validate_parameter_data(method, data) -> None: ) if method == "csv_export" and value not in TECHNOLOGIES + ADDITIONAL_TABLES: raise ValueError( - "Allowed values for parameter data with API method are " + "Allowed values for CSV export are " f"{TECHNOLOGIES} or {ADDITIONAL_TABLES}" ) diff --git a/open_mastr/utils/orm.py b/open_mastr/utils/orm.py index cedbef47..d0d3a218 100644 --- a/open_mastr/utils/orm.py +++ b/open_mastr/utils/orm.py @@ -780,6 +780,14 @@ class DeletedUnits(ParentAllTables, Base): EinheitBetriebsstatus = Column(String) +class DeletedMarketActors(ParentAllTables, Base): + __tablename__ = "deleted_market_actors" + + MarktakteurMastrNummer = Column(String, primary_key=True) + MarktakteurStatus = Column(String) + DatumLetzteAktualisierung = Column(DateTime(timezone=True)) + + class RetrofitUnits(ParentAllTables, Base): __tablename__ = "retrofit_units" @@ -1006,6 +1014,11 @@ class ChangedDSOAssignment(ParentAllTables, Base): "__class__": DeletedUnits, "replace_column_names": None, }, + "geloeschteunddeaktiviertemarktakteure": { + "__name__": DeletedMarketActors.__tablename__, + "__class__": DeletedMarketActors, + "replace_column_names": 
None, + }, "marktrollen": { "__name__": MarketRoles.__tablename__, "__class__": MarketRoles, diff --git a/open_mastr/xml_download/colums_to_replace.py b/open_mastr/xml_download/colums_to_replace.py index 334f2a05..f35a30a9 100644 --- a/open_mastr/xml_download/colums_to_replace.py +++ b/open_mastr/xml_download/colums_to_replace.py @@ -57,6 +57,8 @@ "Pumpspeichertechnologie", "Einsatzort", # geloeschteunddeaktivierteEinheiten + # geloeschteunddeaktivierteMarktAkteure + "MarktakteurStatus", # lokationen # marktakteure "Personenart", @@ -108,4 +110,6 @@ "Seelage", "ClusterNordsee", "ClusterOstsee", + # various tables + "NetzbetreiberpruefungStatus", ] diff --git a/open_mastr/xml_download/utils_write_to_database.py b/open_mastr/xml_download/utils_write_to_database.py index 9dba5027..4917e9d9 100644 --- a/open_mastr/xml_download/utils_write_to_database.py +++ b/open_mastr/xml_download/utils_write_to_database.py @@ -73,9 +73,16 @@ def is_table_relevant(xml_tablename: str, include_tables: list) -> bool: have it in the database.""" # few tables are only needed for data cleansing of the xml files and contain no # information of relevance - boolean_write_table_to_sql_database = ( - tablename_mapping[xml_tablename]["__class__"] is not None - ) + try: + boolean_write_table_to_sql_database = ( + tablename_mapping[xml_tablename]["__class__"] is not None + ) + except KeyError: + print( + f"Table '{xml_tablename}' is not supported by your open-mastr version and " + f"will be skipped." 
+ ) + return False # check if the table should be written to sql database (depends on user input) include_count = include_tables.count(xml_tablename) diff --git a/postprocessing/helpers.py b/postprocessing/helpers.py index 0cccf27f..5084c7f3 100644 --- a/postprocessing/helpers.py +++ b/postprocessing/helpers.py @@ -1,4 +1,5 @@ from bokeh.palettes import Category10_10 as palette + # import geoviews as gv import bokeh @@ -9,9 +10,9 @@ def plotPowerPlants(df): # size marker according to gross power output iMaxSize = 30 iMinSize = 10 - df["size"] = (df["Bruttoleistung"] - df["Bruttoleistung"].min()) / \ - (df["Bruttoleistung"].max() - df["Bruttoleistung"].min()) * \ - (iMaxSize - iMinSize) + iMinSize + df["size"] = (df["Bruttoleistung"] - df["Bruttoleistung"].min()) / ( + df["Bruttoleistung"].max() - df["Bruttoleistung"].min() + ) * (iMaxSize - iMinSize) + iMinSize # convert datetime to string df["date"] = df["Inbetriebnahmedatum"].dt.strftime("%Y-%m-%d") @@ -41,17 +42,35 @@ def plotPowerPlants(df): for group in groups: df_group = df.loc[ df["Einheittyp"] == group, - ["Name", "Standort", "Bundesland", "Land", "date", - "Einheittyp", "Bruttoleistung", "Laengengrad", "Breitengrad", "size"] + [ + "Name", + "Standort", + "Bundesland", + "Land", + "date", + "Einheittyp", + "Bruttoleistung", + "Laengengrad", + "Breitengrad", + "size", + ], ] - points = gv.Points(df_group, ["Laengengrad", "Breitengrad"], label=group).options( - aspect=2, responsive=True, tools=[hover_tool], size="size", active_tools=['wheel_zoom'], - fill_alpha=0.6, fill_color=colors[group], line_color="white", + points = gv.Points( + df_group, ["Laengengrad", "Breitengrad"], label=group + ).options( + aspect=2, + responsive=True, + tools=[hover_tool], + size="size", + active_tools=["wheel_zoom"], + fill_alpha=0.6, + fill_color=colors[group], + line_color="white", ) - overlay = (overlay * points) + overlay = overlay * points # hide group when clicking on legend overlay.options(click_policy="hide", 
clone=False) # return figure - return overlay \ No newline at end of file + return overlay diff --git a/postprocessing/orm.py b/postprocessing/orm.py index f8612ee4..5a98ed07 100644 --- a/postprocessing/orm.py +++ b/postprocessing/orm.py @@ -1,7 +1,18 @@ from geoalchemy2 import Geometry from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.schema import MetaData -from sqlalchemy import Column, Integer, String, Float, Sequence, DateTime, Boolean, func, Date, JSON +from sqlalchemy import ( + Column, + Integer, + String, + Float, + Sequence, + DateTime, + Boolean, + func, + Date, + JSON, +) from sqlalchemy.dialects.postgresql import JSONB cleaned_schema = "model_draft" @@ -30,7 +41,6 @@ class BasicUnit(object): StatisikFlag_basic = Column(String) - class Extended(object): EinheitMastrNummer_extended = Column(String) @@ -91,7 +101,7 @@ class Extended(object): Einspeisungsart = Column(String) PraequalifiziertFuerRegelenergie = Column(Boolean) GenMastrNummer_extended = Column(String) - geom = Column(Geometry('POINT')) + geom = Column(Geometry("POINT")) comment = Column(String) @@ -175,6 +185,7 @@ class HydroEeg(Eeg): class StorageEeg(Eeg): pass + class Kwk(object): KwkMastrNummer_kwk = Column(String) @@ -205,7 +216,7 @@ class Permit(object): class WindCleaned(Permit, WindEeg, Extended, BasicUnit, Base): - __tablename__ = 'bnetza_mastr_wind_clean' + __tablename__ = "bnetza_mastr_wind_clean" # wind specific attributes NameWindpark = Column(String) @@ -231,8 +242,7 @@ class WindCleaned(Permit, WindEeg, Extended, BasicUnit, Base): Kuestenentfernung = Column(Float) EegMastrNummer_extended = Column(String) tags = Column(JSONB) - geom_3035 = Column(Geometry('POINT', srid=3035)) - + geom_3035 = Column(Geometry("POINT", srid=3035)) class SolarCleaned(Permit, SolarEeg, Extended, BasicUnit, Base): @@ -288,7 +298,7 @@ class CombustionCleaned(Permit, Kwk, Extended, BasicUnit, Base): AnteiligNutzungsberechtigte = Column(String) Notstromaggregat = Column(Boolean) 
Einsatzort = Column(String) - KwkMastrNummer_extended = Column(String) # changed here + KwkMastrNummer_extended = Column(String) # changed here Technologie = Column(String) diff --git a/postprocessing/postprocessing.py b/postprocessing/postprocessing.py index e860dbd2..cbb86e26 100644 --- a/postprocessing/postprocessing.py +++ b/postprocessing/postprocessing.py @@ -16,25 +16,13 @@ log = setup_logger() -BKG_VG250 = { - "schema": "boundaries", - "table": "bkg_vg250_1_sta_union_mview" -} +BKG_VG250 = {"schema": "boundaries", "table": "bkg_vg250_1_sta_union_mview"} -OSM_PLZ = { - "schema": "boundaries", - "table": "osm_postcode" -} +OSM_PLZ = {"schema": "boundaries", "table": "osm_postcode"} -OFFSHORE = { - "schema": "model_draft", - "table": "rli_boundaries_offshore" -} +OFFSHORE = {"schema": "model_draft", "table": "rli_boundaries_offshore"} -OSM_WINDPOWER = { - "schema": "model_draft", - "table": "mastr_osm_deu_point_windpower" -} +OSM_WINDPOWER = {"schema": "model_draft", "table": "mastr_osm_deu_point_windpower"} OEP_QUERY_PATTERN = "https://openenergy-platform.org/api/v0/schema/{schema}/tables/{table}/rows?form=csv" @@ -43,7 +31,16 @@ MASTR_RAW_SCHEMA = "model_draft" OPEN_MASTR_SCHEMA = "model_draft" -TECHNOLOGIES = ["wind", "hydro", "solar", "biomass", "combustion", "nuclear", "gsgk", "storage"] +TECHNOLOGIES = [ + "wind", + "hydro", + "solar", + "biomass", + "combustion", + "nuclear", + "gsgk", + "storage", +] orm_map = { "wind": { @@ -113,15 +110,17 @@ def table_to_db(csv_data, table, schema, conn, geom_col="geom", srid=4326): query = "CREATE SCHEMA IF NOT EXISTS {schema}".format(schema=schema) conn.execute(query) - csv_data.to_sql(table, - con=conn, - schema=schema, - dtype={ - geom_col: Geometry(srid=srid), - "plz": String(), - }, - chunksize=100000, - if_exists="replace") + csv_data.to_sql( + table, + con=conn, + schema=schema, + dtype={ + geom_col: Geometry(srid=srid), + "plz": String(), + }, + chunksize=100000, + if_exists="replace", + ) def 
table_to_db_orm(mapper, data, chunksize=10000): @@ -144,6 +143,7 @@ def table_to_db_orm(mapper, data, chunksize=10000): # Commit each chunk separately session.commit() + def import_boundary_data_csv(schema, table, index_col="id", srid=4326): """ Import additional data for post-processing @@ -166,32 +166,43 @@ def import_boundary_data_csv(schema, table, index_col="id", srid=4326): with db_engine().connect() as con: # Check if table already exists - table_query = "SELECT to_regclass('{schema}.{table}');".format(schema=schema, table=table) + table_query = "SELECT to_regclass('{schema}.{table}');".format( + schema=schema, table=table + ) table_name = "{schema}.{table}".format(schema=schema, table=table) table_exists = table_name in con.execute(table_query).first().values() if not table_exists: # Download CSV file if it does not exist if not csv_file_exists: - log.info("Downloading table {schema}.{table} from OEP".format(schema=schema, table=table)) + log.info( + "Downloading table {schema}.{table} from OEP".format( + schema=schema, table=table + ) + ) urlretrieve( - OEP_QUERY_PATTERN.format(schema=schema, table=table), - csv_file) + OEP_QUERY_PATTERN.format(schema=schema, table=table), csv_file + ) else: log.info("Found {} locally.".format(csv_file)) # Read CSV file - csv_data = pd.read_csv(csv_file, - index_col=index_col) + csv_data = pd.read_csv(csv_file, index_col=index_col) # Prepare geom data for DB upload - csv_data["geom"] = csv_data["geom"].apply(lambda x: WKTElement(wkb_loads(x, hex=True).wkt, srid=srid)) + csv_data["geom"] = csv_data["geom"].apply( + lambda x: WKTElement(wkb_loads(x, hex=True).wkt, srid=srid) + ) # Insert to db table_to_db(csv_data, table, schema, con, srid=srid) log.info("Data from {} successfully imported to database.".format(csv_file)) else: - log.info("Table '{schema}.{table}' already exists in local database".format(schema=schema, table=table)) + log.info( + "Table '{schema}.{table}' already exists in local database".format( + 
schema=schema, table=table + ) + ) def add_geom_col(df, lat_col="Breitengrad", lon_col="Laengengrad", srid=4326): @@ -219,17 +230,21 @@ def add_geom_col(df, lat_col="Breitengrad", lon_col="Laengengrad", srid=4326): df_with_coords = df.loc[~(df["Breitengrad"].isna() | df["Laengengrad"].isna())] # Just select data with lat/lon in range [(-90,90), (-180,180)] - df_with_coords = df_with_coords[~((df_with_coords["Breitengrad"] < -90) - | (df_with_coords["Breitengrad"] > 90) - | (df_with_coords["Laengengrad"] < -180) - | (df_with_coords["Laengengrad"] > 180)) + df_with_coords = df_with_coords[ + ~( + (df_with_coords["Breitengrad"] < -90) + | (df_with_coords["Breitengrad"] > 90) + | (df_with_coords["Laengengrad"] < -180) + | (df_with_coords["Laengengrad"] > 180) + ) ] df_no_coords = df.loc[~df.index.isin(df_with_coords.index)] - gdf = gpd.GeoDataFrame( - df_with_coords, geometry=gpd.points_from_xy(df_with_coords[lon_col], df_with_coords[lat_col]), - crs="EPSG:{}".format(srid)) + df_with_coords, + geometry=gpd.points_from_xy(df_with_coords[lon_col], df_with_coords[lat_col]), + crs="EPSG:{}".format(srid), + ) gdf["geom"] = gdf["geometry"].apply(lambda x: WKTElement(x.wkt, srid=srid)) gdf.drop(columns=["geometry"], inplace=True) @@ -271,9 +286,15 @@ def run_sql_postprocessing(): if tech_name not in ["gsgk", "storage", "nuclear"]: log.info(f"Run post-processing on {tech_name} data") # Read SQL query from file - with open(os.path.join(os.path.dirname(__file__), - "db-cleansing", - "rli-mastr-{tech_name}-cleansing.sql".format(tech_name=tech_name))) as file: + with open( + os.path.join( + os.path.dirname(__file__), + "db-cleansing", + "rli-mastr-{tech_name}-cleansing.sql".format( + tech_name=tech_name + ), + ) + ) as file: escaped_sql = text(file.read()) # Execute query @@ -334,21 +355,29 @@ def to_csv(limit=None): with session_scope() as session: orm_tech = getattr(orm, orm_map[tech]["cleaned"]) query = session.query(orm_tech).limit(limit) - df = pd.read_sql(query.statement, 
query.session.bind, index_col="EinheitMastrNummer") + df = pd.read_sql( + query.statement, query.session.bind, index_col="EinheitMastrNummer" + ) csv_file = os.path.join(data_path, filenames["postprocessed"][tech]) - df.to_csv(csv_file, index=True, index_label="EinheitMastrNummer", encoding='utf-8') + df.to_csv( + csv_file, index=True, index_label="EinheitMastrNummer", encoding="utf-8" + ) if df["DatumLetzteAktualisierung"].max() > newest_date: newest_date = df["DatumLetzteAktualisierung"].max() # Save metadata along with data metadata_file = os.path.join(data_path, filenames["metadata"]) - metadata = create_datapackage_meta_json(newest_date, TECHNOLOGIES, data=["raw", "cleaned", "postprocessed"], - json_serialize=False) - - with open(metadata_file, 'w', encoding='utf-8') as f: + metadata = create_datapackage_meta_json( + newest_date, + TECHNOLOGIES, + data=["raw", "cleaned", "postprocessed"], + json_serialize=False, + ) + + with open(metadata_file, "w", encoding="utf-8") as f: json.dump(metadata, f, ensure_ascii=False, indent=4) diff --git a/postprocessing/turbine_match.py b/postprocessing/turbine_match.py index 8b400e4e..caacc537 100644 --- a/postprocessing/turbine_match.py +++ b/postprocessing/turbine_match.py @@ -17,68 +17,109 @@ import pandas as pd import os + def read_csv_turbine(csv_name): - turbines = pd.read_csv(csv_name, header=0, encoding='utf-8', sep=',', error_bad_lines=True, index_col=False, - dtype={'index': int, 'id': int,'turbine_id':int, 'manufacturer': str, 'name': str, 'turbine_type': str, - 'nominal_power': str, 'rotor_diamter': str,'rotor_area': str, 'hub_height': str, - 'max_speed_drive': str, 'wind_class_iec':str, 'wind_zone_dibt': str, - 'power_density': str, 'power_density_2': str,'calculated': str, - 'has_power_curve': str, 'power_curve_wind_speeds': str, 'power_curve_values': str, 'has_cp_curve': str, - 'power_coefficient_curve_wind_speeds': str, 'power_coefficient_curve_values': str, - 'has_ct_curve': str, 
'thrust_coefficient_curve_wind_speeds': str, 'thrust_coefficient_curve_values': str, 'source': str}, + turbines = pd.read_csv( + csv_name, + header=0, + encoding="utf-8", + sep=",", + error_bad_lines=True, + index_col=False, + dtype={ + "index": int, + "id": int, + "turbine_id": int, + "manufacturer": str, + "name": str, + "turbine_type": str, + "nominal_power": str, + "rotor_diamter": str, + "rotor_area": str, + "hub_height": str, + "max_speed_drive": str, + "wind_class_iec": str, + "wind_zone_dibt": str, + "power_density": str, + "power_density_2": str, + "calculated": str, + "has_power_curve": str, + "power_curve_wind_speeds": str, + "power_curve_values": str, + "has_cp_curve": str, + "power_coefficient_curve_wind_speeds": str, + "power_coefficient_curve_values": str, + "has_ct_curve": str, + "thrust_coefficient_curve_wind_speeds": str, + "thrust_coefficient_curve_values": str, + "source": str, + }, ) return turbines + def create_dataset(df): - types = [] - for i,r in df.iterrows(): - types.append(prepare_turbine_type(r)) - df.insert(6,'turbine_type_v2',types) - write_to_csv(df, 'turbine_library_t.csv') + types = [] + for i, r in df.iterrows(): + types.append(prepare_turbine_type(r)) + df.insert(6, "turbine_type_v2", types) + write_to_csv(df, "turbine_library_t.csv") def write_to_csv(df, path): - with open(path, mode='a', encoding='utf-8') as file: - df.to_csv(file, sep=',', - mode='a', - header=file.tell() == 0, - line_terminator='\n', - encoding='utf-8') + with open(path, mode="a", encoding="utf-8") as file: + df.to_csv( + file, + sep=",", + mode="a", + header=file.tell() == 0, + line_terminator="\n", + encoding="utf-8", + ) def prepare_turbine_type(turbine): - nom_pow = turbine.nominal_power - diam = turbine.rotor_diameter - man = get_manufacturer_short(turbine.manufacturer, nom_pow, diam) - type_name = man+'-'+str(diam)+'_'+str(int(nom_pow)) - return type_name + nom_pow = turbine.nominal_power + diam = turbine.rotor_diameter + man = 
get_manufacturer_short(turbine.manufacturer, nom_pow, diam) + type_name = man + "-" + str(diam) + "_" + str(int(nom_pow)) + return type_name def get_manufacturer_short(manufacturer, nom_pow, diam): - man = '' - if manufacturer == 'Nordex': - man = 'N' - if int(nom_pow) == 3000 or int(nom_pow) == 1500: - if int(diam) == 140 or int(diam) ==132 or int(diam) ==125 or int(diam) ==116 or int(diam) ==100 or int(diam) == 82 or int(diam) == 77 or int(diam) == 70: - man = 'AW' - elif manufacturer == 'Adwen/Areva': - man = 'AD' - elif manufacturer == 'Senvion/REpower': - man = 'S' - if int(nom_pow) == 2050 or int(nom_pow) == 2000: - man = 'MM' - elif manufacturer == 'Enercon': - man = 'E' - elif manufacturer == 'Siemens': - man = 'SWT' - elif manufacturer == 'Vestas': - man = 'V' - elif manufacturer == 'Vensys': - man = 'VS' - elif manufacturer == 'GE Wind': - man = 'GE' - elif manufacturer == 'Eno': - man = 'ENO' - elif manufacturer == 'aerodyn': - man = 'SCD' - return man \ No newline at end of file + man = "" + if manufacturer == "Nordex": + man = "N" + if int(nom_pow) == 3000 or int(nom_pow) == 1500: + if ( + int(diam) == 140 + or int(diam) == 132 + or int(diam) == 125 + or int(diam) == 116 + or int(diam) == 100 + or int(diam) == 82 + or int(diam) == 77 + or int(diam) == 70 + ): + man = "AW" + elif manufacturer == "Adwen/Areva": + man = "AD" + elif manufacturer == "Senvion/REpower": + man = "S" + if int(nom_pow) == 2050 or int(nom_pow) == 2000: + man = "MM" + elif manufacturer == "Enercon": + man = "E" + elif manufacturer == "Siemens": + man = "SWT" + elif manufacturer == "Vestas": + man = "V" + elif manufacturer == "Vensys": + man = "VS" + elif manufacturer == "GE Wind": + man = "GE" + elif manufacturer == "Eno": + man = "ENO" + elif manufacturer == "aerodyn": + man = "SCD" + return man diff --git a/pyproject.toml b/pyproject.toml index 2387333b..35a6c1e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,9 +4,9 @@ build-backend = "setuptools.build_meta" [project] 
name = "open_mastr" -version = "0.14.4" +version = "0.14.5" dependencies = [ - "pandas>=2.1", # pandas 2.1 is needed for dataframe.map() + "pandas>=2.2.2", "numpy", "sqlalchemy>=2.0", "psycopg2-binary", @@ -25,13 +25,15 @@ authors = [ {name = "Muschner Christoph"}, {name = "Kotthoff Florian"}, {name = "Tepe Deniz"}, + {name = "Amme Jonathan"}, {name = "Open Energy Family"}, ] maintainers = [ - {name = "Ludwig Hülk", email = "datenzentrum@rl-institut.de"}, {name = "Florian Kotthoff"}, - {name = "Christoph Muschner", email = "datenzentrum@rl-institut.de"} + {name = "Jonathan Amme", email = "jonathan.amme@rl-institut.de"}, + {name = "Ludwig Hülk", email = "datenzentrum@rl-institut.de"}, + {name = "Christoph Muschner"}, ] description = "A package that provides an interface for downloading and processing the data of the Marktstammdatenregister (MaStR)" readme = "README.rst" @@ -78,4 +80,4 @@ open_mastr = [ include = ["open_mastr", "open_mastr.soap_api", "open_mastr.soap_api.metadata", "open_mastr.utils", "open_mastr.utils.config", "open_mastr.xml_download"] # package names should match these glob patterns (["*"] by default) # from setup.py - not yet included in here -# download_url="https://github.com/OpenEnergyPlatform/open-MaStR/archive""/refs/tags/v0.14.4.tar.gz", +# download_url="https://github.com/OpenEnergyPlatform/open-MaStR/archive""/refs/tags/v0.14.5.tar.gz", diff --git a/scripts/mirror_mastr_csv_export.py b/scripts/mirror_mastr_csv_export.py index 2596d429..00cf6812 100644 --- a/scripts/mirror_mastr_csv_export.py +++ b/scripts/mirror_mastr_csv_export.py @@ -1,4 +1,4 @@ -from open_mastr.utils.helpers import (reverse_fill_basic_units, create_db_query) +from open_mastr.utils.helpers import reverse_fill_basic_units, create_db_query technology = [ @@ -24,6 +24,4 @@ reverse_fill_basic_units() # to csv per tech -create_db_query( - technology=technology, additional_data=data_types, limit=None -) +create_db_query(technology=technology, additional_data=data_types, 
limit=None) diff --git a/scripts/mirror_mastr_dump.py b/scripts/mirror_mastr_dump.py index ca2a0b66..69a3a3d2 100644 --- a/scripts/mirror_mastr_dump.py +++ b/scripts/mirror_mastr_dump.py @@ -2,8 +2,8 @@ import datetime # Dump data -now = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S') +now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S") dump_file = f"{now}_open-mastr-mirror.backup" mastr_refl = MaStRMirror() -mastr_refl.dump(dump_file) \ No newline at end of file +mastr_refl.dump(dump_file) diff --git a/scripts/mirror_mastr_update_latest.py b/scripts/mirror_mastr_update_latest.py index 0db0b234..40c61681 100644 --- a/scripts/mirror_mastr_update_latest.py +++ b/scripts/mirror_mastr_update_latest.py @@ -2,16 +2,27 @@ import datetime limit = None -technology = ["wind", "biomass", "combustion", "gsgk", "hydro", "nuclear", "storage", "solar"] +technology = [ + "wind", + "biomass", + "combustion", + "gsgk", + "hydro", + "nuclear", + "storage", + "solar", +] data_types = ["unit_data", "eeg_data", "kwk_data", "permit_data"] -location_types = ["location_elec_generation", "location_elec_consumption", "location_gas_generation", - "location_gas_consumption"] +location_types = [ + "location_elec_generation", + "location_elec_consumption", + "location_gas_generation", + "location_gas_consumption", +] processes = 12 mastr_mirror = MaStRMirror( - empty_schema=False, - parallel_processes=processes, - restore_dump=None + empty_schema=False, parallel_processes=processes, restore_dump=None ) # Download basic unit data @@ -21,13 +32,12 @@ for tech in technology: # mastr_mirror.create_additional_data_requests(tech) for data_type in data_types: - mastr_mirror.retrieve_additional_data(tech, data_type, chunksize=1000, limit=limit) + mastr_mirror.retrieve_additional_data( + tech, data_type, chunksize=1000, limit=limit + ) # Download basic location data -mastr_mirror.backfill_locations_basic( - limit=limit, - date="latest" -) +mastr_mirror.backfill_locations_basic(limit=limit, 
date="latest") # Download extended location data for location_type in location_types: diff --git a/tests/preparation.py b/tests/preparation.py index 12d34823..0f58bd3f 100644 --- a/tests/preparation.py +++ b/tests/preparation.py @@ -1,20 +1,19 @@ import os from open_mastr.utils.config import get_project_home_dir + def create_credentials_file(): """Use token and user stored in GitHub secrets for creating credentials file This is used to allow test workflow to access MaStR database. """ - credentials_file = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg') + credentials_file = os.path.join(get_project_home_dir(), "config", "credentials.cfg") token = os.getenv("MASTR_TOKEN") user = os.getenv("MASTR_USER") section_title = "[MaStR]" - file_content = f"{section_title}\n" \ - f"user = {user}\n" \ - f"token = {token}\n" + file_content = f"{section_title}\n" f"user = {user}\n" f"token = {token}\n" with open(credentials_file, "w") as credentials_fh: credentials_fh.write(file_content) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 71fbaa14..4a19f4fb 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -66,6 +66,7 @@ def parameter_dict_working_list(): "balancing_area", "permit", "deleted_units", + "deleted_market_actors", "retrofit_units", None, ["wind", "solar"], @@ -369,7 +370,6 @@ def test_db_query_to_csv(tmpdir, engine): os.remove(csv_path) for addit_table in addit_tables: - csv_path = join( get_data_version_dir(), f"bnetza_mastr_{addit_table}_raw.csv", diff --git a/tests/xml_download/test_utils_cleansing_bulk.py b/tests/xml_download/test_utils_cleansing_bulk.py index 38b8e41b..9a29ad76 100644 --- a/tests/xml_download/test_utils_cleansing_bulk.py +++ b/tests/xml_download/test_utils_cleansing_bulk.py @@ -30,6 +30,7 @@ def capture_wrap(): sys.stdout.close = lambda *args: None yield + @pytest.fixture(scope="module") def con(): con = sqlite3.connect(_sqlite_file_path) diff --git a/tests/xml_download/test_utils_download_bulk.py 
b/tests/xml_download/test_utils_download_bulk.py index 3fe351f6..b4cc0b7d 100644 --- a/tests/xml_download/test_utils_download_bulk.py +++ b/tests/xml_download/test_utils_download_bulk.py @@ -1,33 +1,52 @@ import time from open_mastr.xml_download.utils_download_bulk import gen_url + def test_gen_url(): when = time.strptime("2024-01-01", "%Y-%m-%d") url = gen_url(when) assert type(url) == str - assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240101_23.2.zip" + assert ( + url + == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240101_23.2.zip" + ) when = time.strptime("2024-04-01", "%Y-%m-%d") url = gen_url(when) assert type(url) == str - assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240401_23.2.zip" + assert ( + url + == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240401_23.2.zip" + ) when = time.strptime("2024-04-02", "%Y-%m-%d") url = gen_url(when) assert type(url) == str - assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240402_24.1.zip" + assert ( + url + == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240402_24.1.zip" + ) when = time.strptime("2024-10-01", "%Y-%m-%d") url = gen_url(when) assert type(url) == str - assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241001_24.1.zip" + assert ( + url + == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241001_24.1.zip" + ) when = time.strptime("2024-10-02", "%Y-%m-%d") url = gen_url(when) assert type(url) == str - assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241002_24.2.zip" + assert ( + url + == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241002_24.2.zip" + ) when = time.strptime("2024-12-31", "%Y-%m-%d") url = gen_url(when) assert type(url) == str - assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241231_24.2.zip" + assert ( + url + == 
"https://download.marktstammdatenregister.de/Gesamtdatenexport_20241231_24.2.zip" + )