From c93b8f4e2c643c19366499b216a679db4bded670 Mon Sep 17 00:00:00 2001
From: nesnoj
Date: Fri, 11 Oct 2024 11:01:04 +0200
Subject: [PATCH] Apply black #578

---
 docs/conf.py                                  |  33 ++--
 open_mastr/soap_api/metadata/description.py   |  70 ++++++---
 open_mastr/utils/config.py                    |  24 +--
 open_mastr/utils/credentials.py               |  80 ++++++----
 postprocessing/helpers.py                     |  39 +++--
 postprocessing/orm.py                         |  24 ++-
 postprocessing/postprocessing.py              | 129 ++++++++++------
 postprocessing/turbine_match.py               | 143 +++++++++++-------
 scripts/mirror_mastr_csv_export.py            |   6 +-
 scripts/mirror_mastr_dump.py                  |   4 +-
 scripts/mirror_mastr_update_latest.py         |  32 ++--
 tests/preparation.py                          |   7 +-
 tests/test_helpers.py                         |   7 +-
 .../xml_download/test_utils_cleansing_bulk.py |   1 +
 .../xml_download/test_utils_download_bulk.py  |  31 +++-
 15 files changed, 399 insertions(+), 231 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 42a70728..5e578719 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -12,14 +12,15 @@
 #
 import os
 import sys
-sys.path.insert(0, os.path.abspath('../open_mastr'))
+
+sys.path.insert(0, os.path.abspath("../open_mastr"))
 
 
 # -- Project information -----------------------------------------------------
 
-project = 'open-MaStR'
-copyright = '2022 Reiner Lemoine Institut and fortiss'
-author = ''
+project = "open-MaStR"
+copyright = "2022 Reiner Lemoine Institut and fortiss"
+author = ""
 
 
 # -- General configuration ---------------------------------------------------
@@ -28,22 +29,22 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinx.ext.autosectionlabel',
-    'sphinx.ext.autodoc',
-    'sphinx.ext.napoleon',
-    'sphinx_tabs.tabs',
-    'm2r2',
+    "sphinx.ext.autosectionlabel",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.napoleon",
+    "sphinx_tabs.tabs",
+    "m2r2",
 ]
 
 source_suffix = [".rst", ".md"]
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
 
 # -- Options for HTML output -------------------------------------------------
@@ -51,13 +52,13 @@
 # The theme to use for HTML and HTML Help pages. See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-html_css_files = ['custom.css']
+html_static_path = ["_static"]
+html_css_files = ["custom.css"]
 
-# Autodoc config
-autoclass_content = 'both'
+# Autodoc config
+autoclass_content = "both"
diff --git a/open_mastr/soap_api/metadata/description.py b/open_mastr/soap_api/metadata/description.py
index 8fc55526..a4986959 100644
--- a/open_mastr/soap_api/metadata/description.py
+++ b/open_mastr/soap_api/metadata/description.py
@@ -33,19 +33,19 @@ def __init__(self, xml=None):
                 self.xml = fh.read()
         else:
             # If no XML file is given, the file is read from an URL
-            zipurl = 'https://www.marktstammdatenregister.de/MaStRHilfe/files/' \
-                     'webdienst/Dienstbeschreibung_1_2_39_Produktion.zip'
+            zipurl = (
+                "https://www.marktstammdatenregister.de/MaStRHilfe/files/"
+                "webdienst/Dienstbeschreibung_1_2_39_Produktion.zip"
+            )
 
             with urlopen(zipurl) as zipresp:
                 with ZipFile(BytesIO(zipresp.read())) as zfile:
-                    self.xml = zfile.read('xsd/mastrbasetypes.xsd')
-
-
+                    self.xml = zfile.read("xsd/mastrbasetypes.xsd")
 
         # Parse XML and extract relevant data
         parsed = xmltodict.parse(self.xml, process_namespaces=False)
-        self.complex_types = parsed['schema']["complexType"]
-        self.simple_types = parsed['schema']["simpleType"]
+        self.complex_types = parsed["schema"]["complexType"]
+        self.simple_types = parsed["schema"]["simpleType"]
 
         # Prepare parsed data for documentational purposes
         abstract_types, parameters, responses, types = self._filter_type_descriptions()
@@ -78,13 +78,17 @@ def _filter_type_descriptions(self):
                 raise ValueError("Ohh...")
             else:
                 # Filter all functions
-                if item["@name"].startswith(("Get", "Set", "Erneute", "Verschiebe", "Delete")):
+                if item["@name"].startswith(
+                    ("Get", "Set", "Erneute", "Verschiebe", "Delete")
+                ):
                     functions.append(item)
 
                     # Further split the list of functions into paramters and responses
                     if item["@name"].endswith("Parameter"):
                         if "complexContent" in item.keys():
-                            parameters[item["@name"]] = item["complexContent"]["extension"]
+                            parameters[item["@name"]] = item["complexContent"][
+                                "extension"
+                            ]
                         else:
                             parameters[item["@name"]] = item
                     elif item["@name"].endswith("Antwort"):
@@ -111,12 +115,14 @@ def prepare_simple_type(self):
 
         for simple_type in self.simple_types:
             if "enumeration" in simple_type["restriction"]:
-                possible_values = [_["@value"] for _ in simple_type["restriction"]["enumeration"]]
+                possible_values = [
+                    _["@value"] for _ in simple_type["restriction"]["enumeration"]
+                ]
             else:
                 possible_values = []
             simple_types_doc[simple_type["@name"]] = {
                 "type": simple_type["restriction"]["@base"],
-                "values": possible_values
+                "values": possible_values,
             }
 
         return simple_types_doc
@@ -140,7 +146,9 @@ def functions_data_documentation(self):
                 if "annotation" in fcn["sequence"]["element"]:
                     fcn_data = [fcn["sequence"]["element"]]
                 else:
-                    fcn_data = self.types[fcn["sequence"]["element"]["@type"].split(":")[1]]["sequence"]["element"]
+                    fcn_data = self.types[
+                        fcn["sequence"]["element"]["@type"].split(":")[1]
+                    ]["sequence"]["element"]
             else:
                 print(type(fcn["sequence"]))
                 print(fcn["sequence"])
@@ -148,41 +156,51 @@ def functions_data_documentation(self):
 
             # Add data for inherited columns from base types
             if "@base" in fcn:
-                if not fcn["@base"] == 'mastr:AntwortBasis':
-                    fcn_data = _collect_columns_of_base_type(self.types, fcn["@base"].split(":")[1], fcn_data)
+                if not fcn["@base"] == "mastr:AntwortBasis":
+                    fcn_data = _collect_columns_of_base_type(
+                        self.types, fcn["@base"].split(":")[1], fcn_data
+                    )
 
             function_docs[fcn_name] = {}
             for column in fcn_data:
                 # Replace MaStR internal types with more general ones
                 if column["@type"].startswith("mastr:"):
                     try:
-                        column_type = self.simple_types_prepared[column["@type"].split(":")[1]]["type"]
+                        column_type = self.simple_types_prepared[
+                            column["@type"].split(":")[1]
+                        ]["type"]
                     except KeyError:
                         column_type = column["@type"]
                 else:
                     column_type = column["@type"]
 
                 if "annotation" in column.keys():
-                    description = column["annotation"]["documentation"].get("#text", None)
+                    description = column["annotation"]["documentation"].get(
+                        "#text", None
+                    )
                     if description:
-                        description = re.sub(" +", " ", description.replace("\n", ""))
+                        description = re.sub(
+                            " +", " ", description.replace("\n", "")
+                        )
                     function_docs[fcn_name][column["@name"]] = {
-                            "type": column_type,
-                            "description": description,
-                            "example": column["annotation"]["documentation"].get("m-ex", None)
+                        "type": column_type,
+                        "description": description,
+                        "example": column["annotation"]["documentation"].get(
+                            "m-ex", None
+                        ),
                     }
                 else:
                     function_docs[fcn_name][column["@name"]] = {
                         "type": column_type,
                         # TODO: insert information from simple type here
                         "description": None,
-                        "example": None
+                        "example": None,
                     }
 
         # Hack in a descrition for a column that gets created after download while flattening data
         function_docs["GetEinheitWind"]["HerstellerId"] = {
             "type": "str",
             "description": "Id des Herstellers der Einheit",
-            "example": 923
+            "example": 923,
         }
 
         return function_docs
@@ -193,7 +211,11 @@ def _collect_columns_of_base_type(base_types, base_type_name, fcn_data):
         fcn_data += type_description["extension"]["sequence"]["element"]
 
     if "@base" in type_description["extension"]:
-        if not type_description["extension"]["@base"] == 'mastr:AntwortBasis':
-            fcn_data = _collect_columns_of_base_type(base_types, type_description["extension"]["@base"].split(":")[1], fcn_data)
+        if not type_description["extension"]["@base"] == "mastr:AntwortBasis":
+            fcn_data = _collect_columns_of_base_type(
+                base_types,
+                type_description["extension"]["@base"].split(":")[1],
+                fcn_data,
+            )
 
     return fcn_data
diff --git a/open_mastr/utils/config.py b/open_mastr/utils/config.py
index b1146269..40f67ec8 100644
--- a/open_mastr/utils/config.py
+++ b/open_mastr/utils/config.py
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 
-
 """
 Service functions for logging
 
@@ -26,7 +25,11 @@
 import logging
 import logging.config
 
-from open_mastr.utils.constants import TECHNOLOGIES, API_LOCATION_TYPES, ADDITIONAL_TABLES
+from open_mastr.utils.constants import (
+    TECHNOLOGIES,
+    API_LOCATION_TYPES,
+    ADDITIONAL_TABLES,
+)
 
 log = logging.getLogger(__name__)
 
@@ -57,7 +60,7 @@ def get_output_dir():
     """
 
     if "OUTPUT_PATH" in os.environ:
-        return os.environ.get('OUTPUT_PATH')
+        return os.environ.get("OUTPUT_PATH")
 
     return get_project_home_dir()
 
@@ -76,7 +79,7 @@ def get_data_version_dir():
     data_version = get_data_config()
 
     if "OUTPUT_PATH" in os.environ:
-        return os.path.join(os.environ.get('OUTPUT_PATH'), "data", data_version)
+        return os.path.join(os.environ.get("OUTPUT_PATH"), "data", data_version)
 
     return os.path.join(get_project_home_dir(), "data", data_version)
 
@@ -230,9 +233,7 @@ def _filenames_generator():
     }
 
     # Add file names of processed data
-    filenames["postprocessed"] = {
-        tech: f"{prefix}_{tech}.csv" for tech in TECHNOLOGIES
-    }
+    filenames["postprocessed"] = {tech: f"{prefix}_{tech}.csv" for tech in TECHNOLOGIES}
 
     # Add filenames for location data
     filenames["raw"].update(
@@ -240,8 +241,13 @@ def _filenames_generator():
     )
 
     # Add filenames for additional tables
-    filenames["raw"].update({"additional_table":
-        {addit_table: f"{prefix}_{addit_table}_raw.csv" for addit_table in ADDITIONAL_TABLES}}
+    filenames["raw"].update(
+        {
+            "additional_table": {
+                addit_table: f"{prefix}_{addit_table}_raw.csv"
+                for addit_table in ADDITIONAL_TABLES
+            }
+        }
     )
 
     # Add metadata file
diff --git a/open_mastr/utils/credentials.py b/open_mastr/utils/credentials.py
index c00495f4..ee818828 100644
--- a/open_mastr/utils/credentials.py
+++ b/open_mastr/utils/credentials.py
@@ -20,12 +20,13 @@
 import keyring
 import logging
 
+
 log = logging.getLogger(__name__)
 
 
 def _load_config_file():
-    config_file = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg')
+    config_file = os.path.join(get_project_home_dir(), "config", "credentials.cfg")
     cfg = cp.ConfigParser()
 
     # if not os.path.isdir(open_mastr_home):
@@ -35,7 +36,7 @@ def _load_config_file():
         cfg.read(config_file)
         return cfg
     else:
-        with open(config_file, 'w') as configfile:
+        with open(config_file, "w") as configfile:
             cfg.write(configfile)
         return cfg
 
@@ -53,7 +54,7 @@ def get_mastr_user():
     """
     cfg = _load_config_file()
     section = "MaStR"
-    cfg_path = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg')
+    cfg_path = os.path.join(get_project_home_dir(), "config", "credentials.cfg")
 
     try:
         user = cfg.get(section, "user")
@@ -66,10 +67,12 @@ def get_mastr_user():
         # except cp.NoOptionError:
         #     raise cp.Error(f"The option 'user' could not by found in the section "
         #                    f"{section} in file {cfg_path}.")
-        log.warning(f"The option 'user' could not by found in the section "
-                    f"{section} in file {cfg_path}. "
-                    f"You might run into trouble when downloading data via the MaStR API."
-                    f"\n Bulk download works without option 'user'.")
+        log.warning(
+            f"The option 'user' could not by found in the section "
+            f"{section} in file {cfg_path}. "
+            f"You might run into trouble when downloading data via the MaStR API."
+            f"\n Bulk download works without option 'user'."
+        )
         return None
 
@@ -79,15 +82,19 @@ def check_and_set_mastr_user():
     user = get_mastr_user()
 
    if not user:
-        credentials_file = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg')
+        credentials_file = os.path.join(
+            get_project_home_dir(), "config", "credentials.cfg"
+        )
         cfg = _load_config_file()
 
-        user = input('\n\nCannot not find a MaStR user name in {config_file}.\n\n'
-                     'Please enter MaStR-ID (pattern: SOM123456789012): '
-                     ''.format(config_file=credentials_file))
+        user = input(
+            "\n\nCannot not find a MaStR user name in {config_file}.\n\n"
+            "Please enter MaStR-ID (pattern: SOM123456789012): "
+            "".format(config_file=credentials_file)
+        )
         cfg["MaStR"] = {"user": user}
 
-        with open(credentials_file, 'w') as configfile:
+        with open(credentials_file, "w") as configfile:
             cfg.write(configfile)
 
     return user
@@ -115,7 +122,7 @@ def get_mastr_token(user):
     # Retrieving password from keyring does currently fail on headless systems
     # Prevent from breaking program execution with following try/except clause
     section = "MaStR"
-    cfg_path = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg')
+    cfg_path = os.path.join(get_project_home_dir(), "config", "credentials.cfg")
     try:
         password = keyring.get_password(section, user)
     except:
@@ -127,10 +134,12 @@ def get_mastr_token(user):
         try:
             password = cfg.get(section, "token")
         except (cp.NoSectionError, cp.NoOptionError):
-            log.warning(f"The option 'token' could not by found in the section "
-                        f"{section} in file {cfg_path}. "
-                        f"You might run into trouble when downloading data via the MaStR API."
-                        f"\n Bulk download works without option 'token'.")
+            log.warning(
+                f"The option 'token' could not by found in the section "
+                f"{section} in file {cfg_path}. "
+                f"You might run into trouble when downloading data via the MaStR API."
+                f"\n Bulk download works without option 'token'."
+            )
             password = None
 
     return password
@@ -142,17 +151,21 @@ def check_and_set_mastr_token(user):
 
     if not password:
         cfg = _load_config_file()
-        credentials_file = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg')
+        credentials_file = os.path.join(
+            get_project_home_dir(), "config", "credentials.cfg"
+        )
 
         # If also no password in credentials file, ask the user to input password
         # Two options: (1) storing in keyring; (2) storing in config file
-        password = input('\n\nCannot not find a MaStR password, neither in keyring nor in {config_file}.\n\n'
-                         "Please enter a valid access token of a role (Benutzerrolle) "
-                         "associated to the user {user}.\n"
-                         "The token might look like: "
-                         "koo5eixeiQuoi'w8deighai8ahsh1Ha3eib3coqu7ceeg%ies...\n".format(
-                             config_file=credentials_file,
-                             user=user))
+        password = input(
+            "\n\nCannot not find a MaStR password, neither in keyring nor in {config_file}.\n\n"
+            "Please enter a valid access token of a role (Benutzerrolle) "
+            "associated to the user {user}.\n"
+            "The token might look like: "
+            "koo5eixeiQuoi'w8deighai8ahsh1Ha3eib3coqu7ceeg%ies...\n".format(
+                config_file=credentials_file, user=user
+            )
+        )
 
         # let the user decide where to store the password
         # (1) keyring
         # (2) credentials.cfg
@@ -160,10 +173,15 @@ def check_and_set_mastr_token(user):
         # (0) don't store, abort
         # Wait for correct input
         while True:
-            choice = int(input("Where do you want to store your password?\n"
-                               "\t(1) Keyring (default, hit ENTER to select)\n"
-                               "\t(2) Config file (credendials.cfg)\n"
-                               "\t(0) Abort. Don't store password\n") or "1\n")
+            choice = int(
+                input(
+                    "Where do you want to store your password?\n"
+                    "\t(1) Keyring (default, hit ENTER to select)\n"
+                    "\t(2) Config file (credendials.cfg)\n"
+                    "\t(0) Abort. Don't store password\n"
+                )
+                or "1\n"
+            )
             # check if choice is valid input
             if choice in [0, 1, 2]:
                 break
@@ -175,7 +193,7 @@ def check_and_set_mastr_token(user):
         keyring.set_password("MaStR", user, password)
     elif choice == 2:
         cfg["MaStR"] = {"user": user, "token": password}
-        with open(credentials_file, 'w') as configfile:
+        with open(credentials_file, "w") as configfile:
             cfg.write(configfile)
     else:
         log.error("No clue what happened here!?")
@@ -199,4 +217,4 @@ def get_zenodo_token():
         user = cfg.get(section, "token")
         return user
     except (cp.NoSectionError, cp.NoOptionError):
-        return None
\ No newline at end of file
+        return None
diff --git a/postprocessing/helpers.py b/postprocessing/helpers.py
index 0cccf27f..5084c7f3 100644
--- a/postprocessing/helpers.py
+++ b/postprocessing/helpers.py
@@ -1,4 +1,5 @@
 from bokeh.palettes import Category10_10 as palette
+
 # import geoviews as gv
 import bokeh
 
@@ -9,9 +10,9 @@ def plotPowerPlants(df):
     # size marker according to gross power output
     iMaxSize = 30
     iMinSize = 10
-    df["size"] = (df["Bruttoleistung"] - df["Bruttoleistung"].min()) / \
-                 (df["Bruttoleistung"].max() - df["Bruttoleistung"].min()) * \
-                 (iMaxSize - iMinSize) + iMinSize
+    df["size"] = (df["Bruttoleistung"] - df["Bruttoleistung"].min()) / (
+        df["Bruttoleistung"].max() - df["Bruttoleistung"].min()
+    ) * (iMaxSize - iMinSize) + iMinSize
 
     # convert datetime to string
     df["date"] = df["Inbetriebnahmedatum"].dt.strftime("%Y-%m-%d")
@@ -41,17 +42,35 @@ def plotPowerPlants(df):
     for group in groups:
         df_group = df.loc[
             df["Einheittyp"] == group,
-            ["Name", "Standort", "Bundesland", "Land", "date",
-             "Einheittyp", "Bruttoleistung", "Laengengrad", "Breitengrad", "size"]
+            [
+                "Name",
+                "Standort",
+                "Bundesland",
+                "Land",
+                "date",
+                "Einheittyp",
+                "Bruttoleistung",
+                "Laengengrad",
+                "Breitengrad",
+                "size",
+            ],
         ]
-        points = gv.Points(df_group, ["Laengengrad", "Breitengrad"], label=group).options(
-            aspect=2, responsive=True, tools=[hover_tool], size="size", active_tools=['wheel_zoom'],
-            fill_alpha=0.6, fill_color=colors[group], line_color="white",
+        points = gv.Points(
+            df_group, ["Laengengrad", "Breitengrad"], label=group
+        ).options(
+            aspect=2,
+            responsive=True,
+            tools=[hover_tool],
+            size="size",
+            active_tools=["wheel_zoom"],
+            fill_alpha=0.6,
+            fill_color=colors[group],
+            line_color="white",
         )
-        overlay = (overlay * points)
+        overlay = overlay * points
 
     # hide group when clicking on legend
     overlay.options(click_policy="hide", clone=False)
 
     # return figure
-    return overlay
\ No newline at end of file
+    return overlay
diff --git a/postprocessing/orm.py b/postprocessing/orm.py
index f8612ee4..5a98ed07 100644
--- a/postprocessing/orm.py
+++ b/postprocessing/orm.py
@@ -1,7 +1,18 @@
 from geoalchemy2 import Geometry
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.schema import MetaData
-from sqlalchemy import Column, Integer, String, Float, Sequence, DateTime, Boolean, func, Date, JSON
+from sqlalchemy import (
+    Column,
+    Integer,
+    String,
+    Float,
+    Sequence,
+    DateTime,
+    Boolean,
+    func,
+    Date,
+    JSON,
+)
 from sqlalchemy.dialects.postgresql import JSONB
 
 cleaned_schema = "model_draft"
@@ -30,7 +41,6 @@ class BasicUnit(object):
 
     StatisikFlag_basic = Column(String)
 
-
 class Extended(object):
 
     EinheitMastrNummer_extended = Column(String)
@@ -91,7 +101,7 @@ class Extended(object):
     Einspeisungsart = Column(String)
     PraequalifiziertFuerRegelenergie = Column(Boolean)
     GenMastrNummer_extended = Column(String)
-    geom = Column(Geometry('POINT'))
+    geom = Column(Geometry("POINT"))
 
     comment = Column(String)
 
@@ -175,6 +185,7 @@ class HydroEeg(Eeg):
 class StorageEeg(Eeg):
     pass
 
+
 class Kwk(object):
 
     KwkMastrNummer_kwk = Column(String)
@@ -205,7 +216,7 @@ class Permit(object):
 
 
 class WindCleaned(Permit, WindEeg, Extended, BasicUnit, Base):
-    __tablename__ = 'bnetza_mastr_wind_clean'
+    __tablename__ = "bnetza_mastr_wind_clean"
 
     # wind specific attributes
     NameWindpark = Column(String)
@@ -231,8 +242,7 @@ class WindCleaned(Permit, WindEeg, Extended, BasicUnit, Base):
     Kuestenentfernung = Column(Float)
     EegMastrNummer_extended = Column(String)
     tags = Column(JSONB)
-    geom_3035 = Column(Geometry('POINT', srid=3035))
-
+    geom_3035 = Column(Geometry("POINT", srid=3035))
 
 
 class SolarCleaned(Permit, SolarEeg, Extended, BasicUnit, Base):
@@ -288,7 +298,7 @@ class CombustionCleaned(Permit, Kwk, Extended, BasicUnit, Base):
     AnteiligNutzungsberechtigte = Column(String)
     Notstromaggregat = Column(Boolean)
     Einsatzort = Column(String)
-    KwkMastrNummer_extended = Column(String) # changed here
+    KwkMastrNummer_extended = Column(String)  # changed here
 
     Technologie = Column(String)
diff --git a/postprocessing/postprocessing.py b/postprocessing/postprocessing.py
index e860dbd2..cbb86e26 100644
--- a/postprocessing/postprocessing.py
+++ b/postprocessing/postprocessing.py
@@ -16,25 +16,13 @@
 
 log = setup_logger()
 
-BKG_VG250 = {
-    "schema": "boundaries",
-    "table": "bkg_vg250_1_sta_union_mview"
-}
+BKG_VG250 = {"schema": "boundaries", "table": "bkg_vg250_1_sta_union_mview"}
 
-OSM_PLZ = {
-    "schema": "boundaries",
-    "table": "osm_postcode"
-}
+OSM_PLZ = {"schema": "boundaries", "table": "osm_postcode"}
 
-OFFSHORE = {
-    "schema": "model_draft",
-    "table": "rli_boundaries_offshore"
-}
+OFFSHORE = {"schema": "model_draft", "table": "rli_boundaries_offshore"}
 
-OSM_WINDPOWER = {
-    "schema": "model_draft",
-    "table": "mastr_osm_deu_point_windpower"
-}
+OSM_WINDPOWER = {"schema": "model_draft", "table": "mastr_osm_deu_point_windpower"}
 
 OEP_QUERY_PATTERN = "https://openenergy-platform.org/api/v0/schema/{schema}/tables/{table}/rows?form=csv"
@@ -43,7 +31,16 @@
 MASTR_RAW_SCHEMA = "model_draft"
 OPEN_MASTR_SCHEMA = "model_draft"
 
-TECHNOLOGIES = ["wind", "hydro", "solar", "biomass", "combustion", "nuclear", "gsgk", "storage"]
+TECHNOLOGIES = [
+    "wind",
+    "hydro",
+    "solar",
+    "biomass",
+    "combustion",
+    "nuclear",
+    "gsgk",
+    "storage",
+]
 
 orm_map = {
     "wind": {
@@ -113,15 +110,17 @@ def table_to_db(csv_data, table, schema, conn, geom_col="geom", srid=4326):
     query = "CREATE SCHEMA IF NOT EXISTS {schema}".format(schema=schema)
     conn.execute(query)
 
-    csv_data.to_sql(table,
-                    con=conn,
-                    schema=schema,
-                    dtype={
-                        geom_col: Geometry(srid=srid),
-                        "plz": String(),
-                    },
-                    chunksize=100000,
-                    if_exists="replace")
+    csv_data.to_sql(
+        table,
+        con=conn,
+        schema=schema,
+        dtype={
+            geom_col: Geometry(srid=srid),
+            "plz": String(),
+        },
+        chunksize=100000,
+        if_exists="replace",
+    )
 
 
 def table_to_db_orm(mapper, data, chunksize=10000):
@@ -144,6 +143,7 @@ def table_to_db_orm(mapper, data, chunksize=10000):
         # Commit each chunk separately
         session.commit()
 
+
 def import_boundary_data_csv(schema, table, index_col="id", srid=4326):
     """
     Import additional data for post-processing
@@ -166,32 +166,43 @@ def import_boundary_data_csv(schema, table, index_col="id", srid=4326):
 
     with db_engine().connect() as con:
         # Check if table already exists
-        table_query = "SELECT to_regclass('{schema}.{table}');".format(schema=schema, table=table)
+        table_query = "SELECT to_regclass('{schema}.{table}');".format(
+            schema=schema, table=table
+        )
         table_name = "{schema}.{table}".format(schema=schema, table=table)
         table_exists = table_name in con.execute(table_query).first().values()
 
         if not table_exists:
             # Download CSV file if it does not exist
             if not csv_file_exists:
-                log.info("Downloading table {schema}.{table} from OEP".format(schema=schema, table=table))
+                log.info(
+                    "Downloading table {schema}.{table} from OEP".format(
+                        schema=schema, table=table
+                    )
+                )
                 urlretrieve(
-                    OEP_QUERY_PATTERN.format(schema=schema, table=table),
-                    csv_file)
+                    OEP_QUERY_PATTERN.format(schema=schema, table=table), csv_file
+                )
             else:
                 log.info("Found {} locally.".format(csv_file))
 
             # Read CSV file
-            csv_data = pd.read_csv(csv_file,
-                                   index_col=index_col)
+            csv_data = pd.read_csv(csv_file, index_col=index_col)
 
             # Prepare geom data for DB upload
-            csv_data["geom"] = csv_data["geom"].apply(lambda x: WKTElement(wkb_loads(x, hex=True).wkt, srid=srid))
+            csv_data["geom"] = csv_data["geom"].apply(
+                lambda x: WKTElement(wkb_loads(x, hex=True).wkt, srid=srid)
+            )
 
             # Insert to db
             table_to_db(csv_data, table, schema, con, srid=srid)
             log.info("Data from {} successfully imported to database.".format(csv_file))
         else:
-            log.info("Table '{schema}.{table}' already exists in local database".format(schema=schema, table=table))
+            log.info(
+                "Table '{schema}.{table}' already exists in local database".format(
+                    schema=schema, table=table
+                )
+            )
 
 
 def add_geom_col(df, lat_col="Breitengrad", lon_col="Laengengrad", srid=4326):
@@ -219,17 +230,21 @@ def add_geom_col(df, lat_col="Breitengrad", lon_col="Laengengrad", srid=4326):
     df_with_coords = df.loc[~(df["Breitengrad"].isna() | df["Laengengrad"].isna())]
 
     # Just select data with lat/lon in range [(-90,90), (-180,180)]
-    df_with_coords = df_with_coords[~((df_with_coords["Breitengrad"] < -90)
-                                      | (df_with_coords["Breitengrad"] > 90)
-                                      | (df_with_coords["Laengengrad"] < -180)
-                                      | (df_with_coords["Laengengrad"] > 180))
+    df_with_coords = df_with_coords[
+        ~(
+            (df_with_coords["Breitengrad"] < -90)
+            | (df_with_coords["Breitengrad"] > 90)
+            | (df_with_coords["Laengengrad"] < -180)
+            | (df_with_coords["Laengengrad"] > 180)
+        )
     ]
     df_no_coords = df.loc[~df.index.isin(df_with_coords.index)]
 
-    gdf = gpd.GeoDataFrame(
-        df_with_coords, geometry=gpd.points_from_xy(df_with_coords[lon_col], df_with_coords[lat_col]),
-        crs="EPSG:{}".format(srid))
+    gdf = gpd.GeoDataFrame(
+        df_with_coords,
+        geometry=gpd.points_from_xy(df_with_coords[lon_col], df_with_coords[lat_col]),
+        crs="EPSG:{}".format(srid),
+    )
 
     gdf["geom"] = gdf["geometry"].apply(lambda x: WKTElement(x.wkt, srid=srid))
     gdf.drop(columns=["geometry"], inplace=True)
@@ -271,9 +286,15 @@ def run_sql_postprocessing():
         if tech_name not in ["gsgk", "storage", "nuclear"]:
             log.info(f"Run post-processing on {tech_name} data")
             # Read SQL query from file
-            with open(os.path.join(os.path.dirname(__file__),
-                                   "db-cleansing",
-                                   "rli-mastr-{tech_name}-cleansing.sql".format(tech_name=tech_name))) as file:
+            with open(
+                os.path.join(
+                    os.path.dirname(__file__),
+                    "db-cleansing",
+                    "rli-mastr-{tech_name}-cleansing.sql".format(
+                        tech_name=tech_name
+                    ),
+                )
+            ) as file:
                 escaped_sql = text(file.read())
 
             # Execute query
@@ -334,21 +355,29 @@ def to_csv(limit=None):
         with session_scope() as session:
             orm_tech = getattr(orm, orm_map[tech]["cleaned"])
             query = session.query(orm_tech).limit(limit)
-            df = pd.read_sql(query.statement, query.session.bind, index_col="EinheitMastrNummer")
+            df = pd.read_sql(
+                query.statement, query.session.bind, index_col="EinheitMastrNummer"
+            )
 
         csv_file = os.path.join(data_path, filenames["postprocessed"][tech])
-        df.to_csv(csv_file, index=True, index_label="EinheitMastrNummer", encoding='utf-8')
+        df.to_csv(
+            csv_file, index=True, index_label="EinheitMastrNummer", encoding="utf-8"
+        )
 
         if df["DatumLetzteAktualisierung"].max() > newest_date:
             newest_date = df["DatumLetzteAktualisierung"].max()
 
     # Save metadata along with data
     metadata_file = os.path.join(data_path, filenames["metadata"])
-    metadata = create_datapackage_meta_json(newest_date, TECHNOLOGIES, data=["raw", "cleaned", "postprocessed"],
-                                            json_serialize=False)
+    metadata = create_datapackage_meta_json(
+        newest_date,
+        TECHNOLOGIES,
+        data=["raw", "cleaned", "postprocessed"],
+        json_serialize=False,
+    )
 
-    with open(metadata_file, 'w', encoding='utf-8') as f:
+    with open(metadata_file, "w", encoding="utf-8") as f:
         json.dump(metadata, f, ensure_ascii=False, indent=4)
diff --git a/postprocessing/turbine_match.py b/postprocessing/turbine_match.py
index 8b400e4e..caacc537 100644
--- a/postprocessing/turbine_match.py
+++ b/postprocessing/turbine_match.py
@@ -17,68 +17,109 @@
 import pandas as pd
 import os
 
+
 def read_csv_turbine(csv_name):
-    turbines = pd.read_csv(csv_name, header=0, encoding='utf-8', sep=',', error_bad_lines=True, index_col=False,
-                           dtype={'index': int, 'id': int,'turbine_id':int, 'manufacturer': str, 'name': str, 'turbine_type': str,
-                                  'nominal_power': str, 'rotor_diamter': str,'rotor_area': str, 'hub_height': str,
-                                  'max_speed_drive': str, 'wind_class_iec':str, 'wind_zone_dibt': str,
-                                  'power_density': str, 'power_density_2': str,'calculated': str,
-                                  'has_power_curve': str, 'power_curve_wind_speeds': str, 'power_curve_values': str, 'has_cp_curve': str,
-                                  'power_coefficient_curve_wind_speeds': str, 'power_coefficient_curve_values': str,
-                                  'has_ct_curve': str, 'thrust_coefficient_curve_wind_speeds': str, 'thrust_coefficient_curve_values': str, 'source': str},
-                           )
+    turbines = pd.read_csv(
+        csv_name,
+        header=0,
+        encoding="utf-8",
+        sep=",",
+        error_bad_lines=True,
+        index_col=False,
+        dtype={
+            "index": int,
+            "id": int,
+            "turbine_id": int,
+            "manufacturer": str,
+            "name": str,
+            "turbine_type": str,
+            "nominal_power": str,
+            "rotor_diamter": str,
+            "rotor_area": str,
+            "hub_height": str,
+            "max_speed_drive": str,
+            "wind_class_iec": str,
+            "wind_zone_dibt": str,
+            "power_density": str,
+            "power_density_2": str,
+            "calculated": str,
+            "has_power_curve": str,
+            "power_curve_wind_speeds": str,
+            "power_curve_values": str,
+            "has_cp_curve": str,
+            "power_coefficient_curve_wind_speeds": str,
+            "power_coefficient_curve_values": str,
+            "has_ct_curve": str,
+            "thrust_coefficient_curve_wind_speeds": str,
+            "thrust_coefficient_curve_values": str,
+            "source": str,
+        },
+    )
     return turbines
 
 
 def create_dataset(df):
-	types = []
-	for i,r in df.iterrows():
-		types.append(prepare_turbine_type(r))
-	df.insert(6,'turbine_type_v2',types)
-	write_to_csv(df, 'turbine_library_t.csv')
+    types = []
+    for i, r in df.iterrows():
+        types.append(prepare_turbine_type(r))
+    df.insert(6, "turbine_type_v2", types)
+    write_to_csv(df, "turbine_library_t.csv")
 
 
 def write_to_csv(df, path):
-    with open(path, mode='a', encoding='utf-8') as file:
-        df.to_csv(file, sep=',',
-                  mode='a',
-                  header=file.tell() == 0,
-                  line_terminator='\n',
-                  encoding='utf-8')
+    with open(path, mode="a", encoding="utf-8") as file:
+        df.to_csv(
+            file,
+            sep=",",
+            mode="a",
+            header=file.tell() == 0,
+            line_terminator="\n",
+            encoding="utf-8",
+        )
 
 
 def prepare_turbine_type(turbine):
-	nom_pow = turbine.nominal_power
-	diam = turbine.rotor_diameter
-	man = get_manufacturer_short(turbine.manufacturer, nom_pow, diam)
-	type_name = man+'-'+str(diam)+'_'+str(int(nom_pow))
-	return type_name
+    nom_pow = turbine.nominal_power
+    diam = turbine.rotor_diameter
+    man = get_manufacturer_short(turbine.manufacturer, nom_pow, diam)
+    type_name = man + "-" + str(diam) + "_" + str(int(nom_pow))
+    return type_name
 
 
 def get_manufacturer_short(manufacturer, nom_pow, diam):
-	man = ''
-	if manufacturer == 'Nordex':
-		man = 'N'
-		if int(nom_pow) == 3000 or int(nom_pow) == 1500:
-			if int(diam) == 140 or int(diam) ==132 or int(diam) ==125 or int(diam) ==116 or int(diam) ==100 or int(diam) == 82 or int(diam) == 77 or int(diam) == 70:
-				man = 'AW'
-	elif manufacturer == 'Adwen/Areva':
-		man = 'AD'
-	elif manufacturer == 'Senvion/REpower':
-		man = 'S'
-		if int(nom_pow) == 2050 or int(nom_pow) == 2000:
-			man = 'MM'
-	elif manufacturer == 'Enercon':
-		man = 'E'
-	elif manufacturer == 'Siemens':
-		man = 'SWT'
-	elif manufacturer == 'Vestas':
-		man = 'V'
-	elif manufacturer == 'Vensys':
-		man = 'VS'
-	elif manufacturer == 'GE Wind':
-		man = 'GE'
-	elif manufacturer == 'Eno':
-		man = 'ENO'
-	elif manufacturer == 'aerodyn':
-		man = 'SCD'
-	return man
\ No newline at end of file
+    man = ""
+    if manufacturer == "Nordex":
+        man = "N"
+        if int(nom_pow) == 3000 or int(nom_pow) == 1500:
+            if (
+                int(diam) == 140
+                or int(diam) == 132
+                or int(diam) == 125
+                or int(diam) == 116
+                or int(diam) == 100
+                or int(diam) == 82
+                or int(diam) == 77
+                or int(diam) == 70
+            ):
+                man = "AW"
+    elif manufacturer == "Adwen/Areva":
+        man = "AD"
+    elif manufacturer == "Senvion/REpower":
+        man = "S"
+        if int(nom_pow) == 2050 or int(nom_pow) == 2000:
+            man = "MM"
+    elif manufacturer == "Enercon":
+        man = "E"
+    elif manufacturer == "Siemens":
+        man = "SWT"
+    elif manufacturer == "Vestas":
+        man = "V"
+    elif manufacturer == "Vensys":
+        man = "VS"
+    elif manufacturer == "GE Wind":
+        man = "GE"
+    elif manufacturer == "Eno":
+        man = "ENO"
+    elif manufacturer == "aerodyn":
+        man = "SCD"
+    return man
diff --git a/scripts/mirror_mastr_csv_export.py b/scripts/mirror_mastr_csv_export.py
index 2596d429..00cf6812 100644
--- a/scripts/mirror_mastr_csv_export.py
+++ b/scripts/mirror_mastr_csv_export.py
@@ -1,4 +1,4 @@
-from open_mastr.utils.helpers import (reverse_fill_basic_units, create_db_query)
+from open_mastr.utils.helpers import reverse_fill_basic_units, create_db_query
 
 
 technology = [
@@ -24,6 +24,4 @@
 reverse_fill_basic_units()
 
 # to csv per tech
-create_db_query(
-    technology=technology, additional_data=data_types, limit=None
-)
+create_db_query(technology=technology, additional_data=data_types, limit=None)
diff --git a/scripts/mirror_mastr_dump.py b/scripts/mirror_mastr_dump.py
index ca2a0b66..69a3a3d2 100644
--- a/scripts/mirror_mastr_dump.py
+++ b/scripts/mirror_mastr_dump.py
@@ -2,8 +2,8 @@
 import datetime
 
 # Dump data
-now = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
+now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
 dump_file = f"{now}_open-mastr-mirror.backup"
 
 mastr_refl = MaStRMirror()
-mastr_refl.dump(dump_file)
\ No newline at end of file
+mastr_refl.dump(dump_file)
diff --git a/scripts/mirror_mastr_update_latest.py b/scripts/mirror_mastr_update_latest.py
index 0db0b234..40c61681 100644
--- a/scripts/mirror_mastr_update_latest.py
+++ b/scripts/mirror_mastr_update_latest.py
@@ -2,16 +2,27 @@
 import datetime
 
 limit = None
-technology = ["wind", "biomass", "combustion", "gsgk", "hydro", "nuclear", "storage", "solar"]
+technology = [
+    "wind",
+    "biomass",
+    "combustion",
+    "gsgk",
+    "hydro",
+    "nuclear",
+    "storage",
+    "solar",
+]
 data_types = ["unit_data", "eeg_data", "kwk_data", "permit_data"]
-location_types = ["location_elec_generation", "location_elec_consumption", "location_gas_generation",
-                  "location_gas_consumption"]
+location_types = [
+    "location_elec_generation",
+    "location_elec_consumption",
+    "location_gas_generation",
+    "location_gas_consumption",
+]
 processes = 12
 
 mastr_mirror = MaStRMirror(
-    empty_schema=False,
-    parallel_processes=processes,
-    restore_dump=None
+    empty_schema=False, parallel_processes=processes, restore_dump=None
 )
 
 # Download basic unit data
@@ -21,13 +32,12 @@
 for tech in technology:
     # mastr_mirror.create_additional_data_requests(tech)
     for data_type in data_types:
-        mastr_mirror.retrieve_additional_data(tech, data_type, chunksize=1000, limit=limit)
+        mastr_mirror.retrieve_additional_data(
+            tech, data_type, chunksize=1000, limit=limit
+        )
 
 # Download basic location data
-mastr_mirror.backfill_locations_basic(
-    limit=limit,
-    date="latest"
-)
+mastr_mirror.backfill_locations_basic(limit=limit, date="latest")
 
 # Download extended location data
 for location_type in location_types:
diff --git a/tests/preparation.py b/tests/preparation.py
index 12d34823..0f58bd3f 100644
--- a/tests/preparation.py
+++ b/tests/preparation.py
@@ -1,20 +1,19 @@
 import os
 from open_mastr.utils.config import get_project_home_dir
 
+
 def create_credentials_file():
     """Use token and user stored in GitHub secrets for creating credentials file
 
     This is used to allow test workflow to access MaStR database.
     """
-    credentials_file = os.path.join(get_project_home_dir(), 'config', 'credentials.cfg')
+    credentials_file = os.path.join(get_project_home_dir(), "config", "credentials.cfg")
 
     token = os.getenv("MASTR_TOKEN")
     user = os.getenv("MASTR_USER")
     section_title = "[MaStR]"
 
-    file_content = f"{section_title}\n" \
-                   f"user = {user}\n" \
-                   f"token = {token}\n"
+    file_content = f"{section_title}\n" f"user = {user}\n" f"token = {token}\n"
 
     with open(credentials_file, "w") as credentials_fh:
         credentials_fh.write(file_content)
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index f67046d1..4a19f4fb 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -251,12 +251,7 @@ def test_validate_parameter_format_for_mastr_init(db):
 
 
 def test_transform_data_parameter():
-    (
-        data,
-        api_data_types,
-        api_location_types,
-        harm_log,
-    ) = transform_data_parameter(
+    (data, api_data_types, api_location_types, harm_log,) = transform_data_parameter(
         method="API",
         data=["wind", "location"],
         api_data_types=["eeg_data"],
diff --git a/tests/xml_download/test_utils_cleansing_bulk.py b/tests/xml_download/test_utils_cleansing_bulk.py
index 38b8e41b..9a29ad76 100644
--- a/tests/xml_download/test_utils_cleansing_bulk.py
+++ b/tests/xml_download/test_utils_cleansing_bulk.py
@@ -30,6 +30,7 @@ def capture_wrap():
     sys.stdout.close = lambda *args: None
     yield
 
+
 @pytest.fixture(scope="module")
 def con():
     con = sqlite3.connect(_sqlite_file_path)
diff --git a/tests/xml_download/test_utils_download_bulk.py b/tests/xml_download/test_utils_download_bulk.py
index 3fe351f6..b4cc0b7d 100644
--- a/tests/xml_download/test_utils_download_bulk.py
+++ b/tests/xml_download/test_utils_download_bulk.py
@@ -1,33 +1,52 @@
 import time
 from open_mastr.xml_download.utils_download_bulk import gen_url
 
+
 def test_gen_url():
     when = time.strptime("2024-01-01", "%Y-%m-%d")
     url = gen_url(when)
     assert type(url) == str
-    assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240101_23.2.zip"
+    assert (
+        url
+        == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240101_23.2.zip"
+    )
 
     when = time.strptime("2024-04-01", "%Y-%m-%d")
     url = gen_url(when)
     assert type(url) == str
-    assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240401_23.2.zip"
+    assert (
+        url
+        == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240401_23.2.zip"
+    )
 
     when = time.strptime("2024-04-02", "%Y-%m-%d")
     url = gen_url(when)
     assert type(url) == str
-    assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240402_24.1.zip"
+    assert (
+        url
+        == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240402_24.1.zip"
+    )
 
     when = time.strptime("2024-10-01", "%Y-%m-%d")
     url = gen_url(when)
     assert type(url) == str
-    assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241001_24.1.zip"
+    assert (
+        url
+        == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241001_24.1.zip"
+    )
 
     when = time.strptime("2024-10-02", "%Y-%m-%d")
     url = gen_url(when)
     assert type(url) == str
-    assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241002_24.2.zip"
+    assert (
+        url
+        == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241002_24.2.zip"
+    )
 
     when = time.strptime("2024-12-31", "%Y-%m-%d")
     url = gen_url(when)
     assert type(url) == str
-    assert url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241231_24.2.zip"
+    assert (
+        url
+        == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20241231_24.2.zip"
+    )