Add prepare_urban_percent to the workflow #275

Merged: 1 commit, Feb 2, 2024
7 changes: 7 additions & 0 deletions Snakefile
@@ -113,6 +113,13 @@ rule prepare_airports:
"scripts/prepare_airports.py"


rule prepare_urban_percent:
output:
urban_percent="data/urban_percent.csv", # TODO move from data to resources
script:
"scripts/prepare_urban_percent.py"


if not config["custom_data"]["gas_network"]:

rule prepare_gas_network:
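Editor's note (not a hunk from this PR): the new `data/urban_percent.csv` output is read by `scripts/build_population_layouts.py` via `snakemake.input.urban_percent` (see the script changes below), so the corresponding rule presumably gains an input entry along these lines; the rule's other inputs are omitted here.

```
rule build_population_layouts:
    input:
        # ...existing inputs unchanged (omitted in this sketch)...
        urban_percent="data/urban_percent.csv",  # output of the new prepare_urban_percent rule
    script:
        "scripts/build_population_layouts.py"
```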
4 changes: 2 additions & 2 deletions config.default.yaml
@@ -37,7 +37,7 @@ policy_config:
clustering_options:
  alternative_clustering: true

countries: ['NG', 'BJ']
countries: ['MA']

demand_data:
  update_data: true # if true, the workflow downloads the energy balances data saved in data/demand/unsd/data again. Turn on for the first run.
@@ -167,7 +167,7 @@ sector:
    network: true
    network_limit: 2000 #GWkm
    network_routes: gas # "gas" or "greenfield". If "gas" -> the network data are fetched from ["sector"]["gas"]["network_data"]. If "greenfield" -> the network follows the topology of electrical transmission lines
    gas_network_repurposing: true # If true -> ["sector"]["gas"]["network"] is automatically true
    gas_network_repurposing: true # If true -> ["sector"]["gas"]["network"] is automatically false
    underground_storage: false

  oil:
39 changes: 23 additions & 16 deletions scripts/build_population_layouts.py
@@ -39,24 +39,31 @@
Iinv = cutout.indicatormatrix(nuts3.geometry)

countries = np.sort(nuts3.country.unique())
# countries = np.array(["MA"])
urban_fraction = (
    pd.read_csv(
        snakemake.input.urban_percent,
        header=None,
        index_col=0,
        names=["fraction"],
        keep_default_na=False,
    ).squeeze()
    / 100.0

urban_percent_df = pd.read_csv(
    snakemake.input.urban_percent,
    usecols=[0, 1, 4],
    index_col=0,
)

# fill missing Balkans values
# missing = ["AL", "ME", "MK"]
# reference = ["RS", "BA"]
# average = urban_fraction[reference].mean()
# fill_values = pd.Series({ct: average for ct in missing})
# urban_fraction = urban_fraction.append(fill_values)
# Filter for the year used in the workflow
urban_percent_df = urban_percent_df.loc[
    (
        urban_percent_df["Year"]
        == snakemake.config["scenario"]["planning_horizons"][0]
    )
]

# Filter for urban percent column
urban_percent_df = urban_percent_df[
["Urban population as percentage of total population"]
]

# Remove index header
urban_percent_df.index.name = None

# Squeeze into a Series
urban_fraction = urban_percent_df.squeeze() / 100.0

# population in each grid cell
pop_cells = pd.Series(I.dot(nuts3["pop"]))
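Editor's note, not part of the PR: the CSV written by `prepare_urban_percent.py` is indexed by country and keeps the columns Year, Economy Label, Absolute value in thousands, and Urban population as percentage of total population, so `usecols=[0, 1, 4]` above selects the country index, the year, and the urban share. Under that assumption, the new block is equivalent to this more compact chain:

```python
import pandas as pd

# Editor's sketch: compact equivalent of the new urban_fraction computation.
# Assumes the snakemake object of build_population_layouts.py and the column
# layout written by prepare_urban_percent.py (columns 0, 1, 4 = country index,
# Year, urban percentage).
year = snakemake.config["scenario"]["planning_horizons"][0]
urban_fraction = (
    pd.read_csv(snakemake.input.urban_percent, usecols=[0, 1, 4], index_col=0)
    .loc[lambda df: df["Year"] == year, "Urban population as percentage of total population"]
    .rename_axis(None)  # drop the "country" index name, as in the PR code
    / 100.0
)
```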
110 changes: 110 additions & 0 deletions scripts/prepare_urban_percent.py
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
import logging
import os
from pathlib import Path

import country_converter as coco
import helpers
import numpy as np
import pandas as pd
import py7zr
import requests

# from helpers import configure_logging


# logger = logging.getLogger(__name__)


def download_urban_percent():
"""
Downloads the United Nations "Total and urban population, annual" .7z File and extracts it as csv File.
The above file was downloaded from the webpage https://unctadstat.unctad.org/datacentre/ as a .7z file. The dataset contains urban percent for most countries from 1950 and predictions until 2050.
"""
url = "https://unctadstat-api.unctad.org/api/reportMetadata/US.PopTotal/bulkfile/355/en"

# Make a GET request to the URL
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
# Extract the filename from the Content-Disposition header
content_disposition = response.headers.get("Content-Disposition")
if content_disposition:
filename = content_disposition.split("filename=")[1].strip('"')
else:
filename = "downloaded_file.csv.7z" # Provide a default filename if Content-Disposition header is not present

# Write the content of the response to a file
with open(filename, "wb") as f:
f.write(response.content)

print(f"Urban percent downloaded successfully as {filename}")

# Extract the downloaded .7z file
with py7zr.SevenZipFile(filename, "r") as archive:
archive.extractall()

print(f"Urban percent extracted successfully")

# Read the extracted CSV file
csv_filename = os.path.splitext(filename)[
0
] # Remove the .7z extension to get the CSV filename
urban_percent_orig = pd.read_csv(csv_filename)

print("Urban percent CSV file read successfully:")

# Remove the downloaded .7z and .csv files
os.remove(filename)
os.remove(csv_filename)

else:
print(f"Failed to download file: Status code {response.status_code}")

return urban_percent_orig


if __name__ == "__main__":
if "snakemake" not in globals():
from helpers import mock_snakemake, sets_path_to_root

os.chdir(os.path.dirname(os.path.abspath(__file__)))
snakemake = mock_snakemake("prepare_urban_percent")
sets_path_to_root("pypsa-earth-sec")
# configure_logging(snakemake)

# run = snakemake.config.get("run", {})
# RDIR = run["name"] + "/" if run.get("name") else ""
# store_path_data = Path.joinpath(Path().cwd(), "data")
# country_list = country_list_to_geofk(snakemake.config["countries"])'

df = download_urban_percent().copy()

# Select the columns that we need to keep
df = df[
[
"Year",
"Economy Label",
"Absolute value in thousands",
"Urban population as percentage of total population",
]
]

# Keep only years above 2020
df = df.loc[(df["Year"] >= 2020)]

# Add ISO2 country code for each country
cc = coco.CountryConverter()
Economy_Label = pd.Series(df["Economy Label"])
df["country"] = cc.pandas_convert(
series=Economy_Label, to="ISO2", not_found="not found"
)

# Drop isos that were not found:
df = df.loc[df["country"] != "not found"]

df = df.set_index("country")

# Save
df.to_csv(snakemake.output[0], sep=",", encoding="utf-8", header="true")
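A quick way to sanity-check the generated file (editor's sketch, assuming the rule has been run and `data/urban_percent.csv` exists relative to the project root):

```python
import pandas as pd

# Editor's sketch: verify the output of prepare_urban_percent.py
df = pd.read_csv("data/urban_percent.csv", index_col=0)

# Columns written by the script, in order
expected = [
    "Year",
    "Economy Label",
    "Absolute value in thousands",
    "Urban population as percentage of total population",
]
assert list(df.columns) == expected
assert (df["Year"] >= 2020).all()  # only 2020 and later are kept
# Note: the country index repeats across years (one row per country and year)
print(df.head())
```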
2 changes: 1 addition & 1 deletion test/config.test1.yaml
@@ -172,7 +172,7 @@ sector:
    network: true
    network_limit: 2000 #GWkm
    network_routes: gas # "gas" or "greenfield". If "gas" -> the network data are fetched from ["sector"]["gas"]["network_data"]. If "greenfield" -> the network follows the topology of electrical transmission lines
    gas_network_repurposing: true # If true -> ["sector"]["gas"]["network"] is automatically true
    gas_network_repurposing: true # If true -> ["sector"]["gas"]["network"] is automatically false
    underground_storage: false

  oil: