EODC

polca · Sep 26, 2023 · 0fe87b8 · 0fe87b8
1 parent 8095491
commit 0fe87b8
Show file tree

Hide file tree

Showing 13 changed files with 974 additions and 437 deletions.
diff --git a/premise/data/additional_inventories/lci-co-firing-power-plants.xlsx b/premise/data/additional_inventories/lci-co-firing-power-plants.xlsx
diff --git a/premise/data_collection.py b/premise/data_collection.py
@@ -6,6 +6,7 @@
 
 import copy
 import csv
+import os
 from functools import lru_cache
 from io import StringIO
 from itertools import chain
@@ -145,11 +146,7 @@ def get_gains_EU_data() -> xr.DataArray:
     )
     gains_emi_EU["sector"] = gains_emi_EU["Sector"] + gains_emi_EU["Activity"]
     gains_emi_EU.drop(
-        [
-            "Sector",
-            "Activity",
-        ],
-        axis=1,
+        ["Sector", "Activity",], axis=1,
     )
 
     gains_emi_EU = gains_emi_EU[~gains_emi_EU["value"].isna()]
@@ -228,19 +225,15 @@ def fix_efficiencies(data: xr.DataArray, min_year: int) -> xr.DataArray:
     # we correct it to 1, as we do not accept
     # that efficiency degrades over time
     data.loc[dict(year=[y for y in data.year.values if y > 2020])] = np.clip(
-        data.loc[dict(year=[y for y in data.year.values if y > 2020])],
-        1,
-        None,
+        data.loc[dict(year=[y for y in data.year.values if y > 2020])], 1, None,
     )
 
     # Inversely, if we are looking at a year prior to 2020
     # and the ratio in efficiency change is superior to 1
     # we correct it to 1, as we do not accept
     # that efficiency in the past was higher than now
     data.loc[dict(year=[y for y in data.year.values if y < 2020])] = np.clip(
-        data.loc[dict(year=[y for y in data.year.values if y < 2020])],
-        None,
-        1,
+        data.loc[dict(year=[y for y in data.year.values if y < 2020])], None, 1,
     )
 
     # ensure that efficiency can not decrease over time
@@ -397,9 +390,7 @@ def __init__(
         new_vars = flatten(new_vars)
 
         data = self.__get_iam_data(
-            key=key,
-            filedir=filepath_iam_files,
-            variables=new_vars,
+            key=key, filedir=filepath_iam_files, variables=new_vars,
         )
 
         self.regions = data.region.values.tolist()
@@ -411,9 +402,7 @@ def __init__(
         )
 
         self.electricity_markets = self.__fetch_market_data(
-            data=data,
-            input_vars=electricity_prod_vars,
-            system_model=self.system_model,
+            data=data, input_vars=electricity_prod_vars, system_model=self.system_model,
         )
 
         self.petrol_markets = self.__fetch_market_data(
@@ -438,12 +427,7 @@ def __init__(
             input_vars={
                 k: v
                 for k, v in fuel_prod_vars.items()
-                if any(
-                    x in k
-                    for x in [
-                        "diesel",
-                    ]
-                )
+                if any(x in k for x in ["diesel",])
             },
             system_model=self.system_model,
         )
@@ -470,12 +454,7 @@ def __init__(
             input_vars={
                 k: v
                 for k, v in fuel_prod_vars.items()
-                if any(
-                    x in k
-                    for x in [
-                        "hydrogen",
-                    ]
-                )
+                if any(x in k for x in ["hydrogen",])
             },
             system_model=self.system_model,
         )
@@ -532,12 +511,7 @@ def __init__(
             efficiency_labels={
                 k: v
                 for k, v in fuel_eff_vars.items()
-                if any(
-                    x in k
-                    for x in [
-                        "diesel",
-                    ]
-                )
+                if any(x in k for x in ["diesel",])
             },
         )
         self.gas_efficiencies = self.get_iam_efficiencies(
@@ -553,12 +527,7 @@ def __init__(
             efficiency_labels={
                 k: v
                 for k, v in fuel_eff_vars.items()
-                if any(
-                    x in k
-                    for x in [
-                        "hydrogen",
-                    ]
-                )
+                if any(x in k for x in ["hydrogen",])
             },
         )
 
@@ -631,25 +600,39 @@ def __get_iam_data(
 
         """
 
-        file_ext = self.model + "_" + self.pathway + ".csv"
-        filepath = Path(filedir) / file_ext
+        # find file in directory which name contains both self.model and self.pathway
+        # Walk through the directory
+        filepath = ""
+        for root, dirs, files in os.walk(filedir):
+            for file in files:
+                # Check if both model and pathway are present in the filename
+                if self.model in file and self.pathway in file:
+                    filepath = Path(os.path.join(root, file))
+
+        if filepath == "":
+            raise FileNotFoundError(
+                f"Could not find any file containing both {self.model} and {self.pathway} in {filedir}"
+            )
 
         if key is None:
             # Uses a non-encrypted file
-            try:
-                with open(filepath, "rb") as file:
-                    # read the encrypted data
-                    encrypted_data = file.read()
-            except FileNotFoundError:
-                file_ext = self.model + "_" + self.pathway + ".mif"
-                filepath = Path(filedir) / file_ext
+            # if extension is ".csv"
+            if filepath.suffix in [".csv", ".mif"]:
+                print(f"Reading {filepath} as csv file")
                 with open(filepath, "rb") as file:
                     # read the encrypted data
                     encrypted_data = file.read()
+                    # create a temp csv-like file to pass to pandas.read_csv()
+                    data = StringIO(str(encrypted_data, "latin-1"))
 
-            # create a temp csv-like file to pass to pandas.read_csv()
-            data = StringIO(str(encrypted_data, "latin-1"))
+            elif filepath.suffix in [".xls", ".xlsx"]:
+                print(f"Reading {filepath} as excel file")
+                data = pd.read_excel(filepath)
 
+            else:
+                raise ValueError(
+                    f"Extension {filepath.suffix} is not supported. Please use .csv, .mif, .xls or .xlsx."
+                )
         else:
             # Uses an encrypted file
             fernet_obj = Fernet(key)
@@ -661,15 +644,18 @@ def __get_iam_data(
             decrypted_data = fernet_obj.decrypt(encrypted_data)
             data = StringIO(str(decrypted_data, "latin-1"))
 
-        dataframe = pd.read_csv(
-            data,
-            sep=get_delimiter(data=copy.copy(data).readline()),
-            encoding="latin-1",
-        )
+        if filepath.suffix in [".csv", ".mif"]:
+            dataframe = pd.read_csv(
+                data,
+                sep=get_delimiter(data=copy.copy(data).readline()),
+                encoding="latin-1",
+            )
+        else:
+            dataframe = data
 
         # if a column name can be an integer
         # we convert it to an integer
-        new_cols = {c: int(c) if c.isdigit() else c for c in dataframe.columns}
+        new_cols = {c: int(c) if str(c).isdigit() else c for c in dataframe.columns}
         dataframe = dataframe.rename(columns=new_cols)
 
         # remove any column that is a string
@@ -889,7 +875,8 @@ def __get_carbon_capture_rate(
         # and that none of the  CO2 emissions are captured
 
         if not any(
-            x in data.variables.values.tolist() for x in dict_vars.get("cement - cco2")
+            x in data.variables.values.tolist()
+            for x in dict_vars.get("cement - cco2", [])
         ):
             cement_rate = xr.DataArray(
                 np.zeros((len(data.region), len(data.year))),
@@ -904,7 +891,8 @@ def __get_carbon_capture_rate(
         cement_rate.coords["variables"] = "cement"
 
         if not any(
-            x in data.variables.values.tolist() for x in dict_vars.get("steel - cco2")
+            x in data.variables.values.tolist()
+            for x in dict_vars.get("steel - cco2", [])
         ):
             steel_rate = xr.DataArray(
                 np.zeros((len(data.region), len(data.year))),
@@ -929,73 +917,76 @@ def __get_carbon_capture_rate(
         # as it is sometimes neglected in the
         # IAM files
 
-        if not any(
-            x in data.variables.values.tolist() for x in dict_vars.get("cement - cco2")
-        ):
-            rate.loc[dict(region="World", variables="cement")] = 0
-        else:
-            try:
-                rate.loc[dict(region="World", variables="cement")] = (
+        if "World" in rate.region.values.tolist():
+            if not any(
+                x in data.variables.values.tolist()
+                for x in dict_vars.get("cement - cco2", [])
+            ):
+                rate.loc[dict(region="World", variables="cement")] = 0
+            else:
+                try:
+                    rate.loc[dict(region="World", variables="cement")] = (
+                        data.loc[
+                            dict(
+                                region=[r for r in self.regions if r != "World"],
+                                variables=dict_vars["cement - cco2"],
+                            )
+                        ]
+                        .sum(dim=["variables", "region"])
+                        .values
+                        / data.loc[
+                            dict(
+                                region=[r for r in self.regions if r != "World"],
+                                variables=dict_vars["cement - co2"],
+                            )
+                        ]
+                        .sum(dim=["variables", "region"])
+                        .values
+                    )
+                except ZeroDivisionError:
+                    rate.loc[dict(region="World", variables="cement")] = 0
+
+                try:
+                    rate.loc[dict(region="World", variables="steel")] = data.loc[
+                        dict(
+                            region=[r for r in self.regions if r != "World"],
+                            variables=dict_vars["steel - cco2"],
+                        )
+                    ].sum(dim=["variables", "region"]) / data.loc[
+                        dict(
+                            region=[r for r in self.regions if r != "World"],
+                            variables=dict_vars["steel - co2"],
+                        )
+                    ].sum(
+                        dim=["variables", "region"]
+                    )
+                except ZeroDivisionError:
+                    rate.loc[dict(region="World", variables="steel")] = 0
+
+            if not any(
+                x in data.variables.values.tolist()
+                for x in dict_vars.get("steel - cco2", [])
+            ):
+                rate.loc[dict(region="World", variables="steel")] = 0
+            else:
+                rate.loc[dict(region="World", variables="steel")] = (
                     data.loc[
                         dict(
                             region=[r for r in self.regions if r != "World"],
-                            variables=dict_vars["cement - cco2"],
+                            variables=dict_vars["steel - cco2"],
                         )
                     ]
                     .sum(dim=["variables", "region"])
                     .values
                     / data.loc[
                         dict(
                             region=[r for r in self.regions if r != "World"],
-                            variables=dict_vars["cement - co2"],
+                            variables=dict_vars["steel - co2"],
                         )
                     ]
                     .sum(dim=["variables", "region"])
                     .values
                 )
-            except ZeroDivisionError:
-                rate.loc[dict(region="World", variables="cement")] = 0
-
-            try:
-                rate.loc[dict(region="World", variables="steel")] = data.loc[
-                    dict(
-                        region=[r for r in self.regions if r != "World"],
-                        variables=dict_vars["steel - cco2"],
-                    )
-                ].sum(dim=["variables", "region"]) / data.loc[
-                    dict(
-                        region=[r for r in self.regions if r != "World"],
-                        variables=dict_vars["steel - co2"],
-                    )
-                ].sum(
-                    dim=["variables", "region"]
-                )
-            except ZeroDivisionError:
-                rate.loc[dict(region="World", variables="steel")] = 0
-
-        if not any(
-            x in data.variables.values.tolist() for x in dict_vars.get("steel - cco2")
-        ):
-            rate.loc[dict(region="World", variables="steel")] = 0
-        else:
-            rate.loc[dict(region="World", variables="steel")] = (
-                data.loc[
-                    dict(
-                        region=[r for r in self.regions if r != "World"],
-                        variables=dict_vars["steel - cco2"],
-                    )
-                ]
-                .sum(dim=["variables", "region"])
-                .values
-                / data.loc[
-                    dict(
-                        region=[r for r in self.regions if r != "World"],
-                        variables=dict_vars["steel - co2"],
-                    )
-                ]
-                .sum(dim=["variables", "region"])
-                .values
-            )
 
         # we ensure that the rate can only be between 0 and 1
         rate.values = np.clip(rate, 0, 1)
@@ -1239,11 +1230,7 @@ def fetch_external_data_coal_power_plants(self):
         df = df.drop(columns=["fuel input"])
         array = (
             df.melt(
-                id_vars=[
-                    "country",
-                    "CHP",
-                    "fuel",
-                ],
+                id_vars=["country", "CHP", "fuel",],
                 var_name="variable",
                 value_name="value",
             )