Merge remote-tracking branch 'origin/master'

polca · Sep 27, 2023 · ef1fc78 · ef1fc78
2 parents 4a9b1d2 + f2de32d
commit ef1fc78
Show file tree

Hide file tree

Showing 15 changed files with 1,291 additions and 97 deletions.
diff --git a/dev/Untitled.ipynb b/dev/Untitled.ipynb
diff --git a/premise/__init__.py b/premise/__init__.py
@@ -1,5 +1,5 @@
 __all__ = ("NewDatabase", "clear_cache", "get_regions_definition")
-__version__ = (1, 6, 9)
+__version__ = (1, 7, 0)
 
 from pathlib import Path
 

diff --git a/premise/data/additional_inventories/lci-co-firing-power-plants.xlsx b/premise/data/additional_inventories/lci-co-firing-power-plants.xlsx
diff --git a/premise/data/additional_inventories/migration_map.csv b/premise/data/additional_inventories/migration_map.csv
@@ -615,4 +615,5 @@ from;to;name_from;ref_prod_from;location_from;name_to;ref_prod_to;location_to
 35;39;transmission network construction, electricity, high voltage;transmission network, long-distance;CH;transmission network construction, electricity, high voltage;transmission network, electricity, high voltage;CH
 36;39;transmission network construction, electricity, high voltage;transmission network, long-distance;CH;transmission network construction, electricity, high voltage;transmission network, electricity, high voltage;CH
 37;39;transmission network construction, electricity, high voltage;transmission network, long-distance;CH;transmission network construction, electricity, high voltage;transmission network, electricity, high voltage;CH
-38;39;transmission network construction, electricity, high voltage;transmission network, long-distance;CH;transmission network construction, electricity, high voltage;transmission network, electricity, high voltage;CH
+38;39;transmission network construction, electricity, high voltage;transmission network, long-distance;CH;transmission network construction, electricity, high voltage;transmission network, electricity, high voltage;CH
+39;38;market for battery cell, Li-ion, LFP;battery cell, Li-ion, LFP;GLO;market for battery cell, Li-ion;battery cell, Li-ion;GLO
diff --git a/premise/data_collection.py b/premise/data_collection.py
@@ -6,6 +6,7 @@
 
 import copy
 import csv
+import os
 from functools import lru_cache
 from io import StringIO
 from itertools import chain
@@ -631,25 +632,39 @@ def __get_iam_data(
 
         """
 
-        file_ext = self.model + "_" + self.pathway + ".csv"
-        filepath = Path(filedir) / file_ext
+        # find file in directory which name contains both self.model and self.pathway
+        # Walk through the directory
+        filepath = ""
+        for root, dirs, files in os.walk(filedir):
+            for file in files:
+                # Check if both model and pathway are present in the filename
+                if self.model in file and self.pathway in file:
+                    filepath = Path(os.path.join(root, file))
+
+        if filepath == "":
+            raise FileNotFoundError(
+                f"Could not find any file containing both {self.model} and {self.pathway} in {filedir}"
+            )
 
         if key is None:
             # Uses a non-encrypted file
-            try:
-                with open(filepath, "rb") as file:
-                    # read the encrypted data
-                    encrypted_data = file.read()
-            except FileNotFoundError:
-                file_ext = self.model + "_" + self.pathway + ".mif"
-                filepath = Path(filedir) / file_ext
+            # if extension is ".csv"
+            if filepath.suffix in [".csv", ".mif"]:
+                print(f"Reading {filepath} as csv file")
                 with open(filepath, "rb") as file:
                     # read the encrypted data
                     encrypted_data = file.read()
+                    # create a temp csv-like file to pass to pandas.read_csv()
+                    data = StringIO(str(encrypted_data, "latin-1"))
 
-            # create a temp csv-like file to pass to pandas.read_csv()
-            data = StringIO(str(encrypted_data, "latin-1"))
+            elif filepath.suffix in [".xls", ".xlsx"]:
+                print(f"Reading {filepath} as excel file")
+                data = pd.read_excel(filepath)
 
+            else:
+                raise ValueError(
+                    f"Extension {filepath.suffix} is not supported. Please use .csv, .mif, .xls or .xlsx."
+                )
         else:
             # Uses an encrypted file
             fernet_obj = Fernet(key)
@@ -661,15 +676,18 @@ def __get_iam_data(
             decrypted_data = fernet_obj.decrypt(encrypted_data)
             data = StringIO(str(decrypted_data, "latin-1"))
 
-        dataframe = pd.read_csv(
-            data,
-            sep=get_delimiter(data=copy.copy(data).readline()),
-            encoding="latin-1",
-        )
+        if filepath.suffix in [".csv", ".mif"]:
+            dataframe = pd.read_csv(
+                data,
+                sep=get_delimiter(data=copy.copy(data).readline()),
+                encoding="latin-1",
+            )
+        else:
+            dataframe = data
 
         # if a column name can be an integer
         # we convert it to an integer
-        new_cols = {c: int(c) if c.isdigit() else c for c in dataframe.columns}
+        new_cols = {c: int(c) if str(c).isdigit() else c for c in dataframe.columns}
         dataframe = dataframe.rename(columns=new_cols)
 
         # remove any column that is a string
@@ -889,7 +907,8 @@ def __get_carbon_capture_rate(
         # and that none of the  CO2 emissions are captured
 
         if not any(
-            x in data.variables.values.tolist() for x in dict_vars.get("cement - cco2")
+            x in data.variables.values.tolist()
+            for x in dict_vars.get("cement - cco2", [])
         ):
             cement_rate = xr.DataArray(
                 np.zeros((len(data.region), len(data.year))),
@@ -904,7 +923,8 @@ def __get_carbon_capture_rate(
         cement_rate.coords["variables"] = "cement"
 
         if not any(
-            x in data.variables.values.tolist() for x in dict_vars.get("steel - cco2")
+            x in data.variables.values.tolist()
+            for x in dict_vars.get("steel - cco2", [])
         ):
             steel_rate = xr.DataArray(
                 np.zeros((len(data.region), len(data.year))),
@@ -929,73 +949,76 @@ def __get_carbon_capture_rate(
         # as it is sometimes neglected in the
         # IAM files
 
-        if not any(
-            x in data.variables.values.tolist() for x in dict_vars.get("cement - cco2")
-        ):
-            rate.loc[dict(region="World", variables="cement")] = 0
-        else:
-            try:
-                rate.loc[dict(region="World", variables="cement")] = (
+        if "World" in rate.region.values.tolist():
+            if not any(
+                x in data.variables.values.tolist()
+                for x in dict_vars.get("cement - cco2", [])
+            ):
+                rate.loc[dict(region="World", variables="cement")] = 0
+            else:
+                try:
+                    rate.loc[dict(region="World", variables="cement")] = (
+                        data.loc[
+                            dict(
+                                region=[r for r in self.regions if r != "World"],
+                                variables=dict_vars["cement - cco2"],
+                            )
+                        ]
+                        .sum(dim=["variables", "region"])
+                        .values
+                        / data.loc[
+                            dict(
+                                region=[r for r in self.regions if r != "World"],
+                                variables=dict_vars["cement - co2"],
+                            )
+                        ]
+                        .sum(dim=["variables", "region"])
+                        .values
+                    )
+                except ZeroDivisionError:
+                    rate.loc[dict(region="World", variables="cement")] = 0
+
+                try:
+                    rate.loc[dict(region="World", variables="steel")] = data.loc[
+                        dict(
+                            region=[r for r in self.regions if r != "World"],
+                            variables=dict_vars["steel - cco2"],
+                        )
+                    ].sum(dim=["variables", "region"]) / data.loc[
+                        dict(
+                            region=[r for r in self.regions if r != "World"],
+                            variables=dict_vars["steel - co2"],
+                        )
+                    ].sum(
+                        dim=["variables", "region"]
+                    )
+                except ZeroDivisionError:
+                    rate.loc[dict(region="World", variables="steel")] = 0
+
+            if not any(
+                x in data.variables.values.tolist()
+                for x in dict_vars.get("steel - cco2", [])
+            ):
+                rate.loc[dict(region="World", variables="steel")] = 0
+            else:
+                rate.loc[dict(region="World", variables="steel")] = (
                     data.loc[
                         dict(
                             region=[r for r in self.regions if r != "World"],
-                            variables=dict_vars["cement - cco2"],
+                            variables=dict_vars["steel - cco2"],
                         )
                     ]
                     .sum(dim=["variables", "region"])
                     .values
                     / data.loc[
                         dict(
                             region=[r for r in self.regions if r != "World"],
-                            variables=dict_vars["cement - co2"],
+                            variables=dict_vars["steel - co2"],
                         )
                     ]
                     .sum(dim=["variables", "region"])
                     .values
                 )
-            except ZeroDivisionError:
-                rate.loc[dict(region="World", variables="cement")] = 0
-
-            try:
-                rate.loc[dict(region="World", variables="steel")] = data.loc[
-                    dict(
-                        region=[r for r in self.regions if r != "World"],
-                        variables=dict_vars["steel - cco2"],
-                    )
-                ].sum(dim=["variables", "region"]) / data.loc[
-                    dict(
-                        region=[r for r in self.regions if r != "World"],
-                        variables=dict_vars["steel - co2"],
-                    )
-                ].sum(
-                    dim=["variables", "region"]
-                )
-            except ZeroDivisionError:
-                rate.loc[dict(region="World", variables="steel")] = 0
-
-        if not any(
-            x in data.variables.values.tolist() for x in dict_vars.get("steel - cco2")
-        ):
-            rate.loc[dict(region="World", variables="steel")] = 0
-        else:
-            rate.loc[dict(region="World", variables="steel")] = (
-                data.loc[
-                    dict(
-                        region=[r for r in self.regions if r != "World"],
-                        variables=dict_vars["steel - cco2"],
-                    )
-                ]
-                .sum(dim=["variables", "region"])
-                .values
-                / data.loc[
-                    dict(
-                        region=[r for r in self.regions if r != "World"],
-                        variables=dict_vars["steel - co2"],
-                    )
-                ]
-                .sum(dim=["variables", "region"])
-                .values
-            )
 
         # we ensure that the rate can only be between 0 and 1
         rate.values = np.clip(rate, 0, 1)
@@ -1098,7 +1121,10 @@ def get_external_data(self, datapackages):
                     .to_xarray()
                 )
 
-                array.coords["year"] = [int(y) for y in array.coords["year"]]
+                # convert to float64
+                array = array.astype(np.float64)
+                # convert year dim to int64
+                array.coords["year"] = array.coords["year"].astype(np.int64)
 
                 data[i]["production volume"] = array
                 regions = subset["region"].unique().tolist()
@@ -1140,7 +1166,10 @@ def get_external_data(self, datapackages):
                         .mean()
                         .to_xarray()
                     )
-                    array.coords["year"] = [int(y) for y in array.coords["year"]]
+                    # convert to float64
+                    array = array.astype(np.float64)
+                    # convert year dim to int64
+                    array.coords["year"] = array.coords["year"].astype(np.int64)
 
                     ref_years = {}
 

diff --git a/premise/ecoinvent_modification.py b/premise/ecoinvent_modification.py
@@ -54,6 +54,7 @@
 
 FILEPATH_OIL_GAS_INVENTORIES = INVENTORY_DIR / "lci-ESU-oil-and-gas.xlsx"
 FILEPATH_CARMA_INVENTORIES = INVENTORY_DIR / "lci-Carma-CCS.xlsx"
+FILEPATH_CO_FIRING_INVENTORIES = INVENTORY_DIR / "lci-co-firing-power-plants.xlsx"
 FILEPATH_CHP_INVENTORIES = INVENTORY_DIR / "lci-combined-heat-power-plant-CCS.xlsx"
 FILEPATH_CC_INVENTORIES = INVENTORY_DIR / "lci-carbon-capture.xlsx"
 FILEPATH_BIOFUEL_INVENTORIES = INVENTORY_DIR / "lci-biofuels.xlsx"
@@ -135,6 +136,7 @@
 FILEPATH_WAVE = INVENTORY_DIR / "lci-wave_energy.xlsx"
 FILEPATH_FUEL_CELL = INVENTORY_DIR / "lci-fuel_cell.xlsx"
 FILEPATH_CSP = INVENTORY_DIR / "lci-concentrating-solar-power.xlsx"
+FILEPATH_HOME_STORAGE_BATTERIES = INVENTORY_DIR / "lci-home-batteries.xlsx"
 
 config = load_constants()
 
@@ -703,6 +705,7 @@ def __import_inventories(self, keep_uncertainty_data: bool = False) -> List[dict
         filepaths = [
             (FILEPATH_OIL_GAS_INVENTORIES, "3.7"),
             (FILEPATH_CARMA_INVENTORIES, "3.5"),
+            (FILEPATH_CO_FIRING_INVENTORIES, "3.5"),
             (FILEPATH_CHP_INVENTORIES, "3.5"),
             (FILEPATH_CC_INVENTORIES, "3.9"),
             (FILEPATH_BIOGAS_INVENTORIES, "3.6"),
@@ -711,6 +714,7 @@ def __import_inventories(self, keep_uncertainty_data: bool = False) -> List[dict
             (FILEPATH_COBALT, "3.8"),
             (FILEPATH_GRAPHITE, "3.8"),
             (FILEPATH_BATTERIES, "3.8"),
+            (FILEPATH_HOME_STORAGE_BATTERIES, "3.9"),
             (FILEPATH_PHOTOVOLTAICS, "3.7"),
             (FILEPATH_HYDROGEN_INVENTORIES, "3.9"),
             (FILEPATH_HYDROGEN_SOLAR_INVENTORIES, "3.9"),

diff --git a/premise/export.py b/premise/export.py
@@ -889,7 +889,9 @@ def generate_superstructure_db(
     # the export to Excel is not an option
     if len(df) > 1048576:
         format = "csv"
-        print("The scenario difference file is too long to be exported to Excel. Exporting to CSV instead.")
+        print(
+            "The scenario difference file is too long to be exported to Excel. Exporting to CSV instead."
+        )
 
     if format == "excel":
         filepath_sdf = filepath / f"scenario_diff_{db_name}.xlsx"