
Commit

Fix export of superstructure databases: keys are added.
romainsacchi committed Oct 14, 2023
1 parent 6c39abf commit 0dac4bd
Showing 7 changed files with 171 additions and 455 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -61,7 +61,7 @@ ecoinvent 3 to reflect projected energy policy trajectories.

 Requirements
 ------------
-* **Python 3.9**
+* **Python 3.9, 3.10 or 3.11**
 * License for [ecoinvent 3][1]
 * Some IAM output files come with the library and are located by default in the subdirectory "/data/iam_output_files". **If you wish to use
   those files, you need to request (by [email](mailto:[email protected])) an encryption key from the developers**.
562 changes: 122 additions & 440 deletions dev/test_custom_scenarios.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion premise/__init__.py
@@ -1,5 +1,5 @@
 __all__ = ("NewDatabase", "clear_cache", "get_regions_definition")
-__version__ = (1, 8, 0, "dev1")
+__version__ = (1, 8, 1)


 from .ecoinvent_modification import NewDatabase
2 changes: 0 additions & 2 deletions premise/brightway2.py
@@ -25,8 +25,6 @@ def write_brightway_database(data, name, reset_codes=False):
     # Restore parameters to Brightway2 format
     # which allows for uncertainty and comments
     change_db_name(data, name)
-    if reset_codes:
-        reset_all_codes(data)
     link_internal(data)
     check_internal_linking(data)
     BW2Importer(name, data).write_database()
4 changes: 1 addition & 3 deletions premise/brightway25.py
@@ -79,12 +79,10 @@ def biosphere_generator(data, lookup):
     )


-def write_brightway_database(data, name, reset_codes=False):
+def write_brightway_database(data, name):
     # Restore parameters to Brightway2 format
     # which allows for uncertainty and comments
     change_db_name(data, name)
-    if reset_codes:
-        reset_all_codes(data)
     link_internal(data)
     check_internal_linking(data)
     BW25Importer(name, data).write_database()
1 change: 0 additions & 1 deletion premise/ecoinvent_modification.py
@@ -1375,7 +1375,6 @@ def write_superstructure_db_to_brightway(
         write_brightway_database(
             data=self.database,
             name=name,
-            reset_codes=True,
         )

         # generate scenario report
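For orientation, a rough sketch of the user-facing call path that ends in the write_superstructure_db_to_brightway() method changed above. Only NewDatabase and write_superstructure_db_to_brightway appear in this diff; the constructor arguments, the scenario dictionary keys and the update_all() step are assumptions based on typical premise usage and may differ from the actual API.

from premise import NewDatabase

# Hypothetical setup -- argument names and values are illustrative only.
ndb = NewDatabase(
    scenarios=[{"model": "remind", "pathway": "SSP2-Base", "year": 2050}],
    source_db="ecoinvent 3.9.1 cutoff",  # name of the source database in your project
    source_version="3.9.1",
    key="xxxxxxxxxxxxxxxx",  # decryption key for the bundled IAM files
)
ndb.update_all()  # apply the IAM-based transformations (assumed method name)

# Writes one superstructure database plus a scenario difference file.
# Since this commit, code resetting happens during export instead,
# so reset_codes is no longer passed to write_brightway_database().
ndb.write_superstructure_db_to_brightway(name="my_superstructure_db")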
53 changes: 46 additions & 7 deletions premise/export.py
Expand Up @@ -28,7 +28,7 @@
from .filesystem_constants import DATA_DIR
from .inventory_imports import get_correspondence_bio_flows
from .transformation import BaseTransformation
from .utils import check_database_name
from .utils import check_database_name, reset_all_codes

FILEPATH_SIMAPRO_UNITS = DATA_DIR / "utils" / "export" / "simapro_units.yml"
FILEPATH_SIMAPRO_COMPARTMENTS = (
@@ -523,7 +523,7 @@ def build_datapackage(df, inventories, list_scenarios, ei_version, name):

     # check that directory exists, otherwise create it
     Path(DIR_DATAPACKAGE_TEMP).mkdir(parents=True, exist_ok=True)
-    df.to_csv(DIR_DATAPACKAGE_TEMP / "scenario_data.csv", index=False)
+    df.to_csv(DIR_DATAPACKAGE_TEMP / "scenario_data.csv", index=False, encoding="utf-8")
     write_formatted_data(
         name=name, data=inventories, filepath=DIR_DATAPACKAGE_TEMP / "inventories.csv"
     )
@@ -856,9 +856,52 @@ def generate_scenario_difference_file(
     ] = None
     df.loc[df["flow type"] == "production", list_scenarios] = 1.0

+    new_db, df = find_technosphere_keys(new_db, df)
+
+
     # return the dataframe and the new db
     return df, new_db, list_acts

+
+def find_technosphere_keys(db, df):
+
+    # erase keys for technosphere and production exchanges
+    df.loc[df["flow type"].isin(["technosphere", "production"]), "from key"] = None
+    df.loc[df["flow type"].isin(["technosphere", "production"]), "to key"] = None
+    df.loc[df["flow type"] == "biosphere", "to key"] = None
+
+    # reset all codes
+    db = reset_all_codes(db)
+
+    # create a dictionary of all activities
+    dict_act = {(a["name"], a["reference product"], a["location"]): (a["database"], a["code"]) for a in db}
+
+    # iterate through df
+    # and fill "from key" and "to key" columns
+    # if None
+
+    df.loc[df["from key"].isnull(), "from key"] = pd.Series(
+        list(
+            zip(
+                df["from activity name"],
+                df["from reference product"],
+                df["from location"],
+            )
+        )
+    ).map(dict_act)
+
+    df.loc[df["to key"].isnull(), "to key"] = pd.Series(
+        list(
+            zip(
+                df["to activity name"],
+                df["to reference product"],
+                df["to location"],
+            )
+        )
+    ).map(dict_act)
+
+    return db, df
+
+
 def generate_superstructure_db(
     origin_db,
@@ -917,10 +960,6 @@ def generate_superstructure_db(
     after = len(df)
     print(f"Dropped {before - after} duplicate(s).")

-    # remove content from "from key" and "to key"
-    df["from key"] = None
-    df["to key"] = None
-
     # if df is longer than the row limit of Excel,
     # the export to Excel is not an option
     if len(df) > 1048576:
@@ -934,7 +973,7 @@ def generate_superstructure_db(
         df.to_excel(filepath_sdf, index=False)
     elif format == "csv":
         filepath_sdf = filepath / f"scenario_diff_{db_name}.csv"
-        df.to_csv(filepath_sdf, index=False, sep=";")
+        df.to_csv(filepath_sdf, index=False, sep=";", encoding="utf-8")
     elif format == "feather":
         filepath_sdf = filepath / f"scenario_diff_{db_name}.feather"
         df.to_feather(filepath_sdf)
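To make the new key-filling step in find_technosphere_keys() easier to follow, here is a minimal, self-contained sketch of the same pandas pattern on invented data: build a dictionary keyed by (name, reference product, location) tuples, zip the corresponding DataFrame columns into tuples, and map them onto the dictionary to fill the missing key column. All activity names, database names and codes below are made up for illustration.

import pandas as pd

# Toy "database": each dataset carries the database name and code that
# together form its Brightway key.
db = [
    {
        "name": "electricity production",
        "reference product": "electricity",
        "location": "DE",
        "database": "super_db",
        "code": "abc123",
    },
    {
        "name": "steel production",
        "reference product": "steel",
        "location": "RER",
        "database": "super_db",
        "code": "def456",
    },
]

# (name, reference product, location) -> (database, code)
dict_act = {
    (a["name"], a["reference product"], a["location"]): (a["database"], a["code"])
    for a in db
}

# Toy scenario difference rows whose "from key" column is still empty.
df = pd.DataFrame(
    {
        "from activity name": ["electricity production", "steel production"],
        "from reference product": ["electricity", "steel"],
        "from location": ["DE", "RER"],
        "from key": [None, None],
    }
)

# Fill "from key" where it is missing, mirroring the diff above.
df.loc[df["from key"].isnull(), "from key"] = pd.Series(
    list(
        zip(
            df["from activity name"],
            df["from reference product"],
            df["from location"],
        )
    )
).map(dict_act)

print(df["from key"].tolist())
# [('super_db', 'abc123'), ('super_db', 'def456')]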
