diff --git a/premise/ecoinvent_modification.py b/premise/ecoinvent_modification.py
index c7cb173b..3f91cc11 100644
--- a/premise/ecoinvent_modification.py
+++ b/premise/ecoinvent_modification.py
@@ -870,7 +870,7 @@ def update_electricity(self) -> None:
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
 
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_electricity(
@@ -908,7 +908,7 @@ def update_dac(self) -> None:
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
 
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_dac(
@@ -943,7 +943,7 @@ def update_fuels(self) -> None:
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
 
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_fuels(
@@ -978,7 +978,7 @@ def update_cement(self) -> None:
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
 
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_cement(
@@ -1013,7 +1013,7 @@ def update_steel(self) -> None:
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
 
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_steel(
@@ -1049,7 +1049,7 @@ def update_cars(self) -> None:
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
 
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_vehicles(
@@ -1086,7 +1086,7 @@ def update_two_wheelers(self) -> None:
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
 
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_vehicles(
@@ -1125,7 +1125,7 @@ def update_trucks(self) -> None:
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
 
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_vehicles(
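Note on the change repeated in the hunks above: under multiprocessing, each worker returns a full `modified_datasets` mapping keyed by `(model, pathway, year)`, so merging with `dict.update()` lets every worker's copy overwrite entries belonging to the other scenarios; the keyed assignment makes each scenario write back only its own entry. A minimal sketch of the merge step, assuming `results` has the shape the pool returns (`merge_modified_datasets` is a hypothetical helper, not premise API):

    def merge_modified_datasets(scenarios, results, modified_datasets):
        for s, scenario in enumerate(scenarios):
            key = (scenario["model"], scenario["pathway"], scenario["year"])
            # dict.update(results[s][1]) would also import the other
            # scenarios' (possibly stale) entries from this worker's copy;
            # assigning only this worker's key keeps one writer per key
            modified_datasets[key] = results[s][1][key]
        return modified_datasets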
scenario["pathway"], scenario["year"])] else: for scenario in self.scenarios: scenario, self.modified_datasets, _ = _update_vehicles( @@ -1163,7 +1163,7 @@ def update_buses(self) -> None: for s, scenario in enumerate(self.scenarios): self.scenarios[s] = results[s][0] - self.modified_datasets.update(results[s][1]) + self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])] else: for scenario in self.scenarios: scenario, self.modified_datasets, _ = _update_vehicles( @@ -1247,7 +1247,7 @@ def update_emissions(self) -> None: for s, scenario in enumerate(self.scenarios): self.scenarios[s] = results[s][0] - self.modified_datasets.update(results[s][1]) + self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])] else: for scenario in self.scenarios: scenario, self.modified_datasets = _update_emissions( @@ -1291,7 +1291,7 @@ def update_all(self) -> None: for s, scenario in enumerate(self.scenarios): self.scenarios[s] = results[s][0] - self.modified_datasets.update(results[s][1]) + self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])] else: for scenario in self.scenarios: scenario, self.modified_datasets = _update_all( @@ -1329,33 +1329,14 @@ def write_superstructure_db_to_brightway( cache = {} - # use multiprocessing to speed up the process - if self.multiprocessing: - with ProcessPool(processes=multiprocessing.cpu_count()) as pool: - args = [ - ( - scenario, - cache, - self.version, - self.system_model, - self.modified_datasets, - ) - for scenario in self.scenarios - ] - results = pool.starmap(_prepare_database, args) - - for s, scenario in enumerate(self.scenarios): - self.scenarios[s] = results[s][0] - cache.update(results[s][1]) - else: - for scenario in self.scenarios: - scenario, cache = _prepare_database( - scenario=scenario, - scenario_cache=cache, - version=self.version, - system_model=self.system_model, - modified_datasets=self.modified_datasets, - ) + for scenario in self.scenarios: + scenario, cache = _prepare_database( + scenario=scenario, + scenario_cache=cache, + version=self.version, + system_model=self.system_model, + modified_datasets=self.modified_datasets, + ) if hasattr(self, "datapackages"): list_scenarios = create_scenario_list(self.scenarios, self.datapackages) @@ -1422,33 +1403,15 @@ def write_db_to_brightway(self, name: [str, List[str]] = None): cache = {} - # use multiprocessing to speed up the process - if self.multiprocessing: - with ProcessPool(processes=multiprocessing.cpu_count()) as pool: - args = [ - ( - scenario, - cache, - self.version, - self.system_model, - self.modified_datasets, - ) - for scenario in self.scenarios - ] - results = pool.starmap(_prepare_database, args) - for s, scenario in enumerate(self.scenarios): - self.scenarios[s] = results[s][0] - cache.update(results[s][1]) - else: - for scenario in self.scenarios: - scenario, cache = _prepare_database( - scenario=scenario, - scenario_cache=cache, - version=self.version, - system_model=self.system_model, - modified_datasets=self.modified_datasets, - ) + for scenario in self.scenarios: + scenario, cache = _prepare_database( + scenario=scenario, + scenario_cache=cache, + version=self.version, + system_model=self.system_model, + modified_datasets=self.modified_datasets, + ) for scen, scenario in 
@@ -1501,42 +1464,18 @@ def write_db_to_matrices(self, filepath: str = None):
 
         # use multiprocessing to speed up the process
         # use multiprocessing to speed up the process
-        if self.multiprocessing:
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    (
-                        scenario,
-                        cache,
-                        self.version,
-                        self.system_model,
-                        self.modified_datasets,
-                    )
-                    for scenario in self.scenarios
-                ]
-                results = pool.starmap(_prepare_database, args)
-
-            for s, scenario in enumerate(self.scenarios):
-                self.scenarios[s] = results[s][0]
-                cache.update(results[s][1])
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    Export(scenario, filepath[scen], self.version)
-                    for scen, scenario in enumerate(self.scenarios)
-                ]
-                pool.map(_export_to_matrices, args)
-        else:
-            for scenario in self.scenarios:
-                scenario, cache = _prepare_database(
-                    scenario=scenario,
-                    scenario_cache=cache,
-                    version=self.version,
-                    system_model=self.system_model,
-                    modified_datasets=self.modified_datasets,
-                )
+        for scenario in self.scenarios:
+            scenario, cache = _prepare_database(
+                scenario=scenario,
+                scenario_cache=cache,
+                version=self.version,
+                system_model=self.system_model,
+                modified_datasets=self.modified_datasets,
+            )
 
-            for scen, scenario in enumerate(self.scenarios):
-                Export(scenario, filepath[scen], self.version).export_db_to_matrices()
+        for scen, scenario in enumerate(self.scenarios):
+            Export(scenario, filepath[scen], self.version).export_db_to_matrices()
 
         # generate scenario report
         self.generate_scenario_report()
@@ -1562,42 +1501,18 @@ def write_db_to_simapro(self, filepath: str = None):
 
         cache = {}
 
         # use multiprocessing to speed up the process
-        if self.multiprocessing:
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    (
-                        scenario,
-                        cache,
-                        self.version,
-                        self.system_model,
-                        self.modified_datasets,
-                    )
-                    for scenario in self.scenarios
-                ]
-                results = pool.starmap(_prepare_database, args)
-
-            for s, scenario in enumerate(self.scenarios):
-                self.scenarios[s] = results[s][0]
-                cache.update(results[s][1])
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    Export(scenario, filepath, self.version)
-                    for scen, scenario in enumerate(self.scenarios)
-                ]
-                pool.map(_export_to_simapro, args)
-        else:
-            for scenario in self.scenarios:
-                scenario, cache = _prepare_database(
-                    scenario=scenario,
-                    scenario_cache=cache,
-                    version=self.version,
-                    system_model=self.system_model,
-                    modified_datasets=self.modified_datasets,
-                )
+        for scenario in self.scenarios:
+            scenario, cache = _prepare_database(
+                scenario=scenario,
+                scenario_cache=cache,
+                version=self.version,
+                system_model=self.system_model,
+                modified_datasets=self.modified_datasets,
+            )
 
-            for scen, scenario in enumerate(self.scenarios):
-                Export(scenario, filepath, self.version).export_db_to_simapro()
+        for scen, scenario in enumerate(self.scenarios):
+            Export(scenario, filepath, self.version).export_db_to_simapro()
 
         # generate scenario report
         self.generate_scenario_report()
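Worth noting about the two export methods above: only the `_prepare_database` pass needs to run sequentially (it shares `cache`); the export half is embarrassingly parallel, since each `Export` instance writes an independent set of files. If pooled exports were ever reinstated, they could be isolated from the cache pass along these lines (a sketch under that assumption, not what this patch does; `run_export` is hypothetical):

    from multiprocessing import Pool

    def run_export(args):
        scenario, path, version = args
        # each task writes its own files under `path`; no shared state
        Export(scenario, path, version).export_db_to_matrices()

    with Pool() as pool:
        pool.map(
            run_export,
            [(s, filepath[i], version) for i, s in enumerate(scenarios)],
        )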
@@ -1616,32 +1531,15 @@ def write_datapackage(self, name: str = f"datapackage_{date.today()}"):
 
         cache = {}
 
         # use multiprocessing to speed up the process
-        if self.multiprocessing:
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    (
-                        scenario,
-                        cache,
-                        self.version,
-                        self.system_model,
-                        self.modified_datasets,
-                    )
-                    for scenario in self.scenarios
-                ]
-                results = pool.starmap(_prepare_database, args)
-            for s, scenario in enumerate(self.scenarios):
-                self.scenarios[s] = results[s][0]
-                cache.update(results[s][1])
-        else:
-            for scenario in self.scenarios:
-                scenario, cache = _prepare_database(
-                    scenario=scenario,
-                    scenario_cache=cache,
-                    version=self.version,
-                    system_model=self.system_model,
-                    modified_datasets=self.modified_datasets,
-                )
+        for scenario in self.scenarios:
+            scenario, cache = _prepare_database(
+                scenario=scenario,
+                scenario_cache=cache,
+                version=self.version,
+                system_model=self.system_model,
+                modified_datasets=self.modified_datasets,
+            )
 
         if hasattr(self, "datapackages"):
             list_scenarios = create_scenario_list(self.scenarios, self.datapackages)
diff --git a/premise/export.py b/premise/export.py
index 2038c2bc..6343a62c 100644
--- a/premise/export.py
+++ b/premise/export.py
@@ -523,7 +523,7 @@ def build_datapackage(df, inventories, list_scenarios, ei_version, name):
 
     # check that directory exists, otherwise create it
    Path(DIR_DATAPACKAGE_TEMP).mkdir(parents=True, exist_ok=True)
-    df.to_csv(DIR_DATAPACKAGE_TEMP / "scenario_data.csv", index=False, encoding="utf-8")
+    df.to_csv(DIR_DATAPACKAGE_TEMP / "scenario_data.csv", index=False, encoding="utf-8-sig")
     write_formatted_data(
         name=name, data=inventories, filepath=DIR_DATAPACKAGE_TEMP / "inventories.csv"
     )
@@ -769,6 +769,9 @@ def generate_scenario_difference_file(
         c_name, c_ref, c_cat, c_loc, c_unit, c_type = acts_ind[i[0]]
         s_name, s_ref, s_cat, s_loc, s_unit, s_type = acts_ind[i[1]]
 
+        database_name = db_name
+        exc_key_supplier = None
+
         if s_type == "biosphere":
             database_name = "biosphere3"
@@ -795,15 +798,6 @@
                 ],
             )
 
-        else:
-            database_name = db_name
-            exc_key_supplier = (
-                db_name,
-                fetch_exchange_code(s_name, s_ref, s_loc, s_unit),
-            )
-
-        exc_key_consumer = (db_name, fetch_exchange_code(c_name, c_ref, c_loc, c_unit))
-
         row = [
             s_name,
             s_ref,
@@ -818,7 +812,7 @@
             c_cat,
             c_unit,
             db_name,
-            exc_key_consumer,
+            None,
             s_type,
         ]
@@ -965,13 +959,13 @@
 
     if format == "excel":
         filepath_sdf = filepath / f"scenario_diff_{db_name}.xlsx"
         df.to_excel(filepath_sdf, index=False)
     elif format == "csv":
         filepath_sdf = filepath / f"scenario_diff_{db_name}.csv"
-        df.to_csv(filepath_sdf, index=False, sep=";", encoding="utf-8")
+        df.to_csv(filepath_sdf, index=False, sep=";", encoding="utf-8-sig")
     elif format == "feather":
         filepath_sdf = filepath / f"scenario_diff_{db_name}.feather"
         df.to_feather(filepath_sdf)
     else:
         raise ValueError(f"Unknown format {format}")
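On the encoding choices in premise/export.py: `utf-8-sig` prefixes the file with a UTF-8 byte-order mark, which is the cue Excel uses to decode a CSV as UTF-8 rather than the local ANSI code page, so accented characters in dataset names survive a double-click open; plain `utf-8` round-trips fine through pandas but shows up as mojibake in Excel. The binary formats need no such handling: Feather stores strings as UTF-8 by construction (pandas' `DataFrame.to_feather` accepts no `encoding` argument), and `.xlsx` is zipped XML that is always UTF-8. A quick illustration of the BOM difference (file names are just examples):

    import pandas as pd

    df = pd.DataFrame({"location": ["Genève", "Zürich"]})
    # no BOM: Excel on Windows typically misreads the accents
    df.to_csv("plain.csv", index=False, encoding="utf-8")
    # BOM-prefixed: Excel detects UTF-8 and renders the accents correctly
    df.to_csv("excel_friendly.csv", index=False, encoding="utf-8-sig")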