Commit

Fix export of superstructure databases: keys are added, encoding is changed.
romainsacchi committed Oct 15, 2023
1 parent fd9bde4 commit e6635c8
Showing 2 changed files with 63 additions and 171 deletions.
212 changes: 55 additions & 157 deletions premise/ecoinvent_modification.py
@@ -870,7 +870,7 @@ def update_electricity(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_electricity(
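Note: all of the update_* hunks below make this same one-line change, so a single sketch covers them. Keying modified_datasets by (model, pathway, year) instead of merging with dict.update() means each worker's result contributes exactly one entry, the one for the scenario it processed, so one scenario's bookkeeping cannot overwrite another's. A minimal, hypothetical illustration (fake_update stands in for _update_electricity and its siblings; it is not the premise API):

# Minimal sketch of the per-scenario bookkeeping (hypothetical stand-ins,
# not the premise API): each parallel worker returns a modified_datasets
# dict; only the entry for its own (model, pathway, year) key is kept.

def fake_update(scenario, modified_datasets):
    # Record one dataset as modified under this scenario's identifying key.
    key = (scenario["model"], scenario["pathway"], scenario["year"])
    modified_datasets.setdefault(key, []).append("market group for electricity")
    return scenario, modified_datasets

scenarios = [
    {"model": "remind", "pathway": "SSP2-Base", "year": 2030},
    {"model": "image", "pathway": "SSP2-RCP26", "year": 2040},
]
modified_datasets = {}

# Each worker gets its own dict, as with pool.starmap in the code above.
results = [fake_update(s, {}) for s in scenarios]

for s, scenario in enumerate(scenarios):
    key = (scenario["model"], scenario["pathway"], scenario["year"])
    # Copy back only this scenario's entry, mirroring the new right-hand side.
    modified_datasets[key] = results[s][1][key]

print(sorted(modified_datasets))
# [('image', 'SSP2-RCP26', 2040), ('remind', 'SSP2-Base', 2030)]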
@@ -908,7 +908,7 @@ def update_dac(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_dac(
@@ -943,7 +943,7 @@ def update_fuels(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_fuels(
@@ -978,7 +978,7 @@ def update_cement(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_cement(
@@ -1013,7 +1013,7 @@ def update_steel(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_steel(
@@ -1049,7 +1049,7 @@ def update_cars(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_vehicles(
@@ -1086,7 +1086,7 @@ def update_two_wheelers(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_vehicles(
@@ -1125,7 +1125,7 @@ def update_trucks(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_vehicles(
@@ -1163,7 +1163,7 @@ def update_buses(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets, _ = _update_vehicles(
@@ -1247,7 +1247,7 @@ def update_emissions(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets = _update_emissions(
@@ -1291,7 +1291,7 @@ def update_all(self) -> None:
 
             for s, scenario in enumerate(self.scenarios):
                 self.scenarios[s] = results[s][0]
-                self.modified_datasets.update(results[s][1])
+                self.modified_datasets[(scenario["model"], scenario["pathway"], scenario["year"])] = results[s][1][(scenario["model"], scenario["pathway"], scenario["year"])]
         else:
             for scenario in self.scenarios:
                 scenario, self.modified_datasets = _update_all(
@@ -1329,33 +1329,14 @@ def write_superstructure_db_to_brightway(
 
         cache = {}
 
-        # use multiprocessing to speed up the process
-        if self.multiprocessing:
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    (
-                        scenario,
-                        cache,
-                        self.version,
-                        self.system_model,
-                        self.modified_datasets,
-                    )
-                    for scenario in self.scenarios
-                ]
-                results = pool.starmap(_prepare_database, args)
-
-            for s, scenario in enumerate(self.scenarios):
-                self.scenarios[s] = results[s][0]
-                cache.update(results[s][1])
-        else:
-            for scenario in self.scenarios:
-                scenario, cache = _prepare_database(
-                    scenario=scenario,
-                    scenario_cache=cache,
-                    version=self.version,
-                    system_model=self.system_model,
-                    modified_datasets=self.modified_datasets,
-                )
+        for scenario in self.scenarios:
+            scenario, cache = _prepare_database(
+                scenario=scenario,
+                scenario_cache=cache,
+                version=self.version,
+                system_model=self.system_model,
+                modified_datasets=self.modified_datasets,
+            )
 
         if hasattr(self, "datapackages"):
             list_scenarios = create_scenario_list(self.scenarios, self.datapackages)
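Note: the export methods below all drop their multiprocessing branch in favor of this sequential loop. The shared cache only accumulates across scenarios when they run in the same process; a process pool would hand each worker its own copy and lose the reuse. A rough sketch of that accumulation, with prepare_database as a hypothetical stand-in for _prepare_database:

# Rough sketch (hypothetical stand-in for _prepare_database): the cache
# filled while preparing one scenario is reused for the next, which only
# pays off when scenarios run one after another in the same process.

def prepare_database(scenario, scenario_cache):
    key = scenario["year"]
    if key not in scenario_cache:  # expensive linking done at most once per key
        scenario_cache[key] = f"links computed for {key}"
    scenario["links"] = scenario_cache[key]
    return scenario, scenario_cache

cache = {}
scenarios = [{"year": 2030}, {"year": 2030}, {"year": 2050}]
for scenario in scenarios:
    scenario, cache = prepare_database(scenario, cache)

print(cache)  # two cache entries for three scenarios: 2030 was reused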
@@ -1422,33 +1403,15 @@ def write_db_to_brightway(self, name: [str, List[str]] = None):
 
         cache = {}
 
-        # use multiprocessing to speed up the process
-        if self.multiprocessing:
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    (
-                        scenario,
-                        cache,
-                        self.version,
-                        self.system_model,
-                        self.modified_datasets,
-                    )
-                    for scenario in self.scenarios
-                ]
-                results = pool.starmap(_prepare_database, args)
-
-            for s, scenario in enumerate(self.scenarios):
-                self.scenarios[s] = results[s][0]
-                cache.update(results[s][1])
-        else:
-            for scenario in self.scenarios:
-                scenario, cache = _prepare_database(
-                    scenario=scenario,
-                    scenario_cache=cache,
-                    version=self.version,
-                    system_model=self.system_model,
-                    modified_datasets=self.modified_datasets,
-                )
+        for scenario in self.scenarios:
+            scenario, cache = _prepare_database(
+                scenario=scenario,
+                scenario_cache=cache,
+                version=self.version,
+                system_model=self.system_model,
+                modified_datasets=self.modified_datasets,
+            )
 
         for scen, scenario in enumerate(self.scenarios):
             write_brightway_database(
@@ -1501,42 +1464,18 @@ def write_db_to_matrices(self, filepath: str = None):
 
         # use multiprocessing to speed up the process
-        # use multiprocessing to speed up the process
-        if self.multiprocessing:
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    (
-                        scenario,
-                        cache,
-                        self.version,
-                        self.system_model,
-                        self.modified_datasets,
-                    )
-                    for scenario in self.scenarios
-                ]
-                results = pool.starmap(_prepare_database, args)
-
-            for s, scenario in enumerate(self.scenarios):
-                self.scenarios[s] = results[s][0]
-                cache.update(results[s][1])
-
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    Export(scenario, filepath[scen], self.version)
-                    for scen, scenario in enumerate(self.scenarios)
-                ]
-                pool.map(_export_to_matrices, args)
-        else:
-            for scenario in self.scenarios:
-                scenario, cache = _prepare_database(
-                    scenario=scenario,
-                    scenario_cache=cache,
-                    version=self.version,
-                    system_model=self.system_model,
-                    modified_datasets=self.modified_datasets,
-                )
+        for scenario in self.scenarios:
+            scenario, cache = _prepare_database(
+                scenario=scenario,
+                scenario_cache=cache,
+                version=self.version,
+                system_model=self.system_model,
+                modified_datasets=self.modified_datasets,
+            )
 
-            for scen, scenario in enumerate(self.scenarios):
-                Export(scenario, filepath[scen], self.version).export_db_to_matrices()
+        for scen, scenario in enumerate(self.scenarios):
+            Export(scenario, filepath[scen], self.version).export_db_to_matrices()
 
         # generate scenario report
         self.generate_scenario_report()
@@ -1562,42 +1501,18 @@ def write_db_to_simapro(self, filepath: str = None):
         cache = {}
 
         # use multiprocessing to speed up the process
-        if self.multiprocessing:
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    (
-                        scenario,
-                        cache,
-                        self.version,
-                        self.system_model,
-                        self.modified_datasets,
-                    )
-                    for scenario in self.scenarios
-                ]
-                results = pool.starmap(_prepare_database, args)
-
-            for s, scenario in enumerate(self.scenarios):
-                self.scenarios[s] = results[s][0]
-                cache.update(results[s][1])
-
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    Export(scenario, filepath, self.version)
-                    for scen, scenario in enumerate(self.scenarios)
-                ]
-                pool.map(_export_to_simapro, args)
-        else:
-            for scenario in self.scenarios:
-                scenario, cache = _prepare_database(
-                    scenario=scenario,
-                    scenario_cache=cache,
-                    version=self.version,
-                    system_model=self.system_model,
-                    modified_datasets=self.modified_datasets,
-                )
+        for scenario in self.scenarios:
+            scenario, cache = _prepare_database(
+                scenario=scenario,
+                scenario_cache=cache,
+                version=self.version,
+                system_model=self.system_model,
+                modified_datasets=self.modified_datasets,
+            )
 
-            for scen, scenario in enumerate(self.scenarios):
-                Export(scenario, filepath, self.version).export_db_to_simapro()
+        for scen, scenario in enumerate(self.scenarios):
+            Export(scenario, filepath, self.version).export_db_to_simapro()
 
         # generate scenario report
         self.generate_scenario_report()
@@ -1616,32 +1531,15 @@ def write_datapackage(self, name: str = f"datapackage_{date.today()}"):
 
         cache = {}
-        # use multiprocessing to speed up the process
-        if self.multiprocessing:
-            with ProcessPool(processes=multiprocessing.cpu_count()) as pool:
-                args = [
-                    (
-                        scenario,
-                        cache,
-                        self.version,
-                        self.system_model,
-                        self.modified_datasets,
-                    )
-                    for scenario in self.scenarios
-                ]
-                results = pool.starmap(_prepare_database, args)
-
-            for s, scenario in enumerate(self.scenarios):
-                self.scenarios[s] = results[s][0]
-                cache.update(results[s][1])
-        else:
-            for scenario in self.scenarios:
-                scenario, cache = _prepare_database(
-                    scenario=scenario,
-                    scenario_cache=cache,
-                    version=self.version,
-                    system_model=self.system_model,
-                    modified_datasets=self.modified_datasets,
-                )
+        for scenario in self.scenarios:
+            scenario, cache = _prepare_database(
+                scenario=scenario,
+                scenario_cache=cache,
+                version=self.version,
+                system_model=self.system_model,
+                modified_datasets=self.modified_datasets,
+            )
 
         if hasattr(self, "datapackages"):
             list_scenarios = create_scenario_list(self.scenarios, self.datapackages)
22 changes: 8 additions & 14 deletions premise/export.py
@@ -523,7 +523,7 @@ def build_datapackage(df, inventories, list_scenarios, ei_version, name):
 
     # check that directory exists, otherwise create it
    Path(DIR_DATAPACKAGE_TEMP).mkdir(parents=True, exist_ok=True)
-    df.to_csv(DIR_DATAPACKAGE_TEMP / "scenario_data.csv", index=False, encoding="utf-8")
+    df.to_csv(DIR_DATAPACKAGE_TEMP / "scenario_data.csv", index=False, encoding="utf-8-sig")
     write_formatted_data(
         name=name, data=inventories, filepath=DIR_DATAPACKAGE_TEMP / "inventories.csv"
     )
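Note: utf-8-sig differs from utf-8 only by a leading byte-order mark, which is what Excel keys on to auto-detect UTF-8 in a CSV; without it, accented characters common in ecoinvent activity and location names open garbled. A small pandas demonstration:

import pandas as pd

df = pd.DataFrame({"location": ["Région wallonne"], "value": [1.0]})

df.to_csv("plain.csv", index=False, encoding="utf-8")
df.to_csv("with_bom.csv", index=False, encoding="utf-8-sig")

# utf-8-sig prepends the BOM b"\xef\xbb\xbf"; Excel reads it as a signal
# to decode the file as UTF-8 instead of the legacy locale code page.
with open("plain.csv", "rb") as f:
    print(f.read(3))  # b'loc' (no BOM)
with open("with_bom.csv", "rb") as f:
    print(f.read(3))  # b'\xef\xbb\xbf'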
@@ -769,6 +769,9 @@ def generate_scenario_difference_file(
         c_name, c_ref, c_cat, c_loc, c_unit, c_type = acts_ind[i[0]]
         s_name, s_ref, s_cat, s_loc, s_unit, s_type = acts_ind[i[1]]
 
+        database_name = db_name
+        exc_key_supplier = None
+
         if s_type == "biosphere":
             database_name = "biosphere3"
@@ -795,15 +798,6 @@
                 ],
             )
 
-        else:
-            database_name = db_name
-            exc_key_supplier = (
-                db_name,
-                fetch_exchange_code(s_name, s_ref, s_loc, s_unit),
-            )
-
-        exc_key_consumer = (db_name, fetch_exchange_code(c_name, c_ref, c_loc, c_unit))
-
         row = [
             s_name,
             s_ref,
@@ -818,7 +812,7 @@
             c_cat,
             c_unit,
             db_name,
-            exc_key_consumer,
+            None,
             s_type,
         ]
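Note: the three hunks above belong together. database_name and exc_key_supplier are now initialized before the biosphere branch, so both names are bound on every path (the old else: block that computed a supplier key with fetch_exchange_code is gone, and the consumer key column is now written as None). The initialize-before-branch shape in miniature, with illustrative names rather than the premise code:

# Toy illustration of initialize-before-branch (illustrative, not premise):
# every path out of the function leaves both names bound.

def supplier_key(s_type, db_name="my_db"):
    database_name = db_name   # default: supplier lives in the main database
    exc_key_supplier = None   # default: no precomputed exchange key
    if s_type == "biosphere":
        database_name = "biosphere3"
        exc_key_supplier = ("biosphere3", "some-uuid")
    return database_name, exc_key_supplier

print(supplier_key("technosphere"))  # ('my_db', None)
print(supplier_key("biosphere"))     # ('biosphere3', ('biosphere3', 'some-uuid'))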

@@ -965,13 +959,13 @@ def generate_superstructure_db(
 
     if format == "excel":
         filepath_sdf = filepath / f"scenario_diff_{db_name}.xlsx"
-        df.to_excel(filepath_sdf, index=False)
+        df.to_excel(filepath_sdf, index=False, encoding="utf-8")
     elif format == "csv":
         filepath_sdf = filepath / f"scenario_diff_{db_name}.csv"
-        df.to_csv(filepath_sdf, index=False, sep=";", encoding="utf-8")
+        df.to_csv(filepath_sdf, index=False, sep=";", encoding="utf-8-sig")
     elif format == "feather":
         filepath_sdf = filepath / f"scenario_diff_{db_name}.feather"
-        df.to_feather(filepath_sdf)
+        df.to_feather(filepath_sdf, encoding="utf-8")
     else:
         raise ValueError(f"Unknown format {format}")
 
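Note: a caveat on the two added encoding arguments above. In current pandas, DataFrame.to_excel no longer accepts an encoding keyword (it was removed in pandas 2.0) and DataFrame.to_feather never had one; xlsx and Feather are binary containers that store text as UTF-8 regardless. A sketch of the same dispatch against the pandas 2.x signatures, where only the CSV writer takes an encoding:

from pathlib import Path

import pandas as pd

def write_scenario_diff(df: pd.DataFrame, filepath: Path, db_name: str, format: str):
    # Sketch against pandas 2.x: encoding applies only to the text format.
    if format == "excel":
        df.to_excel(filepath / f"scenario_diff_{db_name}.xlsx", index=False)
    elif format == "csv":
        # BOM so Excel auto-detects UTF-8 when it opens the file
        df.to_csv(
            filepath / f"scenario_diff_{db_name}.csv",
            index=False,
            sep=";",
            encoding="utf-8-sig",
        )
    elif format == "feather":
        # Feather is binary Arrow; strings are UTF-8 by construction
        df.to_feather(filepath / f"scenario_diff_{db_name}.feather")
    else:
        raise ValueError(f"Unknown format {format}")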
