Skip to content

Commit

Permalink
EODC
Browse files Browse the repository at this point in the history
  • Loading branch information
romainsacchi committed Sep 26, 2023
1 parent 8095491 commit 0fe87b8
Show file tree
Hide file tree
Showing 13 changed files with 974 additions and 437 deletions.
Binary file not shown.
221 changes: 104 additions & 117 deletions premise/data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import copy
import csv
import os
from functools import lru_cache
from io import StringIO
from itertools import chain
Expand Down Expand Up @@ -145,11 +146,7 @@ def get_gains_EU_data() -> xr.DataArray:
)
gains_emi_EU["sector"] = gains_emi_EU["Sector"] + gains_emi_EU["Activity"]
gains_emi_EU.drop(
[
"Sector",
"Activity",
],
axis=1,
["Sector", "Activity",], axis=1,
)

gains_emi_EU = gains_emi_EU[~gains_emi_EU["value"].isna()]
Expand Down Expand Up @@ -228,19 +225,15 @@ def fix_efficiencies(data: xr.DataArray, min_year: int) -> xr.DataArray:
# we correct it to 1, as we do not accept
# that efficiency degrades over time
data.loc[dict(year=[y for y in data.year.values if y > 2020])] = np.clip(
data.loc[dict(year=[y for y in data.year.values if y > 2020])],
1,
None,
data.loc[dict(year=[y for y in data.year.values if y > 2020])], 1, None,
)

# Conversely, if we are looking at a year prior to 2020
# and the ratio in efficiency change is greater than 1,
# we correct it to 1, as we do not accept
# that efficiency in the past was higher than it is now
data.loc[dict(year=[y for y in data.year.values if y < 2020])] = np.clip(
data.loc[dict(year=[y for y in data.year.values if y < 2020])],
None,
1,
data.loc[dict(year=[y for y in data.year.values if y < 2020])], None, 1,
)

# ensure that efficiency can not decrease over time
Expand Down Expand Up @@ -397,9 +390,7 @@ def __init__(
new_vars = flatten(new_vars)

data = self.__get_iam_data(
key=key,
filedir=filepath_iam_files,
variables=new_vars,
key=key, filedir=filepath_iam_files, variables=new_vars,
)

self.regions = data.region.values.tolist()
Expand All @@ -411,9 +402,7 @@ def __init__(
)

self.electricity_markets = self.__fetch_market_data(
data=data,
input_vars=electricity_prod_vars,
system_model=self.system_model,
data=data, input_vars=electricity_prod_vars, system_model=self.system_model,
)

self.petrol_markets = self.__fetch_market_data(
Expand All @@ -438,12 +427,7 @@ def __init__(
input_vars={
k: v
for k, v in fuel_prod_vars.items()
if any(
x in k
for x in [
"diesel",
]
)
if any(x in k for x in ["diesel",])
},
system_model=self.system_model,
)
Expand All @@ -470,12 +454,7 @@ def __init__(
input_vars={
k: v
for k, v in fuel_prod_vars.items()
if any(
x in k
for x in [
"hydrogen",
]
)
if any(x in k for x in ["hydrogen",])
},
system_model=self.system_model,
)
Expand Down Expand Up @@ -532,12 +511,7 @@ def __init__(
efficiency_labels={
k: v
for k, v in fuel_eff_vars.items()
if any(
x in k
for x in [
"diesel",
]
)
if any(x in k for x in ["diesel",])
},
)
self.gas_efficiencies = self.get_iam_efficiencies(
Expand All @@ -553,12 +527,7 @@ def __init__(
efficiency_labels={
k: v
for k, v in fuel_eff_vars.items()
if any(
x in k
for x in [
"hydrogen",
]
)
if any(x in k for x in ["hydrogen",])
},
)

Expand Down Expand Up @@ -631,25 +600,39 @@ def __get_iam_data(
"""

file_ext = self.model + "_" + self.pathway + ".csv"
filepath = Path(filedir) / file_ext
# find the file in the directory whose name contains both self.model and self.pathway
# by walking through the directory tree
filepath = ""
for root, dirs, files in os.walk(filedir):
for file in files:
# Check if both model and pathway are present in the filename
if self.model in file and self.pathway in file:
filepath = Path(os.path.join(root, file))

if filepath == "":
raise FileNotFoundError(
f"Could not find any file containing both {self.model} and {self.pathway} in {filedir}"
)

if key is None:
# Uses a non-encrypted file
try:
with open(filepath, "rb") as file:
# read the raw file contents (this branch handles a non-encrypted file)
encrypted_data = file.read()
except FileNotFoundError:
file_ext = self.model + "_" + self.pathway + ".mif"
filepath = Path(filedir) / file_ext
# if extension is ".csv" or ".mif"
if filepath.suffix in [".csv", ".mif"]:
print(f"Reading {filepath} as csv file")
with open(filepath, "rb") as file:
# read the raw file contents (this branch handles a non-encrypted file)
encrypted_data = file.read()
# create a temp csv-like file to pass to pandas.read_csv()
data = StringIO(str(encrypted_data, "latin-1"))

# create a temp csv-like file to pass to pandas.read_csv()
data = StringIO(str(encrypted_data, "latin-1"))
elif filepath.suffix in [".xls", ".xlsx"]:
print(f"Reading {filepath} as excel file")
data = pd.read_excel(filepath)

else:
raise ValueError(
f"Extension {filepath.suffix} is not supported. Please use .csv, .mif, .xls or .xlsx."
)
else:
# Uses an encrypted file
fernet_obj = Fernet(key)
Expand All @@ -661,15 +644,18 @@ def __get_iam_data(
decrypted_data = fernet_obj.decrypt(encrypted_data)
data = StringIO(str(decrypted_data, "latin-1"))

dataframe = pd.read_csv(
data,
sep=get_delimiter(data=copy.copy(data).readline()),
encoding="latin-1",
)
if filepath.suffix in [".csv", ".mif"]:
dataframe = pd.read_csv(
data,
sep=get_delimiter(data=copy.copy(data).readline()),
encoding="latin-1",
)
else:
dataframe = data

# if a column name can be an integer
# we convert it to an integer
new_cols = {c: int(c) if c.isdigit() else c for c in dataframe.columns}
new_cols = {c: int(c) if str(c).isdigit() else c for c in dataframe.columns}
dataframe = dataframe.rename(columns=new_cols)

# remove any column that is a string
Expand Down Expand Up @@ -889,7 +875,8 @@ def __get_carbon_capture_rate(
# and that none of the CO2 emissions are captured

if not any(
x in data.variables.values.tolist() for x in dict_vars.get("cement - cco2")
x in data.variables.values.tolist()
for x in dict_vars.get("cement - cco2", [])
):
cement_rate = xr.DataArray(
np.zeros((len(data.region), len(data.year))),
Expand All @@ -904,7 +891,8 @@ def __get_carbon_capture_rate(
cement_rate.coords["variables"] = "cement"

if not any(
x in data.variables.values.tolist() for x in dict_vars.get("steel - cco2")
x in data.variables.values.tolist()
for x in dict_vars.get("steel - cco2", [])
):
steel_rate = xr.DataArray(
np.zeros((len(data.region), len(data.year))),
Expand All @@ -929,73 +917,76 @@ def __get_carbon_capture_rate(
# as it is sometimes neglected in the
# IAM files

if not any(
x in data.variables.values.tolist() for x in dict_vars.get("cement - cco2")
):
rate.loc[dict(region="World", variables="cement")] = 0
else:
try:
rate.loc[dict(region="World", variables="cement")] = (
if "World" in rate.region.values.tolist():
if not any(
x in data.variables.values.tolist()
for x in dict_vars.get("cement - cco2", [])
):
rate.loc[dict(region="World", variables="cement")] = 0
else:
try:
rate.loc[dict(region="World", variables="cement")] = (
data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["cement - cco2"],
)
]
.sum(dim=["variables", "region"])
.values
/ data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["cement - co2"],
)
]
.sum(dim=["variables", "region"])
.values
)
except ZeroDivisionError:
rate.loc[dict(region="World", variables="cement")] = 0

try:
rate.loc[dict(region="World", variables="steel")] = data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["steel - cco2"],
)
].sum(dim=["variables", "region"]) / data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["steel - co2"],
)
].sum(
dim=["variables", "region"]
)
except ZeroDivisionError:
rate.loc[dict(region="World", variables="steel")] = 0

if not any(
x in data.variables.values.tolist()
for x in dict_vars.get("steel - cco2", [])
):
rate.loc[dict(region="World", variables="steel")] = 0
else:
rate.loc[dict(region="World", variables="steel")] = (
data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["cement - cco2"],
variables=dict_vars["steel - cco2"],
)
]
.sum(dim=["variables", "region"])
.values
/ data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["cement - co2"],
variables=dict_vars["steel - co2"],
)
]
.sum(dim=["variables", "region"])
.values
)
except ZeroDivisionError:
rate.loc[dict(region="World", variables="cement")] = 0

try:
rate.loc[dict(region="World", variables="steel")] = data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["steel - cco2"],
)
].sum(dim=["variables", "region"]) / data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["steel - co2"],
)
].sum(
dim=["variables", "region"]
)
except ZeroDivisionError:
rate.loc[dict(region="World", variables="steel")] = 0

if not any(
x in data.variables.values.tolist() for x in dict_vars.get("steel - cco2")
):
rate.loc[dict(region="World", variables="steel")] = 0
else:
rate.loc[dict(region="World", variables="steel")] = (
data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["steel - cco2"],
)
]
.sum(dim=["variables", "region"])
.values
/ data.loc[
dict(
region=[r for r in self.regions if r != "World"],
variables=dict_vars["steel - co2"],
)
]
.sum(dim=["variables", "region"])
.values
)

# we ensure that the rate can only be between 0 and 1
rate.values = np.clip(rate, 0, 1)
Expand Down Expand Up @@ -1239,11 +1230,7 @@ def fetch_external_data_coal_power_plants(self):
df = df.drop(columns=["fuel input"])
array = (
df.melt(
id_vars=[
"country",
"CHP",
"fuel",
],
id_vars=["country", "CHP", "fuel",],
var_name="variable",
value_name="value",
)
Expand Down
Loading

0 comments on commit 0fe87b8

Please sign in to comment.