Skip to content

Commit

Permalink
Allow passing encoding for TabData files
Browse files Browse the repository at this point in the history
  • Loading branch information
enekomartinmartinez committed Oct 21, 2024
1 parent 3d2e022 commit 331c781
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 29 deletions.
26 changes: 19 additions & 7 deletions pysd/py_backend/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,9 @@ def get_columns(cls, file_name, vars=None, encoding=None):
file_name: str
Output file to read. Must be csv or tab.
vars: list
List of var names to find in the file.
vars: list or None (optional)
List of var names to find in the file. If None all variables
will be returned. Default is None.
encoding: str or None (optional)
Encoding type to read output file. Needed if the file has special
Expand Down Expand Up @@ -268,14 +269,20 @@ def __init__(self, real_name, py_name, coords, interp="interpolate"):
+ "'raw', 'interpolate', "
+ "'look_forward' or 'hold_backward'")

def load_data(self, file_names):
def load_data(self, file_names, encoding=None):
"""
Load data values from files.
Parameters
----------
file_names: list or str or pathlib.Path
Name of the files to search the variable in.
encoding: list or str or None (optional)
Encoding to be used by the data readers. If a list is given,
then file_names should be a list of the same length. If
None or a string is given, this value will be used for all
of them. See documentation from pandas.read_table for
further information. Default is None.
Returns
-------
Expand All @@ -285,9 +292,11 @@ def load_data(self, file_names):
"""
if isinstance(file_names, (str, Path)):
file_names = [file_names]
if isinstance(encoding, str) or encoding is None:
encoding = [encoding]*len(file_names)

for file_name in file_names:
self.data = self._load_data(Path(file_name))
for file_name, encoding_df in zip(file_names, encoding):
self.data = self._load_data(Path(file_name), encoding_df)
if self.data is not None:
break

Expand All @@ -297,7 +306,7 @@ def load_data(self, file_names):
f"Data for {self.real_name} not found in "
f"{', '.join([str(file_name) for file_name in file_names])}")

def _load_data(self, file_name):
def _load_data(self, file_name, encoding):
"""
Load data values from output
Expand All @@ -317,7 +326,10 @@ def _load_data(self, file_name):
if file_name.suffix in [".csv", ".tab"]:

columns, transpose = Columns.get_columns(
file_name, vars=[self.real_name, self.py_name])
file_name,
vars=[self.real_name, self.py_name],
encoding=encoding
)

if not columns:
# the variable is not in the passed file
Expand Down
21 changes: 15 additions & 6 deletions pysd/py_backend/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class Macro(DynamicStateful):
"""
def __init__(self, py_model_file, params=None, return_func=None,
time=None, time_initialization=None, data_files=None,
py_name=None):
data_files_encoding=None, py_name=None):
super().__init__()
self.time = time
self.time_initialization = time_initialization
Expand Down Expand Up @@ -158,7 +158,7 @@ def __init__(self, py_model_file, params=None, return_func=None,

# Load data files
if data_files:
self._get_data(data_files)
self._get_data(data_files, data_files_encoding)

# Assign the cache type to each variable
self._assign_cache_type()
Expand Down Expand Up @@ -221,14 +221,19 @@ def clean_caches(self):
# if nested macros
[macro.clean_caches() for macro in self._macro_elements]

def _get_data(self, data_files):
def _get_data(self, data_files, encoding):
"""Load Data for TabData objects"""
if isinstance(data_files, dict):
for data_file, vars in data_files.items():
if isinstance(encoding, dict):
encoding_df = encoding.get(data_file, None)
else:
encoding_df = encoding
for var in vars:
found = False
for element in self._data_elements:
if var in [element.py_name, element.real_name]:
element.load_data(data_file)
element.load_data(data_file, encoding_df)
found = True
break
if not found:
Expand All @@ -237,7 +242,7 @@ def _get_data(self, data_files):

else:
for element in self._data_elements:
element.load_data(data_files)
element.load_data(data_files, encoding)

def _get_initialize_order(self):
"""
Expand Down Expand Up @@ -1396,11 +1401,13 @@ class Model(Macro):
:class:`pysd.py_backend.model.Macro`
"""
def __init__(self, py_model_file, data_files, initialize, missing_values):
def __init__(self, py_model_file, data_files, data_files_encoding,
initialize, missing_values):
""" Sets up the Python objects """
super().__init__(py_model_file, None, None, Time(),
data_files=data_files)
self.data_files = data_files
self.data_files_encoding = data_files_encoding
self.missing_values = missing_values
# set time component
self.time.stage = 'Load'
Expand Down Expand Up @@ -2159,6 +2166,7 @@ def copy(self, reload=False):
new_model = type(self)(
py_model_file=deepcopy(self.py_model_file),
data_files=deepcopy(self.data_files),
data_files_encoding=deepcopy(self.data_files_encoding),
initialize=initialize,
missing_values=deepcopy(self.missing_values)
)
Expand Down Expand Up @@ -2194,6 +2202,7 @@ def reload(self):
"""
self.__init__(self.py_model_file, data_files=self.data_files,
data_files_encoding=self.data_files_encoding,
initialize=True,
missing_values=self.missing_values)

Expand Down
87 changes: 71 additions & 16 deletions pysd/pysd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
)


def read_xmile(xmile_file, data_files=None, initialize=True,
missing_values="warning"):
def read_xmile(xmile_file, data_files=None, data_files_encoding=None,
initialize=True, missing_values="warning"):
"""
Construct a model from a Xmile file.
Expand All @@ -38,9 +38,20 @@ def read_xmile(xmile_file, data_files=None, initialize=True,
If False, the model will not be initialized when it is loaded.
Default is True.
data_files: list or str or None (optional)
If given the list of files where the necessary data to run the model
is given. Default is None.
data_files: dict or list or str or None
The dictionary with keys the name of file and variables to
load the data from there. Or the list of names or name of the
file to search the data in. Only works for TabData type object
and it is necessary to provide it. Default is None.
data_files_encoding: list or str or dict or None (optional)
Encoding for data_files. If a string or None is passed this
value will be used for all the files. If data_files is a list,
a list of the same length could be used to specify different
encodings. If data_files is a dictionary, a dictionary with the
same keys could be used, being the values the encodings. See
documentation from pandas.read_table for further information.
Default is None.
missing_values: str ("warning", "error", "ignore", "keep") (optional)
What to do with missing values. If "warning" (default)
Expand Down Expand Up @@ -75,15 +86,20 @@ def read_xmile(xmile_file, data_files=None, initialize=True,
py_model_file = ModelBuilder(abs_model).build_model()

# load Python file
model = load(py_model_file, data_files, initialize, missing_values)
model = load(
py_model_file,
data_files, data_files_encoding,
initialize,
missing_values
)
model.xmile_file = str(xmile_file)

return model


def read_vensim(mdl_file, data_files=None, initialize=True,
missing_values="warning", split_views=False,
encoding=None, **kwargs):
def read_vensim(mdl_file, data_files=None, data_files_encoding=None,
initialize=True, missing_values="warning",
split_views=False, encoding=None, **kwargs):
"""
Construct a model from Vensim `.mdl` file.
Expand All @@ -96,9 +112,29 @@ def read_vensim(mdl_file, data_files=None, initialize=True,
If False, the model will not be initialized when it is loaded.
Default is True.
data_files: list or str or None (optional)
If given the list of files where the necessary data to run the model
is given. Default is None.
data_files: dict or list or str or None
The dictionary with keys the name of file and variables to
load the data from there. Or the list of names or name of the
file to search the data in. Only works for TabData type object
and it is necessary to provide it. Default is None.
data_files_encoding: list or str or dict or None (optional)
Encoding for data_files. If a string or None is passed this
value will be used for all the files. If data_files is a list,
a list of the same length could be used to specify different
encodings. If data_files is a dictionary, a dictionary with the
same keys could be used, being the values the encodings. See
documentation from pandas.read_table for further information.
Default is None.
data_files_encoding: list or str or dict or None (optional)
Encoding for data_files. If a string or None is passed this
value will be used for all the files. If data_files is a list,
a list of the same length could be used to specify different
encodings. If data_files is a dictionary, a dictionary with the
same keys could be used, being the values the encodings. See
documentation from pandas.read_table for further information.
Default is None.
missing_values: str ("warning", "error", "ignore", "keep") (optional)
What to do with missing values. If "warning" (default)
Expand Down Expand Up @@ -155,14 +191,19 @@ def read_vensim(mdl_file, data_files=None, initialize=True,
py_model_file = ModelBuilder(abs_model).build_model()

# load Python file
model = load(py_model_file, data_files, initialize, missing_values)
model = load(
py_model_file,
data_files, data_files_encoding,
initialize,
missing_values
)
model.mdl_file = str(mdl_file)

return model


def load(py_model_file, data_files=None, initialize=True,
missing_values="warning"):
def load(py_model_file, data_files=None, data_files_encoding=None,
initialize=True, missing_values="warning"):
"""
Load a Python-converted model file.
Expand All @@ -182,6 +223,15 @@ def load(py_model_file, data_files=None, initialize=True,
file to search the data in. Only works for TabData type object
and it is necessary to provide it. Default is None.
data_files_encoding: list or str or dict or None (optional)
Encoding for data_files. If a string or None is passed this
value will be used for all the files. If data_files is a list,
a list of the same length could be used to specify different
encodings. If data_files is a dictionary, a dictionary with the
same keys could be used, being the values the encodings. See
documentation from pandas.read_table for further information.
Default is None.
missing_values : str ("warning", "error", "ignore", "keep") (optional)
What to do with missing values. If "warning" (default)
shows a warning message and interpolates the values.
Expand All @@ -195,4 +245,9 @@ def load(py_model_file, data_files=None, initialize=True,
>>> model = load('../tests/test-models/samples/teacup/teacup.py')
"""
return Model(py_model_file, data_files, initialize, missing_values)
return Model(
py_model_file,
data_files, data_files_encoding,
initialize,
missing_values
)

0 comments on commit 331c781

Please sign in to comment.