From 331c78193904e53e36f110ff4a13fa925a5e7db3 Mon Sep 17 00:00:00 2001 From: Eneko Martin-Martinez Date: Mon, 21 Oct 2024 16:30:42 +0200 Subject: [PATCH] Allow passing encoding for TabData files --- pysd/py_backend/data.py | 26 ++++++++---- pysd/py_backend/model.py | 21 +++++++--- pysd/pysd.py | 87 ++++++++++++++++++++++++++++++++-------- 3 files changed, 105 insertions(+), 29 deletions(-) diff --git a/pysd/py_backend/data.py b/pysd/py_backend/data.py index f0589ce3..13d9c7c4 100644 --- a/pysd/py_backend/data.py +++ b/pysd/py_backend/data.py @@ -125,8 +125,9 @@ def get_columns(cls, file_name, vars=None, encoding=None): file_name: str Output file to read. Must be csv or tab. - vars: list - List of var names to find in the file. + vars: list or None (optional) + List of var names to find in the file. If None all variables + will be returned. Default is None. encoding: str or None (optional) Encoding type to read output file. Needed if the file has special @@ -268,7 +269,7 @@ def __init__(self, real_name, py_name, coords, interp="interpolate"): + "'raw', 'interpolate', " + "'look_forward' or 'hold_backward'") - def load_data(self, file_names): + def load_data(self, file_names, encoding=None): """ Load data values from files. @@ -276,6 +277,12 @@ def load_data(self, file_names): ---------- file_names: list or str or pathlib.Path Name of the files to search the variable in. + encoding: list or str or None (optional) + Encoding to be used by the data readers. If a list is given, + then file_names should be a list of the same length. If + None or a string is given, this value will be used for all + of them. See documentation from pandas.read_table for + further information. Default is None. 
Returns ------- @@ -285,9 +292,11 @@ def load_data(self, file_names): """ if isinstance(file_names, (str, Path)): file_names = [file_names] + if isinstance(encoding, str) or encoding is None: + encoding = [encoding]*len(file_names) - for file_name in file_names: - self.data = self._load_data(Path(file_name)) + for file_name, encoding_df in zip(file_names, encoding): + self.data = self._load_data(Path(file_name), encoding_df) if self.data is not None: break @@ -297,7 +306,7 @@ def load_data(self, file_names): f"Data for {self.real_name} not found in " f"{', '.join([str(file_name) for file_name in file_names])}") - def _load_data(self, file_name): + def _load_data(self, file_name, encoding): """ Load data values from output @@ -317,7 +326,10 @@ def _load_data(self, file_name): if file_name.suffix in [".csv", ".tab"]: columns, transpose = Columns.get_columns( - file_name, vars=[self.real_name, self.py_name]) + file_name, + vars=[self.real_name, self.py_name], + encoding=encoding + ) if not columns: # the variable is not in the passed file diff --git a/pysd/py_backend/model.py b/pysd/py_backend/model.py index bb800a9b..803a520d 100644 --- a/pysd/py_backend/model.py +++ b/pysd/py_backend/model.py @@ -73,7 +73,7 @@ class Macro(DynamicStateful): """ def __init__(self, py_model_file, params=None, return_func=None, time=None, time_initialization=None, data_files=None, - py_name=None): + data_files_encoding=None, py_name=None): super().__init__() self.time = time self.time_initialization = time_initialization @@ -158,7 +158,7 @@ def __init__(self, py_model_file, params=None, return_func=None, # Load data files if data_files: - self._get_data(data_files) + self._get_data(data_files, data_files_encoding) # Assign the cache type to each variable self._assign_cache_type() @@ -221,14 +221,19 @@ def clean_caches(self): # if nested macros [macro.clean_caches() for macro in self._macro_elements] - def _get_data(self, data_files): + def _get_data(self, data_files, encoding): + 
"""Load Data for TabData objects""" if isinstance(data_files, dict): for data_file, vars in data_files.items(): + if isinstance(encoding, dict): + encoding_df = encoding.get(data_file, None) + else: + encoding_df = encoding for var in vars: found = False for element in self._data_elements: if var in [element.py_name, element.real_name]: - element.load_data(data_file) + element.load_data(data_file, encoding_df) found = True break if not found: @@ -237,7 +242,7 @@ def _get_data(self, data_files): else: for element in self._data_elements: - element.load_data(data_files) + element.load_data(data_files, encoding) def _get_initialize_order(self): """ @@ -1396,11 +1401,13 @@ class Model(Macro): :class:`pysd.py_backend.model.Macro` """ - def __init__(self, py_model_file, data_files, initialize, missing_values): + def __init__(self, py_model_file, data_files, data_files_encoding, + initialize, missing_values): """ Sets up the Python objects """ super().__init__(py_model_file, None, None, Time(), data_files=data_files) self.data_files = data_files + self.data_files_encoding = data_files_encoding self.missing_values = missing_values # set time component self.time.stage = 'Load' @@ -2159,6 +2166,7 @@ def copy(self, reload=False): new_model = type(self)( py_model_file=deepcopy(self.py_model_file), data_files=deepcopy(self.data_files), + data_files_encoding=deepcopy(self.data_files_encoding), initialize=initialize, missing_values=deepcopy(self.missing_values) ) @@ -2194,6 +2202,7 @@ def reload(self): """ self.__init__(self.py_model_file, data_files=self.data_files, + data_files_encoding=self.data_files_encoding, initialize=True, missing_values=self.missing_values) diff --git a/pysd/pysd.py b/pysd/pysd.py index 66e103ce..2d8b8174 100644 --- a/pysd/pysd.py +++ b/pysd/pysd.py @@ -24,8 +24,8 @@ ) -def read_xmile(xmile_file, data_files=None, initialize=True, - missing_values="warning"): +def read_xmile(xmile_file, data_files=None, data_files_encoding=None, + initialize=True, 
missing_values="warning"): """ Construct a model from a Xmile file. @@ -38,9 +38,20 @@ def read_xmile(xmile_file, data_files=None, initialize=True, If False, the model will not be initialize when it is loaded. Default is True. - data_files: list or str or None (optional) - If given the list of files where the necessary data to run the model - is given. Default is None. + data_files: dict or list or str or None + The dictionary with keys the name of file and variables to + load the data from there. Or the list of names or name of the + file to search the data in. Only works for TabData type object + and it is necessary to provide it. Default is None. + + data_files_encoding: list or str or dict or None (optional) + Encoding for data_files. If a string or None is passed this + value will be used for all the files. If data_files is a list, + a list of the same length could be used to specify different + encodings. If data_files is a dictionary, a dictionary with the + same keys could be used, being the values the encodings. See + documentation from pandas.read_table for further information. + Default is None. missing_values: str ("warning", "error", "ignore", "keep") (optional) What to do with missing values. If "warning" (default) @@ -75,15 +86,20 @@ def read_xmile(xmile_file, data_files=None, initialize=True, py_model_file = ModelBuilder(abs_model).build_model() # load Python file - model = load(py_model_file, data_files, initialize, missing_values) + model = load( + py_model_file, + data_files, data_files_encoding, + initialize, + missing_values + ) model.xmile_file = str(xmile_file) return model -def read_vensim(mdl_file, data_files=None, initialize=True, - missing_values="warning", split_views=False, - encoding=None, **kwargs): +def read_vensim(mdl_file, data_files=None, data_files_encoding=None, + initialize=True, missing_values="warning", + split_views=False, encoding=None, **kwargs): """ Construct a model from Vensim `.mdl` file. 
@@ -96,9 +112,29 @@ def read_vensim(mdl_file, data_files=None, initialize=True, If False, the model will not be initialize when it is loaded. Default is True. - data_files: list or str or None (optional) - If given the list of files where the necessary data to run the model - is given. Default is None. + data_files: dict or list or str or None + The dictionary with keys the name of file and variables to + load the data from there. Or the list of names or name of the + file to search the data in. Only works for TabData type object + and it is necessary to provide it. Default is None. + + data_files_encoding: list or str or dict or None (optional) + Encoding for data_files. If a string or None is passed this + value will be used for all the files. If data_files is a list, + a list of the same length could be used to specify different + encodings. If data_files is a dictionary, a dictionary with the + same keys could be used, being the values the encodings. See + documentation from pandas.read_table for further information. + Default is None. + + data_files_encoding: list or str or dict or None (optional) + Encoding for data_files. If a string or None is passed this + value will be used for all the files. If data_files is a list, + a list of the same length could be used to specify different + encodings. If data_files is a dictionary, a dictionary with the + same keys could be used, being the values the encodings. See + documentation from pandas.read_table for further information. + Default is None. missing_values: str ("warning", "error", "ignore", "keep") (optional) What to do with missing values. 
If "warning" (default) @@ -155,14 +191,19 @@ def read_vensim(mdl_file, data_files=None, initialize=True, py_model_file = ModelBuilder(abs_model).build_model() # load Python file - model = load(py_model_file, data_files, initialize, missing_values) + model = load( + py_model_file, + data_files, data_files_encoding, + initialize, + missing_values + ) model.mdl_file = str(mdl_file) return model -def load(py_model_file, data_files=None, initialize=True, - missing_values="warning"): +def load(py_model_file, data_files=None, data_files_encoding=None, + initialize=True, missing_values="warning"): """ Load a Python-converted model file. @@ -182,6 +223,15 @@ def load(py_model_file, data_files=None, initialize=True, file to search the data in. Only works for TabData type object and it is neccessary to provide it. Default is None. + data_files_encoding: list or str or dict or None (optional) + Encoding for data_files. If a string or None is passed this + value will be used for all the files. If data_files is a list, + a list of the same length could be used to specify different + encodings. If data_files is a dictionary, a dictionary with the + same keys could be used, being the values the encodings. See + documentation from pandas.read_table for further information. + Default is None. + missing_values : str ("warning", "error", "ignore", "keep") (optional) What to do with missing values. If "warning" (default) shows a warning message and interpolates the values. @@ -195,4 +245,9 @@ def load(py_model_file, data_files=None, initialize=True, >>> model = load('../tests/test-models/samples/teacup/teacup.py') """ - return Model(py_model_file, data_files, initialize, missing_values) + return Model( + py_model_file, + data_files, data_files_encoding, + initialize, + missing_values + )