From 84f4430cd6203a0ef5fcdb294812c1341f85ab39 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Tue, 9 Nov 2021 17:35:05 +0100 Subject: [PATCH 01/16] Add data class and parse data vars --- docs/development/internal_functions.rst | 2 +- docs/development/vensim_translation.rst | 2 +- pysd/_version.py | 2 +- pysd/py_backend/components.py | 83 +++ pysd/py_backend/external.py | 34 +- pysd/py_backend/utils.py | 590 ++++-------------- pysd/pysd.py | 4 +- pysd/tools/benchmarking.py | 84 +-- pysd/{py_backend => translation}/builder.py | 86 ++- pysd/translation/utils.py | 520 +++++++++++++++ .../vensim/__init__.py | 0 .../vensim/vensim2py.py | 25 +- .../xmile/SMILE2Py.py | 2 +- .../xmile/__init__.py | 0 .../xmile/smile.grammar | 0 .../xmile/xmile2py.py | 0 tests/unit_test_builder.py | 24 +- tests/unit_test_translation_utils.py | 245 ++++++++ tests/unit_test_utils.py | 241 ------- tests/unit_test_vensim2py.py | 96 +-- tests/unit_test_xmile2py.py | 2 +- 21 files changed, 1093 insertions(+), 949 deletions(-) rename pysd/{py_backend => translation}/builder.py (96%) create mode 100644 pysd/translation/utils.py rename pysd/{py_backend => translation}/vensim/__init__.py (100%) rename pysd/{py_backend => translation}/vensim/vensim2py.py (98%) rename pysd/{py_backend => translation}/xmile/SMILE2Py.py (99%) rename pysd/{py_backend => translation}/xmile/__init__.py (100%) rename pysd/{py_backend => translation}/xmile/smile.grammar (100%) rename pysd/{py_backend => translation}/xmile/xmile2py.py (100%) create mode 100644 tests/unit_test_translation_utils.py diff --git a/docs/development/internal_functions.rst b/docs/development/internal_functions.rst index 14ff0720..22b4da1f 100644 --- a/docs/development/internal_functions.rst +++ b/docs/development/internal_functions.rst @@ -29,7 +29,7 @@ Special functions needed for model execution Building the python model file ------------------------------ -.. automodule:: pysd.py_backend.builder +.. automodule:: pysd.translation.builder :members: :undoc-members: :private-members: diff --git a/docs/development/vensim_translation.rst b/docs/development/vensim_translation.rst index 5f4f4249..ecc1727b 100644 --- a/docs/development/vensim_translation.rst +++ b/docs/development/vensim_translation.rst @@ -34,7 +34,7 @@ parsed, and may throw an error. Future releases will handle this with more grace Used Functions for Translation ------------------------------ -.. automodule:: pysd.py_backend.vensim.vensim2py +.. 
automodule:: pysd.translation.vensim.vensim2py :members: :undoc-members: :private-members: diff --git a/pysd/_version.py b/pysd/_version.py index 8c0d5d5b..9aa3f903 100644 --- a/pysd/_version.py +++ b/pysd/_version.py @@ -1 +1 @@ -__version__ = "2.0.0" +__version__ = "2.1.0" diff --git a/pysd/py_backend/components.py b/pysd/py_backend/components.py index 46fc4d8a..954b8361 100644 --- a/pysd/py_backend/components.py +++ b/pysd/py_backend/components.py @@ -3,10 +3,16 @@ """ import os +import warnings +import re import random from importlib.machinery import SourceFileLoader +import numpy as np +import xarray as xr + from pysd._version import __version__ +from .utils import load_outputs, get_columns_to_load class Components(object): @@ -158,3 +164,80 @@ def update(self, value): def reset(self): """ Reset time value to the initial """ self._time = self._initial_time + + +class Data(object): + def __init__(self, data=None, file_name=None, var=None, transpose=False, + coords={}, interp="interpolate"): + + self.interp = interp + self.is_float = not bool(coords) + if not data: + self.data = self.load_from_output( + file_name, var, coords, transpose) + else: + self.data = data + + @staticmethod + def load_from_output(file_name, var, coords, transpose): + + if not coords: + # 0 dimensional data + values = load_outputs(file_name, transpose, columns=[var]) + return xr.DataArray( + values[var].values, + {'time': values.index.values}, + ['time']) + + # subscripted data + dims = list(coords) + + values = load_outputs( + file_name, transpose, + columns=get_columns_to_load(file_name, transpose, vars=[var])) + + out = xr.DataArray( + np.nan, + {'time': values.index.values, **coords}, + ['time'] + dims) + + for var in values.columns: + coords = { + dim: [coord] + for (dim, coord) + in zip(dims, re.split(r'\[|\]|\s*,\s*', var)[1:-1]) + } + out.loc[coords] = np.expand_dims( + values[var].values, + axis=tuple(range(1, len(coords)+1)) + ) + return out + + def __call__(self, time): + if time in self.data['time'].values: + outdata = self.data.sel(time=time) + elif self.interp == "raw": + return np.nan + elif time > self.data['time'].values[-1]: + warnings.warn( + self.py_name + "\n" + + "extrapolating data above the maximum value of the time") + outdata = self.data[-1] + elif time < self.data['time'].values[0]: + warnings.warn( + self.py_name + "\n" + + "extrapolating data below the minimum value of the time") + outdata = self.data[0] + elif self.interp == "interpolate": + outdata = self.data.interp(time=time) + elif self.interp == 'look forward': + outdata = self.data.sel(time=time, method="backfill") + elif self.interp == 'hold backward': + outdata = self.data.sel(time=time, method="pad") + + if self.is_float: + # if data has no-coords return a float + return float(outdata) + else: + # Remove time coord from the DataArray + return outdata.reset_coords('time', drop=True) diff --git a/pysd/py_backend/external.py b/pysd/py_backend/external.py index 90f6148b..41427236 100644 --- a/pysd/py_backend/external.py +++ b/pysd/py_backend/external.py @@ -12,6 +12,7 @@ import xarray as xr from openpyxl import load_workbook from . 
import utils +from .components import Data class Excels(): @@ -685,7 +686,7 @@ def _series_selector(self, x_row_or_col, cell): return "name" -class ExtData(External): +class ExtData(External, Data): """ Class for Vensim GET XLS DATA/GET DIRECT DATA """ @@ -700,6 +701,7 @@ def __init__(self, file_name, sheet, time_row_or_col, cell, self.coordss = [coords] self.root = root self.interp = interp + self.is_float = not bool(coords) # check if the interpolation method is valid if not interp: @@ -745,36 +747,6 @@ def initialize(self): in zip(self.files, self.sheets, self.time_row_or_cols, self.cells, self.coordss)]) - def __call__(self, time): - - if time in self.data['time'].values: - outdata = self.data.sel(time=time) - elif self.interp == "raw": - return np.nan - elif time > self.data['time'].values[-1]: - warnings.warn( - self.py_name + "\n" - + "extrapolating data above the maximum value of the time") - outdata = self.data[-1] - elif time < self.data['time'].values[0]: - warnings.warn( - self.py_name + "\n" - + "extrapolating data below the minimum value of the time") - outdata = self.data[0] - elif self.interp == "interpolate": - outdata = self.data.interp(time=time) - elif self.interp == 'look forward': - outdata = self.data.sel(time=time, method="backfill") - elif self.interp == 'hold backward': - outdata = self.data.sel(time=time, method="pad") - - if self.coordss[0]: - # Remove time coord from the DataArray - return outdata.reset_coords('time', drop=True) - else: - # if data has no-coords return a float - return float(outdata) - class ExtLookup(External): """ diff --git a/pysd/py_backend/utils.py b/pysd/py_backend/utils.py index 152a4a6b..47b1a7d0 100644 --- a/pysd/py_backend/utils.py +++ b/pysd/py_backend/utils.py @@ -5,9 +5,8 @@ """ import os -import warnings import json -from collections.abc import Mapping +from chardet.universaldetector import UniversalDetector import regex as re import progressbar @@ -15,18 +14,6 @@ import xarray as xr import pandas as pd -# used to create python safe names with the variable reserved_words -from keyword import kwlist -from builtins import __dir__ as bidir -from .decorators import __dir__ as ddir -from .external import __dir__ as edir -from .functions import __dir__ as fdir -from .statefuls import __dir__ as sdir - - -reserved_words = set(dir() + fdir() + edir() + ddir() + sdir() + bidir()) -reserved_words = reserved_words.union(kwlist) - def xrmerge(*das): """ @@ -78,362 +65,6 @@ def xrsplit(array): return sp_list -def find_subscript_name(subscript_dict, element, avoid=[]): - """ - Given a subscript dictionary, and a member of a subscript family, - return the first key of which the member is within the value list. - If element is already a subscript name, return that. - - Parameters - ---------- - subscript_dict: dict - Follows the {'subscript name':['list','of','subscript','elements']} - format. - - element: str - - avoid: list (optional) - List of subscripts to avoid. Default is an empty list. - - Returns - ------- - - Examples - -------- - >>> find_subscript_name({'Dim1': ['A', 'B'], - ... 'Dim2': ['C', 'D', 'E'], - ... 'Dim3': ['F', 'G', 'H', 'I']}, - ... 'D') - 'Dim2' - >>> find_subscript_name({'Dim1': ['A', 'B'], - ... 'Dim2': ['A', 'B'], - ... 'Dim3': ['A', 'B']}, - ... 'B') - 'Dim1' - >>> find_subscript_name({'Dim1': ['A', 'B'], - ... 'Dim2': ['A', 'B'], - ... 'Dim3': ['A', 'B']}, - ... 'B', - ... 
avoid=['Dim1']) - 'Dim2' - """ - if element in subscript_dict.keys(): - return element - - for name, elements in subscript_dict.items(): - if element in elements and name not in avoid: - return name - - -def make_coord_dict(subs, subscript_dict, terse=True): - """ - This is for assisting with the lookup of a particular element, such that - the output of this function would take the place of %s in this expression. - - `variable.loc[%s]` - - Parameters - ---------- - subs: list of strings - coordinates, either as names of dimensions, or positions within - a dimension. - - subscript_dict: dict - the full dictionary of subscript names and values. - - terse: bool (optional) - If True, includes only elements that do not cover the full range of - values in their respective dimension.If False, returns all dimensions. - Default is True. - - Returns - ------- - coordinates: dict - Coordinates needed to access the xarray quantities we're interested in. - - Examples - -------- - >>> make_coord_dict(['Dim1', 'D'], {'Dim1': ['A', 'B', 'C'], - ... 'Dim2': ['D', 'E', 'F']}) - {'Dim2': ['D']} - >>> make_coord_dict(['Dim1', 'D'], {'Dim1': ['A', 'B', 'C'], - ... 'Dim2':['D', 'E', 'F']}, terse=False) - {'Dim2': ['D'], 'Dim1': ['A', 'B', 'C']} - - """ - sub_elems_list = [y for x in subscript_dict.values() for y in x] - coordinates = {} - for sub in subs: - if sub in sub_elems_list: - name = find_subscript_name(subscript_dict, sub, avoid=subs) - coordinates[name] = [sub] - elif not terse: - coordinates[sub] = subscript_dict[sub] - return coordinates - - -def make_merge_list(subs_list, subscript_dict, element=""): - """ - This is for assisting when building xrmerge. From a list of subscript - lists returns the final subscript list after mergin. Necessary when - merging variables with subscripts comming from different definitions. - - Parameters - ---------- - subs_list: list of lists of strings - Coordinates, either as names of dimensions, or positions within - a dimension. - - subscript_dict: dict - The full dictionary of subscript names and values. - - element: str (optional) - Element name, if given it will be printed with any error or - warning message. Default is "". - - Returns - ------- - dims: list - Final subscripts after merging. - - Examples - -------- - >>> make_merge_list([['upper'], ['C']], {'all': ['A', 'B', 'C'], - ... 
'upper': ['A', 'B']}) - ['all'] - - """ - coords_set = [set() for i in range(len(subs_list[0]))] - coords_list = [ - make_coord_dict(subs, subscript_dict, terse=False) - for subs in subs_list - ] - - # update coords set - [[coords_set[i].update(coords[dim]) for i, dim in enumerate(coords)] - for coords in coords_list] - - dims = [None] * len(coords_set) - # create an array with the name of the subranges for all merging elements - dims_list = np.array([list(coords) for coords in coords_list]).transpose() - indexes = np.arange(len(dims)) - - for i, coord2 in enumerate(coords_set): - dims1 = [ - dim for dim in dims_list[i] - if dim is not None and set(subscript_dict[dim]) == coord2 - ] - if dims1: - # if the given coordinate already matches return it - dims[i] = dims1[0] - else: - # find a suitable coordinate - other_dims = dims_list[indexes != i] - for name, elements in subscript_dict.items(): - if coord2 == set(elements) and name not in other_dims: - dims[i] = name - break - - if not dims[i]: - # the dimension is incomplete use the smaller - # dimension that completes it - for name, elements in subscript_dict.items(): - if coord2.issubset(set(elements))\ - and name not in other_dims: - dims[i] = name - warnings.warn( - element - + "\nDimension given by subscripts:" - + "\n\t{}\nis incomplete ".format(coord2) - + "using {} instead.".format(name) - + "\nSubscript_dict:" - + "\n\t{}".format(subscript_dict) - ) - break - - if not dims[i]: - for name, elements in subscript_dict.items(): - if coord2 == set(elements): - j = 1 - while name + str(j) in subscript_dict.keys(): - j += 1 - subscript_dict[name + str(j)] = elements - dims[i] = name + str(j) - warnings.warn( - element - + "\nAdding new subscript range to" - + " subscript_dict:\n" - + name + str(j) + ": " + ', '.join(elements)) - break - - if not dims[i]: - # not able to find the correct dimension - raise ValueError( - element - + "\nImpossible to find the dimension that contains:" - + "\n\t{}\nFor subscript_dict:".format(coord2) - + "\n\t{}".format(subscript_dict) - ) - - return dims - - -def make_python_identifier(string, namespace=None): - """ - Takes an arbitrary string and creates a valid Python identifier. - - If the input string is in the namespace, return its value. - - If the python identifier created is already in the namespace, - but the input string is not (ie, two similar strings resolve to - the same python identifier) - - or if the identifier is a reserved word in the reserved_words - list, or is a python default reserved word, - adds _1, or if _1 is in the namespace, _2, etc. - - Parameters - ---------- - string: str - The text to be converted into a valid python identifier. - - namespace: dict - Map of existing translations into python safe identifiers. - This is to ensure that two strings are not translated into - the same python identifier. If string is already in the namespace - its value will be returned. Otherwise, namespace will be mutated - adding string as a new key and its value. - - Returns - ------- - identifier: str - A vaild python identifier based on the input string. 
- - Examples - -------- - >>> make_python_identifier('Capital') - 'capital' - - >>> make_python_identifier('multiple words') - 'multiple_words' - - >>> make_python_identifier('multiple spaces') - 'multiple_spaces' - - When the name is a python keyword, add '_1' to differentiate it - >>> make_python_identifier('for') - 'for_1' - - Remove leading and trailing whitespace - >>> make_python_identifier(' whitespace ') - 'whitespace' - - Remove most special characters outright: - >>> make_python_identifier('H@t tr!ck') - 'ht_trck' - - remove leading digits - >>> make_python_identifier('123abc') - 'nvs_123abc' - - already in namespace - >>> make_python_identifier('Var$', namespace={'Var$': 'var'}) - ''var' - - namespace conflicts - >>> make_python_identifier('Var@', namespace={'Var$': 'var'}) - 'var_1' - - >>> make_python_identifier('Var$', namespace={'Var@': 'var', - ... 'Var%':'var_1'}) - 'var_2' - - References - ---------- - Identifiers must follow the convention outlined here: - https://docs.python.org/2/reference/lexical_analysis.html#identifiers - - """ - if namespace is None: - namespace = dict() - - if string in namespace: - return namespace[string] - - # create a working copy (and make it lowercase, while we're at it) - s = string.lower() - - # remove leading and trailing whitespace - s = s.strip() - - # Make spaces into underscores - s = re.sub(r"[\s\t\n]+", "_", s) - - # Remove invalid characters - s = re.sub(r"[^\p{l}\p{m}\p{n}_]", "", s) - - # If leading character is not a letter add nvs_. - # Only letters can be leading characters. - if re.findall(r"^[^\p{l}_]", s): - s = "nvs_" + s - elif re.findall(r"^_", s): - s = "nvs" + s - - # reserved the names of PySD functions and methods and other vars - # in the namespace - used_words = reserved_words.union(namespace.values()) - - # Check that the string is not a python identifier - identifier = s - i = 1 - while identifier in used_words: - identifier = s + '_' + str(i) - i += 1 - - namespace[string] = identifier - - return identifier - - -def make_add_identifier(identifier, build_names): - """ - Takes an existing used Python identifier and attatch a unique - identifier with ADD_# ending. - - Used for add new information to an existing external object. - build_names will be updated inside this functions as a set - is mutable. - - Parameters - ---------- - identifier: str - Existing python identifier. - - build_names: set - Set of the already used identifiers for external objects. - - Returns - ------- - identifier: str - A vaild python identifier based on the input indentifier - and the existing ones. - - """ - identifier += "ADD_" - number = 1 - # iterate until finding a non-used identifier - while identifier + str(number) in build_names: - number += 1 - - # update identifier - identifier += str(number) - - # update the build names - build_names.add(identifier) - - return identifier - - def get_return_elements(return_columns, namespace): """ Takes a list of return elements formatted in vensim's format @@ -678,72 +309,6 @@ def rearrange(data, dims, coords): return None -def simplify_subscript_input(coords, subscript_dict, return_full, merge_subs): - """ - Parameters - ---------- - coords: dict - Coordinates to write in the model file. - - subscript_dict: dict - The subscript dictionary of the model file. 
- - return_full: bool - If True the when coords == subscript_dict, '_subscript_dict' - will be returned - - merge_subs: list of strings - List of the final subscript range of the python array after - merging with other objects - - Returns - ------- - coords: str - The equations to generate the coord dicttionary in the model file. - - """ - - if coords == subscript_dict and return_full: - # variable defined with all the subscripts - return "_subscript_dict" - - coordsp = [] - for ndim, (dim, coord) in zip(merge_subs, coords.items()): - # find dimensions can be retrieved from _subscript_dict - if coord == subscript_dict[dim]: - # use _subscript_dict - coordsp.append(f"'{ndim}': _subscript_dict['{dim}']") - else: - # write whole dict - coordsp.append(f"'{ndim}': {coord}") - - return "{" + ", ".join(coordsp) + "}" - - -def add_entries_underscore(*dictionaries): - """ - Expands dictionaries adding new keys underscoring the white spaces - in the old ones. As the dictionaries are mutable objects this functions - will add the new entries to the already existing dictionaries with - no need to return a new one. - - Parameters - ---------- - *dictionaries: dict(s) - The dictionary or dictionaries to add the entries with underscore. - - Return - ------ - None - - """ - for dictionary in dictionaries: - keys = list(dictionary) - for name in keys: - dictionary[re.sub(" ", "_", name)] = dictionary[name] - return - - def load_model_data(root_dir, model_name): """ @@ -839,76 +404,151 @@ def load_modules(module_name, module_content, work_dir, submodules): return "\n\n".join(submodules) -def clean_file_names(*args): +def load_outputs(file_name, transpose=False, columns=None, encoding=None): """ - Removes special characters and makes clean file names + Load outputs file Parameters ---------- - *args: tuple - Any number of strings to to clean + file_name: str + Output file to read. Must be csv or tab. + + transpose: bool (optional) + If True reads transposed outputs file, i.e. one variable per row. + Default is False. + + columns: list or None (optional) + List of the column names to load. If None loads all the columns. + Default is None. + NOTE: if transpose=False, the loading will be faster as only + selected columns will be loaded. If transpose=True the whole + file must be read and it will be subselected later. + + encoding: str or None (optional) + Encoding type to read output file. Needed if the file has special + characters. Default is None. Returns ------- - clean: list - List containing the clean strings + pandas.DataFrame + A pandas.DataFrame with the outputs values. + """ - clean = [] - for name in args: - clean.append(re.sub( - r"[\W]+", "", name.replace(" ", "_") - ).lstrip("0123456789") - ) - return clean + read_func = {'.csv': pd.read_csv, '.tab': pd.read_table} + + if columns: + columns = set(columns) + if not transpose: + columns.add("Time") + + for end, func in read_func.items(): + if file_name.lower().endswith(end): + if transpose: + out = func(file_name, + encoding=encoding, + index_col=0).T + if columns: + out = out[columns] + else: + out = func(file_name, + encoding=encoding, + usecols=columns, + index_col="Time") + out.index = out.index.astype(float) + # return the dataframe removing nan index values + return out[~np.isnan(out.index)] -def merge_nested_dicts(original_dict, dict_to_merge): + raise ValueError( + f"\nNot able to read '{file_name}'. 
" + + f"Only {', '.join(list(read_func))} files are accepted.") + + +def get_columns_to_load(file_name, transpose=False, vars=None, encoding=None): """ - Merge dictionaries recursively, preserving common keys. + Returns the variable names in the given file. Parameters ---------- - original_dict: dict - Dictionary onto which the merge is executed. + file_name: str + Output file to read. Must be csv or tab. + + transpose: bool (optional) + If True reads transposed outputs file, i.e. one variable per row. + Default is False. - dict_to_merge: dict - Dictionary to be merged to the original_dict. + vars: list or None (optional) + List of the vars names to return. If None loads all the column names. + Default is None. + + encoding: str or None (optional) + Encoding type to read output file. Needed if the file has special + characters. Default is None. Returns ------- - None + set + Set of columns for var. + """ + read_func = {'.csv': pd.read_csv, '.tab': pd.read_table} + out = None - for k, v in dict_to_merge.items(): - if (k in original_dict and isinstance(original_dict[k], dict) - and isinstance(dict_to_merge[k], Mapping)): - merge_nested_dicts(original_dict[k], dict_to_merge[k]) - else: - original_dict[k] = dict_to_merge[k] + for end, func in read_func.items(): + if file_name.lower().endswith(end): + if transpose: + out = func(file_name, + encoding=encoding, + usecols=[0]).iloc[:, 0].to_list() + else: + out = func(file_name, + encoding=encoding, + nrows=0).iloc[:, 1:] + + out = set(out) + + if out is None: + raise ValueError( + f"\nNot able to read '{file_name}'. " + + f"Only {', '.join(list(read_func))} files are accepted.") + + if vars is None: + return out + + else: + outs = set() + for var in out: + if var in vars: + outs.add(var) + vars.remove(var) + else: + for var1 in vars: + if var.startswith(var1 + "["): + outs.add(var) + return outs -def update_dependency(dependency, deps_dict): + +def detect_encoding(filename): """ - Update dependency in dependencies dict. + Detects the encoding of a file. Parameters ---------- - dependency: str - The dependency to add to the dependency dict. - - deps_dict: dict - The dictionary of dependencies. If dependency is in deps_dict add 1 - to its value. Otherwise, add dependency to deps_dict with value 1. + filename: str + Name of the file to detect the encoding. Returns ------- - None + encoding: str + The encoding of the file. 
""" - if dependency in deps_dict: - deps_dict[dependency] += 1 - else: - deps_dict[dependency] = 1 + detector = UniversalDetector() + for line in open(filename, 'rb').readlines(): + detector.feed(line) + detector.close() + return detector.result['encoding'] class ProgressBar: diff --git a/pysd/pysd.py b/pysd/pysd.py index 18f4eb85..82c91162 100644 --- a/pysd/pysd.py +++ b/pysd/pysd.py @@ -54,7 +54,7 @@ def read_xmile(xmile_file, initialize=True, missing_values="warning"): >>> model = read_xmile('../tests/test-models/samples/teacup/teacup.xmile') """ - from .py_backend.xmile.xmile2py import translate_xmile + from .translation.xmile.xmile2py import translate_xmile py_model_file = translate_xmile(xmile_file) model = load(py_model_file, initialize, missing_values) @@ -109,7 +109,7 @@ def read_vensim(mdl_file, initialize=True, missing_values="warning", >>> model = read_vensim('../tests/test-models/samples/teacup/teacup.mdl') """ - from .py_backend.vensim.vensim2py import translate_vensim + from .translation.vensim.vensim2py import translate_vensim py_model_file = translate_vensim(mdl_file, split_views, **kwargs) model = load(py_model_file, initialize, missing_values) diff --git a/pysd/tools/benchmarking.py b/pysd/tools/benchmarking.py index acd0ce29..da755788 100644 --- a/pysd/tools/benchmarking.py +++ b/pysd/tools/benchmarking.py @@ -8,9 +8,9 @@ import numpy as np import pandas as pd -from chardet.universaldetector import UniversalDetector from pysd import read_vensim, read_xmile +from ..py_backend.utils import load_outputs, detect_encoding def runner(model_file, canonical_file=None, transpose=False): @@ -65,66 +65,6 @@ def runner(model_file, canonical_file=None, transpose=False): return model.run(return_columns=canon.columns), canon -def load_outputs(file_name, transpose=False, columns=None, encoding=None): - """ - Load outputs file - - Parameters - ---------- - file_name: str - Output file to read. Must be csv or tab. - - transpose: bool (optional) - If True reads transposed outputs file, i.e. one variable per row. - Default is False. - - columns: list or None (optional) - List of the column names to load. If None loads all the columns. - Default is None. - NOTE: if transpose=False, the loading will be faster as only - selected columns will be loaded. If transpose=True the whole - file must be read and it will be subselected later. - - encoding: str or None (optional) - Encoding type to read output file. Needed if the file has special - characters. Default is None. - - Returns - ------- - pandas.DataFrame - A pandas.DataFrame with the outputs values. - - """ - read_func = {'.csv': pd.read_csv, '.tab': pd.read_table} - - if columns: - columns = set(columns) - if not transpose: - columns.add("Time") - - for end, func in read_func.items(): - if file_name.lower().endswith(end): - if transpose: - out = func(file_name, - encoding=encoding, - index_col=0).T - if columns: - out = out[columns] - else: - out = func(file_name, - encoding=encoding, - usecols=columns, - index_col="Time") - - out.index = out.index.astype(float) - # return the dataframe removing nan index values - return out[~np.isnan(out.index)] - - raise ValueError( - f"\nNot able to read '{file_name}'. 
" - + f"Only {', '.join(list(read_func))} files are accepted.") - - def assert_frames_close(actual, expected, assertion="raise", verbose=False, precision=2, **kwargs): """ @@ -319,25 +259,3 @@ def _remove_constant_nan(df): """ nan_cols = np.isnan(df.iloc[1:, :]).all() df.loc[:, nan_cols] = df.loc[:, nan_cols].iloc[0].values - - -def detect_encoding(filename): - """ - Detects the encoding of a file. - - Parameters - ---------- - filename: str - Name of the file to detect the encoding. - - Returns - ------- - encoding: str - The encoding of the file. - - """ - detector = UniversalDetector() - for line in open(filename, 'rb').readlines(): - detector.feed(line) - detector.close() - return detector.result['encoding'] diff --git a/pysd/py_backend/builder.py b/pysd/translation/builder.py similarity index 96% rename from pysd/py_backend/builder.py rename to pysd/translation/builder.py index 48487d47..7585c7e1 100644 --- a/pysd/py_backend/builder.py +++ b/pysd/translation/builder.py @@ -744,52 +744,50 @@ def merge_partial_elements(element_list): outs = dict() # output data structure for element in element_list: - if element["py_expr"] != "None": # for - name = element["py_name"] - if name not in outs: - - # Use 'expr' for Vensim models, and 'eqn' for Xmile - # (This makes the Vensim equation prettier.) - eqn = element["expr"] if "expr" in element else element["eqn"] - outs[name] = { - "py_name": element["py_name"], - "real_name": element["real_name"], - "doc": element["doc"], - "py_expr": [element["py_expr"]], # in a list - "unit": element["unit"], - "subs": [element["subs"]], - "merge_subs": element["merge_subs"] - if "merge_subs" in element else None, - "dependencies": element["dependencies"] - if "dependencies" in element else None, - "lims": element["lims"], - "eqn": [eqn.replace(r"\ ", "")], - "kind": element["kind"], - "arguments": element["arguments"], - } + name = element["py_name"] + if name not in outs: + # Use 'expr' for Vensim models, and 'eqn' for Xmile + # (This makes the Vensim equation prettier.) 
+ eqn = element["expr"] if "expr" in element else element["eqn"] + outs[name] = { + "py_name": element["py_name"], + "real_name": element["real_name"], + "doc": element["doc"], + "py_expr": [element["py_expr"]], # in a list + "unit": element["unit"], + "subs": [element["subs"]], + "merge_subs": element["merge_subs"] + if "merge_subs" in element else None, + "dependencies": element["dependencies"] + if "dependencies" in element else None, + "lims": element["lims"], + "eqn": [eqn.replace(r"\ ", "")], + "kind": element["kind"], + "arguments": element["arguments"], + } - else: - eqn = element["expr"] if "expr" in element else element["eqn"] - - outs[name]["doc"] = outs[name]["doc"] or element["doc"] - outs[name]["unit"] = outs[name]["unit"] or element["unit"] - outs[name]["lims"] = outs[name]["lims"] or element["lims"] - outs[name]["eqn"] += [eqn.replace(r"\ ", "")] - outs[name]["py_expr"] += [element["py_expr"]] - outs[name]["subs"] += [element["subs"]] - if outs[name]["dependencies"] is not None: - if name.startswith("_"): - # stateful object merge initial and step - for target in outs[name]["dependencies"]: - _merge_dependencies( - outs[name]["dependencies"][target], - element["dependencies"][target]) - else: - # regular element + else: + eqn = element["expr"] if "expr" in element else element["eqn"] + + outs[name]["doc"] = outs[name]["doc"] or element["doc"] + outs[name]["unit"] = outs[name]["unit"] or element["unit"] + outs[name]["lims"] = outs[name]["lims"] or element["lims"] + outs[name]["eqn"] += [eqn.replace(r"\ ", "")] + outs[name]["py_expr"] += [element["py_expr"]] + outs[name]["subs"] += [element["subs"]] + if outs[name]["dependencies"] is not None: + if name.startswith("_"): + # stateful object merge initial and step + for target in outs[name]["dependencies"]: _merge_dependencies( - outs[name]["dependencies"], - element["dependencies"]) - outs[name]["arguments"] = element["arguments"] + outs[name]["dependencies"][target], + element["dependencies"][target]) + else: + # regular element + _merge_dependencies( + outs[name]["dependencies"], + element["dependencies"]) + outs[name]["arguments"] = element["arguments"] return list(outs.values()) diff --git a/pysd/translation/utils.py b/pysd/translation/utils.py new file mode 100644 index 00000000..712c9a93 --- /dev/null +++ b/pysd/translation/utils.py @@ -0,0 +1,520 @@ +""" +These are general utilities used by the builder.py, functions.py or the +model file. Vensim's function equivalents should not go here but in +functions.py +""" + +import warnings +from collections.abc import Mapping + +import regex as re +import numpy as np + +# used to create python safe names with the variable reserved_words +from keyword import kwlist +from builtins import __dir__ as bidir +from ..py_backend.components import __dir__ as cdir +from ..py_backend.decorators import __dir__ as ddir +from ..py_backend.external import __dir__ as edir +from ..py_backend.functions import __dir__ as fdir +from ..py_backend.statefuls import __dir__ as sdir +from ..py_backend.utils import __dir__ as udir + + +reserved_words = set( + dir() + bidir() + cdir() + fdir() + edir() + ddir() + sdir() + udir()) +reserved_words = reserved_words.union(kwlist) + + +def find_subscript_name(subscript_dict, element, avoid=[]): + """ + Given a subscript dictionary, and a member of a subscript family, + return the first key of which the member is within the value list. + If element is already a subscript name, return that. 
+
+    Parameters
+    ----------
+    subscript_dict: dict
+        Follows the {'subscript name':['list','of','subscript','elements']}
+        format.
+
+    element: str
+        Subscript element or subscript name to look up.
+
+    avoid: list (optional)
+        List of subscripts to avoid. Default is an empty list.
+
+    Returns
+    -------
+    name: str
+        Name of the first subscript family that contains the element.
+
+    Examples
+    --------
+    >>> find_subscript_name({'Dim1': ['A', 'B'],
+    ...                      'Dim2': ['C', 'D', 'E'],
+    ...                      'Dim3': ['F', 'G', 'H', 'I']},
+    ...                     'D')
+    'Dim2'
+    >>> find_subscript_name({'Dim1': ['A', 'B'],
+    ...                      'Dim2': ['A', 'B'],
+    ...                      'Dim3': ['A', 'B']},
+    ...                     'B')
+    'Dim1'
+    >>> find_subscript_name({'Dim1': ['A', 'B'],
+    ...                      'Dim2': ['A', 'B'],
+    ...                      'Dim3': ['A', 'B']},
+    ...                     'B',
+    ...                     avoid=['Dim1'])
+    'Dim2'
+    """
+    if element in subscript_dict.keys():
+        return element
+
+    for name, elements in subscript_dict.items():
+        if element in elements and name not in avoid:
+            return name
+
+
+def make_coord_dict(subs, subscript_dict, terse=True):
+    """
+    This is for assisting with the lookup of a particular element, such that
+    the output of this function would take the place of %s in this expression.
+
+    `variable.loc[%s]`
+
+    Parameters
+    ----------
+    subs: list of strings
+        coordinates, either as names of dimensions, or positions within
+        a dimension.
+
+    subscript_dict: dict
+        the full dictionary of subscript names and values.
+
+    terse: bool (optional)
+        If True, includes only elements that do not cover the full range of
+        values in their respective dimension. If False, returns all
+        dimensions. Default is True.
+
+    Returns
+    -------
+    coordinates: dict
+        Coordinates needed to access the xarray quantities we're interested in.
+
+    Examples
+    --------
+    >>> make_coord_dict(['Dim1', 'D'], {'Dim1': ['A', 'B', 'C'],
+    ...                                 'Dim2': ['D', 'E', 'F']})
+    {'Dim2': ['D']}
+    >>> make_coord_dict(['Dim1', 'D'], {'Dim1': ['A', 'B', 'C'],
+    ...                                 'Dim2': ['D', 'E', 'F']}, terse=False)
+    {'Dim1': ['A', 'B', 'C'], 'Dim2': ['D']}
+
+    """
+    sub_elems_list = [y for x in subscript_dict.values() for y in x]
+    coordinates = {}
+    for sub in subs:
+        if sub in sub_elems_list:
+            name = find_subscript_name(subscript_dict, sub, avoid=subs)
+            coordinates[name] = [sub]
+        elif not terse:
+            coordinates[sub] = subscript_dict[sub]
+    return coordinates
+
+
+def make_merge_list(subs_list, subscript_dict, element=""):
+    """
+    This is for assisting when building xrmerge. From a list of subscript
+    lists returns the final subscript list after merging. Necessary when
+    merging variables with subscripts coming from different definitions.
+
+    Parameters
+    ----------
+    subs_list: list of lists of strings
+        Coordinates, either as names of dimensions, or positions within
+        a dimension.
+
+    subscript_dict: dict
+        The full dictionary of subscript names and values.
+
+    element: str (optional)
+        Element name, if given it will be printed with any error or
+        warning message. Default is "".
+
+    Returns
+    -------
+    dims: list
+        Final subscripts after merging.
+
+    Examples
+    --------
+    >>> make_merge_list([['upper'], ['C']], {'all': ['A', 'B', 'C'],
+    ...                                      'upper': ['A', 'B']})
+    ['all']
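+
+    A second, hypothetical case (for illustration only): single elements
+    coming from different definitions resolve to the smallest range that
+    covers them all.
+
+    >>> make_merge_list([['l2'], ['l1']], {'layers': ['l1', 'l2', 'l3'],
+    ...                                    'down': ['l1', 'l2']})
+    ['down']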
+
+    """
+    coords_set = [set() for i in range(len(subs_list[0]))]
+    coords_list = [
+        make_coord_dict(subs, subscript_dict, terse=False)
+        for subs in subs_list
+    ]
+
+    # update coords set
+    [[coords_set[i].update(coords[dim]) for i, dim in enumerate(coords)]
+     for coords in coords_list]
+
+    dims = [None] * len(coords_set)
+    # create an array with the name of the subranges for all merging elements
+    dims_list = np.array([list(coords) for coords in coords_list]).transpose()
+    indexes = np.arange(len(dims))
+
+    for i, coord2 in enumerate(coords_set):
+        dims1 = [
+            dim for dim in dims_list[i]
+            if dim is not None and set(subscript_dict[dim]) == coord2
+        ]
+        if dims1:
+            # if the given coordinate already matches, return it
+            dims[i] = dims1[0]
+        else:
+            # find a suitable coordinate
+            other_dims = dims_list[indexes != i]
+            for name, elements in subscript_dict.items():
+                if coord2 == set(elements) and name not in other_dims:
+                    dims[i] = name
+                    break
+
+            if not dims[i]:
+                # the dimension is incomplete; use the smallest
+                # dimension that completes it
+                for name, elements in subscript_dict.items():
+                    if coord2.issubset(set(elements))\
+                      and name not in other_dims:
+                        dims[i] = name
+                        warnings.warn(
+                            element
+                            + "\nDimension given by subscripts:"
+                            + "\n\t{}\nis incomplete ".format(coord2)
+                            + "using {} instead.".format(name)
+                            + "\nSubscript_dict:"
+                            + "\n\t{}".format(subscript_dict)
+                        )
+                        break
+
+            if not dims[i]:
+                for name, elements in subscript_dict.items():
+                    if coord2 == set(elements):
+                        j = 1
+                        while name + str(j) in subscript_dict.keys():
+                            j += 1
+                        subscript_dict[name + str(j)] = elements
+                        dims[i] = name + str(j)
+                        warnings.warn(
+                            element
+                            + "\nAdding new subscript range to"
+                            + " subscript_dict:\n"
+                            + name + str(j) + ": " + ', '.join(elements))
+                        break
+
+            if not dims[i]:
+                # not able to find the correct dimension
+                raise ValueError(
+                    element
+                    + "\nImpossible to find the dimension that contains:"
+                    + "\n\t{}\nFor subscript_dict:".format(coord2)
+                    + "\n\t{}".format(subscript_dict)
+                )
+
+    return dims
+
+
+def make_python_identifier(string, namespace=None):
+    """
+    Takes an arbitrary string and creates a valid Python identifier.
+
+    If the input string is in the namespace, return its value.
+
+    If the python identifier created is already in the namespace,
+    but the input string is not (i.e., two similar strings resolve to
+    the same python identifier),
+
+    or if the identifier is a reserved word in the reserved_words
+    list, or is a python default reserved word,
+    adds _1, or if _1 is in the namespace, _2, etc.
+
+    Parameters
+    ----------
+    string: str
+        The text to be converted into a valid python identifier.
+
+    namespace: dict
+        Map of existing translations into python safe identifiers.
+        This is to ensure that two strings are not translated into
+        the same python identifier. If string is already in the namespace
+        its value will be returned. Otherwise, namespace will be mutated
+        adding string as a new key and its value.
+
+    Returns
+    -------
+    identifier: str
+        A valid python identifier based on the input string.
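+
+    Notes
+    -----
+    The conversion is lossy: distinct input strings may collide on the
+    same identifier and are then disambiguated with numeric suffixes,
+    so the namespace dict is the only reliable map back to the original
+    names.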
+
+    Examples
+    --------
+    >>> make_python_identifier('Capital')
+    'capital'
+
+    >>> make_python_identifier('multiple words')
+    'multiple_words'
+
+    >>> make_python_identifier('multiple spaces')
+    'multiple_spaces'
+
+    When the name is a python keyword, add '_1' to differentiate it
+    >>> make_python_identifier('for')
+    'for_1'
+
+    Remove leading and trailing whitespace
+    >>> make_python_identifier(' whitespace ')
+    'whitespace'
+
+    Remove most special characters outright:
+    >>> make_python_identifier('H@t tr!ck')
+    'ht_trck'
+
+    remove leading digits
+    >>> make_python_identifier('123abc')
+    'nvs_123abc'
+
+    already in namespace
+    >>> make_python_identifier('Var$', namespace={'Var$': 'var'})
+    'var'
+
+    namespace conflicts
+    >>> make_python_identifier('Var@', namespace={'Var$': 'var'})
+    'var_1'
+
+    >>> make_python_identifier('Var$', namespace={'Var@': 'var',
+    ...                                           'Var%': 'var_1'})
+    'var_2'
+
+    References
+    ----------
+    Identifiers must follow the convention outlined here:
+        https://docs.python.org/2/reference/lexical_analysis.html#identifiers
+
+    """
+    if namespace is None:
+        namespace = dict()
+
+    if string in namespace:
+        return namespace[string]
+
+    # create a working copy (and make it lowercase, while we're at it)
+    s = string.lower()
+
+    # remove leading and trailing whitespace
+    s = s.strip()
+
+    # Make spaces into underscores
+    s = re.sub(r"[\s\t\n]+", "_", s)
+
+    # Remove invalid characters
+    s = re.sub(r"[^\p{l}\p{m}\p{n}_]", "", s)
+
+    # If leading character is not a letter add nvs_.
+    # Only letters can be leading characters.
+    if re.findall(r"^[^\p{l}_]", s):
+        s = "nvs_" + s
+    elif re.findall(r"^_", s):
+        s = "nvs" + s
+
+    # reserve the names of PySD functions, methods and other vars
+    # in the namespace
+    used_words = reserved_words.union(namespace.values())
+
+    # Check that the identifier is not already used
+    identifier = s
+    i = 1
+    while identifier in used_words:
+        identifier = s + '_' + str(i)
+        i += 1
+
+    namespace[string] = identifier
+
+    return identifier
+
+
+def make_add_identifier(identifier, build_names):
+    """
+    Takes an existing Python identifier and attaches a unique
+    identifier with ADD_# ending.
+
+    Used to add new information to an existing external object.
+    build_names will be updated inside this function, as a set
+    is mutable.
+
+    Parameters
+    ----------
+    identifier: str
+        Existing python identifier.
+
+    build_names: set
+        Set of the already used identifiers for external objects.
+
+    Returns
+    -------
+    identifier: str
+        A valid python identifier based on the input identifier
+        and the existing ones.
+
+    """
+    identifier += "ADD_"
+    number = 1
+    # iterate until finding a non-used identifier
+    while identifier + str(number) in build_names:
+        number += 1
+
+    # update identifier
+    identifier += str(number)
+
+    # update the build names
+    build_names.add(identifier)
+
+    return identifier
+
+
+def simplify_subscript_input(coords, subscript_dict, return_full, merge_subs):
+    """
+    Simplifies the coordinates to be written in the model file, using
+    '_subscript_dict' whenever possible.
+
+    Parameters
+    ----------
+    coords: dict
+        Coordinates to write in the model file.
+
+    subscript_dict: dict
+        The subscript dictionary of the model file.
+
+    return_full: bool
+        If True, when coords == subscript_dict, '_subscript_dict'
+        will be returned.
+
+    merge_subs: list of strings
+        List of the final subscript range of the python array after
+        merging with other objects.
+
+    Returns
+    -------
+    coords: str
+        The expression to generate the coord dictionary in the model file.
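+
+    Examples
+    --------
+    Illustrative doctests; 'dim' is a hypothetical subscript range:
+
+    >>> simplify_subscript_input({'dim': ['A', 'B', 'C']},
+    ...                          {'dim': ['A', 'B', 'C']}, False, ['dim'])
+    "{'dim': _subscript_dict['dim']}"
+    >>> simplify_subscript_input({'dim': ['A']},
+    ...                          {'dim': ['A', 'B', 'C']}, False, ['dim'])
+    "{'dim': ['A']}"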
+ + """ + + if coords == subscript_dict and return_full: + # variable defined with all the subscripts + return "_subscript_dict" + + coordsp = [] + for ndim, (dim, coord) in zip(merge_subs, coords.items()): + # find dimensions can be retrieved from _subscript_dict + if coord == subscript_dict[dim]: + # use _subscript_dict + coordsp.append(f"'{ndim}': _subscript_dict['{dim}']") + else: + # write whole dict + coordsp.append(f"'{ndim}': {coord}") + + return "{" + ", ".join(coordsp) + "}" + + +def add_entries_underscore(*dictionaries): + """ + Expands dictionaries adding new keys underscoring the white spaces + in the old ones. As the dictionaries are mutable objects this functions + will add the new entries to the already existing dictionaries with + no need to return a new one. + + Parameters + ---------- + *dictionaries: dict(s) + The dictionary or dictionaries to add the entries with underscore. + + Return + ------ + None + + """ + for dictionary in dictionaries: + keys = list(dictionary) + for name in keys: + dictionary[re.sub(" ", "_", name)] = dictionary[name] + return + + +def clean_file_names(*args): + """ + Removes special characters and makes clean file names + + Parameters + ---------- + *args: tuple + Any number of strings to to clean + + Returns + ------- + clean: list + List containing the clean strings + """ + clean = [] + for name in args: + clean.append(re.sub( + r"[\W]+", "", name.replace(" ", "_") + ).lstrip("0123456789") + ) + return clean + + +def merge_nested_dicts(original_dict, dict_to_merge): + """ + Merge dictionaries recursively, preserving common keys. + + Parameters + ---------- + original_dict: dict + Dictionary onto which the merge is executed. + + dict_to_merge: dict + Dictionary to be merged to the original_dict. + + Returns + ------- + None + """ + + for k, v in dict_to_merge.items(): + if (k in original_dict and isinstance(original_dict[k], dict) + and isinstance(dict_to_merge[k], Mapping)): + merge_nested_dicts(original_dict[k], dict_to_merge[k]) + else: + original_dict[k] = dict_to_merge[k] + + +def update_dependency(dependency, deps_dict): + """ + Update dependency in dependencies dict. + + Parameters + ---------- + dependency: str + The dependency to add to the dependency dict. + + deps_dict: dict + The dictionary of dependencies. If dependency is in deps_dict add 1 + to its value. Otherwise, add dependency to deps_dict with value 1. + + Returns + ------- + None + + """ + if dependency in deps_dict: + deps_dict[dependency] += 1 + else: + deps_dict[dependency] = 1 diff --git a/pysd/py_backend/vensim/__init__.py b/pysd/translation/vensim/__init__.py similarity index 100% rename from pysd/py_backend/vensim/__init__.py rename to pysd/translation/vensim/__init__.py diff --git a/pysd/py_backend/vensim/vensim2py.py b/pysd/translation/vensim/vensim2py.py similarity index 98% rename from pysd/py_backend/vensim/vensim2py.py rename to pysd/translation/vensim/vensim2py.py index e31d2ce5..b8430b92 100644 --- a/pysd/py_backend/vensim/vensim2py.py +++ b/pysd/translation/vensim/vensim2py.py @@ -15,7 +15,9 @@ VisitationError,\ ParseError -from .. import builder, utils, external +from .. 
import builder, utils +from ...py_backend.external import ExtSubscript +from ...py_backend.utils import compute_shape def get_file_sections(file_str): @@ -299,10 +301,11 @@ def get_equation_components(equation_str, root_path=None): component_structure_grammar = _include_common_grammar( r""" - entry = component / data_definition / test_definition / subscript_definition / lookup_definition / subscript_copy + entry = component / ext_data_definition / data_definition / test_definition / subscript_definition / lookup_definition / subscript_copy component = name _ subscriptlist? _ "=" "="? _ expression subscript_definition = name _ ":" _ (imported_subscript / literal_subscript / numeric_range) _ subscript_mapping_list? - data_definition = name _ subscriptlist? _ keyword? _ ":=" _ expression + ext_data_definition = name _ subscriptlist? _ keyword? _ ":=" _ expression + data_definition = name _ subscriptlist? _ keyword lookup_definition = name _ subscriptlist? &"(" _ expression # uses # lookahead assertion to capture whole group test_definition = name _ subscriptlist? _ &keyword _ expression @@ -356,6 +359,9 @@ def visit_lookup_definition(self, n, vc): def visit_component(self, n, vc): self.kind = "component" + def visit_ext_data_definition(self, n, vc): + self.kind = "component" + def visit_data_definition(self, n, vc): self.kind = "data" @@ -371,8 +377,7 @@ def visit_imported_subscript(self, n, vc): # TODO: allow reading the subscripts from Excel # once the model has been translated args = [x.strip().strip("'") for x in vc[4].split(",")] - self.subscripts += external.ExtSubscript(*args, root=root_path - ).subscript + self.subscripts += ExtSubscript(*args, root=root_path).subscript def visit_subscript_copy(self, n, vc): self.kind = "subdef" @@ -1358,7 +1363,7 @@ def visit_array(self, n, vc): if ";" in n.text or "," in n.text: text = n.text.strip(";").replace(" ", "").replace(";", ",") data = np.array([float(s) for s in text.split(",")]) - data = data.reshape(utils.compute_shape(coords)) + data = data.reshape(compute_shape(coords)) datastr = ( np.array2string(data, separator=",") .replace("\n", "") @@ -1673,8 +1678,7 @@ def translate_section(section, macro_list, sketch, root_path, subview_sep=""): # Parse components to python syntax. for element in model_elements: - if (element["kind"] == "component" and "py_expr" not in element) or \ - (element["kind"] == "data"): + if element["kind"] == "component" and "py_expr" not in element: # TODO: if there is new structure, # it should be added to the namespace... 
translation, new_structure = parse_general_expression( @@ -1688,6 +1692,11 @@ def translate_section(section, macro_list, sketch, root_path, subview_sep=""): element.update(translation) model_elements += new_structure + elif element["kind"] == "data": + element["eqn"] = element["expr"] = element["arguments"] = "" + element["py_expr"] = "None" + element["dependencies"] = {"time": 1, "__data__": None} + elif element["kind"] == "lookup": translation, new_structure = parse_lookup_expression( element, diff --git a/pysd/py_backend/xmile/SMILE2Py.py b/pysd/translation/xmile/SMILE2Py.py similarity index 99% rename from pysd/py_backend/xmile/SMILE2Py.py rename to pysd/translation/xmile/SMILE2Py.py index a2185a6f..8a789af6 100644 --- a/pysd/py_backend/xmile/SMILE2Py.py +++ b/pysd/translation/xmile/SMILE2Py.py @@ -264,7 +264,7 @@ def __init__(self, model_namespace={}, subscript_dict={}): self.extended_model_namespace.update({'endtime': 'final_time'}) grammar = pkg_resources.resource_string( - "pysd", "py_backend/xmile/smile.grammar") + "pysd", "translation/xmile/smile.grammar") grammar = grammar.decode('ascii').format( funcs=format_word_list(functions.keys()), in_ops=format_word_list(infix_operators.keys()), diff --git a/pysd/py_backend/xmile/__init__.py b/pysd/translation/xmile/__init__.py similarity index 100% rename from pysd/py_backend/xmile/__init__.py rename to pysd/translation/xmile/__init__.py diff --git a/pysd/py_backend/xmile/smile.grammar b/pysd/translation/xmile/smile.grammar similarity index 100% rename from pysd/py_backend/xmile/smile.grammar rename to pysd/translation/xmile/smile.grammar diff --git a/pysd/py_backend/xmile/xmile2py.py b/pysd/translation/xmile/xmile2py.py similarity index 100% rename from pysd/py_backend/xmile/xmile2py.py rename to pysd/translation/xmile/xmile2py.py diff --git a/tests/unit_test_builder.py b/tests/unit_test_builder.py index e03149ab..b4aa3bf5 100644 --- a/tests/unit_test_builder.py +++ b/tests/unit_test_builder.py @@ -18,7 +18,7 @@ def runner(string, ns=None): class TestBuildElement(TestCase): def test_no_subs_constant(self): - from pysd.py_backend.builder import build_element + from pysd.translation.builder import build_element string = textwrap.dedent( build_element(element={'kind': 'constant', 'subs': [[]], @@ -39,7 +39,7 @@ def test_no_subs_constant(self): self.assertEqual(a, .01) def test_no_subs_call(self): - from pysd.py_backend.builder import build_element + from pysd.translation.builder import build_element string = textwrap.dedent( build_element(element={'kind': 'constant', 'subs': [[]], @@ -64,7 +64,7 @@ def test_no_subs_call(self): class TestBuildFunctionCall(TestCase): def test_build_function_not_implemented(self): from warnings import catch_warnings - from pysd.py_backend.builder import build_function_call + from pysd.translation.builder import build_function_call args = ['a', 'b'] nif = {"name": "not_implemented_function", "module": "functions", @@ -78,7 +78,7 @@ def test_build_function_not_implemented(self): in str(ws[0].message)) def test_build_function_with_time_dependency(self): - from pysd.py_backend.builder import build_function_call + from pysd.translation.builder import build_function_call args = ['a', 'b'] pulse = { "name": "pulse", @@ -97,7 +97,7 @@ def test_build_function_with_time_dependency(self): self.assertIn('time', dependencies) def test_build_function_ignore_arguments(self): - from pysd.py_backend.builder import build_function_call + from pysd.translation.builder import build_function_call args = ['a', 'b', 'c'] 
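        # parameters marked to be ignored in the conf below drop 'a' and
        # 'c', so the built call reduces to "my_func(b)"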
        my_func_conf = {
            "name": "my_func",
            "parameters": [
@@ -124,7 +124,7 @@ def test_build_function_ignore_arguments(self):
             "my_func(b)")
 
     def test_build_function_lambda_arguments(self):
-        from pysd.py_backend.builder import build_function_call
+        from pysd.translation.builder import build_function_call
         args = ['a', 'b', 'c']
         my_func_conf = {
             "name": "my_func",
             "parameters": [
@@ -151,7 +151,7 @@ def test_build_function_lambda_arguments(self):
             "my_func(lambda: a, b, lambda: c)")
 
     def test_build_function_optional_arguments(self):
-        from pysd.py_backend.builder import build_function_call
+        from pysd.translation.builder import build_function_call
         my_func_conf = {
             "name": "my_func",
             "parameters": [
@@ -184,7 +184,7 @@ def test_build_function_optional_arguments(self):
             "my_func(a, b)")
 
     def test_build_function_predef_arguments(self):
-        from pysd.py_backend.builder import build_function_call
+        from pysd.translation.builder import build_function_call
         args = ['a', 'c']
         my_func_conf = {
             "name": "my_func",
             "parameters": [
@@ -214,7 +214,7 @@ class TestBuild(TestCase):
     def test_build(self):
         # Todo: add other builder-specific inclusions to this test
-        from pysd.py_backend.builder import build
+        from pysd.translation.builder import build
         actual = textwrap.dedent(
             build(elements=[{'kind': 'component',
                              'subs': [],
@@ -270,7 +270,7 @@ def test_build(self):
 
 class TestMergePartialElements(TestCase):
     def test_single_set(self):
-        from pysd.py_backend.builder import merge_partial_elements
+        from pysd.translation.builder import merge_partial_elements
 
         self.assertEqual(
             merge_partial_elements(
@@ -311,7 +311,7 @@ def test_single_set(self):
                 }])
 
     def test_multiple_sets(self):
-        from pysd.py_backend.builder import merge_partial_elements
+        from pysd.translation.builder import merge_partial_elements
         actual = merge_partial_elements(
             [{'py_name': 'a', 'py_expr': 'ms', 'subs': ['Name1', 'element1'],
               'merge_subs': ['Name1', 'Elements'], 'dependencies': {'b': 1},
@@ -378,7 +378,7 @@ def test_multiple_sets(self):
         self.assertIn(actual[1], expected)
 
     def test_non_set(self):
-        from pysd.py_backend.builder import merge_partial_elements
+        from pysd.translation.builder import merge_partial_elements
         actual = merge_partial_elements(
             [{'py_name': 'a', 'py_expr': 'ms', 'subs': ['Name1', 'element1'],
               'merge_subs': ['Name1', 'Elements'], 'dependencies': {'c': 1},
diff --git a/tests/unit_test_translation_utils.py b/tests/unit_test_translation_utils.py
new file mode 100644
index 00000000..10a33434
--- /dev/null
+++ b/tests/unit_test_translation_utils.py
@@ -0,0 +1,245 @@
+from unittest import TestCase
+
+
+class TestTranslationUtils(TestCase):
+
+    def test_add_entries_underscore(self):
+        """
+        Test for add_entries_underscore
+        """
+        from pysd.translation.utils import add_entries_underscore
+
+        dict1 = {'CD': 10, 'L F': 5}
+        dict2 = {'a b': 1, 'C': 2, 'L M H': 4}
+
+        dict1b = dict1.copy()
+
+        add_entries_underscore(dict1b)
+
+        self.assertTrue('L_F' in dict1b)
+        self.assertEqual(dict1b['L F'], dict1b['L_F'])
+
+        add_entries_underscore(dict1, dict2)
+
+        self.assertTrue('L_F' in dict1)
+        self.assertEqual(dict1['L F'], dict1['L_F'])
+        self.assertTrue('a_b' in dict2)
+        self.assertEqual(dict2['a b'], dict2['a_b'])
+        self.assertTrue('L_M_H' in dict2)
+        self.assertEqual(dict2['L M H'], dict2['L_M_H'])
+
+    def test_make_add_identifier(self):
+        """
+        Test make_add_identifier for the .add methods' py_name
+        """
+        from pysd.translation.utils import make_add_identifier
+
+        build_names = set()
+
+        name = "values"
+        build_names.add(name)
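+
+        # each call below reserves the next unused ADD_<n> suffix for the
+        # same base name, mutating build_names in place
+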
self.assertEqual(make_add_identifier(name, build_names), "valuesADD_1") + self.assertEqual(make_add_identifier(name, build_names), "valuesADD_2") + self.assertEqual(make_add_identifier(name, build_names), "valuesADD_3") + + name2 = "bb_a" + build_names.add(name2) + self.assertEqual(make_add_identifier(name2, build_names), "bb_aADD_1") + self.assertEqual(make_add_identifier(name, build_names), "valuesADD_4") + self.assertEqual(make_add_identifier(name2, build_names), "bb_aADD_2") + + def test_make_python_identifier(self): + from pysd.translation.utils import make_python_identifier + + self.assertEqual( + make_python_identifier('Capital'), 'capital') + + self.assertEqual( + make_python_identifier('multiple words'), 'multiple_words') + + self.assertEqual( + make_python_identifier('multiple spaces'), 'multiple_spaces') + + self.assertEqual( + make_python_identifier('for'), 'for_1') + + self.assertEqual( + make_python_identifier(' whitespace '), 'whitespace') + + self.assertEqual( + make_python_identifier('H@t tr!ck'), 'ht_trck') + + self.assertEqual( + make_python_identifier('123abc'), 'nvs_123abc') + + self.assertEqual( + make_python_identifier('Var$', {'Var$': 'var'}), + 'var') + + self.assertEqual( + make_python_identifier('Var@', {'Var$': 'var'}), 'var_1') + + self.assertEqual( + make_python_identifier('Var$', {'Var@': 'var', 'Var%': 'var_1'}), + 'var_2') + + my_vars = ["GDP 2010$", "GDP 2010€", "GDP 2010£"] + namespace = {} + expected = ["gdp_2010", "gdp_2010_1", "gdp_2010_2"] + for var, expect in zip(my_vars, expected): + self.assertEqual( + make_python_identifier(var, namespace), + expect) + + self.assertEqual( + make_python_identifier('1995 value'), + 'nvs_1995_value') + + self.assertEqual( + make_python_identifier('$ value'), + 'nvs_value') + + def test_make_coord_dict(self): + from pysd.translation.utils import make_coord_dict + self.assertEqual( + make_coord_dict(['Dim1', 'D'], + {'Dim1': ['A', 'B', 'C'], + 'Dim2': ['D', 'E', 'F']}, + terse=True), {'Dim2': ['D']}) + self.assertEqual( + make_coord_dict(['Dim1', 'D'], + {'Dim1': ['A', 'B', 'C'], + 'Dim2': ['D', 'E', 'F']}, + terse=False), {'Dim1': ['A', 'B', 'C'], + 'Dim2': ['D']}) + + def test_find_subscript_name(self): + from pysd.translation.utils import find_subscript_name + + self.assertEqual( + find_subscript_name({'Dim1': ['A', 'B'], + 'Dim2': ['C', 'D', 'E'], + 'Dim3': ['F', 'G', 'H', 'I']}, + 'D'), 'Dim2') + + self.assertEqual( + find_subscript_name({'Dim1': ['A', 'B'], + 'Dim2': ['C', 'D', 'E'], + 'Dim3': ['F', 'G', 'H', 'I']}, + 'Dim3'), 'Dim3') + + def test_make_merge_list(self): + from warnings import catch_warnings + from pysd.translation.utils import make_merge_list + + subscript_dict = { + "layers": ["l1", "l2", "l3"], + "layers1": ["l1", "l2", "l3"], + "up": ["l2", "l3"], + "down": ["l1", "l2"], + "dim": ["A", "B", "C"], + "dim1": ["A", "B", "C"] + } + + self.assertEqual( + make_merge_list([["l1"], ["up"]], + subscript_dict), + ["layers"]) + + self.assertEqual( + make_merge_list([["l3", "dim1"], ["down", "dim1"]], + subscript_dict), + ["layers", "dim1"]) + + self.assertEqual( + make_merge_list([["l2", "dim1", "dim"], ["l1", "dim1", "dim"]], + subscript_dict), + ["down", "dim1", "dim"]) + + self.assertEqual( + make_merge_list([["layers1", "l2"], ["layers1", "l3"]], + subscript_dict), + ["layers1", "up"]) + + # incomplete dimension + with catch_warnings(record=True) as ws: + self.assertEqual( + make_merge_list([["A"], ["B"]], + subscript_dict), + ["dim"]) + # use only user warnings + wu = [w for w in ws if 
issubclass(w.category, UserWarning)]
+            self.assertEqual(len(wu), 1)
+            self.assertIn("Dimension given by subscripts:"
+                          + "\n\t{}\nis incomplete ".format({"A", "B"})
+                          + "using {} instead.".format("dim")
+                          + "\nSubscript_dict:"
+                          + "\n\t{}".format(subscript_dict),
+                          str(wu[0].message))
+
+        # invalid dimension
+        try:
+            make_merge_list([["l1"], ["B"]],
+                            subscript_dict)
+            self.fail()
+        except ValueError as err:
+            self.assertIn("Impossible to find the dimension that contains:"
+                          + "\n\t{}\nFor subscript_dict:".format({"l1", "B"})
+                          + "\n\t{}".format(subscript_dict),
+                          err.args[0])
+
+        # repeated subscript
+        with catch_warnings(record=True) as ws:
+            make_merge_list([["dim1", "A", "dim"],
+                             ["dim1", "B", "dim"],
+                             ["dim1", "C", "dim"]],
+                            subscript_dict)
+            # use only user warnings
+            wu = [w for w in ws if issubclass(w.category, UserWarning)]
+            self.assertEqual(len(wu), 1)
+            self.assertIn(
+                "Adding new subscript range to subscript_dict:\ndim2: A, B, C",
+                str(wu[0].message))
+
+        subscript_dict2 = {
+            "dim1": ["A", "B", "C", "D"],
+            "dim1n": ["A", "B"],
+            "dim1y": ["C", "D"],
+            "dim2": ["E", "F", "G", "H"],
+            "dim2n": ["E", "F"],
+            "dim2y": ["G", "H"]
+        }
+
+        # merging two subranges
+        self.assertEqual(
+            make_merge_list([["dim1y"],
+                             ["dim1n"]],
+                            subscript_dict2),
+            ["dim1"])
+
+        # final subscript in list
+        self.assertEqual(
+            make_merge_list([["dim1", "dim2n"],
+                             ["dim1n", "dim2y"],
+                             ["dim1y", "dim2y"]],
+                            subscript_dict2),
+            ["dim1", "dim2"])
+
+    def test_update_dependency(self):
+        from pysd.translation.utils import update_dependency
+
+        deps_dict = {}
+
+        update_dependency("var1", deps_dict)
+        self.assertEqual(deps_dict, {"var1": 1})
+
+        update_dependency("var1", deps_dict)
+        self.assertEqual(deps_dict, {"var1": 2})
+
+        update_dependency("var2", deps_dict)
+        self.assertEqual(deps_dict, {"var1": 2, "var2": 1})
+
+        for i in range(10):
+            update_dependency("var1", deps_dict)
+        self.assertEqual(deps_dict, {"var1": 12, "var2": 1})
diff --git a/tests/unit_test_utils.py b/tests/unit_test_utils.py
index 9cc75d3c..1df82f01 100644
--- a/tests/unit_test_utils.py
+++ b/tests/unit_test_utils.py
@@ -2,7 +2,6 @@
 from unittest import TestCase
 
 import pandas as pd
-import numpy as np
 import xarray as xr
 
 from pysd.tools.benchmarking import assert_frames_close
@@ -261,135 +260,10 @@ def test_make_flat_df_times(self):
         self.assertEqual(set(actual.index), set(expected.index))
         self.assertTrue(all(actual['Elem1[B,F]'] == expected['Elem1[B,F]']))
 
-    def test_make_coord_dict(self):
-        import pysd
-        self.assertEqual(
-            pysd.utils.make_coord_dict(['Dim1', 'D'],
-                                       {'Dim1': ['A', 'B', 'C'],
-                                        'Dim2': ['D', 'E', 'F']},
-                                       terse=True), {'Dim2': ['D']})
-        self.assertEqual(
-            pysd.utils.make_coord_dict(['Dim1', 'D'],
-                                       {'Dim1': ['A', 'B', 'C'],
-                                        'Dim2': ['D', 'E', 'F']},
-                                       terse=False), {'Dim1': ['A', 'B', 'C'],
-                                                      'Dim2': ['D']})
-
-    def test_find_subscript_name(self):
-        import pysd
-        self.assertEqual(
-            pysd.utils.find_subscript_name({'Dim1': ['A', 'B'],
-                                            'Dim2': ['C', 'D', 'E'],
-                                            'Dim3': ['F', 'G', 'H', 'I']},
-                                           'D'), 'Dim2')
-
-        self.assertEqual(
-            pysd.utils.find_subscript_name({'Dim1': ['A', 'B'],
-                                            'Dim2': ['C', 'D', 'E'],
-                                            'Dim3': ['F', 'G', 'H', 'I']},
-                                           'Dim3'), 'Dim3')
-
     def test_doctests(self):
         import pysd
         doctest.DocTestSuite(pysd.utils)
 
-    def test_make_merge_list(self):
-        from warnings import catch_warnings
-        from pysd.py_backend.utils import make_merge_list
-
-        subscript_dict = {
-            "layers": ["l1", "l2", "l3"],
-            "layers1": ["l1", "l2", "l3"],
-            "up": ["l2", "l3"],
-            "down": ["l1", "l2"],
-            "dim": ["A", "B",
"C"], - "dim1": ["A", "B", "C"] - } - - self.assertEqual( - make_merge_list([["l1"], ["up"]], - subscript_dict), - ["layers"]) - - self.assertEqual( - make_merge_list([["l3", "dim1"], ["down", "dim1"]], - subscript_dict), - ["layers", "dim1"]) - - self.assertEqual( - make_merge_list([["l2", "dim1", "dim"], ["l1", "dim1", "dim"]], - subscript_dict), - ["down", "dim1", "dim"]) - - self.assertEqual( - make_merge_list([["layers1", "l2"], ["layers1", "l3"]], - subscript_dict), - ["layers1", "up"]) - - # incomplete dimension - with catch_warnings(record=True) as ws: - self.assertEqual( - make_merge_list([["A"], ["B"]], - subscript_dict), - ["dim"]) - # use only user warnings - wu = [w for w in ws if issubclass(w.category, UserWarning)] - self.assertTrue(len(wu), 1) - self.assertIn("Dimension given by subscripts:" - + "\n\t{}\nis incomplete ".format({"A", "B"}) - + "using {} instead.".format("dim") - + "\nSubscript_dict:" - + "\n\t{}".format(subscript_dict), - str(wu[0].message)) - - # invalid dimension - try: - make_merge_list([["l1"], ["B"]], - subscript_dict) - self.assertFail() - except ValueError as err: - self.assertIn("Impossible to find the dimension that contains:" - + "\n\t{}\nFor subscript_dict:".format({"l1", "B"}) - + "\n\t{}".format(subscript_dict), - err.args[0]) - - # repeated subscript - with catch_warnings(record=True) as ws: - make_merge_list([["dim1", "A", "dim"], - ["dim1", "B", "dim"], - ["dim1", "C", "dim"]], - subscript_dict) - # use only user warnings - wu = [w for w in ws if issubclass(w.category, UserWarning)] - self.assertTrue(len(wu), 1) - self.assertIn( - "Adding new subscript range to subscript_dict:\ndim2: A, B, C", - str(wu[0].message)) - - subscript_dict2 = { - "dim1": ["A", "B", "C", "D"], - "dim1n": ["A", "B"], - "dim1y": ["C", "D"], - "dim2": ["E", "F", "G", "H"], - "dim2n": ["E", "F"], - "dim2y": ["G", "H"] - } - - # merging two subranges - self.assertEqual( - make_merge_list([["dim1y"], - ["dim1n"]], - subscript_dict2), - ["dim1"]) - - # final subscript in list - self.assertEqual( - make_merge_list([["dim1", "dim2n"], - ["dim1n", "dim2y"], - ["dim1y", "dim2y"]], - subscript_dict2), - ["dim1", "dim2"]) - def test_compute_shape(self): """" Test for computing the shape of an array giving coordinates dictionary @@ -512,103 +386,6 @@ def test_rearrange(self): self.assertEqual(None, rearrange(None, ['d2'], _subscript_dict)) - def test_add_entries_underscore(self): - """" - Test for add_entries_undescore - """ - from pysd.py_backend.utils import add_entries_underscore - - dict1 = {'CD': 10, 'L F': 5} - dict2 = {'a b': 1, 'C': 2, 'L M H': 4} - - dict1b = dict1.copy() - - add_entries_underscore(dict1b) - - self.assertTrue('L_F' in dict1b) - self.assertEqual(dict1b['L F'], dict1b['L_F']) - - add_entries_underscore(dict1, dict2) - - self.assertTrue('L_F' in dict1) - self.assertEqual(dict1['L F'], dict1['L_F']) - self.assertTrue('a_b' in dict2) - self.assertEqual(dict2['a b'], dict2['a_b']) - self.assertTrue('L_M_H' in dict2) - self.assertEqual(dict2['L M H'], dict2['L_M_H']) - - def test_make_add_identifier(self): - """ - Test make_add_identifier for the .add methods py_name - """ - from pysd.py_backend.utils import make_add_identifier - - build_names = set() - - name = "values" - build_names.add(name) - - self.assertEqual(make_add_identifier(name, build_names), "valuesADD_1") - self.assertEqual(make_add_identifier(name, build_names), "valuesADD_2") - self.assertEqual(make_add_identifier(name, build_names), "valuesADD_3") - - name2 = "bb_a" - build_names.add(name2) - 
self.assertEqual(make_add_identifier(name2, build_names), "bb_aADD_1") - self.assertEqual(make_add_identifier(name, build_names), "valuesADD_4") - self.assertEqual(make_add_identifier(name2, build_names), "bb_aADD_2") - - def test_make_python_identifier(self): - from pysd.py_backend.utils import make_python_identifier - - self.assertEqual( - make_python_identifier('Capital'), 'capital') - - self.assertEqual( - make_python_identifier('multiple words'), 'multiple_words') - - self.assertEqual( - make_python_identifier('multiple spaces'), 'multiple_spaces') - - self.assertEqual( - make_python_identifier('for'), 'for_1') - - self.assertEqual( - make_python_identifier(' whitespace '), 'whitespace') - - self.assertEqual( - make_python_identifier('H@t tr!ck'), 'ht_trck') - - self.assertEqual( - make_python_identifier('123abc'), 'nvs_123abc') - - self.assertEqual( - make_python_identifier('Var$', {'Var$': 'var'}), - 'var') - - self.assertEqual( - make_python_identifier('Var@', {'Var$': 'var'}), 'var_1') - - self.assertEqual( - make_python_identifier('Var$', {'Var@': 'var', 'Var%': 'var_1'}), - 'var_2') - - my_vars = ["GDP 2010$", "GDP 2010€", "GDP 2010£"] - namespace = {} - expected = ["gdp_2010", "gdp_2010_1", "gdp_2010_2"] - for var, expect in zip(my_vars, expected): - self.assertEqual( - make_python_identifier(var, namespace), - expect) - - self.assertEqual( - make_python_identifier('1995 value'), - 'nvs_1995_value') - - self.assertEqual( - make_python_identifier('$ value'), - 'nvs_value') - def test_progressbar(self): from pysd.py_backend.utils import ProgressBar @@ -624,21 +401,3 @@ def test_progressbar(self): self.assertFalse(hasattr(pbar, 'counter')) pbar.update() pbar.finish() - - def test_update_dependency(self): - from pysd.py_backend.utils import update_dependency - - deps_dict = {} - - update_dependency("var1", deps_dict) - self.assertEqual(deps_dict, {"var1": 1}) - - update_dependency("var1", deps_dict) - self.assertEqual(deps_dict, {"var1": 2}) - - update_dependency("var2", deps_dict) - self.assertEqual(deps_dict, {"var1": 2, "var2": 1}) - - for i in range(10): - update_dependency("var1", deps_dict) - self.assertEqual(deps_dict, {"var1": 12, "var2": 1}) diff --git a/tests/unit_test_vensim2py.py b/tests/unit_test_vensim2py.py index 1cb622aa..498355f2 100644 --- a/tests/unit_test_vensim2py.py +++ b/tests/unit_test_vensim2py.py @@ -5,7 +5,7 @@ class TestGetFileSections(unittest.TestCase): def test_normal_load(self): """normal model file with no macros""" - from pysd.py_backend.vensim.vensim2py import get_file_sections + from pysd.translation.vensim.vensim2py import get_file_sections actual = get_file_sections(r"a~b~c| d~e~f| g~h~i|") expected = [ @@ -20,7 +20,7 @@ def test_normal_load(self): def test_macro_only(self): """ Macro Only """ - from pysd.py_backend.vensim.vensim2py import get_file_sections + from pysd.translation.vensim.vensim2py import get_file_sections actual = get_file_sections(":MACRO: MAC(z) a~b~c| :END OF MACRO:") expected = [{"returns": [], "params": ["z"], "name": "MAC", @@ -29,7 +29,7 @@ def test_macro_only(self): def test_macro_and_model(self): """ basic macro and model """ - from pysd.py_backend.vensim.vensim2py import get_file_sections + from pysd.translation.vensim.vensim2py import get_file_sections actual = get_file_sections( ":MACRO: MAC(z) a~b~c| :END OF MACRO: d~e~f| g~h~i|") @@ -43,7 +43,7 @@ def test_macro_and_model(self): def test_macro_multiple_inputs(self): """ macro with multiple input parameters """ - from pysd.py_backend.vensim.vensim2py import 
get_file_sections + from pysd.translation.vensim.vensim2py import get_file_sections actual = get_file_sections( ":MACRO: MAC(z, y) a~b~c| :END OF MACRO: d~e~f| g~h~i|" @@ -58,7 +58,7 @@ def test_macro_multiple_inputs(self): def test_macro_with_returns(self): """ macro with return values """ - from pysd.py_backend.vensim.vensim2py import get_file_sections + from pysd.translation.vensim.vensim2py import get_file_sections actual = get_file_sections( ":MACRO: MAC(z, y :x, w) a~b~c| :END OF MACRO: d~e~f| g~h~i|" @@ -77,7 +77,7 @@ def test_macro_with_returns(self): def test_handle_encoding(self): """ Handle encoding """ - from pysd.py_backend.vensim.vensim2py import get_file_sections + from pysd.translation.vensim.vensim2py import get_file_sections actual = get_file_sections(r"{UTF-8} a~b~c| d~e~f| g~h~i|") expected = [ @@ -92,7 +92,7 @@ def test_handle_encoding(self): def test_handle_encoding_like_strings(self): """ Handle encoding-like strings in other places in the file """ - from pysd.py_backend.vensim.vensim2py import get_file_sections + from pysd.translation.vensim.vensim2py import get_file_sections actual = get_file_sections(r"a~b~c| d~e~f{special}| g~h~i|") expected = [ @@ -110,7 +110,7 @@ class TestEquationStringParsing(unittest.TestCase): """ Tests the 'get_equation_components function """ def test_basics(self): - from pysd.py_backend.vensim.vensim2py import get_equation_components + from pysd.translation.vensim.vensim2py import get_equation_components self.assertEqual( get_equation_components(r'constant = 25'), @@ -126,7 +126,7 @@ def test_basics(self): def test_equals_handling(self): """ Parse cases with equal signs within the expression """ - from pysd.py_backend.vensim.vensim2py import get_equation_components + from pysd.translation.vensim.vensim2py import get_equation_components self.assertEqual( get_equation_components(r"Boolean = IF THEN ELSE(1 = 1, 1, 0)"), @@ -142,7 +142,7 @@ def test_equals_handling(self): def test_whitespace_handling(self): """ Whitespaces should be shortened to a single space """ - from pysd.py_backend.vensim.vensim2py import get_equation_components + from pysd.translation.vensim.vensim2py import get_equation_components self.assertEqual( get_equation_components( @@ -176,7 +176,7 @@ def test_whitespace_handling(self): ) def test_subscript_definition_parsing(self): - from pysd.py_backend.vensim.vensim2py import get_equation_components + from pysd.translation.vensim.vensim2py import get_equation_components self.assertEqual( get_equation_components(r"""Sub1: Entry 1, Entry 2, Entry 3 """), @@ -214,7 +214,7 @@ def test_subscript_definition_parsing(self): str(err.exception)) def test_subscript_references(self): - from pysd.py_backend.vensim.vensim2py import get_equation_components + from pysd.translation.vensim.vensim2py import get_equation_components self.assertEqual( get_equation_components( @@ -269,7 +269,7 @@ def test_subscript_references(self): ) def test_lookup_definitions(self): - from pysd.py_backend.vensim.vensim2py import get_equation_components + from pysd.translation.vensim.vensim2py import get_equation_components self.assertEqual( get_equation_components(r"table([(0,-1)-(45,1)],(0,0),(5,0))"), @@ -296,7 +296,7 @@ def test_lookup_definitions(self): ) def test_get_lookup(self): - from pysd.py_backend.vensim.vensim2py import parse_lookup_expression + from pysd.translation.vensim.vensim2py import parse_lookup_expression res = parse_lookup_expression( { @@ -316,7 +316,7 @@ def test_get_lookup(self): ) def test_pathological_names(self): - from 
pysd.py_backend.vensim.vensim2py import get_equation_components + from pysd.translation.vensim.vensim2py import get_equation_components self.assertEqual( get_equation_components(r'"silly-string" = 25'), @@ -343,7 +343,7 @@ def test_pathological_names(self): ) def test_get_equation_components_error(self): - from pysd.py_backend.vensim.vensim2py import get_equation_components + from pysd.translation.vensim.vensim2py import get_equation_components defi = "NIF: NFNF" try: @@ -360,13 +360,13 @@ def test_get_equation_components_error(self): class TestParse_general_expression(unittest.TestCase): def test_arithmetic(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression({"expr": "-10^3+4"}) self.assertEqual(res[0]["py_expr"], "-10**3+4") def test_arithmetic_scientific(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression({"expr": "1e+4"}) self.assertEqual(res[0]["py_expr"], "1e+4") @@ -387,7 +387,7 @@ def test_arithmetic_scientific(self): self.assertEqual(res[0]["py_expr"], "-2.0e-43") def test_caps_handling(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression({"expr": "Abs(-3)"}) self.assertEqual(res[0]["py_expr"], "np.abs(-3)") @@ -400,7 +400,7 @@ def test_caps_handling(self): def test_empty(self): from warnings import catch_warnings - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression with catch_warnings(record=True) as ws: res = parse_general_expression({"expr": "", "real_name": "Var"}) @@ -412,7 +412,7 @@ def test_empty(self): self.assertEqual(res[0]["py_expr"], "None") def test_function_calls(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression({"expr": "ABS(StockA)", "real_name": "AB", @@ -439,14 +439,14 @@ def test_function_calls(self): ) def test_id_parsing(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression({"expr": "StockA"}, {"StockA": "stocka"}) self.assertEqual(res[0]["py_expr"], "stocka()") def test_logicals(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression( {'expr': 'IF THEN ELSE(1 :AND: 0,0,1)'}) @@ -494,7 +494,7 @@ def test_logicals(self): 'eqn': 'logical = IF THEN ELSE(1 :AND: 0 :OR: 1,0,1)'}) def test_number_parsing(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression({'expr': '20'}) self.assertEqual(res[0]['py_expr'], '20') @@ -508,8 +508,8 @@ def test_number_parsing(self): self.assertEqual(res[0]["py_expr"], "-1.3e-10") def test_nan_parsing(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression - from pysd.py_backend.builder import Imports + from pysd.translation.vensim.vensim2py import parse_general_expression + from pysd.translation.builder import Imports 
Imports.reset() self.assertFalse(Imports._numpy) @@ -520,7 +520,7 @@ def test_nan_parsing(self): def test_stock_construction_function_no_subscripts(self): """ stock construction should create a stateful variable and reference it """ - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression from pysd.py_backend.statefuls import Integ res = parse_general_expression( @@ -541,7 +541,7 @@ def test_stock_construction_function_no_subscripts(self): self.assertEqual(res[0]["py_expr"], res[1][0]["py_name"] + "()") def test_delay_construction_function_no_subscripts(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression from pysd.py_backend.statefuls import Delay res = parse_general_expression( @@ -574,7 +574,7 @@ def test_forecast_construction_function_no_subscripts(self): This translation should create a new stateful object to hold the forecast elements, and then pass back a reference to that value """ - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression from pysd.py_backend.statefuls import Forecast res = parse_general_expression( @@ -603,7 +603,7 @@ def test_smooth_construction_function_no_subscripts(self): This translation should create a new stateful object to hold the delay elements, and then pass back a reference to that value """ - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression from pysd.py_backend.statefuls import Smooth res = parse_general_expression( @@ -625,7 +625,7 @@ def test_smooth_construction_function_no_subscripts(self): self.assertEqual(res[0]["py_expr"], res[1][0]["py_name"] + "()") def test_subscript_float_initialization(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression _subscript_dict = { "Dim": ["A", "B", "C", "D", "E"], @@ -677,7 +677,7 @@ def test_subscript_float_initialization(self): self.assertEqual(a.loc[{"Dim": "B"}], 3.32) def test_subscript_1d_constant(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression _subscript_dict = {"Dim1": ["A", "B", "C"], "Dim2": ["D", "E"]} element = parse_general_expression( @@ -703,7 +703,7 @@ def test_subscript_1d_constant(self): self.assertEqual(a.loc[{"Dim1": "A"}], 1) def test_subscript_2d_constant(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression _subscript_dict = {"Dim1": ["A", "B", "C"], "Dim2": ["D", "E"]} element = parse_general_expression( @@ -721,7 +721,7 @@ def test_subscript_2d_constant(self): self.assertEqual(a.loc[{"Dim1": "B", "Dim2": "E"}], 4) def test_subscript_3d_depth(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression _subscript_dict = {"Dim1": ["A", "B", "C"], "Dim2": ["D", "E"]} element = parse_general_expression( @@ -743,8 +743,8 @@ def test_subscript_builder(self): Testing how subscripts are translated when we have common subscript ranges. 
""" - from pysd.py_backend.vensim.vensim2py import parse_general_expression,\ - parse_lookup_expression + from pysd.translation.vensim.vensim2py import\ + parse_general_expression, parse_lookup_expression _subscript_dict = { "Dim1": ["A", "B", "C"], "Dim2": ["B", "C"], "Dim3": ["B", "C"] @@ -853,7 +853,7 @@ def test_subscript_builder(self): "'Dim1': _subscript_dict['Dim2']", element[1][0]['py_expr']) def test_subscript_reference(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression( {"expr": "Var A[Dim1, Dim2]", "real_name": "Var2", "eqn": ""}, @@ -916,7 +916,7 @@ def test_subscript_reference(self): "float(var_c().loc['B', 'C', 'H'])") def test_subscript_ranges(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression( {"expr": "Var D[Range1]"}, @@ -932,7 +932,7 @@ def test_subscript_ranges(self): ) def test_invert_matrix(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression( { @@ -953,7 +953,7 @@ def test_invert_matrix(self): self.assertEqual(res[0]["py_expr"], "invert_matrix(a())") def test_subscript_elmcount(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression( { @@ -975,7 +975,7 @@ def test_subscript_elmcount(self): res[0]["py_expr"], ) def test_subscript_logicals(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression res = parse_general_expression( { @@ -1000,7 +1000,7 @@ def test_subscript_logicals(self): res[0]["py_expr"], ) def test_ref_with_subscript_prefix(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression # When parsing functions arguments first the subscript ranges are # parsed and later the general id is used, however, the if a reference @@ -1027,7 +1027,7 @@ def test_ref_with_subscript_prefix(self): res[0]["py_expr"], ) def test_random_0_1(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression # When parsing functions arguments first the subscript ranges are # parsed and later the general id is used, however, the if a reference @@ -1052,7 +1052,7 @@ def test_random_0_1(self): res[0]["py_expr"], ) def test_random_uniform(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression # When parsing functions arguments first the subscript ranges are # parsed and later the general id is used, however, the if a reference @@ -1077,7 +1077,7 @@ def test_random_uniform(self): res[0]["py_expr"], ) def test_incomplete_expression(self): - from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression from warnings import catch_warnings with catch_warnings(record=True) as w: @@ -1107,7 +1107,7 @@ def test_incomplete_expression(self): "incomplete(unspecified_eqn(), var_a(), var_b())") def test_parse_general_expression_error(self): 
- from pysd.py_backend.vensim.vensim2py import parse_general_expression + from pysd.translation.vensim.vensim2py import parse_general_expression element = { "expr": "NIF(1,3)", @@ -1129,7 +1129,7 @@ def test_parse_general_expression_error(self): class TestParse_sketch_line(unittest.TestCase): def test_parse_sketch_line(self): - from pysd.py_backend.vensim.vensim2py import parse_sketch_line + from pysd.translation.vensim.vensim2py import parse_sketch_line namespace = {'"var-n"': "varn", "Stock": "stock", '"rate-1"': "rate1"} lines = [ @@ -1164,7 +1164,7 @@ def test_parse_sketch_line(self): class TestParse_private_functions(unittest.TestCase): def test__split_sketch_warning(self): import warnings - from pysd.py_backend.vensim.vensim2py import _split_sketch + from pysd.translation.vensim.vensim2py import _split_sketch model_str = "this is my model" diff --git a/tests/unit_test_xmile2py.py b/tests/unit_test_xmile2py.py index fff6a7d9..7cdc7379 100644 --- a/tests/unit_test_xmile2py.py +++ b/tests/unit_test_xmile2py.py @@ -2,7 +2,7 @@ import unittest import tempfile -from pysd.py_backend.xmile.xmile2py import translate_xmile +from pysd.translation.xmile.xmile2py import translate_xmile _root = os.path.dirname(__file__) TARGET_STMX_FILE = os.path.join(_root, "test-models/tests/game/test_game.stmx") From b8e01330c4a17dd706cf721f3911af516ec8f9c2 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Thu, 11 Nov 2021 15:24:04 +0100 Subject: [PATCH 02/16] Make models with data work --- pysd/py_backend/components.py | 153 ++++++++++++++++------- pysd/py_backend/statefuls.py | 59 ++++++--- pysd/py_backend/utils.py | 12 +- pysd/pysd.py | 16 +-- pysd/tools/benchmarking.py | 11 +- pysd/translation/builder.py | 83 +++++++++++- pysd/translation/vensim/vensim2py.py | 7 +- tests/integration_test_vensim_pathway.py | 6 + 8 files changed, 260 insertions(+), 87 deletions(-) diff --git a/pysd/py_backend/components.py b/pysd/py_backend/components.py index 954b8361..8d2edb71 100644 --- a/pysd/py_backend/components.py +++ b/pysd/py_backend/components.py @@ -148,7 +148,10 @@ def in_return(self): if self.return_timestamps is not None: return self._time in self.return_timestamps - return (self._time - self._initial_time) % self.saveper() == 0 + time_delay = self._time - self._initial_time + save_per = self.saveper() + prec = self.time_step() * 1e-10 + return time_delay % save_per < prec or -time_delay % save_per < prec def add_return_timestamps(self, return_timestamps): """ Add return timestamps """ @@ -167,51 +170,10 @@ def reset(self): class Data(object): - def __init__(self, data=None, file_name=None, var=None, transpose=False, - coords={}, interp="interpolate"): - - self.interp = interp + def __init__(self, data, coords, interp="interpolate"): + self.data = data + self.interp = "interpolate" self.is_float = not bool(coords) - if not data: - self.data = self.load_from_output( - file_name, var, coords, transpose) - else: - self.data = data - - @staticmethod - def load_from_output(file_name, var, coords, transpose): - - if not coords: - # 0 dimensional data - values = load_outputs(file_name, transpose, columns=[var]) - return xr.DataArray( - values[var].values, - {'time': values.index.values}, - ['time']) - - # subscripted data - dims = list(coords) - - values = load_outputs( - file_name, transpose, - columns=get_columns_to_load(file_name, transpose, vars=[var])) - - out = xr.DataArray( - np.nan, - {'time': values.index.values, **coords}, - ['time'] + dims) - - for var in values.columns: - coords = { - dim: [coord] 
- for (dim, coord) - in zip(dims, re.split(r'\[|\]|\s*,\s*', var)[1:-1]) - } - out.loc[coords] = np.expand_dims( - values[var].values, - axis=tuple(range(1, len(coords)+1)) - ) - return out def __call__(self, time): if time in self.data['time'].values: @@ -241,3 +203,104 @@ def __call__(self, time): else: # Remove time coord from the DataArray return outdata.reset_coords('time', drop=True) + + +class RegData(Data): + def __init__(self, real_name, py_name, coords, interp="interpolate"): + self.real_name = real_name + self.py_name = py_name + self.coords = coords + self.interp = interp + self.is_float = not bool(coords) + self.data = None + + def load_data(self, file_names): + """ + Load data values from files. + + Parameters + ---------- + file_names: list + Name of the files to search the variable in. + + Returns + ------- + out: xarray.DataArray + Resulting data array with the time in the first dimension. + + """ + + for file_name in file_names: + self.data = self._load_data(file_name) + if self.data is not None: + break + + if self.data is None: + raise ValueError( + f"_data_{self.py_name}\n" + f"Data for {self.real_name} not found in " + f"{', '.join(file_names)}") + + def _load_data(self, file_name): + """ + Load data values from output + + Parameters + ---------- + file_name: str + Name of the file to search the variable in. + + Returns + ------- + out: xarray.DataArray or None + Resulting data array with the time in the first dimension. + + """ + # get columns to load variable + columns = get_columns_to_load( + file_name, False, vars=[self.real_name, self.py_name]) + + if not columns: + # try reading transposed file + columns = get_columns_to_load( + file_name, True, vars=[self.real_name, self.py_name]) + + if not columns: + # variable not found + return None + else: + # variable must be read from a transposed file + transpose = True + else: + # variable found + transpose = False + + if not self.coords: + # 0 dimensional data + values = load_outputs(file_name, transpose, columns=columns) + return xr.DataArray( + values.iloc[:, 0].values, + {'time': values.index.values}, + ['time']) + + # subscripted data + dims = list(self.coords) + + values = load_outputs(file_name, transpose, columns=columns) + + out = xr.DataArray( + np.nan, + {'time': values.index.values, **self.coords}, + ['time'] + dims) + + for column in values.columns: + coords = { + dim: [coord] + for (dim, coord) + in zip(dims, re.split(r'\[|\]|\s*,\s*', column)[1:-1]) + } + out.loc[coords] = np.expand_dims( + values[column].values, + axis=tuple(range(1, len(coords)+1)) + ) + return out diff --git a/pysd/py_backend/statefuls.py b/pysd/py_backend/statefuls.py index 360d424c..cfa78b45 100644 --- a/pysd/py_backend/statefuls.py +++ b/pysd/py_backend/statefuls.py @@ -17,7 +17,7 @@ class objects. from .functions import zidz, if_then_else from .external import External, Excels from .decorators import Cache, constant_cache -from .components import Components, Time +from .components import Components, Time, Data, RegData from pysd._version import __version__ @@ -586,7 +586,8 @@ class Macro(DynamicStateful): """ def __init__(self, py_model_file, params=None, return_func=None, - time=None, time_initialization=None, py_name=None): + time=None, time_initialization=None, data_files=None, + py_name=None): """ The model object will be created with components drawn from a translated python model file. 
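A note on the in_return() change in components.py above: with a floating-point TIME STEP the accumulated rounding error means the simulation time is rarely an exact multiple of SAVEPER, so the old exact-modulo test could silently skip save points. A minimal standalone sketch of the failure mode and of the tolerance-based check (illustration only, not part of the patch; the constants are made up)::

    # three steps of 0.1 do not sum to exactly 0.3 in binary floats
    time_step, saveper, initial_time = 0.1, 0.3, 0.0

    time = initial_time
    for _ in range(3):
        time += time_step          # time == 0.30000000000000004

    print((time - initial_time) % saveper == 0)   # False: save point missed

    # the patched check accepts remainders within prec of 0 or of saveper
    prec = time_step * 1e-10
    time_delay = time - initial_time
    print(time_delay % saveper < prec or -time_delay % saveper < prec)  # True
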
@@ -646,6 +647,14 @@ def __init__(self, py_model_file, params=None, return_func=None, if isinstance(getattr(self.components, name), Macro) ] + self._data_elements = [ + getattr(self.components, name) for name in dir(self.components) + if isinstance(getattr(self.components, name), RegData) + ] + + if data_files: + self._get_data(data_files) + self._assign_cache_type() self._get_initialize_order() @@ -664,6 +673,18 @@ def clean_caches(self): # if nested macros [macro.clean_caches() for macro in self._macro_elements] + def _get_data(self, data_files): + if isinstance(data_files, dict): + for data_file, vars in data_files.items(): + for var in vars: + for element in self._data_elements: + if var in element.var: + element.load_data(data_file) + + else: + for element in self._data_elements: + element.load_data(data_files) + def _get_initialize_order(self): """ Get the initialization order of the stateful elements @@ -990,9 +1011,9 @@ def get_args(self, param): """ if isinstance(param, str): - func_name = utils.get_value_by_insensitive_key_or_value( + func_name = utils.get_key_and_value_by_insensitive_key_or_value( param, - self.components._namespace) or param + self.components._namespace)[1] or param func = getattr(self.components, func_name) else: @@ -1030,9 +1051,9 @@ def get_coords(self, param): """ if isinstance(param, str): - func_name = utils.get_value_by_insensitive_key_or_value( + func_name = utils.get_key_and_value_by_insensitive_key_or_value( param, - self.components._namespace) or param + self.components._namespace)[1] or param func = getattr(self.components, func_name) @@ -1076,9 +1097,9 @@ def __getitem__(self, param): It will crash if the model component takes arguments. """ - func_name = utils.get_value_by_insensitive_key_or_value( + func_name = utils.get_key_and_value_by_insensitive_key_or_value( param, - self.components._namespace) or param + self.components._namespace)[1] or param if self.get_args(getattr(self.components, func_name)): raise ValueError( @@ -1109,9 +1130,9 @@ def get_series_data(self, param): >>> model['Room temperature'] """ - func_name = utils.get_value_by_insensitive_key_or_value( + func_name = utils.get_key_and_value_by_insensitive_key_or_value( param, - self.components._namespace) or param + self.components._namespace)[1] or param try: if func_name.startswith("_ext_"): @@ -1150,9 +1171,9 @@ def set_components(self, params, new=False): # TODO: make this compatible with loading outputs from other files for key, value in params.items(): - func_name = utils.get_value_by_insensitive_key_or_value( + func_name = utils.get_key_and_value_by_insensitive_key_or_value( key, - self.components._namespace) + self.components._namespace)[1] if isinstance(value, np.ndarray) or isinstance(value, list): raise TypeError( @@ -1293,8 +1314,9 @@ def set_initial_value(self, t, initial_value): modified_statefuls = set() for key, value in initial_value.items(): - component_name = utils.get_value_by_insensitive_key_or_value( - key, self.components._namespace) + component_name =\ + utils.get_key_and_value_by_insensitive_key_or_value( + key, self.components._namespace)[1] if component_name is not None: if self.components._dependencies[component_name]: deps = list(self.components._dependencies[component_name]) @@ -1447,11 +1469,13 @@ def __str__(self): class Model(Macro): - def __init__(self, py_model_file, initialize, missing_values): + def __init__(self, py_model_file, data_files, initialize, missing_values): """ Sets up the python objects """ - super().__init__(py_model_file, None, 
None, Time()) + super().__init__(py_model_file, None, None, Time(), + data_files=data_files) self.time.stage = 'Load' self.time.set_control_vars(**self.components._control_vars) + self.data_files = data_files self.missing_values = missing_values if initialize: self.initialize() @@ -1608,7 +1632,8 @@ def reload(self): Reloads the model from the translated model file, so that all the parameters are back to their original value. """ - self.__init__(self.py_model_file, initialize=True, + self.__init__(self.py_model_file, data_files=self.data_files, + initialize=True, missing_values=self.missing_values) def _default_return_columns(self, which): diff --git a/pysd/py_backend/utils.py b/pysd/py_backend/utils.py index 47b1a7d0..7fd350b6 100644 --- a/pysd/py_backend/utils.py +++ b/pysd/py_backend/utils.py @@ -256,15 +256,13 @@ def compute_shape(coords, reshape_len=None, py_name=""): return [1] * (reshape_len - shape_len) + shape -def get_value_by_insensitive_key_or_value(key, dict): +def get_key_and_value_by_insensitive_key_or_value(key, dict): lower_key = key.lower() for real_key, real_value in dict.items(): - if real_key.lower() == lower_key: - return dict[real_key] - if real_value.lower() == lower_key: - return real_value + if real_key.lower() == lower_key or real_value.lower() == lower_key: + return real_key, real_value - return None + return None, None def rearrange(data, dims, coords): @@ -503,7 +501,7 @@ def get_columns_to_load(file_name, transpose=False, vars=None, encoding=None): else: out = func(file_name, encoding=encoding, - nrows=0).iloc[:, 1:] + nrows=1).iloc[:, 1:] out = set(out) diff --git a/pysd/pysd.py b/pysd/pysd.py index 82c91162..c2411a28 100644 --- a/pysd/pysd.py +++ b/pysd/pysd.py @@ -22,7 +22,8 @@ ) -def read_xmile(xmile_file, initialize=True, missing_values="warning"): +def read_xmile(xmile_file, data_files=None, initialize=True, + missing_values="warning"): """ Construct a model from `.xmile` file. @@ -57,13 +58,13 @@ def read_xmile(xmile_file, initialize=True, missing_values="warning"): from .translation.xmile.xmile2py import translate_xmile py_model_file = translate_xmile(xmile_file) - model = load(py_model_file, initialize, missing_values) + model = load(py_model_file, data_files, initialize, missing_values) model.xmile_file = xmile_file return model -def read_vensim(mdl_file, initialize=True, missing_values="warning", - split_views=False, **kwargs): +def read_vensim(mdl_file, data_files=None, initialize=True, + missing_values="warning", split_views=False, **kwargs): """ Construct a model from Vensim `.mdl` file. @@ -112,12 +113,13 @@ def read_vensim(mdl_file, initialize=True, missing_values="warning", from .translation.vensim.vensim2py import translate_vensim py_model_file = translate_vensim(mdl_file, split_views, **kwargs) - model = load(py_model_file, initialize, missing_values) + model = load(py_model_file, data_files, initialize, missing_values) model.mdl_file = mdl_file return model -def load(py_model_file, initialize=True, missing_values="warning"): +def load(py_model_file, data_files=None, initialize=True, + missing_values="warning"): """ Load a python-converted model file. 
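With data_files now threaded through read_xmile(), read_vensim() and load() above, a model whose DATA variables come from a previous run's output can be loaded and run in two lines. A hypothetical usage sketch (the model and data file names are illustrative, not from the patch)::

    import pysd

    # each DATA variable is looked up in the given files by its original
    # or python-safe name and interpolated at every time step
    model = pysd.read_vensim("my_model.mdl",
                             data_files=["observed_data.tab"])
    stocks = model.run()
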
@@ -145,4 +147,4 @@ def load(py_model_file, initialize=True, missing_values="warning"): """ - return Model(py_model_file, initialize, missing_values) + return Model(py_model_file, data_files, initialize, missing_values) diff --git a/pysd/tools/benchmarking.py b/pysd/tools/benchmarking.py index da755788..8012e35a 100644 --- a/pysd/tools/benchmarking.py +++ b/pysd/tools/benchmarking.py @@ -13,7 +13,7 @@ from ..py_backend.utils import load_outputs, detect_encoding -def runner(model_file, canonical_file=None, transpose=False): +def runner(model_file, canonical_file=None, transpose=False, data_files=None): """ Translates and runs a model and returns its output and the canonical output. @@ -31,6 +31,9 @@ def runner(model_file, canonical_file=None, transpose=False): If True reads transposed canonical file, i.e. one variable per row. Default is False. + data_files: list (optional) + List of the data files needed to run the model. + Returns ------- output, canon: (pandas.DataFrame, pandas.DataFrame) @@ -54,9 +57,9 @@ def runner(model_file, canonical_file=None, transpose=False): # load model if model_file.lower().endswith('.mdl'): - model = read_vensim(model_file) + model = read_vensim(model_file, data_files) elif model_file.lower().endswith(".xmile"): - model = read_xmile(model_file) + model = read_xmile(model_file, data_files) else: raise ValueError('\nModelfile should be *.mdl or *.xmile') @@ -250,7 +253,7 @@ def assert_allclose(x, y, rtol=1.e-5, atol=1.e-5): None """ - return (abs(x - y) <= atol + rtol * abs(y)).all() + return ((abs(x - y) <= atol + rtol * abs(y)) + x.isna()*y.isna()).all() def _remove_constant_nan(df): diff --git a/pysd/translation/builder.py b/pysd/translation/builder.py index 7585c7e1..07ae4d4b 100644 --- a/pysd/translation/builder.py +++ b/pysd/translation/builder.py @@ -27,9 +27,11 @@ class Imports(): Class to save the imported modules information for intelligent import """ _numpy, _xarray, _subs = False, False, False - _functions, _statefuls, _external, _utils = set(), set(), set(), set() + _functions, _statefuls, _external, _components, _utils =\ + set(), set(), set(), set(), set() _external_libs = {"numpy": "np", "xarray": "xr"} - _internal_libs = ["functions", "statefuls", "external", "utils"] + _internal_libs = [ + "functions", "statefuls", "external", "components","utils"] @classmethod def add(cls, module, function=None): @@ -108,7 +110,8 @@ def reset(cls): Reset the imported modules """ cls._numpy, cls._xarray, cls._subs = False, False, False - cls._functions, cls._external, cls._utils = set(), set(), set() + cls._functions, cls._statefuls, cls._external, cls._components,\ + cls._utils = set(), set(), set(), set(), set() # Variable to save identifiers of external objects @@ -646,8 +649,8 @@ def build_element(element, subscript_dict): # to rewrite subscripted values with model.run(params=X) or # model.run(initial_condition=(n,x)) element["subs_doc"] = "%s" % element["merge_subs"] - if element["kind"] in ["component", "setup", - "constant", "component_ext_data"]: + if element["kind"] in ["component", "setup", "constant", + "component_ext_data", "data"]: # the decorator is not always necessary as the objects # defined as xarrays in the model will have the right # dimensions always, we should try to reduce to the @@ -669,7 +672,7 @@ def build_element(element, subscript_dict): # convert newline indicator and add expected level of indentation element["doc"] = element["doc"].replace("\\", "\n").replace("\n", "\n ") - if element["kind"] in ["stateful", "external"]: + if 
element["kind"] in ["stateful", "external", "reg_data"]: func = """ %(py_name)s = %(py_expr)s """ % { @@ -1757,6 +1760,74 @@ def add_initial(identifier, value, deps): return "%s()" % stateful["py_name"], [stateful] +def add_reg_data(identifier, real_name, subs, + subscript_dict, merge_subs, keyword): + """ + Constructs a external object for handling Vensim's GET XLS DATA and + GET DIRECT DATA functionality. + + Parameters + ---------- + identifier: str + The python-safe name of the external values. + + real_name: str + The real name of the variable. + + subs: list of strings + List of strings of subscript indices that correspond to the + list of expressions, and collectively define the shape of the output. + + subscript_dict: dict + Dictionary describing the possible dimensions of the stock's + subscripts. + + merge_subs: list of strings + List of the final subscript range of the python array after + merging with other objects. + + keyword: str + Data retrieval method ('interpolate', 'look forward', 'hold backward'). + + Returns + ------- + reference: str + Reference to the ExtData object `__call__` method, which will + return the retrieved value of data for the current time step. + + new_structure: list + List of element construction dictionaries for the builder to assemble. + + """ + Imports.add("components", "RegData") + + coords = utils.simplify_subscript_input( + utils.make_coord_dict(subs, subscript_dict, terse=False), + subscript_dict, return_full=False, merge_subs=merge_subs) + keyword = ( + "'%s'" % keyword.strip(":").lower() if isinstance(keyword, str) else + keyword) + name = "_data_%s" % identifier + + data = { + "py_name": name, + "parent_name": identifier, + "real_name": "Data for %s" % identifier, + "doc": "Provides data for data variable %s" % identifier, + "py_expr": "RegData('%s', '%s', %s, %s)" % ( + real_name, identifier, coords, keyword), + "unit": "None", + "lims": "None", + "eqn": "None", + "subs": subs, + "merge_subs": merge_subs, + "kind": "reg_data", + "arguments": "", + } + + return "%s(time())" % data["py_name"], [data] + + def add_ext_data(identifier, file_name, tab, time_row_or_col, cell, subs, subscript_dict, merge_subs, keyword): """ diff --git a/pysd/translation/vensim/vensim2py.py b/pysd/translation/vensim/vensim2py.py index b8430b92..fc57c5a0 100644 --- a/pysd/translation/vensim/vensim2py.py +++ b/pysd/translation/vensim/vensim2py.py @@ -1694,8 +1694,13 @@ def translate_section(section, macro_list, sketch, root_path, subview_sep=""): elif element["kind"] == "data": element["eqn"] = element["expr"] = element["arguments"] = "" - element["py_expr"] = "None" + element["py_expr"], new_structure = builder.add_reg_data( + element["py_name"], element["real_name"], + element["subs"], subscript_dict, element["merge_subs"], + element["keyword"]) + element["dependencies"] = {"time": 1, "__data__": None} + model_elements += new_structure elif element["kind"] == "lookup": translation, new_structure = parse_lookup_expression( diff --git a/tests/integration_test_vensim_pathway.py b/tests/integration_test_vensim_pathway.py index 773dc712..5831fe5b 100644 --- a/tests/integration_test_vensim_pathway.py +++ b/tests/integration_test_vensim_pathway.py @@ -50,6 +50,12 @@ def test_constant_expressions(self): output, canon = runner(test_models + '/constant_expressions/test_constant_expressions.mdl') assert_frames_close(output, canon, rtol=rtol) + def test_data_from_other_model(self): + output, canon = runner( + test_models + 
'/data_from_other_model/test_data_from_other_model.mdl',
+            data_files=[test_models + '/data_from_other_model/data.tab'])
+        assert_frames_close(output, canon, rtol=rtol)
+
     def test_delay_fixed(self):
         # issue https://github.com/JamesPHoughton/pysd/issues/147
         with warnings.catch_warnings():

From 09f5f43cab94269c64910783cfeac06b9876ba3e Mon Sep 17 00:00:00 2001
From: Eneko Martin Martinez
Date: Thu, 11 Nov 2021 17:10:58 +0100
Subject: [PATCH 03/16] Update cli to read data files

---
 pysd/cli/main.py       | 22 +++++++++++++++++++---
 pysd/cli/parser.py     | 30 ++++++++++++++++++++++++++++++
 pysd/pysd.py           | 12 ++++++++++++
 tests/unit_test_cli.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/pysd/cli/main.py b/pysd/cli/main.py
index c6db345e..0ae1bdaa 100644
--- a/pysd/cli/main.py
+++ b/pysd/cli/main.py
@@ -25,8 +25,9 @@ def main(args):
     """
     options = parser.parse_args(args)
 
-    model = load(options.model_file, options.missing_values,
-                 options.split_views, subview_sep=options.subview_sep)
+    model = load(options.model_file, options.data_files,
+                 options.missing_values, options.split_views,
+                 subview_sep=options.subview_sep)
 
     if not options.run:
         print("\nFinished!")
@@ -44,7 +45,7 @@ def main(args):
     sys.exit()
 
 
-def load(model_file, missing_values, split_views, **kwargs):
+def load(model_file, data_files, missing_values, split_views, **kwargs):
     """
     Translate and load model file.
 
@@ -53,6 +54,18 @@ def load(model_file, missing_values, split_views, **kwargs):
     model_file: str
         Vensim, Xmile or PySD model file.
 
+    data_files: list
+        If given the list of files where the necessary data to run the model
+        is given.
+
+    missing_values : str ("warning", "error", "ignore", "keep")
+        What to do with missing values. If "warning" (default)
+        shows a warning message and interpolates the values.
+        If "raise" raises an error. If "ignore" interpolates
+        the values without showing anything. If "keep" it will keep
+        the missing values, this option may cause the integration to
+        fail, but it may be used to check the quality of the data.
+
     split_views: bool (optional)
         If True, the sketch is parsed to detect model elements in each
         model view, and then translate each view in a separate python
@@ -74,14 +87,17 @@ def load(model_file, missing_values, split_views, **kwargs):
     if model_file.lower().endswith(".mdl"):
         print("\nTranslating model file...\n")
         return pysd.read_vensim(model_file, initialize=False,
+                                data_files=data_files,
                                 missing_values=missing_values,
                                 split_views=split_views, **kwargs)
     elif model_file.lower().endswith(".xmile"):
         print("\nTranslating model file...\n")
         return pysd.read_xmile(model_file, initialize=False,
+                               data_files=data_files,
                                missing_values=missing_values)
     else:
         return pysd.load(model_file, initialize=False,
+                         data_files=data_files,
                          missing_values=missing_values)
 
 
diff --git a/pysd/cli/parser.py b/pysd/cli/parser.py
index df734e1d..522441b6 100644
--- a/pysd/cli/parser.py
+++ b/pysd/cli/parser.py
@@ -54,6 +54,32 @@ def check_model(string):
     return string
 
 
+def check_data_file(string):
+    """
+    Check that the data file is a tab or csv file and that it exists.
+    """
+    if not string.endswith('.tab') and not string.endswith('.csv'):
+        parser.error(
+            f'when parsing {string}'
+            '\nThe data file name must be .tab or .csv...')
+    elif not os.path.isfile(string):
+        parser.error(
+            f'when parsing {string}'
+            '\nThe data file does not exist...')
+    else:
+        return string
+
+
+def split_files(string):
+    """
+    Split the data files and raise an error if a file doesn't exist
+    --data 'file1.tab, file2.csv' -> ['file1.tab', 'file2.csv']
+    --data file1.tab -> ['file1.tab']
+
+    """
+    return [check_data_file(s.strip()) for s in string.split(',')]
+
+
 def split_columns(string):
     """
     Splits the return-columns argument or reads it from .txt
@@ -217,6 +243,10 @@ def __call__(self, parser, namespace, values, option_string=None):
     help='provide the return time stamps separated by commas, if given '
          '--saveper will be ignored')
 
+model_arguments.add_argument(
+    '-D', '--data', dest='data_files',
+    action='store', type=split_files, metavar='\'FILE1, FILE2, .., FILEN\'',
+    help='input data file or files to run the model')
 
 #########################
 # Translation arguments #
diff --git a/pysd/pysd.py b/pysd/pysd.py
index c2411a28..ea4ca4fb 100644
--- a/pysd/pysd.py
+++ b/pysd/pysd.py
@@ -36,6 +36,10 @@ def read_xmile(xmile_file, data_files=None, initialize=True,
         If False, the model will not be initialize when it is loaded.
         Default is True.
 
+    data_files: list or None (optional)
+        If given the list of files where the necessary data to run the model
+        is given. Default is None.
+
     missing_values : str ("warning", "error", "ignore", "keep") (optional)
         What to do with missing values. If "warning" (default)
         shows a warning message and interpolates the values.
@@ -77,6 +81,10 @@ def read_vensim(mdl_file, data_files=None, initialize=True,
         If False, the model will not be initialize when it is loaded.
         Default is True.
 
+    data_files: list or None (optional)
+        If given the list of files where the necessary data to run the model
+        is given. Default is None.
+
     missing_values : str ("warning", "error", "ignore", "keep") (optional)
         What to do with missing values. If "warning" (default)
         shows a warning message and interpolates the values.
@@ -133,6 +141,10 @@ def load(py_model_file, data_files=None, initialize=True,
         If False, the model will not be initialize when it is loaded.
         Default is True.
 
+    data_files: list or None (optional)
+        If given the list of files where the necessary data to run the model
+        is given. Default is None.
+
     missing_values : str ("warning", "error", "ignore", "keep") (optional)
         What to do with missing values. If "warning" (default)
         shows a warning message and interpolates the values.
diff --git a/tests/unit_test_cli.py b/tests/unit_test_cli.py index 45f357f1..4a93207b 100644 --- a/tests/unit_test_cli.py +++ b/tests/unit_test_cli.py @@ -518,6 +518,48 @@ def test_export_import(self): assert_frames_close(stocks1, stocks.loc[[0, 10]]) assert_frames_close(stocks2, stocks.loc[[20, 30]]) + def test_run_model_with_data(self): + data_file = os.path.join( + _root, "test-models/tests/data_from_other_model/data.tab") + model_file = os.path.join( + _root, + "test-models/tests/data_from_other_model/" + + "test_data_from_other_model.mdl") + + command = f"{call} -o {out_tab_file} -D {data_file}"\ + f" {model_file}" + + out = subprocess.run(split_bash(command), capture_output=True) + self.assertEqual(out.returncode, 0) + stocks = load_outputs(out_tab_file) + canon = load_outputs(os.path.join( + _root, + "test-models/tests/data_from_other_model/output.tab")) + + assert_frames_close(stocks[canon.columns], canon) + + # invalid data file + command = f"{call} -o {out_tab_file} -D my_file.txt"\ + f" {model_file}" + + out = subprocess.run(split_bash(command), capture_output=True) + self.assertNotEqual(out.returncode, 0) + stderr = out.stderr.decode(encoding_stderr) + self.assertIn("PySD: error: when parsing my_file.txt", stderr) + self.assertIn( + "The data file name must be .tab or .csv...", stderr) + + # not found data file + command = f"{call} -o {out_tab_file} -D my_file.tab"\ + f" {model_file}" + + out = subprocess.run(split_bash(command), capture_output=True) + self.assertNotEqual(out.returncode, 0) + stderr = out.stderr.decode(encoding_stderr) + self.assertIn("PySD: error: when parsing my_file.tab", stderr) + self.assertIn( + "The data file does not exist...", stderr) + def test_save_without_name(self): import re From f9675b999fa3f22abf8ae8eedc6156a2f6158947 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Thu, 11 Nov 2021 17:19:49 +0100 Subject: [PATCH 04/16] Update test-models --- tests/test-models | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-models b/tests/test-models index 9745972a..8ffa7541 160000 --- a/tests/test-models +++ b/tests/test-models @@ -1 +1 @@ -Subproject commit 9745972abecce6c36d2c414d85729b65f97ccbd8 +Subproject commit 8ffa754117dc7b3ffdc7677fc9ae9304b6849357 From a93902c37bdc9294165edb43ca30616baefa0bd3 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Fri, 12 Nov 2021 12:57:06 +0100 Subject: [PATCH 05/16] Add tests and document --- docs/basic_usage.rst | 21 ++++ pysd/py_backend/components.py | 77 +++++++----- pysd/py_backend/statefuls.py | 8 +- pysd/py_backend/utils.py | 21 +++- pysd/pysd.py | 6 +- tests/integration_test_vensim_pathway.py | 2 +- tests/more-tests/data_model/data1.tab | 7 ++ tests/more-tests/data_model/data2.tab | 7 ++ tests/more-tests/data_model/data3.tab | 7 ++ .../more-tests/data_model/test_data_model.mdl | 111 ++++++++++++++++++ tests/unit_test_benchmarking.py | 51 -------- tests/unit_test_pysd.py | 97 +++++++++++++++ tests/unit_test_utils.py | 109 +++++++++++++++++ 13 files changed, 431 insertions(+), 93 deletions(-) create mode 100644 tests/more-tests/data_model/data1.tab create mode 100644 tests/more-tests/data_model/data2.tab create mode 100644 tests/more-tests/data_model/data3.tab create mode 100644 tests/more-tests/data_model/test_data_model.mdl diff --git a/docs/basic_usage.rst b/docs/basic_usage.rst index 161f3fd7..c5d6391d 100644 --- a/docs/basic_usage.rst +++ b/docs/basic_usage.rst @@ -58,6 +58,27 @@ To show a progressbar during the model integration the progress flag can be pass >>> 
stocks = model.run(progress=True)
 
 
+Running models with DATA type components
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Vensim's regular DATA type components are given by an empty expression in the model equation; Vensim reads their values from a binary `.vdf` file. PySD allows running models with this kind of data definition by passing the data with the data_files argument when calling the :py:func:`.run()` command, e.g.::
+
+    >>> stocks = model.run(data_files="input_data.tab")
+
+Several files can be passed as a list; if the data for a variable is not found in the first file, the next one is searched, and so on until the data values are found::
+
+    >>> stocks = model.run(data_files=["input_data.tab", "input_data2.tab", ..., "input_datan.tab"])
+
+If a variable is given in several files, a dictionary can be used to choose the specific file for each variable::
+
+    >>> stocks = model.run(data_files={"input_data.tab": ["data_var1", "data_var3"], "input_data2.tab": ["data_var2"]})
+
+.. note::
+    Only `tab` and `csv` files are supported; they should be given as a table, with one column (or row) per variable and the time in the first column (or row). The column (or row) names can be given using the original model names or the python-safe names.
+
+.. note::
+    Subscripted variables must be given in the Vensim format, one column (or row) per subscript combination. Example of column names for a 2x2 variable:
+    `subs var[A, C]` `subs var[B, C]` `subs var[A, D]` `subs var[B, D]`
+
 Outputting various run information
 ----------------------------------
 The :py:func:`.run()` command has a few options that make it more useful. In many situations we want to access components of the model other than merely the stocks – we can specify which components of the model should be included in the returned dataframe by including them in a list that we pass to the :py:func:`.run()` command, using the return_columns keyword argument::
diff --git a/pysd/py_backend/components.py b/pysd/py_backend/components.py
index 8d2edb71..777bf240 100644
--- a/pysd/py_backend/components.py
+++ b/pysd/py_backend/components.py
@@ -170,39 +170,48 @@ class Data(object):
 
 
 class Data(object):
-    def __init__(self, data, coords, interp="interpolate"):
-        self.data = data
-        self.interp = "interpolate"
-        self.is_float = not bool(coords)
+    # TODO: add __init__ and use this class for user input pandas.Series
+    # as Data
+    # def __init__(self, data, coords, interp="interpolate"):
 
     def __call__(self, time):
-        if time in self.data['time'].values:
-            outdata = self.data.sel(time=time)
-        elif self.interp == "raw":
-            return np.nan
-        elif time > self.data['time'].values[-1]:
-            warnings.warn(
-                self.py_name + "\n"
-                + "extrapolating data above the maximum value of the time")
-            outdata = self.data[-1]
-        elif time < self.data['time'].values[0]:
-            warnings.warn(
-                self.py_name + "\n"
-                + "extrapolating data below the minimum value of the time")
-            outdata = self.data[0]
-        elif self.interp == "interpolate":
-            outdata = self.data.interp(time=time)
-        elif self.interp == 'look forward':
-            outdata = self.data.sel(time=time, method="backfill")
-        elif self.interp == 'hold backward':
-            outdata = self.data.sel(time=time, method="pad")
-
-        if self.is_float:
-            # if data has no-coords return a float
-            return float(outdata)
-        else:
-            # Remove time coord from the DataArray
-            return outdata.reset_coords('time', drop=True)
+        try:
+            if time in self.data['time'].values:
+                outdata = self.data.sel(time=time)
+            elif self.interp == "raw":
+                return np.nan
+            elif time > self.data['time'].values[-1]:
+                warnings.warn(
+                    self.py_name + "\n"
+                    +
"extrapolating data above the maximum value of the time") + outdata = self.data[-1] + elif time < self.data['time'].values[0]: + warnings.warn( + self.py_name + "\n" + + "extrapolating data below the minimum value of the time") + outdata = self.data[0] + elif self.interp == "interpolate": + outdata = self.data.interp(time=time) + elif self.interp == 'look forward': + outdata = self.data.sel(time=time, method="backfill") + elif self.interp == 'hold backward': + outdata = self.data.sel(time=time, method="pad") + + if self.is_float: + # if data has no-coords return a float + return float(outdata) + else: + # Remove time coord from the DataArray + return outdata.reset_coords('time', drop=True) + except Exception as err: + if self.data is None: + raise ValueError( + self.py_name + "\n" + "Trying to interpolate data variable before loading" + " the data...") + else: + # raise any other possible error + raise err class RegData(Data): @@ -220,7 +229,7 @@ def load_data(self, file_names): Parameters ---------- - file_names: list + file_names: list or str Name of the files to search the variable in. Returns @@ -229,6 +238,8 @@ def load_data(self, file_names): Resulting data array with the time in the first dimension. """ + if isinstance(file_names, str): + file_names = [file_names] for file_name in file_names: self.data = self._load_data(file_name) @@ -256,6 +267,8 @@ def _load_data(self, file_name): Resulting data array with the time in the first dimension. """ + # TODO manage missing values on data, as external elements, create a + # hierarchy from External? # get columns to load variable columns = get_columns_to_load( file_name, False, vars=[self.real_name, self.py_name]) diff --git a/pysd/py_backend/statefuls.py b/pysd/py_backend/statefuls.py index cfa78b45..48431331 100644 --- a/pysd/py_backend/statefuls.py +++ b/pysd/py_backend/statefuls.py @@ -677,9 +677,15 @@ def _get_data(self, data_files): if isinstance(data_files, dict): for data_file, vars in data_files.items(): for var in vars: + found = False for element in self._data_elements: - if var in element.var: + if var in [element.py_name, element.real_name]: element.load_data(data_file) + found = True + break + if not found: + raise ValueError( + f"'{var}' not found as model data variable") else: for element in self._data_elements: diff --git a/pysd/py_backend/utils.py b/pysd/py_backend/utils.py index 7fd350b6..960554fb 100644 --- a/pysd/py_backend/utils.py +++ b/pysd/py_backend/utils.py @@ -497,11 +497,13 @@ def get_columns_to_load(file_name, transpose=False, vars=None, encoding=None): if transpose: out = func(file_name, encoding=encoding, - usecols=[0]).iloc[:, 0].to_list() + usecols=[0], + dtype=str).iloc[:, 0].to_list() else: out = func(file_name, encoding=encoding, - nrows=1).iloc[:, 1:] + nrows=1, + dtype=str).iloc[:, 1:] out = set(out) @@ -511,17 +513,26 @@ def get_columns_to_load(file_name, transpose=False, vars=None, encoding=None): + f"Only {', '.join(list(read_func))} files are accepted.") if vars is None: + # Not var specified, return all available variables return out else: + vars_extended = [] + for var in vars: + vars_extended.append(var) + if var.startswith('"') and var.endswith('"'): + # the variables in "" are reded without " by pandas + vars_extended.append(var[1:-1]) outs = set() for var in out: - if var in vars: + if var in vars_extended: + # var is in vars_extended (no subscripts) outs.add(var) - vars.remove(var) + vars_extended.remove(var) else: - for var1 in vars: + for var1 in vars_extended: if var.startswith(var1 + 
"["): + # var is subscripted outs.add(var) return outs diff --git a/pysd/pysd.py b/pysd/pysd.py index ea4ca4fb..0c3e48c1 100644 --- a/pysd/pysd.py +++ b/pysd/pysd.py @@ -36,7 +36,7 @@ def read_xmile(xmile_file, data_files=None, initialize=True, If False, the model will not be initialize when it is loaded. Default is True. - data_files: list or None (optional) + data_files: list or str or None (optional) If given the list of files where the necessary data to run the model is given. Default is None. @@ -81,7 +81,7 @@ def read_vensim(mdl_file, data_files=None, initialize=True, If False, the model will not be initialize when it is loaded. Default is True. - data_files: list or None (optional) + data_files: list or str or None (optional) If given the list of files where the necessary data to run the model is given. Default is None. @@ -141,7 +141,7 @@ def load(py_model_file, data_files=None, initialize=True, If False, the model will not be initialize when it is loaded. Default is True. - data_files: list or None (optional) + data_files: list or str or None (optional) If given the list of files where the necessary data to run the model is given. Default is None. diff --git a/tests/integration_test_vensim_pathway.py b/tests/integration_test_vensim_pathway.py index 5831fe5b..c9b456f2 100644 --- a/tests/integration_test_vensim_pathway.py +++ b/tests/integration_test_vensim_pathway.py @@ -53,7 +53,7 @@ def test_constant_expressions(self): def test_data_from_other_model(self): output, canon = runner( test_models + '/data_from_other_model/test_data_from_other_model.mdl', - data_files=[test_models + '/data_from_other_model/data.tab']) + data_files=test_models + '/data_from_other_model/data.tab') assert_frames_close(output, canon, rtol=rtol) def test_delay_fixed(self): diff --git a/tests/more-tests/data_model/data1.tab b/tests/more-tests/data_model/data1.tab new file mode 100644 index 00000000..cb4b69dd --- /dev/null +++ b/tests/more-tests/data_model/data1.tab @@ -0,0 +1,7 @@ +Time data 1 Data 2 data-3 Time data 1 Data 2 +0 0 0 0 0 0 0 +2 2 4 6 2 2 4 +4 4 8 12 4 4 8 +6 6 12 18 6 6 12 +8 8 16 24 8 8 16 +10 10 20 30 10 10 20 diff --git a/tests/more-tests/data_model/data2.tab b/tests/more-tests/data_model/data2.tab new file mode 100644 index 00000000..75873939 --- /dev/null +++ b/tests/more-tests/data_model/data2.tab @@ -0,0 +1,7 @@ +Time 0 2 4 6 8 10 +data_1 -5 -3 -1 1 3 5 +data_2 -5 -1 3 7 11 15 +data3 -5 1 7 13 19 25 +Time 0 2 4 6 8 10 +data_1 -5 -3 -1 1 3 5 +data_2 -5 -1 3 7 11 15 diff --git a/tests/more-tests/data_model/data3.tab b/tests/more-tests/data_model/data3.tab new file mode 100644 index 00000000..212146cc --- /dev/null +++ b/tests/more-tests/data_model/data3.tab @@ -0,0 +1,7 @@ +Time data 1 Data 2 +0 -0 -0 +2 -2 -4 +4 -4 -8 +6 -6 -12 +8 -8 -16 +10 -10 -20 diff --git a/tests/more-tests/data_model/test_data_model.mdl b/tests/more-tests/data_model/test_data_model.mdl new file mode 100644 index 00000000..4475106d --- /dev/null +++ b/tests/more-tests/data_model/test_data_model.mdl @@ -0,0 +1,111 @@ +{UTF-8} +data 1:INTERPOLATE: + ~ + ~ | + +Data 2:INTERPOLATE: + ~ + ~ | + +"data-3":INTERPOLATE: + ~ + ~ | + +var1= + data 1 + ~ + ~ | + +var2= + Data 2 + ~ + ~ | + +var3= + "data-3" + ~ + ~ | + +******************************************************** + .Control +********************************************************~ + Simulation Control Parameters + | + +FINAL TIME = 10 + ~ Month + ~ The final time for the simulation. + | + +INITIAL TIME = 0 + ~ Month + ~ The initial time for the simulation. 
+ | + +SAVEPER = 1 + ~ Month [0,?] + ~ The frequency with which output is stored. + | + +TIME STEP = 1 + ~ Month [0,?] + ~ The time step for the simulation. + | + +\\\---/// Sketch information - do not modify anything except names +V300 Do not put anything below this section - it will be ignored +*View 1 +$192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,100,0 +10,1,data 1,429,173,19,11,8,3,0,0,0,0,0,0 +10,2,Data 2,438,249,25,10,8,131,0,0,0,0,0,0 +10,3,"data-3",438,334,22,11,8,3,0,0,0,0,0,0 +10,4,var1,617,171,16,11,8,3,0,0,0,0,0,0 +10,5,var2,618,249,16,11,8,3,0,0,0,0,0,0 +10,6,var3,620,337,16,11,8,3,0,0,0,0,0,0 +1,7,3,6,0,0,0,0,0,128,0,-1--1--1,,1|(525,334)| +1,8,2,5,0,0,0,0,0,128,0,-1--1--1,,1|(525,249)| +1,9,1,4,0,0,0,0,0,128,0,-1--1--1,,1|(517,172)| +10,10,Time,429,203,26,11,8,2,1,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 +1,11,10,1,0,1,0,0,0,64,0,-1--1--1,,1|(429,195)| +10,12,Time,438,278,26,11,8,2,1,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 +1,13,12,2,0,1,0,0,0,64,0,-1--1--1,,1|(438,270)| +10,14,Time,438,364,26,11,8,2,1,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 +1,15,14,3,0,1,0,0,0,64,0,-1--1--1,,1|(438,356)| +///---\\\ +:L<%^E!@ +1:Current.vdf +9:Current +15:0,0,0,0,0,0 +19:100,0 +27:0, +34:0, +4:Time +5:var1 +35:Date +36:YYYY-MM-DD +37:2000 +38:1 +39:1 +40:2 +41:0 +42:1 +24:0 +25:10 +26:10 +57:1 +54:0 +55:0 +59:0 +56:0 +58:0 +44:65001 +46:0 +45:1 +49:1 +50:0 +51: +52: +53: +43:out3 +47:Current +48: diff --git a/tests/unit_test_benchmarking.py b/tests/unit_test_benchmarking.py index 31bef2b3..696c6c46 100644 --- a/tests/unit_test_benchmarking.py +++ b/tests/unit_test_benchmarking.py @@ -31,22 +31,6 @@ def test_non_valid_model(self): 'Modelfile should be *.mdl or *.xmile', str(err.exception)) - def test_non_valid_outputs(self): - from pysd.tools.benchmarking import load_outputs - - with self.assertRaises(ValueError) as err: - load_outputs( - os.path.join( - _root, - "more-tests/not_vensim/test_not_vensim.txt")) - - self.assertIn( - "Not able to read '", - str(err.exception)) - self.assertIn( - "more-tests/not_vensim/test_not_vensim.txt'.", - str(err.exception)) - def test_different_frames_error(self): from pysd.tools.benchmarking import load_outputs, assert_frames_close @@ -153,41 +137,6 @@ def test_different_frames_warning(self): "Expected values:\n\t", str(wu[0].message)) - def test_transposed_frame(self): - from pysd.tools.benchmarking import load_outputs, assert_frames_close - - assert_frames_close( - load_outputs(os.path.join(_root, "data/out_teacup.csv")), - load_outputs( - os.path.join(_root, "data/out_teacup_transposed.csv"), - transpose=True)) - - def test_load_columns(self): - from pysd.tools.benchmarking import load_outputs - - out0 = load_outputs( - os.path.join(_root, "data/out_teacup.csv")) - - out1 = load_outputs( - os.path.join(_root, "data/out_teacup.csv"), - columns=["Room Temperature", "Teacup Temperature"]) - - out2 = load_outputs( - os.path.join(_root, "data/out_teacup_transposed.csv"), - transpose=True, - columns=["Heat Loss to Room"]) - - self.assertEqual( - set(out1.columns), - set(["Room Temperature", "Teacup Temperature"])) - - self.assertEqual( - set(out2.columns), - set(["Heat Loss to Room"])) - - self.assertTrue((out0.index == out1.index).all()) - self.assertTrue((out0.index == out2.index).all()) - def test_different_cols(self): from warnings import catch_warnings from pysd.tools.benchmarking import assert_frames_close diff --git a/tests/unit_test_pysd.py b/tests/unit_test_pysd.py index 57de5fdd..4accbc7b 100644 --- 
a/tests/unit_test_pysd.py +++ b/tests/unit_test_pysd.py @@ -1978,6 +1978,103 @@ def test_change_constant_pipe(self): test_model_constant_pipe.replace(".mdl", ".py")) +class TestDataReading(unittest.TestCase): + data_folder = os.path.join(_root, "more-tests/data_model/") + data_model = os.path.join(data_folder, "test_data_model.mdl") + + def test_no_data_files_provided(self): + from pysd import read_vensim + model = read_vensim(self.data_model) + + with self.assertRaises(ValueError) as err: + model.run(return_columns=["var1", "var2", "var3"]) + + self.assertIn("Trying to interpolate data variable before loading" + " the data...", str(err.exception)) + + def test_missing_data(self): + from pysd import read_vensim + + with self.assertRaises(ValueError) as err: + read_vensim( + self.data_model, data_files=self.data_folder+"data3.tab") + + self.assertIn( + "Data for \"data-3\" not found in " + "/home/eneko/CREAF/dev/pysd/tests/more-tests/data_model/data3.tab", + str(err.exception)) + + def test_get_data_variable_not_found_from_dict_file(self): + from pysd import read_vensim + + with self.assertRaises(ValueError) as err: + read_vensim( + self.data_model, + data_files={ + self.data_folder+"data1.tab": ["non-existing-var"]}) + + self.assertIn( + "'non-existing-var' not found as model data variable", + str(err.exception)) + + def test_get_data_from_one_file(self): + from pysd import read_vensim + + model = read_vensim( + self.data_model, data_files=self.data_folder+"data1.tab") + out = model.run(return_columns=["var1", "var2", "var3"]) + times = np.arange(11) + expected = pd.DataFrame( + index=times, + data={'var1': times, "var2": 2*times, "var3": 3*times}) + + assert_frames_close(out, expected) + + def test_get_data_from_two_file(self): + from pysd import read_vensim + + model = read_vensim( + self.data_model, + data_files=[self.data_folder+"data3.tab", + self.data_folder+"data1.tab"]) + out = model.run(return_columns=["var1", "var2", "var3"]) + times = np.arange(11) + expected = pd.DataFrame( + index=times, + data={'var1': -times, "var2": -2*times, "var3": 3*times}) + + assert_frames_close(out, expected) + + def test_get_data_from_transposed_file(self): + from pysd import read_vensim + + model = read_vensim( + self.data_model, + data_files=[self.data_folder+"data2.tab"]) + out = model.run(return_columns=["var1", "var2", "var3"]) + times = np.arange(11) + expected = pd.DataFrame( + index=times, + data={'var1': times-5, "var2": 2*times-5, "var3": 3*times-5}) + + assert_frames_close(out, expected) + + def test_get_data_from_dict_file(self): + from pysd import read_vensim + + model = read_vensim( + self.data_model, + data_files={self.data_folder+"data2.tab": ["\"data-3\""], + self.data_folder+"data1.tab": ["data_1", "Data 2"]}) + out = model.run(return_columns=["var1", "var2", "var3"]) + times = np.arange(11) + expected = pd.DataFrame( + index=times, + data={'var1': times, "var2": 2*times, "var3": 3*times-5}) + + assert_frames_close(out, expected) + + class TestExportImport(unittest.TestCase): def test_run_export_import_integ(self): from pysd import read_vensim diff --git a/tests/unit_test_utils.py b/tests/unit_test_utils.py index 1df82f01..e5a9e71d 100644 --- a/tests/unit_test_utils.py +++ b/tests/unit_test_utils.py @@ -1,4 +1,5 @@ import doctest +import os from unittest import TestCase import pandas as pd @@ -6,6 +7,8 @@ from pysd.tools.benchmarking import assert_frames_close +_root = os.path.dirname(__file__) + class TestUtils(TestCase): @@ -386,6 +389,112 @@ def test_rearrange(self): 
self.assertEqual(None, rearrange(None, ['d2'], _subscript_dict)) + +class TestLoadOutputs(TestCase): + def test_non_valid_outputs(self): + from pysd.py_backend.utils import load_outputs + + with self.assertRaises(ValueError) as err: + load_outputs( + os.path.join( + _root, + "more-tests/not_vensim/test_not_vensim.txt")) + + self.assertIn( + "Not able to read '", + str(err.exception)) + self.assertIn( + "more-tests/not_vensim/test_not_vensim.txt'.", + str(err.exception)) + + def test_transposed_frame(self): + from pysd.py_backend.utils import load_outputs + + assert_frames_close( + load_outputs(os.path.join(_root, "data/out_teacup.csv")), + load_outputs( + os.path.join(_root, "data/out_teacup_transposed.csv"), + transpose=True)) + + def test_load_columns(self): + from pysd.py_backend.utils import load_outputs + + out0 = load_outputs( + os.path.join(_root, "data/out_teacup.csv")) + + out1 = load_outputs( + os.path.join(_root, "data/out_teacup.csv"), + columns=["Room Temperature", "Teacup Temperature"]) + + out2 = load_outputs( + os.path.join(_root, "data/out_teacup_transposed.csv"), + transpose=True, + columns=["Heat Loss to Room"]) + + self.assertEqual( + set(out1.columns), + set(["Room Temperature", "Teacup Temperature"])) + + self.assertEqual( + set(out2.columns), + set(["Heat Loss to Room"])) + + self.assertTrue((out0.index == out1.index).all()) + self.assertTrue((out0.index == out2.index).all()) + + def test_non_valid_outputs_get_columns(self): + from pysd.py_backend.utils import get_columns_to_load + + with self.assertRaises(ValueError) as err: + get_columns_to_load( + os.path.join( + _root, + "more-tests/not_vensim/test_not_vensim.txt")) + + self.assertIn( + "Not able to read '", + str(err.exception)) + self.assertIn( + "more-tests/not_vensim/test_not_vensim.txt'.", + str(err.exception)) + + def test_transposed_frame_get_columns(self): + from pysd.py_backend.utils import get_columns_to_load + + self.assertEqual(get_columns_to_load( + os.path.join(_root, "data/out_teacup.csv")), + get_columns_to_load( + os.path.join(_root, "data/out_teacup_transposed.csv"), + transpose=True) + ) + + def test_load_column_get_columns(self): + from pysd.py_backend.utils import get_columns_to_load + + out0 = get_columns_to_load( + os.path.join(_root, "data/out_teacup.csv")) + + out1 = get_columns_to_load( + os.path.join(_root, "data/out_teacup.csv"), + vars=["Room Temperature", "Teacup Temperature"]) + + out2 = get_columns_to_load( + os.path.join(_root, "data/out_teacup_transposed.csv"), + transpose=True, + vars=["Heat Loss to Room"]) + + self.assertTrue(out1.issubset(out0)) + self.assertEqual( + out1, + set(["Room Temperature", "Teacup Temperature"])) + + self.assertTrue(out2.issubset(out0)) + self.assertEqual( + out2, + set(["Heat Loss to Room"])) + + +class TestProgressbar(TestCase): def test_progressbar(self): from pysd.py_backend.utils import ProgressBar From 7c034e0ccf45e5e7538625ffe9e857bde66f5f45 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Fri, 12 Nov 2021 16:59:06 +0100 Subject: [PATCH 06/16] Create another file for data and add a singleton --- docs/basic_usage.rst | 2 +- pysd/cli/main.py | 5 +- pysd/cli/parser.py | 1 - pysd/py_backend/components.py | 156 --------------- pysd/py_backend/data.py | 274 +++++++++++++++++++++++++++ pysd/py_backend/external.py | 2 +- pysd/py_backend/statefuls.py | 5 +- pysd/py_backend/utils.py | 76 -------- pysd/translation/builder.py | 21 +- pysd/translation/vensim/vensim2py.py | 2 +- 10 files changed, 294 insertions(+), 250 deletions(-) create mode 
100644 pysd/py_backend/data.py

diff --git a/docs/basic_usage.rst b/docs/basic_usage.rst
index c5d6391d..2a67244c 100644
--- a/docs/basic_usage.rst
+++ b/docs/basic_usage.rst
@@ -60,7 +60,7 @@ To show a progressbar during the model integration the progress flag can be pass

 Running models with DATA type components
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Vensim's regular DATA type components are given by an empty expression in the model equation. This values are read from a binary `.vdf` file. PySD allows running models with this kind of data definition using the data_files argument when calling the :py:func:`.run()` command, e.g.::
+Vensim's regular DATA type components are given by an empty expression in the model equation. These values are read from a binary `.vdf` file. PySD allows running models with this kind of data definition using the data_files argument when calling the :py:func:`.run()` command, e.g.::

     >>> stocks = model.run(data_files="input_data.tab")

diff --git a/pysd/cli/main.py b/pysd/cli/main.py
index 0ae1bdaa..ec9f2302 100644
--- a/pysd/cli/main.py
+++ b/pysd/cli/main.py
@@ -1,6 +1,7 @@
 import sys
 import os

+from csv import QUOTE_NONE
 from datetime import datetime

 from .parser import parser
@@ -163,6 +164,8 @@ def save(output, options):
     else:
         sep = ","

-    output.to_csv(output_file, sep, index_label="Time")
+    # QUOTE_NONE is used to print the csv/tab files as Vensim does with
+    # special characters, e.g.: "my-var"[Dimension]
+    output.to_csv(output_file, sep, index_label="Time", quoting=QUOTE_NONE)

     print(f"Data saved in '{output_file}'")

diff --git a/pysd/cli/parser.py b/pysd/cli/parser.py
index 522441b6..1ecd981f 100644
--- a/pysd/cli/parser.py
+++ b/pysd/cli/parser.py
@@ -77,7 +77,6 @@ def split_files(string):
     --data file1.tab -> ['file1.tab']

     """
-    print([check_data_file(s.strip()) for s in string.split(',')])
     return [check_data_file(s.strip()) for s in string.split(',')]

diff --git a/pysd/py_backend/components.py b/pysd/py_backend/components.py
index 777bf240..5eccf370 100644
--- a/pysd/py_backend/components.py
+++ b/pysd/py_backend/components.py
@@ -3,16 +3,10 @@
 """

 import os
-import warnings
-import re
 import random
 from importlib.machinery import SourceFileLoader

-import numpy as np
-import xarray as xr
-
 from pysd._version import __version__
-from .utils import load_outputs, get_columns_to_load


 class Components(object):
@@ -167,153 +161,3 @@ def update(self, value):

     def reset(self):
         """ Reset time value to the initial """
         self._time = self._initial_time
-
-
-class Data(object):
-    # TODO add __init__ and use this clas for used input pandas.Series
-    # as Data
-    # def __init__(self, data, coords, interp="interpolate"):
-
-    def __call__(self, time):
-        try:
-            if time in self.data['time'].values:
-                outdata = self.data.sel(time=time)
-            elif self.interp == "raw":
-                return np.nan
-            elif time > self.data['time'].values[-1]:
-                warnings.warn(
-                    self.py_name + "\n"
-                    + "extrapolating data above the maximum value of the time")
-                outdata = self.data[-1]
-            elif time < self.data['time'].values[0]:
-                warnings.warn(
-                    self.py_name + "\n"
-                    + "extrapolating data below the minimum value of the time")
-                outdata = self.data[0]
-            elif self.interp == "interpolate":
-                outdata = self.data.interp(time=time)
-            elif self.interp == 'look forward':
-                outdata = self.data.sel(time=time, method="backfill")
-            elif self.interp == 'hold backward':
-                outdata = self.data.sel(time=time, method="pad")
-
-            if self.is_float:
-                # if data has no-coords return a float
-                return float(outdata)
-            else:
-                # Remove time coord 
from the DataArray - return outdata.reset_coords('time', drop=True) - except Exception as err: - if self.data is None: - raise ValueError( - self.py_name + "\n" - "Trying to interpolate data variable before loading" - " the data...") - else: - # raise any other possible error - raise err - - -class RegData(Data): - def __init__(self, real_name, py_name, coords, interp="interpolate"): - self.real_name = real_name - self.py_name = py_name - self.coords = coords - self.interp = interp - self.is_float = not bool(coords) - self.data = None - - def load_data(self, file_names): - """ - Load data values from files. - - Parameters - ---------- - file_names: list or str - Name of the files to search the variable in. - - Returns - ------- - out: xarray.DataArray - Resulting data array with the time in the first dimension. - - """ - if isinstance(file_names, str): - file_names = [file_names] - - for file_name in file_names: - self.data = self._load_data(file_name) - if self.data is not None: - break - - if self.data is None: - raise ValueError( - f"_data_{self.py_name}\n" - f"Data for {self.real_name} not found in " - f"{', '.join(file_names)}") - - def _load_data(self, file_name): - """ - Load data values from output - - Parameters - ---------- - file_name: str - Name of the file to search the variable in. - - Returns - ------- - out: xarray.DataArray or None - Resulting data array with the time in the first dimension. - - """ - # TODO manage missing values on data, as external elements, create a - # hierarchy from External? - # get columns to load variable - columns = get_columns_to_load( - file_name, False, vars=[self.real_name, self.py_name]) - - if not columns: - # try reading transposed file - columns = get_columns_to_load( - file_name, True, vars=[self.real_name, self.py_name]) - - if not columns: - # variable not found - return None - else: - # variable must be read from a transposed file - transpose = True - else: - # variable found - transpose = False - - if not self.coords: - # 0 dimensional data - values = load_outputs(file_name, transpose, columns=columns) - return xr.DataArray( - values.iloc[:, 0].values, - {'time': values.index.values}, - ['time']) - - # subscripted data - dims = list(self.coords) - - values = load_outputs(file_name, transpose, columns=columns) - - out = xr.DataArray( - np.nan, - {'time': values.index.values, **self.coords}, - ['time'] + dims) - - for column in values.columns: - coords = { - dim: [coord] - for (dim, coord) - in zip(dims, re.split(r'\[|\]|\s*,\s*', column)[1:-1]) - } - out.loc[coords] = np.expand_dims( - values[column].values, - axis=tuple(range(1, len(coords)+1)) - ) - return out diff --git a/pysd/py_backend/data.py b/pysd/py_backend/data.py new file mode 100644 index 00000000..16e6695d --- /dev/null +++ b/pysd/py_backend/data.py @@ -0,0 +1,274 @@ +import warnings +import re + +import numpy as np +import xarray as xr +import pandas as pd + +from .utils import load_outputs + + +class Columns(): + """ + Class to save the read columns in data files + """ + _files = {} + + @classmethod + def read(cls, file_name, encoding=None): + """ + Read the Excel file or return the previously read one + """ + if file_name in cls._files: + return cls._files[file_name] + else: + columns = cls.read_file(file_name, encoding) + cls._files[file_name] = columns + return columns + + @classmethod + def read_file(cls, file_name, encoding=None): + """ + Get the columns from an output csv or tab file. + + Parameters + ---------- + file_name: str + Output file to read. 
Must be csv or tab. + + encoding: str or None (optional) + Encoding type to read output file. Needed if the file has special + characters. Default is None. + + Returns + ------- + out, transposed: set, bool + The set of the columns in the output file and a boolean flag to + indicate if the output file is transposed. + + """ + out = cls.read_line(file_name, encoding) + if out is None: + return None + transpose = False + + try: + [float(col) for col in out] + out = cls.read_row(file_name, encoding) + transpose = True + [float(col) for col in out] + except ValueError: + return out, transpose + else: + #TODO writte and test + raise ValueError("Invalid file format, variable names must in") + + @classmethod + def read_line(cls, file_name, encoding=None): + # TODO add decode method if encoding is pased + + with open(file_name, 'r') as file: + header = file.readline().rstrip() + + if file_name.lower().endswith(".tab"): + return set(header.split("\t")[1:]) + elif file_name.lower().endswith(".csv"): + # TODO improve like previous to go faster + # splitting csv is not easy as , are in subscripts + return set(pd.read_csv(file_name, + nrows=0, + encoding=encoding, + dtype=str, + header=0).iloc[:, 1:]) + else: + return None + + @classmethod + def read_row(cls, file_name, encoding=None): + if file_name.lower().endswith(".tab"): + return set(pd.read_table(file_name, + usecols=[0], + encoding=encoding, + dtype=str).iloc[:, 0].to_list()) + elif file_name.lower().endswith(".csv"): + return set(pd.read_csv(file_name, + usecols=[0], + encoding=encoding, + dtype=str).iloc[:, 0].to_list()) + + @classmethod + def get_columns(cls, file_name, vars=None, encoding=None): + if vars is None: + # Not var specified, return all available variables + return cls.read(file_name, encoding) + + columns, transpose = cls.read(file_name, encoding) + + vars_extended = [] + for var in vars: + vars_extended.append(var) + if var.startswith('"') and var.endswith('"'): + # the variables in "" are reded without " by pandas + vars_extended.append(var[1:-1]) + + outs = set() + for var in columns: + if var in vars_extended: + # var is in vars_extended (no subscripts) + outs.add(var) + vars_extended.remove(var) + else: + for var1 in vars_extended: + if var.startswith(var1 + "["): + # var is subscripted + outs.add(var) + + return outs, transpose + + @classmethod + def clean(cls): + """ + Clean the dictionary of read files + """ + cls._files = {} + + +class Data(object): + # TODO add __init__ and use this clas for used input pandas.Series + # as Data + # def __init__(self, data, coords, interp="interpolate"): + + def __call__(self, time): + try: + if time in self.data['time'].values: + outdata = self.data.sel(time=time) + elif self.interp == "raw": + return np.nan + elif time > self.data['time'].values[-1]: + warnings.warn( + self.py_name + "\n" + + "extrapolating data above the maximum value of the time") + outdata = self.data[-1] + elif time < self.data['time'].values[0]: + warnings.warn( + self.py_name + "\n" + + "extrapolating data below the minimum value of the time") + outdata = self.data[0] + elif self.interp == "interpolate": + outdata = self.data.interp(time=time) + elif self.interp == 'look forward': + outdata = self.data.sel(time=time, method="backfill") + elif self.interp == 'hold backward': + outdata = self.data.sel(time=time, method="pad") + + if self.is_float: + # if data has no-coords return a float + return float(outdata) + else: + # Remove time coord from the DataArray + return outdata.reset_coords('time', drop=True) + 
except Exception as err: + if self.data is None: + raise ValueError( + self.py_name + "\n" + "Trying to interpolate data variable before loading" + " the data...") + else: + # raise any other possible error + raise err + + +class TabData(Data): + """ + Data from tabular file tab/cls, it could be from Vensim output. + """ + def __init__(self, real_name, py_name, coords, interp="interpolate"): + self.real_name = real_name + self.py_name = py_name + self.coords = coords + self.interp = interp + self.is_float = not bool(coords) + self.data = None + + def load_data(self, file_names): + """ + Load data values from files. + + Parameters + ---------- + file_names: list or str + Name of the files to search the variable in. + + Returns + ------- + out: xarray.DataArray + Resulting data array with the time in the first dimension. + + """ + if isinstance(file_names, str): + file_names = [file_names] + + for file_name in file_names: + self.data = self._load_data(file_name) + if self.data is not None: + break + + if self.data is None: + raise ValueError( + f"_data_{self.py_name}\n" + f"Data for {self.real_name} not found in " + f"{', '.join(file_names)}") + + def _load_data(self, file_name): + """ + Load data values from output + + Parameters + ---------- + file_name: str + Name of the file to search the variable in. + + Returns + ------- + out: xarray.DataArray or None + Resulting data array with the time in the first dimension. + + """ + # get columns to load variable + columns, transpose = Columns.get_columns( + file_name, vars=[self.real_name, self.py_name]) + + if not columns: + # the variable is not in the passed file + return None + + if not self.coords: + # 0 dimensional data + values = load_outputs(file_name, transpose, columns=columns) + return xr.DataArray( + values.iloc[:, 0].values, + {'time': values.index.values}, + ['time']) + + # subscripted data + dims = list(self.coords) + + values = load_outputs(file_name, transpose, columns=columns) + + out = xr.DataArray( + np.nan, + {'time': values.index.values, **self.coords}, + ['time'] + dims) + + for column in values.columns: + coords = { + dim: [coord] + for (dim, coord) + in zip(dims, re.split(r'\[|\]|\s*,\s*', column)[1:-1]) + } + out.loc[coords] = np.expand_dims( + values[column].values, + axis=tuple(range(1, len(coords)+1)) + ) + return out + diff --git a/pysd/py_backend/external.py b/pysd/py_backend/external.py index 41427236..12fed233 100644 --- a/pysd/py_backend/external.py +++ b/pysd/py_backend/external.py @@ -12,7 +12,7 @@ import xarray as xr from openpyxl import load_workbook from . import utils -from .components import Data +from .data import Data class Excels(): diff --git a/pysd/py_backend/statefuls.py b/pysd/py_backend/statefuls.py index 48431331..773c4aa5 100644 --- a/pysd/py_backend/statefuls.py +++ b/pysd/py_backend/statefuls.py @@ -17,7 +17,8 @@ class objects. 
from .functions import zidz, if_then_else from .external import External, Excels from .decorators import Cache, constant_cache -from .components import Components, Time, Data, RegData +from .data import Data, TabData +from .components import Components, Time from pysd._version import __version__ @@ -649,7 +650,7 @@ def __init__(self, py_model_file, params=None, return_func=None, self._data_elements = [ getattr(self.components, name) for name in dir(self.components) - if isinstance(getattr(self.components, name), RegData) + if isinstance(getattr(self.components, name), TabData) ] if data_files: diff --git a/pysd/py_backend/utils.py b/pysd/py_backend/utils.py index 960554fb..148dd063 100644 --- a/pysd/py_backend/utils.py +++ b/pysd/py_backend/utils.py @@ -462,82 +462,6 @@ def load_outputs(file_name, transpose=False, columns=None, encoding=None): + f"Only {', '.join(list(read_func))} files are accepted.") -def get_columns_to_load(file_name, transpose=False, vars=None, encoding=None): - """ - Returns the variable names in the given file. - - Parameters - ---------- - file_name: str - Output file to read. Must be csv or tab. - - transpose: bool (optional) - If True reads transposed outputs file, i.e. one variable per row. - Default is False. - - vars: list or None (optional) - List of the vars names to return. If None loads all the column names. - Default is None. - - encoding: str or None (optional) - Encoding type to read output file. Needed if the file has special - characters. Default is None. - - Returns - ------- - set - Set of columns for var. - - """ - read_func = {'.csv': pd.read_csv, '.tab': pd.read_table} - out = None - - for end, func in read_func.items(): - if file_name.lower().endswith(end): - if transpose: - out = func(file_name, - encoding=encoding, - usecols=[0], - dtype=str).iloc[:, 0].to_list() - else: - out = func(file_name, - encoding=encoding, - nrows=1, - dtype=str).iloc[:, 1:] - - out = set(out) - - if out is None: - raise ValueError( - f"\nNot able to read '{file_name}'. " - + f"Only {', '.join(list(read_func))} files are accepted.") - - if vars is None: - # Not var specified, return all available variables - return out - - else: - vars_extended = [] - for var in vars: - vars_extended.append(var) - if var.startswith('"') and var.endswith('"'): - # the variables in "" are reded without " by pandas - vars_extended.append(var[1:-1]) - outs = set() - for var in out: - if var in vars_extended: - # var is in vars_extended (no subscripts) - outs.add(var) - vars_extended.remove(var) - else: - for var1 in vars_extended: - if var.startswith(var1 + "["): - # var is subscripted - outs.add(var) - - return outs - - def detect_encoding(filename): """ Detects the encoding of a file. 
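With this commit the column lookup for data files lives in the new Columns class of pysd/py_backend/data.py rather than in utils.get_columns_to_load. A minimal usage sketch (the file name below is illustrative; get_columns returns the matching column names together with a flag telling whether the file is transposed)::

    from pysd.py_backend.data import Columns

    # Columns that belong to the given variables; a subscripted variable
    # matches all of its "var[sub1,sub2]" columns.
    columns, transpose = Columns.get_columns(
        "output.tab", vars=["Room Temperature"])

    # The header of each file is cached after the first read; clean()
    # empties the cache (e.g. between test runs).
    Columns.clean()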
diff --git a/pysd/translation/builder.py b/pysd/translation/builder.py index 07ae4d4b..0661452f 100644 --- a/pysd/translation/builder.py +++ b/pysd/translation/builder.py @@ -27,11 +27,11 @@ class Imports(): Class to save the imported modules information for intelligent import """ _numpy, _xarray, _subs = False, False, False - _functions, _statefuls, _external, _components, _utils =\ + _functions, _statefuls, _external, _data, _utils =\ set(), set(), set(), set(), set() _external_libs = {"numpy": "np", "xarray": "xr"} _internal_libs = [ - "functions", "statefuls", "external", "components","utils"] + "functions", "statefuls", "external", "data", "utils"] @classmethod def add(cls, module, function=None): @@ -110,7 +110,7 @@ def reset(cls): Reset the imported modules """ cls._numpy, cls._xarray, cls._subs = False, False, False - cls._functions, cls._statefuls, cls._external, cls._components,\ + cls._functions, cls._statefuls, cls._external, cls._data,\ cls._utils = set(), set(), set(), set(), set() @@ -672,7 +672,7 @@ def build_element(element, subscript_dict): # convert newline indicator and add expected level of indentation element["doc"] = element["doc"].replace("\\", "\n").replace("\n", "\n ") - if element["kind"] in ["stateful", "external", "reg_data"]: + if element["kind"] in ["stateful", "external", "tab_data"]: func = """ %(py_name)s = %(py_expr)s """ % { @@ -1760,11 +1760,10 @@ def add_initial(identifier, value, deps): return "%s()" % stateful["py_name"], [stateful] -def add_reg_data(identifier, real_name, subs, +def add_tab_data(identifier, real_name, subs, subscript_dict, merge_subs, keyword): """ - Constructs a external object for handling Vensim's GET XLS DATA and - GET DIRECT DATA functionality. + Constructs an object for handling Vensim's regular DATA components. Parameters ---------- @@ -1792,14 +1791,14 @@ def add_reg_data(identifier, real_name, subs, Returns ------- reference: str - Reference to the ExtData object `__call__` method, which will + Reference to the TabData object `__call__` method, which will return the retrieved value of data for the current time step. new_structure: list List of element construction dictionaries for the builder to assemble. 
""" - Imports.add("components", "RegData") + Imports.add("data", "TabData") coords = utils.simplify_subscript_input( utils.make_coord_dict(subs, subscript_dict, terse=False), @@ -1814,14 +1813,14 @@ def add_reg_data(identifier, real_name, subs, "parent_name": identifier, "real_name": "Data for %s" % identifier, "doc": "Provides data for data variable %s" % identifier, - "py_expr": "RegData('%s', '%s', %s, %s)" % ( + "py_expr": "TabData('%s', '%s', %s, %s)" % ( real_name, identifier, coords, keyword), "unit": "None", "lims": "None", "eqn": "None", "subs": subs, "merge_subs": merge_subs, - "kind": "reg_data", + "kind": "tab_data", "arguments": "", } diff --git a/pysd/translation/vensim/vensim2py.py b/pysd/translation/vensim/vensim2py.py index fc57c5a0..d2b4614a 100644 --- a/pysd/translation/vensim/vensim2py.py +++ b/pysd/translation/vensim/vensim2py.py @@ -1694,7 +1694,7 @@ def translate_section(section, macro_list, sketch, root_path, subview_sep=""): elif element["kind"] == "data": element["eqn"] = element["expr"] = element["arguments"] = "" - element["py_expr"], new_structure = builder.add_reg_data( + element["py_expr"], new_structure = builder.add_tab_data( element["py_name"], element["real_name"], element["subs"], subscript_dict, element["merge_subs"], element["keyword"]) From a0ee419f8a5f5c0d2476bf38ce3fea45874a7f2f Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Mon, 15 Nov 2021 10:33:43 +0100 Subject: [PATCH 07/16] Move tests for data --- pysd/py_backend/data.py | 16 +- pysd/py_backend/statefuls.py | 2 +- pysd/translation/utils.py | 6 +- tests/data/out_teacup_no_head.csv | 241 ++++++++++++++++++++++++++++++ tests/unit_test_data.py | 143 ++++++++++++++++++ tests/unit_test_pysd.py | 2 +- tests/unit_test_utils.py | 51 ------- 7 files changed, 402 insertions(+), 59 deletions(-) create mode 100644 tests/data/out_teacup_no_head.csv create mode 100644 tests/unit_test_data.py diff --git a/pysd/py_backend/data.py b/pysd/py_backend/data.py index 16e6695d..58644064 100644 --- a/pysd/py_backend/data.py +++ b/pysd/py_backend/data.py @@ -49,7 +49,10 @@ def read_file(cls, file_name, encoding=None): """ out = cls.read_line(file_name, encoding) if out is None: - return None + raise ValueError( + f"\nNot able to read '{file_name}'. " + + "Only '.csv', '.tab' files are accepted.") + transpose = False try: @@ -60,8 +63,9 @@ def read_file(cls, file_name, encoding=None): except ValueError: return out, transpose else: - #TODO writte and test - raise ValueError("Invalid file format, variable names must in") + raise ValueError( + f"Invalid file format '{file_name}'... varible names " + "should appear in the first row or in the first column...") @classmethod def read_line(cls, file_name, encoding=None): @@ -110,6 +114,9 @@ def get_columns(cls, file_name, vars=None, encoding=None): if var.startswith('"') and var.endswith('"'): # the variables in "" are reded without " by pandas vars_extended.append(var[1:-1]) + else: + vars_extended.append('"' + var) + vars_extended.append('"' + var + '"') outs = set() for var in columns: @@ -134,7 +141,7 @@ def clean(cls): class Data(object): - # TODO add __init__ and use this clas for used input pandas.Series + # TODO add __init__ and use this class for used input pandas.Series # as Data # def __init__(self, data, coords, interp="interpolate"): @@ -234,6 +241,7 @@ def _load_data(self, file_name): Resulting data array with the time in the first dimension. 
""" + # TODO inlcude missing values managment as External objects # get columns to load variable columns, transpose = Columns.get_columns( file_name, vars=[self.real_name, self.py_name]) diff --git a/pysd/py_backend/statefuls.py b/pysd/py_backend/statefuls.py index 773c4aa5..0e7cb545 100644 --- a/pysd/py_backend/statefuls.py +++ b/pysd/py_backend/statefuls.py @@ -17,7 +17,7 @@ class objects. from .functions import zidz, if_then_else from .external import External, Excels from .decorators import Cache, constant_cache -from .data import Data, TabData +from .data import TabData from .components import Components, Time from pysd._version import __version__ diff --git a/pysd/translation/utils.py b/pysd/translation/utils.py index 712c9a93..601926f6 100644 --- a/pysd/translation/utils.py +++ b/pysd/translation/utils.py @@ -14,7 +14,8 @@ from keyword import kwlist from builtins import __dir__ as bidir from ..py_backend.components import __dir__ as cdir -from ..py_backend.decorators import __dir__ as ddir +from ..py_backend.data import __dir__ as ddir +from ..py_backend.decorators import __dir__ as dedir from ..py_backend.external import __dir__ as edir from ..py_backend.functions import __dir__ as fdir from ..py_backend.statefuls import __dir__ as sdir @@ -22,7 +23,8 @@ reserved_words = set( - dir() + bidir() + cdir() + fdir() + edir() + ddir() + sdir() + udir()) + dir() + bidir() + cdir() + ddir() + dedir() + edir() + fdir() + + sdir() + udir()) reserved_words = reserved_words.union(kwlist) diff --git a/tests/data/out_teacup_no_head.csv b/tests/data/out_teacup_no_head.csv new file mode 100644 index 00000000..271acd5c --- /dev/null +++ b/tests/data/out_teacup_no_head.csv @@ -0,0 +1,241 @@ +0,10,11,70,180 +0.125,10,10.8625,70,178.625 +0.25,10,10.7267,70,177.267 +0.375,10,10.5926,70,175.926 +0.5,10,10.4602,70,174.602 +0.625,10,10.3295,70,173.295 +0.75,10,10.2004,70,172.004 +0.875,10,10.0729,70,170.729 +1,10,9.94694,70,169.469 +1.125,10,9.8226,70,168.226 +1.25,10,9.69982,70,166.998 +1.375,10,9.57857,70,165.786 +1.5,10,9.45884,70,164.588 +1.625,10,9.3406,70,163.406 +1.75,10,9.22385,70,162.238 +1.875,10,9.10855,70,161.085 +2,10,8.99469,70,159.947 +2.125,10,8.88226,70,158.823 +2.25,10,8.77123,70,157.712 +2.375,10,8.66159,70,156.616 +2.5,10,8.55332,70,155.533 +2.625,10,8.4464,70,154.464 +2.75,10,8.34082,70,153.408 +2.875,10,8.23656,70,152.366 +3,10,8.13361,70,151.336 +3.125,10,8.03194,70,150.319 +3.25,10,7.93154,70,149.315 +3.375,10,7.83239,70,148.324 +3.5,10,7.73449,70,147.345 +3.625,10,7.63781,70,146.378 +3.75,10,7.54233,70,145.423 +3.875,10,7.44805,70,144.481 +4,10,7.35495,70,143.55 +4.125,10,7.26302,70,142.63 +4.25,10,7.17223,70,141.722 +4.375,10,7.08258,70,140.826 +4.5,10,6.99404,70,139.94 +4.625,10,6.90662,70,139.066 +4.75,10,6.82029,70,138.203 +4.875,10,6.73503,70,137.35 +5,10,6.65085,70,136.508 +5.125,10,6.56771,70,135.677 +5.25,10,6.48561,70,134.856 +5.375,10,6.40454,70,134.045 +5.5,10,6.32449,70,133.245 +5.625,10,6.24543,70,132.454 +5.75,10,6.16736,70,131.674 +5.875,10,6.09027,70,130.903 +6,10,6.01414,70,130.141 +6.125,10,5.93896,70,129.39 +6.25,10,5.86473,70,128.647 +6.375,10,5.79142,70,127.914 +6.5,10,5.71903,70,127.19 +6.625,10,5.64754,70,126.475 +6.75,10,5.57694,70,125.769 +6.875,10,5.50723,70,125.072 +7,10,5.43839,70,124.384 +7.125,10,5.37041,70,123.704 +7.25,10,5.30328,70,123.033 +7.375,10,5.23699,70,122.37 +7.5,10,5.17153,70,121.715 +7.625,10,5.10688,70,121.069 +7.75,10,5.04305,70,120.43 +7.875,10,4.98001,70,119.8 +8,10,4.91776,70,119.178 +8.125,10,4.85629,70,118.563 
+8.25,10,4.79558,70,117.956 +8.375,10,4.73564,70,117.356 +8.5,10,4.67644,70,116.764 +8.625,10,4.61799,70,116.18 +8.75,10,4.56026,70,115.603 +8.875,10,4.50326,70,115.033 +9,10,4.44697,70,114.47 +9.125,10,4.39138,70,113.914 +9.25,10,4.33649,70,113.365 +9.375,10,4.28228,70,112.823 +9.5,10,4.22876,70,112.288 +9.625,10,4.1759,70,111.759 +9.75,10,4.1237,70,111.237 +9.875,10,4.07215,70,110.722 +10,10,4.02125,70,110.212 +10.125,10,3.97098,70,109.71 +10.25,10,3.92135,70,109.213 +10.375,10,3.87233,70,108.723 +10.5,10,3.82393,70,108.239 +10.625,10,3.77613,70,107.761 +10.75,10,3.72893,70,107.289 +10.875,10,3.68231,70,106.823 +11,10,3.63628,70,106.363 +11.125,10,3.59083,70,105.908 +11.25,10,3.54595,70,105.459 +11.375,10,3.50162,70,105.016 +11.5,10,3.45785,70,104.579 +11.625,10,3.41463,70,104.146 +11.75,10,3.37195,70,103.719 +11.875,10,3.3298,70,103.298 +12,10,3.28817,70,102.882 +12.125,10,3.24707,70,102.471 +12.25,10,3.20648,70,102.065 +12.375,10,3.1664,70,101.664 +12.5,10,3.12682,70,101.268 +12.625,10,3.08774,70,100.877 +12.75,10,3.04914,70,100.491 +12.875,10,3.01103,70,100.11 +13,10,2.97339,70,99.7339 +13.125,10,2.93622,70,99.3622 +13.25,10,2.89952,70,98.9952 +13.375,10,2.86327,70,98.6327 +13.5,10,2.82748,70,98.2748 +13.625,10,2.79214,70,97.9214 +13.75,10,2.75724,70,97.5724 +13.875,10,2.72277,70,97.2277 +14,10,2.68874,70,96.8874 +14.125,10,2.65513,70,96.5513 +14.25,10,2.62194,70,96.2194 +14.375,10,2.58917,70,95.8917 +14.5,10,2.5568,70,95.568 +14.625,10,2.52484,70,95.2484 +14.75,10,2.49328,70,94.9328 +14.875,10,2.46211,70,94.6211 +15,10,2.43134,70,94.3134 +15.125,10,2.40095,70,94.0095 +15.25,10,2.37093,70,93.7093 +15.375,10,2.3413,70,93.413 +15.5,10,2.31203,70,93.1203 +15.625,10,2.28313,70,92.8313 +15.75,10,2.25459,70,92.5459 +15.875,10,2.22641,70,92.2641 +16,10,2.19858,70,91.9858 +16.125,10,2.1711,70,91.711 +16.25,10,2.14396,70,91.4396 +16.375,10,2.11716,70,91.1716 +16.5,10,2.09069,70,90.9069 +16.625,10,2.06456,70,90.6456 +16.75,10,2.03875,70,90.3875 +16.875,10,2.01327,70,90.1327 +17,10,1.9881,70,89.881 +17.125,10,1.96325,70,89.6325 +17.25,10,1.93871,70,89.3871 +17.375,10,1.91448,70,89.1448 +17.5,10,1.89055,70,88.9055 +17.625,10,1.86691,70,88.6691 +17.75,10,1.84358,70,88.4358 +17.875,10,1.82053,70,88.2053 +18,10,1.79778,70,87.9778 +18.125,10,1.7753,70,87.753 +18.25,10,1.75311,70,87.5311 +18.375,10,1.7312,70,87.312 +18.5,10,1.70956,70,87.0956 +18.625,10,1.68819,70,86.8819 +18.75,10,1.66709,70,86.6709 +18.875,10,1.64625,70,86.4625 +19,10,1.62567,70,86.2567 +19.125,10,1.60535,70,86.0535 +19.25,10,1.58528,70,85.8528 +19.375,10,1.56547,70,85.6547 +19.5,10,1.5459,70,85.459 +19.625,10,1.52657,70,85.2657 +19.75,10,1.50749,70,85.0749 +19.875,10,1.48865,70,84.8865 +20,10,1.47004,70,84.7004 +20.125,10,1.45166,70,84.5166 +20.25,10,1.43352,70,84.3352 +20.375,10,1.4156,70,84.156 +20.5,10,1.3979,70,83.979 +20.625,10,1.38043,70,83.8043 +20.75,10,1.36318,70,83.6318 +20.875,10,1.34614,70,83.4614 +21,10,1.32931,70,83.2931 +21.125,10,1.31269,70,83.1269 +21.25,10,1.29628,70,82.9628 +21.375,10,1.28008,70,82.8008 +21.5,10,1.26408,70,82.6408 +21.625,10,1.24828,70,82.4828 +21.75,10,1.23268,70,82.3268 +21.875,10,1.21727,70,82.1727 +22,10,1.20205,70,82.0205 +22.125,10,1.18703,70,81.8703 +22.25,10,1.17219,70,81.7219 +22.375,10,1.15754,70,81.5754 +22.5,10,1.14307,70,81.4307 +22.625,10,1.12878,70,81.2878 +22.75,10,1.11467,70,81.1467 +22.875,10,1.10073,70,81.0073 +23,10,1.08698,70,80.8698 +23.125,10,1.07339,70,80.7339 +23.25,10,1.05997,70,80.5997 +23.375,10,1.04672,70,80.4672 +23.5,10,1.03364,70,80.3364 
+23.625,10,1.02072,70,80.2072 +23.75,10,1.00796,70,80.0796 +23.875,10,0.99536,70,79.9536 +24,10,0.982918,70,79.8292 +24.125,10,0.970631,70,79.7063 +24.25,10,0.958498,70,79.585 +24.375,10,0.946517,70,79.4652 +24.5,10,0.934686,70,79.3469 +24.625,10,0.923002,70,79.23 +24.75,10,0.911465,70,79.1146 +24.875,10,0.900072,70,79.0007 +25,10,0.888821,70,78.8882 +25.125,10,0.877711,70,78.7771 +25.25,10,0.86674,70,78.6674 +25.375,10,0.855905,70,78.5591 +25.5,10,0.845206,70,78.4521 +25.625,10,0.834641,70,78.3464 +25.75,10,0.824208,70,78.2421 +25.875,10,0.813905,70,78.1391 +26,10,0.803732,70,78.0373 +26.125,10,0.793685,70,77.9369 +26.25,10,0.783764,70,77.8376 +26.375,10,0.773967,70,77.7397 +26.5,10,0.764292,70,77.6429 +26.625,10,0.754739,70,77.5474 +26.75,10,0.745304,70,77.453 +26.875,10,0.735988,70,77.3599 +27,10,0.726788,70,77.2679 +27.125,10,0.717703,70,77.177 +27.25,10,0.708732,70,77.0873 +27.375,10,0.699873,70,76.9987 +27.5,10,0.691124,70,76.9112 +27.625,10,0.682485,70,76.8249 +27.75,10,0.673954,70,76.7395 +27.875,10,0.66553,70,76.6553 +28,10,0.657211,70,76.5721 +28.125,10,0.648995,70,76.49 +28.25,10,0.640883,70,76.4088 +28.375,10,0.632872,70,76.3287 +28.5,10,0.624961,70,76.2496 +28.625,10,0.617149,70,76.1715 +28.75,10,0.609435,70,76.0944 +28.875,10,0.601817,70,76.0182 +29,10,0.594295,70,75.9429 +29.125,10,0.586866,70,75.8687 +29.25,10,0.57953,70,75.7953 +29.375,10,0.572286,70,75.7229 +29.5,10,0.565133,70,75.6513 +29.625,10,0.558069,70,75.5807 +29.75,10,0.551093,70,75.5109 +29.875,10,0.544205,70,75.442 +30,10,0.537402,70,75.374 diff --git a/tests/unit_test_data.py b/tests/unit_test_data.py new file mode 100644 index 00000000..bf1c611b --- /dev/null +++ b/tests/unit_test_data.py @@ -0,0 +1,143 @@ +import os +import itertools +import unittest + +_root = os.path.dirname(__file__) + + +class TestColumns(unittest.TestCase): + def test_clean_columns(self): + from pysd.py_backend.data import Columns + Columns.clean() + self.assertEqual(Columns._files, {}) + Columns.read( + os.path.join(_root, "data/out_teacup.csv")) + self.assertNotEqual(Columns._files, {}) + self.assertIn(os.path.join(_root, "data/out_teacup.csv"), + Columns._files) + Columns.clean() + self.assertEqual(Columns._files, {}) + + def test_non_valid_outputs(self): + from pysd.py_backend.data import Columns + + with self.assertRaises(ValueError) as err: + Columns.read_file( + os.path.join( + _root, + "more-tests/not_vensim/test_not_vensim.txt")) + + self.assertIn( + "Not able to read '", + str(err.exception)) + self.assertIn( + "more-tests/not_vensim/test_not_vensim.txt'.", + str(err.exception)) + + def test_non_valid_file_format(self): + from pysd.py_backend.data import Columns + + file_name = os.path.join(_root, "data/out_teacup_no_head.csv") + with self.assertRaises(ValueError) as err: + Columns.read_file(file_name) + + self.assertIn( + f"Invalid file format '{file_name}'... 
varible names " + + "should appear in the first row or in the first column...", + str(err.exception)) + + def test_transposed_frame(self): + from pysd.py_backend.data import Columns + + cols1, trans1 = Columns.get_columns( + os.path.join(_root, "data/out_teacup.csv")) + cols2, trans2 = Columns.get_columns( + os.path.join(_root, "data/out_teacup_transposed.csv")) + Columns.clean() + + self.assertEqual(cols1, cols2) + self.assertFalse(trans1) + self.assertTrue(trans2) + + def test_get_columns(self): + from pysd.py_backend.data import Columns + + cols0, trans0 = Columns.get_columns( + os.path.join(_root, "data/out_teacup.csv")) + + cols1, trans1 = Columns.get_columns( + os.path.join(_root, "data/out_teacup.csv"), + vars=["Room Temperature", "Teacup Temperature"]) + + cols2, trans2 = Columns.get_columns( + os.path.join(_root, "data/out_teacup_transposed.csv"), + vars=["Heat Loss to Room"]) + + cols3 = Columns.get_columns( + os.path.join(_root, "data/out_teacup_transposed.csv"), + vars=["No column"])[0] + + Columns.clean() + + self.assertTrue(cols1.issubset(cols0)) + self.assertEqual( + cols1, + set(["Room Temperature", "Teacup Temperature"])) + + self.assertTrue(cols2.issubset(cols0)) + self.assertEqual( + cols2, + set(["Heat Loss to Room"])) + + self.assertEqual(cols3, set()) + + self.assertFalse(trans0) + self.assertFalse(trans1) + self.assertTrue(trans2) + + def test_get_columns_subscripted(self): + from pysd.py_backend.data import Columns + + data_file = os.path.join( + _root, + "test-models/tests/subscript_3d_arrays_widthwise/output.tab" + ) + + data_file2 = os.path.join( + _root, + "test-models/tests/subscript_2d_arrays/output.tab" + ) + + subsd = { + "d3": ["Depth 1", "Depth 2"], + "d2": ["Column 1", "Column 2"], + "d1": ["Entry 1", "Entry 2", "Entry 3"] + } + + cols1 = Columns.get_columns( + data_file, + vars=["Three Dimensional Constant"])[0] + + expected = { + "\"Three Dimensional Constant[" + ",".join(el) + "]\"" + for el in itertools.product(subsd["d1"], subsd["d2"], subsd["d3"]) + } + + self.assertEqual(cols1, expected) + + cols2 = Columns.get_columns( + data_file2, + vars=["Rate A", "Stock A"])[0] + + subs = list(itertools.product(subsd["d1"], subsd["d2"])) + expected = { + "\"Rate A[" + ",".join(el) + "]\"" + for el in subs + } + + expected.update({ + "\"Stock A[" + ",".join(el) + "]\"" + for el in subs + }) + + self.assertEqual(cols2, expected) diff --git a/tests/unit_test_pysd.py b/tests/unit_test_pysd.py index 4accbc7b..f4e3c11b 100644 --- a/tests/unit_test_pysd.py +++ b/tests/unit_test_pysd.py @@ -2001,7 +2001,7 @@ def test_missing_data(self): self.assertIn( "Data for \"data-3\" not found in " - "/home/eneko/CREAF/dev/pysd/tests/more-tests/data_model/data3.tab", + + self.data_folder + "data3.tab", str(err.exception)) def test_get_data_variable_not_found_from_dict_file(self): diff --git a/tests/unit_test_utils.py b/tests/unit_test_utils.py index e5a9e71d..81a2cf8e 100644 --- a/tests/unit_test_utils.py +++ b/tests/unit_test_utils.py @@ -442,57 +442,6 @@ def test_load_columns(self): self.assertTrue((out0.index == out1.index).all()) self.assertTrue((out0.index == out2.index).all()) - def test_non_valid_outputs_get_columns(self): - from pysd.py_backend.utils import get_columns_to_load - - with self.assertRaises(ValueError) as err: - get_columns_to_load( - os.path.join( - _root, - "more-tests/not_vensim/test_not_vensim.txt")) - - self.assertIn( - "Not able to read '", - str(err.exception)) - self.assertIn( - "more-tests/not_vensim/test_not_vensim.txt'.", - str(err.exception)) - - 
def test_transposed_frame_get_columns(self): - from pysd.py_backend.utils import get_columns_to_load - - self.assertEqual(get_columns_to_load( - os.path.join(_root, "data/out_teacup.csv")), - get_columns_to_load( - os.path.join(_root, "data/out_teacup_transposed.csv"), - transpose=True) - ) - - def test_load_column_get_columns(self): - from pysd.py_backend.utils import get_columns_to_load - - out0 = get_columns_to_load( - os.path.join(_root, "data/out_teacup.csv")) - - out1 = get_columns_to_load( - os.path.join(_root, "data/out_teacup.csv"), - vars=["Room Temperature", "Teacup Temperature"]) - - out2 = get_columns_to_load( - os.path.join(_root, "data/out_teacup_transposed.csv"), - transpose=True, - vars=["Heat Loss to Room"]) - - self.assertTrue(out1.issubset(out0)) - self.assertEqual( - out1, - set(["Room Temperature", "Teacup Temperature"])) - - self.assertTrue(out2.issubset(out0)) - self.assertEqual( - out2, - set(["Heat Loss to Room"])) - class TestProgressbar(TestCase): def test_progressbar(self): From f58283fd57f8577097f47e1b9084a6b522e99e80 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Mon, 15 Nov 2021 12:07:38 +0100 Subject: [PATCH 08/16] Document Columns class --- pysd/py_backend/data.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/pysd/py_backend/data.py b/pysd/py_backend/data.py index 58644064..6b83dddd 100644 --- a/pysd/py_backend/data.py +++ b/pysd/py_backend/data.py @@ -17,7 +17,7 @@ class Columns(): @classmethod def read(cls, file_name, encoding=None): """ - Read the Excel file or return the previously read one + Read the columns from the data file or return the previously read ones """ if file_name in cls._files: return cls._files[file_name] @@ -69,6 +69,9 @@ def read_file(cls, file_name, encoding=None): @classmethod def read_line(cls, file_name, encoding=None): + """ + Read the firts row and return a set of it. + """ # TODO add decode method if encoding is pased with open(file_name, 'r') as file: @@ -89,6 +92,9 @@ def read_line(cls, file_name, encoding=None): @classmethod def read_row(cls, file_name, encoding=None): + """ + Read the firts column and return a set of it. + """ if file_name.lower().endswith(".tab"): return set(pd.read_table(file_name, usecols=[0], @@ -102,6 +108,30 @@ def read_row(cls, file_name, encoding=None): @classmethod def get_columns(cls, file_name, vars=None, encoding=None): + """ + Get columns names from a tab or csv file and return those that + match with the given ones. + + Parameters + ---------- + file_name: str + Output file to read. Must be csv or tab. + + vars: list + List of var names to find in the file. + + encoding: str or None (optional) + Encoding type to read output file. Needed if the file has special + characters. Default is None. + + Return + ------ + columns, transpose: set, bool + The set of columns as they are named in the input file and a + boolean flag to indicate if the input file is transposed or + not. 
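+
+        Example (a sketch; the file name and variable are illustrative)::
+
+            >>> Columns.get_columns("output.tab", vars=["Room Temperature"])
+            ({'Room Temperature'}, False)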
+ + """ if vars is None: # Not var specified, return all available variables return cls.read(file_name, encoding) @@ -115,6 +145,7 @@ def get_columns(cls, file_name, vars=None, encoding=None): # the variables in "" are reded without " by pandas vars_extended.append(var[1:-1]) else: + # the variable may have " on its name in the tab or csv file vars_extended.append('"' + var) vars_extended.append('"' + var + '"') @@ -279,4 +310,3 @@ def _load_data(self, file_name): axis=tuple(range(1, len(coords)+1)) ) return out - From 9fc768a4552e2c7cb4aafac416f6235d5f8c24ae Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Mon, 15 Nov 2021 14:40:55 +0100 Subject: [PATCH 09/16] Add two new test for data --- tests/unit_test_data.py | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/unit_test_data.py b/tests/unit_test_data.py index bf1c611b..8bdef7dc 100644 --- a/tests/unit_test_data.py +++ b/tests/unit_test_data.py @@ -2,6 +2,8 @@ import itertools import unittest +import xarray as xr + _root = os.path.dirname(__file__) @@ -141,3 +143,55 @@ def test_get_columns_subscripted(self): }) self.assertEqual(cols2, expected) + + +class TestData(unittest.TestCase): + # Several Data cases are tested in unit_test_external while some other + # are tested indirectly in unit_test_pysd and integration_test_vensim + + def test_no_data_error(self): + from pysd.py_backend.data import Data + obj = Data() + obj.data = None + obj.interp = "interpolate" + obj.py_name = "data" + with self.assertRaises(ValueError) as err: + obj(1.5) + + self.assertIn( + "Trying to interpolate data variable before loading the data...", + str(err.exception)) + + def test_invalid_data_regular_error(self): + # test that try/except block on call doesn't catch errors differents + # than data = None + from pysd.py_backend.data import Data + + obj = Data() + + obj.data = 3 + + with self.assertRaises(TypeError) as err: + obj(1.5) + + self.assertIn( + "'int' object is not subscriptable", + str(err.exception)) + + obj.data = xr.DataArray([10, 20], {'dim1': [0, 1]}, ['dim1']) + + with self.assertRaises(KeyError) as err: + obj(1.5) + + self.assertIn( + "'time'", + str(err.exception)) + + obj.data = xr.DataArray([10, 20], {'time': [0, 1]}, ['time']) + + with self.assertRaises(AttributeError) as err: + obj(1.5) + + self.assertIn( + "'Data' object has no attribute 'interp'", + str(err.exception)) From 120877a32988761c47f804bf424b81a0d3b16332 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Mon, 15 Nov 2021 15:47:00 +0100 Subject: [PATCH 10/16] Use read_table to get colum names Avoid using the common file reader, pandas has an unique way to work with " and other file readers may fail. --- pysd/py_backend/data.py | 17 +++++------------ tests/unit_test_data.py | 6 +++--- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/pysd/py_backend/data.py b/pysd/py_backend/data.py index 6b83dddd..da65bcb8 100644 --- a/pysd/py_backend/data.py +++ b/pysd/py_backend/data.py @@ -72,16 +72,13 @@ def read_line(cls, file_name, encoding=None): """ Read the firts row and return a set of it. 
""" - # TODO add decode method if encoding is pased - - with open(file_name, 'r') as file: - header = file.readline().rstrip() - if file_name.lower().endswith(".tab"): - return set(header.split("\t")[1:]) + return set(pd.read_table(file_name, + nrows=0, + encoding=encoding, + dtype=str, + header=0).iloc[:, 1:]) elif file_name.lower().endswith(".csv"): - # TODO improve like previous to go faster - # splitting csv is not easy as , are in subscripts return set(pd.read_csv(file_name, nrows=0, encoding=encoding, @@ -144,10 +141,6 @@ def get_columns(cls, file_name, vars=None, encoding=None): if var.startswith('"') and var.endswith('"'): # the variables in "" are reded without " by pandas vars_extended.append(var[1:-1]) - else: - # the variable may have " on its name in the tab or csv file - vars_extended.append('"' + var) - vars_extended.append('"' + var + '"') outs = set() for var in columns: diff --git a/tests/unit_test_data.py b/tests/unit_test_data.py index 8bdef7dc..2b1f07d0 100644 --- a/tests/unit_test_data.py +++ b/tests/unit_test_data.py @@ -121,7 +121,7 @@ def test_get_columns_subscripted(self): vars=["Three Dimensional Constant"])[0] expected = { - "\"Three Dimensional Constant[" + ",".join(el) + "]\"" + "Three Dimensional Constant[" + ",".join(el) + "]" for el in itertools.product(subsd["d1"], subsd["d2"], subsd["d3"]) } @@ -133,12 +133,12 @@ def test_get_columns_subscripted(self): subs = list(itertools.product(subsd["d1"], subsd["d2"])) expected = { - "\"Rate A[" + ",".join(el) + "]\"" + "Rate A[" + ",".join(el) + "]" for el in subs } expected.update({ - "\"Stock A[" + ",".join(el) + "]\"" + "Stock A[" + ",".join(el) + "]" for el in subs }) From 3f09394a4f094e9d5c1b00a74567042fc1443454 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Fri, 26 Nov 2021 13:52:03 +0100 Subject: [PATCH 11/16] Add method to identify module or submodule elements --- pysd/py_backend/statefuls.py | 76 ++++++++++++++++++++++++++++++++++++ pysd/py_backend/utils.py | 18 +++++++++ 2 files changed, 94 insertions(+) diff --git a/pysd/py_backend/statefuls.py b/pysd/py_backend/statefuls.py index 0e7cb545..148b0e8b 100644 --- a/pysd/py_backend/statefuls.py +++ b/pysd/py_backend/statefuls.py @@ -22,6 +22,8 @@ class objects. from pysd._version import __version__ +from pysd.py_backend import components + small_vensim = 1e-6 # What is considered zero according to Vensim Help @@ -1634,6 +1636,80 @@ def run(self, params=None, return_columns=None, return_timestamps=None, return return_df + def get_dependencies(self, vars): + """ + Get the dependencies of a set of variables or modules. + + Parameters + ---------- + vars: set or list + Set or list of variables to get the dependencies from + + Returns + ------- + dependencies: set + Set of dependencies nedded to run vars. + + """ + dependencies = set() + current_vars = set() + for var in vars: + py_name = utils.get_key_and_value_by_insensitive_key_or_value( + var, + self.components._namespace)[1] + if py_name is None: + current_vars.update(self.get_vars_in_module(var)) + pass + else: + current_vars.add(py_name) + + return current_vars + return dependencies + + def get_vars_in_module(self, module): + """ + Return the name of python vars in a module. + + Parameters + ---------- + module: str + Name of the module to search in. + + Returns + ------- + vars: set + Set of varible names in the given module. 
+
+        """
+        modules = self.components._modules.copy()
+
+        while modules:
+            # find the module or the submodule
+            if module in modules:
+                module_content = [modules[module]]
+                break
+            new_modules = {}
+            [new_modules.update(value) for value in modules.values()
+             if isinstance(value, dict)]
+            modules = new_modules
+
+        if not modules:
+            raise ValueError(f"Module or submodule '{module}' not found...\n")
+
+        vars, new_content = set(), []
+
+        while module_content:
+            # find the vars in the module or the submodule
+            for content in module_content:
+                if isinstance(content, list):
+                    vars.update(content)
+                else:
+                    [new_content.append(value) for value in content.values()]
+
+            module_content, new_content = new_content, []
+
+        return vars
+
     def reload(self):
         """
         Reloads the model from the translated model file, so that all the
diff --git a/pysd/py_backend/utils.py b/pysd/py_backend/utils.py
index 148dd063..3201de2e 100644
--- a/pysd/py_backend/utils.py
+++ b/pysd/py_backend/utils.py
@@ -257,6 +257,24 @@ def compute_shape(coords, reshape_len=None, py_name=""):

 def get_key_and_value_by_insensitive_key_or_value(key, dict):
+    """
+    Given a key or a value, search for the real key and value in the
+    dictionary, ignoring the case of the given key.
+
+    Parameters
+    ----------
+    key: str
+        Key or value to look for in the dictionary.
+    dict: dict
+        Dictionary to search in.
+
+    Returns
+    -------
+    real key, real value: (str, str) or (None, None)
+        The real key and value that appear in the dictionary, or a tuple
+        of Nones if the input key is not in the dictionary.
+
+    """
     lower_key = key.lower()
     for real_key, real_value in dict.items():
         if real_key.lower() == lower_key or real_value.lower() == lower_key:

From 0326ce7ef25c3c1032663b926d9fb614940bef8c Mon Sep 17 00:00:00 2001
From: Eneko Martin Martinez
Date: Wed, 1 Dec 2021 17:55:57 +0100
Subject: [PATCH 12/16] Correct small bugs

---
 pysd/cli/parser.py          | 12 +++++++++++-
 pysd/translation/builder.py |  5 ++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/pysd/cli/parser.py b/pysd/cli/parser.py
index 1ecd981f..b748f402 100644
--- a/pysd/cli/parser.py
+++ b/pysd/cli/parser.py
@@ -110,6 +110,15 @@ def split_timestamps(string):
         f'See {docs} for examples.')


+def split_subview_sep(string):
+    """
+    Splits the subview separators:
+    --subview-sep ' - ,.' -> [' - ', '.']
+
+    """
+    return string.split(",")
+
+
 def split_vars(string):
     """
     Splits the arguments from new_values.
@@ -270,7 +279,8 @@ def __call__(self, parser, namespace, values, option_string=None): trans_arguments.add_argument( '--subview-sep', dest='subview_sep', - action='store', type=str, default="", metavar='STRING', + action='store', type=split_subview_sep, default=[], + metavar='\'STRING1,STRING2,..,STRINGN\'', help='further division of views split in subviews, by identifying the' 'separator string in the view name, only availabe if --split-views' ' is used') diff --git a/pysd/translation/builder.py b/pysd/translation/builder.py index 0661452f..ccf97678 100644 --- a/pysd/translation/builder.py +++ b/pysd/translation/builder.py @@ -176,7 +176,7 @@ def process_views_tree(view_name, subview_elems = [] for element in elements: if element.get("py_name") in view_content or \ - element.get("parent_name", None) in view_content: + element.get("parent_name") in view_content: subview_elems.append(element) _build_separate_module(subview_elems, subscript_dict, @@ -752,6 +752,8 @@ def merge_partial_elements(element_list): # Use 'expr' for Vensim models, and 'eqn' for Xmile # (This makes the Vensim equation prettier.) eqn = element["expr"] if "expr" in element else element["eqn"] + parent_name = element["parent_name"] if "parent_name" in element\ + else None outs[name] = { "py_name": element["py_name"], "real_name": element["real_name"], @@ -765,6 +767,7 @@ def merge_partial_elements(element_list): if "dependencies" in element else None, "lims": element["lims"], "eqn": [eqn.replace(r"\ ", "")], + "parent_name": parent_name, "kind": element["kind"], "arguments": element["arguments"], } From be650d2d899ba12b86b5645c918efb7c781b92a6 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Wed, 1 Dec 2021 17:57:53 +0100 Subject: [PATCH 13/16] Allow running submodel and include some test. Some other tests to be added and others to be corrected. --- pysd/py_backend/decorators.py | 2 + pysd/py_backend/statefuls.py | 154 ++++++- pysd/py_backend/utils.py | 7 + pysd/pysd.py | 3 +- pysd/translation/vensim/vensim2py.py | 18 +- tests/conftest.py | 14 + tests/integration_test_vensim_pathway.py | 5 + .../split_model/test_split_model.mdl | 55 ++- .../test_split_model_sub_subviews.mdl | 36 +- .../split_model/test_split_model_subviews.mdl | 20 +- .../test_split_model_with_macro.mdl | 5 +- tests/pytest.ini | 2 +- .../pytest_select_submodel.py | 156 +++++++ .../vensim2py/pytest_split_views.py | 180 ++++++++ tests/unit_test_pysd.py | 399 ------------------ 15 files changed, 602 insertions(+), 454 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/pytest_pysd/user_interaction/pytest_select_submodel.py create mode 100644 tests/pytest_translation/vensim2py/pytest_split_views.py diff --git a/pysd/py_backend/decorators.py b/pysd/py_backend/decorators.py index 33d2438a..f796dee1 100644 --- a/pysd/py_backend/decorators.py +++ b/pysd/py_backend/decorators.py @@ -13,6 +13,8 @@ def subs(dims, subcoords): xarray.DataArray. The algorithm is a simple version of utils.rearrange """ def decorator(function): + function.dims = dims + @wraps(function) def wrapper(*args): data = function(*args) diff --git a/pysd/py_backend/statefuls.py b/pysd/py_backend/statefuls.py index 148b0e8b..31322092 100644 --- a/pysd/py_backend/statefuls.py +++ b/pysd/py_backend/statefuls.py @@ -22,8 +22,6 @@ class objects. 
from pysd._version import __version__ -from pysd.py_backend import components - small_vensim = 1e-6 # What is considered zero according to Vensim Help @@ -1069,19 +1067,24 @@ def get_coords(self, param): else: func = param - if not self.get_args(func): + # TODO simplify this, make all model elements have a dims attribute + if hasattr(func, "dims"): + dims = func.dims + coords = {dim: self.components._subscript_dict[dim] + for dim in dims} + return coords, dims + elif hasattr(func, "state") and isinstance(func.state, xr.DataArray): value = func() - else: + elif self.get_args(func) and isinstance(func(0), xr.DataArray): value = func(0) - - if isinstance(value, xr.DataArray): - dims = list(value.dims) - coords = {coord: list(value.coords[coord].values) - for coord in value.coords} - return coords, dims else: return None + dims = list(value.dims) + coords = {coord: list(value.coords[coord].values) + for coord in value.coords} + return coords, dims + def __getitem__(self, param): """ Returns the current value of a model component. @@ -1233,6 +1236,8 @@ def set_components(self, params, new=False): stacklevel=2) new_function.__name__ = func_name + if dims: + new_function.dims = dims self.components._set_component(func_name, new_function) if func_name in self.cache.cached_funcs: self.cache.cached_funcs.remove(func_name) @@ -1636,14 +1641,57 @@ def run(self, params=None, return_columns=None, return_timestamps=None, return return_df - def get_dependencies(self, vars): + def select_submodel(self, vars=[], modules=[], contour_values={}): + c_vars, d_vars, s_deps = self._get_dependencies(vars, modules) + warnings.warn( + "Selecting submodel, " + "to run the full model again use model.reload()") + + self._stateful_elements = { + name: getattr(self.components, name) + for name in s_deps + if isinstance(getattr(self.components, name), Stateful) + } + self._dynamicstateful_elements = [ + getattr(self.components, name) for name in s_deps + if isinstance(getattr(self.components, name), DynamicStateful) + ] + self._macro_elements = [ + getattr(self.components, name) for name in s_deps + if isinstance(getattr(self.components, name), Macro) + ] + + all_deps = d_vars["initial"].copy() + all_deps.update(d_vars["step"]) + all_deps.update(d_vars["lookup"]) + + all_vars = all_deps.copy() + all_vars.update(c_vars) + + for real_name, py_name in self.components._namespace.copy().items(): + if py_name not in all_vars: + del self.components._namespace[real_name] + del self.components._dependencies[py_name] + + for py_name in self.components._dependencies.copy().keys(): + if py_name.startswith("_") and py_name not in s_deps: + del self.components._dependencies[py_name] + + self.set_components({element: np.nan for element in all_deps}) + self.set_components(contour_values) + self._assign_cache_type() + self._get_initialize_order() + + def get_dependencies(self, vars=[], modules=[]): """ Get the dependencies of a set of variables or modules. Parameters ---------- - vars: set or list + vars: dict Set or list of variables to get the dependencies from + modules: dict + Set or list of modules to get the dependencies from Returns ------- @@ -1651,20 +1699,84 @@ def get_dependencies(self, vars): Set of dependencies nedded to run vars. 
""" - dependencies = set() - current_vars = set() + c_vars, d_vars, s_deps = self._get_dependencies(vars, modules) + + text = utils.print_objects_format(c_vars, "Selected variables") + + if d_vars["initial"]: + text += utils.print_objects_format( + d_vars["initial"], + "\nDependencies for initialization only") + if d_vars["step"]: + text += utils.print_objects_format( + d_vars["step"], + "\nDependencies that may change over time") + if d_vars["lookup"]: + text += utils.print_objects_format( + d_vars["lookup"], + "\nLookup table dependencies") + + text += utils.print_objects_format( + s_deps, + "\nStateful objects integrated with the selected variables") + + print(text) + + def _get_dependencies(self, vars=[], modules=[]): + """ + Get the dependencies of a set of variables or modules. + + Parameters + ---------- + vars: dict + Set or list of variables to get the dependencies from + modules: dict + Set or list of modules to get the dependencies from + + Returns + ------- + dependencies: set + Set of dependencies nedded to run vars. + + """ + def check_dep(dependencies, initial=False): + for dep in dependencies: + if dep in c_vars or dep.startswith("__"): + continue + elif dep.startswith("_"): + s_deps.add(dep) + dep = self.components._dependencies[dep] + check_dep(dep["initial"], True) + check_dep(dep["step"]) + else: + if initial and dep not in d_deps["step"]\ + and dep not in d_deps["lookup"]: + d_deps["initial"].add(dep) + else: + if dep in d_deps["initial"]: + d_deps["initial"].remove(dep) + if self.get_args(dep): + d_deps["lookup"].add(dep) + else: + d_deps["step"].add(dep) + + d_deps = {"initial": set(), "step": set(), "lookup": set()} + s_deps = set() + c_vars = {"time", "time_step", "initial_time", "final_time", "saveper"} for var in vars: py_name = utils.get_key_and_value_by_insensitive_key_or_value( var, self.components._namespace)[1] - if py_name is None: - current_vars.update(self.get_vars_in_module(var)) - pass - else: - current_vars.add(py_name) + c_vars.add(py_name) + for module in modules: + c_vars.update(self.get_vars_in_module(module)) + + for var in c_vars: + if var == "time": + continue + check_dep(self.components._dependencies[var]) - return current_vars - return dependencies + return c_vars, d_deps, s_deps def get_vars_in_module(self, module): """ diff --git a/pysd/py_backend/utils.py b/pysd/py_backend/utils.py index 3201de2e..f4996f73 100644 --- a/pysd/py_backend/utils.py +++ b/pysd/py_backend/utils.py @@ -502,6 +502,13 @@ def detect_encoding(filename): return detector.result['encoding'] +def print_objects_format(object_set, text): + text += " (total %(n_obj)s):\n\t%(objs)s\n" % { + "n_obj": len(object_set), + "objs": ", ".join(object_set) + } + return text + class ProgressBar: """ Progress bar for integration diff --git a/pysd/pysd.py b/pysd/pysd.py index 0c3e48c1..faa7caff 100644 --- a/pysd/pysd.py +++ b/pysd/pysd.py @@ -158,5 +158,4 @@ def load(py_model_file, data_files=None, initialize=True, >>> model = load('../tests/test-models/samples/teacup/teacup.py') """ - - return Model(py_model_file, data_files, initialize, missing_values) + return Model(str(py_model_file), data_files, initialize, missing_values) diff --git a/pysd/translation/vensim/vensim2py.py b/pysd/translation/vensim/vensim2py.py index d2b4614a..a6578f3b 100644 --- a/pysd/translation/vensim/vensim2py.py +++ b/pysd/translation/vensim/vensim2py.py @@ -5,6 +5,7 @@ """ import os +import pathlib import re import warnings from io import open @@ -1876,7 +1877,7 @@ def translate_vensim(mdl_file, split_views, 
**kwargs): Parameters ---------- - mdl_file: str + mdl_file: str or pathlib.PosixPath File path of a vensim model file to translate to python. split_views: bool @@ -1901,20 +1902,22 @@ def translate_vensim(mdl_file, split_views, **kwargs): # character used to place subviews in the parent view folder subview_sep = kwargs.get("subview_sep", "") - root_path = os.path.split(mdl_file)[0] + if isinstance(mdl_file, str): + mdl_file = pathlib.Path(mdl_file) + + root_path = mdl_file.parent with open(mdl_file, "r", encoding="UTF-8") as in_file: text = in_file.read() # check for model extension - if not mdl_file.lower().endswith(".mdl"): + if mdl_file.suffix.lower() != ".mdl": raise ValueError( "The file to translate, " + mdl_file + " is not a vensim model. It must end with mdl extension." ) - mdl_insensitive = re.compile(re.escape('.mdl'), re.IGNORECASE) - outfile_name = mdl_insensitive.sub(".py", mdl_file) - out_dir = os.path.dirname(outfile_name) + + outfile_name = mdl_file.with_suffix(".py") if split_views: text, sketch = _split_sketch(text) @@ -1929,8 +1932,7 @@ def translate_vensim(mdl_file, split_views, **kwargs): else: # separate macro elements into their own files section["py_name"] = utils.make_python_identifier( section["name"]) - section["file_name"] = os.path.join( - out_dir, + section["file_name"] = root_path.joinpath( section["py_name"] + ".py") macro_list = [s for s in file_sections if s["name"] != "_main_"] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..a6be8df1 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,14 @@ +import pytest +from pathlib import Path + + +@pytest.fixture(scope="session") +def _root(): + # root directory + return Path(__file__).parent.resolve() + + +@pytest.fixture(scope="class") +def shared_tmpdir(tmpdir_factory): + # shared temporary directory for each class + return Path(tmpdir_factory.mktemp("shared")) diff --git a/tests/integration_test_vensim_pathway.py b/tests/integration_test_vensim_pathway.py index c9b456f2..521c6d92 100644 --- a/tests/integration_test_vensim_pathway.py +++ b/tests/integration_test_vensim_pathway.py @@ -42,6 +42,11 @@ def test_chained_initialization(self): output, canon = runner(test_models + '/chained_initialization/test_chained_initialization.mdl') assert_frames_close(output, canon, rtol=rtol) + @unittest.skip("Working on it") + def test_conditional_subscripts(self): + output, canon = runner(test_models + '/conditional_subscripts/test_conditional_subscripts.mdl') + assert_frames_close(output, canon, rtol=rtol) + def test_control_vars(self): output, canon = runner(test_models + '/control_vars/test_control_vars.mdl') assert_frames_close(output, canon, rtol=rtol) diff --git a/tests/more-tests/split_model/test_split_model.mdl b/tests/more-tests/split_model/test_split_model.mdl index bd02981f..c45a195a 100644 --- a/tests/more-tests/split_model/test_split_model.mdl +++ b/tests/more-tests/split_model/test_split_model.mdl @@ -1,27 +1,43 @@ {UTF-8} -another var= - 3*Stock +other stock= INTEG ( + 6*"var-n", + 3) ~ ~ | -"rate-1"= - "var-n" +"variable-x"= + lookup table(6*another var) ~ ~ | -"var-n"= - 5 +initial stock= + 0.1 ~ ~ | -"variable-x"= - 6*another var +lookup table( + (1,0), (10,2), (100,2), (1000,3), (10000,4)) ~ ~ | Stock= INTEG ( "rate-1", - 1) + initial stock) + ~ + ~ | + +another var= + 3*Stock + ~ + ~ | + +"rate-1"= + "var-n" + ~ + ~ | + +"var-n"= + 5 ~ ~ | @@ -66,6 +82,14 @@ $255-128-0,0,Times New Roman|12||0-0-0|0-0-0|0-192-192|-1--1--1|-1--1--1|96,96,1 Stock 
10,8,"var-n",207,367,18,11,8,3,0,0,0,0,0,0 1,9,8,6,0,0,0,0,0,128,0,-1--1--1,,1|(288,318)| +10,10,initial stock,431,347,36,11,8,3,0,0,0,0,0,0 +1,11,10,1,0,0,0,0,0,128,1,-1--1--1,,1|(457,302)| +10,12,other stock,371,488,40,20,3,3,0,0,0,0,0,0 +12,13,48,95,500,10,8,0,3,0,0,-1,0,0,0 +11,14,0,218,500,6,8,2,3,0,0,1,0,0,0 +1,15,14,12,4,0,0,22,0,0,0,-1--1--1,,1|(277,500)| +1,16,14,13,100,0,0,22,0,0,0,-1--1--1,,1|(158,500)| +1,17,8,14,0,0,0,0,0,128,0,-1--1--1,,1|(211,429)| \\\---/// Sketch information - do not modify anything except names V300 Do not put anything below this section - it will be ignored *View-2 @@ -73,15 +97,22 @@ $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,1 10,1,another var,89,168,36,11,8,3,0,0,0,0,0,0 10,2,Stock,334,243,29,11,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 1,3,2,1,0,0,0,0,0,128,0,-1--1--1,,1|(221,209)| +10,4,lookup table,222,122,40,11,8,3,0,0,0,0,0,0 \\\---/// Sketch information - do not modify anything except names V300 Do not put anything below this section - it will be ignored *View 3 $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,100,0 10,1,"variable-x",191,176,32,11,8,3,0,0,0,0,0,0 -10,2,another var,223,395,45,11,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 +10,2,another var,199,388,45,11,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 12,3,0,461,148,43,11,8,7,0,0,-1,0,0,0 This is view 3 -1,4,2,1,0,0,0,0,0,128,0,-1--1--1,,1|(208,292)| +1,4,2,1,0,0,0,0,0,128,0,-1--1--1,,1|(195,288)| +10,5,lookup table,334,294,49,11,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 +1,6,5,1,0,0,0,0,0,128,0,-1--1--1,,1|(267,239)| +\\\---/// Sketch information - do not modify anything except names +V300 Do not put anything below this section - it will be ignored +*View 4 +$192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,100,0 ///---\\\ :L<%^E!@ 1:Current.vdf @@ -91,7 +122,7 @@ This is view 3 27:0, 34:0, 4:Time -5:another var +5:other stock 35:Date 36:YYYY-MM-DD 37:2000 diff --git a/tests/more-tests/split_model/test_split_model_sub_subviews.mdl b/tests/more-tests/split_model/test_split_model_sub_subviews.mdl index 07bafdaa..74cf9fb2 100644 --- a/tests/more-tests/split_model/test_split_model_sub_subviews.mdl +++ b/tests/more-tests/split_model/test_split_model_sub_subviews.mdl @@ -1,4 +1,20 @@ {UTF-8} +other stock= INTEG ( + var tolo, + 0) + ~ + ~ | + +interesting var 2 looked up= + look up definition(interesting var 2) + ~ + ~ | + +look up definition( + (1,0), (10,1), (50,1.5), (100,4), (1000,5), (10000,3), (100000,4)) + ~ + ~ | + var tolo= 55+great var ~ @@ -102,6 +118,7 @@ $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,1 12,3,0,461,148,43,11,8,7,0,0,-1,0,0,0 This is view 2 1,4,2,1,0,0,0,0,0,128,0,-1--1--1,,1|(208,292)| +10,5,look up definition,409,316,54,11,8,3,0,0,0,0,0,0 \\\---/// Sketch information - do not modify anything except names V300 Do not put anything below this section - it will be ignored *View 3.Subview 1-Sview 1 @@ -116,6 +133,10 @@ $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,1 10,1,interesting var 1,249,173,59,11,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 10,2,interesting var 2,484,153,50,11,8,3,0,0,0,0,0,0 1,3,1,2,0,0,0,0,0,128,0,-1--1--1,,1|(363,163)| +10,4,interesting var 2 looked up,501,230,50,19,8,3,0,0,0,0,0,0 +1,5,2,4,0,0,0,0,0,128,0,-1--1--1,,1|(489,180)| +10,6,look up definition,274,221,34,19,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|0||128-128-128 
+1,7,6,4,0,0,0,0,0,128,0,-1--1--1,,1|(372,224)| \\\---/// Sketch information - do not modify anything except names V300 Do not put anything below this section - it will be ignored *View 3.subview 2-sview 3 @@ -125,18 +146,25 @@ $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,1 V300 Do not put anything below this section - it will be ignored *View 3.Subview 2-sview 4 $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,100,0 -10,1,var tolo,162,186,25,11,8,3,0,0,0,0,0,0 -10,2,great var,128,103,38,11,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|0||128-128-128 -1,3,2,1,0,0,0,0,0,128,0,-1--1--1,,1|(141,138)| +10,1,var tolo,295,147,25,11,8,3,0,0,0,0,0,0 +10,2,great var,128,103,38,11,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 +1,3,2,1,0,0,0,0,0,128,0,-1--1--1,,1|(211,124)| +10,6,other stock,414,201,40,20,3,3,0,0,0,0,0,0 +12,7,48,214,216,10,8,0,3,0,0,-1,0,0,0 +11,8,0,299,214,6,8,2,3,0,0,1,0,0,0 +1,9,8,6,4,0,0,22,0,0,0,-1--1--1,,1|(339,214)| +1,10,8,7,100,0,0,22,0,0,0,-1--1--1,,1|(258,214)| +1,16,1,8,0,0,0,0,0,128,0,-1--1--1,,1|(296,176)| ///---\\\ :L<%^E!@ +1:Current.vdf 9:Current 15:0,0,0,0,0,0 19:100,6 27:0, 34:0, 4:Time -5:var tolo +5:other stock 35:Date 36:YYYY-MM-DD 37:2000 diff --git a/tests/more-tests/split_model/test_split_model_subviews.mdl b/tests/more-tests/split_model/test_split_model_subviews.mdl index e302c23c..4a3eb08b 100644 --- a/tests/more-tests/split_model/test_split_model_subviews.mdl +++ b/tests/more-tests/split_model/test_split_model_subviews.mdl @@ -1,4 +1,10 @@ {UTF-8} +other stock= INTEG ( + "variable-x", + 5) + ~ + ~ | + another var= 3*Stock ~ @@ -77,21 +83,27 @@ $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,1 V300 Do not put anything below this section - it will be ignored *View 2 $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,100,0 -10,1,"variable-x",191,176,32,11,8,3,0,0,0,0,0,0 +10,1,"variable-x",186,173,32,11,8,3,0,0,0,0,0,0 10,2,another var,223,395,45,11,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 12,3,0,461,148,43,11,8,7,0,0,-1,0,0,0 This is view 2 -1,4,2,1,0,0,0,0,0,128,0,-1--1--1,,1|(208,292)| +1,4,2,1,0,0,0,0,0,128,0,-1--1--1,,1|(205,290)| +10,5,other stock,459,338,40,20,3,3,0,0,0,0,0,0 +12,6,48,288,322,10,8,0,3,0,0,-1,0,0,0 +11,7,0,358,324,6,8,2,3,0,0,1,0,0,0 +1,8,7,5,4,0,0,22,0,0,0,-1--1--1,,1|(391,324)| +1,9,7,6,100,0,0,22,0,0,0,-1--1--1,,1|(325,324)| +1,10,1,7,1,0,0,0,0,128,0,-1--1--1,,1|(305,221)| ///---\\\ :L<%^E!@ 1:Current.vdf 9:Current 15:0,0,0,0,0,0 -19:100,0 +19:100,2 27:0, 34:0, 4:Time -5:another var +5:other stock 35:Date 36:YYYY-MM-DD 37:2000 diff --git a/tests/more-tests/split_model_with_macro/test_split_model_with_macro.mdl b/tests/more-tests/split_model_with_macro/test_split_model_with_macro.mdl index a38e34e7..ad8cfad9 100644 --- a/tests/more-tests/split_model_with_macro/test_split_model_with_macro.mdl +++ b/tests/more-tests/split_model_with_macro/test_split_model_with_macro.mdl @@ -71,14 +71,13 @@ $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,1 1,3,2,1,0,0,0,0,0,128,0,-1--1--1,,1|(156,125)| ///---\\\ :L<%^E!@ -1:Z:\Development\pysd\tests\test-models\tests\macro_stock\Current.vdf 9:Current 15:0,0,0,0,0,0 -19:100,1 +19:100,0 27:2, 34:0, 4:Time -5:new var +5:macro output 35:Date 36:YYYY-MM-DD 37:2000 diff --git a/tests/pytest.ini b/tests/pytest.ini index 033e9f5f..d6f5a6e0 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -1,2 +1,2 @@ [pytest] -python_files = unit_test_*.py 
integration_test_*.py \ No newline at end of file +python_files = unit_test_*.py integration_test_*.py pytest_*/**/*.py diff --git a/tests/pytest_pysd/user_interaction/pytest_select_submodel.py b/tests/pytest_pysd/user_interaction/pytest_select_submodel.py new file mode 100644 index 00000000..dc67365c --- /dev/null +++ b/tests/pytest_pysd/user_interaction/pytest_select_submodel.py @@ -0,0 +1,156 @@ + +import pytest +import shutil +import numpy as np + +import pysd + + +@pytest.mark.parametrize( + "model_path,subview_sep,variables,modules,n_deps,dep_vars", + [ + ( + "test_split_model", + [], + ["stock"], + [], + (6, 1, 1, 0, 1), + {"rate1": 4, "initial_stock": 2} + ), + ( + "test_split_model_subviews", + ["."], + [], + ["view_1"], + (9, 0, 0, 0, 1), + { + + } + ), + ( + "test_split_model_sub_subviews", + [".", "-"], + ["variablex"], + ["subview_1", "submodule_1"], + (12, 0, 1, 1, 1), + {"another_var": 5, "look_up_definition": 3} + ) + ], +) +class TestSubmodel: + """Submodel selecting class""" + # messages for selecting submodules + messages = [ + "Selected variables", + "Dependencies for initialization only", + "Dependencies that may change over time", + "Lookup table dependencies", + "Stateful objects integrated with the selected variables" + ] + warning = "Selecting submodel, "\ + + "to run the full model again use model.reload()" + common_vars = { + 'initial_time', 'time_step', 'final_time', 'time', 'saveper', 'stock' + } + + @pytest.fixture + def models_dir(self, _root): + return _root.joinpath("more-tests/split_model") + + @pytest.fixture + def model(self, shared_tmpdir, models_dir, model_path, subview_sep): + """ + Translate the model or read a translated version. + This way each file is only translated once. + """ + # expected file + file = shared_tmpdir.joinpath(model_path + '.py') + if file.is_file(): + # load already translated file + return pysd.load(file) + else: + # copy mdl file to tmp_dir and translate it + file = shared_tmpdir.joinpath(model_path + '.mdl') + shutil.copy( + models_dir.joinpath(model_path + '.mdl'), + file) + return pysd.read_vensim( + file, + split_views=True, subview_sep=subview_sep) + + def test__get_dependencies(self, model, variables, modules, + n_deps, dep_vars): + + # get selected vars and dependencies in sets and dictionary + out = model._get_dependencies(vars=variables, modules=modules) + assert len(out[0]) == n_deps[0] + assert len(out[1]["initial"]) == n_deps[1] + assert len(out[1]["step"]) == n_deps[2] + assert len(out[1]["lookup"]) == n_deps[3] + assert len(out[2]) == n_deps[4] + + assert self.common_vars.issubset(out[0]) + + def test_get_dependencies(self, capsys, model, variables, modules, + n_deps, dep_vars): + # get the dependencies information of the selected variables and + # modules by stdout + model.get_dependencies(vars=variables, modules=modules) + + captured = capsys.readouterr() # capture stdout + + for n, message in zip(n_deps, self.messages): + if n != 0: + # check the message with the number of dependencies of + # each type + assert message + " (total %s):\n" % n in captured.out + else: + # if not dependencies not message should be printed + assert message not in captured.out + + # assert _integ_stock is in the message as the included stateful object + assert "_integ_stock" in captured.out + + # assert all dependencies of the submodel are in the message + for var in dep_vars: + assert var in captured.out + + def test_select_submodel(self, model, variables, modules, + n_deps, dep_vars): + + # assert original stateful elements + 
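+        # (the split test models define two stocks, "Stock" and
+        # "other stock", so both stateful objects must be present
+        # before the submodel is selected)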
assert len(model._dynamicstateful_elements) == 2 + assert "_integ_other_stock" in model._stateful_elements + assert "_integ_other_stock" in model.components._dependencies + assert "other_stock" in model.components._dependencies + assert "other stock" in model.components._namespace + assert "_integ_stock" in model._stateful_elements + assert "_integ_stock" in model.components._dependencies + assert "stock" in model.components._dependencies + assert "Stock" in model.components._namespace + + # select submodel + with pytest.warns(UserWarning, match=self.warning): + model.select_submodel(vars=variables, modules=modules) + + # assert stateful elements change + assert len(model._dynamicstateful_elements) == 1 + assert "_integ_other_stock" not in model._stateful_elements + assert "_integ_other_stock" not in model.components._dependencies + assert "other_stock" not in model.components._dependencies + assert "other stock" not in model.components._namespace + assert "_integ_stock" in model._stateful_elements + assert "_integ_stock" in model.components._dependencies + assert "stock" in model.components._dependencies + assert "Stock" in model.components._namespace + + if not dep_vars: + # totally independent submodels can run without producing + # nan values + assert not np.any(np.isnan(model.run())) + else: + # running the model without redefining dependencies will + # produce nan values + assert np.any(np.isnan(model.run())) + # redefine dependencies + assert not np.any(np.isnan(model.run(params=dep_vars))) diff --git a/tests/pytest_translation/vensim2py/pytest_split_views.py b/tests/pytest_translation/vensim2py/pytest_split_views.py new file mode 100644 index 00000000..8b4a9a68 --- /dev/null +++ b/tests/pytest_translation/vensim2py/pytest_split_views.py @@ -0,0 +1,180 @@ + +import pytest +from pathlib import Path +import shutil + +import pysd +from pysd.tools.benchmarking import assert_frames_close + + +@pytest.mark.parametrize( + "model_path,subview_sep,modules,macros,original_vars,py_vars," + + "stateful_objs", + [ + ( + Path("more-tests/split_model/test_split_model.mdl"), + [], + ["view_1", "view2", "view_3"], + [], + ["Stock"], + ["another_var", "rate1", "varn", "variablex", "stock"], + ["_integ_stock"] + ), + ( + Path("more-tests/split_model/test_split_model_subviews.mdl"), + ["."], + ["view_1/submodule_1", "view_1/submodule_2", "view_2"], + [], + ["Stock"], + ["another_var", "rate1", "varn", "variablex", "stock"], + ["_integ_stock"] + ), + ( + Path("more-tests/split_model/test_split_model_sub_subviews.mdl"), + [".", "-"], + [ + "view_1/submodule_1", "view_1/submodule_2", "view_2", + "view_3/subview_1/sview_1", "view_3/subview_1/sview_2", + "view_3/subview_2/sview_3", "view_3/subview_2/sview_4" + ], + [], + ["Stock"], + ["another_var", "rate1", "varn", "variablex", "stock", + "interesting_var_2", "great_var"], + ["_integ_stock"] + ), + ( + Path("more-tests/split_model_with_macro/" + + "test_split_model_with_macro.mdl"), + [".", "-"], + ["view_1", "view_2"], + ["expression_macro"], + ["new var"], + ["new_var"], + ["_macro_macro_output"] + ), + ( + Path("more-tests/split_model_vensim_8_2_1/" + + "test_split_model_vensim_8_2_1.mdl"), + [], + ["teacup", "cream"], + [], + ["Teacup Temperature", "Cream Temperature"], + ["teacup_temperature", "cream_temperature"], + ["integ_teacup_temperature", "integ_cream_temperature"] + ) + ], +) +class TestSplitViews: + """ + Test for splitting Vensim views in modules and submodules + """ + @pytest.fixture + def model_file(self, shared_tmpdir, model_path): + 
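+        # target path for the copy of the mdl file in the shared
+        # temporary directory (the copy itself is made in the
+        # expected_files fixture below)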
return shared_tmpdir.joinpath(model_path.name) + + @pytest.fixture + def expected_files(self, shared_tmpdir, _root, model_path, + model_file, modules, macros): + model_name = model_path.stem + shutil.copy( + _root.joinpath(model_path), + model_file + ) + modules_dir = shared_tmpdir.joinpath("modules_" + model_name) + files = { + shared_tmpdir.joinpath("_namespace_" + model_name + ".json"), + shared_tmpdir.joinpath("_subscripts_" + model_name + ".json"), + shared_tmpdir.joinpath("_dependencies_" + model_name + ".json"), + modules_dir.joinpath("_modules.json") + } + [files.add(modules_dir.joinpath(module + ".py")) for module in modules] + [files.add(shared_tmpdir.joinpath(macro + ".py")) for macro in macros] + return files + + def test_read_vensim_split_model(self, model_file, subview_sep, + expected_files, modules, + original_vars, py_vars, + stateful_objs): + # assert that the files don't exist in the temporary directory + for file in expected_files: + assert not file.is_file(), f"File {file} already exists..." + + # translate split model + model_split = pysd.read_vensim(model_file, split_views=True, + subview_sep=subview_sep) + + # assert that all the files have been created + for file in expected_files: + assert file.is_file(), f"File {file} has not been created..." + + # check the dictionaries + assert isinstance(model_split.components._namespace, dict) + assert isinstance(model_split.components._subscript_dict, dict) + assert isinstance(model_split.components._dependencies, dict) + assert isinstance(model_split.components._modules, dict) + + # assert taht main modules are dictionary keys + for module in modules: + assert module.split("/")[0]\ + in model_split.components._modules.keys() + + # assert that original variables are in the namespace + for var in original_vars: + assert var in model_split.components._namespace.keys() + + # assert that the functions are not defined in the main file + model_py_file = model_file.with_suffix(".py") + with open(model_py_file, 'r') as file: + file_content = file.read() + for var in py_vars: + assert "def %s()" % var not in file_content + for var in stateful_objs: + assert "%s = " % var not in file_content + + # translation without splitting + model_non_split = pysd.read_vensim(model_file, split_views=False) + + # assert that the functions are defined in the main file + with open(model_py_file, 'r') as file: + file_content = file.read() + for var in py_vars: + assert "def %s()" % var in file_content + for var in stateful_objs: + assert "%s = " % var in file_content + + # check that both models give the same result + assert_frames_close( + model_split.run(), model_non_split.run(), atol=0, rtol=0) + + +@pytest.mark.parametrize( + "model_path,subview_sep,warning_message", + [ + ( + Path("test-models/samples/teacup/teacup.mdl"), + [], + "Only a single view with no subviews was detected. The model" + + " will be built in a single file." + ), + ( + Path("more-tests/split_model/test_split_model_sub_subviews.mdl"), + ["a"], + "The given subview separators were not matched in any view name." + ), + ], +) +class TestSplitViewsWarnings: + """ + Test for warnings while splitting views. 
+ """ + @pytest.fixture + def model(self, shared_tmpdir, model_path, _root): + # move model file to temporary dir + file = shared_tmpdir.joinpath(model_path.name) + shutil.copy(_root.joinpath(model_path), file) + return file + + def test_split_view_warnings(self, model, subview_sep, warning_message): + with pytest.warns(UserWarning, match=warning_message): + pysd.read_vensim(model, split_views=True, subview_sep=subview_sep) diff --git a/tests/unit_test_pysd.py b/tests/unit_test_pysd.py index f4e3c11b..3a1c9a28 100644 --- a/tests/unit_test_pysd.py +++ b/tests/unit_test_pysd.py @@ -1,6 +1,5 @@ import unittest import os -import shutil from warnings import simplefilter, catch_warnings import pandas as pd import numpy as np @@ -1466,404 +1465,6 @@ def test_delay_reinitializes(self): self.assertTrue(all(res1 == res2)) -class TestSplitViews(unittest.TestCase): - def test_read_vensim_split_model(self): - import pysd - - root_dir = more_tests + "/split_model/" - - model_name = "test_split_model" - model_split = pysd.read_vensim( - root_dir + model_name + ".mdl", split_views=True - ) - - namespace_filename = "_namespace_" + model_name + ".json" - subscript_dict_filename = "_subscripts_" + model_name + ".json" - dependencies_filename = "_dependencies_" + model_name + ".json" - modules_filename = "_modules.json" - modules_dirname = "modules_" + model_name - - # check that _namespace and _subscript_dict json files where created - self.assertTrue(os.path.isfile(root_dir + namespace_filename)) - self.assertTrue(os.path.isfile(root_dir + subscript_dict_filename)) - self.assertTrue(os.path.isfile(root_dir + dependencies_filename)) - - # check that the main model file was created - self.assertTrue(os.path.isfile(root_dir + model_name + ".py")) - - # check that the modules folder was created - self.assertTrue(os.path.isdir(root_dir + modules_dirname)) - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/" + modules_filename) - ) - - # check creation of module files - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/" + "view_1.py")) - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/" + "view2.py")) - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/" + "view_3.py")) - - # check dictionaries - self.assertIn("Stock", model_split.components._namespace.keys()) - self.assertIn("view2", model_split.components._modules.keys()) - self.assertIsInstance(model_split.components._subscript_dict, dict) - - with open(root_dir + model_name + ".py", 'r') as file: - file_content = file.read() - - # assert that the functions are not defined in the main file - self.assertNotIn("def another_var()", file_content) - self.assertNotIn("def rate1()", file_content) - self.assertNotIn("def varn()", file_content) - self.assertNotIn("def variablex()", file_content) - self.assertNotIn("def stock()", file_content) - - # check that the results of the split model are the same than those - # without splitting - model_non_split = pysd.read_vensim( - root_dir + model_name + ".mdl", split_views=False - ) - - result_split = model_split.run() - result_non_split = model_non_split.run() - - # results of a split model are the same that those of the regular - # model (un-split) - assert_frames_close(result_split, result_non_split, atol=0, rtol=0) - - with open(root_dir + model_name + ".py", 'r') as file: - file_content = file.read() - - # assert that the functions are in the main file for regular trans - self.assertIn("def another_var()", file_content) - self.assertIn("def rate1()", 
file_content) - self.assertIn("def varn()", file_content) - self.assertIn("def variablex()", file_content) - self.assertIn("def stock()", file_content) - - # remove newly created files - os.remove(root_dir + model_name + ".py") - os.remove(root_dir + namespace_filename) - os.remove(root_dir + subscript_dict_filename) - os.remove(root_dir + dependencies_filename) - - # remove newly created modules folder - shutil.rmtree(root_dir + modules_dirname) - - def test_read_vensim_split_model_vensim_8_2_1(self): - import pysd - - root_dir = os.path.join(_root, "more-tests/split_model_vensim_8_2_1/") - - model_name = "test_split_model_vensim_8_2_1" - with catch_warnings(record=True): - model_split = pysd.read_vensim( - root_dir + model_name + ".mdl", - split_views=True, subview_sep=".") - - namespace_filename = "_namespace_" + model_name + ".json" - subscript_dict_filename = "_subscripts_" + model_name + ".json" - dependencies_filename = "_dependencies_" + model_name + ".json" - modules_filename = "_modules.json" - modules_dirname = "modules_" + model_name - - # check that _namespace and _subscript_dict json files where created - self.assertTrue(os.path.isfile(root_dir + namespace_filename)) - self.assertTrue(os.path.isfile(root_dir + subscript_dict_filename)) - self.assertTrue(os.path.isfile(root_dir + dependencies_filename)) - - # check that the main model file was created - self.assertTrue(os.path.isfile(root_dir + model_name + ".py")) - - # check that the modules folder was created - self.assertTrue(os.path.isdir(root_dir + modules_dirname)) - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/" + modules_filename) - ) - - # check creation of module files - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/" + "teacup.py")) - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/" + "cream.py")) - - # check dictionaries - self.assertIn("Cream Temperature", - model_split.components._namespace.keys()) - self.assertIn("cream", model_split.components._modules.keys()) - self.assertIsInstance(model_split.components._subscript_dict, dict) - - with open(root_dir + model_name + ".py", 'r') as file: - file_content = file.read() - - # assert that the functions are not defined in the main file - self.assertNotIn("def teacup_temperature()", file_content) - self.assertNotIn("def cream_temperature()", file_content) - - # check that the results of the split model are the same than those - # without splitting - model_non_split = pysd.read_vensim( - root_dir + model_name + ".mdl", split_views=False - ) - - result_split = model_split.run() - result_non_split = model_non_split.run() - - # results of a split model are the same that those of the regular - # model (un-split) - assert_frames_close(result_split, result_non_split, atol=0, rtol=0) - - with open(root_dir + model_name + ".py", 'r') as file: - file_content = file.read() - - # assert that the functions are in the main file for regular trans - self.assertIn("def teacup_temperature()", file_content) - self.assertIn("def cream_temperature()", file_content) - - # remove newly created files - os.remove(root_dir + model_name + ".py") - os.remove(root_dir + namespace_filename) - os.remove(root_dir + subscript_dict_filename) - os.remove(root_dir + dependencies_filename) - - # remove newly created modules folder - shutil.rmtree(root_dir + modules_dirname) - - def test_read_vensim_split_model_subviews(self): - import pysd - - root_dir = os.path.join(_root, "more-tests/split_model/") - - model_name = "test_split_model_subviews" - 
model_split = pysd.read_vensim( - root_dir + model_name + ".mdl", split_views=True, - subview_sep=["."] - ) - - namespace_filename = "_namespace_" + model_name + ".json" - subscript_dict_filename = "_subscripts_" + model_name + ".json" - dependencies_filename = "_dependencies_" + model_name + ".json" - modules_dirname = "modules_" + model_name - - # check that the modules folders were created - self.assertTrue(os.path.isdir(root_dir + modules_dirname + "/view_1")) - - # check creation of module files - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/view_1/" + - "submodule_1.py")) - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/view_1/" + - "submodule_2.py")) - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/view_2.py")) - - with open(root_dir + model_name + ".py", 'r') as file: - file_content = file.read() - - # assert that the functions are not defined in the main file - self.assertNotIn("def another_var()", file_content) - self.assertNotIn("def rate1()", file_content) - self.assertNotIn("def varn()", file_content) - self.assertNotIn("def variablex()", file_content) - self.assertNotIn("def stock()", file_content) - - # check that the results of the split model are the same than those - # without splitting - model_non_split = pysd.read_vensim( - root_dir + model_name + ".mdl", split_views=False - ) - - result_split = model_split.run() - result_non_split = model_non_split.run() - - # results of a split model are the same that those of the regular - # model (un-split) - assert_frames_close(result_split, result_non_split, atol=0, rtol=0) - - with open(root_dir + model_name + ".py", 'r') as file: - file_content = file.read() - - # assert that the functions are in the main file for regular trans - self.assertIn("def another_var()", file_content) - self.assertIn("def rate1()", file_content) - self.assertIn("def varn()", file_content) - self.assertIn("def variablex()", file_content) - self.assertIn("def stock()", file_content) - - # remove newly created files - os.remove(root_dir + model_name + ".py") - os.remove(root_dir + namespace_filename) - os.remove(root_dir + subscript_dict_filename) - os.remove(root_dir + dependencies_filename) - - # remove newly created modules folder - shutil.rmtree(root_dir + modules_dirname) - - def test_read_vensim_split_model_several_subviews(self): - import pysd - - root_dir = os.path.join(_root, "more-tests/split_model/") - - model_name = "test_split_model_sub_subviews" - model_split = pysd.read_vensim( - root_dir + model_name + ".mdl", split_views=True, - subview_sep=[".", "-"] - ) - - namespace_filename = "_namespace_" + model_name + ".json" - subscript_dict_filename = "_subscripts_" + model_name + ".json" - dependencies_filename = "_dependencies_" + model_name + ".json" - modules_dirname = "modules_" + model_name - - # check that the modules folders were created - self.assertTrue(os.path.isdir(root_dir + modules_dirname + "/view_1")) - self.assertTrue(os.path.isdir(root_dir + modules_dirname + "/view_3")) - self.assertTrue(os.path.isdir(root_dir + modules_dirname + "/view_3" + - "/subview_1")) - self.assertTrue(os.path.isdir(root_dir + modules_dirname + "/view_3" + - "/subview_2")) - # check creation of module files - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/view_2.py")) - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/view_1/" + - "submodule_1.py")) - self.assertTrue( - os.path.isfile(root_dir + modules_dirname + "/view_1/" + - "submodule_2.py")) - 
self.assertTrue(os.path.isfile(root_dir + modules_dirname + "/view_3" + - "/subview_1" + "/sview_1.py")) - self.assertTrue(os.path.isfile(root_dir + modules_dirname + "/view_3" + - "/subview_1" + "/sview_2.py")) - self.assertTrue(os.path.isfile(root_dir + modules_dirname + "/view_3" + - "/subview_2" + "/sview_3.py")) - self.assertTrue(os.path.isfile(root_dir + modules_dirname + "/view_3" + - "/subview_2" + "/sview_4.py")) - - with open(root_dir + model_name + ".py", 'r') as file: - file_content = file.read() - - # assert that the functions are not defined in the main file - self.assertNotIn("def another_var()", file_content) - self.assertNotIn("def rate1()", file_content) - self.assertNotIn("def varn()", file_content) - self.assertNotIn("def variablex()", file_content) - self.assertNotIn("def stock()", file_content) - self.assertNotIn("def interesting_var_2()", file_content) - self.assertNotIn("def great_var()", file_content) - - # check that the results of the split model are the same than those - # without splitting - model_non_split = pysd.read_vensim( - root_dir + model_name + ".mdl", split_views=False - ) - - result_split = model_split.run() - result_non_split = model_non_split.run() - - # results of a split model are the same that those of the regular - # model (un-split) - assert_frames_close(result_split, result_non_split, atol=0, rtol=0) - - with open(root_dir + model_name + ".py", 'r') as file: - file_content = file.read() - - # assert that the functions are in the main file for regular trans - self.assertIn("def another_var()", file_content) - self.assertIn("def rate1()", file_content) - self.assertIn("def varn()", file_content) - self.assertIn("def variablex()", file_content) - self.assertIn("def stock()", file_content) - self.assertIn("def interesting_var_2()", file_content) - self.assertIn("def great_var()", file_content) - - # remove newly created files - os.remove(root_dir + model_name + ".py") - os.remove(root_dir + namespace_filename) - os.remove(root_dir + subscript_dict_filename) - os.remove(root_dir + dependencies_filename) - - # remove newly created modules folder - shutil.rmtree(root_dir + modules_dirname) - - def test_read_vensim_split_model_with_macro(self): - import pysd - - root_dir = more_tests + "/split_model_with_macro/" - - model_name = "test_split_model_with_macro" - model_non_split = pysd.read_vensim( - root_dir + model_name + ".mdl", split_views=False - ) - - namespace_filename = "_namespace_" + model_name + ".json" - subscript_dict_filename = "_subscripts_" + model_name + ".json" - dependencies_filename = "_dependencies_" + model_name + ".json" - modules_dirname = "modules_" + model_name - - # running split model - result_non_split = model_non_split.run() - - model_split = pysd.read_vensim( - root_dir + model_name + ".mdl", split_views=True - ) - result_split = model_split.run() - - # results of a split model are the same that those of the regular model - assert_frames_close(result_split, result_non_split, atol=0, rtol=0) - - # remove newly created files - os.remove(root_dir + model_name + ".py") - os.remove(root_dir + "expression_macro.py") - os.remove(root_dir + namespace_filename) - os.remove(root_dir + subscript_dict_filename) - os.remove(root_dir + dependencies_filename) - - # remove newly created modules folder - shutil.rmtree(root_dir + modules_dirname) - - def test_read_vensim_split_model_warning(self): - import pysd - # setting the split_views=True when the model has a single - # view should generate a warning - with catch_warnings(record=True) 
as ws: - pysd.read_vensim( - test_model, split_views=True - ) # set stock value using params - - wu = [w for w in ws if issubclass(w.category, UserWarning)] - - self.assertEqual(len(wu), 1) - self.assertTrue( - "Only a single view with no subviews was detected" in str( - wu[0].message) - ) - - def test_read_vensim_split_model_non_matching_separator_warning(self): - import pysd - # setting the split_views=True when the model has a single - # view should generate a warning - - root_dir = os.path.join(_root, "more-tests/split_model/") - - model_name = "test_split_model_sub_subviews" - - with catch_warnings(record=True) as ws: - pysd.read_vensim(root_dir + model_name + ".mdl", split_views=True, - subview_sep=["a"]) - - wu = [w for w in ws if issubclass(w.category, UserWarning)] - - self.assertEqual(len(wu), 1) - self.assertTrue( - "The given subview separators were not matched in" in str( - wu[0].message) - ) - - class TestDependencies(unittest.TestCase): def test_teacup_deps(self): from pysd import read_vensim From a2b657ef9538d50277df2e4faa6b976cc4013f5d Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Thu, 2 Dec 2021 09:16:46 +0100 Subject: [PATCH 14/16] Update tests --- pysd/py_backend/statefuls.py | 8 ++- pysd/translation/vensim/vensim2py.py | 2 +- .../pytest_select_submodel.py | 64 +++++++++++++++---- .../vensim2py/pytest_split_views.py | 2 + tests/unit_test_builder.py | 5 ++ 5 files changed, 65 insertions(+), 16 deletions(-) diff --git a/pysd/py_backend/statefuls.py b/pysd/py_backend/statefuls.py index 31322092..355fad0a 100644 --- a/pysd/py_backend/statefuls.py +++ b/pysd/py_backend/statefuls.py @@ -1793,7 +1793,11 @@ def get_vars_in_module(self, module): Set of varible names in the given module. """ - modules = self.components._modules.copy() + try: + modules = self.components._modules.copy() + except NameError: + raise ValueError( + "Trying to get a module from a non-modularized model") while modules: # find the module or the submodule @@ -1806,7 +1810,7 @@ def get_vars_in_module(self, module): modules = new_modules if not modules: - raise ValueError(f"Module or submodule '{module}' not found...\n") + raise NameError(f"Module or submodule '{module}' not found...\n") vars, new_content = set(), [] diff --git a/pysd/translation/vensim/vensim2py.py b/pysd/translation/vensim/vensim2py.py index a6578f3b..fabb8b13 100644 --- a/pysd/translation/vensim/vensim2py.py +++ b/pysd/translation/vensim/vensim2py.py @@ -1913,7 +1913,7 @@ def translate_vensim(mdl_file, split_views, **kwargs): if mdl_file.suffix.lower() != ".mdl": raise ValueError( "The file to translate, " - + mdl_file + + str(mdl_file) + " is not a vensim model. It must end with mdl extension." 
) diff --git a/tests/pytest_pysd/user_interaction/pytest_select_submodel.py b/tests/pytest_pysd/user_interaction/pytest_select_submodel.py index dc67365c..2b4f9ca5 100644 --- a/tests/pytest_pysd/user_interaction/pytest_select_submodel.py +++ b/tests/pytest_pysd/user_interaction/pytest_select_submodel.py @@ -1,6 +1,7 @@ import pytest import shutil +from pathlib import Path import numpy as np import pysd @@ -10,7 +11,7 @@ "model_path,subview_sep,variables,modules,n_deps,dep_vars", [ ( - "test_split_model", + Path("more-tests/split_model/test_split_model.mdl"), [], ["stock"], [], @@ -18,7 +19,7 @@ {"rate1": 4, "initial_stock": 2} ), ( - "test_split_model_subviews", + Path("more-tests/split_model/test_split_model_subviews.mdl"), ["."], [], ["view_1"], @@ -28,7 +29,7 @@ } ), ( - "test_split_model_sub_subviews", + Path("more-tests/split_model/test_split_model_sub_subviews.mdl"), [".", "-"], ["variablex"], ["subview_1", "submodule_1"], @@ -54,26 +55,20 @@ class TestSubmodel: } @pytest.fixture - def models_dir(self, _root): - return _root.joinpath("more-tests/split_model") - - @pytest.fixture - def model(self, shared_tmpdir, models_dir, model_path, subview_sep): + def model(self, shared_tmpdir, model_path, subview_sep, _root): """ Translate the model or read a translated version. This way each file is only translated once. """ # expected file - file = shared_tmpdir.joinpath(model_path + '.py') + file = shared_tmpdir.joinpath(model_path.with_suffix(".py").name) if file.is_file(): # load already translated file return pysd.load(file) else: # copy mdl file to tmp_dir and translate it - file = shared_tmpdir.joinpath(model_path + '.mdl') - shutil.copy( - models_dir.joinpath(model_path + '.mdl'), - file) + file = shared_tmpdir.joinpath(model_path.name) + shutil.copy(_root.joinpath(model_path), file) return pysd.read_vensim( file, split_views=True, subview_sep=subview_sep) @@ -154,3 +149,46 @@ def test_select_submodel(self, model, variables, modules, assert np.any(np.isnan(model.run())) # redefine dependencies assert not np.any(np.isnan(model.run(params=dep_vars))) + + +@pytest.mark.parametrize( + "model_path,split_views,module,raise_type,error_message", + [ + ( + Path("more-tests/split_model/test_split_model.mdl"), + True, + "view_4", + NameError, + "Module or submodule 'view_4' not found..." + + ), + ( + Path("more-tests/split_model/test_split_model.mdl"), + False, + "view_1", + ValueError, + "Trying to get a module from a non-modularized model" + + ) + ], +) +class TestGetVarsInModuleErrors: + @pytest.fixture + def model(self, shared_tmpdir, model_path, split_views, _root): + """ + Translate the model. 
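+        The mdl file is first copied to the shared temporary directory,
+        so it is only copied once per parametrization.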
+ """ + # mdl file + file = shared_tmpdir.joinpath(model_path.name) + + if not file.is_file(): + # copy mdl file + shutil.copy(_root.joinpath(model_path), file) + + return pysd.read_vensim(file, split_views=split_views) + + def test_get_vars_in_module_errors(self, model, module, raise_type, + error_message): + # assert raises are produced + with pytest.raises(raise_type, match=error_message): + model.get_vars_in_module(module) diff --git a/tests/pytest_translation/vensim2py/pytest_split_views.py b/tests/pytest_translation/vensim2py/pytest_split_views.py index 8b4a9a68..d4512a52 100644 --- a/tests/pytest_translation/vensim2py/pytest_split_views.py +++ b/tests/pytest_translation/vensim2py/pytest_split_views.py @@ -92,6 +92,8 @@ def expected_files(self, shared_tmpdir, _root, model_path, [files.add(shared_tmpdir.joinpath(macro + ".py")) for macro in macros] return files + + @pytest.mark.filterwarnings("ignore") def test_read_vensim_split_model(self, model_file, subview_sep, expected_files, modules, original_vars, py_vars, diff --git a/tests/unit_test_builder.py b/tests/unit_test_builder.py index b4aa3bf5..313f0b9b 100644 --- a/tests/unit_test_builder.py +++ b/tests/unit_test_builder.py @@ -306,6 +306,7 @@ def test_single_set(self): 'unit': None, 'eqn': ['eq1', 'eq2', 'eq3'], 'lims': '', + 'parent_name': None, 'dependencies': {'b': 2, 'c': 1, 'time': 8}, 'arguments': '' }]) @@ -353,6 +354,7 @@ def test_multiple_sets(self): 'unit': None, 'eqn': ['eq1', 'eq2', 'eq3'], 'lims': '', + 'parent_name': None, 'dependencies': {'b': 4}, 'arguments': '' }, @@ -368,6 +370,7 @@ def test_multiple_sets(self): 'unit': None, 'eqn': ['eq4', 'eq5', 'eq6'], 'lims': '', + 'parent_name': None, 'dependencies': { 'initial': {'c': 3, 'd': 1}, 'step': {'time': 5, 'a': 1, 'd': 5} @@ -404,6 +407,7 @@ def test_non_set(self): 'unit': None, 'eqn': ['eq1', 'eq2'], 'lims': '', + 'parent_name': None, 'dependencies': {'b': 2, 'c': 1}, 'arguments': '' }, @@ -417,6 +421,7 @@ def test_non_set(self): 'unit': None, 'eqn': ['eq3'], 'lims': '', + 'parent_name': None, 'dependencies': {}, 'arguments': '' }] From bf03b1612c15cc4c9d6df317bf2a803f70881b8d Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Thu, 9 Dec 2021 17:53:27 +0100 Subject: [PATCH 15/16] Add documentation and testing --- docs/advanced_usage.rst | 16 +- pysd/py_backend/statefuls.py | 195 +++++++++++++++--- pysd/py_backend/utils.py | 5 + .../split_model/test_split_model.mdl | 43 ++-- .../pytest_select_submodel.py | 28 ++- .../vensim2py/pytest_split_views.py | 1 - tests/unit_test_cli.py | 15 +- 7 files changed, 248 insertions(+), 55 deletions(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 888eaea6..0a15f010 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -66,7 +66,8 @@ In a Vensim model with three separate views (e.g. `view_1`, `view_2` and `view_3 | │ ├── view_2.py | │ └── view_3.py | ├── _namespace_many_views_model.json -| ├── _subscripts_dict_many_views_model.json +| ├── _subscripts_many_views_model.json +| ├── _dependencies_many_views_model.json | ├── many_views_model.py | | @@ -109,3 +110,16 @@ the new simulation will have initial time equal to 50 with the saved values from .. warning:: Exported data is saved and loaded using `pickle `_, this data can be incompatible with future versions of *PySD* or *xarray*. In order to prevent data losses save always the source code. 
+ + +Selecting and running a submodel +-------------------------------- +A submodel of a translated model can be selected to run only part of the original model. This can be done through the :py:meth:`.select_submodel()` method: + +.. autoclass:: pysd.py_backend.statefuls.Model + :members: select_submodel + +To preview the needed exogenous variables, the :py:meth:`.get_dependencies()` method can be used: + +.. autoclass:: pysd.py_backend.statefuls.Model + :members: get_dependencies diff --git a/pysd/py_backend/statefuls.py b/pysd/py_backend/statefuls.py index 355fad0a..308f0041 100644 --- a/pysd/py_backend/statefuls.py +++ b/pysd/py_backend/statefuls.py @@ -1247,6 +1247,8 @@ def _timeseries_component(self, series, dims, args=[]): # this is only called if the set_component function recognizes a # pandas series # TODO: raise a warning if extrapolating from the end of the series. + # TODO: data type variables should be created using a Data object + # lookup type variables should be created using a Lookup object if isinstance(series.values[0], xr.DataArray) and args: # the argument is already given in the model when the model # is called @@ -1641,12 +1643,77 @@ def run(self, params=None, return_columns=None, return_timestamps=None, return return_df - def select_submodel(self, vars=[], modules=[], contour_values={}): + def select_submodel(self, vars=[], modules=[], exogenous_components={}): + """ + Select a submodel from the original model. After selecting a submodel + only the necessary stateful objects for integrating this submodel will + be computed. + + Parameters + ---------- + vars: set or list of strings (optional) + Variables to include in the new submodel. + It can be an empty list if the submodel is only selected by + module names. Default is an empty list. + + modules: set or list of strings (optional) + Modules to include in the new submodel. + It can be an empty list if the submodel is only selected by + variable names. Default is an empty list. Can select a full + module or a submodule by passing the path without the .py, e.g.: + "view_1/submodule1". + + exogenous_components: dictionary of parameters (optional) + Exogenous values to fix for the model variables that are needed + to run the selected submodel. The exogenous_components should + be passed as a dictionary in the same way as for the + set_components method. By default it is an empty dict and + the needed exogenous components will be set to a numpy.nan value. + + Returns + ------- + None + + Notes + ----- + modules can only be passed when the model has been split into + different files during translation. + + Examples + -------- + >>> model.select_submodel( + ... vars=["Room Temperature", "Teacup temperature"]) + UserWarning: Selecting submodel, to run the full model again use model.reload() + + >>> model.select_submodel( + ... modules=["view_1", "view_2/subview_1"]) + UserWarning: Selecting submodel, to run the full model again use model.reload() + UserWarning: Exogenous components for the following variables are necessary but not given: + initial_value_stock1, stock3 + + >>> model.select_submodel( + ... vars=["stock3"], + ... modules=["view_1", "view_2/subview_1"]) + UserWarning: Selecting submodel, to run the full model again use model.reload() + UserWarning: Exogenous components for the following variables are necessary but not given: + initial_value_stock1, initial_value_stock3 + Please, set them before running the model using set_components method... + + >>> model.select_submodel( + ... 
vars=["stock3"], + ... modules=["view_1", "view_2/subview_1"], + ... exogenous_components={ + ... "initial_value_stock1": 3, + ... "initial_value_stock3": 5}) + UserWarning: Selecting submodel, to run the full model again use model.reload() + + """ c_vars, d_vars, s_deps = self._get_dependencies(vars, modules) warnings.warn( "Selecting submodel, " "to run the full model again use model.reload()") + # reassing the dictionary and lists of needed stateful objects self._stateful_elements = { name: getattr(self.components, name) for name in s_deps @@ -1660,7 +1727,10 @@ def select_submodel(self, vars=[], modules=[], contour_values={}): getattr(self.components, name) for name in s_deps if isinstance(getattr(self.components, name), Macro) ] + # TODO: include subselection of external objects (update in the deps + # dictionary is needed -> NO BACK COMPATIBILITY) + # get set of all dependencies and all variables to select all_deps = d_vars["initial"].copy() all_deps.update(d_vars["step"]) all_deps.update(d_vars["lookup"]) @@ -1668,6 +1738,7 @@ def select_submodel(self, vars=[], modules=[], contour_values={}): all_vars = all_deps.copy() all_vars.update(c_vars) + # clean dependendies and namespace dictionaries for real_name, py_name in self.components._namespace.copy().items(): if py_name not in all_vars: del self.components._namespace[real_name] @@ -1677,8 +1748,30 @@ def select_submodel(self, vars=[], modules=[], contour_values={}): if py_name.startswith("_") and py_name not in s_deps: del self.components._dependencies[py_name] - self.set_components({element: np.nan for element in all_deps}) - self.set_components(contour_values) + # set all exogenous values to np.nan by default + new_components = {element: np.nan for element in all_deps} + # update exogenous values with the user input + [new_components.update( + { + utils.get_key_and_value_by_insensitive_key_or_value( + key, + self.components._namespace)[1]: value + }) for key, value in exogenous_components.items()] + + self.set_components(new_components) + + # show a warning message if exogenous values are needed for a + # dependency + new_components = [ + key for key, value in new_components.items() if value is np.nan] + if new_components: + warnings.warn( + "Exogenous components for the following variables are " + f"necessary but not given:\n\t{', '.join(new_components)}" + "\n\n Please, set them before running the model using " + "set_components method...") + + # re-assign the cache_type and initialization order self._assign_cache_type() self._get_initialize_order() @@ -1688,16 +1781,57 @@ def get_dependencies(self, vars=[], modules=[]): Parameters ---------- - vars: dict - Set or list of variables to get the dependencies from - modules: dict - Set or list of modules to get the dependencies from + vars: set or list of strings (optional) + Variables to get the dependencies from. + It can be an empty list if the dependencies are computed only + using modules. Default is an empty list. + modules: set or list of strings (optional) + Modules to get the dependencies from. + It can be an empty list if the dependencies are computed only + using variables. Default is an empty list. Can select a full + module or a submodule by passing the path without the .py, e.g.: + "view_1/submodule1". Returns ------- dependencies: set Set of dependencies nedded to run vars. + Notes + ----- + modules can be only passed when the model has been split in + different files during translation. + + Examples + -------- + >>> model.get_dependencies( + ... 
vars=["Room Temperature", "Teacup temperature"]) + Selected variables (total 1): + room_temperature, teacup_temperature + Stateful objects integrated with the selected variables (total 1): + _integ_teacup_temperature + + >>> model.get_dependencies( + ... modules=["view_1", "view_2/subview_1"]) + Selected variables (total 4): + var1, var2, stock1, delay1 + Dependencies for initialization only (total 1): + initial_value_stock1 + Dependencies that may change over time (total 2): + stock3 + Stateful objects integrated with the selected variables (total 1): + _integ_stock1, _delay_fixed_delay1 + + >>> model.get_dependencies( + ... vars=["stock3"], + ... modules=["view_1", "view_2/subview_1"]) + Selected variables (total 4): + var1, var2, stock1, stock3, delay1 + Dependencies for initialization only (total 1): + initial_value_stock1, initial_value_stock3 + Stateful objects integrated with the selected variables (total 1): + _integ_stock1, _integ_stock3, _delay_fixed_delay1 + """ c_vars, d_vars, s_deps = self._get_dependencies(vars, modules) @@ -1728,21 +1862,32 @@ def _get_dependencies(self, vars=[], modules=[]): Parameters ---------- - vars: dict - Set or list of variables to get the dependencies from - modules: dict - Set or list of modules to get the dependencies from + vars: set or list of strings (optional) + Variables to get the dependencies from. + It can be an empty list if the dependencies are computed only + using modules. Default is an empty list. + modules: set or list of strings (optional) + Modules to get the dependencies from. + It can be an empty list if the dependencies are computed only + using variables. Default is an empty list. Can select a full + module or a submodule by passing the path without the .py, e.g.: + "view_1/submodule1". Returns ------- - dependencies: set - Set of dependencies nedded to run vars. + c_vars: set + Set of all selected model variables. + d_deps: dict of sets + Dictionary of dependencies nedded to run vars and modules. + s_deps: set + Set of stateful objects to update when integrating selected + model variables. 
""" def check_dep(dependencies, initial=False): for dep in dependencies: if dep in c_vars or dep.startswith("__"): - continue + pass elif dep.startswith("_"): s_deps.add(dep) dep = self.components._dependencies[dep] @@ -1794,23 +1939,19 @@ def get_vars_in_module(self, module): """ try: - modules = self.components._modules.copy() + module_content = self.components._modules.copy() except NameError: raise ValueError( "Trying to get a module from a non-modularized model") - while modules: - # find the module or the submodule - if module in modules: - module_content = [modules[module]] - break - new_modules = {} - [new_modules.update(value) for value in modules.values() - if isinstance(value, dict)] - modules = new_modules - - if not modules: - raise NameError(f"Module or submodule '{module}' not found...\n") + try: + # get the module or the submodule content + for submodule in module.split("/"): + module_content = module_content[submodule] + module_content = [module_content] + except KeyError: + raise NameError( + f"Module or submodule '{submodule}' not found...\n") vars, new_content = set(), [] diff --git a/pysd/py_backend/utils.py b/pysd/py_backend/utils.py index f4996f73..485ac4e5 100644 --- a/pysd/py_backend/utils.py +++ b/pysd/py_backend/utils.py @@ -503,12 +503,17 @@ def detect_encoding(filename): def print_objects_format(object_set, text): + """ + Return a printable version of the variables in object_sect with the + header given with text. + """ text += " (total %(n_obj)s):\n\t%(objs)s\n" % { "n_obj": len(object_set), "objs": ", ".join(object_set) } return text + class ProgressBar: """ Progress bar for integration diff --git a/tests/more-tests/split_model/test_split_model.mdl b/tests/more-tests/split_model/test_split_model.mdl index c45a195a..99005d30 100644 --- a/tests/more-tests/split_model/test_split_model.mdl +++ b/tests/more-tests/split_model/test_split_model.mdl @@ -1,17 +1,16 @@ {UTF-8} -other stock= INTEG ( - 6*"var-n", - 3) +another var= + 3*Stock ~ ~ | -"variable-x"= - lookup table(6*another var) +initial stock= + 0.1 ~ ~ | -initial stock= - 0.1 +initial stock correction= + 0 ~ ~ | @@ -20,19 +19,20 @@ lookup table( ~ ~ | -Stock= INTEG ( - "rate-1", - initial stock) +other stock= INTEG ( + 6*"var-n", + 3) ~ ~ | -another var= - 3*Stock +"rate-1"= + "var-n" ~ ~ | -"rate-1"= - "var-n" +Stock= INTEG ( + "rate-1"+Time*initial stock correction, + initial stock+initial stock correction) ~ ~ | @@ -41,6 +41,11 @@ another var= ~ ~ | +"variable-x"= + lookup table(6*another var) + ~ + ~ | + ******************************************************** .Control ********************************************************~ @@ -83,13 +88,17 @@ Stock 10,8,"var-n",207,367,18,11,8,3,0,0,0,0,0,0 1,9,8,6,0,0,0,0,0,128,0,-1--1--1,,1|(288,318)| 10,10,initial stock,431,347,36,11,8,3,0,0,0,0,0,0 -1,11,10,1,0,0,0,0,0,128,1,-1--1--1,,1|(457,302)| +1,11,10,1,0,0,0,0,0,128,0,-1--1--1,,1|(457,302)| 10,12,other stock,371,488,40,20,3,3,0,0,0,0,0,0 12,13,48,95,500,10,8,0,3,0,0,-1,0,0,0 -11,14,0,218,500,6,8,2,3,0,0,1,0,0,0 +11,14,48,218,500,6,8,2,3,0,0,1,0,0,0 1,15,14,12,4,0,0,22,0,0,0,-1--1--1,,1|(277,500)| 1,16,14,13,100,0,0,22,0,0,0,-1--1--1,,1|(158,500)| 1,17,8,14,0,0,0,0,0,128,0,-1--1--1,,1|(211,429)| +10,18,Time,445,175,26,11,8,2,0,3,-1,0,0,0,128-128-128,0-0-0,|12||128-128-128 +1,19,18,1,0,0,0,0,0,128,0,-1--1--1,,1|(462,196)| +10,20,initial stock correction,573,348,34,19,8,3,0,0,0,0,0,0 +1,21,20,1,0,0,0,0,0,128,0,-1--1--1,,1|(538,298)| \\\---/// Sketch information - do not modify anything except names V300 Do not 
put anything below this section - it will be ignored *View-2 @@ -122,7 +131,7 @@ $192-192-192,0,Times New Roman|12||0-0-0|0-0-0|0-0-255|-1--1--1|-1--1--1|96,96,1 27:0, 34:0, 4:Time -5:other stock +5:another var 35:Date 36:YYYY-MM-DD 37:2000 diff --git a/tests/pytest_pysd/user_interaction/pytest_select_submodel.py b/tests/pytest_pysd/user_interaction/pytest_select_submodel.py index 2b4f9ca5..15a0e516 100644 --- a/tests/pytest_pysd/user_interaction/pytest_select_submodel.py +++ b/tests/pytest_pysd/user_interaction/pytest_select_submodel.py @@ -15,8 +15,8 @@ [], ["stock"], [], - (6, 1, 1, 0, 1), - {"rate1": 4, "initial_stock": 2} + (6, 1, 2, 0, 1), + {"rate1": 4, "initial_stock": 2, "initial_stock_correction": 0} ), ( Path("more-tests/split_model/test_split_model_subviews.mdl"), @@ -32,7 +32,7 @@ Path("more-tests/split_model/test_split_model_sub_subviews.mdl"), [".", "-"], ["variablex"], - ["subview_1", "submodule_1"], + ["view_3/subview_1", "view_1/submodule_1"], (12, 0, 1, 1, 1), {"another_var": 5, "look_up_definition": 3} ) @@ -125,9 +125,12 @@ def test_select_submodel(self, model, variables, modules, assert "Stock" in model.components._namespace # select submodel - with pytest.warns(UserWarning, match=self.warning): + with pytest.warns(UserWarning) as record: model.select_submodel(vars=variables, modules=modules) + # assert warning + assert str(record[0].message) == self.warning + # assert stateful elements change assert len(model._dynamicstateful_elements) == 1 assert "_integ_other_stock" not in model._stateful_elements @@ -142,14 +145,31 @@ def test_select_submodel(self, model, variables, modules, if not dep_vars: # totally independent submodels can run without producing # nan values + assert len(record) == 1 assert not np.any(np.isnan(model.run())) else: # running the model without redefining dependencies will # produce nan values + assert len(record) == 2 + assert "Exogenous components for the following variables are"\ + + " necessary but not given:" in str(record[1].message) + assert "Please, set them before running the model using "\ + + "set_components method..." 
in str(record[1].message) + for var in dep_vars: + assert var in str(record[1].message) assert np.any(np.isnan(model.run())) # redefine dependencies assert not np.any(np.isnan(model.run(params=dep_vars))) + + # select submodel using exogenous components + model.reload() + with pytest.warns(UserWarning) as record: + model.select_submodel(vars=variables, modules=modules, + exogenous_components=dep_vars) + + assert len(record) == 1 + assert not np.any(np.isnan(model.run())) + @pytest.mark.parametrize( "model_path,split_views,module,raise_type,error_message", diff --git a/tests/pytest_translation/vensim2py/pytest_split_views.py b/tests/pytest_translation/vensim2py/pytest_split_views.py index d4512a52..c8019981 100644 --- a/tests/pytest_translation/vensim2py/pytest_split_views.py +++ b/tests/pytest_translation/vensim2py/pytest_split_views.py @@ -92,7 +92,6 @@ def expected_files(self, shared_tmpdir, _root, model_path, [files.add(shared_tmpdir.joinpath(macro + ".py")) for macro in macros] return files - @pytest.mark.filterwarnings("ignore") def test_read_vensim_split_model(self, model_file, subview_sep, expected_files, modules, diff --git a/tests/unit_test_cli.py b/tests/unit_test_cli.py index 4a93207b..6cbb861a 100644 --- a/tests/unit_test_cli.py +++ b/tests/unit_test_cli.py @@ -198,7 +198,8 @@ def test_read_vensim_split_model(self): model_name = "test_split_model" namespace_filename = "_namespace_" + model_name + ".json" - subscript_dict_filename = "_subscripts_" + model_name + ".json" + dependencies_filename = "_dependencies_" + model_name + ".json" + subscript_filename = "_subscripts_" + model_name + ".json" modules_filename = "_modules.json" modules_dirname = "modules_" + model_name model_name_mdl = root_dir + model_name + ".mdl" @@ -209,7 +210,8 @@ def test_read_vensim_split_model(self): # check that the _namespace, _subscripts and _dependencies json files were created self.assertTrue(os.path.isfile(root_dir + namespace_filename)) self.assertTrue(os.path.isfile(root_dir + subscript_filename)) + self.assertTrue(os.path.isfile(root_dir + dependencies_filename)) # check that the main model file was created self.assertTrue(os.path.isfile(root_dir + model_name + ".py")) @@ -231,7 +233,8 @@ def test_read_vensim_split_model(self): # remove newly created files os.remove(root_dir + model_name + ".py") os.remove(root_dir + namespace_filename) - os.remove(root_dir + subscript_dict_filename) + os.remove(root_dir + subscript_filename) + os.remove(root_dir + dependencies_filename) # remove newly created modules folder shutil.rmtree(root_dir + modules_dirname) @@ -251,7 +254,8 @@ def test_read_vensim_split_model_subviews(self): ) namespace_filename = "_namespace_" + model_name + ".json" - subscript_dict_filename = "_subscripts_" + model_name + ".json" + subscript_filename = "_subscripts_" + model_name + ".json" + dependencies_filename = "_dependencies_" + model_name + ".json" modules_dirname = "modules_" + model_name separator = "." 
@@ -289,7 +293,8 @@ def test_read_vensim_split_model_subviews(self): # remove newly created files os.remove(root_dir + model_name + ".py") os.remove(root_dir + namespace_filename) - os.remove(root_dir + subscript_dict_filename) + os.remove(root_dir + subscript_filename) + os.remove(root_dir + dependencies_filename) # remove newly created modules folder shutil.rmtree(root_dir + modules_dirname) From 9f6da8ab94ab348c88b94213f570f899b380c600 Mon Sep 17 00:00:00 2001 From: Eneko Martin Martinez Date: Mon, 13 Dec 2021 12:56:56 +0100 Subject: [PATCH 16/16] Make Data objects compatible with Path objects --- pysd/py_backend/data.py | 19 +- pysd/py_backend/utils.py | 6 +- .../pytest_select_submodel.py | 12 +- .../vensim2py/pytest_split_views.py | 17 +- tests/pytest_types/data/pytest_columns.py | 137 ++++++++++++ tests/pytest_types/data/pytest_data.py | 56 +++++ .../data/pytest_data_with_model.py | 139 ++++++++++++ tests/unit_test_data.py | 197 ------------------ tests/unit_test_pysd.py | 97 --------- 9 files changed, 365 insertions(+), 315 deletions(-) create mode 100644 tests/pytest_types/data/pytest_columns.py create mode 100644 tests/pytest_types/data/pytest_data.py create mode 100644 tests/pytest_types/data/pytest_data_with_model.py delete mode 100644 tests/unit_test_data.py diff --git a/pysd/py_backend/data.py b/pysd/py_backend/data.py index da65bcb8..4a69d6fa 100644 --- a/pysd/py_backend/data.py +++ b/pysd/py_backend/data.py @@ -1,5 +1,6 @@ import warnings import re +from pathlib import Path import numpy as np import xarray as xr @@ -19,6 +20,8 @@ def read(cls, file_name, encoding=None): """ Read the columns from the data file or return the previously read ones """ + if isinstance(file_name, str): + file_name = Path(file_name) if file_name in cls._files: return cls._files[file_name] else: @@ -50,7 +53,7 @@ def read_file(cls, file_name, encoding=None): out = cls.read_line(file_name, encoding) if out is None: raise ValueError( - f"\nNot able to read '{file_name}'. " + f"\nNot able to read '{str(file_name)}'. " + "Only '.csv', '.tab' files are accepted.") transpose = False @@ -64,7 +67,7 @@ def read_file(cls, file_name, encoding=None): return out, transpose else: raise ValueError( - f"Invalid file format '{file_name}'... variable names " + f"Invalid file format '{str(file_name)}'... variable names " "should appear in the first row or in the first column...") @classmethod @@ -72,13 +75,13 @@ def read_line(cls, file_name, encoding=None): """ Read the first row and return it as a set. """ - if file_name.lower().endswith(".tab"): + if file_name.suffix.lower() == ".tab": return set(pd.read_table(file_name, nrows=0, encoding=encoding, dtype=str, header=0).iloc[:, 1:]) - elif file_name.lower().endswith(".csv"): + elif file_name.suffix.lower() == ".csv": return set(pd.read_csv(file_name, nrows=0, encoding=encoding, @@ -92,12 +95,12 @@ def read_row(cls, file_name, encoding=None): """ Read the first column and return it as a set. """ - if file_name.lower().endswith(".tab"): + if file_name.suffix.lower() == ".tab": return set(pd.read_table(file_name, usecols=[0], encoding=encoding, dtype=str).iloc[:, 0].to_list()) - elif file_name.lower().endswith(".csv"): + elif file_name.suffix.lower() == ".csv": return set(pd.read_csv(file_name, usecols=[0], encoding=encoding, @@ -236,7 +239,7 @@ def load_data(self, file_names): Resulting data array with the time in the first dimension. 
""" - if isinstance(file_names, str): + if isinstance(file_names, (str, Path)): file_names = [file_names] for file_name in file_names: @@ -248,7 +251,7 @@ def load_data(self, file_names): raise ValueError( f"_data_{self.py_name}\n" f"Data for {self.real_name} not found in " - f"{', '.join(file_names)}") + f"{', '.join([str(file_name) for file_name in file_names])}") def _load_data(self, file_name): """ diff --git a/pysd/py_backend/utils.py b/pysd/py_backend/utils.py index 485ac4e5..097874c0 100644 --- a/pysd/py_backend/utils.py +++ b/pysd/py_backend/utils.py @@ -6,6 +6,7 @@ import os import json +from pathlib import Path from chardet.universaldetector import UniversalDetector import regex as re @@ -452,13 +453,16 @@ def load_outputs(file_name, transpose=False, columns=None, encoding=None): """ read_func = {'.csv': pd.read_csv, '.tab': pd.read_table} + if isinstance(file_name, str): + file_name = Path(file_name) + if columns: columns = set(columns) if not transpose: columns.add("Time") for end, func in read_func.items(): - if file_name.lower().endswith(end): + if file_name.suffix.lower() == end: if transpose: out = func(file_name, encoding=encoding, diff --git a/tests/pytest_pysd/user_interaction/pytest_select_submodel.py b/tests/pytest_pysd/user_interaction/pytest_select_submodel.py index 15a0e516..0e101fe0 100644 --- a/tests/pytest_pysd/user_interaction/pytest_select_submodel.py +++ b/tests/pytest_pysd/user_interaction/pytest_select_submodel.py @@ -10,7 +10,7 @@ @pytest.mark.parametrize( "model_path,subview_sep,variables,modules,n_deps,dep_vars", [ - ( + ( # split_views Path("more-tests/split_model/test_split_model.mdl"), [], ["stock"], @@ -18,7 +18,7 @@ (6, 1, 2, 0, 1), {"rate1": 4, "initial_stock": 2, "initial_stock_correction": 0} ), - ( + ( # split_subviews Path("more-tests/split_model/test_split_model_subviews.mdl"), ["."], [], @@ -28,7 +28,7 @@ } ), - ( + ( # split_sub_subviews Path("more-tests/split_model/test_split_model_sub_subviews.mdl"), [".", "-"], ["variablex"], @@ -37,6 +37,7 @@ {"another_var": 5, "look_up_definition": 3} ) ], + ids=["split_views", "split_subviews", "split_sub_subviews"] ) class TestSubmodel: """Submodel selecting class""" @@ -174,7 +175,7 @@ def test_select_submodel(self, model, variables, modules, @pytest.mark.parametrize( "model_path,split_views,module,raise_type,error_message", [ - ( + ( # module_not_found Path("more-tests/split_model/test_split_model.mdl"), True, "view_4", @@ -182,7 +183,7 @@ def test_select_submodel(self, model, variables, modules, "Module or submodule 'view_4' not found..." 
), - ( + ( # not_modularized_model Path("more-tests/split_model/test_split_model.mdl"), False, "view_1", @@ -191,6 +192,7 @@ def test_select_submodel(self, model, variables, modules, ) ], + ids=["module_not_found", "not_modularized_model"] ) class TestGetVarsInModuleErrors: @pytest.fixture diff --git a/tests/pytest_translation/vensim2py/pytest_split_views.py b/tests/pytest_translation/vensim2py/pytest_split_views.py index c8019981..1267bfe1 100644 --- a/tests/pytest_translation/vensim2py/pytest_split_views.py +++ b/tests/pytest_translation/vensim2py/pytest_split_views.py @@ -11,7 +11,7 @@ "model_path,subview_sep,modules,macros,original_vars,py_vars," + "stateful_objs", [ - ( + ( # split_views Path("more-tests/split_model/test_split_model.mdl"), [], ["view_1", "view2", "view_3"], @@ -20,7 +20,7 @@ ["another_var", "rate1", "varn", "variablex", "stock"], ["_integ_stock"] ), - ( + ( # split_subviews Path("more-tests/split_model/test_split_model_subviews.mdl"), ["."], ["view_1/submodule_1", "view_1/submodule_2", "view_2"], @@ -29,7 +29,7 @@ ["another_var", "rate1", "varn", "variablex", "stock"], ["_integ_stock"] ), - ( + ( # split_sub_subviews Path("more-tests/split_model/test_split_model_sub_subviews.mdl"), [".", "-"], [ @@ -43,7 +43,7 @@ "interesting_var_2", "great_var"], ["_integ_stock"] ), - ( + ( # split_macro Path("more-tests/split_model_with_macro/" + "test_split_model_with_macro.mdl"), [".", "-"], @@ -53,7 +53,7 @@ ["new_var"], ["_macro_macro_output"] ), - ( + ( # split_vensim_8_2_1 Path("more-tests/split_model_vensim_8_2_1/" + "test_split_model_vensim_8_2_1.mdl"), [], @@ -64,6 +64,8 @@ ["integ_teacup_temperature", "integ_cream_temperature"] ) ], + ids=["split_views", "split_subviews", "split_sub_subviews", "split_macro", + "split_vensim_8_2_1"] ) class TestSplitViews: """ @@ -152,18 +154,19 @@ def test_read_vensim_split_model(self, model_file, subview_sep, @pytest.mark.parametrize( "model_path,subview_sep,warning_message", [ - ( + ( # warning_noviews Path("test-models/samples/teacup/teacup.mdl"), [], "Only a single view with no subviews was detected. The model" + " will be built in a single file." ), - ( + ( # not_match_separator Path("more-tests/split_model/test_split_model_sub_subviews.mdl"), ["a"], "The given subview separators were not matched in any view name." 
), ], ids=["warning_noviews", "not_match_separator"] ) class TestSplitViewsWarnings: """ diff --git a/tests/pytest_types/data/pytest_columns.py b/tests/pytest_types/data/pytest_columns.py new file mode 100644 index 00000000..2e6d81a5 --- /dev/null +++ b/tests/pytest_types/data/pytest_columns.py @@ -0,0 +1,137 @@ +import pytest +import itertools + +from pysd.py_backend.data import Columns + + +class TestColumns: + @pytest.fixture(scope="class") + def out_teacup(self, _root): + return _root.joinpath("data/out_teacup.csv") + + @pytest.fixture(scope="class") + def out_teacup_transposed(self, _root): + return _root.joinpath("data/out_teacup_transposed.csv") + + def test_clean_columns(self, out_teacup): + # test the singleton works well for lazy loading + Columns.clean() + assert Columns._files == {} + Columns.read(out_teacup) + assert Columns._files != {} + assert out_teacup in Columns._files + Columns.clean() + assert Columns._files == {} + + def test_transposed_frame(self, out_teacup, out_teacup_transposed): + # test loading transposed frames + cols1, trans1 = Columns.get_columns(out_teacup) + cols2, trans2 = Columns.get_columns(out_teacup_transposed) + Columns.clean() + + assert cols1 == cols2 + assert not trans1 + assert trans2 + + def test_get_columns(self, out_teacup, out_teacup_transposed): + # test getting specific columns by name + cols0, trans0 = Columns.get_columns(out_teacup) + + cols1, trans1 = Columns.get_columns( + out_teacup, + vars=["Room Temperature", "Teacup Temperature"]) + + cols2, trans2 = Columns.get_columns( + out_teacup_transposed, + vars=["Heat Loss to Room"]) + + cols3 = Columns.get_columns( + out_teacup_transposed, + vars=["No column"])[0] + + Columns.clean() + + assert cols1.issubset(cols0) + assert cols1 == set(["Room Temperature", "Teacup Temperature"]) + + assert cols2.issubset(cols0) + assert cols2 == set(["Heat Loss to Room"]) + + assert cols3 == set() + + assert not trans0 + assert not trans1 + assert trans2 + + def test_get_columns_subscripted(self, _root): + # test get subscripted columns + data_file = _root.joinpath( + "test-models/tests/subscript_3d_arrays_widthwise/output.tab" + ) + + data_file2 = _root.joinpath( + "test-models/tests/subscript_2d_arrays/output.tab" + ) + + subsd = { + "d3": ["Depth 1", "Depth 2"], + "d2": ["Column 1", "Column 2"], + "d1": ["Entry 1", "Entry 2", "Entry 3"] + } + + cols1 = Columns.get_columns( + data_file, + vars=["Three Dimensional Constant"])[0] + + expected = { + "Three Dimensional Constant[" + ",".join(el) + "]" + for el in itertools.product(subsd["d1"], subsd["d2"], subsd["d3"]) + } + + assert cols1 == expected + + cols2 = Columns.get_columns( + data_file2, + vars=["Rate A", "Stock A"])[0] + + subs = list(itertools.product(subsd["d1"], subsd["d2"])) + expected = { + "Rate A[" + ",".join(el) + "]" + for el in subs + } + + expected.update({ + "Stock A[" + ",".join(el) + "]" + for el in subs + }) + + assert cols2 == expected + + +@pytest.mark.parametrize( + "file,raise_type,error_message", + [ + ( # invalid_file_type + "more-tests/not_vensim/test_not_vensim.txt", + ValueError, + "Not able to read '%s'" + ), + ( # invalid_file_format + "data/out_teacup_no_head.csv", + ValueError, + "Invalid file format '%s'... variable names should appear" + + " in the first row or in the first column..." 
+ ) + ], + ids=["invalid_file_type", "invalid_file_format"] +) +class TestColumnsErrors: + # Test errors associated with Columns class + + @pytest.fixture + def file_path(self, _root, file): + return _root.joinpath(file) + + def test_columns_errors(self, file_path, raise_type, error_message): + with pytest.raises(raise_type, match=error_message % str(file_path)): + Columns.read_file(file_path) diff --git a/tests/pytest_types/data/pytest_data.py b/tests/pytest_types/data/pytest_data.py new file mode 100644 index 00000000..4aebd8d1 --- /dev/null +++ b/tests/pytest_types/data/pytest_data.py @@ -0,0 +1,56 @@ +import pytest + +import xarray as xr + +from pysd.py_backend.data import Data + + +@pytest.mark.parametrize( + "value,interp,raise_type,error_message", + [ + ( # not_loaded_data + None, + "interpolate", + ValueError, + "Trying to interpolate data variable before loading the data..." + ), + # test that try/except block on call doesn't catch errors differents + # than data = None + ( # try_except_1 + 3, + None, + TypeError, + "'int' object is not subscriptable" + ), + ( # try_except_2 + xr.DataArray([10, 20], {'dim1': [0, 1]}, ['dim1']), + None, + KeyError, + "'time'" + ), + ( # try_except_3 + xr.DataArray([10, 20], {'time': [0, 1]}, ['time']), + None, + AttributeError, + "'Data' object has no attribute 'is_float'" + ) + ], + ids=["not_loaded_data", "try_except_1", "try_except_2", "try_except_3"] +) +@pytest.mark.filterwarnings("ignore") +class TestDataErrors(): + # Test errors associated with Data class + # Several Data cases are tested in unit_test_external while some other + # are tested indirectly in unit_test_pysd and integration_test_vensim + + @pytest.fixture + def data(self, value, interp): + obj = Data() + obj.data = value + obj.interp = interp + obj.py_name = "data" + return obj + + def test_data_errors(self, data, raise_type, error_message): + with pytest.raises(raise_type, match=error_message): + data(1.5) diff --git a/tests/pytest_types/data/pytest_data_with_model.py b/tests/pytest_types/data/pytest_data_with_model.py new file mode 100644 index 00000000..d6c74851 --- /dev/null +++ b/tests/pytest_types/data/pytest_data_with_model.py @@ -0,0 +1,139 @@ +import pytest +import shutil + +import numpy as np +import pandas as pd + +from pysd.tools.benchmarking import assert_frames_close +from pysd import read_vensim, load + + +@pytest.fixture(scope="module") +def data_folder(_root): + return _root.joinpath("more-tests/data_model/") + + +@pytest.fixture(scope="module") +def data_model(data_folder): + return data_folder.joinpath("test_data_model.mdl") + + +@pytest.fixture +def data_files(data_files_short, data_folder): + if isinstance(data_files_short, str): + return data_folder.joinpath(data_files_short) + elif isinstance(data_files_short, list): + return [data_folder.joinpath(df) for df in data_files_short] + else: + return { + data_folder.joinpath(df): value + for df, value in data_files_short.items() + } + + +times = np.arange(11) + + +@pytest.mark.parametrize( + "data_files_short,expected", + [ + ( # one_file + "data1.tab", + pd.DataFrame( + index=times, + data={'var1': times, "var2": 2*times, "var3": 3*times} + ) + ), + ( # two_files + ["data3.tab", + "data1.tab"], + pd.DataFrame( + index=times, + data={'var1': -times, "var2": -2*times, "var3": 3*times} + ) + + ), + ( # transposed_file + ["data2.tab"], + pd.DataFrame( + index=times, + data={'var1': times-5, "var2": 2*times-5, "var3": 3*times-5} + ) + + ), + ( # dict_file + {"data2.tab": ["\"data-3\""], + "data1.tab": ["data_1", 
"Data 2"]}, + pd.DataFrame( + index=times, + data={'var1': times, "var2": 2*times, "var3": 3*times-5} + ) + ) + + ], + ids=["one_file", "two_files", "transposed_file", "dict_file"] +) +class TestPySDData: + + @pytest.fixture + def model(self, data_model, data_files, shared_tmpdir): + # translated file + file = shared_tmpdir.joinpath(data_model.with_suffix(".py").name) + if file.is_file(): + # load already translated file + return load(file, data_files) + else: + # copy mdl file to tmp_dir and translate it + file = shared_tmpdir.joinpath(data_model.name) + shutil.copy(data_model, file) + return read_vensim(file, data_files) + + def test_get_data_and_run(self, model, expected): + assert_frames_close( + model.run(return_columns=["var1", "var2", "var3"]), + expected) + + +class TestPySDDataErrors: + def model(self, data_model, data_files, shared_tmpdir): + # translated file + file = shared_tmpdir.joinpath(data_model.with_suffix(".py").name) + if file.is_file(): + # load already translated file + return load(file, data_files) + else: + # copy mdl file to tmp_dir and translate it + file = shared_tmpdir.joinpath(data_model.name) + shutil.copy(data_model, file) + return read_vensim(file, data_files) + + def test_run_error(self, data_model, shared_tmpdir): + model = self.model(data_model, [], shared_tmpdir) + error_message = "Trying to interpolate data variable before loading"\ + + " the data..." + + with pytest.raises(ValueError, match=error_message): + model.run(return_columns=["var1", "var2", "var3"]) + + @pytest.mark.parametrize( + "data_files_short,raise_type,error_message", + [ + ( # missing_data + "data3.tab", + ValueError, + "Data for \"data-3\" not found in %s" + ), + ( # data_variable_not_found_from_dict_file + {"data1.tab": ["non-existing-var"]}, + ValueError, + "'non-existing-var' not found as model data variable" + ), + ], + ids=["missing_data", "data_variable_not_found_from_dict_file"] + ) + def test_loading_error(self, data_model, data_files, raise_type, + error_message, shared_tmpdir): + + with pytest.raises(raise_type, match=error_message % (data_files)): + self.model( + data_model, data_files, shared_tmpdir) diff --git a/tests/unit_test_data.py b/tests/unit_test_data.py deleted file mode 100644 index 2b1f07d0..00000000 --- a/tests/unit_test_data.py +++ /dev/null @@ -1,197 +0,0 @@ -import os -import itertools -import unittest - -import xarray as xr - -_root = os.path.dirname(__file__) - - -class TestColumns(unittest.TestCase): - def test_clean_columns(self): - from pysd.py_backend.data import Columns - Columns.clean() - self.assertEqual(Columns._files, {}) - Columns.read( - os.path.join(_root, "data/out_teacup.csv")) - self.assertNotEqual(Columns._files, {}) - self.assertIn(os.path.join(_root, "data/out_teacup.csv"), - Columns._files) - Columns.clean() - self.assertEqual(Columns._files, {}) - - def test_non_valid_outputs(self): - from pysd.py_backend.data import Columns - - with self.assertRaises(ValueError) as err: - Columns.read_file( - os.path.join( - _root, - "more-tests/not_vensim/test_not_vensim.txt")) - - self.assertIn( - "Not able to read '", - str(err.exception)) - self.assertIn( - "more-tests/not_vensim/test_not_vensim.txt'.", - str(err.exception)) - - def test_non_valid_file_format(self): - from pysd.py_backend.data import Columns - - file_name = os.path.join(_root, "data/out_teacup_no_head.csv") - with self.assertRaises(ValueError) as err: - Columns.read_file(file_name) - - self.assertIn( - f"Invalid file format '{file_name}'... 
varible names " - + "should appear in the first row or in the first column...", - str(err.exception)) - - def test_transposed_frame(self): - from pysd.py_backend.data import Columns - - cols1, trans1 = Columns.get_columns( - os.path.join(_root, "data/out_teacup.csv")) - cols2, trans2 = Columns.get_columns( - os.path.join(_root, "data/out_teacup_transposed.csv")) - Columns.clean() - - self.assertEqual(cols1, cols2) - self.assertFalse(trans1) - self.assertTrue(trans2) - - def test_get_columns(self): - from pysd.py_backend.data import Columns - - cols0, trans0 = Columns.get_columns( - os.path.join(_root, "data/out_teacup.csv")) - - cols1, trans1 = Columns.get_columns( - os.path.join(_root, "data/out_teacup.csv"), - vars=["Room Temperature", "Teacup Temperature"]) - - cols2, trans2 = Columns.get_columns( - os.path.join(_root, "data/out_teacup_transposed.csv"), - vars=["Heat Loss to Room"]) - - cols3 = Columns.get_columns( - os.path.join(_root, "data/out_teacup_transposed.csv"), - vars=["No column"])[0] - - Columns.clean() - - self.assertTrue(cols1.issubset(cols0)) - self.assertEqual( - cols1, - set(["Room Temperature", "Teacup Temperature"])) - - self.assertTrue(cols2.issubset(cols0)) - self.assertEqual( - cols2, - set(["Heat Loss to Room"])) - - self.assertEqual(cols3, set()) - - self.assertFalse(trans0) - self.assertFalse(trans1) - self.assertTrue(trans2) - - def test_get_columns_subscripted(self): - from pysd.py_backend.data import Columns - - data_file = os.path.join( - _root, - "test-models/tests/subscript_3d_arrays_widthwise/output.tab" - ) - - data_file2 = os.path.join( - _root, - "test-models/tests/subscript_2d_arrays/output.tab" - ) - - subsd = { - "d3": ["Depth 1", "Depth 2"], - "d2": ["Column 1", "Column 2"], - "d1": ["Entry 1", "Entry 2", "Entry 3"] - } - - cols1 = Columns.get_columns( - data_file, - vars=["Three Dimensional Constant"])[0] - - expected = { - "Three Dimensional Constant[" + ",".join(el) + "]" - for el in itertools.product(subsd["d1"], subsd["d2"], subsd["d3"]) - } - - self.assertEqual(cols1, expected) - - cols2 = Columns.get_columns( - data_file2, - vars=["Rate A", "Stock A"])[0] - - subs = list(itertools.product(subsd["d1"], subsd["d2"])) - expected = { - "Rate A[" + ",".join(el) + "]" - for el in subs - } - - expected.update({ - "Stock A[" + ",".join(el) + "]" - for el in subs - }) - - self.assertEqual(cols2, expected) - - -class TestData(unittest.TestCase): - # Several Data cases are tested in unit_test_external while some other - # are tested indirectly in unit_test_pysd and integration_test_vensim - - def test_no_data_error(self): - from pysd.py_backend.data import Data - obj = Data() - obj.data = None - obj.interp = "interpolate" - obj.py_name = "data" - with self.assertRaises(ValueError) as err: - obj(1.5) - - self.assertIn( - "Trying to interpolate data variable before loading the data...", - str(err.exception)) - - def test_invalid_data_regular_error(self): - # test that try/except block on call doesn't catch errors differents - # than data = None - from pysd.py_backend.data import Data - - obj = Data() - - obj.data = 3 - - with self.assertRaises(TypeError) as err: - obj(1.5) - - self.assertIn( - "'int' object is not subscriptable", - str(err.exception)) - - obj.data = xr.DataArray([10, 20], {'dim1': [0, 1]}, ['dim1']) - - with self.assertRaises(KeyError) as err: - obj(1.5) - - self.assertIn( - "'time'", - str(err.exception)) - - obj.data = xr.DataArray([10, 20], {'time': [0, 1]}, ['time']) - - with self.assertRaises(AttributeError) as err: - obj(1.5) - - 
self.assertIn( - "'Data' object has no attribute 'interp'", - str(err.exception)) diff --git a/tests/unit_test_pysd.py b/tests/unit_test_pysd.py index 3a1c9a28..3d9799ed 100644 --- a/tests/unit_test_pysd.py +++ b/tests/unit_test_pysd.py @@ -1579,103 +1579,6 @@ def test_change_constant_pipe(self): test_model_constant_pipe.replace(".mdl", ".py")) -class TestDataReading(unittest.TestCase): - data_folder = os.path.join(_root, "more-tests/data_model/") - data_model = os.path.join(data_folder, "test_data_model.mdl") - - def test_no_data_files_provided(self): - from pysd import read_vensim - model = read_vensim(self.data_model) - - with self.assertRaises(ValueError) as err: - model.run(return_columns=["var1", "var2", "var3"]) - - self.assertIn("Trying to interpolate data variable before loading" - " the data...", str(err.exception)) - - def test_missing_data(self): - from pysd import read_vensim - - with self.assertRaises(ValueError) as err: - read_vensim( - self.data_model, data_files=self.data_folder+"data3.tab") - - self.assertIn( - "Data for \"data-3\" not found in " - + self.data_folder + "data3.tab", - str(err.exception)) - - def test_get_data_variable_not_found_from_dict_file(self): - from pysd import read_vensim - - with self.assertRaises(ValueError) as err: - read_vensim( - self.data_model, - data_files={ - self.data_folder+"data1.tab": ["non-existing-var"]}) - - self.assertIn( - "'non-existing-var' not found as model data variable", - str(err.exception)) - - def test_get_data_from_one_file(self): - from pysd import read_vensim - - model = read_vensim( - self.data_model, data_files=self.data_folder+"data1.tab") - out = model.run(return_columns=["var1", "var2", "var3"]) - times = np.arange(11) - expected = pd.DataFrame( - index=times, - data={'var1': times, "var2": 2*times, "var3": 3*times}) - - assert_frames_close(out, expected) - - def test_get_data_from_two_file(self): - from pysd import read_vensim - - model = read_vensim( - self.data_model, - data_files=[self.data_folder+"data3.tab", - self.data_folder+"data1.tab"]) - out = model.run(return_columns=["var1", "var2", "var3"]) - times = np.arange(11) - expected = pd.DataFrame( - index=times, - data={'var1': -times, "var2": -2*times, "var3": 3*times}) - - assert_frames_close(out, expected) - - def test_get_data_from_transposed_file(self): - from pysd import read_vensim - - model = read_vensim( - self.data_model, - data_files=[self.data_folder+"data2.tab"]) - out = model.run(return_columns=["var1", "var2", "var3"]) - times = np.arange(11) - expected = pd.DataFrame( - index=times, - data={'var1': times-5, "var2": 2*times-5, "var3": 3*times-5}) - - assert_frames_close(out, expected) - - def test_get_data_from_dict_file(self): - from pysd import read_vensim - - model = read_vensim( - self.data_model, - data_files={self.data_folder+"data2.tab": ["\"data-3\""], - self.data_folder+"data1.tab": ["data_1", "Data 2"]}) - out = model.run(return_columns=["var1", "var2", "var3"]) - times = np.arange(11) - expected = pd.DataFrame( - index=times, - data={'var1': times, "var2": 2*times, "var3": 3*times-5}) - - assert_frames_close(out, expected) - - class TestExportImport(unittest.TestCase): def test_run_export_import_integ(self): from pysd import read_vensim
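Taken together, the features added in patches 15 and 16 support a workflow like the following minimal sketch. The model path, variable names and exogenous values are copied from the test parametrizations above; the emitted UserWarnings are omitted for brevity:

import pysd

# translate the split example model used by the tests above
model = pysd.read_vensim(
    "more-tests/split_model/test_split_model.mdl", split_views=True)

# preview which variables and stateful objects the submodel around
# "stock" needs before actually selecting it
model.get_dependencies(vars=["stock"])

# select the submodel, fixing the required exogenous components so the
# run does not produce nan values
model.select_submodel(
    vars=["stock"],
    exogenous_components={
        "rate1": 4, "initial_stock": 2, "initial_stock_correction": 0})
model.run()

# Patch 16 additionally allows data files to be passed as str, Path,
# list or dict when loading a model with DATA variables, e.g.:
# model = pysd.read_vensim("test_data_model.mdl", data_files="data1.tab")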