Merge branch 'master' of https://github.com/JamesPHoughton/pysd

SDXorg · Aug 10, 2021 · b2463df · b2463df
2 parents 234d539 + 70d02e0
commit b2463df
Show file tree

Hide file tree

Showing 2 changed files with 283 additions and 40 deletions.
diff --git a/pysd/tools/benchmarking.py b/pysd/tools/benchmarking.py
@@ -48,7 +48,9 @@ def runner(model_file, canonical_file=None, transpose=False):
         else:
             raise FileNotFoundError('\nCanonical output file not found.')
 
-    canon = load_outputs(canonical_file, transpose)
+    canon = load_outputs(canonical_file,
+                         transpose=transpose,
+                         encoding=detect_encoding(canonical_file))
 
     # load model
     if model_file.lower().endswith('.mdl'):
@@ -63,7 +65,7 @@ def runner(model_file, canonical_file=None, transpose=False):
     return model.run(return_columns=canon.columns), canon
 
 
-def load_outputs(file_name, transpose=False):
+def load_outputs(file_name, transpose=False, columns=None, encoding=None):
     """
     Load outputs file
 
@@ -76,6 +78,17 @@ def load_outputs(file_name, transpose=False):
         If True reads transposed outputs file, i.e. one variable per row.
         Default is False.
 
+    columns: list or None (optional)
+        List of the column names to load. If None loads all the columns.
+        Default is None.
+        NOTE: if transpose=False, the loading will be faster as only
+        selected columns will be loaded. If transpose=True the whole
+        file must be read and it will be subselected later.
+
+    encoding: str or None (optional)
+        Encoding type to read output file. Needed if the file has special
+        characters. Default is None.
+
     Returns
     -------
     pandas.DataFrame
@@ -84,27 +97,36 @@ def load_outputs(file_name, transpose=False):
     """
     read_func = {'.csv': pd.read_csv, '.tab': pd.read_table}
 
+    if columns:
+        columns = set(columns)
+        if not transpose:
+            columns.add("Time")
+
     for end, func in read_func.items():
         if file_name.lower().endswith(end):
             if transpose:
                 out = func(file_name,
-                           encoding=_detect_encoding(file_name),
+                           encoding=encoding,
                            index_col=0).T
+                if columns:
+                    out = out[columns]
             else:
                 out = func(file_name,
-                           encoding=_detect_encoding(file_name),
-                           index_col='Time')
+                           encoding=encoding,
+                           usecols=columns,
+                           index_col="Time")
 
             out.index = out.index.astype(float)
-            return out
+            # return the dataframe removing nan index values
+            return out[~np.isnan(out.index)]
 
     raise ValueError(
         f"\nNot able to read '{file_name}'. "
         + f"Only {', '.join(list(read_func))} files are accepted.")
 
 
 def assert_frames_close(actual, expected, assertion="raise",
-                        precision=2, **kwargs):
+                        verbose=False, precision=2, **kwargs):
     """
     Compare DataFrame items by column and
     raise AssertionError if any column is not equal.
@@ -125,8 +147,13 @@ def assert_frames_close(actual, expected, assertion="raise",
         that two frames are close. Otherwise, it will show a warning
         message. Default is "raise".
 
+    verbose: bool (optional)
+        If True and any column is not close, the actual and expected values
+        and their difference are printed in the error/warning message.
+        Default is False.
+
     precision: int (optional)
-        Precision to print the numerical values of assertion message.
+        Precision to print the numerical values of the verbose assertion message.
         Default is 2.
 
     kwargs:
@@ -150,6 +177,18 @@ def assert_frames_close(actual, expected, assertion="raise",
     Traceback (most recent call last):
     ...
     AssertionError:
+    Following columns are not close:
+    \t'0'
+
+    >>> assert_frames_close(
+    ...     pd.DataFrame(100, index=range(5), columns=range(3)),
+    ...     pd.DataFrame(150, index=range(5), columns=range(3)),
+    ...     verbose=True, rtol=.2)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    AssertionError:
+    Following columns are not close:
+    \t'0'
     Column '0' is not close.
     Expected values:
     \t[150, 150, 150, 150, 150]
@@ -164,42 +203,71 @@ def assert_frames_close(actual, expected, assertion="raise",
     ...     rtol=.2, assertion="warn")
     ...
     UserWarning:
-    Column '0' is not close.
-    Expected values:
-    \t[150, 150, 150, 150, 150]
-    Actual values:
-    \t[100, 100, 100, 100, 100]
-    Difference:
-    \t[50, 50, 50, 50, 50]
+    Following columns are not close:
+    \t'0'
 
     References
     ----------
     Derived from:
         http://nbviewer.jupyter.org/gist/jiffyclub/ac2e7506428d5e1d587b
 
     """
-    assert (isinstance(actual, pd.DataFrame) and
-            isinstance(expected, pd.DataFrame)), \
-        'Inputs must both be pandas DataFrames.'
+    if not isinstance(actual, pd.DataFrame)\
+       or not isinstance(expected, pd.DataFrame):
+        raise TypeError('\nInputs must both be pandas DataFrames.')
+
+    expected_cols, actual_cols = set(expected.columns), set(actual.columns)
+
+    if expected_cols != actual_cols:
+        # columns are not equal
+        message = ""
+
+        if actual_cols.difference(expected_cols):
+            columns = ["'" + col + "'" for col
+                       in actual_cols.difference(expected_cols)]
+            columns = ", ".join(columns)
+            message += '\nColumns ' + columns\
+                       + ' from actual values not found in expected values.'
+
+        if expected_cols.difference(actual_cols):
+            columns = ["'" + col + "'" for col
+                       in expected_cols.difference(actual_cols)]
+            columns = ", ".join(columns)
+            message += '\nColumns ' + columns\
+                       + ' from expected values not found in actual values.'
+
+        if assertion == "raise":
+            raise ValueError(
+                '\nColumns from actual and expected values must be equal.'
+                + message)
+        else:
+            warnings.warn(message)
 
-    assert set(expected.columns) == set(actual.columns), \
-        'test set columns must be equal to those in actual/observed set.'
+    columns = list(actual_cols.intersection(expected_cols))
 
+    # TODO allow comparing dataframes with different timestamps if "warn"
     assert np.all(np.equal(expected.index.values, actual.index.values)), \
         'test set and actual set must share a common index' \
         'instead found' + expected.index.values + 'vs' + actual.index.values
 
-    for col in expected.columns:
-        # if for Vensim outputs where constant values are only in the first row
-        if np.isnan(expected[col].values[1:]).all():
-            expected[col] = expected[col].values[0]
-        try:
-            assert_allclose(expected[col].values,
-                            actual[col].values,
-                            **kwargs)
-
-        except AssertionError:
-            assertion_details = '\n\n'\
+    # if for Vensim outputs where constant values are only in the first row
+    _remove_constant_nan(expected)
+    _remove_constant_nan(actual)
+
+    c = assert_allclose(expected[columns],
+                        actual[columns],
+                        **kwargs)
+
+    if c.all():
+        return
+
+    columns = np.array(columns, dtype=str)[~c.values]
+
+    assertion_details = "\nFollowing columns are not close:\n\t"\
+                        + ", ".join(columns)
+    if verbose:
+        for col in columns:
+            assertion_details += '\n\n'\
                 + f"Column '{col}' is not close."\
                 + '\n\nExpected values:\n\t'\
                 + np.array2string(expected[col].values,
@@ -214,12 +282,12 @@ def assert_frames_close(actual, expected, assertion="raise",
                 + np.array2string(expected[col].values-actual[col].values,
                                   precision=precision,
                                   separator=', ',
-                                  suppress_small=True)\
+                                  suppress_small=True)
 
-            if assertion == "raise":
-                raise AssertionError(assertion_details)
-            else:
-                warnings.warn(assertion_details)
+    if assertion == "raise":
+        raise AssertionError(assertion_details)
+    else:
+        warnings.warn(assertion_details)
 
 
 def assert_allclose(x, y, rtol=1.e-5, atol=1.e-5):
@@ -242,10 +310,18 @@ def assert_allclose(x, y, rtol=1.e-5, atol=1.e-5):
     None
 
     """
-    assert np.all(np.less_equal(abs(x - y), atol + rtol * abs(y)))
+    return (abs(x - y) <= atol + rtol * abs(y)).all()
+
+
+def _remove_constant_nan(df):
+    """
+    Removes NaN values in constant value columns produced by Vensim
+    """
+    nan_cols = np.isnan(df.iloc[1:, :]).all()
+    df.loc[:, nan_cols] = df.loc[:, nan_cols].iloc[0].values
 
 
-def _detect_encoding(filename):
+def detect_encoding(filename):
     """
     Detects the encoding of a file.