Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/JamesPHoughton/pysd
Browse files Browse the repository at this point in the history
  • Loading branch information
enekomartinmartinez committed Aug 10, 2021
2 parents 234d539 + 70d02e0 commit b2463df
Show file tree
Hide file tree
Showing 2 changed files with 283 additions and 40 deletions.
152 changes: 114 additions & 38 deletions pysd/tools/benchmarking.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ def runner(model_file, canonical_file=None, transpose=False):
else:
raise FileNotFoundError('\nCanonical output file not found.')

canon = load_outputs(canonical_file, transpose)
canon = load_outputs(canonical_file,
transpose=transpose,
encoding=detect_encoding(canonical_file))

# load model
if model_file.lower().endswith('.mdl'):
Expand All @@ -63,7 +65,7 @@ def runner(model_file, canonical_file=None, transpose=False):
return model.run(return_columns=canon.columns), canon


def load_outputs(file_name, transpose=False):
def load_outputs(file_name, transpose=False, columns=None, encoding=None):
"""
Load outputs file
Expand All @@ -76,6 +78,17 @@ def load_outputs(file_name, transpose=False):
If True reads transposed outputs file, i.e. one variable per row.
Default is False.
columns: list or None (optional)
List of the column names to load. If None loads all the columns.
Default is None.
NOTE: if transpose=False, the loading will be faster as only
selected columns will be loaded. If transpose=True the whole
file must be read and it will be subselected later.
encoding: str or None (optional)
Encoding type to read output file. Needed if the file has special
characters. Default is None.
Returns
-------
pandas.DataFrame
Expand All @@ -84,27 +97,36 @@ def load_outputs(file_name, transpose=False):
"""
read_func = {'.csv': pd.read_csv, '.tab': pd.read_table}

if columns:
columns = set(columns)
if not transpose:
columns.add("Time")

for end, func in read_func.items():
if file_name.lower().endswith(end):
if transpose:
out = func(file_name,
encoding=_detect_encoding(file_name),
encoding=encoding,
index_col=0).T
if columns:
out = out[columns]
else:
out = func(file_name,
encoding=_detect_encoding(file_name),
index_col='Time')
encoding=encoding,
usecols=columns,
index_col="Time")

out.index = out.index.astype(float)
return out
# return the dataframe removing nan index values
return out[~np.isnan(out.index)]

raise ValueError(
f"\nNot able to read '{file_name}'. "
+ f"Only {', '.join(list(read_func))} files are accepted.")


def assert_frames_close(actual, expected, assertion="raise",
precision=2, **kwargs):
verbose=False, precision=2, **kwargs):
"""
Compare DataFrame items by column and
raise AssertionError if any column is not equal.
Expand All @@ -125,8 +147,13 @@ def assert_frames_close(actual, expected, assertion="raise",
that two frames are close. Otherwise, it will show a warning
message. Default is "raise".
verbose: bool (optional)
If True and any column is not close, the actual and expected values,
together with their difference, will be printed in the error/warning
message.
Default is False.
precision: int (optional)
Precision to print the numerical values of assertion message.
Precision used to print the numerical values in the verbose assertion
message.
Default is 2.
kwargs:
Expand All @@ -150,6 +177,18 @@ def assert_frames_close(actual, expected, assertion="raise",
Traceback (most recent call last):
...
AssertionError:
Following columns are not close:
\t'0'
>>> assert_frames_close(
... pd.DataFrame(100, index=range(5), columns=range(3)),
... pd.DataFrame(150, index=range(5), columns=range(3)),
... verbose=True, rtol=.2) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
AssertionError:
Following columns are not close:
\t'0'
Column '0' is not close.
Expected values:
\t[150, 150, 150, 150, 150]
Expand All @@ -164,42 +203,71 @@ def assert_frames_close(actual, expected, assertion="raise",
... rtol=.2, assertion="warn")
...
UserWarning:
Column '0' is not close.
Expected values:
\t[150, 150, 150, 150, 150]
Actual values:
\t[100, 100, 100, 100, 100]
Difference:
\t[50, 50, 50, 50, 50]
Following columns are not close:
\t'0'
References
----------
Derived from:
http://nbviewer.jupyter.org/gist/jiffyclub/ac2e7506428d5e1d587b
"""
assert (isinstance(actual, pd.DataFrame) and
isinstance(expected, pd.DataFrame)), \
'Inputs must both be pandas DataFrames.'
if not isinstance(actual, pd.DataFrame)\
or not isinstance(expected, pd.DataFrame):
raise TypeError('\nInputs must both be pandas DataFrames.')

expected_cols, actual_cols = set(expected.columns), set(actual.columns)

if expected_cols != actual_cols:
# columns are not equal
message = ""

if actual_cols.difference(expected_cols):
columns = ["'" + col + "'" for col
in actual_cols.difference(expected_cols)]
columns = ", ".join(columns)
message += '\nColumns ' + columns\
+ ' from actual values not found in expected values.'

if expected_cols.difference(actual_cols):
columns = ["'" + col + "'" for col
in expected_cols.difference(actual_cols)]
columns = ", ".join(columns)
message += '\nColumns ' + columns\
+ ' from expected values not found in actual values.'

if assertion == "raise":
raise ValueError(
'\nColumns from actual and expected values must be equal.'
+ message)
else:
warnings.warn(message)

assert set(expected.columns) == set(actual.columns), \
'test set columns must be equal to those in actual/observed set.'
columns = list(actual_cols.intersection(expected_cols))

# TODO let compare dataframes with different timestamps if "warn"
assert np.all(np.equal(expected.index.values, actual.index.values)), \
'test set and actual set must share a common index' \
'instead found' + expected.index.values + 'vs' + actual.index.values

for col in expected.columns:
# if for Vensim outputs where constant values are only in the first row
if np.isnan(expected[col].values[1:]).all():
expected[col] = expected[col].values[0]
try:
assert_allclose(expected[col].values,
actual[col].values,
**kwargs)

except AssertionError:
assertion_details = '\n\n'\
# if for Vensim outputs where constant values are only in the first row
_remove_constant_nan(expected)
_remove_constant_nan(actual)

c = assert_allclose(expected[columns],
actual[columns],
**kwargs)

if c.all():
return

columns = np.array(columns, dtype=str)[~c.values]

assertion_details = "\nFollowing columns are not close:\n\t"\
+ ", ".join(columns)
if verbose:
for col in columns:
assertion_details += '\n\n'\
+ f"Column '{col}' is not close."\
+ '\n\nExpected values:\n\t'\
+ np.array2string(expected[col].values,
Expand All @@ -214,12 +282,12 @@ def assert_frames_close(actual, expected, assertion="raise",
+ np.array2string(expected[col].values-actual[col].values,
precision=precision,
separator=', ',
suppress_small=True)\
suppress_small=True)

if assertion == "raise":
raise AssertionError(assertion_details)
else:
warnings.warn(assertion_details)
if assertion == "raise":
raise AssertionError(assertion_details)
else:
warnings.warn(assertion_details)


def assert_allclose(x, y, rtol=1.e-5, atol=1.e-5):
Expand All @@ -242,10 +310,18 @@ def assert_allclose(x, y, rtol=1.e-5, atol=1.e-5):
None
"""
assert np.all(np.less_equal(abs(x - y), atol + rtol * abs(y)))
return (abs(x - y) <= atol + rtol * abs(y)).all()


def _remove_constant_nan(df):
"""
Removes nana values in constant value columns produced by Vensim
"""
nan_cols = np.isnan(df.iloc[1:, :]).all()
df.loc[:, nan_cols] = df.loc[:, nan_cols].iloc[0].values


def _detect_encoding(filename):
def detect_encoding(filename):
"""
Detects the encoding of a file.
Expand Down
Loading

0 comments on commit b2463df

Please sign in to comment.