Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated Dockerfile and Pipfile.lock and fixed tests #458

Merged
merged 7 commits into from
Mar 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@ Changelog
Development
-----------

* Placeholder
* Update python version in Dockerfile.
* Update other dependencies (including adding rust) in Dockerfile.
* Remove pinned dependencies in Pipfile.
* Relock Pipfile (and do so inside of the docker image).
* Update pytests to account for changes in newer pandas where categorical variables are no longer included in `df.sum().sum()`.



3.1.1
-----
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
FROM python:3.6.6
FROM python:3.10

RUN set -ex && pip install pip pipenv --upgrade

# sphinxcontrib-spelling dependency
RUN apt-get update \
&& apt-get install -yqq libenchant-dev
&& apt-get install -yqq libenchant-2-dev

COPY Pipfile Pipfile
COPY Pipfile.lock Pipfile.lock
Expand Down
14 changes: 7 additions & 7 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@ name = "pypi"

[packages]

click = "==7.0"
eeweather = ">=0.3.12"
click = "*"
matplotlib = "*"
statsmodels = "==0.11.1"
scipy = "==1.4.1"
sqlalchemy = "*"
pandas = "==0.25.2"
statsmodels = "*"
scipy = "*"
pandas = "*"


[dev-packages]

black = "==18.6b4"
sqlalchemy = "*"
eeweather = ">=0.3.12"
black = "*"
coverage = "*"
jupyterlab = "*"
nbsphinx = "*"
Expand Down
3,009 changes: 1,910 additions & 1,099 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ help:

# Custom target for autobuild (philngo)
livehtml:
sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)"/html -H 0.0.0.0 -p 8000 --poll -z ../eemeter
sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)"/html --host 0.0.0.0 --port 8000 --watch ../eemeter

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,4 @@


def setup(app):
app.add_stylesheet("css/custom.css") # may also be an URL
app.add_css_file("css/custom.css") # may also be a URL
7 changes: 4 additions & 3 deletions eemeter/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,11 @@ def cli():
def _get_data(
sample, meter_file, temperature_file, heating_balance_points, cooling_balance_points
):

if sample is not None:
with resource_stream("eemeter.samples", "metadata.json") as f:
metadata = json.loads(f.read().decode("utf-8"))
if sample in metadata:
click.echo("Loading sample: {}".format(sample))

meter_file = resource_stream(
"eemeter.samples", metadata[sample]["meter_data_filename"]
)
Expand Down Expand Up @@ -106,7 +104,10 @@ def _get_data(
heating_balance_points=heating_balance_points,
cooling_balance_points=cooling_balance_points,
)
return merge_features([usage_per_day, temperature_features])
merged_features = merge_features([usage_per_day, temperature_features])
# usage column must be `meter_value` for model fitting to work
merged_features.rename(columns={"usage_per_day": "meter_value"}, inplace=True)
return merged_features


@cli.command()
Expand Down
8 changes: 4 additions & 4 deletions eemeter/derivatives.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _compute_ols_error(
):
ols_model_agg_error = (
(t_stat * rmse_base_residuals * post_obs)
/ (base_obs ** 0.5)
/ (base_obs**0.5)
* (1.0 + ((base_avg - post_avg) ** 2.0 / base_var)) ** 0.5
)

Expand All @@ -46,7 +46,7 @@ def _compute_ols_error(
)

ols_total_agg_error = (
ols_model_agg_error ** 2.0 + ols_noise_agg_error ** 2.0
ols_model_agg_error**2.0 + ols_noise_agg_error**2.0
) ** 0.5

return ols_total_agg_error, ols_model_agg_error, ols_noise_agg_error
Expand Down Expand Up @@ -75,7 +75,7 @@ def _compute_fsu_error(

fsu_error_band = total_base_energy * (
t_stat
* (a_coeff * months_reporting ** 2.0 + b_coeff * months_reporting + c_coeff)
* (a_coeff * months_reporting**2.0 + b_coeff * months_reporting + c_coeff)
* (rmse_base_residuals / base_avg)
* ((base_obs / nprime) * (1.0 + (2.0 / nprime)) * (1.0 / post_obs)) ** 0.5
)
Expand Down Expand Up @@ -372,7 +372,7 @@ def _compute_error_bands_modeled_savings(
"FSU Error Band: Baseline": fsu_error_band_baseline,
"FSU Error Band: Reporting": fsu_error_band_reporting,
"FSU Error Band": (
fsu_error_band_baseline ** 2.0 + fsu_error_band_reporting ** 2.0
fsu_error_band_baseline**2.0 + fsu_error_band_reporting**2.0
)
** 0.5,
}
Expand Down
3 changes: 0 additions & 3 deletions eemeter/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ def _compute_columns(temps):
def _compute_columns(temps):
count = temps.shape[0]
if count > 24:

day_groups = np.floor(np.arange(count) / 24)
daily_temps = temps.groupby(day_groups).agg(["mean", "count"])
n_limit_period = percent_hourly_coverage_per_billing_period * count
Expand Down Expand Up @@ -267,7 +266,6 @@ def _compute_columns(temps):
for bp in heating_balance_points
}
else: # faster route for daily case, should have same effect.

if count > n_limit_daily:
count_cols = {"n_days_kept": 1, "n_days_dropped": 0}
# CalTRACK 2.2.2.3
Expand Down Expand Up @@ -833,7 +831,6 @@ def compute_temperature_bin_features(temperatures, bin_endpoints):
bins = {}

for i, (left_bin, right_bin) in enumerate(zip(bin_endpoints, bin_endpoints[1:])):

bin_name = "bin_{}".format(i)

in_bin = (temperatures > left_bin) & (temperatures <= right_bin)
Expand Down
23 changes: 12 additions & 11 deletions eemeter/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,12 +384,10 @@ def __init__(
or self.degrees_of_freedom < 1
or self.observed_length < self.num_parameters
):

self.cvrmse_auto_corr_correction = None
self.approx_factor_auto_corr_correction = None
self.fsu_base_term = None
else:

# factor to correct cvrmse_adj for autocorrelation of inputs
# i.e., divide by (n' - n_param) instead of by (n - n_param)
self.cvrmse_auto_corr_correction = (
Expand All @@ -411,15 +409,18 @@ def __init__(
)

def __repr__(self):
return "ModelMetrics(merged_length={}, r_squared_adj={}, cvrmse_adj={}, " "mape_no_zeros={}, nmae={}, nmbe={}, autocorr_resid={}, confidence_level={})".format(
self.merged_length,
round(self.r_squared_adj, 3),
round(self.cvrmse_adj, 3),
round(self.mape_no_zeros, 3),
round(self.nmae, 3),
round(self.nmbe, 3),
round(self.autocorr_resid, 3),
round(self.confidence_level, 3),
return (
"ModelMetrics(merged_length={}, r_squared_adj={}, cvrmse_adj={}, "
"mape_no_zeros={}, nmae={}, nmbe={}, autocorr_resid={}, confidence_level={})".format(
self.merged_length,
round(self.r_squared_adj, 3),
round(self.cvrmse_adj, 3),
round(self.mape_no_zeros, 3),
round(self.nmae, 3),
round(self.nmbe, 3),
round(self.autocorr_resid, 3),
round(self.confidence_level, 3),
)
)

def json(self):
Expand Down
10 changes: 7 additions & 3 deletions tests/test_caltrack_design_matrices.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@


def test_create_caltrack_hourly_preliminary_design_matrix(
il_electricity_cdd_hdd_hourly
il_electricity_cdd_hdd_hourly,
):
meter_data = il_electricity_cdd_hdd_hourly["meter_data"]
temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
Expand All @@ -47,6 +47,8 @@ def test_create_caltrack_hourly_preliminary_design_matrix(
"n_hours_kept",
"temperature_mean",
]
# In newer pandas, categorical columns (like hour_of_week) aren't included in sum
design_matrix.hour_of_week = design_matrix.hour_of_week.astype(float)
assert round(design_matrix.sum().sum(), 2) == 136352.61


Expand Down Expand Up @@ -386,6 +388,7 @@ def test_create_caltrack_hourly_segmented_design_matrices(
"meter_value",
"weight",
]
design_matrix.hour_of_week = design_matrix.hour_of_week.astype(float)
assert round(design_matrix.sum().sum(), 2) == 126210.07

design_matrix = design_matrices["mar-apr-may-weighted"]
Expand All @@ -397,11 +400,12 @@ def test_create_caltrack_hourly_segmented_design_matrices(
"meter_value",
"weight",
]
design_matrix.hour_of_week = design_matrix.hour_of_week.astype(float)
assert round(design_matrix.sum().sum(), 2) == 167659.28


def test_create_caltrack_billing_design_matrix_empty_temp(
il_electricity_cdd_hdd_billing_monthly
il_electricity_cdd_hdd_billing_monthly,
):
meter_data = il_electricity_cdd_hdd_billing_monthly["meter_data"]
temperature_data = il_electricity_cdd_hdd_billing_monthly["temperature_data"][:0]
Expand All @@ -412,7 +416,7 @@ def test_create_caltrack_billing_design_matrix_empty_temp(


def test_create_caltrack_billing_design_matrix_partial_empty_temp(
il_electricity_cdd_hdd_billing_monthly
il_electricity_cdd_hdd_billing_monthly,
):
meter_data = il_electricity_cdd_hdd_billing_monthly["meter_data"]
temperature_data = il_electricity_cdd_hdd_billing_monthly["temperature_data"][:200]
Expand Down
4 changes: 3 additions & 1 deletion tests/test_caltrack_hourly.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def test_caltrack_hourly_fit_feature_processor(
"weight",
]
assert result.shape == (24, 10)
result.hour_of_week = result.hour_of_week.astype(float)
assert round(result.sum().sum(), 2) == 5916.0


Expand Down Expand Up @@ -127,6 +128,7 @@ def test_caltrack_hourly_prediction_feature_processor(
"weight",
]
assert result.shape == (24, 9)
result.hour_of_week = result.hour_of_week.astype(float)
assert round(result.sum().sum(), 2) == 4956.0


Expand Down Expand Up @@ -458,7 +460,7 @@ def segmented_design_matrices_single_mode(


def test_fit_caltrack_hourly_model_segment_single_mode(
segmented_design_matrices_single_mode
segmented_design_matrices_single_mode,
):
segment_name = "dec-jan-feb-weighted"
segment_data = segmented_design_matrices_single_mode[segment_name]
Expand Down
14 changes: 8 additions & 6 deletions tests/test_caltrack_usage_per_day.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ def cdd_hdd_h54_c67_billing_monthly_totals(il_electricity_cdd_hdd_billing_monthl


def test_caltrack_predict_design_matrix_input_avg_false_output_avg_true(
cdd_hdd_h54_c67_billing_monthly_totals
cdd_hdd_h54_c67_billing_monthly_totals,
):
data = cdd_hdd_h54_c67_billing_monthly_totals
prediction = _caltrack_predict_design_matrix(
Expand All @@ -694,7 +694,7 @@ def test_caltrack_predict_design_matrix_input_avg_false_output_avg_true(


def test_caltrack_predict_design_matrix_input_avg_false_output_avg_false(
cdd_hdd_h54_c67_billing_monthly_totals
cdd_hdd_h54_c67_billing_monthly_totals,
):
data = cdd_hdd_h54_c67_billing_monthly_totals
prediction = _caltrack_predict_design_matrix(
Expand Down Expand Up @@ -730,7 +730,7 @@ def cdd_hdd_h54_c67_billing_monthly_avgs(il_electricity_cdd_hdd_billing_monthly)


def test_caltrack_predict_design_matrix_input_avg_true_output_avg_false(
cdd_hdd_h54_c67_billing_monthly_avgs
cdd_hdd_h54_c67_billing_monthly_avgs,
):
data = cdd_hdd_h54_c67_billing_monthly_avgs
prediction = _caltrack_predict_design_matrix(
Expand All @@ -750,7 +750,7 @@ def test_caltrack_predict_design_matrix_input_avg_true_output_avg_false(


def test_caltrack_predict_design_matrix_input_avg_true_output_avg_true(
cdd_hdd_h54_c67_billing_monthly_avgs
cdd_hdd_h54_c67_billing_monthly_avgs,
):
data = cdd_hdd_h54_c67_billing_monthly_avgs
prediction = _caltrack_predict_design_matrix(
Expand Down Expand Up @@ -792,7 +792,7 @@ def test_caltrack_predict_design_matrix_n_days(cdd_hdd_h54_c67_billing_monthly_t


def test_caltrack_predict_design_matrix_no_days_fails(
cdd_hdd_h54_c67_billing_monthly_totals
cdd_hdd_h54_c67_billing_monthly_totals,
):
# This makes sure that the method fails if neither n_days nor
# a DatetimeIndex is available.
Expand Down Expand Up @@ -1443,7 +1443,9 @@ def test_select_best_candidate_ok(
assert best_candidate.r_squared_adj == 1


def test_select_best_candidate_none(candidate_model_disqualified,):
def test_select_best_candidate_none(
candidate_model_disqualified,
):
candidates = [candidate_model_disqualified]

best_candidate, warnings = select_best_candidate(candidates)
Expand Down
Loading