Skip to content

Commit

Permalink
Merge pull request #517 from openeemeter/fix/billing-dst
Browse files Browse the repository at this point in the history
Handle ambiguous and nonexistent local times when creating billing data
  • Loading branch information
jason-recurve authored Jan 6, 2025
2 parents 7d04c06 + 44b280d commit 14d6b32
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Changelog
Development
-----------

* Handle ambiguous and nonexistent local times when creating billing dataclass
* Fix serialization and deserialization of metrics.
* Rename HourlyBaselineData.sufficiency_warnings -> HourlyBaselineData.warnings
* Add disqualification field to HourlyBaselineData and HourlyReportingData
Expand Down
2 changes: 2 additions & 0 deletions eemeter/eemeter/models/billing/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ def _compute_meter_value_df(self, df: pd.DataFrame):
end=end_date,
freq="D",
tz=df.index.tz,
ambiguous=True,
nonexistent="shift_forward",
)
all_days_df = pd.DataFrame(index=all_days_index)
meter_value_df = meter_value_df.merge(
Expand Down
19 changes: 19 additions & 0 deletions tests/daily_model/test_billing_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from eemeter.eemeter.samples import load_sample
import numpy as np
import pandas as pd
from pandas import Timestamp, DatetimeIndex, DataFrame
import pytest

TEMPERATURE_SEED = 29
Expand Down Expand Up @@ -634,3 +635,21 @@ def test_billing_reporting_data_with_missing_daily_frequencies(get_datetime_inde
disqualification.qualified_name in expected_disqualifications
for disqualification in cls.disqualification
)


def test_dst_handling():
    """Billing baseline data must survive DST transitions in local time.

    Two scenarios are exercised in ``America/New_York``:

    * Spring forward: the readings sit at 02:00 local time, and 02:00 on
      2020-03-08 does not exist, so that timestamp is expected to land on
      03:00 instead of raising.
    * Fall back: the readings sit at 01:00 local time, and 01:00 on
      2020-11-01 occurs twice, so a single occurrence is expected to be
      chosen and every row should stay at hour 1.
    """
    zone = "America/New_York"

    def billing_frame(local_stamps):
        # One constant reading per billing timestamp; only the index matters
        # for this test, the values themselves are placeholders.
        index = DatetimeIndex([Timestamp(stamp, tz=zone) for stamp in local_stamps])
        count = len(index)
        return DataFrame(
            {"observed": [1] * count, "temperature": [50] * count},
            index=index,
        )

    # 2020-03-08 02:00 is nonexistent, should push to 03:00
    spring_frame = billing_frame(["2020-03-07 02", "2020-04-06 02", "2020-05-06 02"])
    spring_baseline = BillingBaselineData(spring_frame, is_electricity_data=True)
    # 61 rows: presumably one per calendar day, 03-07 through 05-06 inclusive.
    assert len(spring_baseline.df) == 61
    distinct_hours = np.unique(spring_baseline.df.index.hour)
    # Hour 2 everywhere except the shifted transition day, which shows up as 3.
    assert (distinct_hours == [2, 3]).all()

    # 2020-11-01 01:00 is ambiguous, single index should be chosen
    autumn_frame = billing_frame(["2020-10-31 01", "2020-11-28 01", "2020-12-28 01"])
    autumn_baseline = BillingBaselineData(autumn_frame, is_electricity_data=True)
    assert (autumn_baseline.df.index.hour == 1).all()

0 comments on commit 14d6b32

Please sign in to comment.