Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add weights to daily model #536

Merged
merged 22 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2e1f0e9
converted attrs settings to pydantic
travis-recurve Jan 13, 2025
76c0e47
updating optimization - in progress
travis-recurve Jan 16, 2025
392d4b4
daily models using new opt
travis-recurve Jan 23, 2025
e4641aa
billing settings update
travis-recurve Jan 23, 2025
4f326e4
bug fix and made settings lowercase
travis-recurve Jan 24, 2025
ad66fdd
data class weights propogate through
travis-recurve Jan 27, 2025
bb10da1
fixing tests
travis-recurve Jan 27, 2025
9363943
converted attrs settings to pydantic
travis-recurve Jan 13, 2025
ec185c5
updating optimization - in progress
travis-recurve Jan 16, 2025
6a8ad45
daily models using new opt
travis-recurve Jan 23, 2025
ef19598
billing settings update
travis-recurve Jan 23, 2025
b53bd34
bug fix and made settings lowercase
travis-recurve Jan 24, 2025
5a09a35
data class weights propogate through
travis-recurve Jan 27, 2025
d79c841
fixing tests
travis-recurve Jan 27, 2025
59ce434
rebased to master after hourly merge
travis-recurve Jan 31, 2025
c776e49
rebased to master after hourly merge
travis-recurve Jan 31, 2025
92b565b
fixing tests and adding
travis-recurve Jan 31, 2025
7b5b15e
test changes
travis-recurve Feb 4, 2025
3f93e9e
reducing some copy/pasting in various daily model inheritance cases
travis-recurve Feb 5, 2025
653b7ee
updating daily model inheritances for consistent type hints
travis-recurve Feb 6, 2025
bae875b
Merge branch 'master' into feature/daily_model_weights
travis-recurve Feb 6, 2025
18052ff
Update CHANGELOG.md
travis-recurve Feb 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ Development
-----------

* Add GHI sufficiency check requiring 90% coverage for each month
* Add weights propagation from data class to daily model via "weights" column
* Converted daily model settings from attrs to pydantic
* Refactored daily model initial guess optimization to use consolidated optimize function

4.1.0
-----
Expand Down
41 changes: 29 additions & 12 deletions eemeter/common/base_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,32 @@


class BaseSettings(pydantic.BaseModel):
    # Shared base for settings models.
    #
    # Instances are frozen, arbitrary (non-pydantic) field types are allowed,
    # and every string key and string value supplied by the caller is
    # lower-cased and stripped of surrounding whitespace before field
    # validation runs. (Comments are used instead of a class docstring so the
    # generated JSON schema is not given a new "description" entry.)
    model_config = pydantic.ConfigDict(
        frozen=True,
        arbitrary_types_allowed=True,
        str_to_lower=True,
        str_strip_whitespace=True,
    )

    @pydantic.model_validator(mode="before")
    @classmethod
    def __lowercase_property_keys__(cls, values: Any) -> Any:
        """Make all property keys lowercase and strip whitespace.

        Args:
            values: Raw input handed to the model before validation.

        Returns:
            ``values`` with every string dictionary key normalized; anything
            that is not a dict is returned unchanged.
        """

        def _normalize_keys(value: Any) -> Any:
            # Recurse into dict values so nested settings dictionaries are
            # normalized too; non-string keys pass through untouched.
            if isinstance(value, dict):
                return {
                    k.lower().strip() if isinstance(k, str) else k: _normalize_keys(v)
                    for k, v in value.items()
                }
            return value

        return _normalize_keys(values)

    @pydantic.field_validator("*", mode="before")
    @classmethod
    def lowercase_values(cls, v):
        """Make all property values lowercase and strip whitespace before validation.

        Only ``str`` values are altered; every other value is returned as-is.
        """
        if isinstance(v, str):
            return v.lower().strip()
        return v


# add developer field to pydantic Field
def CustomField(developer=False, *args, **kwargs):
    """Build a ``pydantic.Field`` tagged with a ``developer`` flag.

    The flag is stored under the ``"developer"`` key of the field's
    ``json_schema_extra``. If the caller also supplies a ``json_schema_extra``
    dict, its entries are kept and the ``developer`` key is added on top
    (the explicit ``developer`` argument wins on conflict).

    Args:
        developer: Value stored under ``"developer"``. Pass it by keyword:
            because it is the first parameter, a first positional argument
            binds to it rather than being forwarded to ``pydantic.Field``.
        *args: Remaining positional arguments forwarded to ``pydantic.Field``.
        **kwargs: Keyword arguments forwarded to ``pydantic.Field``.

    Returns:
        The object returned by ``pydantic.Field``.

    Raises:
        TypeError: If ``json_schema_extra`` is supplied but is not a dict,
            since it cannot be merged with the developer flag.
    """
    # Pull out any caller-provided extras first; passing them straight through
    # would collide with the keyword set below ("multiple values for keyword").
    extra = kwargs.pop("json_schema_extra", None)
    if extra is None:
        extra = {}
    elif not isinstance(extra, dict):
        raise TypeError(
            "CustomField only supports a dict for json_schema_extra, got "
            f"{type(extra).__name__}"
        )

    return pydantic.Field(
        *args,
        json_schema_extra={**extra, "developer": developer},
        **kwargs,
    )
3 changes: 3 additions & 0 deletions eemeter/eemeter/common/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,9 @@ def compute_temperature_features(
if not keep_partial_nan_rows:
df = overwrite_partial_rows_with_nan(df)

if df.dropna(how='all').empty:
raise ValueError("All rows are NaN.")

# nan last row
df = df.iloc[:-1].reindex(df.index)
return df
Expand Down
2 changes: 2 additions & 0 deletions eemeter/eemeter/models/billing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@

from .data import BillingBaselineData, BillingReportingData
from .model import BillingModel
from .weighted_model import BillingWeightedModel

__all__ = (
"BillingBaselineData",
"BillingReportingData",
"BillingModel",
"BillingWeightedModel",
)
55 changes: 55 additions & 0 deletions eemeter/eemeter/models/billing/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,61 @@ def _compute_temperature_features(
features = temperature_features.drop(columns=["temperature_mean"])
return temp, features

# TODO: DELETE THIS after making real billing data class
@property
def billing_df(self) -> pd.DataFrame | None:
"""Get the corrected input data stored in the class. The actual dataframe is immutable, this returns a copy."""

df = self._df.copy()

# find indices where observed changes from prior
observed_change = df["observed"].diff()
observed_change = observed_change[observed_change != 0].index
obs_change_idx = df.index.get_indexer(observed_change)
obs_change_idx = np.append(obs_change_idx, len(df))
obs_change_idx = np.delete(obs_change_idx, np.where(np.diff(obs_change_idx) < 15)[0])

if obs_change_idx[0] != 0:
obs_change_idx = np.insert(obs_change_idx, 0, 0)

# create vector where value increases at each observed change
group = []
for i in range(1, len(obs_change_idx)):
idx_range = obs_change_idx[i] - obs_change_idx[i-1]

group.extend([i] * idx_range)

df["group"] = group

# get median delta

# get first datetime, average temperature, sum of observed for each group and make new df
df_temp = df.reset_index()
df_temp = df_temp.rename(columns={"index": "datetime"})

df_grouped = df_temp.groupby("group").agg({
"datetime": "first",
"season": "first",
"weekday_weekend": "first",
"temperature": "mean",
"observed": "mean",
}).set_index("datetime")

# create days column for number of days between current and previous index
df_grouped["days"] = df_grouped.index.to_series().diff().dt.days

df_grouped = df_grouped.dropna()

# create weights from days column
df_grouped["weights"] = df_grouped["days"] / df_grouped["days"].sum()

df_grouped = df_grouped.drop(columns=["days"])

if self._df is None:
return None
else:
return df_grouped.copy()


class BillingBaselineData(_BillingData):
"""
Expand Down
55 changes: 13 additions & 42 deletions eemeter/eemeter/models/billing/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,49 +56,19 @@ class BillingModel(DailyModel):
model (sklearn.pipeline.Pipeline): The final fitted model.
id (str): The index of the meter data.
"""
_baseline_data_type = BillingBaselineData
_reporting_data_type = BillingReportingData
_data_df_name = "df"

def __init__(self, settings=None):
super().__init__(model="legacy", settings=settings)
def __init__(self, settings=None, verbose: bool = False,):
super().__init__(model="legacy", settings=settings, verbose=verbose)

def fit(
self, baseline_data: BillingBaselineData, ignore_disqualification: bool = False
self,
baseline_data: BillingBaselineData,
ignore_disqualification: bool = False
) -> BillingModel:
"""Fit the model using baseline data.

Args:
baseline_data: BillingBaselineData object.
ignore_disqualification: Whether to ignore disqualification errors / warnings.

Returns:
The fitted model.

Raises:
TypeError: If baseline_data is not a BillingBaselineData object.
DataSufficiencyError: If the model can't be fit on disqualified baseline data.
"""
# TODO there's a fair bit of duplicated code between this and daily fit(), refactor
if not isinstance(baseline_data, BillingBaselineData):
raise TypeError("baseline_data must be a BillingBaselineData object")
baseline_data.log_warnings()
if baseline_data.disqualification and not ignore_disqualification:
for warning in baseline_data.disqualification + baseline_data.warnings:
print(warning.json())
raise DataSufficiencyError("Can't fit model on disqualified baseline data")
self.baseline_timezone = baseline_data.tz
self.warnings = baseline_data.warnings
self.disqualification = baseline_data.disqualification
self._fit(baseline_data.df)
if self.error["CVRMSE"] > self.settings.cvrmse_threshold:
cvrmse_warning = EEMeterWarning(
qualified_name="eemeter.model_fit_metrics.cvrmse",
description=(
f"Fit model has CVRMSE > {self.settings.cvrmse_threshold}"
),
data={"CVRMSE": self.error["CVRMSE"]},
)
cvrmse_warning.warn()
self.disqualification.append(cvrmse_warning)
return self
return super().fit(baseline_data, ignore_disqualification=ignore_disqualification)

def predict(
self,
Expand Down Expand Up @@ -135,7 +105,8 @@ def predict(
"reporting_data must be a BillingBaselineData or BillingReportingData object"
)

df_res = self._predict(reporting_data.df)
df = getattr(reporting_data, self._data_df_name)
df_res = self._predict(df)

if aggregation is None:
agg = None
Expand Down Expand Up @@ -182,7 +153,7 @@ def predict(

def plot(
self,
df_eval,
data,
aggregation: str | None = None,
):
"""Plot a model fit with baseline or reporting data. Requires matplotlib to use.
Expand All @@ -198,7 +169,7 @@ def plot(

# TODO: pass more kwargs to plotting function

plot(self, self.predict(df_eval, aggregation=aggregation))
plot(self, self.predict(data, aggregation=aggregation))

def to_dict(self) -> dict:
"""Returns a dictionary of model parameters.
Expand Down
34 changes: 34 additions & 0 deletions eemeter/eemeter/models/billing/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

Copyright 2014-2024 OpenEEmeter contributors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

"""
from __future__ import annotations

from eemeter.common.base_settings import CustomField

from eemeter.eemeter.models.daily.utilities.settings import DailyLegacySettings



class BillingSettings(DailyLegacySettings):
    # Settings for the billing model, built on DailyLegacySettings. The only
    # field declared here is segment_minimum_count: an integer defaulting to 3,
    # constrained to be >= 3, and flagged as a developer-level setting.
    segment_minimum_count: int = CustomField(
        developer=True,
        description="Minimum number of data points for HDD/CDD",
        default=3,
        ge=3,
    )
Loading