Skip to content

Commit

Permalink
Merge pull request #536 from opendsm/feature/daily_model_weights
Browse files Browse the repository at this point in the history
Add weights to daily model
  • Loading branch information
travis-recurve authored Feb 6, 2025
2 parents 1daea97 + 18052ff commit 09fbcc2
Show file tree
Hide file tree
Showing 33 changed files with 1,716 additions and 1,363 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ Development
-----------

* Add GHI sufficiency check requiring 90% coverage for each month
* Add weights propagation from data class to daily model via "weights" column
* Converted daily model settings from attrs to pydantic
* Refactored daily model initial guess optimization to use consolidated optimize function

4.1.0
-----
Expand Down
41 changes: 29 additions & 12 deletions eemeter/common/base_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,32 @@


class BaseSettings(pydantic.BaseModel):
model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)

"""Make all property keys case insensitive"""

# @pydantic.model_validator(mode="before")
# def __uppercase_property_keys__(cls, values: Any) -> Any:
# def __upper__(value: Any) -> Any:
# if isinstance(value, dict):
# return {k.upper() if isinstance(k, str) else k: __upper__(v) for k, v in value.items()}
# return value

# return __upper__(values)
model_config = pydantic.ConfigDict(
frozen = True,
arbitrary_types_allowed=True,
str_to_lower = True,
str_strip_whitespace = True,
)

"""Make all property keys lowercase and strip whitespace"""
@pydantic.model_validator(mode="before")
def __lowercase_property_keys__(cls, values: Any) -> Any:
def __lower__(value: Any) -> Any:
if isinstance(value, dict):
return {k.lower().strip() if isinstance(k, str) else k: __lower__(v) for k, v in value.items()}
return value

return __lower__(values)

"""Make all property values lowercase and strip whitespace before validation"""
@pydantic.field_validator("*", mode="before")
def lowercase_values(cls, v):
if isinstance(v, str):
return v.lower().strip()
return v


# add developer field to pydantic Field
def CustomField(developer=False, *args, **kwargs):
    """Build a ``pydantic.Field`` tagged with a ``developer`` flag.

    The flag is stored under the ``"developer"`` key of the field's
    ``json_schema_extra``; every other positional and keyword argument is
    forwarded to ``pydantic.Field`` unchanged.
    """
    schema_extra = {"developer": developer}
    return pydantic.Field(*args, json_schema_extra=schema_extra, **kwargs)
3 changes: 3 additions & 0 deletions eemeter/eemeter/common/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,9 @@ def compute_temperature_features(
if not keep_partial_nan_rows:
df = overwrite_partial_rows_with_nan(df)

if df.dropna(how='all').empty:
raise ValueError("All rows are NaN.")

# nan last row
df = df.iloc[:-1].reindex(df.index)
return df
Expand Down
2 changes: 2 additions & 0 deletions eemeter/eemeter/models/billing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@

from .data import BillingBaselineData, BillingReportingData
from .model import BillingModel
from .weighted_model import BillingWeightedModel

__all__ = (
"BillingBaselineData",
"BillingReportingData",
"BillingModel",
"BillingWeightedModel",
)
55 changes: 55 additions & 0 deletions eemeter/eemeter/models/billing/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,61 @@ def _compute_temperature_features(
features = temperature_features.drop(columns=["temperature_mean"])
return temp, features

# TODO: DELETE THIS after making real billing data class
@property
def billing_df(self) -> pd.DataFrame | None:
"""Get the corrected input data stored in the class. The actual dataframe is immutable, this returns a copy."""

df = self._df.copy()

# find indices where observed changes from prior
observed_change = df["observed"].diff()
observed_change = observed_change[observed_change != 0].index
obs_change_idx = df.index.get_indexer(observed_change)
obs_change_idx = np.append(obs_change_idx, len(df))
obs_change_idx = np.delete(obs_change_idx, np.where(np.diff(obs_change_idx) < 15)[0])

if obs_change_idx[0] != 0:
obs_change_idx = np.insert(obs_change_idx, 0, 0)

# create vector where value increases at each observed change
group = []
for i in range(1, len(obs_change_idx)):
idx_range = obs_change_idx[i] - obs_change_idx[i-1]

group.extend([i] * idx_range)

df["group"] = group

# get median delta

# get first datetime, average temperature, sum of observed for each group and make new df
df_temp = df.reset_index()
df_temp = df_temp.rename(columns={"index": "datetime"})

df_grouped = df_temp.groupby("group").agg({
"datetime": "first",
"season": "first",
"weekday_weekend": "first",
"temperature": "mean",
"observed": "mean",
}).set_index("datetime")

# create days column for number of days between current and previous index
df_grouped["days"] = df_grouped.index.to_series().diff().dt.days

df_grouped = df_grouped.dropna()

# create weights from days column
df_grouped["weights"] = df_grouped["days"] / df_grouped["days"].sum()

df_grouped = df_grouped.drop(columns=["days"])

if self._df is None:
return None
else:
return df_grouped.copy()


class BillingBaselineData(_BillingData):
"""
Expand Down
55 changes: 13 additions & 42 deletions eemeter/eemeter/models/billing/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,49 +56,19 @@ class BillingModel(DailyModel):
model (sklearn.pipeline.Pipeline): The final fitted model.
id (str): The index of the meter data.
"""
_baseline_data_type = BillingBaselineData
_reporting_data_type = BillingReportingData
_data_df_name = "df"

def __init__(self, settings=None):
super().__init__(model="legacy", settings=settings)
def __init__(self, settings=None, verbose: bool = False,):
super().__init__(model="legacy", settings=settings, verbose=verbose)

def fit(
self, baseline_data: BillingBaselineData, ignore_disqualification: bool = False
self,
baseline_data: BillingBaselineData,
ignore_disqualification: bool = False
) -> BillingModel:
"""Fit the model using baseline data.
Args:
baseline_data: BillingBaselineData object.
ignore_disqualification: Whether to ignore disqualification errors / warnings.
Returns:
The fitted model.
Raises:
TypeError: If baseline_data is not a BillingBaselineData object.
DataSufficiencyError: If the model can't be fit on disqualified baseline data.
"""
# TODO there's a fair bit of duplicated code between this and daily fit(), refactor
if not isinstance(baseline_data, BillingBaselineData):
raise TypeError("baseline_data must be a BillingBaselineData object")
baseline_data.log_warnings()
if baseline_data.disqualification and not ignore_disqualification:
for warning in baseline_data.disqualification + baseline_data.warnings:
print(warning.json())
raise DataSufficiencyError("Can't fit model on disqualified baseline data")
self.baseline_timezone = baseline_data.tz
self.warnings = baseline_data.warnings
self.disqualification = baseline_data.disqualification
self._fit(baseline_data.df)
if self.error["CVRMSE"] > self.settings.cvrmse_threshold:
cvrmse_warning = EEMeterWarning(
qualified_name="eemeter.model_fit_metrics.cvrmse",
description=(
f"Fit model has CVRMSE > {self.settings.cvrmse_threshold}"
),
data={"CVRMSE": self.error["CVRMSE"]},
)
cvrmse_warning.warn()
self.disqualification.append(cvrmse_warning)
return self
return super().fit(baseline_data, ignore_disqualification=ignore_disqualification)

def predict(
self,
Expand Down Expand Up @@ -135,7 +105,8 @@ def predict(
"reporting_data must be a BillingBaselineData or BillingReportingData object"
)

df_res = self._predict(reporting_data.df)
df = getattr(reporting_data, self._data_df_name)
df_res = self._predict(df)

if aggregation is None:
agg = None
Expand Down Expand Up @@ -182,7 +153,7 @@ def predict(

def plot(
self,
df_eval,
data,
aggregation: str | None = None,
):
"""Plot a model fit with baseline or reporting data. Requires matplotlib to use.
Expand All @@ -198,7 +169,7 @@ def plot(

# TODO: pass more kwargs to plotting function

plot(self, self.predict(df_eval, aggregation=aggregation))
plot(self, self.predict(data, aggregation=aggregation))

def to_dict(self) -> dict:
"""Returns a dictionary of model parameters.
Expand Down
34 changes: 34 additions & 0 deletions eemeter/eemeter/models/billing/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Copyright 2014-2024 OpenEEmeter contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import annotations

from eemeter.common.base_settings import CustomField

from eemeter.eemeter.models.daily.utilities.settings import DailyLegacySettings



class BillingSettings(DailyLegacySettings):
    # Settings for the billing model, built on the legacy daily settings.
    # The only field declared here is `segment_minimum_count`, a
    # developer-flagged integer that defaults to 3 and must be >= 3.
    segment_minimum_count: int = CustomField(
        description="Minimum number of data points for HDD/CDD",
        developer=True,
        default=3,
        ge=3,
    )
Loading

0 comments on commit 09fbcc2

Please sign in to comment.