
Commit

verifying that non-script changes pushed up as well
john-zither committed Nov 12, 2023
1 parent 75e3bec commit 2a446ad
Showing 4 changed files with 53 additions and 45 deletions.
48 changes: 25 additions & 23 deletions thermostat/core.py
@@ -7,6 +7,7 @@
import pandas as pd
import numpy as np
from math import sqrt
+from loguru import logger as log

from thermostat import get_version
from thermostat.climate_zone import BASELINE_TEMPERATURE
@@ -22,11 +23,7 @@
validate_cool_stage,
)

-# set to True in order to save detailed coefficients from the Tau grid search
-save_tau_search = True
-# path to the directory where Tau search data will be saved
-tau_search_path = '/Users/ethan/Documents/Resilient Edge/Projects/ICF - Energy Star connected thermostats/epathermostat_code/tau-search-2/tau_search_stats'
-import os
+from pathlib import Path

warnings.simplefilter('module', Warning)

@@ -195,7 +192,8 @@ def __init__(
zipcode, station, climate_zone,
temperature_in, temperature_out,
cool_runtime, heat_runtime,
-auxiliary_heat_runtime, emergency_heat_runtime):
+auxiliary_heat_runtime, emergency_heat_runtime,
+tau_search_path=None):

self.thermostat_id = thermostat_id

@@ -207,6 +205,9 @@
self.cool_runtime_daily = None
self.heat_runtime_daily = None

+# Tau search output path; None (the default) disables saving tau search files
+self.tau_search_path = Path(tau_search_path) if tau_search_path is not None else None

self.heat_type = heat_type
self.heat_stage = heat_stage
self.cool_type = cool_type
@@ -293,17 +294,18 @@ def __init__(
raise InsufficientCoreDaysError(f'Not enough core cooling days for climate zone {self.climate_zone}: {self.core_cooling_days_total}')
except KeyError:
raise KeyError(f'Missing climate zone for {self.climate_zone} ZIP Code {self.zipcode}')

-if save_tau_search:
+log.debug(f'Tau filepath: {tau_search_path}')
+if self.tau_search_path is not None:
# save delta-t and runtime dataframes for plotting
log.debug("Saving Tau Files")
raw_delta_t = self.temperature_out - self.temperature_in
delta_t_daily = raw_delta_t.resample('D').mean().dropna()
delta_t_daily.index.name = 'date'
delta_t_daily.name = 'delta_t'
-delta_t_daily.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_delta_t_daily_mean.csv'))
+delta_t_daily.to_csv(self.tau_search_path / f'{self.thermostat_id}_delta_t_daily_mean.csv')
if self.cool_runtime_daily is not None:
-self.cool_runtime_daily.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_cool_runtime_daily.csv'))
+self.cool_runtime_daily.to_csv(self.tau_search_path / f'{self.thermostat_id}_cool_runtime_daily.csv')
if self.heat_runtime_daily is not None:
-self.heat_runtime_daily.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_heat_runtime_daily.csv'))
+self.heat_runtime_daily.to_csv(self.tau_search_path / f'{self.thermostat_id}_heat_runtime_daily.csv')

logging.debug(f"{self.thermostat_id}: {self.core_heating_days_total} core heating days, {self.core_cooling_days_total} core cooling days")
self.validate()
@@ -907,21 +909,21 @@ def search_cdd_tau(run_time_array, max_tau=20):
f' best tau={best_tau}')
logger.debug(f'Best tau = {best_tau}')
# for exploring the tau stats
-if save_tau_search:
+if self.tau_search_path is not None:

best_shifted_deg_days_array = calc_cdd(best_tau)
-pd.DataFrame(best_shifted_deg_days_array).to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_cool_dd.csv'), index=True)
+pd.DataFrame(best_shifted_deg_days_array).to_csv(self.tau_search_path / f'{self.thermostat_id}_cool_dd.csv', index=True)
-pd.DataFrame(run_time_array).to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_cool_run_time.csv'), index=True)
+pd.DataFrame(run_time_array).to_csv(self.tau_search_path / f'{self.thermostat_id}_cool_run_time.csv', index=True)
tau_stats_cool = pd.DataFrame(tau_stats_list_cool)
# mark every tau as not best, then flag the best one
tau_stats_cool.set_index('tau', inplace=True)
tau_stats_cool.loc[:, 'is_best_tau'] = False
tau_stats_cool.loc[best_tau, 'is_best_tau'] = True
-tau_stats_cool.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_cool_tau_search.csv'))
+tau_stats_cool.to_csv(self.tau_search_path / f'{self.thermostat_id}_cool_tau_search.csv')
return best_tau, best_alpha, best_errors

try:
@@ -1074,20 +1076,20 @@ def search_hdd_tau(run_time_array, max_tau=20):
f' best tau={best_tau}')
logger.debug(f'Best tau = {best_tau}')
# for exploring the tau stats
-if save_tau_search:
+if self.tau_search_path is not None:
best_shifted_deg_days_array = calc_hdd(best_tau)
-pd.DataFrame(best_shifted_deg_days_array).to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_heat_dd.csv'), index=True)
+pd.DataFrame(best_shifted_deg_days_array).to_csv(self.tau_search_path / f'{self.thermostat_id}_heat_dd.csv', index=True)
-pd.DataFrame(run_time_array).to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_heat_run_time.csv'), index=True)
+pd.DataFrame(run_time_array).to_csv(self.tau_search_path / f'{self.thermostat_id}_heat_run_time.csv', index=True)
tau_stats_heat = pd.DataFrame(tau_stats_list_heat)
# mark every tau as not best, then flag the best one
tau_stats_heat.set_index('tau', inplace=True)
tau_stats_heat.loc[:, 'is_best_tau'] = False
tau_stats_heat.loc[best_tau, 'is_best_tau'] = True
-tau_stats_heat.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_heat_tau_search.csv'))
+tau_stats_heat.to_csv(self.tau_search_path / f'{self.thermostat_id}_heat_tau_search.csv')

return best_tau, best_alpha, best_errors

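A note on usage: the Thermostat class writes its tau search CSVs into tau_search_path but does not create that directory, and passing None (the default) skips the output entirely. A minimal caller-side sketch, with a hypothetical directory name:

from pathlib import Path

tau_dir = Path('tau_search_stats')  # hypothetical output directory
tau_dir.mkdir(parents=True, exist_ok=True)  # create it up front; Thermostat only writes into it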
29 changes: 17 additions & 12 deletions thermostat/importers.py
@@ -26,6 +26,7 @@
from multiprocessing import Pool, cpu_count
from functools import partial
import logging
+from pathlib import Path

try:
NUMBER_OF_CORES = len(os.sched_getaffinity(0))
@@ -113,17 +114,16 @@ def save_json_cache(index, thermostat_id, station, cache_path=None):
json_cache[filename] = sqlite_json_store.retrieve_json(base_name)

if cache_path is None:
-directory = os.path.join(os.curdir, "epathermostat_weather_data")
+directory = Path.cwd() / "epathermostat_weather_data"
else:
-directory = os.path.normpath(cache_path)
+directory = Path(cache_path)

thermostat_filename = f"{thermostat_id}.json"
-thermostat_path = os.path.join(directory, thermostat_filename)
+thermostat_path = directory / thermostat_filename

try:
-os.makedirs(os.path.dirname(directory), exist_ok=True)
+directory.mkdir(parents=True, exist_ok=True)
with open(thermostat_path, 'w') as outfile:
json.dump(json_cache, outfile)

@@ -165,7 +165,7 @@ def normalize_utc_offset(utc_offset):


def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True,
-cache_path=None, top_n=None):
+cache_path=None, top_n=None, tau_search_path=None):
"""
Creates Thermostat objects from data stored in CSV files.
@@ -227,7 +227,9 @@ def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True,
metadata_filename=metadata_filename,
verbose=verbose,
save_cache=save_cache,
-cache_path=cache_path)
+cache_path=cache_path,
+tau_search_path=tau_search_path,
+)
result_list = p.imap(multiprocess_func_partial, metadata.iterrows())
p.close()
p.join()
@@ -250,15 +252,15 @@
return iter(results), error_list


-def _multiprocess_func(metadata, metadata_filename, verbose=False, save_cache=False, cache_path=None):
+def _multiprocess_func(metadata, metadata_filename, verbose=False, save_cache=False, cache_path=None, tau_search_path=None):
""" This function is a partial function for multiproccessing and shares the same arguments as from_csv.
It is not intended to be called directly."""
_, row = metadata
logger.info(f"Importing thermostat {row.thermostat_id}")
if verbose and logger.getEffectiveLevel() > logging.INFO:
print(f"Importing thermostat {row.thermostat_id}")

-interval_data_filename = os.path.join(os.path.dirname(metadata_filename), row.interval_data_filename)
+interval_data_filename = Path(metadata_filename).parent / row.interval_data_filename

status_metadata = {
'thermostat_id': row.thermostat_id,
@@ -282,6 +284,7 @@ def _multiprocess_func(metadata, metadata_filename, verbose=False, save_cache=False, cache_path=None):
interval_data_filename=interval_data_filename,
save_cache=save_cache,
cache_path=cache_path,
+tau_search_path=tau_search_path,
)
except (ZIPCodeLookupError, StationLookupError, ClimateZoneLookupError) as e:
# Could not locate a station for the thermostat. Warn and skip.
@@ -305,7 +308,8 @@

def get_single_thermostat(thermostat_id, zipcode,
heat_type, heat_stage, cool_type, cool_stage,
-utc_offset, interval_data_filename, save_cache=False, cache_path=None):
+utc_offset, interval_data_filename, save_cache=False, cache_path=None,
+tau_search_path=None):
""" Load a single thermostat directly from an interval data file.
Parameters
Expand Down Expand Up @@ -428,7 +432,8 @@ def get_single_thermostat(thermostat_id, zipcode,
cool_runtime,
heat_runtime,
auxiliary_heat_runtime,
-emergency_heat_runtime
+emergency_heat_runtime,
+tau_search_path=tau_search_path,
)
return thermostat

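End to end, from_csv now threads tau_search_path through _multiprocess_func and get_single_thermostat into the Thermostat constructor. A hedged usage sketch under the assumptions in this diff (file paths are hypothetical examples):

from thermostat.importers import from_csv

# tau_search_path=None (the default) leaves tau search output disabled.
thermostats, errors = from_csv(
    'data/metadata.csv',                 # hypothetical metadata file
    verbose=True,
    tau_search_path='tau_search_stats',  # directory should exist before the run
)
for thermostat in thermostats:
    print(thermostat.thermostat_id)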
15 changes: 8 additions & 7 deletions thermostat/parallel.py
@@ -3,7 +3,7 @@
from itertools import cycle
from zipfile import ZipFile
import tempfile
-import os
+from pathlib import Path
from thermostat.stations import get_closest_station_by_zipcode


@@ -42,6 +42,8 @@ def schedule_batches(metadata_filename, n_batches, zip_files=False, batches_dir=None):
message = "Cannot have batches_dir==None when zip_files==True. " \
"Please supply a directory in which to save batches."
raise ValueError(message)
+else:
+batches_dir = Path(batches_dir)

metadata_df = pd.read_csv(metadata_filename, dtype={"zipcode": str})
stations = [get_closest_station_by_zipcode(zipcode) for zipcode in metadata_df.zipcode]
@@ -80,25 +82,24 @@

if zip_files:

-if not os.path.exists(batches_dir):
-os.makedirs(batches_dir)
+batches_dir.mkdir(parents=True, exist_ok=True)

batch_zipfile_names = []
for i, batch_df in enumerate(batch_dfs):

batch_name = "batch_{:05d}.zip".format(i)
-batch_zipfile_name = os.path.join(batches_dir, batch_name)
+batch_zipfile_name = batches_dir / batch_name
batch_zipfile_names.append(batch_zipfile_name)

_, fname = tempfile.mkstemp()
batch_df.to_csv(fname, index=False)

with ZipFile(batch_zipfile_name, 'w') as batch_zip:
-batch_zip.write(fname, arcname=os.path.join('data', 'metadata.csv'))
+batch_zip.write(fname, arcname=(Path('data') / 'metadata.csv'))

for filename in batch_df.interval_data_filename:
-interval_data_source = os.path.join(os.path.dirname(metadata_filename), filename)
-batch_zip.write(interval_data_source, arcname=os.path.join('data', filename))
+interval_data_source = Path(metadata_filename).parent / filename
+batch_zip.write(interval_data_source, arcname=(Path('data') / filename))

return batch_zipfile_names

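For context, a sketch of scheduling zipped batches with the Path-based directory handling above (paths are hypothetical examples, and n_batches is arbitrary):

from thermostat.parallel import schedule_batches

batch_zips = schedule_batches(
    'data/metadata.csv',    # hypothetical metadata file
    n_batches=4,
    zip_files=True,
    batches_dir='batches',  # coerced to Path and created if missing
)
print(batch_zips)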
6 changes: 3 additions & 3 deletions thermostat/util/testing.py
@@ -1,4 +1,4 @@
-import os
+from pathlib import Path
import inspect

def get_data_path(f=''):
@@ -7,5 +7,5 @@ def get_data_path(f=''):
"""
# get our callers file
_, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1]
-base_dir = os.path.abspath(os.path.dirname(filename))
-return os.path.join(base_dir, f)
+base_dir = Path(filename).parent.resolve()
+return base_dir / f
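Since get_data_path resolves relative to the caller's own file via inspect, a test module can locate fixtures beside itself. A hedged example (file name hypothetical):

from thermostat.util.testing import get_data_path

# Returns <directory of the calling file>/data/metadata.csv as a Path.
metadata_path = get_data_path('data/metadata.csv')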
