diff --git a/thermostat/core.py b/thermostat/core.py
index 4936d5e6..4ef2070a 100644
--- a/thermostat/core.py
+++ b/thermostat/core.py
@@ -7,6 +7,7 @@
 import pandas as pd
 import numpy as np
 from math import sqrt
+from loguru import logger as log
 
 from thermostat import get_version
 from thermostat.climate_zone import BASELINE_TEMPERATURE
@@ -22,11 +23,7 @@
     validate_cool_stage,
 )
 
-# set to True in order to save detailed coefficients from the Tau grid search
-save_tau_search = True
-# path to the directory where Tau search data will be saved
-tau_search_path = '/Users/ethan/Documents/Resilient Edge/Projects/ICF - Energy Star connected thermostats/epathermostat_code/tau-search-2/tau_search_stats'
-import os
+from pathlib import Path
 
 warnings.simplefilter('module', Warning)
@@ -195,7 +192,8 @@ def __init__(
             zipcode, station, climate_zone,
             temperature_in, temperature_out,
             cool_runtime, heat_runtime,
-            auxiliary_heat_runtime, emergency_heat_runtime):
+            auxiliary_heat_runtime, emergency_heat_runtime,
+            tau_search_path=None):
 
         self.thermostat_id = thermostat_id
 
@@ -207,6 +205,9 @@
         self.cool_runtime_daily = None
         self.heat_runtime_daily = None
 
+        # Directory for tau search diagnostics; None disables saving
+        self.tau_search_path = Path(tau_search_path) if tau_search_path is not None else None
+
         self.heat_type = heat_type
         self.heat_stage = heat_stage
         self.cool_type = cool_type
@@ -293,17 +294,18 @@
                 raise InsufficientCoreDaysError(f'Not enough core cooling core days for climate zone {self.climate_zone}: {self.core_cooling_days_total}')
         except KeyError:
             raise KeyError(f'Missing climate zone for {self.climate_zone} ZIP Code {self.zipcode}')
-
-        if save_tau_search:
+        log.debug(f'Tau filepath: {tau_search_path}')
+        if self.tau_search_path is not None:
             # save delta-t and runtime dataframes for plotting
+            log.debug("Saving Tau Files")
             raw_delta_t = self.temperature_out - self.temperature_in
             delta_t_daily = raw_delta_t.resample('D').mean().dropna()
             delta_t_daily.columns = ['date', 'delta_t']
-            delta_t_daily.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_delta_t_daily_mean.csv'))
+            delta_t_daily.to_csv(self.tau_search_path / f'{self.thermostat_id}_delta_t_daily_mean.csv')
             if self.cool_runtime_daily is not None:
-                self.cool_runtime_daily.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_cool_runtime_daily.csv'))
+                self.cool_runtime_daily.to_csv(self.tau_search_path / f'{self.thermostat_id}_cool_runtime_daily.csv')
             if self.heat_runtime_daily is not None:
-                self.heat_runtime_daily.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_heat_runtime_daily.csv'))
+                self.heat_runtime_daily.to_csv(self.tau_search_path / f'{self.thermostat_id}_heat_runtime_daily.csv')
 
         logging.debug(f"{self.thermostat_id}: {self.core_heating_days_total} core heating days, {self.core_cooling_days_total} core cooling days")
         self.validate()
@@ -907,21 +909,21 @@ def search_cdd_tau(run_time_array, max_tau=20):
                                 f' best tau={best_tau}')
                 logger.debug(f'Best tau = {best_tau}')
                 # for exploring the tau stats
-                if save_tau_search:
+                if self.tau_search_path is not None:
                     best_shifted_deg_days_array = calc_cdd(best_tau)
-                    pd.DataFrame(best_shifted_deg_days_array).to_csv(os.path.join(tau_search_path,
-                                                                     f'{self.thermostat_id}_cool_dd.csv'),
+                    pd.DataFrame(best_shifted_deg_days_array).to_csv(self.tau_search_path /
+                                                                     f'{self.thermostat_id}_cool_dd.csv',
                                                                      index=True)
-                    pd.DataFrame(run_time_array).to_csv(os.path.join(tau_search_path,
-                                                        f'{self.thermostat_id}_cool_run_time.csv'),
+                    pd.DataFrame(run_time_array).to_csv(self.tau_search_path /
+                                                        f'{self.thermostat_id}_cool_run_time.csv',
                                                         index=True)
                     tau_stats_cool = pd.DataFrame(tau_stats_list_cool)
                     # set all other taus not best and this one set to best
                     tau_stats_cool.set_index('tau', inplace=True)
                     tau_stats_cool.loc[:, 'is_best_tau'] = False
                     tau_stats_cool.loc[best_tau, 'is_best_tau'] = True
-                    tau_stats_cool.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_cool_tau_search.csv'))
+                    tau_stats_cool.to_csv(self.tau_search_path / f'{self.thermostat_id}_cool_tau_search.csv')
                 return best_tau, best_alpha, best_errors
 
             try:
@@ -1074,20 +1076,20 @@ def search_hdd_tau(run_time_array, max_tau=20):
                                 f' best tau={best_tau}')
                 logger.debug(f'Best tau = {best_tau}')
                 # for exploring the tau stats
-                if save_tau_search:
+                if self.tau_search_path is not None:
                     best_shifted_deg_days_array = calc_hdd(best_tau)
-                    pd.DataFrame(best_shifted_deg_days_array).to_csv(os.path.join(tau_search_path,
-                                                                     f'{self.thermostat_id}_heat_dd.csv'),
+                    pd.DataFrame(best_shifted_deg_days_array).to_csv(self.tau_search_path /
+                                                                     f'{self.thermostat_id}_heat_dd.csv',
                                                                      index=True)
-                    pd.DataFrame(run_time_array).to_csv(os.path.join(tau_search_path,
-                                                        f'{self.thermostat_id}_heat_run_time.csv'),
+                    pd.DataFrame(run_time_array).to_csv(self.tau_search_path /
+                                                        f'{self.thermostat_id}_heat_run_time.csv',
                                                         index=True)
                     tau_stats_heat = pd.DataFrame(tau_stats_list_heat)
                     # set all other taus not best and this one set to best
                     tau_stats_heat.set_index('tau', inplace=True)
                     tau_stats_heat.loc[:, 'is_best_tau'] = False
                     tau_stats_heat.loc[best_tau, 'is_best_tau'] = True
-                    tau_stats_heat.to_csv(os.path.join(tau_search_path, f'{self.thermostat_id}_heat_tau_search.csv'))
+                    tau_stats_heat.to_csv(self.tau_search_path / f'{self.thermostat_id}_heat_tau_search.csv')
 
         return best_tau, best_alpha, best_errors
 
diff --git a/thermostat/importers.py b/thermostat/importers.py
index ddebe0d6..4e9fd4be 100644
--- a/thermostat/importers.py
+++ b/thermostat/importers.py
@@ -26,6 +26,7 @@
 from multiprocessing import Pool, cpu_count
 from functools import partial
 import logging
+from pathlib import Path
 
 try:
     NUMBER_OF_CORES = len(os.sched_getaffinity(0))
@@ -113,17 +114,16 @@ def save_json_cache(index, thermostat_id, station, cache_path=None):
         json_cache[filename] = sqlite_json_store.retrieve_json(base_name)
 
     if cache_path is None:
-        directory = os.path.join(
-            os.curdir,
-            "epathermostat_weather_data")
+        directory = Path.cwd() / "epathermostat_weather_data"
     else:
-        directory = os.path.normpath(
-            cache_path)
+        directory = Path(cache_path)
 
     thermostat_filename = f"{thermostat_id}.json"
-    thermostat_path = os.path.join(directory, thermostat_filename)
+    thermostat_path = directory / thermostat_filename
+
     try:
-        os.makedirs(os.path.dirname(directory), exist_ok=True)
+        directory.mkdir(parents=True, exist_ok=True)
         with open(thermostat_path, 'w') as outfile:
             json.dump(json_cache, outfile)
@@ -165,7 +165,7 @@ def normalize_utc_offset(utc_offset):
 
 
 def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True,
-             cache_path=None, top_n=None):
+             cache_path=None, top_n=None, tau_search_path=None):
     """
     Creates Thermostat objects from data stored in CSV files.
@@ -227,7 +227,9 @@ def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True,
             metadata_filename=metadata_filename,
             verbose=verbose,
             save_cache=save_cache,
-            cache_path=cache_path)
+            cache_path=cache_path,
+            tau_search_path=tau_search_path,
+            )
         result_list = p.imap(multiprocess_func_partial, metadata.iterrows())
         p.close()
         p.join()
@@ -250,7 +252,7 @@
     return iter(results), error_list
 
 
-def _multiprocess_func(metadata, metadata_filename, verbose=False, save_cache=False, cache_path=None):
+def _multiprocess_func(metadata, metadata_filename, verbose=False, save_cache=False, cache_path=None, tau_search_path=None):
     """ This function is a partial function for multiproccessing and shares the
     same arguments as from_csv. It is not intended to be called directly."""
     _, row = metadata
@@ -258,7 +260,7 @@
     if verbose and logger.getEffectiveLevel() > logging.INFO:
         print(f"Importing thermostat {row.thermostat_id}")
 
-    interval_data_filename = os.path.join(os.path.dirname(metadata_filename), row.interval_data_filename)
+    interval_data_filename = Path(metadata_filename).parent / row.interval_data_filename
 
     status_metadata = {
         'thermostat_id': row.thermostat_id,
@@ -282,6 +284,7 @@
             interval_data_filename=interval_data_filename,
             save_cache=save_cache,
             cache_path=cache_path,
+            tau_search_path=tau_search_path,
         )
     except (ZIPCodeLookupError, StationLookupError, ClimateZoneLookupError) as e:
         # Could not locate a station for the thermostat. Warn and skip.
@@ -305,7 +308,8 @@
 
 def get_single_thermostat(thermostat_id, zipcode,
                           heat_type, heat_stage, cool_type, cool_stage,
-                          utc_offset, interval_data_filename, save_cache=False, cache_path=None):
+                          utc_offset, interval_data_filename, save_cache=False, cache_path=None,
+                          tau_search_path=None):
     """ Load a single thermostat directly from an interval data file.
 
     Parameters
@@ -428,7 +432,8 @@
         cool_runtime,
         heat_runtime,
         auxiliary_heat_runtime,
-        emergency_heat_runtime
+        emergency_heat_runtime,
+        tau_search_path=tau_search_path,
     )
 
     return thermostat
diff --git a/thermostat/parallel.py b/thermostat/parallel.py
index 93396009..1bd1dc03 100644
--- a/thermostat/parallel.py
+++ b/thermostat/parallel.py
@@ -3,7 +3,7 @@
 from itertools import cycle
 from zipfile import ZipFile
 import tempfile
-import os
+from pathlib import Path
 
 from thermostat.stations import get_closest_station_by_zipcode
@@ -42,6 +42,8 @@ def schedule_batches(metadata_filename, n_batches, zip_files=False, batches_dir=
             message = "Cannot have batches_dir==None when zip_files==True. " \
                       "Please supply a directory in which to save batches."
             raise ValueError(message)
+        else:
+            batches_dir = Path(batches_dir)
 
     metadata_df = pd.read_csv(metadata_filename, dtype={"zipcode": str})
 
     stations = [get_closest_station_by_zipcode(zipcode) for zipcode in metadata_df.zipcode]
@@ -80,25 +82,24 @@ def schedule_batches(metadata_filename, n_batches, zip_files=False, batches_dir=
 
     if zip_files:
-        if not os.path.exists(batches_dir):
-            os.makedirs(batches_dir)
+        batches_dir.mkdir(parents=True, exist_ok=True)
 
         batch_zipfile_names = []
         for i, batch_df in enumerate(batch_dfs):
             batch_name = "batch_{:05d}.zip".format(i)
-            batch_zipfile_name = os.path.join(batches_dir, batch_name)
+            batch_zipfile_name = batches_dir / batch_name
             batch_zipfile_names.append(batch_zipfile_name)
 
             _, fname = tempfile.mkstemp()
             batch_df.to_csv(fname, index=False)
 
             with ZipFile(batch_zipfile_name, 'w') as batch_zip:
-                batch_zip.write(fname, arcname=os.path.join('data', 'metadata.csv'))
+                batch_zip.write(fname, arcname=Path('data') / 'metadata.csv')
 
                 for filename in batch_df.interval_data_filename:
-                    interval_data_source = os.path.join(os.path.dirname(metadata_filename), filename)
-                    batch_zip.write(interval_data_source, arcname=os.path.join('data', filename))
+                    interval_data_source = Path(metadata_filename).parent / filename
+                    batch_zip.write(interval_data_source, arcname=Path('data') / filename)
 
         return batch_zipfile_names
diff --git a/thermostat/util/testing.py b/thermostat/util/testing.py
index 61325762..95589d9e 100644
--- a/thermostat/util/testing.py
+++ b/thermostat/util/testing.py
@@ -1,4 +1,4 @@
-import os
+from pathlib import Path
 import inspect
 
 def get_data_path(f=''):
@@ -7,5 +7,5 @@ def get_data_path(f=''):
     """
     # get our callers file
     _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1]
-    base_dir = os.path.abspath(os.path.dirname(filename))
-    return os.path.join(base_dir, f)
+    base_dir = Path(filename).parent.resolve()
+    return base_dir / f
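
Usage note: with this change the tau search diagnostics become opt-in. Callers thread tau_search_path through from_csv (and on to the Thermostat constructor) instead of editing module-level globals, and passing None disables the CSV output entirely. A minimal sketch of the intended call path follows; the directory and metadata file names are hypothetical, and note that Thermostat.__init__ writes into tau_search_path but does not create it, so the caller should.

    from pathlib import Path
    from thermostat.importers import from_csv

    # Hypothetical output directory for the tau search CSVs; it must exist
    # before import because Thermostat.__init__ only writes into it.
    tau_dir = Path("tau_search_stats")
    tau_dir.mkdir(parents=True, exist_ok=True)

    # "metadata.csv" is a stand-in for a real thermostat metadata file.
    thermostats, errors = from_csv(
        "metadata.csv",
        verbose=True,
        tau_search_path=tau_dir,  # omit or pass None to skip the diagnostics
    )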