diff --git a/scripts/analyze_tau_stats.py b/scripts/analyze_tau_stats.py
index 21fb06b9..7bebe8d7 100644
--- a/scripts/analyze_tau_stats.py
+++ b/scripts/analyze_tau_stats.py
@@ -1,7 +1,7 @@
-import os
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
+from pathlib import Path
 
 """"
 DIRECTIONS
@@ -21,65 +21,45 @@
 """
 
-debug = False
-# configure paths
-#################
-# path to directory of raw csv files in EPAthermostat-compatible format
-data_dir = os.path.join('../', '../', 'tau-search-2', 'EPA_Tau')
-
-# path to prior metrics file to use for comparison; should contain the same set of ct_identifiers
-results_dir = os.path.join('../', '../', 'tau-search-2', 'EPA_Tau_results')
-results_old_path = os.path.join(results_dir, '2019_EPA_tau_2022-11-03_metrics_base.csv')
-
-# path to new results from experimental tau search code, same as METRICS_FILEPATH in write_stats.py
-results_path = os.path.join(results_dir, '2019_EPA_tau_2023_06_01_metrics_new.csv')
-
-# path to directory of stats files output from running tau search code; called "tau_search_path" in core.py module
-stats_dir = 'tau_search_stats'
-
-# path to directory where output plots and tables will be saved
-plots_dir = 'tau_stats_plots'
-
-
-def get_stats(ct_id, heat_or_cool, stats_dir=stats_dir):
-    stats_path = os.path.join(stats_dir, f'{ct_id}_{heat_or_cool}_tau_search.csv')
+def get_stats(ct_id, heat_or_cool, stats_dir):
+    stats_path = stats_dir / f'{ct_id}_{heat_or_cool}_tau_search.csv'
     stats = pd.read_csv(stats_path)
     return stats
 
 
-def get_raw_data(ct_id, data_dir=data_dir):
-    data_path = os.path.join(data_dir, f'{ct_id}.csv')
+def get_raw_data(ct_id, data_dir):
+    data_path = data_dir / f'{ct_id}.csv'
     data = pd.read_csv(data_path)
     return data
 
 
-def get_daily_data(ct_id, heat_or_cool, stats_dir=stats_dir):
+def get_daily_data(ct_id, heat_or_cool, stats_dir):
     daily_data = {}
-    dd_data_path = os.path.join(stats_dir, f'{ct_id}_{heat_or_cool}_dd.csv')
-    rt_data_path = os.path.join(stats_dir, f'{ct_id}_{heat_or_cool}_run_time.csv')
-    daily_data['degree_day'] = pd.read_csv(dd_data_path, usecols=[1]).iloc[:,0].to_numpy()
-    daily_data['run_time'] = pd.read_csv(rt_data_path, usecols=[1]).iloc[:,0].to_numpy()
+    dd_data_path = stats_dir / f'{ct_id}_{heat_or_cool}_dd.csv'
+    rt_data_path = stats_dir / f'{ct_id}_{heat_or_cool}_run_time.csv'
+    daily_data['degree_day'] = pd.read_csv(dd_data_path, usecols=[1]).iloc[:, 0].to_numpy()
+    daily_data['run_time'] = pd.read_csv(rt_data_path, usecols=[1]).iloc[:, 0].to_numpy()
     return daily_data
 
 
 def plot_regression(x_arr, y_arr, tau, alpha, ax, heat_or_cool='cool'):
-    point_style = 'bo' if heat_or_cool=='cool' else 'ro'
-    line_style = 'm-' if heat_or_cool=='cool' else 'g-'
-    ax.plot(x_arr, y_arr, point_style)
-    ax.plot(x_arr, np.full(len(x_arr), 0), 'k-')
-    ax.plot(x_arr, np.array(alpha)*x_arr, line_style)
-    ax.set_title(f'tau: {tau}, alpha: {alpha:.2f}')
-    ax.set_ylabel('runtime (minutes)')
-    if heat_or_cool == 'heat':
-        ax.invert_xaxis()
-        ax.set_xlabel('degree-days (reversed)')
-    else:
-        ax.set_xlabel('degree-days')
-
-
-def get_runtime(ct_id, heat_or_cool, stats_dir=stats_dir):
-    runtime_path = os.path.join(stats_dir, f'{ct_id}_{heat_or_cool}_run_time.csv')
-    if os.path.exists(runtime_path):
+    point_style = 'bo' if heat_or_cool == 'cool' else 'ro'
+    line_style = 'm-' if heat_or_cool == 'cool' else 'g-'
+    ax.plot(x_arr, y_arr, point_style)
+    ax.plot(x_arr, np.full(len(x_arr), 0), 'k-')
+    ax.plot(x_arr, np.array(alpha)*x_arr, line_style)
+    ax.set_title(f'tau: {tau}, alpha: {alpha:.2f}')
+    ax.set_ylabel('runtime (minutes)')
+    if heat_or_cool == 'heat':
+        ax.invert_xaxis()
+        ax.set_xlabel('degree-days (reversed)')
+    else:
+        ax.set_xlabel('degree-days')
+
+
+def get_runtime(ct_id, heat_or_cool, stats_dir):
+    runtime_path = stats_dir / f'{ct_id}_{heat_or_cool}_run_time.csv'
+    if runtime_path.exists():
         runtime = pd.read_csv(runtime_path)
         runtime.columns = ['date', f'{heat_or_cool}_runtime']
         runtime[f'{heat_or_cool}_runtime'].fillna(0, inplace=True)
@@ -88,7 +68,7 @@ def get_runtime(ct_id, heat_or_cool, stats_dir=stats_dir):
     return runtime
 
 
-def get_all_runtime(ct_id, stats_dir=stats_dir):
+def get_all_runtime(ct_id, stats_dir):
     cool_runtime = get_runtime(ct_id, 'cool', stats_dir=stats_dir)
     heat_runtime = get_runtime(ct_id, 'heat', stats_dir=stats_dir)
     if cool_runtime is None and heat_runtime is None:
@@ -101,38 +81,34 @@ def get_all_runtime(ct_id, stats_dir=stats_dir):
         all_runtime['heat_runtime'] = 0
     if cool_runtime is not None and heat_runtime is not None:
         all_runtime = cool_runtime.merge(heat_runtime, on='date', how='outer')
-        all_runtime.loc[:, ['heat_runtime', 'cool_runtime']] = all_runtime.loc[:, ['heat_runtime', 'cool_runtime']].fillna(0)
+        all_runtime.loc[:, ['heat_runtime', 'cool_runtime']] = all_runtime.loc[
+            :, ['heat_runtime', 'cool_runtime']].fillna(0)
         all_runtime['runtime'] = all_runtime.cool_runtime + all_runtime.heat_runtime
     return all_runtime
 
 
-def get_delta_t(ct_id, stats_dir=stats_dir):
-    delta_t_path = os.path.join(stats_dir, f'{ct_id}_delta_t_daily_mean.csv')
+def get_delta_t(ct_id, stats_dir):
+    delta_t_path = stats_dir / f'{ct_id}_delta_t_daily_mean.csv'
     delta_t = pd.read_csv(delta_t_path)
     delta_t.columns = ['date', 'delta_t']
     return delta_t
 
 
-def get_raw_daily_data(ct_id, stats_dir=stats_dir):
+def get_raw_daily_data(ct_id, stats_dir):
     all_runtime = get_all_runtime(ct_id, stats_dir=stats_dir)
     delta_t = get_delta_t(ct_id, stats_dir=stats_dir)
     daily_data = all_runtime.merge(delta_t, on='date', how='inner')
     return daily_data
 
-
-def plot_raw_delta_t(ct_id, ax=None, results_old=None, results_new=None):
+
+def plot_raw_delta_t(ct_id, data_dir, stats_dir, results, ax=None, results_old=None):
     if ax is None:
-        fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(10,5));
-    if results_old is None:
-        results_old = pd.read_csv(results_old_path)
-    if results_new is None:
-        results_new = pd.read_csv(results_path)
-
+        fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(10, 5))
+
     ct_stats = {}
-    raw_data = get_raw_data(ct_id)
-    all_runtime = get_all_runtime(ct_id)
-    delta_t = get_delta_t(ct_id)
+    raw_data = get_raw_data(ct_id, data_dir)
+    all_runtime = get_all_runtime(ct_id, stats_dir)
+    delta_t = get_delta_t(ct_id, stats_dir)
     daily_data = all_runtime.merge(delta_t, on='date', how='inner')
 
     try:
@@ -147,12 +123,12 @@ def plot_raw_delta_t(ct_id, ax=None, results_old=None, results_new=None):
         has_heat = True
     except:
         has_heat = False
-    
+
     if has_cool:
-        stats_cool = get_stats(ct_id, 'cool').query("is_best_tau").iloc[0]
-        cooling_indoor_temp = raw_data.query("cool_runtime_stg1>0").temp_in.mean()
+        stats_cool = get_stats(ct_id=ct_id, heat_or_cool='cool', stats_dir=stats_dir).query("is_best_tau").iloc[0]
+        _ = raw_data.query("cool_runtime_stg1>0").temp_in.mean()
         cool_data = daily_data.query("cool_runtime > 0")
-        
+
         ct_stats['old_tau_cool'] = old_stats_cool.tau
         ct_stats['tau_cool'] = stats_cool.tau
         ct_stats['old_alpha_cool'] = old_stats_cool.alpha
@@ -161,12 +137,12 @@ def plot_raw_delta_t(ct_id, ax=None, results_old=None, results_new=None):
         ct_stats['metric_cool'] = core_stats_cool.percent_savings_baseline_percentile
         ct_stats['old_cvrmse_cool'] = old_stats_cool.cv_root_mean_sq_err
         ct_stats['cvrmse_cool'] = (stats_cool.sq_errors ** 0.5) / cool_data.cool_runtime.mean()
-    
+
     if has_heat:
-        stats_heat = get_stats(ct_id, 'heat').query("is_best_tau").iloc[0]
-        heating_indoor_temp = raw_data.query("heat_runtime_stg1>0").temp_in.mean()
+        stats_heat = get_stats(ct_id=ct_id, heat_or_cool='heat', stats_dir=stats_dir).query("is_best_tau").iloc[0]
+        _ = raw_data.query("heat_runtime_stg1>0").temp_in.mean()
         heat_data = daily_data.query("heat_runtime > 0")
-        
+
         ct_stats['old_tau_heat'] = old_stats_heat.tau
         ct_stats['tau_heat'] = stats_heat.tau
         ct_stats['old_alpha_heat'] = old_stats_heat.alpha
@@ -177,39 +153,38 @@ def plot_raw_delta_t(ct_id, ax=None, results_old=None, results_new=None):
         ct_stats['cvrmse_heat'] = (stats_heat.sq_errors ** 0.5) / heat_data.heat_runtime.mean()
 
     if has_cool:
-        daily_data.query("cool_runtime > 0").plot(kind='scatter', x='delta_t', y='cool_runtime', c='blue', alpha=0.5,
-                                                  ylim=(0,daily_data.runtime.max()+50), ax=ax);
-        ax.plot(cool_data.delta_t, (cool_data.delta_t + stats_cool.tau) * stats_cool.alpha, 'b-', alpha=0.6);
-        ax.plot(cool_data.delta_t, (cool_data.delta_t + old_stats_cool.tau) * old_stats_cool.alpha, 'b--', alpha=0.6);
+        daily_data.query("cool_runtime > 0").plot(kind='scatter', x='delta_t', y='cool_runtime', c='blue', alpha=0.5,
+                                                  ylim=(0, daily_data.runtime.max()+50), ax=ax)
+        ax.plot(cool_data.delta_t, (cool_data.delta_t + stats_cool.tau) * stats_cool.alpha, 'b-', alpha=0.6)
+        ax.plot(cool_data.delta_t, (cool_data.delta_t + old_stats_cool.tau) * old_stats_cool.alpha, 'b--', alpha=0.6)
     if has_heat:
-        daily_data.query("heat_runtime > 0").plot(kind='scatter', x='delta_t', y='heat_runtime', c='red', ax=ax, alpha=0.5);
-        ax.plot(heat_data.delta_t, (heat_data.delta_t + stats_heat.tau) * stats_heat.alpha * -1, 'r-', alpha=0.6);
-        ax.plot(heat_data.delta_t, (heat_data.delta_t + old_stats_heat.tau) * old_stats_heat.alpha * -1, 'r--', alpha=0.6);
+        daily_data.query("heat_runtime > 0").plot(
+            kind='scatter', x='delta_t', y='heat_runtime', c='red', ax=ax, alpha=0.5)
+        ax.plot(
+            heat_data.delta_t, (heat_data.delta_t + stats_heat.tau) * stats_heat.alpha * -1, 'r-', alpha=0.6)
+        ax.plot(
+            heat_data.delta_t, (heat_data.delta_t + old_stats_heat.tau) * old_stats_heat.alpha * -1, 'r--', alpha=0.6)
 
     return ct_stats
-    
-
-def generate_plots(ct_ids, plots_dir=plots_dir, results_old=None):
-    if results_old is None:
-        results_old = pd.read_csv(results_old_path)
+
+
+def generate_plots(ct_ids, plots_dir, data_dir, stats_dir, results, results_old, debug=False):
     ct_data_list = []
     plt.rcParams['font.size'] = 10
 
     for ct_id in ct_ids[:]:
-        fig, ax = plt.subplots(ncols=2, nrows=3, figsize=(20,15));
+        fig, ax = plt.subplots(ncols=2, nrows=3, figsize=(20, 15))
        for heat_or_cool in ['heat', 'cool']:
             ax_offset = 0 if heat_or_cool == 'cool' else 1
             h_c_label = f'{heat_or_cool}ing_ALL'
             ct_results = results_old.query("ct_identifier==@ct_id & heating_or_cooling==@h_c_label")
             if len(ct_results) == 0:
-                print(f'No results found for ct_id {ct_id} with {heat_or_cool}')
+                print(f'No results found for ct_id {ct_id} with {heat_or_cool}')
             else:
                 try:
-                    stats = get_stats(ct_id, heat_or_cool)
+                    stats = get_stats(ct_id=ct_id, heat_or_cool=heat_or_cool, stats_dir=stats_dir)
                 except FileNotFoundError:
                     print(f'No stats for {ct_id} with {heat_or_cool}')
                     continue
-                daily_data = get_daily_data(ct_id, heat_or_cool)
+                daily_data = get_daily_data(ct_id, heat_or_cool, stats_dir=stats_dir)
                 old_tau = ct_results.iloc[0].tau
                 old_alpha = ct_results.iloc[0].alpha
                 best_tau = stats[stats.is_best_tau].tau.iloc[0]
@@ -217,23 +194,25 @@
                 best_sq_errors = stats[stats.is_best_tau].sq_errors.iloc[0]
                 root_mean_sq_err = best_sq_errors ** 0.5
                 cv_root_mean_sq_err = root_mean_sq_err / pd.Series(daily_data['run_time']).mean()
-                ct_data_list.append({'ct_identifier': ct_id, 
-                                     'heating_or_cooling': h_c_label, 
-                                     'tau': best_tau, 
-                                     'alpha': best_alpha, 
+                ct_data_list.append({'ct_identifier': ct_id,
+                                     'heating_or_cooling': h_c_label,
+                                     'tau': best_tau,
+                                     'alpha': best_alpha,
                                      'mean_sq_err': best_sq_errors,
                                      'cv_root_mean_sq_err': cv_root_mean_sq_err})
 
                 ax_left = ax[ax_offset, 0]
                 ax_right = ax[ax_offset, 1]
-                stats.plot(x='tau', y='alpha', ax=ax_left, title=f'{ct_id} - {heat_or_cool}');
-                stats.plot(x='tau', y='sq_errors', ax=ax_left, secondary_y = True);
-                ax_left.axvline(best_tau, color="green", linestyle="dashed");
-                ax_left.axvline(old_tau, color="grey", linestyle="dashed");
+                stats.plot(x='tau', y='alpha', ax=ax_left, title=f'{ct_id} - {heat_or_cool}')
+                stats.plot(x='tau', y='sq_errors', ax=ax_left, secondary_y=True)
+                ax_left.axvline(best_tau, color="green", linestyle="dashed")
+                ax_left.axvline(old_tau, color="grey", linestyle="dashed")
                 ax_left.plot(old_tau, old_alpha, marker="o", markersize=10, markerfacecolor="blue")
-                plot_regression(daily_data['degree_day'], daily_data['run_time'], best_tau, best_alpha, ax_right, heat_or_cool)
-        ct_stats = plot_raw_delta_t(ct_id, ax[2, 0], results_old)
+                plot_regression(
+                    daily_data['degree_day'], daily_data['run_time'], best_tau, best_alpha, ax_right, heat_or_cool)
+        ct_stats = plot_raw_delta_t(
+            ct_id=ct_id, data_dir=data_dir, stats_dir=stats_dir, results=results, ax=ax[2, 0], results_old=results_old)
 
         # insert a table of text stats
         table_text = []
         table_text.append(['', 'old', 'new'])
@@ -246,28 +225,47 @@
         stats_table = ax[2, 1].table(table_text, loc='center')
         stats_table.auto_set_font_size(False)
         stats_table.set_fontsize(16)
-        stats_table.scale(1,2)
+        stats_table.scale(1, 2)
         ax[2, 1].axis('off')
 
         plot_path = f'{ct_id}_plots.png'
         if debug:
             print(f'Saving to {plot_path}')
-        plt.savefig(os.path.join(plots_dir, plot_path))
+        plt.savefig(plots_dir / plot_path)
         plt.close()
 
     results_new = pd.DataFrame(ct_data_list)
-    results_compared = results_old[['ct_identifier', 'heating_or_cooling', 'tau', 'alpha', 
+    results_compared = results_old[['ct_identifier', 'heating_or_cooling', 'tau', 'alpha',
                                     'cv_root_mean_sq_err', 'percent_savings_baseline_percentile', 'climate_zone']]\
         .merge(results_new, how='inner', on=['ct_identifier', 'heating_or_cooling'], suffixes=['_old', '_new'])
     return results_compared
 
 
-if __name__ == '__main__':
+def analyze_tau_stats(data_dir, results_old_path, results_path, stats_dir, plots_dir):
     results_old = pd.read_csv(results_old_path)
     results = pd.read_csv(results_path)
-    if not os.path.exists(plots_dir):
-        os.makedirs(plots_dir)
+    plots_dir.mkdir(parents=True, exist_ok=True)
     ct_ids = results.ct_identifier.unique()
     print('length of ct ids', len(ct_ids))
-    results_compared = generate_plots(ct_ids, results_old=results_old)
-    results_compared.to_csv(os.path.join(plots_dir, 'results_compared.csv'))
\ No newline at end of file
+    results_compared = generate_plots(ct_ids, plots_dir, data_dir, stats_dir, results=results, results_old=results_old)
+    results_compared.to_csv(plots_dir / 'results_compared.csv')
+
+
+if __name__ == '__main__':
+    # path to directory of raw csv files in EPAthermostat-compatible format
+    data_dir = Path('..') / '..' / 'datadir' / 'EPA_Tau'
+
+    # path to prior metrics file to use for comparison; should contain the same set of ct_identifiers
+    results_dir = Path('..') / '..' / 'datadir' / 'Tau Results'
+    results_old_path = results_dir / 'test_product_2023-10-26_metrics_base.csv'
+
+    # path to new results from experimental tau search code, same as METRICS_FILEPATH in write_stats.py
+    results_path = results_dir / '2019_EPA_tau_2023_06_01_metrics_new.csv'
+
+    # path to directory of stats files output from running tau search code; called "tau_search_path" in core.py module
+    stats_dir = Path('..') / '..' / 'datadir' / 'tau_search_stats'
+
+    # path to directory where output plots and tables will be saved
+    plots_dir = Path('..') / '..' / 'datadir' / 'tau_stats_plots'
+
+    analyze_tau_stats(data_dir, results_old_path, results_path, stats_dir, plots_dir)
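Context for the stats files consumed above: each {ct_id}_{heat_or_cool}_tau_search.csv row holds one candidate tau, the alpha fitted for it, and the summed squared error of the fit, with is_best_tau flagging the winner; the overlay lines in plot_raw_delta_t correspond to the model runtime ~= alpha * (delta_t + tau). The sketch below reproduces that selection on synthetic data; fit_alpha and search_tau are illustrative names only, not the package's actual search code.

# Sketch only: approximates the tau/alpha selection the *_tau_search.csv
# files summarize, under the model runtime ~= alpha * (delta_t + tau).
import numpy as np


def fit_alpha(delta_t, runtime, tau):
    # Closed-form least squares for a one-parameter fit with no intercept:
    # alpha = sum(x * y) / sum(x * x) with x = delta_t + tau.
    x = delta_t + tau
    return float((x * runtime).sum() / (x * x).sum())


def search_tau(delta_t, runtime, taus):
    # Fit alpha for every candidate tau and keep the lowest squared error,
    # mirroring the alpha / sq_errors / is_best_tau columns in the stats files.
    best = None
    for tau in taus:
        alpha = fit_alpha(delta_t, runtime, tau)
        sq_errors = float(((runtime - alpha * (delta_t + tau)) ** 2).sum())
        if best is None or sq_errors < best[2]:
            best = (tau, alpha, sq_errors)
    return best


if __name__ == '__main__':
    rng = np.random.default_rng(0)
    delta_t = rng.uniform(1.0, 20.0, size=200)                    # synthetic daily delta-T
    runtime = 12.0 * (delta_t + 2.5) + rng.normal(0.0, 5.0, 200)  # true tau=2.5, alpha=12
    print(search_tau(delta_t, runtime, np.arange(0.0, 5.1, 0.1)))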
diff --git a/scripts/multi_thermostat_driver.py b/scripts/multi_thermostat_driver.py
index 6904b4ab..90a7b96d 100644
--- a/scripts/multi_thermostat_driver.py
+++ b/scripts/multi_thermostat_driver.py
@@ -1,16 +1,6 @@
-import os
-import logging
-import logging.config
-import json
-import csv
+from pathlib import Path
 from datetime import date
-from zipfile import ZipFile
-from thermostat.importers import from_csv
-from thermostat.exporters import metrics_to_csv, certification_to_csv
-from thermostat.stats import compute_summary_statistics
-from thermostat.stats import summary_statistics_to_csv
-from thermostat.multiple import multiple_thermostat_calculate_epa_field_savings_metrics
-
+from multi_thermostat_driver_func import mult_thermostat_driver
 
 # These are variables used in the example code. Please tailor these to your
 # environment as needed.
@@ -44,6 +34,7 @@
 # Save cached weather data files? (Default: False)
 SAVE_CACHE = False
 CACHE_PATH = None  # Replace with location to save the weather cache files (e.g. '/tmp/epathermosat')
+TAU_SAVE_PATH = Path('/home/theurerjohn3/Documents/energystar/epa5/datadir/attempt1')
 
 # This section finds the metadata files and data files for the thermostats.
 # These point to examples of the various styles of files
@@ -51,8 +42,8 @@
 # data in the same file.
 
 # Single Stage
-DATA_DIR = os.path.join('..', 'tests', 'data', 'single_stage')
-METADATA_FILENAME = os.path.join(DATA_DIR, 'metadata.csv')
+DATA_DIR = Path('../../datadir/EPA_Tau')
+METADATA_FILENAME = DATA_DIR / '2019_epa_tau.csv'
 
 # Two Stage
 # DATA_DIR = os.path.join('..', 'tests', 'data', 'two_stage')
@@ -81,130 +72,33 @@
 ZIP_FILENAME = f'{BASE_FILENAME}.zip'
 
 # These are the locations of where these files will be stored.
-METRICS_FILEPATH = os.path.join(OUTPUT_DIR, METRICS_FILENAME)
-STATS_FILEPATH = os.path.join(DATA_DIR, STATISTICS_FILENAME)
-CERTIFICATION_FILEPATH = os.path.join(DATA_DIR, CERTIFICATION_FILENAME)
-STATS_ADVANCED_FILEPATH = os.path.join(DATA_DIR, ADVANCED_STATISTICS_FILENAME)
-IMPORT_ERRORS_FILEPATH = os.path.join(OUTPUT_DIR, IMPORT_ERRORS_FILENAME)
-SANITIZED_IMPORT_ERRORS_FILEPATH = os.path.join(OUTPUT_DIR, SANITIZED_IMPORT_ERRORS_FILENAME)
-CLIMATE_ZONE_INSUFFICIENT_FILEPATH = os.path.join(OUTPUT_DIR, CLIMATE_ZONE_INSUFFICIENT_FILENAME)
-ZIP_FILEPATH = os.path.join(OUTPUT_DIR, ZIP_FILENAME)
-
-
-def write_errors(filepath, fieldnames, errors, extrasaction=None):
-    with open(filepath, 'w') as error_file:
-        if extrasaction:
-            writer = csv.DictWriter(
-                error_file,
-                fieldnames=fieldnames,
-                dialect='excel',
-                extrasaction=extrasaction)
-        else:
-            writer = csv.DictWriter(
-                error_file,
-                fieldnames=fieldnames,
-                dialect='excel')
-        writer.writeheader()
-        for error in errors:
-            writer.writerow(error)
-
-
-def count_metadata(filepath):
-    with open(filepath, 'r') as metadata_file:
-        reader = csv.DictReader(metadata_file)
-        return len(list(reader))
-
-
-def main():
-    '''
-    This script processes the thermostat metadata and data files to generate
-    the certification files for submission to EPA.
-    '''
-
-    logging.basicConfig()
-    with open(LOGGING_CONFIG, 'r') as logging_config:
-        logging.config.dictConfig(json.load(logging_config))
-
-    # Uses the 'epathermostat' logging
-    logger = logging.getLogger('epathermostat')
-    logger.debug('Starting...')
-    logging.captureWarnings(CAPTURE_WARNINGS)
-
-    thermostats, import_errors = from_csv(
-        METADATA_FILENAME,
-        verbose=VERBOSE,
-        save_cache=SAVE_CACHE,
-        cache_path=CACHE_PATH)
-
-    # This logs any import errors that might have occurred.
-    if import_errors:
-        # This writes a file with the thermostat ID as part of the file. This
-        # is for your own troubleshooting
-        fieldnames = ['thermostat_id', 'error']
-        write_errors(IMPORT_ERRORS_FILEPATH, fieldnames, import_errors)
-
-        # This writes a file without the thermostat ID as part of the file.
-        # This file is sent as part of the certification to help with
-        # diagnosing issues with missing thermostats
-        fieldnames = ['error']
-        write_errors(SANITIZED_IMPORT_ERRORS_FILEPATH, fieldnames, import_errors, extrasaction='ignore')
-
-    # Check to see how many thermostats we are importing and warn if less than 30%
-    metadata_count = count_metadata(METADATA_FILENAME)
-    thermostat_estimate_count = thermostats.__length_hint__()  # Get a rough estimate of the number of thermostats
-    percent_thermostats_imported = (thermostat_estimate_count / metadata_count) * 100
-    if percent_thermostats_imported < 30:
-        logger.warning(f'Imported {percent_thermostats_imported}% of thermostats, which is less than 30%')
-        logger.warning(f'Please check {IMPORT_ERRORS_FILEPATH} for more details')
-    else:
-        logger.debug(f'Imported {percent_thermostats_imported}% of thermostats')
-
-    metrics = multiple_thermostat_calculate_epa_field_savings_metrics(thermostats)
-
-    metrics_out = metrics_to_csv(metrics, METRICS_FILEPATH)
-
-    stats, insufficient = compute_summary_statistics(metrics_out)
-
-    if insufficient:
-        fieldnames = ['climate_zone', 'count', 'error']
-        write_errors(CLIMATE_ZONE_INSUFFICIENT_FILEPATH, fieldnames, insufficient)
-
-    certification_to_csv(stats, CERTIFICATION_FILEPATH, PRODUCT_ID)
-
-    summary_statistics_to_csv(
-        stats,
-        STATS_FILEPATH,
-        PRODUCT_ID)
-
-    if ADVANCED_STATS:
-        stats_advanced = compute_summary_statistics(
-            metrics_out,
-            advanced_filtering=True)
-
-        summary_statistics_to_csv(
-            stats_advanced,
-            STATS_ADVANCED_FILEPATH,
-            PRODUCT_ID)
-
-    # Compile the files together in a neat package
-    files_to_zip = [
-        CERTIFICATION_FILEPATH,
-        STATS_FILEPATH,
-    ]
-    if ADVANCED_STATS:
-        files_to_zip.append(STATS_ADVANCED_FILEPATH)
-
-    if import_errors:
-        files_to_zip.append(SANITIZED_IMPORT_ERRORS_FILEPATH)
-
-    if insufficient:
-        files_to_zip.append(CLIMATE_ZONE_INSUFFICIENT_FILEPATH)
-
-    with ZipFile(ZIP_FILEPATH, 'w') as certification_zip:
-        for filename in files_to_zip:
-            if os.path.exists(filename):
-                certification_zip.write(filename, arcname=os.path.basename(filename))
+METRICS_FILEPATH = OUTPUT_DIR / METRICS_FILENAME
+STATS_FILEPATH = DATA_DIR / STATISTICS_FILENAME
+CERTIFICATION_FILEPATH = DATA_DIR / CERTIFICATION_FILENAME
+STATS_ADVANCED_FILEPATH = DATA_DIR / ADVANCED_STATISTICS_FILENAME
+IMPORT_ERRORS_FILEPATH = OUTPUT_DIR / IMPORT_ERRORS_FILENAME
+SANITIZED_IMPORT_ERRORS_FILEPATH = OUTPUT_DIR / SANITIZED_IMPORT_ERRORS_FILENAME
+CLIMATE_ZONE_INSUFFICIENT_FILEPATH = OUTPUT_DIR / CLIMATE_ZONE_INSUFFICIENT_FILENAME
+ZIP_FILEPATH = OUTPUT_DIR / ZIP_FILENAME
 
 
 if __name__ == '__main__':
-    main()
+    mult_thermostat_driver(
+        advanced_stats=ADVANCED_STATS,
+        product_id=PRODUCT_ID,
+        verbose=VERBOSE,
+        capture_warnings=CAPTURE_WARNINGS,
+        logging_config=LOGGING_CONFIG,
+        save_cache=SAVE_CACHE,
+        cache_path=CACHE_PATH,
+        tau_save_path=TAU_SAVE_PATH,
+        metadata_filename=METADATA_FILENAME,
+        metrics_filepath=METRICS_FILEPATH,
+        stats_filepath=STATS_FILEPATH,
+        certification_filepath=CERTIFICATION_FILEPATH,
+        stats_advanced_filepath=STATS_ADVANCED_FILEPATH,
+        import_errors_filepath=IMPORT_ERRORS_FILEPATH,
+        sanitized_import_errors_filepath=SANITIZED_IMPORT_ERRORS_FILEPATH,
+        climate_zone_insufficient_filepath=CLIMATE_ZONE_INSUFFICIENT_FILEPATH,
+        zip_filepath=ZIP_FILEPATH
+    )
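The driver rewrite above makes two mechanical changes: every os.path call is replaced with its pathlib equivalent, and the body of main() moves into mult_thermostat_driver() (next file) so other scripts can invoke the pipeline with explicit arguments. The os.path-to-pathlib correspondence applied throughout the patch, demonstrated on a throwaway path using only standard-library behavior:

# The os.path -> pathlib translations used throughout this PR.
import os
from pathlib import Path

output_dir_str = '/tmp/epathermostat_demo'  # throwaway example path
output_dir = Path(output_dir_str)

# joining:   os.path.join(a, b)  ->  a / b
assert os.path.join(output_dir_str, 'metrics.csv') == str(output_dir / 'metrics.csv')

# existence: os.path.exists(p)   ->  p.exists()
assert os.path.exists(output_dir_str) == output_dir.exists()

# basename:  os.path.basename(p) ->  p.name
assert os.path.basename(output_dir_str) == output_dir.name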
diff --git a/scripts/multi_thermostat_driver_func.py b/scripts/multi_thermostat_driver_func.py
new file mode 100644
index 00000000..e42e8d98
--- /dev/null
+++ b/scripts/multi_thermostat_driver_func.py
@@ -0,0 +1,145 @@
+from pathlib import Path
+import logging
+import logging.config
+import json
+import csv
+from zipfile import ZipFile
+from thermostat.importers import from_csv
+from thermostat.exporters import metrics_to_csv, certification_to_csv
+from thermostat.stats import compute_summary_statistics
+from thermostat.stats import summary_statistics_to_csv
+from thermostat.multiple import multiple_thermostat_calculate_epa_field_savings_metrics
+
+
+def write_errors(filepath, fieldnames, errors, extrasaction=None):
+    with open(filepath, 'w') as error_file:
+        if extrasaction:
+            writer = csv.DictWriter(
+                error_file,
+                fieldnames=fieldnames,
+                dialect='excel',
+                extrasaction=extrasaction)
+        else:
+            writer = csv.DictWriter(
+                error_file,
+                fieldnames=fieldnames,
+                dialect='excel')
+        writer.writeheader()
+        for error in errors:
+            writer.writerow(error)
+
+
+def count_metadata(filepath):
+    with open(filepath, 'r') as metadata_file:
+        reader = csv.DictReader(metadata_file)
+        return len(list(reader))
+
+
+def mult_thermostat_driver(
+        advanced_stats: bool,
+        product_id: str,
+        verbose: bool,
+        capture_warnings: bool,
+        logging_config: str,
+        save_cache: bool,
+        cache_path: Path,
+        tau_save_path: Path,
+        metadata_filename: Path,
+        metrics_filepath: Path,
+        stats_filepath: Path,
+        certification_filepath: Path,
+        stats_advanced_filepath: Path,
+        import_errors_filepath: Path,
+        sanitized_import_errors_filepath: Path,
+        climate_zone_insufficient_filepath: Path,
+        zip_filepath: Path
+):
+    '''
+    Process the thermostat metadata and data files to generate
+    the certification files for submission to EPA.
+    '''
+
+    logging.basicConfig()
+    with open(logging_config, 'r') as config_file:
+        logging.config.dictConfig(json.load(config_file))
+
+    # Uses the 'epathermostat' logging
+    logger = logging.getLogger('epathermostat')
+    logger.debug('Starting...')
+    logging.captureWarnings(capture_warnings)
+
+    thermostats, import_errors = from_csv(
+        metadata_filename,
+        verbose=verbose,
+        save_cache=save_cache,
+        cache_path=cache_path,
+        tau_search_path=tau_save_path)
+
+    # This logs any import errors that might have occurred.
+    if import_errors:
+        # This writes a file with the thermostat ID as part of the file. This
+        # is for your own troubleshooting
+        fieldnames = ['thermostat_id', 'error']
+        write_errors(import_errors_filepath, fieldnames, import_errors)
+
+        # This writes a file without the thermostat ID as part of the file.
+        # This file is sent as part of the certification to help with
+        # diagnosing issues with missing thermostats
+        fieldnames = ['error']
+        write_errors(sanitized_import_errors_filepath, fieldnames, import_errors, extrasaction='ignore')
+
+    # Check to see how many thermostats we are importing and warn if less than 30%
+    metadata_count = count_metadata(metadata_filename)
+    thermostat_estimate_count = thermostats.__length_hint__()  # Get a rough estimate of the number of thermostats
+    percent_thermostats_imported = (thermostat_estimate_count / metadata_count) * 100
+    if percent_thermostats_imported < 30:
+        logger.warning(f'Imported {percent_thermostats_imported}% of thermostats, which is less than 30%')
+        logger.warning(f'Please check {import_errors_filepath} for more details')
+    else:
+        logger.debug(f'Imported {percent_thermostats_imported}% of thermostats')
+
+    metrics = multiple_thermostat_calculate_epa_field_savings_metrics(thermostats)
+
+    metrics_out = metrics_to_csv(metrics, metrics_filepath)
+
+    stats, insufficient = compute_summary_statistics(metrics_out)
+
+    if insufficient:
+        fieldnames = ['climate_zone', 'count', 'error']
+        write_errors(climate_zone_insufficient_filepath, fieldnames, insufficient)
+
+    certification_to_csv(stats, certification_filepath, product_id)
+
+    summary_statistics_to_csv(
+        stats,
+        stats_filepath,
+        product_id)
+
+    if advanced_stats:
+        stats_advanced = compute_summary_statistics(
+            metrics_out,
+            advanced_filtering=True)
+
+        summary_statistics_to_csv(
+            stats_advanced,
+            stats_advanced_filepath,
+            product_id)
+
+    # Compile the files together in a neat package
+    files_to_zip = [
+        certification_filepath,
+        stats_filepath,
+    ]
+    if advanced_stats:
+        files_to_zip.append(stats_advanced_filepath)
+
+    if import_errors:
+        files_to_zip.append(sanitized_import_errors_filepath)
+
+    if insufficient:
+        files_to_zip.append(climate_zone_insufficient_filepath)
+
+    with ZipFile(zip_filepath, 'w') as certification_zip:
+        for filename in files_to_zip:
+            if filename.exists():
+                certification_zip.write(filename, arcname=filename.name)
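A detail of write_errors worth noting: the same import_errors dicts, each carrying a thermostat_id key, feed both error files, and the sanitized file relies on csv.DictWriter's extrasaction='ignore' to drop the ID column rather than raise. A self-contained illustration with demo data only:

# Why write_errors passes extrasaction='ignore' for the sanitized file:
# DictWriter raises ValueError on keys missing from fieldnames unless told
# to ignore them, so the same error dicts can feed both outputs.
import csv
import io

errors = [{'thermostat_id': 'ct_123', 'error': 'missing equipment type'}]

sanitized = io.StringIO()
writer = csv.DictWriter(sanitized, fieldnames=['error'], dialect='excel',
                        extrasaction='ignore')
writer.writeheader()
writer.writerows(errors)
print(sanitized.getvalue())   # only the 'error' column survives

try:
    strict = csv.DictWriter(io.StringIO(), fieldnames=['error'], dialect='excel')
    strict.writerows(errors)  # default extrasaction='raise' -> extra key raises
except ValueError as exc:
    print('without ignore:', exc)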
diff --git a/scripts/tau_analysis.py b/scripts/tau_analysis.py
new file mode 100644
index 00000000..253fd5f2
--- /dev/null
+++ b/scripts/tau_analysis.py
@@ -0,0 +1,94 @@
+from write_stats import write_stats
+from multi_thermostat_driver_func import mult_thermostat_driver
+from analyze_tau_stats import analyze_tau_stats
+from pathlib import Path
+from datetime import date
+
+
+if __name__ == '__main__':
+
+    tau_save_path = Path('/home/theurerjohn3/Documents/energystar/epa5/datadir/attempt1')
+
+    # This section finds the metadata files and data files for the thermostats.
+    # These point to examples of the various styles of files
+    # In most cases you will combine Single Stage, Two Stage, and Two Stage ERT
+    # data in the same file.
+
+    # Single Stage
+    # path to the raw thermostat data in csv format
+    data_dir = Path('..') / '..' / 'datadir' / 'EPA_Tau'
+    metadata_path = data_dir / '2019_epa_tau.csv'
+
+    # dir to save tau search stats to (multiple files per thermostat)
+    output_dir = data_dir.parents[0] / 'Tau Results'
+    metrics_filepath = output_dir / '2019_EPA_tau_2023_06_01_metrics_new.csv'
+
+    # path to prior metrics file to use for comparison; should contain the same set of ct_identifiers
+    results_old_path = output_dir / 'test_product_2023-10-26_metrics_base.csv'
+
+    # path to directory of stats files output from running tau search code; called "tau_search_path" in core.py module
+    stats_dir = Path('..') / '..' / 'datadir' / 'tau_search_stats'
+
+    # path to directory where output plots and tables will be saved
+    plots_dir = Path('..') / '..' / 'datadir' / 'tau_stats_plots'
+
+    # The date of the run (defaults to today's date in YYYY-MM-DD format)
+    RUN_DATE = date.today().strftime('%F')
+
+    # The name of the product to be certified
+    PRODUCT_ID = 'test_product'
+
+    # This creates the base filename for the files that are created (e.g.
+    # test_product_2022-03-28)
+    BASE_FILENAME = f'{PRODUCT_ID}_{RUN_DATE}'
+
+    # These are the filenames for the output files.
+    METRICS_FILENAME = f'{BASE_FILENAME}_metrics.csv'
+    CERTIFICATION_FILENAME = f'{BASE_FILENAME}_certification.csv'
+    STATISTICS_FILENAME = f'{BASE_FILENAME}_stats.csv'
+    ADVANCED_STATISTICS_FILENAME = f'{BASE_FILENAME}_stats_advanced.csv'
+    IMPORT_ERRORS_FILENAME = f'{BASE_FILENAME}_import_errors.csv'
+    SANITIZED_IMPORT_ERRORS_FILENAME = f'{BASE_FILENAME}_errors_sanitized.csv'
+    CLIMATE_ZONE_INSUFFICIENT_FILENAME = f'{BASE_FILENAME}_climate_zone_insufficient.csv'
+    ZIP_FILENAME = f'{BASE_FILENAME}.zip'
+
+    # These are the locations of where these files will be stored.
+    METRICS_FILEPATH = data_dir / METRICS_FILENAME
+    STATS_FILEPATH = data_dir / STATISTICS_FILENAME
+    CERTIFICATION_FILEPATH = data_dir / CERTIFICATION_FILENAME
+    STATS_ADVANCED_FILEPATH = data_dir / ADVANCED_STATISTICS_FILENAME
+    IMPORT_ERRORS_FILEPATH = data_dir / IMPORT_ERRORS_FILENAME
+    SANITIZED_IMPORT_ERRORS_FILEPATH = data_dir / SANITIZED_IMPORT_ERRORS_FILENAME
+    CLIMATE_ZONE_INSUFFICIENT_FILEPATH = data_dir / CLIMATE_ZONE_INSUFFICIENT_FILENAME
+    ZIP_FILEPATH = data_dir / ZIP_FILENAME
+
+    # for testing purposes; only run the first top_n thermostat files (None runs all)
+    top_n = 30
+
+    mult_thermostat_driver(
+        advanced_stats=False,  # Whether to compute Advanced Statistics (in most cases this is NOT needed)
+        product_id=PRODUCT_ID,  # The name of the product to be certified
+        verbose=True,  # Verbose will override logging to display the imported thermostats.
+        capture_warnings=True,  # Set to True to log additional warning messages
+        logging_config='logging.json',  # Example logging configuration for file and console output
+        save_cache=False,  # Save cached weather data files? (Default: False)
+        cache_path=None,
+        tau_save_path=tau_save_path,
+        metadata_filename=metadata_path,
+        metrics_filepath=METRICS_FILEPATH,
+        stats_filepath=STATS_FILEPATH,
+        certification_filepath=CERTIFICATION_FILEPATH,
+        stats_advanced_filepath=STATS_ADVANCED_FILEPATH,
+        import_errors_filepath=IMPORT_ERRORS_FILEPATH,
+        sanitized_import_errors_filepath=SANITIZED_IMPORT_ERRORS_FILEPATH,
+        climate_zone_insufficient_filepath=CLIMATE_ZONE_INSUFFICIENT_FILEPATH,
+        zip_filepath=ZIP_FILEPATH
+    )
+
+    write_stats(
+        top_n=top_n,
+        metadata_path=metadata_path,
+        metrics_filepath=metrics_filepath
+    )
+
+    analyze_tau_stats(data_dir, results_old_path, metrics_filepath, stats_dir, plots_dir)
diff --git a/scripts/write_stats.py b/scripts/write_stats.py
index cd5ceaf1..d5302d9e 100644
--- a/scripts/write_stats.py
+++ b/scripts/write_stats.py
@@ -1,25 +1,35 @@
-# for testing purposes; only run the first n thermostat files
-# set to None to run all
-top_n = 30
-
-
-import os
-from thermostat.importers import from_csv, get_single_thermostat
+from pathlib import Path
+from thermostat.importers import from_csv
 from thermostat.exporters import metrics_to_csv
 from thermostat.multiple import multiple_thermostat_calculate_epa_field_savings_metrics
 
-def main():
-    # path to the raw thermostat data in csv format
-    data_dir = os.path.join('../', '../', 'tau-search-2', 'EPA_Tau')
-    metadata_path = os.path.join(data_dir, '2019_epa_tau.csv')
-    # dir to save tau search stats to (multiple files per thermostat)
-    output_dir = os.path.join('../', '../', 'tau-search-2', 'EPA_Tau_results')
-    METRICS_FILEPATH = os.path.join(output_dir, '2019_EPA_tau_2023_06_01_metrics_new.csv')
+
+def write_stats(
+        top_n: int,
+        metadata_path: Path,
+        metrics_filepath: Path
+):
     thermostats, tstat_errors = from_csv(metadata_path, top_n=top_n)
     metrics = multiple_thermostat_calculate_epa_field_savings_metrics(thermostats)
-    metrics_to_csv(metrics, METRICS_FILEPATH)
+    metrics_to_csv(metrics, metrics_filepath)
+
+
+if __name__ == '__main__':
+    # for testing purposes; only run the first n thermostat files
+    # set to None to run all
+    top_n = 30
+
+    # path to the raw thermostat data in csv format
+    data_dir = Path('..') / '..' / 'datadir' / 'EPA_Tau'
+    metadata_path = data_dir / '2019_epa_tau.csv'
+    # dir to save tau search stats to (multiple files per thermostat)
+    output_dir = data_dir.parents[0] / 'Tau Results'
+    metrics_filepath = output_dir / '2019_EPA_tau_2023_06_01_metrics_new.csv'
-
-if __name__=='__main__':
-    main()
\ No newline at end of file
+
+    write_stats(
+        top_n=top_n,
+        metadata_path=metadata_path,
+        metrics_filepath=metrics_filepath
+    )
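Two footnotes on the orchestration scripts: tau_analysis.py calls write_stats with top_n=30, so a full production run needs top_n=None; and data_dir.parents[0], used to place 'Tau Results' beside EPA_Tau, is just a longer spelling of data_dir.parent. The latter is standard pathlib behavior, shown below on the PR's own relative path:

from pathlib import Path

data_dir = Path('..') / '..' / 'datadir' / 'EPA_Tau'

# parents is the sequence of ancestors; index 0 is the immediate parent,
# so parents[0] == parent and 'Tau Results' lands next to EPA_Tau.
assert data_dir.parents[0] == data_dir.parent
print(data_dir.parent / 'Tau Results')   # ../../datadir/Tau Results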