Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/epathermostat 2.0 pathlib #39

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
45e9b22
pathlib basic update
john-zither Nov 13, 2023
5d4833f
pathlib basic update
john-zither Nov 13, 2023
0dc914e
fixed for tests
john-zither Nov 13, 2023
633b2e5
fixed for tests
john-zither Nov 13, 2023
1a74901
seeing why start and end are not UTC according to github actions
john-zither Nov 24, 2023
6f731a6
seeing why start and end are not UTC according to github actions
john-zither Nov 24, 2023
09b3327
is python 3.7 the next(thermostat) error
john-zither Nov 24, 2023
0e4ed2e
is python 3.11 the next(thermostat) error
john-zither Nov 24, 2023
8902e9b
the iterator is throwing errors, so it must be emptying too fast. Try…
john-zither Nov 24, 2023
2b8301e
the iterator is throwing errors, so it must be emptying too fast. Try…
john-zither Nov 24, 2023
d0cb53d
maybe it was the pathlib switch
john-zither Nov 24, 2023
8e2c01e
looking at errors in the results length
john-zither Nov 24, 2023
6a95e70
I think the path is the error, hopefully
john-zither Nov 24, 2023
16460d1
no the error is still the time zone error
john-zither Nov 24, 2023
a6b1925
basic error
john-zither Nov 24, 2023
da5936c
try pytz to solve timezone error
john-zither Nov 24, 2023
7eb0060
checking the value of s1_intp
john-zither Nov 24, 2023
35d1fc2
checking the value of s1_intp
john-zither Nov 24, 2023
f603a2a
make sure s1_intp is a float empty series
john-zither Nov 24, 2023
67819ce
result length is allowed to be zero
john-zither Nov 25, 2023
adee7d8
look at the value
john-zither Nov 25, 2023
23f4c81
is the pandas version 1.5.2
john-zither Nov 25, 2023
31a9a5c
the test pandas version was not consistent with the requirements file
john-zither Nov 25, 2023
0d429eb
the test pandas version was not consistent with the requirements file
john-zither Nov 25, 2023
b77613f
3.7 isn't working, 3.8 is, how about the rest?
john-zither Nov 25, 2023
1e403ba
fix conda test
john-zither Nov 26, 2023
fe37a12
cleaning up the tests, returning to original format
john-zither Nov 27, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/python-app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
matrix:
# Apparently 3.10 gets converted to 3.1 if it's not a string. Brilliant.
python-version: [3.7, 3.8, 3.9, '3.10', '3.11']
python-version: [3.8, 3.9, '3.10', '3.11']

steps:
- uses: actions/checkout@v3
Expand All @@ -22,7 +22,7 @@ jobs:
python -m pip install --upgrade pip
pip install wheel flake8
pip install -r dev-requirements.txt
pip install -e .
pip install -r requirements.txt
- name: Prime Cache
run: |
mkdir $HOME/.eeweather
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/windows_conda_testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ jobs:
C:\Miniconda\condabin\conda.bat activate base
C:\Miniconda\condabin\conda.bat install pytest pytest-cov coverage mock pip
C:\Miniconda\condabin\conda.bat install -c conda-forge shapely
pip install -e .
pip install -r requirements.txt
pytest
4 changes: 2 additions & 2 deletions scripts/metadata_converter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import csv
import glob
import os
from pathlib import Path

from datetime import datetime, timedelta

Expand Down Expand Up @@ -36,7 +36,7 @@ def main():

for filename in glob.iglob('metadata*.csv'):
input_filename = filename
output_filename = os.path.join("new", filename)
output_filename = Path("new") / filename
with open(output_filename, 'w') as outfile:
csv_out = csv.DictWriter(outfile, FIELDNAMES)
csv_out.writeheader()
Expand Down
28 changes: 13 additions & 15 deletions scripts/multi_thermostat_driver.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
import logging
import logging.config
import json
Expand Down Expand Up @@ -51,8 +51,8 @@
# data in the same file.

# Single Stage
DATA_DIR = os.path.join('..', 'tests', 'data', 'single_stage')
METADATA_FILENAME = os.path.join(DATA_DIR, 'metadata.csv')
DATA_DIR = Path('../../datadir/EPA_Tau')
METADATA_FILENAME = DATA_DIR / '2019_epa_tau.csv'

# Two Stage
# DATA_DIR = os.path.join('..', 'tests', 'data', 'two_stage')
Expand Down Expand Up @@ -81,15 +81,14 @@
ZIP_FILENAME = f'{BASE_FILENAME}.zip'

# These are the locations of where these files will be stored.
METRICS_FILEPATH = os.path.join(OUTPUT_DIR, METRICS_FILENAME)
STATS_FILEPATH = os.path.join(DATA_DIR, STATISTICS_FILENAME)
CERTIFICATION_FILEPATH = os.path.join(DATA_DIR, CERTIFICATION_FILENAME)
STATS_ADVANCED_FILEPATH = os.path.join(DATA_DIR, ADVANCED_STATISTICS_FILENAME)
IMPORT_ERRORS_FILEPATH = os.path.join(OUTPUT_DIR, IMPORT_ERRORS_FILENAME)
SANITIZED_IMPORT_ERRORS_FILEPATH = os.path.join(OUTPUT_DIR, SANITIZED_IMPORT_ERRORS_FILENAME)
CLIMATE_ZONE_INSUFFICIENT_FILEPATH = os.path.join(OUTPUT_DIR, CLIMATE_ZONE_INSUFFICIENT_FILENAME)
ZIP_FILEPATH = os.path.join(OUTPUT_DIR, ZIP_FILENAME)

METRICS_FILEPATH = OUTPUT_DIR / METRICS_FILENAME
STATS_FILEPATH = DATA_DIR / STATISTICS_FILENAME
CERTIFICATION_FILEPATH = DATA_DIR / CERTIFICATION_FILENAME
STATS_ADVANCED_FILEPATH = DATA_DIR / ADVANCED_STATISTICS_FILENAME
IMPORT_ERRORS_FILEPATH = OUTPUT_DIR / IMPORT_ERRORS_FILENAME
SANITIZED_IMPORT_ERRORS_FILEPATH = OUTPUT_DIR / SANITIZED_IMPORT_ERRORS_FILENAME
CLIMATE_ZONE_INSUFFICIENT_FILEPATH = OUTPUT_DIR / CLIMATE_ZONE_INSUFFICIENT_FILENAME
ZIP_FILEPATH = OUTPUT_DIR / ZIP_FILENAME

def write_errors(filepath, fieldnames, errors, extrasaction=None):
with open(filepath, 'w') as error_file:
Expand Down Expand Up @@ -202,9 +201,8 @@ def main():

with ZipFile(ZIP_FILEPATH, 'w') as certification_zip:
for filename in files_to_zip:
if os.path.exists(filename):
certification_zip.write(filename, arcname=os.path.basename(filename))

if filename.exists():
certification_zip.write(filename, arcname=filename.name)

if __name__ == '__main__':
main()
6 changes: 3 additions & 3 deletions scripts/random_uuid_generation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
import uuid
import os
from pathlib import Path

# Number of thermostat IDs to generate
NUM_THERMOSTATS = 200
Expand All @@ -14,7 +14,7 @@
]

# Change this file path to a different location if you wish to save the files
FILE_PATH = '/tmp'
FILE_PATH = Path('/tmp')


def main():
Expand All @@ -26,7 +26,7 @@ def main():
"""
for climate_zone in CLIMATE_ZONES:
climate_zone_filename = climate_zone + '.csv'
with open(os.path.join(FILE_PATH, climate_zone_filename), 'w') as thermostat_file:
with open((FILE_PATH /climate_zone_filename), 'w') as thermostat_file:
for _ in range(0, NUM_THERMOSTATS):
thermostat_id = uuid.uuid4()
thermostat_file.write(str(thermostat_id) + '\n')
Expand Down
2 changes: 1 addition & 1 deletion tests/test_core_single_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_multiple_same_key(thermostats_multiple_same_key):
def test_interpolate_empty(thermostat_type_1):
s1 = pd.Series([])
s1_intp = thermostat_type_1._interpolate(s1)
np.testing.assert_allclose(s1_intp, [])
np.testing.assert_allclose(s1_intp.astype(float), [])


def test_interpolate_full(thermostat_type_1):
Expand Down
3 changes: 2 additions & 1 deletion tests/test_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from thermostat.equipment_type import EQUIPMENT_MAPPING

import os
from pathlib import Path
import tempfile
import zipfile
from uuid import uuid4
Expand Down Expand Up @@ -102,7 +103,7 @@ def test_schedule_batches_zip_files(metadata_filename):
batch_zipfile_names = schedule_batches(metadata_filename, 5, True, temp_dir)

assert len(batch_zipfile_names) == 5
assert isinstance(batch_zipfile_names[0], str)
assert isinstance(batch_zipfile_names[0], Path)

with zipfile.ZipFile(batch_zipfile_names[0]) as zf:
assert len(zf.infolist()) == 21
9 changes: 5 additions & 4 deletions thermostat/eeweather_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime
import datetime
import eeweather

import pytz
import pandas as pd

# This routine is a compact and distilled version of code that was originally
Expand Down Expand Up @@ -55,9 +55,10 @@ def get_indexed_temperatures_eeweather(usaf_id, index):
if index.shape == (0, 0) or index.shape == (0,):
return pd.Series([], index=(), dtype=float)
years = sorted(index.groupby(index.year).keys())
start = pd.to_datetime(datetime(years[0], 1, 1), utc=True)
end = pd.to_datetime(datetime(years[-1], 12, 31, 23, 59), utc=True)
start = pd.to_datetime(datetime.datetime(years[0], 1, 1, tzinfo=pytz.UTC))
end = pd.to_datetime(datetime.datetime(years[-1], 12, 31, 23, 59, tzinfo=pytz.UTC))
tempC, _ = eeweather.load_isd_hourly_temp_data(usaf_id, start, end)

tempC = tempC.resample('H').mean()[index]
tempF = _convert_to_farenheit(tempC)
return tempF
14 changes: 6 additions & 8 deletions thermostat/importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import warnings
import pandas as pd
import dateutil.parser
from pathlib import Path
import os
import pytz
from multiprocessing import Pool, cpu_count
Expand Down Expand Up @@ -111,17 +112,15 @@ def save_json_cache(index, thermostat_id, station, cache_path=None):
json_cache[filename] = sqlite_json_store.retrieve_json(base_name)

if cache_path is None:
directory = os.path.join(
os.curdir,
"epathermostat_weather_data")
directory = Path.cwd() / "epathermostat_weather_data"
else:
directory = os.path.normpath(
directory = Path(
cache_path)

thermostat_filename = f"{thermostat_id}.json"
thermostat_path = os.path.join(directory, thermostat_filename)
thermostat_path = directory /thermostat_filename
try:
os.makedirs(os.path.dirname(directory), exist_ok=True)
directory.mkdir(exist_ok=True)
with open(thermostat_path, 'w') as outfile:
json.dump(json_cache, outfile)

Expand Down Expand Up @@ -241,7 +240,6 @@ def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True,
error_list.append(error_dict)
else:
results.append(result['thermostat'])

# Convert this to an iterator to maintain compatibility
return iter(results), error_list

Expand All @@ -254,7 +252,7 @@ def _multiprocess_func(metadata, metadata_filename, verbose=False, save_cache=Fa
if verbose and logger.getEffectiveLevel() > logging.INFO:
print(f"Importing thermostat {row.thermostat_id}")

interval_data_filename = os.path.join(os.path.dirname(metadata_filename), row.interval_data_filename)
interval_data_filename = Path(metadata_filename).parents[0] / row.interval_data_filename

status_metadata = {
'thermostat_id': row.thermostat_id,
Expand Down
17 changes: 9 additions & 8 deletions thermostat/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from itertools import cycle
from zipfile import ZipFile
import tempfile
import os
from pathlib import Path
from thermostat.stations import get_closest_station_by_zipcode


Expand Down Expand Up @@ -42,6 +42,8 @@ def schedule_batches(metadata_filename, n_batches, zip_files=False, batches_dir=
message = "Cannot have batches_dir==None when zip_files==True. " \
"Please supply a directory in which to save batches."
raise ValueError(message)

metadata_filename = Path(metadata_filename)

metadata_df = pd.read_csv(metadata_filename, dtype={"zipcode": str})
stations = [get_closest_station_by_zipcode(zipcode) for zipcode in metadata_df.zipcode]
Expand Down Expand Up @@ -79,26 +81,25 @@ def schedule_batches(metadata_filename, n_batches, zip_files=False, batches_dir=
batch_dfs = [pd.DataFrame(rows) for rows in batches]

if zip_files:

if not os.path.exists(batches_dir):
os.makedirs(batches_dir)
batches_dir = Path(batches_dir)
batches_dir.mkdir(exist_ok=True)

batch_zipfile_names = []
for i, batch_df in enumerate(batch_dfs):

batch_name = "batch_{:05d}.zip".format(i)
batch_zipfile_name = os.path.join(batches_dir, batch_name)
batch_zipfile_name = batches_dir / batch_name
batch_zipfile_names.append(batch_zipfile_name)

_, fname = tempfile.mkstemp()
batch_df.to_csv(fname, index=False)

with ZipFile(batch_zipfile_name, 'w') as batch_zip:
batch_zip.write(fname, arcname=os.path.join('data', 'metadata.csv'))
batch_zip.write(fname, arcname=Path('data') / 'metadata.csv')

for filename in batch_df.interval_data_filename:
interval_data_source = os.path.join(os.path.dirname(metadata_filename), filename)
batch_zip.write(interval_data_source, arcname=os.path.join('data', filename))
interval_data_source = metadata_filename.parents[0] / filename
batch_zip.write(interval_data_source, arcname=Path('data') / filename)

return batch_zipfile_names

Expand Down
6 changes: 3 additions & 3 deletions thermostat/util/testing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
import inspect

def get_data_path(f=''):
Expand All @@ -7,5 +7,5 @@ def get_data_path(f=''):
"""
# get our callers file
_, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1]
base_dir = os.path.abspath(os.path.dirname(filename))
return os.path.join(base_dir, f)
base_dir = Path(filename).parents[0].resolve()
return base_dir / f
Loading