Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/epathermostat 2.0 pathlib #39

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
45e9b22
pathlib basic update
john-zither Nov 13, 2023
5d4833f
pathlib basic update
john-zither Nov 13, 2023
0dc914e
fixed for tests
john-zither Nov 13, 2023
633b2e5
fixed for tests
john-zither Nov 13, 2023
1a74901
seeing why start and end are not UTC according to github actions
john-zither Nov 24, 2023
6f731a6
seeing why start and end are not UTC according to github actions
john-zither Nov 24, 2023
09b3327
is python 3.7 the next(thermostat) error
john-zither Nov 24, 2023
0e4ed2e
is python 3.11 the next(thermostat) error
john-zither Nov 24, 2023
8902e9b
the iterator is throwing errors, so it must be emptying too fast. Try…
john-zither Nov 24, 2023
2b8301e
the iterator is throwing errors, so it must be emptying too fast. Try…
john-zither Nov 24, 2023
d0cb53d
maybe it was the pathlib switch
john-zither Nov 24, 2023
8e2c01e
looking at errors in the results length
john-zither Nov 24, 2023
6a95e70
I think the path is the error, hopefully
john-zither Nov 24, 2023
16460d1
no the error is still the time zone error
john-zither Nov 24, 2023
a6b1925
basic error
john-zither Nov 24, 2023
da5936c
try pytz to solve timezone error
john-zither Nov 24, 2023
7eb0060
checking the value of s1_intp
john-zither Nov 24, 2023
35d1fc2
checking the value of s1_intp
john-zither Nov 24, 2023
f603a2a
make sure s1_intp is a float empty series
john-zither Nov 24, 2023
67819ce
result length is allowed to be zero
john-zither Nov 25, 2023
adee7d8
look at the value
john-zither Nov 25, 2023
23f4c81
is the pandas version 1.5.2
john-zither Nov 25, 2023
31a9a5c
the test pandas version was not consistent with the requirements file
john-zither Nov 25, 2023
0d429eb
the test pandas version was not consistent with the requirements file
john-zither Nov 25, 2023
b77613f
3.7 isn't working, 3.8 is, how about the rest?
john-zither Nov 25, 2023
1e403ba
fix conda test
john-zither Nov 26, 2023
fe37a12
cleaning up the tests, returning to original format
john-zither Nov 27, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/python-app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
matrix:
# Apparently 3.10 gets converted to 3.1 if it's not a string. Brilliant.
python-version: [3.7, 3.8, 3.9, '3.10', '3.11']
python-version: [3.8, 3.9, '3.10', '3.11']

steps:
- uses: actions/checkout@v3
Expand All @@ -22,7 +22,7 @@ jobs:
python -m pip install --upgrade pip
pip install wheel flake8
pip install -r dev-requirements.txt
pip install -e .
pip install -r requirements.txt
- name: Prime Cache
run: |
mkdir $HOME/.eeweather
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/windows_conda_testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ jobs:
C:\Miniconda\condabin\conda.bat activate base
C:\Miniconda\condabin\conda.bat install pytest pytest-cov coverage mock pip
C:\Miniconda\condabin\conda.bat install -c conda-forge shapely
pip install -e .
pip install -r requirements.txt
pytest
4 changes: 2 additions & 2 deletions scripts/metadata_converter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import csv
import glob
import os
from pathlib import Path

from datetime import datetime, timedelta

Expand Down Expand Up @@ -36,7 +36,7 @@ def main():

for filename in glob.iglob('metadata*.csv'):
input_filename = filename
output_filename = os.path.join("new", filename)
output_filename = Path("new") / filename
with open(output_filename, 'w') as outfile:
csv_out = csv.DictWriter(outfile, FIELDNAMES)
csv_out.writeheader()
Expand Down
28 changes: 13 additions & 15 deletions scripts/multi_thermostat_driver.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
import logging
import logging.config
import json
Expand Down Expand Up @@ -51,8 +51,8 @@
# data in the same file.

# Single Stage
DATA_DIR = os.path.join('..', 'tests', 'data', 'single_stage')
METADATA_FILENAME = os.path.join(DATA_DIR, 'metadata.csv')
DATA_DIR = Path('../../datadir/EPA_Tau')
METADATA_FILENAME = DATA_DIR / '2019_epa_tau.csv'

# Two Stage
# DATA_DIR = os.path.join('..', 'tests', 'data', 'two_stage')
Expand Down Expand Up @@ -81,15 +81,14 @@
ZIP_FILENAME = f'{BASE_FILENAME}.zip'

# These are the locations of where these files will be stored.
METRICS_FILEPATH = os.path.join(OUTPUT_DIR, METRICS_FILENAME)
STATS_FILEPATH = os.path.join(DATA_DIR, STATISTICS_FILENAME)
CERTIFICATION_FILEPATH = os.path.join(DATA_DIR, CERTIFICATION_FILENAME)
STATS_ADVANCED_FILEPATH = os.path.join(DATA_DIR, ADVANCED_STATISTICS_FILENAME)
IMPORT_ERRORS_FILEPATH = os.path.join(OUTPUT_DIR, IMPORT_ERRORS_FILENAME)
SANITIZED_IMPORT_ERRORS_FILEPATH = os.path.join(OUTPUT_DIR, SANITIZED_IMPORT_ERRORS_FILENAME)
CLIMATE_ZONE_INSUFFICIENT_FILEPATH = os.path.join(OUTPUT_DIR, CLIMATE_ZONE_INSUFFICIENT_FILENAME)
ZIP_FILEPATH = os.path.join(OUTPUT_DIR, ZIP_FILENAME)

METRICS_FILEPATH = OUTPUT_DIR / METRICS_FILENAME
STATS_FILEPATH = DATA_DIR / STATISTICS_FILENAME
CERTIFICATION_FILEPATH = DATA_DIR / CERTIFICATION_FILENAME
STATS_ADVANCED_FILEPATH = DATA_DIR / ADVANCED_STATISTICS_FILENAME
IMPORT_ERRORS_FILEPATH = OUTPUT_DIR / IMPORT_ERRORS_FILENAME
SANITIZED_IMPORT_ERRORS_FILEPATH = OUTPUT_DIR / SANITIZED_IMPORT_ERRORS_FILENAME
CLIMATE_ZONE_INSUFFICIENT_FILEPATH = OUTPUT_DIR / CLIMATE_ZONE_INSUFFICIENT_FILENAME
ZIP_FILEPATH = OUTPUT_DIR / ZIP_FILENAME

def write_errors(filepath, fieldnames, errors, extrasaction=None):
with open(filepath, 'w') as error_file:
Expand Down Expand Up @@ -202,9 +201,8 @@ def main():

with ZipFile(ZIP_FILEPATH, 'w') as certification_zip:
for filename in files_to_zip:
if os.path.exists(filename):
certification_zip.write(filename, arcname=os.path.basename(filename))

if filename.exists():
certification_zip.write(filename, arcname=filename.name)

if __name__ == '__main__':
main()
6 changes: 3 additions & 3 deletions scripts/random_uuid_generation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
import uuid
import os
from pathlib import Path

# Number of thermostat IDs to generate
NUM_THERMOSTATS = 200
Expand All @@ -14,7 +14,7 @@
]

# Change this file path to a different location if you wish to save the files
FILE_PATH = '/tmp'
FILE_PATH = Path('/tmp')


def main():
Expand All @@ -26,7 +26,7 @@ def main():
"""
for climate_zone in CLIMATE_ZONES:
climate_zone_filename = climate_zone + '.csv'
with open(os.path.join(FILE_PATH, climate_zone_filename), 'w') as thermostat_file:
with open((FILE_PATH /climate_zone_filename), 'w') as thermostat_file:
for _ in range(0, NUM_THERMOSTATS):
thermostat_id = uuid.uuid4()
thermostat_file.write(str(thermostat_id) + '\n')
Expand Down
2 changes: 1 addition & 1 deletion tests/test_core_single_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_multiple_same_key(thermostats_multiple_same_key):
def test_interpolate_empty(thermostat_type_1):
s1 = pd.Series([])
s1_intp = thermostat_type_1._interpolate(s1)
np.testing.assert_allclose(s1_intp, [])
np.testing.assert_allclose(s1_intp.astype(float), [])


def test_interpolate_full(thermostat_type_1):
Expand Down
3 changes: 2 additions & 1 deletion tests/test_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from thermostat.equipment_type import EQUIPMENT_MAPPING

import os
from pathlib import Path
import tempfile
import zipfile
from uuid import uuid4
Expand Down Expand Up @@ -102,7 +103,7 @@ def test_schedule_batches_zip_files(metadata_filename):
batch_zipfile_names = schedule_batches(metadata_filename, 5, True, temp_dir)

assert len(batch_zipfile_names) == 5
assert isinstance(batch_zipfile_names[0], str)
assert isinstance(batch_zipfile_names[0], Path)

with zipfile.ZipFile(batch_zipfile_names[0]) as zf:
assert len(zf.infolist()) == 21
9 changes: 5 additions & 4 deletions thermostat/eeweather_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime
import datetime
import eeweather

import pytz
import pandas as pd

# This routine is a compact and distilled version of code that was originally
Expand Down Expand Up @@ -55,9 +55,10 @@ def get_indexed_temperatures_eeweather(usaf_id, index):
if index.shape == (0, 0) or index.shape == (0,):
return pd.Series([], index=(), dtype=float)
years = sorted(index.groupby(index.year).keys())
start = pd.to_datetime(datetime(years[0], 1, 1), utc=True)
end = pd.to_datetime(datetime(years[-1], 12, 31, 23, 59), utc=True)
start = pd.to_datetime(datetime.datetime(years[0], 1, 1, tzinfo=pytz.UTC))
end = pd.to_datetime(datetime.datetime(years[-1], 12, 31, 23, 59, tzinfo=pytz.UTC))
tempC, _ = eeweather.load_isd_hourly_temp_data(usaf_id, start, end)

tempC = tempC.resample('H').mean()[index]
tempF = _convert_to_farenheit(tempC)
return tempF
14 changes: 6 additions & 8 deletions thermostat/importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import warnings
import pandas as pd
import dateutil.parser
from pathlib import Path
import os
import pytz
from multiprocessing import Pool, cpu_count
Expand Down Expand Up @@ -111,17 +112,15 @@ def save_json_cache(index, thermostat_id, station, cache_path=None):
json_cache[filename] = sqlite_json_store.retrieve_json(base_name)

if cache_path is None:
directory = os.path.join(
os.curdir,
"epathermostat_weather_data")
directory = Path.cwd() / "epathermostat_weather_data"
else:
directory = os.path.normpath(
directory = Path(
cache_path)

thermostat_filename = f"{thermostat_id}.json"
thermostat_path = os.path.join(directory, thermostat_filename)
thermostat_path = directory /thermostat_filename
try:
os.makedirs(os.path.dirname(directory), exist_ok=True)
directory.mkdir(exist_ok=True)
with open(thermostat_path, 'w') as outfile:
json.dump(json_cache, outfile)

Expand Down Expand Up @@ -241,7 +240,6 @@ def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True,
error_list.append(error_dict)
else:
results.append(result['thermostat'])

# Convert this to an iterator to maintain compatibility
return iter(results), error_list

Expand All @@ -254,7 +252,7 @@ def _multiprocess_func(metadata, metadata_filename, verbose=False, save_cache=Fa
if verbose and logger.getEffectiveLevel() > logging.INFO:
print(f"Importing thermostat {row.thermostat_id}")

interval_data_filename = os.path.join(os.path.dirname(metadata_filename), row.interval_data_filename)
interval_data_filename = Path(metadata_filename).parents[0] / row.interval_data_filename

status_metadata = {
'thermostat_id': row.thermostat_id,
Expand Down
17 changes: 9 additions & 8 deletions thermostat/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from itertools import cycle
from zipfile import ZipFile
import tempfile
import os
from pathlib import Path
from thermostat.stations import get_closest_station_by_zipcode


Expand Down Expand Up @@ -42,6 +42,8 @@ def schedule_batches(metadata_filename, n_batches, zip_files=False, batches_dir=
message = "Cannot have batches_dir==None when zip_files==True. " \
"Please supply a directory in which to save batches."
raise ValueError(message)

metadata_filename = Path(metadata_filename)

metadata_df = pd.read_csv(metadata_filename, dtype={"zipcode": str})
stations = [get_closest_station_by_zipcode(zipcode) for zipcode in metadata_df.zipcode]
Expand Down Expand Up @@ -79,26 +81,25 @@ def schedule_batches(metadata_filename, n_batches, zip_files=False, batches_dir=
batch_dfs = [pd.DataFrame(rows) for rows in batches]

if zip_files:

if not os.path.exists(batches_dir):
os.makedirs(batches_dir)
batches_dir = Path(batches_dir)
batches_dir.mkdir(exist_ok=True)

batch_zipfile_names = []
for i, batch_df in enumerate(batch_dfs):

batch_name = "batch_{:05d}.zip".format(i)
batch_zipfile_name = os.path.join(batches_dir, batch_name)
batch_zipfile_name = batches_dir / batch_name
batch_zipfile_names.append(batch_zipfile_name)

_, fname = tempfile.mkstemp()
batch_df.to_csv(fname, index=False)

with ZipFile(batch_zipfile_name, 'w') as batch_zip:
batch_zip.write(fname, arcname=os.path.join('data', 'metadata.csv'))
batch_zip.write(fname, arcname=Path('data') / 'metadata.csv')

for filename in batch_df.interval_data_filename:
interval_data_source = os.path.join(os.path.dirname(metadata_filename), filename)
batch_zip.write(interval_data_source, arcname=os.path.join('data', filename))
interval_data_source = metadata_filename.parents[0] / filename
batch_zip.write(interval_data_source, arcname=Path('data') / filename)

return batch_zipfile_names

Expand Down
6 changes: 3 additions & 3 deletions thermostat/util/testing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
import inspect

def get_data_path(f=''):
Expand All @@ -7,5 +7,5 @@ def get_data_path(f=''):
"""
# get our callers file
_, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1]
base_dir = os.path.abspath(os.path.dirname(filename))
return os.path.join(base_dir, f)
base_dir = Path(filename).parents[0].resolve()
return base_dir / f
Loading