Skip to content

Commit

Permalink
Merge pull request #50 from USEPA/develop
Browse files Browse the repository at this point in the history
v0.3.2-release
  • Loading branch information
catherinebirney authored Dec 8, 2023
2 parents e6230c1 + fef05a7 commit 6249841
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 9 deletions.
8 changes: 4 additions & 4 deletions esupy/dqi.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,10 @@ def get_weighted_average(df, data_col, weight_col, agg_cols):
df_agg[data_col] = get_weighted_average(df, data_col,
weight_col, agg_cols)
"""
- df.loc[:, '_data_times_weight'] = df[data_col] * df[weight_col]
- df.loc[:, '_weight_where_notnull'] = df[weight_col] * pd.notnull(df[data_col])
- calc_cols = ['_weight_where_notnull', '_data_times_weight']
- df[calc_cols] = df[calc_cols].applymap(float)
+ df = (df.assign(_data_times_weight = lambda x: x[data_col] * x[weight_col])
+ .assign(_weight_where_notnull = lambda x:
+ x[weight_col] * pd.notnull(x[data_col]))
+ )
g = df.groupby(agg_cols)
wt_avg = np.divide(g['_data_times_weight'].sum(), g['_weight_where_notnull'].sum(),
out=np.zeros_like(g['_data_times_weight'].sum()),
Expand Down
8 changes: 3 additions & 5 deletions esupy/processed_data_mgmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,6 @@ def download_from_remote(file_meta, paths, **kwargs):
base_url = paths.remote_path + file_meta.tool + '/'
if file_meta.category != '':
base_url = base_url + file_meta.category + '/'
- ## TODO: re-implement URL handling via f-strings and/or urllib
- # base_url = f'{paths.remote_path}/{file_meta.tool}'
- # if not file_meta.category == '':
- # base_url = f'{base_url}/{file_meta.category}'
files = get_most_recent_from_index(file_meta, paths)
if files is None:
log.info(f'{file_meta.name_data} not found in {base_url}')
Expand Down Expand Up @@ -173,7 +169,9 @@ def get_most_recent_from_index(file_meta, paths):
if file_df is None:
return None
file_df = parse_data_commons_index(file_df)
- df = file_df[file_df['name'].str.startswith(file_meta.name_data)]
+ # subset using "file_name" instead of "name" to work when a user
+ # includes a GitHub version and hash
+ df = file_df[file_df['file_name'].str.startswith(file_meta.name_data)]
df_ext = df[df['ext'] == file_meta.ext]
if len(df_ext) == 0:
return None
Expand Down

0 comments on commit 6249841

Please sign in to comment.