Merge pull request #50 from USEPA/develop

v0.3.2-release
USEPA · Dec 8, 2023 · 6249841 · 6249841
2 parents e6230c1 + fef05a7
commit 6249841
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 9 deletions.
diff --git a/esupy/dqi.py b/esupy/dqi.py
@@ -119,10 +119,10 @@ def get_weighted_average(df, data_col, weight_col, agg_cols):
     df_agg[data_col] = get_weighted_average(df, data_col,
                                             weight_col, agg_cols)
     """
-    df.loc[:, '_data_times_weight'] = df[data_col] * df[weight_col]
-    df.loc[:, '_weight_where_notnull'] = df[weight_col] * pd.notnull(df[data_col])
-    calc_cols = ['_weight_where_notnull', '_data_times_weight']
-    df[calc_cols] = df[calc_cols].applymap(float)
+    df = (df.assign(_data_times_weight = lambda x: x[data_col] * x[weight_col])
+            .assign(_weight_where_notnull = lambda x:
+                    x[weight_col] * pd.notnull(x[data_col]))
+            )
     g = df.groupby(agg_cols)
     wt_avg = np.divide(g['_data_times_weight'].sum(), g['_weight_where_notnull'].sum(),
                        out=np.zeros_like(g['_data_times_weight'].sum()),

diff --git a/esupy/processed_data_mgmt.py b/esupy/processed_data_mgmt.py
@@ -70,10 +70,6 @@ def download_from_remote(file_meta, paths, **kwargs):
     base_url = paths.remote_path + file_meta.tool + '/'
     if file_meta.category != '':
         base_url = base_url + file_meta.category + '/'
-    ## TODO: re-implement URL handling via f-strings and/or urllib
-    # base_url = f'{paths.remote_path}/{file_meta.tool}'
-    # if not file_meta.category == '':
-    #     base_url = f'{base_url}/{file_meta.category}'
     files = get_most_recent_from_index(file_meta, paths)
     if files is None:
         log.info(f'{file_meta.name_data} not found in {base_url}')
@@ -173,7 +169,9 @@ def get_most_recent_from_index(file_meta, paths):
     if file_df is None:
         return None
     file_df = parse_data_commons_index(file_df)
-    df = file_df[file_df['name'].str.startswith(file_meta.name_data)]
+    # subset using "file_name" instead of "name" to work when a user
+    # includes a GitHub version and hash
+    df = file_df[file_df['file_name'].str.startswith(file_meta.name_data)]
     df_ext = df[df['ext'] == file_meta.ext]
     if len(df_ext) == 0:
         return None