From f28ac144aa8d30ac0c1a051e1d90950990345b5c Mon Sep 17 00:00:00 2001
From: Ben Young
Date: Wed, 15 Nov 2023 14:59:05 -0500
Subject: [PATCH 1/2] avoid SettingWithCopyWarning

---
 esupy/dqi.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/esupy/dqi.py b/esupy/dqi.py
index 971e353..9873e62 100644
--- a/esupy/dqi.py
+++ b/esupy/dqi.py
@@ -119,10 +119,10 @@ def get_weighted_average(df, data_col, weight_col, agg_cols):
     df_agg[data_col] = get_weighted_average(df, data_col, weight_col, agg_cols)
     """
-    df.loc[:, '_data_times_weight'] = df[data_col] * df[weight_col]
-    df.loc[:, '_weight_where_notnull'] = df[weight_col] * pd.notnull(df[data_col])
-    calc_cols = ['_weight_where_notnull', '_data_times_weight']
-    df[calc_cols] = df[calc_cols].applymap(float)
+    df = (df.assign(_data_times_weight = lambda x: x[data_col] * x[weight_col])
+          .assign(_weight_where_notnull = lambda x:
+                  x[weight_col] * pd.notnull(x[data_col]))
+          )
     g = df.groupby(agg_cols)
     wt_avg = np.divide(g['_data_times_weight'].sum(),
                        g['_weight_where_notnull'].sum(),
                        out=np.zeros_like(g['_data_times_weight'].sum()),

From fef05a709ba0bb4813cc9a39c10525f35c96ebae Mon Sep 17 00:00:00 2001
From: catherinebirney
Date: Fri, 8 Dec 2023 11:53:32 -0700
Subject: [PATCH 2/2] subset list of data available on data commons with
 file_name instead of "name" so we can specify a version and/or hash

---
 esupy/processed_data_mgmt.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/esupy/processed_data_mgmt.py b/esupy/processed_data_mgmt.py
index fbd5097..260b363 100644
--- a/esupy/processed_data_mgmt.py
+++ b/esupy/processed_data_mgmt.py
@@ -70,10 +70,6 @@ def download_from_remote(file_meta, paths, **kwargs):
     base_url = paths.remote_path + file_meta.tool + '/'
     if file_meta.category != '':
         base_url = base_url + file_meta.category + '/'
-    ## TODO: re-implement URL handling via f-strings and/or urllib
-    # base_url = f'{paths.remote_path}/{file_meta.tool}'
-    # if not file_meta.category == '':
-    #     base_url = f'{base_url}/{file_meta.category}'
     files = get_most_recent_from_index(file_meta, paths)
     if files is None:
         log.info(f'{file_meta.name_data} not found in {base_url}')
@@ -173,7 +169,9 @@ def get_most_recent_from_index(file_meta, paths):
     if file_df is None:
         return None
     file_df = parse_data_commons_index(file_df)
-    df = file_df[file_df['name'].str.startswith(file_meta.name_data)]
+    # subset using "file_name" instead of "name" to work when a user
+    # includes a GitHub version and hash
+    df = file_df[file_df['file_name'].str.startswith(file_meta.name_data)]
     df_ext = df[df['ext'] == file_meta.ext]
     if len(df_ext) == 0:
         return None
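
Note on PATCH 1/2 (editor's sketch, not part of the patch): the snippet below mirrors the assign()-based rework of get_weighted_average() on a toy frame. The column names and data are invented, and the final guarded divide is adapted from the np.divide(..., out=np.zeros_like(...)) call visible in the hunk context, here applied to plain NumPy arrays, so treat it as an illustration of the idea rather than the library code.

import numpy as np
import pandas as pd

# Toy frame: 'sector', 'dqi', and 'flow' are made-up names for this note.
df = pd.DataFrame({'sector': ['A', 'A', 'B', 'B'],
                   'dqi':    [3.0, np.nan, 2.0, 4.0],
                   'flow':   [10.0, 5.0, 1.0, 3.0]})
data_col, weight_col, agg_cols = 'dqi', 'flow', ['sector']

# assign() builds the temporary columns on a fresh DataFrame, so nothing is
# written through a possible view of the caller's frame and pandas has no
# reason to raise SettingWithCopyWarning.
df = (df.assign(_data_times_weight=lambda x: x[data_col] * x[weight_col])
        .assign(_weight_where_notnull=lambda x:
                x[weight_col] * pd.notnull(x[data_col])))

g = df.groupby(agg_cols)
num = g['_data_times_weight'].sum().to_numpy()
den = g['_weight_where_notnull'].sum().to_numpy()
# Guarded divide: groups with no usable weight fall back to 0 instead of NaN.
wt_avg = np.divide(num, den, out=np.zeros_like(num), where=den != 0)
print(wt_avg)  # [3.  3.5] -> A ignores its NaN row, B is (2*1 + 4*3) / 4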
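
Note on PATCH 2/2 (editor's sketch, not part of the patch): a toy illustration of why get_most_recent_from_index() now filters the Data Commons index on 'file_name' rather than 'name'. The index contents below are invented; one plausible reading of the commit message is that when name_data itself carries a version and/or hash suffix, str.startswith() can only match against the full file name.

import pandas as pd

# Invented stand-in for the parsed index; real values come from
# parse_data_commons_index() and may differ.
file_df = pd.DataFrame({
    'name':      ['Example_FBA_2016', 'Example_FBA_2016'],
    'file_name': ['Example_FBA_2016_v1.1.0_abc1234.parquet',
                  'Example_FBA_2016_v1.2.0_def5678.parquet'],
    'ext':       ['parquet', 'parquet'],
})

# A user pinning a version (and optionally a hash) in name_data:
name_data = 'Example_FBA_2016_v1.2.0'

# Pre-patch: 'name' never starts with the versioned string -> empty frame.
old = file_df[file_df['name'].str.startswith(name_data)]
# Patched: the full file name does -> the pinned file is found.
new = file_df[file_df['file_name'].str.startswith(name_data)]

print(len(old), len(new))        # 0 1
print(new['file_name'].iloc[0])  # Example_FBA_2016_v1.2.0_def5678.parquet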