Skip to content

Commit

Permalink
subset list of data available on data commons with file_name instead …
Browse files Browse the repository at this point in the history
…of "name" so we can specify a version and/or hash
  • Loading branch information
catherinebirney committed Dec 8, 2023
1 parent f28ac14 commit fef05a7
Showing 1 changed file with 3 additions and 5 deletions.
8 changes: 3 additions & 5 deletions esupy/processed_data_mgmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,6 @@ def download_from_remote(file_meta, paths, **kwargs):
base_url = paths.remote_path + file_meta.tool + '/'
if file_meta.category != '':
base_url = base_url + file_meta.category + '/'
## TODO: re-implement URL handling via f-strings and/or urllib
# base_url = f'{paths.remote_path}/{file_meta.tool}'
# if not file_meta.category == '':
# base_url = f'{base_url}/{file_meta.category}'
files = get_most_recent_from_index(file_meta, paths)
if files is None:
log.info(f'{file_meta.name_data} not found in {base_url}')
Expand Down Expand Up @@ -173,7 +169,9 @@ def get_most_recent_from_index(file_meta, paths):
if file_df is None:
return None
file_df = parse_data_commons_index(file_df)
df = file_df[file_df['name'].str.startswith(file_meta.name_data)]
# subset using "file_name" instead of "name" so that matching still works
# when a user specifies a GitHub version and/or hash
df = file_df[file_df['file_name'].str.startswith(file_meta.name_data)]
df_ext = df[df['ext'] == file_meta.ext]
if len(df_ext) == 0:
return None
Expand Down

1 comment on commit fef05a7

@catherinebirney
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WesIngwersen This change addresses the flowsa issue with flowsa.collapse_FlowBySector(). You can now specify an FBS version and/or git hash. I'll pull this into esupy master.

Please sign in to comment.