-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #96 from marabuuu/download-statistics
Add download statistics and zenodo link scripts
- Loading branch information
Showing
3 changed files
with
102 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#summarize download statistics | ||
|
||
#import statements | ||
import requests | ||
import json | ||
from generate_link_lists import read_zenodo, read_yaml_file, all_content | ||
import pandas as pd | ||
from pathlib import Path | ||
import datetime | ||
|
||
# Summarize Zenodo download/view statistics for every Zenodo link found in
# the resource collection and store them in a dated CSV file.

# Directory holding the resource files. The path is relative to the current
# working directory, so the script has to be started from the folder that
# contains `resources/`.
directory_path = './resources/'

# Collect all content; only its 'resources' list is used below.
content = all_content(directory_path)

# Statistics fields copied from a record's 'stats' mapping. Together with
# 'file_id' they define the column order of the resulting table / CSV.
stat_columns = [
    'downloads',
    'unique_downloads',
    'views',
    'unique_views',
    'version_downloads',
    'version_unique_downloads',
    'version_unique_views',
    'version_views',
]
columns = ['file_id'] + stat_columns

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame with pd.concat inside the loop copies the whole
# table on every iteration.
rows = []

for entry in content['resources']:
    urls = entry['url']

    # An entry may carry a single url or a list of urls; normalize to a list.
    if not isinstance(urls, list):
        urls = [urls]

    for url in urls:
        # Only Zenodo links are of interest here.
        if 'zenodo.org' not in url:
            continue

        # Metadata of the Zenodo record behind this url.
        zenodo = read_zenodo(url)

        # Records without statistics are skipped.
        if 'stats' not in zenodo:
            continue

        # One row is written per file of the record (keyed by the file's
        # 'id'). The numbers themselves are read from the record-level
        # 'stats' mapping, so all files of one record share the same values.
        stats = zenodo['stats']
        for file in zenodo['files']:
            row = {'file_id': file['id']}
            row.update({name: stats[name] for name in stat_columns})
            rows.append(row)

# Passing `columns` keeps the header stable even when no row was collected.
download_statistics = pd.DataFrame(rows, columns=columns)
print(download_statistics)

# File name carries the current local date as YYYYMMDD.
date = datetime.datetime.now().strftime("%Y%m%d")
filename = f'download_statistics_{date}.csv'

# Save the table to the working directory, without the index column.
download_statistics.to_csv(filename, index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#store zenodo links from doi | ||
|
||
import requests | ||
import json | ||
from generate_link_lists import read_doi, all_content | ||
|
||
# Replace Zenodo DOI links in the resource collection by the zenodo.org link
# that the DOI metadata points to.

# Directory holding the resource files, relative to the current working
# directory.
directory_path = './resources/'

# Collect all content; only its 'resources' list is used below.
content = all_content(directory_path)


def _find_zenodo_link(values):
    """Return the first string value containing 'zenodo.org', or None.

    `values` is the 'values' list of the DOI metadata. Each item is expected
    to look like ``{'data': {'value': ...}}``; items of any other shape, or
    whose value is not a string, are ignored. Searching by content avoids
    relying on the link sitting at a fixed position in the list.
    """
    for item in values:
        payload = item.get('data') if isinstance(item, dict) else None
        link = payload.get('value') if isinstance(payload, dict) else None
        if isinstance(link, str) and 'zenodo.org' in link:
            return link
    return None


for entry in content['resources']:
    urls = entry['url']

    # An entry may carry a single url or a list of urls; normalize to a list.
    if not isinstance(urls, list):
        urls = [urls]

    # Work on a copy: removing from / appending to the list that is being
    # iterated would skip elements, and a bare string url has no `.remove`.
    updated_urls = list(urls)

    for url in urls:
        # Only DOI links that refer to Zenodo are candidates. Checking the
        # url first avoids a metadata lookup for unrelated DOIs.
        if 'doi.org' not in url or 'zenodo' not in url:
            continue

        # Metadata of the record registered at doi.org.
        data = read_doi(url)

        # The zenodo.org link stored in the metadata is what replaces the DOI.
        zenodo_link = _find_zenodo_link(data['values'])
        if zenodo_link is None:
            continue

        # Swap the DOI link for the Zenodo link but keep all other links.
        updated_urls.remove(url)
        updated_urls.append(zenodo_link)
        entry['url'] = updated_urls

        # NOTE(review): the updated list is only printed; nothing in the
        # visible part of this script writes it back to the resource files.
        print(entry['url'])