Skip to content

Commit

Permalink
Merge pull request #96 from marabuuu/download-statistics
Browse files Browse the repository at this point in the history
Add download statistics and zenodo link scripts
  • Loading branch information
haesleinhuepf authored Jun 28, 2024
2 parents edf1e0d + 5b804de commit ec03c8a
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 0 deletions.
3 changes: 3 additions & 0 deletions scripts/generate_link_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,10 @@ def read_zenodo(record):
import json

record = record.replace("https://zenodo.org/", "")
record = record.replace("record/", "records/")
url = "https://zenodo.org/api/" + record

print(url)

# Download the file
response = requests.get(url)
Expand Down
62 changes: 62 additions & 0 deletions scripts/summary_download_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#summarize download statistics

#import statements
import requests
import json
from generate_link_lists import read_zenodo, read_yaml_file, all_content
import pandas as pd
from pathlib import Path
import datetime

#define directory path
#NOTE: this is a relative path, so it is resolved against the current working
#directory; the script has to be started from the directory that contains
#'resources'. A script-relative alternative would be
#Path(__file__).parent.parent / 'resources'.
directory_path = './resources/'

#collect all content in a list of dictionaries
content = all_content(directory_path)

#statistics copied from the 'stats' section of every zenodo record,
#in the order in which they appear as columns of the CSV file
stat_columns = [
    'downloads',
    'unique_downloads',
    'views',
    'unique_views',
    'version_downloads',
    'version_unique_downloads',
    'version_unique_views',
    'version_views',
]

#rows are collected in a plain list and turned into a DataFrame once at the
#end; concatenating a one-row DataFrame per file copies the whole table on
#every iteration and warns about the empty start frame on recent pandas
rows = []

for entry in content['resources']:
    urls = entry['url']

    #make urls a list if it is not already
    if not isinstance(urls, list):
        urls = [urls]

    for url in urls:
        #only zenodo links are queried for statistics
        if 'zenodo.org' not in url:
            continue

        #extract meta data of the record in zenodo
        zenodo = read_zenodo(url)

        #records without a 'stats' section are skipped
        if 'stats' not in zenodo:
            continue

        stats = zenodo['stats']

        #the same record-level statistics are written once per file of the
        #record, identified by the 'id' of the file; a record without a
        #'files' list contributes no rows instead of raising a KeyError
        for zenodo_file in zenodo.get('files', []):
            row_entry = {'file_id': zenodo_file['id']}
            for column in stat_columns:
                row_entry[column] = stats[column]
            rows.append(row_entry)

#create pandas DataFrame called download_statistics; passing the columns
#explicitly keeps the header even if no row was collected
download_statistics = pd.DataFrame(rows, columns=['file_id'] + stat_columns)
print(download_statistics)

#get current date
date = datetime.datetime.now().strftime("%Y%m%d")

#create filename
filename = f'download_statistics_{date}.csv'

#save download_statistics to CSV file with the new filename
#(written to the current working directory)
download_statistics.to_csv(filename, index=False)
37 changes: 37 additions & 0 deletions scripts/zenodo_links_from_doi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#replace zenodo doi.org links in the resource entries with the zenodo links they resolve to and print the updated entries

import requests
import json
from generate_link_lists import read_doi, all_content

#define directory path
#NOTE: relative path, resolved against the current working directory
directory_path = './resources/'

#collect all content in a list of dictionaries
content = all_content(directory_path)

#NOTE: the updated entries are only changed in memory and printed;
#nothing in this script writes them back to the resource files
for entry in content['resources']:
    original_urls = entry['url']

    #work on a separate list: removing from / appending to the list that is
    #being iterated would skip the element following every replaced url
    url_is_list = isinstance(original_urls, list)
    urls = list(original_urls) if url_is_list else [original_urls]

    kept_urls = []
    zenodo_links = []

    for url in urls:

        #only doi.org links that point to zenodo are candidates; checking
        #both conditions first avoids looking up DOIs that are never replaced
        if 'doi.org' in url and 'zenodo' in url:

            #extract meta data of records from doi.org
            data = read_doi(url)

            #search for word zenodo in meta data because this is the zenodo-link we want to append to url
            if 'zenodo.org' in str(data['values']):
                #NOTE(review): assumes the second element of data['values']
                #holds the link the DOI resolves to - confirm against read_doi
                zenodo_links.append(data['values'][1]['data']['value'])
                continue

        kept_urls.append(url)

    if zenodo_links:
        #replace zenodo doi links with the resolved ones (appended at the
        #end) but keep all other links
        updated_urls = kept_urls + zenodo_links

        if url_is_list:
            #update the existing list in place
            original_urls[:] = updated_urls
        else:
            #a single url given as string stays a single string
            entry['url'] = updated_urls[0]

        print(entry['url'])

0 comments on commit ec03c8a

Please sign in to comment.