Merge pull request #96 from marabuuu/download-statistics

Add download statistics and zenodo link scripts
NFDI4BIOIMAGE · Jun 28, 2024 · ec03c8a · ec03c8a
2 parents edf1e0d + 5b804de
commit ec03c8a
Show file tree

Hide file tree

Showing 3 changed files with 102 additions and 0 deletions.
diff --git a/scripts/generate_link_lists.py b/scripts/generate_link_lists.py
@@ -290,7 +290,10 @@ def read_zenodo(record):
     import json
 
     record = record.replace("https://zenodo.org/", "")
+    record = record.replace("record/", "records/")
     url = "https://zenodo.org/api/" + record
+
+    print(url)
 
     # Download the file
     response = requests.get(url)

diff --git a/scripts/summary_download_statistics.py b/scripts/summary_download_statistics.py
@@ -0,0 +1,77 @@
+"""Summarize Zenodo download statistics for all listed resources.
+
+Every entry under ``content['resources']`` is scanned for Zenodo URLs. For each
+Zenodo record that reports statistics, one row per file of the record is
+collected, and the table is written to ``download_statistics_YYYYMMDD.csv`` in
+the current working directory.
+"""
+
+import datetime
+import json
+from pathlib import Path
+
+import pandas as pd
+import requests
+
+from generate_link_lists import read_zenodo, read_yaml_file, all_content
+
+# NOTE: this path is resolved against the current working directory, not the
+# location of this script.
+directory_path = './resources/'
+
+# statistics fields copied from the 'stats' section of a Zenodo record
+stat_columns = [
+    'downloads',
+    'unique_downloads',
+    'views',
+    'unique_views',
+    'version_downloads',
+    'version_unique_downloads',
+    'version_unique_views',
+    'version_views',
+]
+
+# collect all content; the entries are read from its 'resources' key below
+content = all_content(directory_path)
+
+# Rows are gathered in a plain list and turned into a DataFrame once at the end:
+# calling pd.concat inside the loop re-copies the whole table for every row.
+rows = []
+
+for entry in content['resources']:
+    urls = entry['url']
+
+    # an entry may hold a single URL instead of a list of URLs
+    if not isinstance(urls, list):
+        urls = [urls]
+
+    for url in urls:
+        if 'zenodo.org' not in url:
+            continue
+
+        # extract the metadata of the Zenodo record
+        zenodo = read_zenodo(url)
+
+        # records that report no statistics are skipped
+        if 'stats' not in zenodo:
+            continue
+        stats = zenodo['stats']
+
+        # The statistics belong to the record as a whole; they are repeated for
+        # every file of the record, keyed by the file's 'id'. A record without
+        # a 'files' list contributes no rows instead of aborting the run.
+        for file in zenodo.get('files', []):
+            row = {'file_id': file['id']}
+            row.update({column: stats[column] for column in stat_columns})
+            rows.append(row)
+
+# passing the columns explicitly keeps the CSV header even when no row was found
+download_statistics = pd.DataFrame(rows, columns=['file_id'] + stat_columns)
+print(download_statistics)
+
+# the current date (YYYYMMDD) becomes part of the output file name
+date = datetime.datetime.now().strftime("%Y%m%d")
+filename = f'download_statistics_{date}.csv'
+
+# save download_statistics to a CSV file without the index column
+download_statistics.to_csv(filename, index=False)
diff --git a/scripts/zenodo_links_from_doi.py b/scripts/zenodo_links_from_doi.py
@@ -0,0 +1,61 @@
+"""Replace Zenodo DOI links of the listed resources by direct Zenodo links.
+
+For every resource entry, each URL that is a Zenodo DOI is resolved through
+``read_doi``. When the DOI metadata contains a ``zenodo.org`` link, the DOI URL
+is dropped from the entry and the Zenodo link is appended; all other URLs are
+kept. Only the in-memory ``content`` is changed; the result is printed.
+"""
+
+import json
+
+import requests
+
+from generate_link_lists import read_doi, all_content
+
+# NOTE: this path is resolved against the current working directory, not the
+# location of this script.
+directory_path = './resources/'
+
+# collect all content; the entries are read from its 'resources' key below
+content = all_content(directory_path)
+
+for entry in content['resources']:
+    urls = entry['url']
+
+    # an entry may hold a single URL instead of a list of URLs
+    if not isinstance(urls, list):
+        urls = [urls]
+
+    # The new URL list is built separately instead of editing entry['url'] while
+    # looping over it: removing items from a list during iteration skips the
+    # following item, and a single URL stored as a string has no remove().
+    kept_urls = []
+    zenodo_links = []
+
+    for url in urls:
+        # only DOIs that mention zenodo are candidates; testing this before the
+        # lookup avoids resolving DOIs whose result would be discarded anyway
+        if 'doi.org' in url and 'zenodo' in url:
+            # extract the metadata of the record from doi.org
+            data = read_doi(url)
+
+            # search the metadata values for the Zenodo link instead of relying
+            # on it sitting at a fixed position in the list
+            zenodo_link = None
+            for value in data['values']:
+                value_data = value.get('data')
+                candidate = value_data.get('value') if isinstance(value_data, dict) else None
+                if isinstance(candidate, str) and 'zenodo.org' in candidate:
+                    zenodo_link = candidate
+                    break
+
+            if zenodo_link is not None:
+                zenodo_links.append(zenodo_link)
+                continue
+
+        kept_urls.append(url)
+
+    # replace the DOI links with the Zenodo links but keep all other links
+    if zenodo_links:
+        entry['url'] = kept_urls + zenodo_links
+        print(entry['url'])