Merge pull request #491 from NFDI4BIOIMAGE/git-bob-mod-1X3rN0cpkz

Create a script to fetch GitHub statistics from YAML file and save to CSV.
NFDI4BIOIMAGE · Nov 19, 2024 · ddcf787 · ddcf787
2 parents 5f4d0b0 + 74480de
commit ddcf787
Show file tree

Hide file tree

Showing 4 changed files with 144 additions and 1 deletion.
diff --git a/.github/workflows/auto-add-weekly_github_stats.yml b/.github/workflows/auto-add-weekly_github_stats.yml
@@ -0,0 +1,30 @@
+# Weekly job that runs scripts/auto-add-github-statistics.py.
+# The name is distinct from the Zenodo download-statistics workflow so the
+# two can be told apart in the Actions UI.
+name: Run Weekly GitHub Statistics
+
+on:
+  schedule:
+    - cron: '0 11 * * 2'  # Every Tuesday at 11:00 UTC (cron schedules are evaluated in UTC)
+
+jobs:
+  run-weekly-summary:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4  # v2 targets a deprecated Node.js runtime
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.x'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+
+    - name: Run Python
+      env:
+        GITHUB_API_KEY: "${{ secrets.GITHUB_TOKEN }}"
+        GITHUB_RUN_ID: "${{ github.run_id }}"
+      run: |
+        python scripts/auto-add-github-statistics.py nfdi4bioimage/training
diff --git a/.github/workflows/auto-add-weekly.yml → ...ws/auto-add-weekly_zenodo_communities.yml b/.github/workflows/auto-add-weekly.yml → ...ws/auto-add-weekly_zenodo_communities.yml
@@ -27,5 +27,4 @@ jobs:
         GITHUB_API_KEY: "${{ secrets.GITHUB_TOKEN }}"
         GITHUB_RUN_ID: "${{ github.run_id }}"
       run: |
-        python scripts/auto-add-download-statistics.py nfdi4bioimage/training
         python scripts/auto-add-from-zenodo-communities.py nfdi4bioimage/training
diff --git a/.github/workflows/auto-add-weekly_zenodo_stats.yml b/.github/workflows/auto-add-weekly_zenodo_stats.yml
@@ -0,0 +1,30 @@
+# Weekly job that runs scripts/auto-add-download-statistics.py.
+name: Run Weekly Summary Download Statistics
+
+on:
+  schedule:
+    - cron: '0 11 * * 2'  # Every Tuesday at 11:00 UTC (cron schedules are evaluated in UTC)
+
+jobs:
+  run-weekly-summary:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4  # v2 targets a deprecated Node.js runtime
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.x'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+
+    - name: Run Python
+      env:
+        GITHUB_API_KEY: "${{ secrets.GITHUB_TOKEN }}"
+        GITHUB_RUN_ID: "${{ github.run_id }}"
+      run: |
+        python scripts/auto-add-download-statistics.py nfdi4bioimage/training
diff --git a/scripts/auto-add-github-statistics.py b/scripts/auto-add-github-statistics.py
@@ -0,0 +1,84 @@
+import os
+import pandas as pd
+from datetime import datetime
+
+def extract_github_repos(yaml_data):
+    """Extract the unique GitHub repositories referenced in the YAML data.
+
+    Every ``url`` entry of every item in ``yaml_data['resources']`` that
+    starts with ``https://github.com/`` is reduced to its ``owner/repo``
+    part. Trailing slashes, deeper paths (e.g. ``/tree/main``), query
+    strings, fragments and a ``.git`` suffix are dropped. Links that do not
+    name a repository (e.g. a bare user or organisation page) and entries
+    that are not strings are ignored.
+
+    Parameters
+    ----------
+    yaml_data : dict
+        Data loaded from the YAML file. A missing or empty ``resources``
+        key is treated as an empty list.
+
+    Returns
+    -------
+    list of str
+        Repository names in ``owner/repo`` format, in order of first
+        appearance and without duplicates.
+    """
+    prefix = "https://github.com/"
+    repos = []
+    seen = set()
+    for resource in yaml_data.get('resources') or []:
+        urls = resource.get('url') or []
+        if isinstance(urls, str):
+            urls = [urls]
+        for url in urls:
+            # Entries may be null or otherwise malformed in hand-edited YAML.
+            if not isinstance(url, str) or not url.startswith(prefix):
+                continue
+            # Keep only the path: drop "#fragment" and "?query" parts.
+            path = url[len(prefix):].split("#", 1)[0].split("?", 1)[0]
+            parts = [part for part in path.split("/") if part]
+            if len(parts) < 2:
+                # Only an owner is given; this is not a repository link.
+                continue
+            owner, name = parts[0], parts[1]
+            if name.endswith(".git"):
+                name = name[:-len(".git")]
+            if not name:
+                continue
+            repo = f"{owner}/{name}"
+            # Several resources may point into the same repository; report
+            # it once so it is queried only once.
+            if repo not in seen:
+                seen.add(repo)
+                repos.append(repo)
+    return repos
+
+def get_repo_stats(repo_name):
+    """Look up the star and fork counts of one GitHub repository.
+
+    The repository object is obtained from
+    ``_github_utilities.get_github_repository``; its ``stargazers_count``
+    and ``forks_count`` attributes are read (presumably a PyGithub
+    ``Repository`` -- confirm against ``_github_utilities``).
+
+    Parameters
+    ----------
+    repo_name : str
+        Name of the repository in 'user/repo' format.
+
+    Returns
+    -------
+    dict
+        Dictionary with the keys ``repo_name``, ``stars`` and ``forks``.
+    """
+    # Progress output, emitted before the lookup starts.
+    print(f"-> get_repo_stats({repo_name})")
+    from _github_utilities import get_github_repository
+
+    repository = get_github_repository(repo_name)
+
+    stats = dict(repo_name=repo_name)
+    stats["stars"] = repository.stargazers_count
+    stats["forks"] = repository.forks_count
+    return stats
+
+def main():
+    """Collect GitHub statistics for all listed repositories and open a PR.
+
+    Steps:
+
+    1. Load the resource collection via ``all_content('./resources/')``.
+    2. Query stars and forks for every GitHub repository found in it.
+    3. Write the result to ``github_statistics/<YYYYMMDD>.csv``.
+    4. Write that file to a new branch of the target repository and send a
+       pull request for it.
+
+    The target repository is taken from the first command-line argument
+    (as passed by the workflow) and defaults to ``nfdi4bioimage/training``.
+
+    Raises
+    ------
+    RuntimeError
+        If repositories were found but no statistics could be retrieved
+        for any of them.
+    """
+    import sys
+
+    from generate_link_lists import all_content
+    from _github_utilities import create_branch, write_file, send_pull_request
+
+    # The workflow passes the repository as the first argument; keep the
+    # previous hard-coded value as the default for argument-less runs.
+    repository = sys.argv[1] if len(sys.argv) > 1 else "nfdi4bioimage/training"
+
+    yaml_data = all_content('./resources/')
+    repos = extract_github_repos(yaml_data)
+
+    stats_list = []
+    for repo in repos:
+        try:
+            stats_list.append(get_repo_stats(repo))
+        except Exception as error:
+            # A single stale or renamed link must not abort the weekly run.
+            print(f"Skipping {repo}: {error}")
+    if repos and not stats_list:
+        # Every lookup failed (e.g. bad token): do not commit an empty file.
+        raise RuntimeError("Could not retrieve statistics for any repository.")
+    df = pd.DataFrame(stats_list)
+
+    today = datetime.now().strftime('%Y%m%d')
+    output_dir = 'github_statistics'
+    os.makedirs(output_dir, exist_ok=True)
+    output_file = f"{output_dir}/{today}.csv"
+    df.to_csv(output_file, index=False)
+
+    # upload to github and send a pull-request
+    branch = create_branch(repository)
+    # to_csv writes UTF-8 by default; read it back with the same encoding.
+    with open(output_file, 'r', encoding='utf-8') as file:
+        file_content = file.read()
+
+    write_file(repository, branch, output_file, file_content, "Add " + output_file)
+    send_pull_request(repository, branch, f"Add {output_file}", "")
+
+# Run the statistics collection only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()