From 2bbc54a2934507f617c2a7e4fe6af5c116e8baa6 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Wed, 21 Aug 2024 10:49:06 -0400 Subject: [PATCH 01/10] Add branch protection ruleset to list of repos This first iteration is conservative, since the list of target repos for the branch ruleset is those explicitly marked as "FALSE" in the "would break" column of our spreadsheet: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 --- pyproject.toml | 8 +- requirements/requirements-dev.txt | 16 +- requirements/requirements.txt | 16 +- src/standardize_repo_settings/app.py | 141 +++++++++++++++++- .../reichlab_default_branch_protections.json | 32 ++++ src/standardize_repo_settings/util/logs.py | 6 - 6 files changed, 190 insertions(+), 29 deletions(-) create mode 100644 src/standardize_repo_settings/rulesets/reichlab_default_branch_protections.json diff --git a/pyproject.toml b/pyproject.toml index 79fbbc5..7304101 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,9 +11,9 @@ classifiers = [ dynamic = ["version"] dependencies = [ - "freezegun", - "structlog", + "requests", "rich", + "structlog", ] [project.optional-dependencies] @@ -34,10 +34,6 @@ requires = ["setuptools", "wheel"] [tools.setuptools] packages = ["standardize_repo_settings"] -[tool.standardize_repo_settings] -# to write json-formatted logs to disk, uncomment the following line specify the file location -# log_file = "/path/to/log/files/rechlab_python_template.log" - [tool.ruff] line-length = 120 lint.extend-select = ["I", "Q"] diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 46c647e..1fc5547 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -1,15 +1,19 @@ # This file was autogenerated by uv via the following command: # uv pip compile pyproject.toml --extra dev -o requirements/requirements-dev.txt +certifi==2024.7.4 + # via requests cfgv==3.4.0 # via pre-commit +charset-normalizer==3.3.2 + # via requests distlib==0.3.8 # via virtualenv filelock==3.14.0 # via virtualenv -freezegun==1.5.1 - # via standardize-repo-settings (pyproject.toml) identify==2.5.36 # via pre-commit +idna==3.7 + # via requests iniconfig==2.0.0 # via pytest markdown-it-py==3.0.0 @@ -34,21 +38,21 @@ pygments==2.18.0 # via rich pytest==8.2.0 # via standardize-repo-settings (pyproject.toml) -python-dateutil==2.9.0.post0 - # via freezegun pyyaml==6.0.1 # via pre-commit +requests==2.32.3 + # via standardize-repo-settings (pyproject.toml) rich==13.7.1 # via standardize-repo-settings (pyproject.toml) ruff==0.4.3 # via standardize-repo-settings (pyproject.toml) setuptools==72.1.0 # via nodeenv -six==1.16.0 - # via python-dateutil structlog==24.1.0 # via standardize-repo-settings (pyproject.toml) typing-extensions==4.11.0 # via mypy +urllib3==2.2.2 + # via requests virtualenv==20.26.1 # via pre-commit diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 51efcc8..79782e1 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,18 +1,22 @@ # This file was autogenerated by uv via the following command: # uv pip compile pyproject.toml -o requirements/requirements.txt -freezegun==1.5.1 - # via standardize-repo-settings (pyproject.toml) +certifi==2024.7.4 + # via requests +charset-normalizer==3.3.2 + # via requests +idna==3.7 + # via requests markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py pygments==2.18.0 # via rich -python-dateutil==2.9.0.post0 - # via freezegun +requests==2.32.3 + # via standardize-repo-settings (pyproject.toml) rich==13.7.1 # via standardize-repo-settings (pyproject.toml) -six==1.16.0 - # via python-dateutil structlog==24.1.0 # via standardize-repo-settings (pyproject.toml) +urllib3==2.2.2 + # via requests diff --git a/src/standardize_repo_settings/app.py b/src/standardize_repo_settings/app.py index b400792..5b1b5a2 100644 --- a/src/standardize_repo_settings/app.py +++ b/src/standardize_repo_settings/app.py @@ -1,19 +1,150 @@ +import importlib +import json +import os +from pathlib import Path + +import requests import structlog +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry # type: ignore -from standardize_repo_settings.util.date import get_current_date from standardize_repo_settings.util.logs import setup_logging setup_logging() logger = structlog.get_logger() +# source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 +# (any repo with a WILL_BREAK column = FALSE) +REPO_LIST = [ + "reichlab-python-template", + # "container-utils", + # "covidData", + # "distfromq", + # "docs.zoltardata", + # "ensemble-comparison", + # "flu-hosp-models-2021-2022", + # "flusion", + # "forecast-repository", + # "gbq_operational", + # "genomicdata", + # "hub-infrastructure-experiments", + # "idforecastutils", + # "jacques", + # "jacques-covid", + # "llmtime", + # "malaria-serology", + # "predictability", + # "predtimechart", + # "qenspy", + # "qensr", + # "rclp", + # "sarimaTD", + # "sarix-covid", + # "simplets", + # "timeseriesutils", + # "variant-nowcast-hub", + # "Zoltar-Vizualization", + # "zoltpy", + # "zoltr", +] + + +def get_session(token: str) -> requests.Session: + """Return a requests session with retry logic.""" + + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github.v3+json", + "X-GitHub-Api-Version": "2022-11-28", + } + session = requests.Session() + + # attach a urllib3 retry adapter to the requests session + # https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.retry.Retry + retries = Retry( + total=5, + allowed_methods=frozenset(["GET", "POST"]), + backoff_factor=1, + status_forcelist=[500, 502, 503, 504], + ) + session.mount("https://", HTTPAdapter(max_retries=retries)) + session.headers.update(headers) + + return session + + +def load_branch_ruleset(filepath: str) -> dict: + """ + Load branch ruleset from a JSON file. + + :param filepath: Path to the JSON file containing the branch ruleset + :return: Dictionary containing the branch ruleset + """ + with open(filepath, "r") as file: + return json.load(file) + + +def get_all_repos(org_name: str, session: requests.Session) -> list[dict]: + """ + Retrieve all repositories from a GitHub organization, handling pagination. + + :param org_name: Name of the GitHub organization + :param session: Requests session for interacting with the GitHub API + :return: List of repositories + """ + repos = [] + repos_url = f"https://api.github.com/orgs/{org_name}/repos" + while repos_url: + response = session.get(repos_url) + response.raise_for_status() + repos.extend(response.json()) + repos_url = response.links.get("next", {}).get("url") + return repos + + +def apply_branch_ruleset(org_name: str, branch_ruleset: dict, session: requests.Session): + """ + Apply a branch ruleset to every repository in a GitHub organization. + + :param org_name: Name of the GitHub organization + :param branch_ruleset: Dictionary containing the branch ruleset + :param session: Requests session for interacting with the GitHub API + """ + + # Get all repositories in the organization + repos = get_all_repos(org_name, session) + + for repo in repos: + repo_name = repo["name"] + logger.info(repo_name) + if repo_name in REPO_LIST: + branch_protection_url = f"https://api.github.com/repos/{org_name}/{repo_name}/rulesets" + + # Apply the branch ruleset + response = session.post(branch_protection_url, json=branch_ruleset) + if response.ok: + logger.info(f"Successfully applied branch ruleset to {repo_name}") + elif response.status_code == 422: + logger.warning( + "Failed to apply branch ruleset (likely because it already exists)", + repo=repo_name, + response=response.json(), + ) + else: + logger.error("Failed to apply branch ruleset", repo=repo_name, response=response.json()) + def main(): - """Application entry point.""" + org_name = "reichlab" + token = os.getenv("GITHUB_TOKEN") + + session = get_session(token) - today = get_current_date() - logger.info("retrieved the date", date=today) + mod_path = Path(importlib.util.find_spec("standardize_repo_settings").origin).parent + ruleset_path = mod_path / "rulesets" / "reichlab_default_branch_protections.json" + branch_ruleset = load_branch_ruleset(str(ruleset_path)) - return f"Hello, today is {today}!" + apply_branch_ruleset(org_name, branch_ruleset, session) if __name__ == "__main__": diff --git a/src/standardize_repo_settings/rulesets/reichlab_default_branch_protections.json b/src/standardize_repo_settings/rulesets/reichlab_default_branch_protections.json new file mode 100644 index 0000000..4a11147 --- /dev/null +++ b/src/standardize_repo_settings/rulesets/reichlab_default_branch_protections.json @@ -0,0 +1,32 @@ +{ + "name": "reichlab-default-branch-protections", + "target": "branch", + "enforcement": "active", + "conditions": { + "ref_name": { + "exclude": [], + "include": [ + "~DEFAULT_BRANCH" + ] + } + }, + "rules": [ + { + "type": "deletion" + }, + { + "type": "non_fast_forward" + }, + { + "type": "pull_request", + "parameters": { + "required_approving_review_count": 1, + "dismiss_stale_reviews_on_push": true, + "require_code_owner_review": false, + "require_last_push_approval": true, + "required_review_thread_resolution": false + } + } + ], + "bypass_actors": [] +} \ No newline at end of file diff --git a/src/standardize_repo_settings/util/logs.py b/src/standardize_repo_settings/util/logs.py index 25f3021..a8b5ef5 100644 --- a/src/standardize_repo_settings/util/logs.py +++ b/src/standardize_repo_settings/util/logs.py @@ -15,12 +15,6 @@ def setup_logging(): add_custom_info, structlog.processors.TimeStamper(fmt="iso"), structlog.processors.add_log_level, - structlog.processors.CallsiteParameterAdder( - [ - structlog.processors.CallsiteParameter.FILENAME, - structlog.processors.CallsiteParameter.FUNC_NAME, - ] - ), ] if sys.stderr.isatty(): From c4aa5f5d0fdafc4523304a0c7c41908482c627e3 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Wed, 21 Aug 2024 11:20:14 -0400 Subject: [PATCH 02/10] Cleanup and README update --- README.md | 41 +++++++++++++++---- pyproject.toml | 2 +- .../{app.py => add_repo_rulesets.py} | 11 ++++- src/standardize_repo_settings/util/date.py | 15 ------- tests/reichlab_python_template/test_app.py | 8 ---- .../unit/util/test_date.py | 10 ----- 6 files changed, 42 insertions(+), 45 deletions(-) rename src/standardize_repo_settings/{app.py => add_repo_rulesets.py} (94%) delete mode 100644 src/standardize_repo_settings/util/date.py delete mode 100644 tests/reichlab_python_template/test_app.py delete mode 100644 tests/reichlab_python_template/unit/util/test_date.py diff --git a/README.md b/README.md index a283125..f380590 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,41 @@ Tools to standardize repository settings in a specific GitHub organization. -For the Reich Lab repos, we've decided to: +For the Reich Lab repos, we've decided to apply the following settings to the default +branches (e.g., `main`) of our repos: -* Disallow direct pushes to the main branch -* Allow repo write access to all members of the Reich Lab organization -* Require code reviews before merging to the main branch +* Branch cannot be deleted +* Disallow direct pushes (must open a pull request instead) +* Require at least one reviewer approval before merging a pull request +* Require re-approval when changes are made to a pull request -## Setup for local development + +# Usage + +## Prerequisites + +* Write access to all repos in the reichlab GitHub organization +* A `GITHUB_TOKEN` environment variable that contains a [GitHub personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) + +## Running the code + +1. Install this Python package via pip: + + ```bash + pip install git+https://github.com/reichlab/standardize-repo-settings.git + ``` + +2. To apply the Reichlab's default branch rulesets to all repos in the Reichlab GitHub organization: + ```bash + add_default_rulesets + ``` + +# Setup for local development The steps below are for setting up a local development environment. This process entails more than just installing the package, because we need to ensure that all developers have a consistent, reproducible environment. -### Assumptions +## Assumptions Developers will be using a Python virtual environment that: @@ -21,7 +44,7 @@ Developers will be using a Python virtual environment that: - contains the dependency versions specified in the "lockfile" (in this case [requirements/requirements-dev.txt](requirements/requirements-dev.txt)). - contains the package installed in ["editable" mode](https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#working-in-development-mode). -### Setup steps +## Setup steps 1. Clone this repository @@ -61,12 +84,12 @@ Developers will be using a Python virtual environment that: python -m pytest ``` -## Development workflow +# Development workflow Because the package is installed in "editable" mode, you can run the code as though it were a normal Python package, while also being able to make changes and see them immediately. -### Updating dependencies +## Updating dependencies Prerequisites: - [`uv`](https://github.com/astral-sh/uv?tab=readme-ov-file#getting-started) diff --git a/pyproject.toml b/pyproject.toml index 7304101..327912c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dev = [ ] [project.entry-points."console_scripts"] -standardize_repo_settings = "standardize_repo_settings.app:main" +add_default_rulesets = "standardize_repo_settings.add_repo_rulesets:main" [build-system] # Minimum requirements for the build system to execute. diff --git a/src/standardize_repo_settings/app.py b/src/standardize_repo_settings/add_repo_rulesets.py similarity index 94% rename from src/standardize_repo_settings/app.py rename to src/standardize_repo_settings/add_repo_rulesets.py index 5b1b5a2..2a44947 100644 --- a/src/standardize_repo_settings/app.py +++ b/src/standardize_repo_settings/add_repo_rulesets.py @@ -13,6 +13,10 @@ setup_logging() logger = structlog.get_logger() + +GITHUB_ORG = "reichlab" +RULESET_TO_APPLY = "reichlab_default_branch_protections.json" + # source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 # (any repo with a WILL_BREAK column = FALSE) REPO_LIST = [ @@ -135,13 +139,16 @@ def apply_branch_ruleset(org_name: str, branch_ruleset: dict, session: requests. def main(): - org_name = "reichlab" + org_name = GITHUB_ORG token = os.getenv("GITHUB_TOKEN") + if not token: + logger.error("GITHUB_TOKEN environment variable is required") + return session = get_session(token) mod_path = Path(importlib.util.find_spec("standardize_repo_settings").origin).parent - ruleset_path = mod_path / "rulesets" / "reichlab_default_branch_protections.json" + ruleset_path = mod_path / "rulesets" / RULESET_TO_APPLY branch_ruleset = load_branch_ruleset(str(ruleset_path)) apply_branch_ruleset(org_name, branch_ruleset, session) diff --git a/src/standardize_repo_settings/util/date.py b/src/standardize_repo_settings/util/date.py deleted file mode 100644 index dacc3ff..0000000 --- a/src/standardize_repo_settings/util/date.py +++ /dev/null @@ -1,15 +0,0 @@ -import datetime - -import structlog - -logger = structlog.get_logger() - - -def get_current_date() -> str: - """Return current date in human-readable format.""" - - logger.info("getting the current date") - current_date = datetime.datetime.now() - formatted_date = current_date.strftime("%B %d, %Y") - - return formatted_date diff --git a/tests/reichlab_python_template/test_app.py b/tests/reichlab_python_template/test_app.py deleted file mode 100644 index 0cb43fc..0000000 --- a/tests/reichlab_python_template/test_app.py +++ /dev/null @@ -1,8 +0,0 @@ -from freezegun import freeze_time -from standardize_repo_settings.app import main - - -@freeze_time("2019-07-13") -def test_main_date(): - output = main() - assert "July 13, 2019" in output diff --git a/tests/reichlab_python_template/unit/util/test_date.py b/tests/reichlab_python_template/unit/util/test_date.py deleted file mode 100644 index cd13b6e..0000000 --- a/tests/reichlab_python_template/unit/util/test_date.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Unit tests for the date module.""" - -from freezegun import freeze_time -from standardize_repo_settings.util.date import get_current_date - - -@freeze_time("2024-01-02") -def test_current_date(): - cd = get_current_date() - assert cd == "January 02, 2024" From 517935de6ab732ac5f06a5b852be434f14103e72 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Wed, 21 Aug 2024 13:55:42 -0400 Subject: [PATCH 03/10] Add a script to archive repos --- pyproject.toml | 1 + .../add_repo_rulesets.py | 68 +++----- .../archive_repos.py | 150 ++++++++++++++++++ src/standardize_repo_settings/util/logs.py | 6 +- src/standardize_repo_settings/util/session.py | 29 ++++ 5 files changed, 207 insertions(+), 47 deletions(-) create mode 100644 src/standardize_repo_settings/archive_repos.py create mode 100644 src/standardize_repo_settings/util/session.py diff --git a/pyproject.toml b/pyproject.toml index 327912c..15c923d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dev = [ [project.entry-points."console_scripts"] add_default_rulesets = "standardize_repo_settings.add_repo_rulesets:main" +archive_repos = "standardize_repo_settings.archive_repos:main" [build-system] # Minimum requirements for the build system to execute. diff --git a/src/standardize_repo_settings/add_repo_rulesets.py b/src/standardize_repo_settings/add_repo_rulesets.py index 2a44947..907b6ba 100644 --- a/src/standardize_repo_settings/add_repo_rulesets.py +++ b/src/standardize_repo_settings/add_repo_rulesets.py @@ -5,10 +5,9 @@ import requests import structlog -from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry # type: ignore from standardize_repo_settings.util.logs import setup_logging +from standardize_repo_settings.util.session import get_session setup_logging() logger = structlog.get_logger() @@ -19,8 +18,9 @@ # source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 # (any repo with a WILL_BREAK column = FALSE) -REPO_LIST = [ +RULESET_REPO_LIST = [ "reichlab-python-template", + "duck-hub", # "container-utils", # "covidData", # "distfromq", @@ -53,30 +53,6 @@ ] -def get_session(token: str) -> requests.Session: - """Return a requests session with retry logic.""" - - headers = { - "Authorization": f"Bearer {token}", - "Accept": "application/vnd.github.v3+json", - "X-GitHub-Api-Version": "2022-11-28", - } - session = requests.Session() - - # attach a urllib3 retry adapter to the requests session - # https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.retry.Retry - retries = Retry( - total=5, - allowed_methods=frozenset(["GET", "POST"]), - backoff_factor=1, - status_forcelist=[500, 502, 503, 504], - ) - session.mount("https://", HTTPAdapter(max_retries=retries)) - session.headers.update(headers) - - return session - - def load_branch_ruleset(filepath: str) -> dict: """ Load branch ruleset from a JSON file. @@ -118,24 +94,30 @@ def apply_branch_ruleset(org_name: str, branch_ruleset: dict, session: requests. # Get all repositories in the organization repos = get_all_repos(org_name, session) - for repo in repos: + # Only update repos that are on our list and are not already archived + repos_to_update = [repo for repo in repos if (repo["name"] in RULESET_REPO_LIST and repo["archived"] is False)] + + update_count = 0 + for repo in repos_to_update: repo_name = repo["name"] logger.info(repo_name) - if repo_name in REPO_LIST: - branch_protection_url = f"https://api.github.com/repos/{org_name}/{repo_name}/rulesets" - - # Apply the branch ruleset - response = session.post(branch_protection_url, json=branch_ruleset) - if response.ok: - logger.info(f"Successfully applied branch ruleset to {repo_name}") - elif response.status_code == 422: - logger.warning( - "Failed to apply branch ruleset (likely because it already exists)", - repo=repo_name, - response=response.json(), - ) - else: - logger.error("Failed to apply branch ruleset", repo=repo_name, response=response.json()) + branch_protection_url = f"https://api.github.com/repos/{org_name}/{repo_name}/rulesets" + + # Apply the branch ruleset + response = session.post(branch_protection_url, json=branch_ruleset) + if response.ok: + logger.info(f"Successfully applied branch ruleset to {repo_name}") + update_count += 1 + elif response.status_code == 422: + logger.warning( + "Failed to apply branch ruleset (likely because it already exists)", + repo=repo_name, + response=response.json(), + ) + else: + logger.error("Failed to apply branch ruleset", repo=repo_name, response=response.json()) + + logger.info("All rulesets applied", count=update_count) def main(): diff --git a/src/standardize_repo_settings/archive_repos.py b/src/standardize_repo_settings/archive_repos.py new file mode 100644 index 0000000..28e3ef4 --- /dev/null +++ b/src/standardize_repo_settings/archive_repos.py @@ -0,0 +1,150 @@ +import os + +import requests +import structlog + +from standardize_repo_settings.util.logs import setup_logging +from standardize_repo_settings.util.session import get_session + +setup_logging() +logger = structlog.get_logger() + + +GITHUB_ORG = "reichlab" +RULESET_TO_APPLY = "reichlab_default_branch_protections.json" + +# source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 +# (any repo with candidate_for_archive column = TRUE) +ARCHIVE_REPO_LIST = [ + "duck-hub", + # "ensemble-comparison", + # "Zoltar-Vizualization", + # "container-demo-app", + # "2017-2018-cdc-flu-contest", + # "2018-2019-cdc-flu-contest", + # "activemonitr", + # "adaptively-weighted-ensemble", + # "ALERT", + # "annual-predictions-paper", + # "ardfa", + # "article-disease-pred-with-kcde", + # "bayesian_non_parametric", + # "casebot", + # "cdcfluforecasts", + # "cdcfluutils", + # "cdcForecastUtils", + # "covid-hosp-forecasts-with-cases", + # "covid19-ensemble-methods-manuscript", + # "covid19-forecast-evals", + # "d3-foresight", + # "dengue-data-stub", + # "dengue-ssr-prediction", + # "dengue-thailand-2014-forecasts", + # "densitystackr", + # "diffport", + # "ensemble-size", + # "flu-eda", + # "flusight-csv-tools", + # "Flusight-forecast-data", + # "FluSight-package", + # "flusight-test", + # "flusurv-forecasts-2020-2021", + # "forecast-framework-demos", + # "forecastTools", + # "foresight-visualization-template", + # "german-flu-forecasting", + # "hubEnsembles", + # "kcde", + # "ledge", + # "lssm", + # "make-example", + # "mmwr-week", + # "mvtnorm-mod-kcde", + # "ncov", + # "neural-stack", + # "nuxt-forecast-viz", + # "online-lag-ensemble", + # "pdtmvn", + # "pkr", + # "proper-scores-comparison", + # "pylssm", + # "reviewMID", + # "shiny-predictions", + # "ssr-influenza-competition", + # "style", + # "tracking-ensemble", + # "TSIRsim", + # "xgboost-mod", + # "xgbstack", + # "xpull", +] + + +def get_all_repos(org_name: str, session: requests.Session) -> list[dict]: + """ + Retrieve all repositories from a GitHub organization, handling pagination. + + :param org_name: Name of the GitHub organization + :param session: Requests session for interacting with the GitHub API + :return: List of repositories + """ + repos = [] + repos_url = f"https://api.github.com/orgs/{org_name}/repos" + while repos_url: + response = session.get(repos_url) + response.raise_for_status() + repos.extend(response.json()) + repos_url = response.links.get("next", {}).get("url") + + logger.info("Retrieved repositories", org=org_name, repo_count=len(repos)) + return repos + + +def archive_repo(org_name: str, session: requests.Session): + """ + Archive repositories in the organization. + + :param org_name: Name of the GitHub organization + :param session: Requests session for interacting with the GitHub API + """ + + # Get all repositories in the organization + repos = get_all_repos(org_name, session) + repo_updates = { + "archived": True, + } + + # Only archive repos that are on our list and are not already archived + repos_to_update = [repo for repo in repos if (repo["name"] in ARCHIVE_REPO_LIST and repo["archived"] is False)] + + update_count = 0 + for repo in repos_to_update: + repo_name = repo["name"] + logger.info(repo_name) + repo_url = f"https://api.github.com/repos/{org_name}/{repo_name}" + + # Archive the repo + response = session.patch(repo_url, json=repo_updates) + if response.ok: + logger.info(f"Successfully archived {repo_name}") + update_count += 1 + else: + logger.error("Failed to update repo", repo=repo_name, response=response.json()) + + logger.info("Repository archive complete", count=update_count) + + +def main(): + org_name = GITHUB_ORG + token = os.getenv("GITHUB_TOKEN") + if not token: + logger.error("GITHUB_TOKEN environment variable is required") + return + + session = get_session(token) + + archive_repo(org_name, session) + + +if __name__ == "__main__": + main() diff --git a/src/standardize_repo_settings/util/logs.py b/src/standardize_repo_settings/util/logs.py index a8b5ef5..c2651b9 100644 --- a/src/standardize_repo_settings/util/logs.py +++ b/src/standardize_repo_settings/util/logs.py @@ -2,18 +2,16 @@ import structlog -import standardize_repo_settings - def add_custom_info(logger, method_name, event_dict): - event_dict["version"] = standardize_repo_settings.__version__ + # placeholder for custom log info return event_dict def setup_logging(): shared_processors = [ add_custom_info, - structlog.processors.TimeStamper(fmt="iso"), + structlog.processors.TimeStamper(fmt="%Y-%m-%d %H:%M:%S"), structlog.processors.add_log_level, ] diff --git a/src/standardize_repo_settings/util/session.py b/src/standardize_repo_settings/util/session.py new file mode 100644 index 0000000..2870024 --- /dev/null +++ b/src/standardize_repo_settings/util/session.py @@ -0,0 +1,29 @@ +"""Code to handle requests sessions.""" + +import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry # type: ignore + + +def get_session(token: str) -> requests.Session: + """Return a requests session with retry logic.""" + + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github.v3+json", + "X-GitHub-Api-Version": "2022-11-28", + } + session = requests.Session() + + # attach a urllib3 retry adapter to the requests session + # https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.retry.Retry + retries = Retry( + total=5, + allowed_methods=frozenset(["GET", "POST"]), + backoff_factor=1, + status_forcelist=[500, 502, 503, 504], + ) + session.mount("https://", HTTPAdapter(max_retries=retries)) + session.headers.update(headers) + + return session From 7e177b0b8ad92e6be453ed8323dd4849dcc14dcc Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Wed, 21 Aug 2024 14:10:12 -0400 Subject: [PATCH 04/10] Rename repo and module --- README.md | 2 +- pyproject.toml | 10 +++++----- requirements/requirements-dev.txt | 14 +++++++------- requirements/requirements.txt | 6 +++--- .../__init__.py | 0 .../add_repo_rulesets.py | 6 +++--- .../archive_repos.py | 4 ++-- .../reichlab_default_branch_protections.json | 0 .../util/__init__.py | 0 .../util/logs.py | 0 .../util/session.py | 0 11 files changed, 21 insertions(+), 21 deletions(-) rename src/{standardize_repo_settings => reichlab_repo_utils}/__init__.py (100%) rename src/{standardize_repo_settings => reichlab_repo_utils}/add_repo_rulesets.py (94%) rename src/{standardize_repo_settings => reichlab_repo_utils}/archive_repos.py (97%) rename src/{standardize_repo_settings => reichlab_repo_utils}/rulesets/reichlab_default_branch_protections.json (100%) rename src/{standardize_repo_settings => reichlab_repo_utils}/util/__init__.py (100%) rename src/{standardize_repo_settings => reichlab_repo_utils}/util/logs.py (100%) rename src/{standardize_repo_settings => reichlab_repo_utils}/util/session.py (100%) diff --git a/README.md b/README.md index f380590..c25eac4 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ Developers will be using a Python virtual environment that: 2. Change to the repo's root directory: ```bash - cd standardize-repo-settings + cd reichlab-repo-utils ``` 3. Make sure the correct version of Python is currently active, and create a Python virtual environment: diff --git a/pyproject.toml b/pyproject.toml index 15c923d..2a6978d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "standardize-repo-settings" +name = "reichlab-repo-utils" description = "Standardize GitHub repository settings" license = {text = "MIT License"} readme = "README.md" @@ -25,15 +25,15 @@ dev = [ ] [project.entry-points."console_scripts"] -add_default_rulesets = "standardize_repo_settings.add_repo_rulesets:main" -archive_repos = "standardize_repo_settings.archive_repos:main" +add_default_rulesets = "reichlab_repo_utils.add_repo_rulesets:main" +archive_repos = "reichlab_repo_utils.archive_repos:main" [build-system] # Minimum requirements for the build system to execute. requires = ["setuptools", "wheel"] [tools.setuptools] -packages = ["standardize_repo_settings"] +packages = ["reichlab_repo_utils"] [tool.ruff] line-length = 120 @@ -46,4 +46,4 @@ inline-quotes = "double" quote-style = "double" [tool.setuptools.dynamic] -version = {attr = "standardize_repo_settings.__version__"} +version = {attr = "reichlab_repo_utils.__version__"} diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 1fc5547..4ad7f41 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -21,7 +21,7 @@ markdown-it-py==3.0.0 mdurl==0.1.2 # via markdown-it-py mypy==1.10.0 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) mypy-extensions==1.0.0 # via mypy nodeenv==1.8.0 @@ -33,23 +33,23 @@ platformdirs==4.2.1 pluggy==1.5.0 # via pytest pre-commit==3.7.0 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) pygments==2.18.0 # via rich pytest==8.2.0 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) pyyaml==6.0.1 # via pre-commit requests==2.32.3 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) rich==13.7.1 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) ruff==0.4.3 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) setuptools==72.1.0 # via nodeenv structlog==24.1.0 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) typing-extensions==4.11.0 # via mypy urllib3==2.2.2 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 79782e1..a77bac8 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -13,10 +13,10 @@ mdurl==0.1.2 pygments==2.18.0 # via rich requests==2.32.3 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) rich==13.7.1 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) structlog==24.1.0 - # via standardize-repo-settings (pyproject.toml) + # via reichlab-repo-utils (pyproject.toml) urllib3==2.2.2 # via requests diff --git a/src/standardize_repo_settings/__init__.py b/src/reichlab_repo_utils/__init__.py similarity index 100% rename from src/standardize_repo_settings/__init__.py rename to src/reichlab_repo_utils/__init__.py diff --git a/src/standardize_repo_settings/add_repo_rulesets.py b/src/reichlab_repo_utils/add_repo_rulesets.py similarity index 94% rename from src/standardize_repo_settings/add_repo_rulesets.py rename to src/reichlab_repo_utils/add_repo_rulesets.py index 907b6ba..be72f7a 100644 --- a/src/standardize_repo_settings/add_repo_rulesets.py +++ b/src/reichlab_repo_utils/add_repo_rulesets.py @@ -6,8 +6,8 @@ import requests import structlog -from standardize_repo_settings.util.logs import setup_logging -from standardize_repo_settings.util.session import get_session +from reichlab_repo_utils.util.logs import setup_logging +from reichlab_repo_utils.util.session import get_session setup_logging() logger = structlog.get_logger() @@ -129,7 +129,7 @@ def main(): session = get_session(token) - mod_path = Path(importlib.util.find_spec("standardize_repo_settings").origin).parent + mod_path = Path(importlib.util.find_spec("reichlab_repo_utils").origin).parent ruleset_path = mod_path / "rulesets" / RULESET_TO_APPLY branch_ruleset = load_branch_ruleset(str(ruleset_path)) diff --git a/src/standardize_repo_settings/archive_repos.py b/src/reichlab_repo_utils/archive_repos.py similarity index 97% rename from src/standardize_repo_settings/archive_repos.py rename to src/reichlab_repo_utils/archive_repos.py index 28e3ef4..7a2ca77 100644 --- a/src/standardize_repo_settings/archive_repos.py +++ b/src/reichlab_repo_utils/archive_repos.py @@ -3,8 +3,8 @@ import requests import structlog -from standardize_repo_settings.util.logs import setup_logging -from standardize_repo_settings.util.session import get_session +from reichlab_repo_utils.util.logs import setup_logging +from reichlab_repo_utils.util.session import get_session setup_logging() logger = structlog.get_logger() diff --git a/src/standardize_repo_settings/rulesets/reichlab_default_branch_protections.json b/src/reichlab_repo_utils/rulesets/reichlab_default_branch_protections.json similarity index 100% rename from src/standardize_repo_settings/rulesets/reichlab_default_branch_protections.json rename to src/reichlab_repo_utils/rulesets/reichlab_default_branch_protections.json diff --git a/src/standardize_repo_settings/util/__init__.py b/src/reichlab_repo_utils/util/__init__.py similarity index 100% rename from src/standardize_repo_settings/util/__init__.py rename to src/reichlab_repo_utils/util/__init__.py diff --git a/src/standardize_repo_settings/util/logs.py b/src/reichlab_repo_utils/util/logs.py similarity index 100% rename from src/standardize_repo_settings/util/logs.py rename to src/reichlab_repo_utils/util/logs.py diff --git a/src/standardize_repo_settings/util/session.py b/src/reichlab_repo_utils/util/session.py similarity index 100% rename from src/standardize_repo_settings/util/session.py rename to src/reichlab_repo_utils/util/session.py From 74652f1bce7af838989e76857ca93098eabb3b83 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Wed, 21 Aug 2024 14:20:39 -0400 Subject: [PATCH 05/10] Remove get_all_repos duplication and move repo lists to __init__.py --- src/reichlab_repo_utils/__init__.py | 102 +++++++++++++++++++ src/reichlab_repo_utils/add_repo_rulesets.py | 57 +---------- src/reichlab_repo_utils/archive_repos.py | 89 +--------------- src/reichlab_repo_utils/util/repo.py | 21 ++++ 4 files changed, 127 insertions(+), 142 deletions(-) create mode 100644 src/reichlab_repo_utils/util/repo.py diff --git a/src/reichlab_repo_utils/__init__.py b/src/reichlab_repo_utils/__init__.py index f102a9c..29b21e0 100644 --- a/src/reichlab_repo_utils/__init__.py +++ b/src/reichlab_repo_utils/__init__.py @@ -1 +1,103 @@ __version__ = "0.0.1" + +# source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 +# (any repo with a WILL_BREAK column = FALSE) +RULESET_REPO_LIST = [ + "reichlab-python-template", + "duck-hub", + # "container-utils", + # "covidData", + # "distfromq", + # "docs.zoltardata", + # "ensemble-comparison", + # "flu-hosp-models-2021-2022", + # "flusion", + # "forecast-repository", + # "gbq_operational", + # "genomicdata", + # "hub-infrastructure-experiments", + # "idforecastutils", + # "jacques", + # "jacques-covid", + # "llmtime", + # "malaria-serology", + # "predictability", + # "predtimechart", + # "qenspy", + # "qensr", + # "rclp", + # "sarimaTD", + # "sarix-covid", + # "simplets", + # "timeseriesutils", + # "variant-nowcast-hub", + # "Zoltar-Vizualization", + # "zoltpy", + # "zoltr", +] + +# source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 +# (any repo with candidate_for_archive column = TRUE) +ARCHIVE_REPO_LIST = [ + "duck-hub", + # "ensemble-comparison", + # "Zoltar-Vizualization", + # "container-demo-app", + # "2017-2018-cdc-flu-contest", + # "2018-2019-cdc-flu-contest", + # "activemonitr", + # "adaptively-weighted-ensemble", + # "ALERT", + # "annual-predictions-paper", + # "ardfa", + # "article-disease-pred-with-kcde", + # "bayesian_non_parametric", + # "casebot", + # "cdcfluforecasts", + # "cdcfluutils", + # "cdcForecastUtils", + # "covid-hosp-forecasts-with-cases", + # "covid19-ensemble-methods-manuscript", + # "covid19-forecast-evals", + # "d3-foresight", + # "dengue-data-stub", + # "dengue-ssr-prediction", + # "dengue-thailand-2014-forecasts", + # "densitystackr", + # "diffport", + # "ensemble-size", + # "flu-eda", + # "flusight-csv-tools", + # "Flusight-forecast-data", + # "FluSight-package", + # "flusight-test", + # "flusurv-forecasts-2020-2021", + # "forecast-framework-demos", + # "forecastTools", + # "foresight-visualization-template", + # "german-flu-forecasting", + # "hubEnsembles", + # "kcde", + # "ledge", + # "lssm", + # "make-example", + # "mmwr-week", + # "mvtnorm-mod-kcde", + # "ncov", + # "neural-stack", + # "nuxt-forecast-viz", + # "online-lag-ensemble", + # "pdtmvn", + # "pkr", + # "proper-scores-comparison", + # "pylssm", + # "reviewMID", + # "shiny-predictions", + # "ssr-influenza-competition", + # "style", + # "tracking-ensemble", + # "TSIRsim", + # "xgboost-mod", + # "xgbstack", + # "xpull", +] diff --git a/src/reichlab_repo_utils/add_repo_rulesets.py b/src/reichlab_repo_utils/add_repo_rulesets.py index be72f7a..1a61c66 100644 --- a/src/reichlab_repo_utils/add_repo_rulesets.py +++ b/src/reichlab_repo_utils/add_repo_rulesets.py @@ -6,52 +6,17 @@ import requests import structlog +from reichlab_repo_utils import RULESET_REPO_LIST from reichlab_repo_utils.util.logs import setup_logging +from reichlab_repo_utils.util.repo import get_all_repos from reichlab_repo_utils.util.session import get_session setup_logging() logger = structlog.get_logger() - GITHUB_ORG = "reichlab" RULESET_TO_APPLY = "reichlab_default_branch_protections.json" -# source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 -# (any repo with a WILL_BREAK column = FALSE) -RULESET_REPO_LIST = [ - "reichlab-python-template", - "duck-hub", - # "container-utils", - # "covidData", - # "distfromq", - # "docs.zoltardata", - # "ensemble-comparison", - # "flu-hosp-models-2021-2022", - # "flusion", - # "forecast-repository", - # "gbq_operational", - # "genomicdata", - # "hub-infrastructure-experiments", - # "idforecastutils", - # "jacques", - # "jacques-covid", - # "llmtime", - # "malaria-serology", - # "predictability", - # "predtimechart", - # "qenspy", - # "qensr", - # "rclp", - # "sarimaTD", - # "sarix-covid", - # "simplets", - # "timeseriesutils", - # "variant-nowcast-hub", - # "Zoltar-Vizualization", - # "zoltpy", - # "zoltr", -] - def load_branch_ruleset(filepath: str) -> dict: """ @@ -64,24 +29,6 @@ def load_branch_ruleset(filepath: str) -> dict: return json.load(file) -def get_all_repos(org_name: str, session: requests.Session) -> list[dict]: - """ - Retrieve all repositories from a GitHub organization, handling pagination. - - :param org_name: Name of the GitHub organization - :param session: Requests session for interacting with the GitHub API - :return: List of repositories - """ - repos = [] - repos_url = f"https://api.github.com/orgs/{org_name}/repos" - while repos_url: - response = session.get(repos_url) - response.raise_for_status() - repos.extend(response.json()) - repos_url = response.links.get("next", {}).get("url") - return repos - - def apply_branch_ruleset(org_name: str, branch_ruleset: dict, session: requests.Session): """ Apply a branch ruleset to every repository in a GitHub organization. diff --git a/src/reichlab_repo_utils/archive_repos.py b/src/reichlab_repo_utils/archive_repos.py index 7a2ca77..e8e0ec9 100644 --- a/src/reichlab_repo_utils/archive_repos.py +++ b/src/reichlab_repo_utils/archive_repos.py @@ -3,102 +3,17 @@ import requests import structlog +from reichlab_repo_utils import ARCHIVE_REPO_LIST from reichlab_repo_utils.util.logs import setup_logging +from reichlab_repo_utils.util.repo import get_all_repos from reichlab_repo_utils.util.session import get_session setup_logging() logger = structlog.get_logger() - GITHUB_ORG = "reichlab" RULESET_TO_APPLY = "reichlab_default_branch_protections.json" -# source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 -# (any repo with candidate_for_archive column = TRUE) -ARCHIVE_REPO_LIST = [ - "duck-hub", - # "ensemble-comparison", - # "Zoltar-Vizualization", - # "container-demo-app", - # "2017-2018-cdc-flu-contest", - # "2018-2019-cdc-flu-contest", - # "activemonitr", - # "adaptively-weighted-ensemble", - # "ALERT", - # "annual-predictions-paper", - # "ardfa", - # "article-disease-pred-with-kcde", - # "bayesian_non_parametric", - # "casebot", - # "cdcfluforecasts", - # "cdcfluutils", - # "cdcForecastUtils", - # "covid-hosp-forecasts-with-cases", - # "covid19-ensemble-methods-manuscript", - # "covid19-forecast-evals", - # "d3-foresight", - # "dengue-data-stub", - # "dengue-ssr-prediction", - # "dengue-thailand-2014-forecasts", - # "densitystackr", - # "diffport", - # "ensemble-size", - # "flu-eda", - # "flusight-csv-tools", - # "Flusight-forecast-data", - # "FluSight-package", - # "flusight-test", - # "flusurv-forecasts-2020-2021", - # "forecast-framework-demos", - # "forecastTools", - # "foresight-visualization-template", - # "german-flu-forecasting", - # "hubEnsembles", - # "kcde", - # "ledge", - # "lssm", - # "make-example", - # "mmwr-week", - # "mvtnorm-mod-kcde", - # "ncov", - # "neural-stack", - # "nuxt-forecast-viz", - # "online-lag-ensemble", - # "pdtmvn", - # "pkr", - # "proper-scores-comparison", - # "pylssm", - # "reviewMID", - # "shiny-predictions", - # "ssr-influenza-competition", - # "style", - # "tracking-ensemble", - # "TSIRsim", - # "xgboost-mod", - # "xgbstack", - # "xpull", -] - - -def get_all_repos(org_name: str, session: requests.Session) -> list[dict]: - """ - Retrieve all repositories from a GitHub organization, handling pagination. - - :param org_name: Name of the GitHub organization - :param session: Requests session for interacting with the GitHub API - :return: List of repositories - """ - repos = [] - repos_url = f"https://api.github.com/orgs/{org_name}/repos" - while repos_url: - response = session.get(repos_url) - response.raise_for_status() - repos.extend(response.json()) - repos_url = response.links.get("next", {}).get("url") - - logger.info("Retrieved repositories", org=org_name, repo_count=len(repos)) - return repos - def archive_repo(org_name: str, session: requests.Session): """ diff --git a/src/reichlab_repo_utils/util/repo.py b/src/reichlab_repo_utils/util/repo.py new file mode 100644 index 0000000..78ee348 --- /dev/null +++ b/src/reichlab_repo_utils/util/repo.py @@ -0,0 +1,21 @@ +"""Functions to get information about GitHub repositories.""" + +import requests + + +def get_all_repos(org_name: str, session: requests.Session) -> list[dict]: + """ + Retrieve all repositories from a GitHub organization, handling pagination. + + :param org_name: Name of the GitHub organization + :param session: Requests session for interacting with the GitHub API + :return: List of repositories + """ + repos = [] + repos_url = f"https://api.github.com/orgs/{org_name}/repos" + while repos_url: + response = session.get(repos_url) + response.raise_for_status() + repos.extend(response.json()) + repos_url = response.links.get("next", {}).get("url") + return repos From d9208bc9601791d2d0837f066447b0872b763073 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Wed, 21 Aug 2024 16:57:34 -0400 Subject: [PATCH 06/10] Add a script to list repos --- .pre-commit-config.yaml | 5 -- pyproject.toml | 1 + src/reichlab_repo_utils/archive_repos.py | 7 +- src/reichlab_repo_utils/list_repos.py | 87 ++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 8 deletions(-) create mode 100644 src/reichlab_repo_utils/list_repos.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fcdaa28..051465b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,11 +18,6 @@ repos: - id: detect-aws-credentials args: [--allow-missing-credentials] - id: detect-private-key -- repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.10.1' # Use the sha / tag you want to point at - hooks: - - id: mypy - additional_dependencies: [types-all] - repo: https://github.com/codespell-project/codespell rev: v2.3.0 hooks: diff --git a/pyproject.toml b/pyproject.toml index 2a6978d..f7da84f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dev = [ [project.entry-points."console_scripts"] add_default_rulesets = "reichlab_repo_utils.add_repo_rulesets:main" archive_repos = "reichlab_repo_utils.archive_repos:main" +list_repos = "reichlab_repo_utils.list_repos:main" [build-system] # Minimum requirements for the build system to execute. diff --git a/src/reichlab_repo_utils/archive_repos.py b/src/reichlab_repo_utils/archive_repos.py index e8e0ec9..f17b4a9 100644 --- a/src/reichlab_repo_utils/archive_repos.py +++ b/src/reichlab_repo_utils/archive_repos.py @@ -25,13 +25,14 @@ def archive_repo(org_name: str, session: requests.Session): # Get all repositories in the organization repos = get_all_repos(org_name, session) + # Only archive repos that are on our list and are not already archived + repos_to_update = [repo for repo in repos if (repo["name"] in ARCHIVE_REPO_LIST and repo["archived"] is False)] + + # payload for updating the repo to archive status repo_updates = { "archived": True, } - # Only archive repos that are on our list and are not already archived - repos_to_update = [repo for repo in repos if (repo["name"] in ARCHIVE_REPO_LIST and repo["archived"] is False)] - update_count = 0 for repo in repos_to_update: repo_name = repo["name"] diff --git a/src/reichlab_repo_utils/list_repos.py b/src/reichlab_repo_utils/list_repos.py new file mode 100644 index 0000000..b166bd2 --- /dev/null +++ b/src/reichlab_repo_utils/list_repos.py @@ -0,0 +1,87 @@ +import os +from collections import namedtuple +from itertools import zip_longest + +import requests +import structlog +from rich.console import Console +from rich.style import Style +from rich.table import Table + +from reichlab_repo_utils.util.logs import setup_logging +from reichlab_repo_utils.util.repo import get_all_repos +from reichlab_repo_utils.util.session import get_session + +setup_logging() +logger = structlog.get_logger() + +GITHUB_ORG = "reichlab" + + +def list_repos(org_name: str, session: requests.Session): + """ + Archive repositories in the organization. + + :param org_name: Name of the GitHub organization + :param session: Requests session for interacting with the GitHub API + """ + + # Settings for the output columns when listing repo information + output_column_list = ["name", "created_at", "archived", "visibility", "id"] + output_column_colors = ["green", "magenta", "cyan", "blue", "yellow"] + OutputColumns = namedtuple( + "OutputColumns", + output_column_list, + ) + + # Create the output table and columns + console = Console() + table = Table( + title=f"Repositories in the {org_name} GitHub organization", + ) + for col, color in zip_longest(output_column_list, output_column_colors, fillvalue="cyan"): + # add additional attributes, depending on the column + style_kwargs = {} + col_kwargs = {} + if col == "name": + col_kwargs = {"ratio": 4} + style_kwargs = {"link": True} + + style = Style(color=color, **style_kwargs) + table.add_column(col, style=style, **col_kwargs) + + repos = get_all_repos(org_name, session) + repo_count = len(repos) + + for repo in repos: + r = OutputColumns( + name=f"[link={repo.get('html_url')}]{repo.get('name')}[/link]", + created_at=str(repo.get("created_at", "")), + archived=str(repo.get("archived", "")), + visibility=str(repo.get("visibility", "")), + id=str(repo.get("id", "")), + ) + try: + table.add_row(*r) + except Exception as e: + logger.error(f"Error adding row for repo {r.name}: {e}") + + logger.info("Repository report complete", count=repo_count) + + console.print(table) + + +def main(): + org_name = GITHUB_ORG + token = os.getenv("GITHUB_TOKEN") + if not token: + logger.error("GITHUB_TOKEN environment variable is required") + return + + session = get_session(token) + + list_repos(org_name, session) + + +if __name__ == "__main__": + main() From 6f2213c354611a3e101202632adc740d8c4da2c2 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Thu, 22 Aug 2024 12:53:57 -0400 Subject: [PATCH 07/10] Update lists of repos for archival and branch protections --- src/reichlab_repo_utils/__init__.py | 231 +++++++++++++++++----------- 1 file changed, 140 insertions(+), 91 deletions(-) diff --git a/src/reichlab_repo_utils/__init__.py b/src/reichlab_repo_utils/__init__.py index 29b21e0..f3373e5 100644 --- a/src/reichlab_repo_utils/__init__.py +++ b/src/reichlab_repo_utils/__init__.py @@ -1,103 +1,152 @@ __version__ = "0.0.1" +# add_repo_rulesets.py will apply branch protections to the repos in this list +# branch protections are defined in rulesets/reichlab_default_branch_protections.json # source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 -# (any repo with a WILL_BREAK column = FALSE) +# (any repo with "add branch protections" column = TRUE) RULESET_REPO_LIST = [ + "container-utils", + "covid-hosp-models", + "covidData", + "distfromq", + "docs.zoltardata", + "election-forecast", + "flu-hosp-models-2021-2022", + "flusion", + "forecast-repository", + "gbq_operational", + "genomicdata", + "hub-infrastructure-experiments", + "idforecastutils", + "jacques", + "jacques-covid", + "llmtime", + "malaria-serology", + "predictability", + "predtimechart", + "pymmwr", + "qenspy", + "qensr", + "rclp", + "sarimaTD", + "sarix-covid", + "simplets", + "timeseriesutils", + "variant-nowcast-hub", + "zoltpy", + "zoltr", + "se-asia-annual-preds", + "Dengue-district-map", + "spatialpred", + "dengueThailand", + "bangkok-forecasting", + "dengue-reporting", + "chikv-inference", + "2016-2017-flu-contest-ensembles", + "delay-analysis-method", + "R6spatialpred", + "dengue-data", + "under-reported", + "delay-analysis-dynamic", + "kcde-via-stacking", + "forecastingForest", + "thaiDhfDwe", + "challenges-inference-filtering", + "thai-dengue-district-challenge", + "delay-analysis-thailand", + "survival-densities", + "dengue-scraps", + "moph-forecast-files", + "wiki", + "pfep", + "hforecast", + "flusense-data", + "covid19ILIUMassCoEModels", + "covid-19-ili-forecasting-models", + "epiWaves", + "wnv", + "hierarchicalGP", + "midas2021", + "covid-forecast-eval-post", + "abc-model-selection", + "conditional-calibration", + "flusion-manuscript", + "pytorch-exploration", + "FluSight-forecast-hub", + "streamlit-flusight", + "predtimechart-s3-example", + "hubverse-cloud-viz", + "trendsEnsemble", "reichlab-python-template", - "duck-hub", - # "container-utils", - # "covidData", - # "distfromq", - # "docs.zoltardata", - # "ensemble-comparison", - # "flu-hosp-models-2021-2022", - # "flusion", - # "forecast-repository", - # "gbq_operational", - # "genomicdata", - # "hub-infrastructure-experiments", - # "idforecastutils", - # "jacques", - # "jacques-covid", - # "llmtime", - # "malaria-serology", - # "predictability", - # "predtimechart", - # "qenspy", - # "qensr", - # "rclp", - # "sarimaTD", - # "sarix-covid", - # "simplets", - # "timeseriesutils", - # "variant-nowcast-hub", - # "Zoltar-Vizualization", - # "zoltpy", - # "zoltr", + "reichlab-repo-utils", + "virus-clade-utils", ] +# archive_repos.py will archive the repos in this list # source: https://docs.google.com/spreadsheets/d/1UaVsqGQ2uyI42t8HWTQjt0MthQJ-o4Yom0-Q2ahBnJc/edit?gid=1230520805#gid=1230520805 # (any repo with candidate_for_archive column = TRUE) ARCHIVE_REPO_LIST = [ + "2017-2018-cdc-flu-contest", + "2018-2019-cdc-flu-contest", + "activemonitr", + "adaptively-weighted-ensemble", + "ALERT", + "annual-predictions-paper", + "ardfa", + "article-disease-pred-with-kcde", + "bayesian_non_parametric", + "casebot", + "cdcfluforecasts", + "cdcfluutils", + "cdcForecastUtils", + "container-demo-app", + "covid-hosp-forecasts-with-cases", + "covid19-ensemble-methods-manuscript", + "covid19-forecast-evals", + "covid19-forecast-hub-validations", + "d3-foresight", + "dengue-data-stub", + "dengue-ssr-prediction", + "dengue-thailand-2014-forecasts", + "densitystackr", + "diffport", + "ensemble-comparison", + "ensemble-size", + "flu-eda", + "flusight-csv-tools", + "Flusight-forecast-data", + "FluSight-package", + "flusight-test", + "flusurv-forecasts-2020-2021", + "forecast-framework-demos", + "forecastTools", + "foresight-visualization-template", + "german-flu-forecasting", + "hubEnsembles", + "kcde", + "ledge", + "lssm", + "make-example", + "mmwr-week", + "mvtnorm-mod-kcde", + "ncov", + "neural-stack", + "nuxt-forecast-viz", + "online-lag-ensemble", + "pdtmvn", + "pkr", + "proper-scores-comparison", + "pykwalify", + "pylssm", + "reviewMID", + "shiny-predictions", + "ssr-influenza-competition", + "style", + "tracking-ensemble", + "TSIRsim", + "xgboost-mod", + "xgbstack", + "xpull", + "Zoltar-Vizualization", "duck-hub", - # "ensemble-comparison", - # "Zoltar-Vizualization", - # "container-demo-app", - # "2017-2018-cdc-flu-contest", - # "2018-2019-cdc-flu-contest", - # "activemonitr", - # "adaptively-weighted-ensemble", - # "ALERT", - # "annual-predictions-paper", - # "ardfa", - # "article-disease-pred-with-kcde", - # "bayesian_non_parametric", - # "casebot", - # "cdcfluforecasts", - # "cdcfluutils", - # "cdcForecastUtils", - # "covid-hosp-forecasts-with-cases", - # "covid19-ensemble-methods-manuscript", - # "covid19-forecast-evals", - # "d3-foresight", - # "dengue-data-stub", - # "dengue-ssr-prediction", - # "dengue-thailand-2014-forecasts", - # "densitystackr", - # "diffport", - # "ensemble-size", - # "flu-eda", - # "flusight-csv-tools", - # "Flusight-forecast-data", - # "FluSight-package", - # "flusight-test", - # "flusurv-forecasts-2020-2021", - # "forecast-framework-demos", - # "forecastTools", - # "foresight-visualization-template", - # "german-flu-forecasting", - # "hubEnsembles", - # "kcde", - # "ledge", - # "lssm", - # "make-example", - # "mmwr-week", - # "mvtnorm-mod-kcde", - # "ncov", - # "neural-stack", - # "nuxt-forecast-viz", - # "online-lag-ensemble", - # "pdtmvn", - # "pkr", - # "proper-scores-comparison", - # "pylssm", - # "reviewMID", - # "shiny-predictions", - # "ssr-influenza-competition", - # "style", - # "tracking-ensemble", - # "TSIRsim", - # "xgboost-mod", - # "xgbstack", - # "xpull", ] From 8933145faaddd50292fce194985a2fdcb6a7d83c Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Thu, 22 Aug 2024 14:12:20 -0400 Subject: [PATCH 08/10] Fix mypy errors --- .pre-commit-config.yaml | 5 +++++ pyproject.toml | 8 ++++++++ requirements/requirements-dev.txt | 6 +++++- src/reichlab_repo_utils/list_repos.py | 16 ++++++++++------ 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 051465b..c68b6e5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,11 @@ repos: - id: detect-aws-credentials args: [--allow-missing-credentials] - id: detect-private-key +- repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v1.10.1' # Use the sha / tag you want to point at + hooks: + - id: mypy + additional_dependencies: [types-requests] - repo: https://github.com/codespell-project/codespell rev: v2.3.0 hooks: diff --git a/pyproject.toml b/pyproject.toml index f7da84f..87b0c0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dev = [ "pre-commit", "pytest", "ruff", + "types-requests", ] [project.entry-points."console_scripts"] @@ -48,3 +49,10 @@ quote-style = "double" [tool.setuptools.dynamic] version = {attr = "reichlab_repo_utils.__version__"} + +[tool.mypy] +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = ["rich.*"] +follow_imports = "skip" diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 4ad7f41..9aad875 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -50,9 +50,13 @@ setuptools==72.1.0 # via nodeenv structlog==24.1.0 # via reichlab-repo-utils (pyproject.toml) +types-requests==2.32.0.20240712 + # via reichlab-repo-utils (pyproject.toml) typing-extensions==4.11.0 # via mypy urllib3==2.2.2 - # via requests + # via + # requests + # types-requests virtualenv==20.26.1 # via pre-commit diff --git a/src/reichlab_repo_utils/list_repos.py b/src/reichlab_repo_utils/list_repos.py index b166bd2..b948a6d 100644 --- a/src/reichlab_repo_utils/list_repos.py +++ b/src/reichlab_repo_utils/list_repos.py @@ -1,6 +1,6 @@ import os -from collections import namedtuple from itertools import zip_longest +from typing import NamedTuple import requests import structlog @@ -18,6 +18,14 @@ GITHUB_ORG = "reichlab" +class OutputColumns(NamedTuple): + name: str + created_at: str + archived: str + visibility: str + id: str + + def list_repos(org_name: str, session: requests.Session): """ Archive repositories in the organization. @@ -27,12 +35,8 @@ def list_repos(org_name: str, session: requests.Session): """ # Settings for the output columns when listing repo information - output_column_list = ["name", "created_at", "archived", "visibility", "id"] + output_column_list = list(OutputColumns._fields) output_column_colors = ["green", "magenta", "cyan", "blue", "yellow"] - OutputColumns = namedtuple( - "OutputColumns", - output_column_list, - ) # Create the output table and columns console = Console() From 22ce19d5b117cc39732a2b1c9ca73ce88167b464 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Thu, 22 Aug 2024 14:21:57 -0400 Subject: [PATCH 09/10] Add a placeholder test --- tests/test_placeholder.py | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/test_placeholder.py diff --git a/tests/test_placeholder.py b/tests/test_placeholder.py new file mode 100644 index 0000000..201975f --- /dev/null +++ b/tests/test_placeholder.py @@ -0,0 +1,2 @@ +def test_placeholder(): + pass From 56b1c712955bff760468a676e2df274540bdef18 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Mon, 26 Aug 2024 11:45:30 -0400 Subject: [PATCH 10/10] Update session handler to retry on rate limit error --- src/reichlab_repo_utils/util/session.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reichlab_repo_utils/util/session.py b/src/reichlab_repo_utils/util/session.py index 2870024..93a0a1c 100644 --- a/src/reichlab_repo_utils/util/session.py +++ b/src/reichlab_repo_utils/util/session.py @@ -19,9 +19,9 @@ def get_session(token: str) -> requests.Session: # https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.retry.Retry retries = Retry( total=5, - allowed_methods=frozenset(["GET", "POST"]), + allowed_methods=frozenset(["GET", "POST", "PATCH"]), backoff_factor=1, - status_forcelist=[500, 502, 503, 504], + status_forcelist=[429, 500, 502, 503, 504], ) session.mount("https://", HTTPAdapter(max_retries=retries)) session.headers.update(headers)