Skip to content

Commit

Permalink
update PBS generate_urls.py housekeeping script with resolve and options (#21674)
Browse files Browse the repository at this point in the history

Some small updates to the PBS `generate_urls.py` housekeeping script:

- Put the script into a `pbs-script` resolve to isolate its
dependencies.
- Add a `pex_binary` target for running the script.
- Use `argparse` to add new options `--scrape-all-releases` and
`--scrape-release=RELEASE` for forcing the script to re-process
already-scraped releases. I am using these for another PR where I
modified the data format for `versions_info.json` and need to rescrape
the PBS release data.
- The build root check is now just a check that the script is being run
from the root of a Pants repository (via presence of the expected
subdirectories). (This avoids needing to import `get_buildroot` from the
other Pants sources.)
  • Loading branch information
tdyas authored Nov 21, 2024
1 parent 58aea21 commit 6cac603
Show file tree
Hide file tree
Showing 6 changed files with 729 additions and 6 deletions.
6 changes: 6 additions & 0 deletions 3rdparty/python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ python_requirements(
resolve="pytest",
)

python_requirements(
name="pbs-script",
source="pbs-script-requirements.txt",
resolve="pbs-script",
)

__dependents_rules__(
( # Only the explorer server may depend on these libraries
(
Expand Down
674 changes: 674 additions & 0 deletions 3rdparty/python/pbs-script-requirements.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions 3rdparty/python/pbs-script-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
PyGithub>=2.5.0
requests[security]>=2.28.1
types-requests==2.28.1
1 change: 1 addition & 0 deletions pants.toml
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ python-default = "3rdparty/python/user_reqs.lock"
flake8 = "3rdparty/python/flake8.lock"
mypy = "3rdparty/python/mypy.lock"
pytest = "3rdparty/python/pytest.lock"
pbs-script = "3rdparty/python/pbs-script-requirements.lock"

[python-infer]
assets = true
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

python_sources()
python_sources(resolve="pbs-script")

pex_binary(
name="bin",
entry_point="generate_urls.py",
dependencies=[":scripts"],
resolve="pbs-script",
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,32 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import annotations

import argparse
import hashlib
import itertools
import json
import os
import re
import sys
from pathlib import Path

import github
import requests
from github.GitReleaseAsset import GitReleaseAsset

from pants.base.build_environment import get_buildroot

VERSIONS_PATH = get_buildroot() / Path(
VERSIONS_PATH = Path(
"src/python/pants/backend/python/providers/python_build_standalone/versions_info.json"
)


def _github():
    """Return a PyGithub client, authenticated with ``GH_TOKEN`` when one is set.

    The token is read from the ``GH_TOKEN`` environment variable (generate one
    with ``gh auth token``). When the variable is unset or empty, a warning is
    printed to stderr and an unauthenticated client is returned.
    """
    # generate with `gh auth token`
    token = os.environ.get("GH_TOKEN")
    # Use truthiness rather than `is None` so that an empty GH_TOKEN, which also
    # results in an unauthenticated client below, triggers the same warning.
    if not token:
        print(
            "WARNING: No GitHub token configured in GH_TOKEN. Lower rate limits will apply!",
            file=sys.stderr,
        )
    return github.Github(auth=github.Auth.Token(token) if token else None)


Expand All @@ -37,19 +43,37 @@ def _compute_sha256(url):


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--scrape-all-releases", dest="scrape_all_releases", action="store_true")
parser.add_argument(
"--scrape-release", metavar="RELEASE", dest="scrape_releases", action="append"
)
options = parser.parse_args()

print("Starting to scrape GitHub PBS releases.")
if not VERSIONS_PATH.parent.exists():
raise Exception("This helper script must be run from the root of the Pants repository.")

versions_info = json.loads(VERSIONS_PATH.read_text())
scraped_releases = set(versions_info["scraped_releases"])

github = _github()
pbs_repo = github.get_repo("indygreg/python-build-standalone")
print("Downloading PBS release metadata.")
releases = pbs_repo.get_releases()
print("Downloaded PBS release metadata.")

asset_map: dict[str, GitReleaseAsset] = {}
sha256_map: dict[str, str] = {}
for release in releases.reversed:
tag_name = release.tag_name

if tag_name not in scraped_releases:
if (
tag_name not in scraped_releases
or options.scrape_all_releases
or tag_name in options.scrape_releases
):
print(f"Scraping release tag `{tag_name}`.")
scraped_releases.add(release.tag_name)
assets = release.get_assets()
for asset in assets:
Expand All @@ -72,10 +96,18 @@ def main() -> None:
else:
asset_map[asset.name] = asset

print("Finished scraping releases.")

versions_info["scraped_releases"] = sorted(scraped_releases)
pythons_dict = versions_info["pythons"]
asset_matcher = re.compile(r"^([a-zA-Z0-9]+)-([0-9.]+)\+([0-9.]+)-")

for asset in asset_map.values():
python_version = asset.name.split("+")[0].split("-")[1]
matched_versions = asset_matcher.match(asset.name)
if not matched_versions:
continue

python_version, pbs_release_tag = matched_versions.groups()[1:3]
if python_version not in pythons_dict:
pythons_dict[python_version] = {}

Expand Down

0 comments on commit 6cac603

Please sign in to comment.