Merge pull request #79 from bioconda/github-artifact-fetching
feat: download github artifacts and condense comment
daler authored Apr 5, 2024
2 parents c687280 + 2e6824d commit 03817dd
Showing 4 changed files with 134 additions and 91 deletions.
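
For orientation, here is a rough, illustrative sketch of the condensed comment these changes produce: instead of separate Azure and CircleCI sections, a single table now carries one row per package across Azure, CircleCI, and GitHub Actions, followed by a combined Docker image table. The header strings mirror those added in comment.py below; the package names and URLs are placeholders, not real build output.

```python
# Illustrative only: same header strings as the new code in comment.py.
header = "Package(s) built are ready for inspection:\n\n"
header += "Arch | Package | Zip File / Repodata | CI | Instructions\n"
header += "-----|---------|---------|-----|---------\n"

# One hypothetical row per CI platform (real rows come from the compose_* helpers).
rows = "linux-64 | example-pkg-1.0-0.tar.bz2 | [LinuxArtifacts.zip](https://example.invalid) | Azure | <details>...</details>\n"
rows += "noarch | example-pkg-1.0-0.tar.bz2 | [noarch.zip](https://example.invalid) | GitHub Actions | <details>...</details>\n"

print(header + rows)
```
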
2 changes: 1 addition & 1 deletion .github/workflows/bot.yaml
@@ -26,7 +26,7 @@ jobs:
runs-on: ubuntu-22.04
env:
IMAGE_NAME: bot
IMAGE_VERSION: '1.2.0'
IMAGE_VERSION: '1.3.0'

steps:
- uses: actions/checkout@v2
2 changes: 1 addition & 1 deletion images/bot/setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = bioconda-bot
version = 0.0.3
version = 0.0.4

[options]
python_requires = >=3.8
165 changes: 80 additions & 85 deletions images/bot/src/bioconda_bot/comment.py
@@ -24,84 +24,80 @@

# Given a PR and commit sha, post a comment with any artifacts
async def make_artifact_comment(session: ClientSession, pr: int, sha: str) -> None:
artifacts = await fetch_pr_sha_artifacts(session, pr, sha)
artifactDict = await fetch_pr_sha_artifacts(session, pr, sha)

comment = compose_azure_comment(artifacts["azure"] if "azure" in artifacts else [])
if len(comment) > 0:
comment += "\n\n"
comment += compose_circlci_comment(artifacts["circleci"] if "circleci" in artifacts else [])

await send_comment(session, pr, comment)

def compose_azure_comment(artifacts: List[Tuple[str, str]]) -> str:
nPackages = len(artifacts)
comment = "## Azure\n\n"

if nPackages > 0:
comment += "Package(s) built on Azure are ready for inspection:\n\n"
comment += "Arch | Package | Zip File\n-----|---------|---------\n"

# Table of packages and zips
for URL, artifact in artifacts:
if not (package_match := re.match(r"^((.+)\/(.+)\/(.+)\/(.+\.conda|.+\.tar\.bz2))$", artifact)):
continue
url, archdir, basedir, subdir, packageName = package_match.groups()
urlBase = URL[:-3] # trim off zip from format=
urlBase += "file&subPath=%2F{}".format("%2F".join([basedir, subdir]))
conda_install_url = urlBase
# N.B., the zip file URL is nearly identical to the URL for the individual member files. It's unclear if there's an API for getting the correct URL to the files themselves
#pkgUrl = "%2F".join([urlBase, packageName])
#repoUrl = "%2F".join([urlBase, "current_repodata.json"])
#resp = await session.get(repoUrl)

if subdir == "noarch":
comment += "noarch |"
elif subdir == "linux-64":
comment += "linux-64 |"
elif subdir == "linux-aarch64":
comment += "linux-aarch64 |"
else:
comment += "osx-64 |"
comment += f" {packageName} | [{archdir}]({URL})\n"

# Conda install examples
comment += "***\n\nYou may also use `conda` to install these after downloading and extracting the appropriate zip file. From the LinuxArtifacts or OSXArtifacts directories:\n\n"
comment += "```\nconda install -c ./packages <package name>\n```\n"
header = "Package(s) built are ready for inspection:\n\n"
header += "Arch | Package | Zip File / Repodata | CI | Instructions\n"
header += "-----|---------|---------|-----|---------\n"

comment = ""
# Table of packages and zips
for [ci_platform, artifacts] in artifactDict.items():
if ci_platform == "azure":
comment += compose_azure_comment(artifacts)
elif ci_platform == "circleci":
comment += compose_circlci_comment(artifacts)
elif ci_platform == "github-actions":
comment += compose_gha_comment(artifacts)
if len(comment) == 0:
comment = ( "No artifacts found on the most recent builds. "
"Either the builds failed, the artifacts have been removed due to age, or the recipe was blacklisted/skipped.")
else:
comment = header + comment

# Table of containers
imageHeader = "***\n\nDocker image(s) built (images for Azure are in the LinuxArtifacts zip file above):\n\n"
imageHeader += "Package | Tag | Install with `docker`\n"
imageHeader += "--------|-----|----------------------\n"
# Table of containers
imageHeader = "***\n\nDocker image(s) built:\n\n"
imageHeader += "Package | Tag | CI | Install with `docker`\n"
imageHeader += "---------|---------|-----|---------\n"

for [ci_platform, artifacts] in artifactDict.items():
for URL, artifact in artifacts:
if artifact.endswith(".tar.gz"):
image_name = artifact.split("/").pop()[: -len(".tar.gz")]
if ':' in image_name:
package_name, tag = image_name.split(':', 1)
#image_url = URL[:-3] # trim off zip from format=
#image_url += "file&subPath=%2F{}.tar.gz".format("%2F".join(["images", '%3A'.join([package_name, tag])]))
comment += imageHeader
imageHeader = "" # only add the header for the first image
comment += f"{package_name} | {tag} | "
comment += f'<details><summary>show</summary>`gzip -dc LinuxArtifacts/images/{image_name}.tar.gz \\| docker load`\n'
comment += "\n\n"
else:
comment += (
"No artifacts found on the most recent Azure build. "
"Either the build failed, the artifacts have been removed due to age, or the recipe was blacklisted/skipped."
)
if ci_platform == "azure":
comment += f"{package_name} | {tag} | Azure | "
comment += "<details><summary>show</summary>Images for Azure are in the LinuxArtifacts zip file above."
comment += f"`gzip -dc LinuxArtifacts/images/{image_name}.tar.gz \\| docker load`</details>\n"
elif ci_platform == "circleci":
comment += f"[{package_name}]({URL}) | {tag} | CircleCI | "
comment += f'<details><summary>show</summary>`curl -L "{URL}" \\| gzip -dc \\| docker load`</details>\n'
comment += "\n\n"

await send_comment(session, pr, comment)

def compose_azure_comment(artifacts: List[Tuple[str, str]]) -> str:
nPackages = len(artifacts)

if nPackages < 1:
return ""

comment = ""
# Table of packages and zips
for URL, artifact in artifacts:
if not (package_match := re.match(r"^((.+)\/(.+)\/(.+)\/(.+\.conda|.+\.tar\.bz2))$", artifact)):
continue
url, archdir, basedir, subdir, packageName = package_match.groups()

comment += f"{subdir} | {packageName} | [{archdir}.zip]({URL}) | Azure | "
comment += f'<details><summary>show</summary>'
# Conda install examples
comment += f"You may also use `conda` to install after downloading and extracting the zip file. From the {archdir} directory: "
comment += "`conda install -c ./packages <package name>`"
comment +='</details>\n'

return comment

def compose_circlci_comment(artifacts: List[Tuple[str, str]]) -> str:
nPackages = len(artifacts)

if nPackages < 1:
return ""

comment = "## CircleCI\n\n"
comment += "Package(s) built on CircleCI are ready for inspection:\n\n"
comment += "Arch | Package | Repodata\n-----|---------|---------\n"

comment = ""
# Table of packages and repodata.json
for URL, artifact in artifacts:
if not (package_match := re.match(r"^((.+)\/(.+)\/(.+\.conda|.+\.tar\.bz2))$", URL)):
@@ -110,35 +106,34 @@ def compose_circlci_comment(artifacts: List[Tuple[str, str]]) -> str:
repo_url = "/".join([basedir, subdir, "repodata.json"])
conda_install_url = basedir

if subdir == "noarch":
comment += "noarch |"
elif subdir == "linux-64":
comment += "linux-64 |"
elif subdir == "linux-aarch64":
comment += "linux-aarch64 |"
else:
comment += "osx-64 |"
comment += f" [{packageName}]({URL}) | [repodata.json]({repo_url})\n"
comment += f"{subdir} | [{packageName}]({URL}) | [repodata.json]({repo_url}) | CircleCI | "
comment += f'<details><summary>show</summary>'
# Conda install examples
comment += "You may also use `conda` to install:"
comment += f"`conda install -c {conda_install_url} <package name>`"
comment +='</details>\n'

# Conda install examples
comment += "***\n\nYou may also use `conda` to install these:\n\n"
comment += f"```\nconda install -c {conda_install_url} <package name>\n```\n"
return comment

# Table of containers
imageHeader = "***\n\nDocker image(s) built:\n\n"
imageHeader += "Package | Tag | Install with `docker`\n"
imageHeader += "--------|-----|----------------------\n"
def compose_gha_comment(artifacts: List[Tuple[str, str]]) -> str:
nPackages = len(artifacts)

if nPackages < 1:
return ""

comment = ""
# Table of packages and zips
for URL, artifact in artifacts:
if artifact.endswith(".tar.gz"):
image_name = artifact.split("/").pop()[: -len(".tar.gz")]
if ":" in image_name:
package_name, tag = image_name.split(":", 1)
comment += imageHeader
imageHeader = "" # only add the header for the first image
comment += f"[{package_name}]({URL}) | {tag} | "
comment += f'<details><summary>show</summary>`curl -L "{URL}" \\| gzip -dc \\| docker load`</details>\n'
comment += "</details>\n"
if not (package_match := re.match(r"^((.+)\/(.+)\/(.+\.conda|.+\.tar\.bz2))$", artifact)):
continue
url, basedir, subdir, packageName = package_match.groups()
comment += f"{subdir} | {packageName} | [{subdir}.zip]({URL}) | GitHub Actions | "
comment += f'<details><summary>show</summary>'
# Conda install examples
comment += "You may also use `conda` to install after downloading and extracting the zip file. "
comment += "`conda install -c ./packages <package name>`"
comment +='</details>\n'

return comment

# Post a comment on a given PR with its artifacts
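
As a quick check on the row format shared by the compose_* helpers above, here is a self-contained sketch (hypothetical artifact URL and package path, not from a real run) showing what the path regex extracts and the GitHub Actions-style table row it produces:

```python
import re
from typing import List, Tuple

# Hypothetical (zip URL, path inside the zip) pairs, shaped like the tuples
# fetch_pr_sha_artifacts() returns under the "github-actions" key.
artifacts: List[Tuple[str, str]] = [
    (
        "https://github.com/bioconda/bioconda-recipes/actions/runs/123456789/artifacts/987654321",
        "packages/linux-64/example-pkg-1.0-py310_0.tar.bz2",
    ),
]

def gha_rows(artifacts: List[Tuple[str, str]]) -> str:
    rows = ""
    for url, artifact in artifacts:
        # Same pattern as compose_gha_comment: <basedir>/<subdir>/<package>.conda or .tar.bz2
        m = re.match(r"^((.+)\/(.+)\/(.+\.conda|.+\.tar\.bz2))$", artifact)
        if not m:
            continue
        _full, _basedir, subdir, package_name = m.groups()
        rows += f"{subdir} | {package_name} | [{subdir}.zip]({url}) | GitHub Actions | ...\n"
    return rows

print(gha_rows(artifacts))
# linux-64 | example-pkg-1.0-py310_0.tar.bz2 | [linux-64.zip](https://...) | GitHub Actions | ...
```
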
56 changes: 52 additions & 4 deletions images/bot/src/bioconda_bot/common.py
@@ -6,7 +6,7 @@
from asyncio.subprocess import create_subprocess_exec
from pathlib import Path
from shutil import which
from typing import Any, Dict, List, Optional, Set, Tuple
from typing import Any, Dict, List, Optional, Set, Tuple, Mapping
from zipfile import ZipFile

from aiohttp import ClientSession
@@ -94,8 +94,8 @@ def list_zip_contents(fname: str) -> [str]:

# Download a zip file from url to zipName.zip and return that path
# Timeout is 30 minutes to compensate for any network issues
async def download_file(session: ClientSession, zipName: str, url: str) -> str:
async with session.get(url, timeout=60*30) as response:
async def download_file(session: ClientSession, zipName: str, url: str, headers: Optional[Mapping[str, str]] = None) -> str:
async with session.get(url, timeout=60*30, headers=headers) as response:
if response.status == 200:
ofile = f"{zipName}.zip"
with open(ofile, 'wb') as fd:
@@ -116,7 +116,7 @@ async def fetch_azure_zip_files(session: ClientSession, buildId: str) -> [(str,
log("contacting azure %s", url)
async with session.get(url) as response:
# Sometimes we get a 301 error, so there are no longer artifacts available
if response.status == 301:
if response.status == 301 or response.status == 404:
return artifacts
res = await response.text()

@@ -173,6 +173,46 @@ async def fetch_circleci_artifacts(session: ClientSession, workflowId: str) -> [
artifacts.append((zipUrl, pkg))
return artifacts


# Find artifact zip files, download them and return their URLs and contents
async def fetch_gha_zip_files(session: ClientSession, workflowId: str) -> [(str, str)]:
artifacts = []
token = os.environ["BOT_TOKEN"]
headers = {
"Authorization": f"token {token}",
"User-Agent": "BiocondaCommentResponder",
}
# GitHub Actions uses two different URLs, one for downloading from a browser and another for API downloads
url = f"https://api.github.com/repos/bioconda/bioconda-recipes/actions/runs/{workflowId}/artifacts"
log("contacting github actions %s", url)
async with session.get(url, headers=headers) as response:
# Sometimes we get a 301 error, so there are no longer artifacts available
if response.status == 301:
return artifacts
res = await response.text()

res_object = safe_load(res)
if res_object['total_count'] == 0:
return artifacts

for artifact in res_object['artifacts']:
zipName = artifact['name']
zipUrl = artifact['archive_download_url']
log(f"zip name is {zipName} url {zipUrl}")
fname = await download_file(session, zipName, zipUrl, headers)
if not fname:
continue
pkgsImages = list_zip_contents(fname)
commentZipUrl = f"https://github.com/bioconda/bioconda-recipes/actions/runs/{workflowId}/artifacts/{artifact['id']}"
for pkg in pkgsImages:
artifacts.append((commentZipUrl, pkg))

return artifacts

def parse_gha_build_id(url: str) -> str:
return re.search("runs/(\d+)/", url).group(1)


# Given a PR and commit sha, fetch a list of the artifact zip files URLs and their contents
async def fetch_pr_sha_artifacts(session: ClientSession, pr: int, sha: str) -> Dict[str, List[Tuple[str, str]]]:
url = f"https://api.github.com/repos/bioconda/bioconda-recipes/commits/{sha}/check-runs"
@@ -206,6 +246,14 @@ async def fetch_pr_sha_artifacts(session: ClientSession, pr: int, sha: str) -> D
workflowId = safe_load(check_run["external_id"])["workflow-id"]
zipFiles = await fetch_circleci_artifacts(session, workflowId)
artifact_sources["circleci"] = zipFiles # We've already fetched all possible artifacts from CircleCI
elif (
"github-actions" not in artifact_sources and
check_run["app"]["slug"] == "github-actions"
):
# GitHub Actions builds
buildID = parse_gha_build_id(check_run["details_url"])
zipFiles = await fetch_gha_zip_files(session, buildID)
artifact_sources["github-actions"] = zipFiles # We've already fetched all possible artifacts from GitHub Actions

return artifact_sources
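
For reference, a minimal standalone sketch of the artifact-listing request that fetch_gha_zip_files performs. The endpoint and the total_count / artifacts[].name / artifacts[].archive_download_url / artifacts[].id fields come from GitHub's "List workflow run artifacts" REST API; BOT_TOKEN is assumed to be set, as in the code above:

```python
import os
from typing import Any, Dict, List

from aiohttp import ClientSession

async def list_run_artifacts(run_id: str) -> List[Dict[str, Any]]:
    # GET /repos/{owner}/{repo}/actions/runs/{run_id}/artifacts
    url = f"https://api.github.com/repos/bioconda/bioconda-recipes/actions/runs/{run_id}/artifacts"
    headers = {
        "Authorization": f"token {os.environ['BOT_TOKEN']}",
        "User-Agent": "BiocondaCommentResponder",
    }
    async with ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            data = await response.json()
    # "archive_download_url" is the authenticated API download fed to download_file();
    # "id" is used to build the browser-facing artifact URL shown in the PR comment.
    return data.get("artifacts", [])

# Example (hypothetical run id; run with asyncio.run() and a valid BOT_TOKEN):
# asyncio.run(list_run_artifacts("8579587949"))
```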

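The check-run dispatch added to fetch_pr_sha_artifacts identifies GitHub Actions runs by check_run["app"]["slug"] and pulls the run id out of the check run's details_url. A small sketch of that extraction, using the same regex as parse_gha_build_id (the URL shown is an assumed example of the GitHub Actions details_url format):

```python
import re
from typing import Optional

def gha_run_id(details_url: str) -> Optional[str]:
    # Same regex as parse_gha_build_id(); expects ".../actions/runs/<id>/..." in the URL.
    match = re.search(r"runs/(\d+)/", details_url)
    return match.group(1) if match else None

# Hypothetical details_url for a GitHub Actions check run:
print(gha_run_id(
    "https://github.com/bioconda/bioconda-recipes/actions/runs/8579587949/job/23500000000"
))  # -> 8579587949
```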
