huggingface · ydshieh · Dec 16, 2024 · Nov 19, 2024 · Nov 19, 2024 · Nov 19, 2024
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -58,14 +58,14 @@ jobs:
                 name: "Prepare pipeline parameters"
                 command: |
                     python utils/process_test_artifacts.py 
-            
+
             # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
             # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
             # We used:
 
             # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
             # We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
-                
+
             - store_artifacts:
                 path: test_preparation/transformed_artifacts.json
             - store_artifacts:

diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
@@ -40,9 +40,22 @@ class EmptyJob:
     job_name = "empty"
 
     def to_dict(self):
+        steps = [{"run": 'ls -la'}]
+        if self.job_name == "collection_job":
+            steps.extend(
+                [
+                    "checkout",
+                    {"run": "pip install requests"},
+                    {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done"""},
+                    {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID'},
+                    {"store_artifacts": {"path": "outputs"}},
+                    {"run": 'echo "All required jobs have now completed"'},
+                ]
+            )
+
         return {
             "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
-            "steps":["checkout"],
+            "steps": steps,
         }
 
 
@@ -352,6 +365,7 @@ def job_name(self):
 DOC_TESTS = [doc_test_job]
 ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job]  # fmt: skip
 
+
 def create_circleci_config(folder=None):
     if folder is None:
         folder = os.getcwd()
@@ -361,7 +375,13 @@ def create_circleci_config(folder=None):
 
     if len(jobs) == 0:
         jobs = [EmptyJob()]
-    print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
+    else:
+        print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
+        # Add a job waiting all the test jobs and aggregate their test summary files at the end
+        collection_job = EmptyJob()
+        collection_job.job_name = "collection_job"
+        jobs = [collection_job] + jobs
+
     config = {
         "version": "2.1",
         "parameters": {
@@ -371,7 +391,7 @@ def create_circleci_config(folder=None):
             **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
             **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
         },
-        "jobs" : {j.job_name: j.to_dict() for j in jobs},
+        "jobs": {j.job_name: j.to_dict() for j in jobs},
         "workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
     }
     with open(os.path.join(folder, "generated_config.yml"), "w") as f:

diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
@@ -0,0 +1,83 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import json
+import os
+import requests
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--workflow_id', type=str, required=True)
+    args = parser.parse_args()
+    workflow_id = args.workflow_id
+
+    r = requests.get(f"https://circleci.com/api/v2/workflow/{workflow_id}/job", headers={"Circle-Token": os.environ.get('CIRCLE_TOKEN', "")})
+    jobs = r.json()["items"]
+
+    os.makedirs("outputs", exist_ok=True)
+
+    workflow_summary = {}
+    # for each job, download artifacts
+    for job in jobs:
+
+        project_slug = job["project_slug"]
+        if job["name"].startswith(("tests_", "examples_", "pipelines_")):
+
+            url = f'https://circleci.com/api/v2/project/{project_slug}/{job["job_number"]}/artifacts'
+            r = requests.get(url, headers={"Circle-Token": os.environ.get('CIRCLE_TOKEN', "")})
+            job_artifacts = r.json()["items"]
+
+            os.makedirs(job["name"], exist_ok=True)
+            os.makedirs(f'outputs/{job["name"]}', exist_ok=True)
+
+            job_test_summaries = {}
+            for artifact in job_artifacts:
+                if artifact["path"].startswith("reports/") and artifact["path"].endswith("/summary_short.txt"):
+                    node_index = artifact["node_index"]
+                    url = artifact["url"]
+                    r = requests.get(url, headers={"Circle-Token": os.environ.get('CIRCLE_TOKEN', "")})
+                    test_summary = r.text
+                    job_test_summaries[node_index] = test_summary
+
+            summary = {}
+            for node_index, node_test_summary in job_test_summaries.items():
+                for line in node_test_summary.splitlines():
+                    if line.startswith("PASSED "):
+                        test = line[len("PASSED "):]
+                        summary[test] = "passed"
+                    elif line.startswith("FAILED "):
+                        test = line[len("FAILED "):].split()[0]
+                        summary[test] = "failed"
+            # failed before passed
+            summary = {k: v for k, v in sorted(summary.items(), key=lambda x: (x[1], x[0]))}
+            workflow_summary[job["name"]] = summary
+
+            # collected version
+            with open(f'outputs/{job["name"]}/test_summary.json', "w") as fp:
+                json.dump(summary, fp, indent=4)
+
+    new_workflow_summary = {}
+    for job_name, job_summary in workflow_summary.items():
+        for test, status in job_summary.items():
+            if test not in new_workflow_summary:
+                new_workflow_summary[test] = {}
+            new_workflow_summary[test][job_name] = status
+
+    for test, result in new_workflow_summary.items():
+        new_workflow_summary[test] = {k: v for k, v in sorted(result.items())}
+    new_workflow_summary = {k: v for k, v in sorted(new_workflow_summary.items())}
+
+    with open(f'outputs/test_summary.json', "w") as fp:
+        json.dump(new_workflow_summary, fp, indent=4)