diff --git a/.circleci/config.yml b/.circleci/config.yml
index 9c414901c4f5..75413af8bf52 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -58,14 +58,14 @@ jobs:
           name: "Prepare pipeline parameters"
           command: |
             python utils/process_test_artifacts.py
-
+
       # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
       # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
       # We used:
       # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
       # We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
-
+
       - store_artifacts:
           path: test_preparation/transformed_artifacts.json
       - store_artifacts:
diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
index daf842fbd719..be8952903e2c 100644
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@@ -40,9 +40,22 @@ class EmptyJob:
     job_name = "empty"

     def to_dict(self):
+        steps = [{"run": 'ls -la'}]
+        if self.job_name == "collection_job":
+            steps.extend(
+                [
+                    "checkout",
+                    {"run": "pip install requests || true"},
+                    {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
+                    {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
+                    {"store_artifacts": {"path": "outputs"}},
+                    {"run": 'echo "All required jobs have now completed"'},
+                ]
+            )
+
         return {
             "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
-            "steps":["checkout"],
+            "steps": steps,
         }
@@ -352,6 +365,7 @@ def job_name(self):
 DOC_TESTS = [doc_test_job]
 ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job]  # fmt: skip

+
 def create_circleci_config(folder=None):
     if folder is None:
         folder = os.getcwd()
@@ -361,7 +375,13 @@ def create_circleci_config(folder=None):

     if len(jobs) == 0:
         jobs = [EmptyJob()]
-    print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
+    else:
+        print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
+        # Add a job waiting all the test jobs and aggregate their test summary files at the end
+        collection_job = EmptyJob()
+        collection_job.job_name = "collection_job"
+        jobs = [collection_job] + jobs
+
     config = {
         "version": "2.1",
         "parameters": {
@@ -371,7 +391,7 @@ def create_circleci_config(folder=None):
             **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
             **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
         },
-        "jobs" : {j.job_name: j.to_dict() for j in jobs}
+        "jobs": {j.job_name: j.to_dict() for j in jobs}
     }
     if "CIRCLE_TOKEN" in os.environ:
         # For private forked repo. (e.g. new model addition)
diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
new file mode 100644
index 000000000000..944bc47a7e2f
--- /dev/null
+++ b/utils/process_circleci_workflow_test_reports.py
@@ -0,0 +1,85 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import json
+import os
+
+import requests
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--workflow_id", type=str, required=True)
+    args = parser.parse_args()
+    workflow_id = args.workflow_id
+
+    r = requests.get(
+        f"https://circleci.com/api/v2/workflow/{workflow_id}/job",
+        headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")},
+    )
+    jobs = r.json()["items"]
+
+    os.makedirs("outputs", exist_ok=True)
+
+    workflow_summary = {}
+    # for each job, download artifacts
+    for job in jobs:
+        project_slug = job["project_slug"]
+        if job["name"].startswith(("tests_", "examples_", "pipelines_")):
+            url = f'https://circleci.com/api/v2/project/{project_slug}/{job["job_number"]}/artifacts'
+            r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+            job_artifacts = r.json()["items"]
+
+            os.makedirs(job["name"], exist_ok=True)
+            os.makedirs(f'outputs/{job["name"]}', exist_ok=True)
+
+            job_test_summaries = {}
+            for artifact in job_artifacts:
+                if artifact["path"].startswith("reports/") and artifact["path"].endswith("/summary_short.txt"):
+                    node_index = artifact["node_index"]
+                    url = artifact["url"]
+                    r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+                    test_summary = r.text
+                    job_test_summaries[node_index] = test_summary
+
+            summary = {}
+            for node_index, node_test_summary in job_test_summaries.items():
+                for line in node_test_summary.splitlines():
+                    if line.startswith("PASSED "):
+                        test = line[len("PASSED ") :]
+                        summary[test] = "passed"
+                    elif line.startswith("FAILED "):
+                        test = line[len("FAILED ") :].split()[0]
+                        summary[test] = "failed"
+            # failed before passed
+            summary = dict(sorted(summary.items(), key=lambda x: (x[1], x[0])))
+            workflow_summary[job["name"]] = summary
+
+            # collected version
+            with open(f'outputs/{job["name"]}/test_summary.json', "w") as fp:
+                json.dump(summary, fp, indent=4)
+
+    new_workflow_summary = {}
+    for job_name, job_summary in workflow_summary.items():
+        for test, status in job_summary.items():
+            if test not in new_workflow_summary:
+                new_workflow_summary[test] = {}
+            new_workflow_summary[test][job_name] = status
+
+    for test, result in new_workflow_summary.items():
+        new_workflow_summary[test] = dict(sorted(result.items()))
+    new_workflow_summary = dict(sorted(new_workflow_summary.items()))
+
+    with open("outputs/test_summary.json", "w") as fp:
+        json.dump(new_workflow_summary, fp, indent=4)
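Note (illustrative, not part of the patch): the new collection job ends up writing one per-job report at outputs/<job_name>/test_summary.json and a single aggregated report at outputs/test_summary.json, which maps each test to its status in every job that ran it. A minimal sketch of a local consumer of that aggregated file, assuming the script above has already been run for a workflow:

# Illustrative sketch, not part of the patch: list the tests that failed in at least
# one job, using the aggregated report written by
# utils/process_circleci_workflow_test_reports.py.
import json

with open("outputs/test_summary.json") as fp:
    workflow_summary = json.load(fp)  # {test_name: {job_name: "passed" | "failed"}}

for test, results in workflow_summary.items():
    failed_jobs = sorted(job for job, status in results.items() if status == "failed")
    if failed_jobs:
        print(f"{test}: failed in {', '.join(failed_jobs)}")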