From cbc9e4c9b8d7cdefe2f1a9d80b6d8d1788b9d8d2 Mon Sep 17 00:00:00 2001
From: Ben Hearsum <ben@mozilla.com>
Date: Wed, 18 Dec 2024 13:18:43 -0500
Subject: [PATCH] feat: add cron task that runs the minimal training pipeline
 nightly

---
 .cron.yml                                        | 13 +++++++++++++
 taskcluster/translations_taskgraph/parameters.py | 13 +++++++++++++
 2 files changed, 26 insertions(+)
 create mode 100644 .cron.yml

diff --git a/.cron.yml b/.cron.yml
new file mode 100644
index 000000000..ef7da2dd3
--- /dev/null
+++ b/.cron.yml
@@ -0,0 +1,13 @@
+jobs:
+  # Run the minimal pipeline once a day to support integration testing before
+  # worker image changes are made (see https://bugzilla.mozilla.org/show_bug.cgi?id=1937882).
+  # These runs will pick up cached tasks, so most of the time this will simply
+  # end up running `all-pipeline`.
+  - name: run-pipeline
+    job:
+      type: decision-task
+      # We don't use Treeherder, but this is a required field.
+      treeherder-symbol: pipeline
+      target-tasks-method: train-target-tasks
+    when:
+      - {hour: 0, minute: 0}
diff --git a/taskcluster/translations_taskgraph/parameters.py b/taskcluster/translations_taskgraph/parameters.py
index fea1f1f11..49688b9de 100644
--- a/taskcluster/translations_taskgraph/parameters.py
+++ b/taskcluster/translations_taskgraph/parameters.py
@@ -2,11 +2,14 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+import logging
 from pathlib import Path
 from taskgraph.parameters import extend_parameters_schema
 from voluptuous import Extra, Optional, Required
 import yaml
 
+logger = logging.getLogger(__name__)
+
 
 # By default, provide a very minimal config for CI that runs very quickly. This allows
 # the pipeline to be validated in CI. The production training configs should override
@@ -103,3 +106,13 @@ def deep_setdefault(dict_, defaults):
 def get_decision_parameters(graph_config, parameters):
     parameters.setdefault("training_config", {})
     deep_setdefault(parameters, get_ci_training_config())
+    # We run the pipeline on a cron schedule to enable integration testing when
+    # worker images change (see https://bugzilla.mozilla.org/show_bug.cgi?id=1937882).
+    # The results of these runs should _never_ be published to W&B, because
+    # they would only clutter it up with data of no value.
+    if (
+        parameters["tasks_for"] == "cron"
+        and parameters["target_tasks_method"] == "train-target-tasks"
+    ):
+        logger.info("Overriding wandb-publication to be False for cron pipeline run")
+        parameters["training_config"]["wandb-publication"] = False