Skip to content

Commit

Permalink
Add a util for local remote settings (#955)
Browse files Browse the repository at this point in the history
  • Loading branch information
gregtatum authored Jan 2, 2025
1 parent 5dc7076 commit 16077f1
Show file tree
Hide file tree
Showing 7 changed files with 1,076 additions and 2 deletions.
11 changes: 11 additions & 0 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,17 @@ tasks:
poetry run python -W ignore utils/taskcluster_downloader.py
--mode=model {{.CLI_ARGS}}
# Task: runs utils/local_remote_settings with python through poetry, with
# PYTHONPATH set to the current working directory. Any CLI arguments given
# after `--` are forwarded via {{.CLI_ARGS}}. Depends on poetry-install-utils.
local-remote-settings:
desc: Runs models for use within Firefox via local Remote Settings.
summary: |
The artifacts will be saved to: ./data/artifacts
Example: `task local-remote-settings -- --task-group-ids GU9ZyWFhRDe_nxlAHcen8g`
deps: [poetry-install-utils]
cmds:
- >-
PYTHONPATH=$(pwd) poetry run python -W ignore
utils/local_remote_settings {{.CLI_ARGS}}
config-generator:
desc: Create a training config for a language pair
summary: |
Expand Down
47 changes: 45 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ pyperclip="1.9.0"
ruamel-yaml = "^0.18.6"
taskcluster = "^56.0.3"
taskcluster-taskgraph = "^11.1.0"
kinto-http="11.7.0"
# Pin an outdated version of pydantic due to a dependency requirements conflict.
pydantic="1.10.19"

# This install group is for running tests. Note that any dependencies in the
# pipeline are installed separately through the run_task test abstraction. This
Expand Down
67 changes: 67 additions & 0 deletions utils/common/remote_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
Type definitions for the translations data that is stored in Remote Settings.
"""

from pydantic import BaseModel, Field
from typing import Optional

# Base URL of the production Remote Settings server; get_prod_records_url()
# builds record URLs on top of it.
prod_endpoint = "https://firefox.settings.services.mozilla.com"
# Names of the Remote Settings collections for the translations models and
# the translations wasm records.
models_collection = "translations-models"
wasm_collection = "translations-wasm"


def get_prod_records_url(collection: str) -> str:
    """Return the production records URL for a Remote Settings collection.

    Args:
        collection: The collection name to place in the URL path, for example
            the module-level ``models_collection`` or ``wasm_collection``.

    Returns:
        The records URL under the "main" bucket of ``prod_endpoint``.
    """
    records_url = f"{prod_endpoint}/v1/buckets/main/collections/{collection}/records"
    return records_url


class Attachment(BaseModel):
    """A Remote Settings attachment.

    The comment above each field shows an example value.
    """

    # "724b358d399c6b23444de36d76e9e2630c7024c29d9e617323b820a11631535a"
    hash: str
    # 17141051
    size: int
    # "model.enel.intgemm.alphas.bin"
    filename: str
    # "main-workspace/translations-models/26dead91-e46c-442c-85d0-a6fe13fad55a.bin"
    location: str
    # "application/octet-stream"
    mimetype: str


class WasmRecord(BaseModel):
    """A Remote Settings WasmRecord.

    Fields without a default are required when validating a record.
    """

    name: str
    # Populated from the "schema" key of the record through the field alias.
    schema_name: Optional[int] = Field(default=None, alias="schema")
    license: str
    release: str
    version: str
    revision: str
    # The following three may be absent from a record, in which case they are None.
    attachment: Optional[Attachment] = None
    fx_release: Optional[str] = None
    filter_expression: Optional[str] = None
    id: str
    last_modified: int


class WasmResponse(BaseModel):
    """The response from calling the Remote Settings records endpoint.

    This variant holds wasm records.
    """

    # Every entry is validated as a WasmRecord.
    data: list[WasmRecord]


class ModelRecord(BaseModel):
    """A Remote Settings record describing one translations model file.

    The comment above each field shows an example value.
    """

    # "model.enel.intgemm.alphas.bin"
    name: str
    # Populated from the "schema" key of the record through the field alias.
    schema_name: Optional[int] = Field(default=None, alias="schema")
    # NOTE(review): the two language examples below look swapped relative to
    # the "enel" in the example name above - confirm against a real record.
    # "en"
    toLang: str
    # "el"
    fromLang: str
    # "1.0"
    version: str
    # "model", "lex", "vocab"
    fileType: str
    attachment: Optional[Attachment] = None
    # "env.channel == 'default' || env.channel == 'nightly'"
    filter_expression: Optional[str] = None
    # "136b1eae-9cef-4d03-a38f-74b0cb543b74"
    id: str
    # 1728419357986
    last_modified: int


class ModelsResponse(BaseModel):
    """The response from calling the Remote Settings records endpoint.

    This variant holds translations model records.
    """

    # Every entry is validated as a ModelRecord.
    data: list[ModelRecord]
142 changes: 142 additions & 0 deletions utils/common/taskcluster_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""
Typed responses for working with the Taskcluster API.
"""

from pydantic import BaseModel
from typing import Any, Optional
import taskcluster


class Run(BaseModel):
    """A single run entry from a Taskcluster task status.

    Trailing comments show example values. Only the fields without a default
    are required for validation.
    """

    runId: int  # 0,
    state: str  # "completed"
    reasonCreated: str  # "scheduled"
    scheduled: str  # "2024-11-12T13:50:55.910Z"

    # Only set once the run has been resolved. It must be optional, otherwise
    # validating a run that is still pending or running fails.
    reasonResolved: Optional[str] = None  # "completed"

    # These are not present during an exception:
    workerGroup: Optional[str] = None  # "us-central1-a"
    workerId: Optional[str] = None  # "4909417939093873369"
    takenUntil: Optional[str] = None  # "2024-11-12T14:10:56.073Z"
    started: Optional[str] = None  # "2024-11-12T13:50:56.076Z"
    resolved: Optional[str] = None  # "2024-11-12T13:52:36.465Z"


class Metadata(BaseModel):
    """The ``metadata`` section of a Taskcluster task definition.

    The comment above each field shows an example value.
    """

    # "Dummy task that ensures all parts of training pipeline will run"
    description: str
    # "all-pipeline-en-lt-2"
    name: str
    # An email address.
    owner: str
    # "https://github.com/mozilla/translations/blob/4b99af14117a3e662ee0a27bda93aa1170e964e7/taskcluster/kinds/all-pipeline"
    source: str


class TaskExtra(BaseModel):
    """The ``extra`` section of a Taskcluster task definition."""

    # Left untyped; its shape is not modelled here.
    index: Any
    # "e1DMdEzNSGyGhdjaWFYpxQ"
    parent: str


class Task(BaseModel):
    """A Taskcluster task definition, as returned by ``queue.task``.

    Trailing comments show example values.
    """

    created: str  # "2024-11-04T15:39:54.898Z"
    deadline: str  # "2024-11-24T15:39:54.898Z"
    dependencies: list[str]  # [ "CM1cp-ZWSnWkiQ96mKjmvA", "CnkDIu9LRCqI89thrhWAlQ", … ]
    expires: str  # "2025-02-02T15:39:54.898Z"
    extra: TaskExtra
    metadata: Metadata
    payload: Any
    priority: str  # "low"
    projectId: str  # "none"
    provisionerId: str  # "built-in"
    requires: str  # "all-completed"
    retries: int
    routes: list[str]  # [ "checks" ]
    schedulerId: str  # "translations-level-1"
    scopes: list[Any]
    tags: dict[str, Any]
    taskGroupId: str  # "TaeCdUs5Rqq7w1Tbf1PShQ"
    taskQueueId: str  # "built-in/succeed"
    workerType: str  # "succeed"

    @classmethod
    def call(cls, queue: taskcluster.Queue, *args, **kwargs) -> "Task":
        """Call ``queue.task`` and validate the response into this model.

        Args:
            queue: The Taskcluster queue client to call.
            *args: Forwarded unchanged to ``queue.task``.
            **kwargs: Forwarded unchanged to ``queue.task``.

        Returns:
            The validated task definition.
        """
        # A classmethod builds ``cls`` rather than a hard-coded class name, so
        # a subclass gets an instance of itself.
        response: Any = queue.task(*args, **kwargs)
        return cls(**response)


class Status(BaseModel):
    """The status of a Taskcluster task, including its runs.

    Trailing comments show example values.
    """

    deadline: str  # "2024-11-24T15:39:54.898Z"
    expires: str  # "2025-02-02T15:39:54.898Z"
    projectId: str  # "none"
    provisionerId: str  # "built-in"
    retriesLeft: int
    runs: list[Run]
    schedulerId: str  # "translations-level-1"
    state: str  # "completed"
    taskGroupId: str  # "TaeCdUs5Rqq7w1Tbf1PShQ"
    taskId: str  # "CmximseBTi-d8tcWOl-KZA"
    taskQueueId: str  # "built-in/succeed"
    workerType: str  # "succeed"

    @classmethod
    def call(cls, queue: taskcluster.Queue, *args, **kwargs) -> "Status":
        """Call ``queue.status`` and validate the response into this model.

        Args:
            queue: The Taskcluster queue client to call.
            *args: Forwarded unchanged to ``queue.status``.
            **kwargs: Forwarded unchanged to ``queue.status``.

        Returns:
            The validated status.
        """
        response: Any = queue.status(*args, **kwargs)
        # The fields of this model live under the "status" key of the response.
        return cls(**response["status"])


class TaskAndStatus(BaseModel):
    """A task definition paired with its status."""

    task: Task
    status: Status

    @classmethod
    def call(cls, queue: taskcluster.Queue, *args, **kwargs) -> "TaskAndStatus":
        """Fetch both the task definition and its status for the same arguments.

        Args:
            queue: The Taskcluster queue client to call.
            *args: Forwarded unchanged to both ``Task.call`` and ``Status.call``.
            **kwargs: Forwarded unchanged to both ``Task.call`` and ``Status.call``.

        Returns:
            The validated task and status pair.
        """
        # This requires 2 API calls, even though other APIs return both.
        return cls(
            task=Task.call(queue, *args, **kwargs),
            status=Status.call(queue, *args, **kwargs),
        )


class GetTaskGroup(BaseModel):
    """The response of ``queue.getTaskGroup``.

    Trailing comments show example values.
    """

    taskGroupId: str  # 'I9uKJEPvQd-1zeItJK0cOQ'
    schedulerId: str  # 'translations-level-1'
    expires: str  # '2025-11-07T21:56:15.759Z'

    @classmethod
    def call(cls, queue: taskcluster.Queue, *args, **kwargs) -> "GetTaskGroup":
        """Call ``queue.getTaskGroup`` and validate the response into this model.

        Args:
            queue: The Taskcluster queue client to call.
            *args: Forwarded unchanged to ``queue.getTaskGroup``.
            **kwargs: Forwarded unchanged to ``queue.getTaskGroup``.

        Returns:
            The validated task group information.
        """
        response: Any = queue.getTaskGroup(*args, **kwargs)
        return cls(**response)


class ListTaskGroup(BaseModel):
    """The response of ``queue.listTaskGroup``.

    Trailing comments show example values.
    """

    expires: str  # "2025-11-04T16:39:54.296Z"
    schedulerId: str  # "translations-level-1"
    taskGroupId: str  # "TaeCdUs5Rqq7w1Tbf1PShQ"
    tasks: list[TaskAndStatus]
    # Kept so callers can request the next page when the listing is paginated;
    # None when the response carries no token.
    continuationToken: Optional[str] = None

    @classmethod
    def call(cls, queue: taskcluster.Queue, *args, **kwargs) -> "ListTaskGroup":
        """Call ``queue.listTaskGroup`` and validate the response into this model.

        Only the single response returned by this one call is parsed; no
        further pages are requested here.

        Args:
            queue: The Taskcluster queue client to call.
            *args: Forwarded unchanged to ``queue.listTaskGroup``.
            **kwargs: Forwarded unchanged to ``queue.listTaskGroup``.

        Returns:
            The validated task group listing.
        """
        response: Any = queue.listTaskGroup(*args, **kwargs)
        return cls(**response)


class Artifact(BaseModel):
    """One artifact entry of a Taskcluster artifact listing.

    The comment above each field shows an example value.
    """

    # "s3"
    storageType: str
    # "public/build/lex.50.50.enlt.s2t.bin.gz"
    name: str
    # "2025-11-04T15:39:54.626Z"
    expires: str
    # "application/gzip"
    contentType: str


class ListArtifacts(BaseModel):
    """The response of ``queue.listArtifacts``."""

    artifacts: list[Artifact]
    # Kept so callers can request the next page when the listing is paginated;
    # None when the response carries no token.
    continuationToken: Optional[str] = None

    @classmethod
    def call(cls, queue: taskcluster.Queue, *args, **kwargs) -> "ListArtifacts":
        """Call ``queue.listArtifacts`` and validate the response into this model.

        Only the single response returned by this one call is parsed; no
        further pages are requested here.

        Args:
            queue: The Taskcluster queue client to call.
            *args: Forwarded unchanged to ``queue.listArtifacts``.
            **kwargs: Forwarded unchanged to ``queue.listArtifacts``.

        Returns:
            The validated artifact listing.
        """
        response: Any = queue.listArtifacts(*args, **kwargs)
        return cls(**response)


class ListDependentTasks(BaseModel):
    """The response of ``queue.listDependentTasks``."""

    taskId: str
    tasks: list[TaskAndStatus]
    # Kept so callers can request the next page when the listing is paginated;
    # None when the response carries no token.
    continuationToken: Optional[str] = None

    @classmethod
    def call(cls, queue: taskcluster.Queue, *args, **kwargs) -> "ListDependentTasks":
        """Call ``queue.listDependentTasks`` and validate the response into this model.

        Only the single response returned by this one call is parsed; no
        further pages are requested here.

        Args:
            queue: The Taskcluster queue client to call.
            *args: Forwarded unchanged to ``queue.listDependentTasks``.
            **kwargs: Forwarded unchanged to ``queue.listDependentTasks``.

        Returns:
            The validated listing of dependent tasks.
        """
        response: Any = queue.listDependentTasks(*args, **kwargs)
        return cls(**response)
Loading

0 comments on commit 16077f1

Please sign in to comment.