Skip to content

Commit

Permalink
Introduce the translation API v3 (advanced) models operators.
Browse files Browse the repository at this point in the history
- TranslateCreateModelOperator
- TranslateModelsListOperator
- TranslateDeleteModelOperator

More details on using AutoML translation: https://cloud.google.com/translate/docs/advanced/automl-beginner.
  • Loading branch information
Oleg Kachur committed Dec 2, 2024
1 parent 0997e07 commit 46cbd1c
Show file tree
Hide file tree
Showing 7 changed files with 870 additions and 0 deletions.
63 changes: 63 additions & 0 deletions docs/apache-airflow-providers-google/operators/cloud/translate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,69 @@ Basic usage of the operator:
:end-before: [END howto_operator_translate_automl_delete_dataset]


.. _howto/operator:TranslateCreateModelOperator:

TranslateCreateModelOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Create a native translation model using Cloud Translate API (Advanced V3).

For parameter definition, take a look at
:class:`~airflow.providers.google.cloud.operators.translate.TranslateCreateModelOperator`

Using the operator
""""""""""""""""""

Basic usage of the operator:

.. exampleinclude:: /../../providers/tests/system/google/cloud/translate/example_translate_model.py
    :language: python
    :dedent: 4
    :start-after: [START howto_operator_translate_automl_create_model]
    :end-before: [END howto_operator_translate_automl_create_model]


.. _howto/operator:TranslateModelsListOperator:

TranslateModelsListOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^
List the native translation models using Cloud Translate API (Advanced V3).

For parameter definition, take a look at
:class:`~airflow.providers.google.cloud.operators.translate.TranslateModelsListOperator`

Using the operator
""""""""""""""""""

Basic usage of the operator:

.. exampleinclude:: /../../providers/tests/system/google/cloud/translate/example_translate_model.py
    :language: python
    :dedent: 4
    :start-after: [START howto_operator_translate_automl_list_models]
    :end-before: [END howto_operator_translate_automl_list_models]


.. _howto/operator:TranslateDeleteModelOperator:

TranslateDeleteModelOperator
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Delete a native translation model using Cloud Translate API (Advanced V3).

For parameter definition, take a look at
:class:`~airflow.providers.google.cloud.operators.translate.TranslateDeleteModelOperator`

Using the operator
""""""""""""""""""

Basic usage of the operator:

.. exampleinclude:: /../../providers/tests/system/google/cloud/translate/example_translate_model.py
    :language: python
    :dedent: 4
    :start-after: [START howto_operator_translate_automl_delete_model]
    :end-before: [END howto_operator_translate_automl_delete_model]


More information
""""""""""""""""""
See:
Expand Down
1 change: 1 addition & 0 deletions docs/spelling_wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,7 @@ linux
ListDatasetsPager
ListGenerator
ListInfoTypesResponse
ListModelsPager
ListSecretsPager
Liveness
liveness
Expand Down
154 changes: 154 additions & 0 deletions providers/src/airflow/providers/google/cloud/hooks/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,3 +562,157 @@ def delete_dataset(
metadata=metadata,
)
return result

def create_model(
    self,
    dataset_id: str,
    display_name: str,
    project_id: str,
    location: str,
    retry: Retry | _MethodDefault = DEFAULT,
    timeout: float | None = None,
    metadata: Sequence[tuple[str, str]] = (),
) -> Operation:
    """
    Start training of a native translation model on an existing translation dataset.

    :param dataset_id: ID of the dataset the model is trained on.
    :param display_name: Display name for the trained model.
        A-Z and a-z, underscores (_), and ASCII digits 0-9.
    :param project_id: ID of the Google Cloud project where the dataset is located.
        When falsy, the hook's default ``project_id`` is used instead.
    :param location: The location of the project.
    :param retry: A retry object used to retry requests. If `None` is specified,
        requests will not be retried.
    :param timeout: The amount of time, in seconds, to wait for the request to complete.
        Note that if `retry` is specified, the timeout applies to each individual attempt.
    :param metadata: Additional metadata that is provided to the method.
    :return: `Operation` object with the model creation results, when finished.
    """
    translate_client = self.get_client()
    project_id = project_id or self.project_id
    # The model's parent and its source dataset live under the same location path.
    location_path = f"projects/{project_id}/locations/{location}"
    model_spec = {
        "display_name": display_name,
        "dataset": f"{location_path}/datasets/{dataset_id}",
    }
    return translate_client.create_model(
        request={"parent": location_path, "model": model_spec},
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )

def get_model(
    self,
    model_id: str,
    project_id: str,
    location: str,
    retry: Retry | _MethodDefault = DEFAULT,
    timeout: float | _MethodDefault = DEFAULT,
    metadata: Sequence[tuple[str, str]] = (),
) -> automl_translation.Model:
    """
    Retrieve the translation model for the given model_id.

    :param model_id: ID of translation model to be retrieved.
    :param project_id: ID of the Google Cloud project where the model is located.
        When falsy, the hook's default ``project_id`` is used instead.
    :param location: The location of the project.
    :param retry: A retry object used to retry requests. If `None` is specified,
        requests will not be retried.
    :param timeout: The amount of time, in seconds, to wait for the request to complete.
        Note that if `retry` is specified, the timeout applies to each individual attempt.
    :param metadata: Additional metadata that is provided to the method.
    :return: `automl_translation.Model` instance.
    """
    client = self.get_client()
    # Apply the documented default-project fallback, the same way create_model does;
    # without it a falsy project_id would be formatted straight into the resource name.
    project_id = project_id or self.project_id
    name = f"projects/{project_id}/locations/{location}/models/{model_id}"
    return client.get_model(
        request={"name": name},
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )

def list_models(
    self,
    project_id: str,
    location: str,
    filter_str: str | None = None,
    page_size: int | None = None,
    retry: Retry | _MethodDefault = DEFAULT,
    timeout: float | _MethodDefault = DEFAULT,
    metadata: Sequence[tuple[str, str]] = (),
) -> pagers.ListModelsPager:
    """
    List translation models in a project.

    :param project_id: ID of the Google Cloud project where models are located.
        When falsy, the hook's default ``project_id`` is used instead.
    :param location: The location of the project.
    :param filter_str: An optional expression for filtering the models that will
        be returned. Supported filter: ``dataset_id=${dataset_id}``.
    :param page_size: Optional custom page size value. The server can
        return fewer results than requested.
    :param retry: A retry object used to retry requests. If `None` is specified,
        requests will not be retried.
    :param timeout: The amount of time, in seconds, to wait for the request to complete.
        Note that if `retry` is specified, the timeout applies to each individual attempt.
    :param metadata: Additional metadata that is provided to the method.
    :return: ``pagers.ListModelsPager`` instance, iterable object to retrieve the models list.
    """
    client = self.get_client()
    # Apply the documented default-project fallback, the same way create_model does;
    # without it a falsy project_id would be formatted straight into the parent path.
    project_id = project_id or self.project_id
    parent = f"projects/{project_id}/locations/{location}"
    result = client.list_models(
        request={
            "parent": parent,
            "filter": filter_str,
            "page_size": page_size,
        },
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return result

def delete_model(
    self,
    model_id: str,
    project_id: str,
    location: str,
    retry: Retry | _MethodDefault = DEFAULT,
    timeout: float | None = None,
    metadata: Sequence[tuple[str, str]] = (),
) -> Operation:
    """
    Delete the translation model and all of its contents.

    :param model_id: ID of model to be deleted.
    :param project_id: ID of the Google Cloud project where the model is located.
        When falsy, the hook's default ``project_id`` is used instead.
    :param location: The location of the project.
    :param retry: A retry object used to retry requests. If `None` is specified,
        requests will not be retried.
    :param timeout: The amount of time, in seconds, to wait for the request to complete.
        Note that if `retry` is specified, the timeout applies to each individual attempt.
    :param metadata: Additional metadata that is provided to the method.
    :return: `Operation` object with model deletion results, when finished.
    """
    client = self.get_client()
    # Apply the documented default-project fallback, the same way create_model does;
    # without it a falsy project_id would be formatted straight into the resource name.
    project_id = project_id or self.project_id
    name = f"projects/{project_id}/locations/{location}/models/{model_id}"
    result = client.delete_model(
        request={"name": name},
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return result
63 changes: 63 additions & 0 deletions providers/src/airflow/providers/google/cloud/links/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@
)
TRANSLATION_NATIVE_LIST_LINK = TRANSLATION_BASE_LINK + "/datasets?project={project_id}"

# URL template for a single native model's evaluation page; the placeholders
# ({location}, {dataset_id}, {model_id}, {project_id}) match the keys that
# TranslationModelLink.persist stores in XCom.
TRANSLATION_NATIVE_MODEL_LINK = TRANSLATION_BASE_LINK + (
    "/locations/{location}/datasets/{dataset_id}/evaluate;modelId={model_id}?project={project_id}"
)
# URL template for the models list page of a project; only {project_id} is needed.
TRANSLATION_MODELS_LIST_LINK = (
    TRANSLATION_BASE_LINK
    + "/models/list?project={project_id}"
)


class TranslationLegacyDatasetLink(BaseGoogleLink):
"""
Expand Down Expand Up @@ -270,3 +276,60 @@ def persist(
"project_id": project_id,
},
)


class TranslationModelLink(BaseGoogleLink):
    """
    Helper class for constructing Translation Model link.

    Link for legacy and native models.
    """

    name = "Translation Model"
    key = "translation_model"
    format_str = TRANSLATION_NATIVE_MODEL_LINK

    @staticmethod
    def persist(
        context: Context,
        task_instance,
        dataset_id: str,
        model_id: str,
        project_id: str,
    ):
        """
        Push the values needed to fill ``format_str`` to XCom under this link's key.

        :param context: Task execution context, forwarded to ``xcom_push``.
        :param task_instance: Object providing ``xcom_push`` and a ``location`` attribute.
        :param dataset_id: ID of the dataset the model belongs to.
        :param model_id: ID of the translation model.
        :param project_id: ID of the Google Cloud project.
        """
        task_instance.xcom_push(
            context,
            # Store under this class's own key; the previous code used
            # TranslationLegacyModelLink.key, i.e. another link's XCom slot.
            key=TranslationModelLink.key,
            value={
                "location": task_instance.location,
                "dataset_id": dataset_id,
                "model_id": model_id,
                "project_id": project_id,
            },
        )


class TranslationModelsListLink(BaseGoogleLink):
    """
    Helper class for constructing Translation Models List link.

    Both legacy and native models are available under this link.
    """

    name = "Translation Models List"
    key = "translation_models_list"
    format_str = TRANSLATION_MODELS_LIST_LINK

    @staticmethod
    def persist(
        context: Context,
        task_instance,
        project_id: str,
    ):
        """
        Push the project ID to XCom under this link's key.

        :param context: Task execution context, forwarded to ``xcom_push``.
        :param task_instance: Object providing ``xcom_push``.
        :param project_id: ID of the Google Cloud project whose models are listed.
        """
        link_params = {"project_id": project_id}
        task_instance.xcom_push(context, key=TranslationModelsListLink.key, value=link_params)
Loading

0 comments on commit 46cbd1c

Please sign in to comment.