Update VertexAI system tests
MaksYermak committed Jul 26, 2024
1 parent 871c88f commit 975b827
Showing 17 changed files with 159 additions and 238 deletions.
@@ -31,11 +31,6 @@
 from google.protobuf.struct_pb2 import Value
 
 from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.gcs import (
-    GCSCreateBucketOperator,
-    GCSDeleteBucketOperator,
-    GCSSynchronizeBucketsOperator,
-)
 from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
     CreateAutoMLForecastingTrainingJobOperator,
     DeleteAutoMLTrainingJobOperator,
@@ -48,21 +43,22 @@
 
 ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
 PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_vertex_ai_auto_ml_operations"
+DAG_ID = "vertex_ai_auto_ml_operations"
 REGION = "us-central1"
 FORECASTING_DISPLAY_NAME = f"auto-ml-forecasting-{ENV_ID}"
 MODEL_DISPLAY_NAME = f"auto-ml-forecasting-model-{ENV_ID}"
 
 RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-FORECAST_GCS_BUCKET_NAME = f"bucket_forecast_{DAG_ID}_{ENV_ID}".replace("_", "-")
 
 FORECAST_DATASET = {
     "display_name": f"forecast-dataset-{ENV_ID}",
     "metadata_schema_uri": schema.dataset.metadata.time_series,
     "metadata": ParseDict(
         {
             "input_config": {
-                "gcs_source": {"uri": [f"gs://{FORECAST_GCS_BUCKET_NAME}/vertex-ai/forecast-dataset.csv"]}
+                "gcs_source": {
+                    "uri": [f"gs://{RESOURCE_DATA_BUCKET}/vertex-ai/datasets/forecast-dataset.csv"]
+                }
             }
         },
         Value(),
@@ -89,22 +85,6 @@
     catchup=False,
     tags=["example", "vertex_ai", "auto_ml"],
 ) as dag:
-    create_bucket = GCSCreateBucketOperator(
-        task_id="create_bucket",
-        bucket_name=FORECAST_GCS_BUCKET_NAME,
-        storage_class="REGIONAL",
-        location=REGION,
-    )
-
-    move_dataset_file = GCSSynchronizeBucketsOperator(
-        task_id="move_dataset_to_bucket",
-        source_bucket=RESOURCE_DATA_BUCKET,
-        source_object="vertex-ai/datasets",
-        destination_bucket=FORECAST_GCS_BUCKET_NAME,
-        destination_object="vertex-ai",
-        recursive=True,
-    )
-
     create_forecast_dataset = CreateDatasetOperator(
         task_id="forecast_dataset",
         dataset=FORECAST_DATASET,
@@ -157,23 +137,24 @@
         project_id=PROJECT_ID,
         trigger_rule=TriggerRule.ALL_DONE,
     )
-    delete_bucket = GCSDeleteBucketOperator(
-        task_id="delete_bucket", bucket_name=FORECAST_GCS_BUCKET_NAME, trigger_rule=TriggerRule.ALL_DONE
-    )
 
     (
         # TEST SETUP
-        create_bucket
-        >> move_dataset_file
-        >> create_forecast_dataset
+        create_forecast_dataset
         # TEST BODY
         >> create_auto_ml_forecasting_training_job
         # TEST TEARDOWN
         >> delete_auto_ml_forecasting_training_job
         >> delete_forecast_dataset
-        >> delete_bucket
     )
 
+    # ### Everything below this line is not part of example ###
+    # ### Just for system tests purpose ###
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "tearDown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
+
 from tests.system.utils import get_test_run  # noqa: E402
 
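A note on the watcher block added above: the teardown tasks (`delete_auto_ml_forecasting_training_job`, `delete_forecast_dataset`) run with `trigger_rule=TriggerRule.ALL_DONE`, so they execute even when the test body fails — and their own success would otherwise leave the whole DAG run marked green. The `watcher` task fails the run whenever any upstream task failed. A minimal self-contained sketch of the pattern, assuming a watcher body that mirrors what `tests/system/utils/watcher.py` conventionally does (illustrative, not this repo's exact code):

from datetime import datetime

from airflow.decorators import task
from airflow.exceptions import AirflowException
from airflow.models.dag import DAG
from airflow.utils.trigger_rule import TriggerRule


@task(trigger_rule=TriggerRule.ONE_FAILED, retries=0)
def watcher() -> None:
    # Runs only if at least one upstream task failed; raising here
    # propagates that failure to the DAG run as a whole.
    raise AirflowException("Marking DAG run failed: an upstream task failed.")


with DAG(
    dag_id="watcher_pattern_sketch",  # hypothetical DAG, not from this commit
    schedule=None,
    start_date=datetime(2024, 1, 1),
    catchup=False,
) as dag:

    @task
    def test_body() -> None:
        ...  # the operation under test

    @task(trigger_rule=TriggerRule.ALL_DONE)
    def teardown() -> None:
        ...  # cleanup that must run even if test_body fails

    test_body() >> teardown()

    # The left operand is evaluated first, so dag.tasks is snapshotted
    # before the watcher task is added: every existing task feeds into it.
    list(dag.tasks) >> watcher()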
@@ -30,11 +30,6 @@
 from google.protobuf.struct_pb2 import Value
 
 from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.gcs import (
-    GCSCreateBucketOperator,
-    GCSDeleteBucketOperator,
-    GCSSynchronizeBucketsOperator,
-)
 from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
     CreateAutoMLImageTrainingJobOperator,
     DeleteAutoMLTrainingJobOperator,
@@ -48,13 +43,12 @@
 
 ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
 PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_vertex_ai_auto_ml_operations"
+DAG_ID = "vertex_ai_auto_ml_operations"
 REGION = "us-central1"
 IMAGE_DISPLAY_NAME = f"auto-ml-image-{ENV_ID}"
 MODEL_DISPLAY_NAME = f"auto-ml-image-model-{ENV_ID}"
 
 RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-IMAGE_GCS_BUCKET_NAME = f"bucket_image_{DAG_ID}_{ENV_ID}".replace("_", "-")
 
 IMAGE_DATASET = {
     "display_name": f"image-dataset-{ENV_ID}",
@@ -64,7 +58,7 @@
 IMAGE_DATA_CONFIG = [
     {
         "import_schema_uri": schema.dataset.ioformat.image.single_label_classification,
-        "gcs_source": {"uris": [f"gs://{IMAGE_GCS_BUCKET_NAME}/vertex-ai/image-dataset.csv"]},
+        "gcs_source": {"uris": [f"gs://{RESOURCE_DATA_BUCKET}/vertex-ai/datasets/flowers-dataset.csv"]},
     },
 ]
 
@@ -76,22 +70,6 @@
     catchup=False,
     tags=["example", "vertex_ai", "auto_ml"],
 ) as dag:
-    create_bucket = GCSCreateBucketOperator(
-        task_id="create_bucket",
-        bucket_name=IMAGE_GCS_BUCKET_NAME,
-        storage_class="REGIONAL",
-        location=REGION,
-    )
-
-    move_dataset_file = GCSSynchronizeBucketsOperator(
-        task_id="move_dataset_to_bucket",
-        source_bucket=RESOURCE_DATA_BUCKET,
-        source_object="vertex-ai/datasets",
-        destination_bucket=IMAGE_GCS_BUCKET_NAME,
-        destination_object="vertex-ai",
-        recursive=True,
-    )
-
     create_image_dataset = CreateDatasetOperator(
         task_id="image_dataset",
         dataset=IMAGE_DATASET,
@@ -143,27 +121,25 @@
         project_id=PROJECT_ID,
         trigger_rule=TriggerRule.ALL_DONE,
     )
-    delete_bucket = GCSDeleteBucketOperator(
-        task_id="delete_bucket",
-        bucket_name=IMAGE_GCS_BUCKET_NAME,
-        trigger_rule=TriggerRule.ALL_DONE,
-    )
 
     (
         # TEST SETUP
-        [
-            create_bucket >> move_dataset_file,
-            create_image_dataset,
-        ]
+        create_image_dataset
         >> import_image_dataset
         # TEST BODY
         >> create_auto_ml_image_training_job
         # TEST TEARDOWN
         >> delete_auto_ml_image_training_job
         >> delete_image_dataset
-        >> delete_bucket
     )
 
+    # ### Everything below this line is not part of example ###
+    # ### Just for system tests purpose ###
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "tearDown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
+
 from tests.system.utils import get_test_run  # noqa: E402
 
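The `IMAGE_DATA_CONFIG` manifest above is consumed by the `import_image_dataset` task, whose definition sits in an elided part of this file. A sketch of how the dataset-creation and import operators typically fit together, reusing the constants defined in this file; it assumes placement inside the `with DAG(...) as dag:` block, and the `import_image_data` task_id plus the `.output["dataset_id"]` wiring follow the provider's usual example style rather than the elided lines themselves:

from airflow.providers.google.cloud.operators.vertex_ai.dataset import (
    CreateDatasetOperator,
    ImportDataOperator,
)

# Create an empty Vertex AI image dataset...
create_image_dataset = CreateDatasetOperator(
    task_id="image_dataset",
    dataset=IMAGE_DATASET,
    region=REGION,
    project_id=PROJECT_ID,
)

# ...then import the CSV manifest that points at files in the shared
# resource bucket (no per-test bucket or copy step needed anymore).
import_image_dataset = ImportDataOperator(
    task_id="import_image_data",  # hypothetical task_id
    dataset_id=create_image_dataset.output["dataset_id"],
    region=REGION,
    project_id=PROJECT_ID,
    import_configs=IMAGE_DATA_CONFIG,
)

create_image_dataset >> import_image_dataset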
@@ -50,6 +50,13 @@
     )
     # [END how_to_cloud_vertex_ai_list_auto_ml_training_job_operator]
 
+    # ### Everything below this line is not part of example ###
+    # ### Just for system tests purpose ###
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "tearDown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
 
 from tests.system.utils import get_test_run  # noqa: E402
 
@@ -31,11 +31,6 @@
 from google.protobuf.struct_pb2 import Value
 
 from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.gcs import (
-    GCSCreateBucketOperator,
-    GCSDeleteBucketOperator,
-    GCSSynchronizeBucketsOperator,
-)
 from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
     CreateAutoMLTabularTrainingJobOperator,
     DeleteAutoMLTrainingJobOperator,
@@ -48,21 +43,20 @@
 
 ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
 PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_vertex_ai_auto_ml_operations"
+DAG_ID = "vertex_ai_auto_ml_operations"
 REGION = "us-central1"
 TABULAR_DISPLAY_NAME = f"auto-ml-tabular-{ENV_ID}"
 MODEL_DISPLAY_NAME = "adopted-prediction-model"
 
 RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-TABULAR_GCS_BUCKET_NAME = f"bucket_tabular_{DAG_ID}_{ENV_ID}".replace("_", "-")
 
 TABULAR_DATASET = {
     "display_name": f"tabular-dataset-{ENV_ID}",
     "metadata_schema_uri": schema.dataset.metadata.tabular,
     "metadata": ParseDict(
         {
             "input_config": {
-                "gcs_source": {"uri": [f"gs://{TABULAR_GCS_BUCKET_NAME}/vertex-ai/tabular-dataset.csv"]}
+                "gcs_source": {"uri": [f"gs://{RESOURCE_DATA_BUCKET}/vertex-ai/datasets/tabular-dataset.csv"]}
             }
         },
         Value(),
@@ -91,22 +85,6 @@
     catchup=False,
     tags=["example", "vertex_ai", "auto_ml"],
 ) as dag:
-    create_bucket = GCSCreateBucketOperator(
-        task_id="create_bucket",
-        bucket_name=TABULAR_GCS_BUCKET_NAME,
-        storage_class="REGIONAL",
-        location=REGION,
-    )
-
-    move_dataset_file = GCSSynchronizeBucketsOperator(
-        task_id="move_dataset_to_bucket",
-        source_bucket=RESOURCE_DATA_BUCKET,
-        source_object="vertex-ai/datasets",
-        destination_bucket=TABULAR_GCS_BUCKET_NAME,
-        destination_object="vertex-ai",
-        recursive=True,
-    )
-
     create_tabular_dataset = CreateDatasetOperator(
         task_id="tabular_dataset",
         dataset=TABULAR_DATASET,
@@ -150,25 +128,23 @@
         trigger_rule=TriggerRule.ALL_DONE,
     )
 
-    delete_bucket = GCSDeleteBucketOperator(
-        task_id="delete_bucket",
-        bucket_name=TABULAR_GCS_BUCKET_NAME,
-        trigger_rule=TriggerRule.ALL_DONE,
-    )
-
     (
         # TEST SETUP
-        create_bucket
-        >> move_dataset_file
-        >> create_tabular_dataset
+        create_tabular_dataset
         # TEST BODY
         >> create_auto_ml_tabular_training_job
         # TEST TEARDOWN
         >> delete_auto_ml_tabular_training_job
         >> delete_tabular_dataset
-        >> delete_bucket
     )
 
+    # ### Everything below this line is not part of example ###
+    # ### Just for system tests purpose ###
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "tearDown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
+
 from tests.system.utils import get_test_run  # noqa: E402
 
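Both the forecast and tabular files build the dataset's `metadata` field by parsing a plain dict into a protobuf `Value`, the type the Vertex AI dataset API expects for free-form metadata. A standalone illustration of just that conversion, with a placeholder bucket path:

from google.protobuf.json_format import ParseDict
from google.protobuf.struct_pb2 import Value

# ParseDict fills the supplied message from a JSON-like dict and
# returns it; here the target message type is google.protobuf.Value.
metadata = ParseDict(
    {
        "input_config": {
            "gcs_source": {"uri": ["gs://example-bucket/datasets/tabular-dataset.csv"]}
        }
    },
    Value(),
)

assert isinstance(metadata, Value)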
@@ -30,11 +30,6 @@
 from google.protobuf.struct_pb2 import Value
 
 from airflow.models.dag import DAG
-from airflow.providers.google.cloud.operators.gcs import (
-    GCSCreateBucketOperator,
-    GCSDeleteBucketOperator,
-    GCSSynchronizeBucketsOperator,
-)
 from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
     CreateAutoMLTextTrainingJobOperator,
     DeleteAutoMLTrainingJobOperator,
@@ -48,13 +43,12 @@
 
 ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")
 PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")
-DAG_ID = "example_vertex_ai_auto_ml_operations"
+DAG_ID = "vertex_ai_auto_ml_operations"
 REGION = "us-central1"
 TEXT_DISPLAY_NAME = f"auto-ml-text-{ENV_ID}"
 MODEL_DISPLAY_NAME = f"auto-ml-text-model-{ENV_ID}"
 
 RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-TEXT_GCS_BUCKET_NAME = f"bucket_text_{DAG_ID}_{ENV_ID}".replace("_", "-")
 
 TEXT_DATASET = {
     "display_name": f"text-dataset-{ENV_ID}",
@@ -64,7 +58,7 @@
 TEXT_DATA_CONFIG = [
     {
         "import_schema_uri": schema.dataset.ioformat.text.single_label_classification,
-        "gcs_source": {"uris": [f"gs://{TEXT_GCS_BUCKET_NAME}/vertex-ai/text-dataset.csv"]},
+        "gcs_source": {"uris": [f"gs://{RESOURCE_DATA_BUCKET}/vertex-ai/datasets/text-dataset.csv"]},
     },
 ]
 
@@ -75,22 +69,6 @@
     catchup=False,
     tags=["example", "vertex_ai", "auto_ml"],
 ) as dag:
-    create_bucket = GCSCreateBucketOperator(
-        task_id="create_bucket",
-        bucket_name=TEXT_GCS_BUCKET_NAME,
-        storage_class="REGIONAL",
-        location=REGION,
-    )
-
-    move_dataset_file = GCSSynchronizeBucketsOperator(
-        task_id="move_dataset_to_bucket",
-        source_bucket=RESOURCE_DATA_BUCKET,
-        source_object="vertex-ai/datasets",
-        destination_bucket=TEXT_GCS_BUCKET_NAME,
-        destination_object="vertex-ai",
-        recursive=True,
-    )
-
     create_text_dataset = CreateDatasetOperator(
         task_id="text_dataset",
         dataset=TEXT_DATASET,
@@ -140,27 +118,24 @@
         trigger_rule=TriggerRule.ALL_DONE,
     )
 
-    delete_bucket = GCSDeleteBucketOperator(
-        task_id="delete_bucket",
-        bucket_name=TEXT_GCS_BUCKET_NAME,
-        trigger_rule=TriggerRule.ALL_DONE,
-    )
-
     (
         # TEST SETUP
-        [
-            create_bucket >> move_dataset_file,
-            create_text_dataset,
-        ]
+        create_text_dataset
        >> import_text_dataset
         # TEST BODY
         >> create_auto_ml_text_training_job
         # TEST TEARDOWN
         >> delete_auto_ml_text_training_job
         >> delete_text_dataset
-        >> delete_bucket
     )
 
+    # ### Everything below this line is not part of example ###
+    # ### Just for system tests purpose ###
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "tearDown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
+
 from tests.system.utils import get_test_run  # noqa: E402
 
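Every file in this commit keeps the trailing `get_test_run` import, while the lines below it fall outside the visible hunks. By the Airflow system-test convention documented in tests/system/README.md, a module tail like the following is what makes each DAG runnable via pytest — this is an assumption about the elided code, not part of the diff:

from tests.system.utils import get_test_run  # noqa: E402

# Conventional tail (assumed, elided from this diff view):
# get_test_run wraps the DAG so pytest can discover and execute it,
# see tests/system/README.md#run_via_pytest.
test_run = get_test_run(dag)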